代码拉取完成,页面将自动刷新
同步操作将从 Chennile/红袖网小说爬虫(最简单版) 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
import java.io.*;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal crawler for a hongxiu.com book: reads the catalog page, extracts
 * every chapter link, appends "title link" to {@code 1.html}, and saves each
 * chapter's text to {@code <title>.txt} in the working directory.
 */
public class Main {
    /** Catalog page of the book to crawl. */
    private static final String CATALOG_URL =
            "https://www.hongxiu.com/book/11809422404543803#Catalog";

    /** Only links containing this prefix are treated as chapters. */
    private static final String CHAPTER_URL_PREFIX = "https://www.hongxiu.com/chapter/";

    /** Anchor carrying a data-cid attribute; group 2 = text after data-cid=", group 3 = anchor body. */
    private static final Pattern CHAPTER_LINK =
            Pattern.compile("<(a.*? data-cid=\"(.*?)\")>(.*?)</a>", Pattern.DOTALL);

    /** Container holding the chapter text; group 1 = inner HTML. */
    private static final Pattern CHAPTER_BODY =
            Pattern.compile("<div class=\"read-content j_readContent\">(.*?)</div>", Pattern.DOTALL);

    /** Characters that are path separators or rejected by common file systems, plus control characters. */
    private static final Pattern UNSAFE_FILE_CHARS =
            Pattern.compile("[\\\\/:*?\"<>|\\p{Cntrl}]");

    /**
     * Crawls the catalog and downloads every chapter it links to.
     * A failure on one chapter is reported and the crawl continues with the next.
     *
     * @param args unused
     * @throws Exception if the catalog page itself cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        String content = fetchPage(CATALOG_URL);
        Matcher matcher = CHAPTER_LINK.matcher(content);
        while (matcher.find()) {
            // The lazy capture may run past the data-cid value into later attributes,
            // so keep only the part before the closing quote (protocol-relative URL).
            String href = "https:" + attributeValue(matcher.group(2));
            String tittle = matcher.group(3);
            if (href.contains(CHAPTER_URL_PREFIX)) {
                try {
                    // Record the link and title in the index file.
                    downHtm(href, tittle);
                    // Store the chapter text under the chapter title.
                    downTxt(href, tittle);
                } catch (Exception e) {
                    // Best effort: report and keep going with the remaining chapters.
                    System.err.println("Failed to download chapter: " + href);
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Downloads a chapter page and appends its text to {@code <title>.txt}.
     * Nothing is written when the page has no chapter-content container.
     *
     * @param href   URL of the chapter page
     * @param tittle chapter title; characters unsafe in file names are replaced with '_'
     * @throws Exception if the page cannot be read or the file cannot be written
     */
    public static void downTxt(String href, String tittle) throws Exception {
        String content = fetchPage(href);
        Matcher matcher = CHAPTER_BODY.matcher(content);
        // The title comes from remote HTML, so never use it as a path verbatim.
        File target = new File(safeFileName(tittle) + ".txt");
        while (matcher.find()) {
            String text = matcher.group(1)
                    .replace("<p>", "")
                    .replace("<%-cInfo.content %>", "");
            appendUtf8(target, text);
        }
    }

    /**
     * Appends one index entry ("title link", surrounded by CRLF) to {@code 1.html}.
     *
     * @param href   chapter URL
     * @param tittle chapter title
     * @throws Exception if the index file cannot be written
     */
    public static void downHtm(String href, String tittle) throws Exception {
        appendUtf8(new File("1.html"), "\r\n" + tittle + " " + href + "\r\n");
    }

    /** Fetches a URL as UTF-8 text; lines are concatenated without separators, as the regexes expect. */
    private static String fetchPage(String address) throws IOException {
        URLConnection conn = new URL(address).openConnection();
        StringBuilder page = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                page.append(line);
            }
        }
        return page.toString();
    }

    /** Returns the captured text up to its first double quote, or all of it when there is none. */
    private static String attributeValue(String captured) {
        int quote = captured.indexOf('"');
        return quote >= 0 ? captured.substring(0, quote) : captured;
    }

    /** Replaces path separators and other unsafe file-name characters with '_'. */
    private static String safeFileName(String title) {
        return UNSAFE_FILE_CHARS.matcher(title).replaceAll("_");
    }

    /** Appends text to a file as UTF-8, closing the file even when the write fails. */
    private static void appendUtf8(File file, String text) throws IOException {
        try (Writer out = new OutputStreamWriter(
                new FileOutputStream(file, true), StandardCharsets.UTF_8)) {
            out.write(text);
        }
    }
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容不涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违反国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。