2 Star 0 Fork 0

zhtop/顶点

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
ldb.ding.js 5.81 KB
一键复制 编辑 原始数据 按行查看 历史
菜鸟_it_ 提交于 2017-12-05 16:54 . 增加了async await异步处理
// const http = require('http');
const cheerio = require("cheerio");
const fs = require("fs"),
http = require("./utils/http.js"),
leveldb = require("./utils/levedb.js");
// Mutable crawl state shared by the functions in this script.
const bookInfo = {
  // Running counter printed in the per-book progress log.
  thisBookId: 1,
  // Listing-page counter advanced by the paging loop.
  num: 1,
  // Overwritten with a prefix derived from the listing path being crawled.
  prefix: "",
  fileName: "",
};
//27/27137/17116455 "【088】历史改变 出兵并州"
// leveldb.batch(ops,(err,value)=>{console.log(err,value[0])});
// leveldb.find({prefix:'67-67980-'},(err,value)=>{console.log(err,(value)+'+++++++++++')});
// leveldb.put('id',JSON.stringify({name:'12233'}),err=>{console.log(err)});
// leveldb.get('32-32624-',(err,value)=>{console.log(err,JSON.parse(value))});
/**
 * Fetch a book's chapter index page, store the book metadata and its chapter
 * directory in leveldb, and start a download for every chapter listed.
 *
 * @param {string} id - Book path below `/html/`, e.g. "32/32624/".
 * @param {string} [link=''] - Full URL to request instead of the one built from `id`.
 * @returns {Promise<void>} Settles once the index page has been processed; the
 *   per-chapter downloads are started but not awaited. Rejects if the request
 *   or the parsing fails.
 */
const chapter = async function (id, link = "") {
  const url = link || `http://m.x23us.com/html/${id}`;
  const data = await http.request(url);
  const $ = cheerio.load(data);

  // Book header: title in <h1>, then author / category / last-update paragraphs.
  const header = $(".index_block");
  const meta = header.find("p");
  const info = {
    title: header.find("h1").text(),
    author: $(meta[0]).text(),
    cat: $(meta[1]).text(),
    updated: $(meta[2]).text(),
    _id: id.replace(/\//g, "-"),
  };
  console.log(`爬取了第${bookInfo.thisBookId++} 本,书名:${info.title}--${info._id}`);

  // Book record, keyed by the dash-separated book id.
  leveldb.put(info._id, info, insertCall);

  const directory = [];
  $(".chapter li a").each((index, anchor) => {
    const node = $(anchor);
    const href = node.attr("href").trim();
    // Strip the literal ".html" suffix. The previous /.html/ pattern had an
    // unescaped dot and could remove an unrelated "<char>html" substring.
    const base = href.replace(".html", "");
    const key = (id + base).replace(/\//g, "-");
    directory.push({
      type: "value object",
      key,
      value: {
        _id: key,
        p_id: id + base.replace(/\//g, "*"),
        id: base,
        chapter: node.text().trim(),
      },
    });
    // Fire-and-forget: chapters() handles its own request failures.
    chapters(id + href);
  });

  // Chapter directory for this book, written in one batch.
  leveldb.batch(directory, callBack);
};
/**
 * Download one chapter page and store its title and text in leveldb.
 *
 * @param {string} p_id - Chapter path below `/html/` (book id followed by the
 *   chapter file name); also the source of the storage key.
 * @param {string} [link=''] - Full URL to request instead of the one built from `p_id`.
 * @returns {Promise<void>} Settles after the chapter is stored, after a retry
 *   finishes, or after a non-retryable error has been logged.
 */
const chapters = function (p_id, link = "") {
  const url = link || `http://m.x23us.com/html/${p_id}`;
  return http
    .request(url)
    .then((data) => {
      const $ = cheerio.load(data);
      // Drop the site's promotional banner from the chapter text.
      const content = $("#txt").text().trim().replace(/顶点小说 23US.COM更新最快/g, "");
      const title = $("#nr_title").text().trim();
      const _id = p_id.replace(/\//g, "-").replace(".html", "");
      leveldb.put(_id, { _id: _id, p_id: p_id, chapter: title, content: content }, insertCall);
    })
    .catch((err) => {
      if (err && err.url) {
        // Retry the failed request. The URL goes in as `link` and `p_id` is
        // kept, so the base URL is not prefixed a second time and the storage
        // key is still derived from the chapter path.
        // NOTE(review): assumes err.url is the full URL that failed, as set by
        // ./utils/http.js — confirm.
        return chapters(p_id, err.url);
      }
      // Errors without a url (e.g. thrown while parsing) are only logged.
      console.log((err && (err.errMessage || err.message)) || err);
      return undefined;
    });
};
/**
 * Request `http://m.x23us.com${id}` and hand the response to a page handler.
 *
 * @param {string} id - Site-relative path, e.g. "/class/3_1.html".
 * @param {boolean} [isClass=true] - When true the response goes to setBookCat;
 *   when false a prefix derived from `id` is stored on bookInfo and the
 *   response goes to setBookId.
 * @returns {Promise<void>}
 */
const bookCat = async function (id, isClass = true) {
  const data = await http.request(`http://m.x23us.com${id}`);
  if (!isClass) {
    // "/class/3_1.html" -> "class3_1_"
    bookInfo.prefix = `${id.replace(".html", "").replace(/\//g, "")}_`;
    setBookId(data);
    return;
  }
  setBookCat(data);
};
/**
 * Parse a category index page. For every `.content li a` link, build a
 * descriptor (site URL, label, href and the two numbers taken from the second
 * pager link) and pass it to setBookId.
 *
 * NOTE(review): setBookId feeds its argument to cheerio.load, yet this function
 * passes it a plain descriptor object rather than page HTML — confirm the
 * intended call before relying on the isClass=true path.
 *
 * @param {string} data - HTML of the category index page.
 */
const setBookCat = function (data) {
  const $ = cheerio.load(data);
  const categoryLinks = $(".content li a");
  // The second pager link looks like ".../class/<a>_<b>.html"; keep <a> and <b>.
  const pagerHref = $($(".page a")[1]).attr("href");
  const [firstPage, lastPage] = pagerHref
    .replace("http://m.x23us.com/class/", "")
    .replace(".html", "")
    .split("_");

  categoryLinks.each((index, anchor) => {
    const link = $(anchor);
    setBookId({
      url: "http://m.x23us.com",
      cat: `分类:${link.text()}`,
      id: link.attr("href"),
      firstPage,
      lastPage,
    });
  });
};
/**
 * Parse a book listing page: start a chapter-index crawl for every book found
 * under `.cover p`, then request further listing pages of the same class.
 *
 * The paging loop advances the shared `bookInfo.num` counter, so the pages it
 * requests depend on what earlier calls have already consumed.
 *
 * @param {string} data - HTML of the listing page.
 */
const setBookId = function (data) {
  const $ = cheerio.load(data);
  // The second pager link looks like ".../class/<classId>_<upperPage>.html".
  const pagerHref = $($(".page a")[1]).attr("href");
  const [classId, upperPage] = pagerHref
    .replace("http://m.x23us.com/class/", "")
    .replace(".html", "")
    .split("_");

  $(".cover p").each((index, paragraph) => {
    const anchors = $(paragraph).find("a");
    // "/html/32/32624/" -> "32/32624/"
    const bookPath = $(anchors[0])
      .attr("href")
      .replace("/html/", "");
    chapter(bookPath);
  });

  while (bookInfo.num <= upperPage) {
    bookCat(`/class/${classId}_${bookInfo.num}.html`, false);
    bookInfo.num++;
  }
};
/**
 * Completion callback for single leveldb writes: logs the error as JSON when
 * there is one and stays silent otherwise.
 *
 * @param {*} err - Error reported by the write, falsy on success.
 * @param {*} value - Result of the write (unused).
 */
const insertCall = function (err, value) {
  if (!err) {
    return;
  }
  console.log(JSON.stringify(err));
};
/**
 * Completion callback for leveldb batch writes: prints the error with a fixed
 * prefix when there is one and stays silent otherwise.
 *
 * @param {*} err - Error reported by the batch, falsy on success.
 * @param {*} value - Result of the batch (unused).
 */
const callBack = function (err, value) {
  if (!err) {
    return;
  }
  console.log("错误信息:" + err);
};
// chapter("32/32624/");
// Entry point: start crawling one class, beginning with its first listing page
// (http://m.x23us.com/class/3_1.html).
bookCat("/class/3_1.html", false).catch((err) => {
  // Without a handler, a failed start-up request would only surface as an
  // unhandled promise rejection.
  console.error("crawl failed:", (err && (err.errMessage || err.message)) || err);
});
// console.log(Object)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/pasuo/DingDian.git
git@gitee.com:pasuo/DingDian.git
pasuo
DingDian
顶点
master

搜索帮助