1 Star 0 Fork 5

jishuke/爬虫-1688商品详情数据

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
getPagination.js 3.51 KB
一键复制 编辑 原始数据 按行查看 历史
gebinda 提交于 2021-07-08 18:27 . 重构
const axios = require("axios");
const cheerio = require("cheerio");
const fs = require("fs");
// Pool of browser User-Agent strings. The crawler picks one entry at random
// (Math.random over the pool length) each time it requests a listing page.
const userAgentPool = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; Tablet PC 2.0; .NET4.0E)",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; ) AppleWebKit/534.12 (KHTML, like Gecko) Maxthon/3.0 Safari/534.12",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.33 Safari/534.3 SE 2.X MetaSr 1.0",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1 QQBrowser/6.9.11079.201",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Maxthon 2.0)",
    "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; The World)",
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)",
    // NOTE: identical to an earlier entry; the duplicate is kept so the
    // random selection odds stay exactly as they were.
    "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
];

// First listing page; the crawl starts here as page 1.
const url = "https://shop460bu04581k99.1688.com/page/offerlist.htm?spm=a261y.7663282.0.0.7b86ff8clcxtim";

// Accumulates the link hrefs collected from every listing page that is crawled.
let url_list = [];
/**
 * Script entry point: walks the shop's paginated listing starting at `url`,
 * appends every matched link href to `url_list`, then writes the list as JSON
 * to url/pageUrl.txt. Any failure is reported once and sets a non-zero exit code.
 */
(async function () {
    // Upper bound on consecutive re-fetches of a page that came back without
    // any pagination markup; prevents an endless retry loop.
    const MAX_RETRIES = 5;

    /**
     * Download one listing page and parse it with cheerio.
     *
     * @param {string} pageUrl - Absolute URL of the page to fetch.
     * @returns {Promise<Function>} The cheerio query function for the page.
     */
    async function loadPage(pageUrl) {
        const res = await axios.get(pageUrl, {
            // axios reads request headers from `headers`; with the key spelled
            // `header` the random User-Agent was never actually sent.
            headers: {
                "User-Agent": userAgentPool[Math.floor(Math.random() * userAgentPool.length)]
            }
        });
        return cheerio.load(res.data);
    }

    /**
     * Persist the collected hrefs as JSON in url/pageUrl.txt.
     * The directory is created first and the write only starts afterwards,
     * so the file can no longer be written before its directory exists.
     *
     * @returns {Promise<void>}
     */
    async function saveUrls() {
        const urlStr = JSON.stringify(url_list);
        console.log("全部分页抓取成功,正在保存数据....");
        const firstDir = "url";
        try {
            await fs.promises.mkdir(firstDir);
            console.log(`创建${firstDir}目录成功!`);
        } catch (err) {
            // An already-existing directory is the expected case on re-runs;
            // anything else is reported, and the write below will surface it too.
            if (err.code !== "EEXIST") {
                console.log(err);
            }
        }
        try {
            await fs.promises.writeFile(`${firstDir}/pageUrl.txt`, urlStr, "utf8");
            console.log(`文件储存成功,开打${firstDir}目录下的 pageUrl.txt 查看`);
        } catch (err) {
            console.log("文件储存失败>>>>>");
            console.log(err);
        }
    }

    /**
     * Crawl listing pages sequentially, following the `.next` link, until
     * `total` pages were processed or no further link exists, then save.
     *
     * @param {string} startUrl - URL of the page to start from.
     * @param {number} pageNum - 1-based number of the starting page.
     * @param {number} [total=10] - Page limit; replaced by the `.page-count`
     *   text of page 1 when that text parses as an integer.
     * @returns {Promise<void>}
     * @throws {Error} When a page has no pagination markup after MAX_RETRIES
     *   re-fetches, or when a request fails.
     */
    async function getPage(startUrl, pageNum, total = 10) {
        let pageUrl = startUrl;
        let currentPage = pageNum;
        let pageTotal = total;
        let retries = 0;

        for (;;) {
            const $ = await loadPage(pageUrl);

            if (currentPage === 1) {
                // Keep the numeric default when the counter is missing or not a number.
                const parsedTotal = Number.parseInt($('.page-count').text(), 10);
                if (!Number.isNaN(parsedTotal)) {
                    pageTotal = parsedTotal;
                }
            }

            const nextPage = $('.next').attr('href');
            const endPage = $('.next-disabled').attr('href');

            if (!nextPage && !endPage) {
                // No pagination markup in the response: fetch the same page
                // again (a new random User-Agent is drawn), but only a bounded
                // number of times.
                retries += 1;
                if (retries > MAX_RETRIES) {
                    throw new Error(`No pagination found on page ${currentPage} after ${MAX_RETRIES} retries: ${pageUrl}`);
                }
                console.log("重置");
                continue;
            }
            retries = 0;

            const tab_urls = $('.offer-list-row .offer-list-row-offer .image a', '.common-column-230');
            console.log("当前页数:", currentPage);
            tab_urls.each((i, el) => {
                url_list.push($(el).attr('href'));
            });
            console.log(`${currentPage}页数据爬取成功`);

            currentPage += 1;
            // Stop at the page limit, or when the last page offers no next link.
            if (currentPage > pageTotal || !nextPage) {
                break;
            }
            // Resolve relative / protocol-relative hrefs against the current page.
            pageUrl = new URL(nextPage, pageUrl).href;
        }

        await saveUrls();
    }

    await getPage(url, 1);
})().catch((err) => {
    // Single place where crawl failures surface instead of an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
});
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/jishuke/crawler-1688-product-details.git
git@gitee.com:jishuke/crawler-1688-product-details.git
jishuke
crawler-1688-product-details
爬虫-1688商品详情数据
master

搜索帮助