1 Star 0 Fork 0

cutepdcc/ceshi

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
app.js 5.63 KB
一键复制 编辑 原始数据 按行查看 历史
cutepdcc 提交于 2024-10-18 15:15 . add
const fs = require("fs")
const path = require("path")
const request = require("request")
const dayjs = require('dayjs')
const lodash = require('lodash')
const { readJsonFile, writeJsonFile, downloadFileByUrl, fsExistsSync } = require("./utils/file")
const { sleep, randomRange, syncRequest } = require("./utils/utils")
// Directory (next to this script) where downloaded files are stored, one sub-folder per JSON file.
const fileSavePath = path.join(__dirname, "public");
// Directory holding one `<date>.json` file per day, each containing a list of attachment URLs.
const jsonFilePath = path.join(__dirname, "json");
// Directory holding one `<date>.json` file per day, each containing the picked resume fields.
const dataJsonFilePath = path.join(__dirname, "datajson");
/**
 * For every day in a hard-coded date range:
 *   1. query the resume index endpoint and collect the resume ids of that day,
 *   2. append a trimmed copy of each resume record to `datajson/<date>.json`,
 *   3. query the attachments endpoint for every collected resume id and
 *      append the resulting file URLs to `json/<date>.json`.
 *
 * Requests are spaced out with random sleeps between calls.
 *
 * @returns {Promise<void>} Resolves once every day in the range was processed.
 */
const saveUrlsJson = async () => {
  const dateRange = ['2024-09-27', '2024-09-27']
  // When true, only a single index page is fetched per day.
  const isOnlyOnePage = true
  // Upper bound (in hours) for the index-paging loop of a single day.
  const overHour = 1
  // First index page to request; later pages are only requested when isOnlyOnePage is false.
  const startPage = 3
  // NOTE(review): this bearer token is committed in source; consider loading it from configuration.
  const requestHeaders = {
    'content-type': 'application/json',
    'authorization': 'Bearer 8821|bWZ7XISXSOIfbSIfO2t4Rs6Put0m4GsTYUZarom5',
  }
  // Fields (lodash paths) kept from every resume record.
  const pickedFields = ['name', 'mobile', 'edu.name', 'edus', 'email', 'source.name', 'status_human', 'sex_human', 'industries', 'created_at', 'updated_at']

  let start = dateRange[0]
  const end = dateRange[1]
  // 'YYYY-MM-DD' strings order the same way lexicographically and chronologically.
  while (start <= end) {
    const nextDay = dayjs(start).add(1, 'day').format('YYYY-MM-DD')
    let dateResumeIds = []
    let page = startPage
    const startTime = Date.now()

    while (true) {
      try {
        // presumably syncRequest resolves with the parsed response body — confirm in ./utils/utils
        const res = await syncRequest({
          url: 'https://lp.gzhcll.com/api/resume/index',
          method: 'GET',
          json: true,
          headers: requestHeaders,
          body: {
            isPage: 1,
            page,
            pageSize: 100,
            sex: 2,
            created_at: [start, nextDay]
          }
        })
        const datas = res?.data?.data ?? []
        console.log("datas", datas.map(item => item.name))
        const dateTotal = isOnlyOnePage ? datas.length : (res?.data?.total || datas.length)
        dateResumeIds = dateResumeIds.concat(datas.map(item => item.id))
        console.log('dateResumeIds', dateResumeIds.length)
        console.log('dateTotal', dateTotal)

        const writeDataJsonPath = path.join(dataJsonFilePath, `${start}.json`)
        let filterDatas = datas.map(item => lodash.pick(item, pickedFields))
        if (fsExistsSync(writeDataJsonPath)) {
          // Append to what is already on disk instead of overwriting it.
          // (The previous code assigned the merged list to an undeclared-yet
          // `fileUrls`, which threw and discarded the merge.)
          const existDatas = readJsonFile(writeDataJsonPath)
          filterDatas = [...existDatas, ...filterDatas]
        }
        writeJsonFile(writeDataJsonPath, filterDatas)

        const elapsedHours = (Date.now() - startTime) / (1000 * 60 * 60)
        // An empty page means there is nothing more to fetch; stop instead of
        // polling until the time limit.
        if (datas.length === 0 || dateResumeIds.length >= dateTotal || elapsedHours > overHour) {
          break
        }
        // Advance so the next iteration does not fetch the same page again.
        page += 1
        await sleep(randomRange(1000 * 3, 1000 * 8))
      } catch (err) {
        // Stop paging for this day, but make the failure visible.
        console.error(`resume index request failed for ${start}`, err)
        break
      }
    }
    console.log("出来了", dateResumeIds.length, dateResumeIds)

    let fileUrls = []
    for (let i = 0; i < dateResumeIds.length; i++) {
      try {
        const res = await syncRequest({
          url: 'https://lp.gzhcll.com/api/resume/attachments',
          method: 'GET',
          json: true,
          headers: requestHeaders,
          body: {
            isPage: 1,
            page: 1,
            pageSize: 1,
            resume_id: dateResumeIds[i]
          }
        })
        const files = res?.data ?? []
        if (files[0]) {
          fileUrls.push(`https://lp.gzhcll.com/storage/${files[0].path}`)
        }
      } catch (err) {
        // Keep going: one failed lookup must not discard the URLs collected so far.
        console.error(`attachment request failed for resume ${dateResumeIds[i]}`, err)
      }
      console.log(i)
      // No need to wait after the last request.
      if (i === dateResumeIds.length - 1) {
        break
      }
      await sleep(randomRange(1000 * 20, 1000 * 70))
    }

    const writeJsonPath = path.join(jsonFilePath, `${start}.json`)
    if (fsExistsSync(writeJsonPath)) {
      const existUrls = readJsonFile(writeJsonPath)
      fileUrls = [...existUrls, ...fileUrls]
    }
    writeJsonFile(writeJsonPath, fileUrls)
    start = nextDay
  }
}
/**
 * Downloads every URL listed in the JSON files found in `jsonFilePath`.
 *
 * Each JSON file gets its own sub-folder under `fileSavePath`, named after the
 * part of the file name before the first dot ('common' if that part is empty).
 * A random pause is inserted after every download call.
 *
 * @returns {Promise<void>} Resolves after the last pause has elapsed.
 */
const batchDownload = async () => {
  const jsonFiles = fs.readdirSync(jsonFilePath)
  for (const jsonFile of jsonFiles) {
    const folderName = jsonFile.split(".")[0] || 'common'
    const foldPath = path.join(fileSavePath, folderName)
    const downloadUrls = readJsonFile(path.join(jsonFilePath, jsonFile))
    if (!fsExistsSync(foldPath)) {
      // recursive: also create `fileSavePath` itself when it does not exist yet.
      fs.mkdirSync(foldPath, { recursive: true })
    }
    for (const downloadUrl of downloadUrls) {
      // NOTE(review): the result of downloadFileByUrl is not awaited — confirm
      // whether it returns a promise whose failure should be handled here.
      downloadFileByUrl(downloadUrl, foldPath)
      await sleep(randomRange(1000 * 5, 1000 * 10))
    }
  }
}
/**
 * Script entry point: runs the URL-collection step and waits for it to finish.
 * The download step is currently disabled.
 *
 * A failure is logged and reflected in the process exit code instead of
 * surfacing as an unhandled promise rejection.
 *
 * @returns {Promise<void>} Resolves when the collection step has completed or failed.
 */
const run = async () => {
  try {
    await saveUrlsJson()
    // batchDownload()
  } catch (err) {
    console.error('run failed', err)
    process.exitCode = 1
  }
}
/**
 * Manual smoke test: writes a sample JSON file into `jsonFilePath`, reads it
 * back, lists the directory, and sends one request to a local test server.
 *
 * @returns {Promise<void>} Resolves once the request has completed.
 */
const test = async () => {
  // Build the path with path.join: plain string concatenation dropped the
  // separator and wrote "<dir>/jsoncxk.json", which the read below never saw.
  const sampleJsonPath = path.join(jsonFilePath, "cxk.json")
  writeJsonFile(sampleJsonPath, { age: 18, name: 'cxk' })
  // Read and list via the same absolute directory so the result does not
  // depend on the current working directory.
  console.log(readJsonFile(sampleJsonPath))
  console.log(fs.readdirSync(jsonFilePath))
  console.log('==========================================================================')
  await syncRequest({
    url: 'http://192.168.5.179:3000',
    method: 'GET',
    json: true,
    headers: {
      'content-type': 'application/json',
      'authorization': 'Bearer 8821|bWZ7XISXSOIfbSIfO2t4Rs6Put0m4GsTYUZarom5',
    },
    body: {
      isPage: 1,
      page: 1,
      pageSize: 1,
    }
  })
}
// The manual smoke test is disabled; uncomment to run it instead of / alongside run().
// test()
// Start the script as soon as the module is loaded.
run()
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/cutepdcc/ceshi.git
git@gitee.com:cutepdcc/ceshi.git
cutepdcc
ceshi
ceshi
master

搜索帮助