1 Star 0 Fork 0

Qubit/Qzone

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
qzone.py 5.10 KB
一键复制 编辑 原始数据 按行查看 历史
Qubit 提交于 2023-05-09 19:02 . 结构功能分离
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import requests
import datetime
import json
import time
import sys
def getCode(driver):
    """Open the Qzone login page and save a screenshot of it for QR-code login.

    Args:
        driver: A Selenium WebDriver-like object providing ``get``,
            ``save_screenshot`` and ``quit``.

    Returns:
        None. On success the login page screenshot is written to
        ``./登录页.png``. On failure the driver is quit and the error is
        reported on stdout.

    Only ``Exception`` is handled here: ``KeyboardInterrupt`` and
    ``SystemExit`` propagate to the caller instead of being swallowed.
    """
    print("打开浏览器访问页面" + time.strftime('%Y-%m-%d %H:%M'))
    try:
        driver.get('http://qzone.qq.com')
        # Give the page a moment to render before taking the screenshot.
        time.sleep(2)
        driver.save_screenshot('./登录页.png')
        print("浏览器启动并访问登录页成功,请速速扫码")
    except Exception as e:
        # The browser is unusable at this point; close it and report why.
        driver.quit()
        print(e)
        print("一开始就出错,浏览器已退出")
def getBlogs(driver, attr, limit=10):
    """Collect the raw HTML of recent feed items from each configured Qzone page.

    Args:
        driver: A Selenium WebDriver-like object that is already logged in.
        attr: Mapping with a ``'qzoneURLs'`` entry; each item is indexed as
            ``item[0]`` (page URL), ``item[1]`` and ``item[2]`` (passed
            through unchanged; the consumer treats them as user id and
            user name).
        limit: Maximum number of feed items taken per page (default 10,
            the previously hard-coded cap).

    Returns:
        A list of ``[inner_html, item[1], item[2]]`` lists, or ``False``
        when a third page fails to load (taken as "not logged in"); in that
        case a screenshot is saved to ``./ym.png``.

    Only ``Exception`` is handled, so ``KeyboardInterrupt`` / ``SystemExit``
    are no longer swallowed while pages are loading.
    """
    lis = []
    failures = 0  # number of pages that failed so far; used to detect "not scanned / not logged in"
    for item in attr['qzoneURLs']:
        print(item[0])
        try:
            driver.get(item[0])
            time.sleep(2)
            # The feed list lives inside an embedded frame.
            small_frame = driver.find_element(By.CLASS_NAME, "app_canvas_frame")
            driver.switch_to.frame(small_frame)
            time.sleep(1)
            ol = driver.find_elements(By.XPATH, '//ol[@class="mod_feed_lst js_error_display"]/li')
        except Exception:
            if failures >= 2:
                # Third failure: assume the session is not logged in.
                print("应该是没登录,截张图,返回False")
                driver.save_screenshot('./ym.png')
                return False
            failures += 1
            print("访问出错跳过")
            continue
        # Keep at most `limit` items per page.
        lis.extend(
            [li.get_attribute("innerHTML"), item[1], item[2]]
            for li in ol[:limit]
        )
    return lis
def _normalize_blog_time(raw, now):
    """Convert a feed timestamp title into a ``YYYY-MM-DD HH:MM:SS`` string.

    A title starting with '编' has its first four characters dropped
    (presumably an "edited at" prefix -- confirm against real pages).
    A title starting with '前' / '昨' is dated two days / one day before
    ``now`` and its first two characters are dropped; anything else is
    treated as a time of day on ``now``'s date.

    Raises:
        IndexError: if the title is empty.
        ValueError: if the remaining text is not an ``HH:MM`` time.
    """
    if raw[0] == '编':
        raw = raw[4:]
    if raw[0] == '前':
        days_back, clock = 2, raw[2:]
    elif raw[0] == '昨':
        days_back, clock = 1, raw[2:]
    else:
        days_back, clock = 0, raw
    day = now - datetime.timedelta(days=days_back)
    stamp = day.strftime('%Y年%m月%d日 ') + clock
    return str(datetime.datetime.strptime(stamp, '%Y年%m月%d日 %H:%M'))


def _build_blog(soup, userid, username):
    """Build the blog dict for one parsed feed item, or None if it is unusable."""
    contents = soup.select("pre.content")
    details = soup.select("a.c_tx.c_tx3.goDetail")
    if not contents or not details:
        # Missing text or detail link: skip this post instead of crashing the batch.
        return None
    text = contents[0].text
    detail = details[0]
    try:
        blog_time = _normalize_blog_time(
            detail.attrs.get("title", ""), datetime.datetime.now()
        )
    except (IndexError, ValueError) as e:
        print(e)
        return None

    # Image attachment URLs.
    hrefs = [
        a.attrs["href"]
        for a in soup.select(".img-attachments-inner.clearfix a")
        if "href" in a.attrs
    ]
    # Link to the post itself.
    link = detail.attrs.get("href", "")
    return {
        "blogTittle": text.split("\n")[0][:45],
        "blogContent": text,
        "blogUserid": userid,
        "blogUsername": username,
        "blogTime": blog_time,
        # Special label for user names containing '茉'.
        "blogLabel": "yl" if "茉" in username else "xy",
        # Serialized with json so quotes/backslashes in URLs are escaped.
        # A post without images still yields '[""]', as it did before.
        "blogData": json.dumps(hrefs or [""], ensure_ascii=False, separators=(",", ":")),
        "blogType": "pc",
        "blogInfo": (
            json.dumps({"link": link}, ensure_ascii=False, separators=(",", ":"))
            if len(link) > 10 else ""
        ),
    }


def handleBlogs(lis):
    """Parse raw feed-item HTML into a list of blog dicts ready for upload.

    Args:
        lis: List of ``[inner_html, userid, username]`` entries.

    Returns:
        A list of dicts with the keys ``blogTittle``, ``blogContent``,
        ``blogUserid``, ``blogUsername``, ``blogTime``, ``blogLabel``,
        ``blogData``, ``blogType`` and ``blogInfo``. Reposts (items that
        contain ``div.box.bgr3 div.rt_content``) are filtered out, and
        items whose text, detail link or timestamp cannot be extracted are
        skipped with a message on stdout.
    """
    bloglist = []
    for li in lis:
        soup = BeautifulSoup(li[0], 'lxml')
        # Filter out reposted posts.
        if soup.select("div.box.bgr3 div.rt_content"):
            continue
        blog = _build_blog(soup, li[1], li[2])
        if blog is None:
            print("出错跳过----")
            continue
        bloglist.append(blog)
    return bloglist
def sendBlogs(bloglist, timeout=10):
    """POST each blog dict to the release endpoint, skipping advert-like posts.

    Args:
        bloglist: List of blog dicts; each must provide ``blogContent``,
            ``blogTittle`` and ``blogUsername``.
        timeout: Seconds to wait for each HTTP request before giving up,
            so a stalled server cannot hang the run.

    Returns:
        None. One line per post is printed: the server response for sent
        posts, ``-1`` for filtered posts, or the error for failed requests.
        A failed request no longer aborts the remaining posts.
    """
    # Content filter.
    # NOTE(review): the nesting of the keyword checks was reconstructed from
    # a source that had lost its indentation; the three keywords are assumed
    # to apply only together with '领' -- confirm against the original file.
    def filterBlogs(blogContent):
        if '饿了么' in blogContent:
            return False
        if '领' in blogContent:
            return not any(
                word in blogContent for word in ('红包', '现金', '支付宝')
            )
        return True

    # Upload.
    url = "https://sicau.xyz:8080/release_pc"
    for blog in bloglist:
        summary = blog["blogTittle"][:10] + " <= " + blog["blogUsername"]
        if not filterBlogs(blog['blogContent']):
            print("-1 >>> " + summary)
            continue
        try:
            res = requests.post(
                url=url,
                data={"blogJson": json.dumps(blog, ensure_ascii=False)},
                timeout=timeout,
            )
        except requests.RequestException as e:
            # Report and move on so one network failure does not drop the rest.
            print(e)
            print("请求失败跳过 >>> " + summary)
            continue
        print(res.text + " >>> " + summary)
    print("请求发送完毕")
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/bit-01/qzone.git
git@gitee.com:bit-01/qzone.git
bit-01
qzone
Qzone
master

搜索帮助

D67c1975 1850385 1daf7b77 1850385