代码拉取完成,页面将自动刷新
# -*- coding: utf-8 -*-
import requests
import json
import re
from time import sleep
from bs4 import BeautifulSoup
import time
import datetime
from requests.adapters import HTTPAdapter
from time import strftime, localtime
def baojing(wenben):
    """Post *wenben* as a plain-text message to the DingTalk alert webhook.

    The response object is printed so the outcome shows up in the console.

    Args:
        wenben: Text placed in the ``text.content`` field of the payload.

    Raises:
        requests.exceptions.RequestException: If the request cannot be sent
            or does not complete within the timeout.
    """
    # NOTE: this literal is not a URL; presumably a placeholder that has to
    # be replaced with the real DingTalk robot webhook address.
    dingding = '钉钉报警接口'
    headers = {
        'Content-Type': 'application/json'
    }
    payload = {
        "msgtype": "text",
        "text": {
            "content": wenben
        },
    }
    # Without a timeout a stalled connection would block the caller forever.
    resp = requests.post(url=dingding, headers=headers, json=payload, timeout=30)
    print(resp)
# Substrings that trigger an alert when found in a search-result snippet.
_ALERT_KEYWORDS = (
    '骗人', '退款', '内幕', '欺', '12315', '投诉', '药', '坑',
    '学费', '欺诈', '辱', '曝光', '暴光', '民族',
)


def _extract_snippets(html):
    """Return the tag-stripped text of each matching search result in *html*.

    Only the region between the ``page": "search`` marker and the
    ``大家都在搜`` marker is examined.  That region is cut at every
    ``span class="p_title``; chunks mentioning both ``石力派`` and
    ``data-tid`` contribute the text that follows their first
    ``target="_blank">`` up to the next ``</div>``.

    Raises:
        IndexError: If the ``page": "search`` marker is absent.  This is
            left to propagate so the caller can tell that the page did not
            have the expected layout.
    """
    region = html.split('page": "search')[1].split('大家都在搜')[0]
    snippets = []
    for chunk in region.split('span class="p_title'):
        if '石力派' not in chunk or 'data-tid' not in chunk:
            continue
        pieces = chunk.split('target="_blank">')
        if len(pieces) < 2:
            # No link text in this chunk: skip it instead of aborting the
            # whole scan with an IndexError.
            continue
        raw = pieces[1].split('</div>')[0]
        # `S*?` matches a literal "S" (presumably `\S` was intended); the
        # net effect of the pattern is to drop every `<...>` span.
        snippets.append(re.sub('<(S*?)[^>]*>.*?|<.*? />', '', raw))
    return snippets


def get_bs(link):
    """Scan the page at *link* and send an alert for each new keyword hit.

    The page is fetched, parsed with BeautifulSoup, and every top-level node
    whose markup contains ``content`` is searched for result snippets.  A
    snippet containing any of ``_ALERT_KEYWORDS`` is turned into an alert
    line; lines not yet present in ``已报警内容.txt`` are sent through
    ``baojing`` (the link first, then the alert text) and appended to that
    file so they are not sent again.

    Args:
        link: URL of the search page to scan.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched.
        IndexError: If the page lacks the expected result markers.
    """
    headers = {
        # The header name must be spelled exactly "User-Agent"; with any
        # other spelling the value is sent as an unrelated header.
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36"
    }
    with requests.Session() as s:
        s.mount('http://', HTTPAdapter(max_retries=3))  # retry up to 3 times
        s.mount('https://', HTTPAdapter(max_retries=3))
        response = s.get(link, headers=headers, timeout=150)
        html = response.text
    soup = BeautifulSoup(html, 'lxml')

    # 'a+' creates the log on first run; seek(0) lets us read what is
    # already there, and every write still lands at the end of the file.
    with open('已报警内容.txt', 'a+', encoding='utf-8') as log:
        log.seek(0)
        seen = set(log.readlines())  # each entry keeps its trailing '\n'
        for node in soup:
            markup = str(node)
            if 'content' not in markup:
                continue
            for snippet in _extract_snippets(markup):
                print(snippet)
                if not any(word in snippet for word in _ALERT_KEYWORDS):
                    continue
                # The log is compared line by line, so a record must fit on
                # a single line or it could never be recognised again.
                flat = snippet.replace('\r', ' ').replace('\n', ' ')
                neir = '贴吧舆论报警:' + '\t' + flat + '\n'
                print(neir)
                if neir in seen:
                    print('■■■■■■■■■■■■■已经报警过了')
                    continue
                baojing('贴吧链接:' + '\t' + str(link))
                baojing(neir)
                # Record the alert right away (on disk and in memory) so a
                # repeat later in this scan, or after a crash, is not resent.
                log.write(neir)
                log.flush()
                seen.add(neir)
                # Pause between alerts (presumably to avoid flooding the
                # webhook).
                time.sleep(10)
#f = open('已报警内容.txt', 'w', encoding='utf-8')
# Poll the search page forever.  After a successful scan wait 30 minutes
# before the next one; after any error print it and back off for 2 hours.
url = 'https://tieba.baidu.com/f/search/res?ie=utf-8&qw=%E7%9F%B3%E5%8A%9B%E6%B4%BE'
while True:
    try:
        page = get_bs(url)
        print('贴吧舆情监控完毕...30分钟后进行下一轮扫描' + '\n')
        print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
        sleep(1800)
    except Exception as err:
        print(err)
        sleep(7200)
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。