1 Star 0 Fork 4

神1様的男Ren/baidupan

forked from Chyroc/baidupan 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
run_wap.py 3.32 KB
一键复制 编辑 原始数据 按行查看 历史
Chyroc 提交于 2015-11-01 21:54 . linux
#! /usr/bin/env python
#coding=utf-8
#
import os
from Queue import Queue
import threading
import time
import urllib
from urlparse import *
from urlparse import urljoin
import redis
import urllib2
import json
import re
def get_url(url):
    """Fetch ``url`` over HTTP and return the raw response body.

    The request is made with a 20 second timeout. Errors raised by
    ``urllib2`` are not caught here; they propagate to the caller with
    their original traceback.
    """
    response = urllib2.urlopen(urllib2.Request(url), timeout=20)
    try:
        return response.read()
    finally:
        # Always release the underlying socket, even if read() fails;
        # the worker threads call this in an endless loop.
        response.close()
def get_count(url):
    """Return the ``totalCount`` value found in the page at ``url``.

    The value is returned exactly as captured from the page (a string,
    not an int). Raises ``Exception('get_count err')`` when the page
    contains no ``totalCount:"..."`` field.
    """
    page = get_url(url)
    found = re.search(r'totalCount:"(.+?)"', page)
    if found is None:
        raise Exception('get_count err')
    return found.group(1)
def get_count_all(uk):
    """Return ``(follow_count, fan_count)`` for the user id ``uk``.

    Both counts are read from the first page (``start=0``) of the
    corresponding list and are returned as ``get_count`` yields them.
    """
    uk_text = str(uk)
    followers_page = 'http://pan.baidu.com/wap/share/home/followers?uk=' + uk_text + '&start=0'
    fans_page = 'http://pan.baidu.com/wap/share/home/fans?uk=' + uk_text + '&start=0'
    # Tuple elements are evaluated left to right: followers first, then fans.
    return get_count(followers_page), get_count(fans_page)
def get_follow_url(url):
    """Return the ``follow_uk`` ids listed on the followers page ``url``.

    The page is expected to contain a ``parse("...")`` call immediately
    followed by ``,totalCount``; the quoted argument is decoded as JSON
    and the ``follow_uk`` field of every entry is collected.

    Raises ``Exception('get_follow_url err')`` when that payload is not
    present. Fetch, JSON and key errors propagate unchanged, keeping
    their original traceback.
    """
    page = get_url(url)
    matches = re.findall(r'parse\(\"(.+?)\"\),totalCount', page)
    if not matches:
        raise Exception('get_follow_url err')
    # All backslashes are dropped before decoding, presumably to undo the
    # escaping of the embedded string literal.
    # NOTE(review): this would also strip legitimate JSON escapes inside
    # values (e.g. \uXXXX) -- confirm against a real response.
    entries = json.loads(matches[0].replace('\\', ''))
    return [entry['follow_uk'] for entry in entries]
def get_fan_url(url):
    """Return the ``fans_uk`` ids listed on the fans page ``url``.

    The page is expected to contain a ``parse("...")`` call immediately
    followed by ``,totalCount``; the quoted argument is decoded as JSON
    and the ``fans_uk`` field of every entry is collected.

    Raises ``Exception('get_fan_url err')`` when that payload is not
    present. Fetch, JSON and key errors propagate unchanged, keeping
    their original traceback.
    """
    page = get_url(url)
    matches = re.findall(r'parse\(\"(.+?)\"\),totalCount', page)
    if not matches:
        raise Exception('get_fan_url err')
    # All backslashes are dropped before decoding, presumably to undo the
    # escaping of the embedded string literal.
    # NOTE(review): this would also strip legitimate JSON escapes inside
    # values (e.g. \uXXXX) -- confirm against a real response.
    entries = json.loads(matches[0].replace('\\', ''))
    return [entry['fans_uk'] for entry in entries]
def get_uk_url(url):
    """Dispatch ``url`` to the matching list parser and return its uk ids.

    URLs containing ``'followers'`` go to ``get_follow_url``; otherwise
    URLs containing ``'fans'`` go to ``get_fan_url``.

    Raises ``Exception('url not found followers and fans')`` when ``url``
    is empty/None or contains neither marker. Errors raised while
    fetching or parsing a recognised URL are propagated as-is rather
    than being replaced by that message.
    """
    if url:
        if 'followers' in url:
            return get_follow_url(url)
        if 'fans' in url:
            return get_fan_url(url)
    raise Exception('url not found followers and fans')
def deal_ren():
    """Worker loop: turn pending user ids into paginated list URLs.

    Repeatedly pops a uk from the ``key_ren_to`` Redis set, looks up its
    follow and fan counts, and adds one followers/fans URL per page of 24
    entries to ``key_url_to``. The uk is then recorded in ``key_ren_ed``.
    If anything fails, the uk is put back into ``key_ren_to`` and the
    error is printed. Never returns.

    Relies on the module-level ``r`` and ``key_*`` names set up by the
    script entry point.
    """
    name = threading.currentThread().getName()
    page_size = 24
    while True:
        uk = r.spop(key_ren_to)
        if not uk:
            # Nothing pending: pause instead of spinning on Redis.
            time.sleep(1)
            continue
        print("["+name+"号] doing\n")
        try:
            follow_count, fan_count = get_count_all(uk)
            print(follow_count)
            print(fan_count)
            for i in range(0, int(follow_count), page_size):
                r.sadd(key_url_to, 'http://pan.baidu.com/wap/share/home/followers?uk='+str(uk)+'&start='+str(i))
            for j in range(0, int(fan_count), page_size):
                r.sadd(key_url_to, 'http://pan.baidu.com/wap/share/home/fans?uk='+str(uk)+'&start='+str(j))
            r.sadd(key_ren_ed, uk)
        except Exception as e:
            # Best effort: requeue the uk so it is retried later.
            r.sadd(key_ren_to, uk)
            print(e)
def deal_url():
    """Worker loop: fetch pending list URLs and queue the uks they contain.

    Repeatedly pops a URL from the ``key_url_to`` Redis set, extracts the
    uk ids via ``get_uk_url``, adds each to ``key_ren_to`` and records the
    URL in ``key_url_ed``. If anything fails, the URL is put back into
    ``key_url_to`` and the error is printed. Never returns.

    Relies on the module-level ``r`` and ``key_*`` names set up by the
    script entry point.
    """
    name = threading.currentThread().getName()
    while True:
        url = r.spop(key_url_to)
        if not url:
            # spop yields nothing when the set is empty. Do not process
            # or requeue that placeholder; wait for work to appear.
            time.sleep(1)
            continue
        try:
            print("["+name+"号] doing\n")
            data = get_uk_url(url)
            for i in data:
                r.sadd(key_ren_to, i)
            r.sadd(key_url_ed, url)
        except Exception as e:
            # Best effort: requeue the URL so it is retried later.
            r.sadd(key_url_to, url)
            print(e)
if __name__ == '__main__':
    # Redis set names. The workers pop from the "*_to" sets and record
    # completed items in the "*_ed" sets.
    key_ren_to = 'redis_pan_ren_to'
    key_ren_ed = 'redis_pan_ren_ed'
    key_url_to = 'redis_pan_url_to'
    key_url_ed = 'redis_pan_url_ed'
    num_threads = 2  # NOTE(review): not referenced by the visible code
    uks = ['842263796', '321447710', '2956110277', '1566620287', '436751272', '3811108456', '319707070', '2435513896']
    #uks = ['657260084']
    r = redis.Redis(host='localhost', port=6379)

    # Initialise: seed the pending-user set only when it is empty.
    is_uk = r.srandmember(key_ren_to)
    if not is_uk:
        for uk in uks:
            r.sadd(key_ren_to, uk)

    # One user worker followed by three URL workers, started in this order.
    workers = (
        (deal_ren, 'ren_1'),
        (deal_url, 'url_1'),
        (deal_url, 'url_2'),
        (deal_url, 'url_3'),
    )
    for target, thread_name in workers:
        worker = threading.Thread(target=target, name=thread_name)
        worker.start()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/wikiblog/baidupan.git
git@gitee.com:wikiblog/baidupan.git
wikiblog
baidupan
baidupan
master

搜索帮助