# [hosting-page residue, not part of the program] 代码拉取完成,页面将自动刷新
#coding:utf-8
import urllib, urllib2, cookielib
import re
import os
import Queue
import threading
import time
import socket
import sys
import xlrd
import traceback
# Give every socket created from here on a default timeout of 20 seconds.
socket.setdefaulttimeout(20)
# Module-wide work queue: main_html() puts application URLs on it and the
# TigerMail worker threads take them off in run().
queue = Queue.Queue()
class TigerMail(threading.Thread):
    """Worker thread that downloads application comment pages from the
    Westpac introducer portal and stores them as HTML files.

    Every instance logs in while it is being constructed; the outcome is
    kept in ``login_flag`` ('OK' or 'Failed').  Once started, the thread
    takes application URLs from the shared queue until the module-level
    ``exit_flag`` event is set.
    """

    _BASE_URL = 'https://www.introducer.westpac.net.au/inn/secure/'
    _USER_AGENT = ('Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; '
                   'WOW64; Trident/5.0)')
    # A login is abandoned once more than this many attempts have failed.
    _MAX_LOGIN_RETRIES = 5
    # Seconds run() waits for a URL before re-checking exit_flag.
    _QUEUE_POLL_SECONDS = 1

    # (relative detail URL, link text) pairs on a search result page.
    _APP_LINK_RE = re.compile(
        r'<a href="(RetrieveApplicationDetail.do\?appNo=\d+)">\s+(.*?)\s+</a>',
        re.S)
    # Target of the button on a detail page that leads to its comments.
    _COMMENT_LINK_RE = re.compile(
        r'''onClick="document.location.href='(.*?)'"''')
    # First digits-dash-digits-dash-digits token on a comment page; used
    # (dashes removed) as the output file name.
    _FILE_ID_RE = re.compile(r'\d+-\d+-\d+')

    def __init__(self, queue, username, password):
        """Create the worker, make sure the output directory exists, log in.

        queue    -- queue of application detail URLs shared by all workers.
        username -- portal user id (also stored as the thread name).
        password -- portal password.
        """
        threading.Thread.__init__(self)
        self.login_count = 0
        self.queue = queue
        self.mainlist = []
        self.name = username
        self.passwd = password
        self.diry = os.path.join(os.getcwd(), 'mail', 'Westpac')
        if not os.path.exists(self.diry):
            os.makedirs(self.diry)
        self.login_flag = self.login()

    def run(self):
        """Process queued URLs until ``exit_flag`` is set.

        The queue is polled with a timeout rather than a blocking get(), so
        an idle worker notices ``exit_flag`` and terminates instead of
        staying alive and consuming URLs queued later for another account.
        """
        while not exit_flag.is_set():
            try:
                url = self.queue.get(timeout=self._QUEUE_POLL_SECONDS)
            except Queue.Empty:
                continue
            try:
                self.GetCont(url)
                sys.stdout.flush()
                print('**********Finished: %s **********' % url)
                time.sleep(1)
            finally:
                # Always balance the get(), otherwise queue.join() in the
                # caller could wait forever.
                self.queue.task_done()

    def _headers(self, referer=None, no_cache=False):
        """Build the request headers shared by every portal request."""
        headers = {
            'Accept-Language': 'zh-CN',
            'User-Agent': self._USER_AGENT,
            'Content-Type': 'application/x-www-form-urlencoded',
            'Host': 'www.introducer.westpac.net.au',
            'Connection': 'Keep-Alive',
        }
        if referer is not None:
            headers['Referer'] = referer
        if no_cache:
            headers['Cache-Control'] = 'no-cache'
        return headers

    def _fetch(self, url, data=None, referer=None, no_cache=False):
        """Request *url* (POST when *data* is given) and return the body.

        The response object is closed before returning, even if reading
        fails.  Any urllib2/socket error propagates to the caller.
        """
        req = urllib2.Request(url, data,
                              headers=self._headers(referer, no_cache))
        conn = self.opener.open(req)
        try:
            return conn.read()
        finally:
            conn.close()

    def login(self):
        """Log in with a fresh cookie jar and return 'OK' or 'Failed'.

        Both a rejected login and a raised exception count as a failed
        attempt.  After a failure the method waits 15 seconds and tries
        again; once more than ``_MAX_LOGIN_RETRIES`` attempts have failed it
        gives up and returns 'Failed'.  ``login_count`` holds the number of
        failed attempts of the most recent call.
        """
        logout_url = self._BASE_URL + 'Logout.do'
        login_url = self._BASE_URL + 'Login.do'
        login_data = urllib.urlencode(
            {'userID': self.name, 'password': self.passwd, 'command': 'login'})
        self.login_count = 0
        while True:
            try:
                self.cj = cookielib.LWPCookieJar()
                self.opener = urllib2.build_opener(
                    urllib2.HTTPCookieProcessor(self.cj))
                urllib2.install_opener(self.opener)
                # The logout page is requested first, before the
                # credentials are posted; its body is not used.
                self._fetch(logout_url)
                html = self._fetch(login_url, login_data,
                                   referer=logout_url, no_cache=True)
                if 'Application progress' in html:
                    print('%s login OK' % self.name)
                    return 'OK'
                print('%s login failed, 15s later retry !' % self.name)
            except Exception as e:
                print('%s %s Login() error' % (e, self.name))
            self.login_count += 1
            if self.login_count > self._MAX_LOGIN_RETRIES:
                print('%d Retries, Please check name/password!'
                      % self.login_count)
                return 'Failed'
            time.sleep(15)

    def GetList(self):
        """Collect the application links of the search results.

        Appends ``(relative_url, link_text)`` tuples to ``self.mainlist``
        and follows the remaining result pages through NextUrl().  A failed
        request is retried every 15 seconds until it succeeds.
        """
        list_url = self._BASE_URL + 'StartSearch.do'
        while True:
            try:
                html = self._fetch(list_url,
                                   referer=self._BASE_URL + 'Login.do')
                break
            except Exception as e:
                print('%s Getlist() error' % e)
                print('sleep 15s')
                time.sleep(15)
        self.mainlist.extend(self._APP_LINK_RE.findall(html))
        if 'name="command" value="Next"' in html:
            self.NextUrl()

    def NextUrl(self):
        """Walk through the following result pages until none is left.

        Each page is requested by posting the 'Next' command; its links are
        appended to ``self.mainlist``.  A failed request is retried after
        10 seconds.
        """
        next_url = self._BASE_URL + 'ContinueSearch.do'
        post_data = urllib.urlencode(
            {'searchType': '0', 'sortField': '0', 'command': 'Next'})
        while True:
            try:
                html = self._fetch(next_url, post_data,
                                   referer=self._BASE_URL + 'StartSearch.do',
                                   no_cache=True)
            except Exception as e:
                print('%s NextUrl() error' % e)
                time.sleep(10)
                continue
            self.mainlist.extend(self._APP_LINK_RE.findall(html))
            if 'name="command" value="Next"' not in html:
                print('All list urls geted')
                return
            print('Goto Next Page>>>>>>>>>>>>>>>>>>>>>>>')

    def GetCont(self, url):
        """Open one application detail page and download its comments.

        If the page has no comment link the session is treated as logged
        out: the URL is put back on the queue, and after 15 seconds the
        worker logs in again.  Any exception also puts the URL back on the
        queue.
        """
        try:
            html = self._fetch(url,
                               referer=self._BASE_URL + 'StartSearch.do',
                               no_cache=True)
            comment_url = self._COMMENT_LINK_RE.search(html)
            if comment_url:
                self.GetComm(url, comment_url.group(1))
            else:
                print('GetCont no group, Loged out, Wait for 15s .')
                print('Put in queue: %s' % url)
                self.queue.put(url)
                time.sleep(15)
                self.login()
        except Exception as e:
            print('%s %s Getcont() error' % (e, url))
            print('Put in queue: %s' % url)
            self.queue.put(url)

    def GetComm(self, Con_url, com_url):
        """Save the first comment page of an application to disk.

        Con_url -- detail page URL (sent as Referer, re-queued on failure).
        com_url -- comment page URL taken from the detail page.

        The file is named after the first digits-dash-digits-dash-digits
        token in the page, with the dashes removed.  When the page contains
        no such token the session is treated as logged out: Con_url is
        re-queued and the worker logs in again after 15 seconds.
        """
        try:
            html = self._fetch(com_url, referer=Con_url, no_cache=True)
            # Test the match itself: a compiled pattern is always truthy.
            file_id = self._FILE_ID_RE.search(html)
            if file_id:
                name = file_id.group(0).replace('-', '')
                name = os.path.join(self.diry, name + '.html')
                with open(name, 'w') as f:
                    f.write(html)
                if 'value="Next"' in html:
                    self.PostComm(Con_url, com_url, name)
            else:
                print('Loged out , getcomm wait 15s.')
                print('Put in queue: %s' % Con_url)
                self.queue.put(Con_url)
                time.sleep(15)
                self.login()
        except Exception as e:
            print('%s GetComm error %s' % (e, com_url))
            print('Put in queue: %s' % Con_url)
            self.queue.put(Con_url)

    def PostComm(self, Con_url, com_url, name):
        """Append every further comment page to the file *name*.

        Con_url -- detail page URL, re-queued if a request fails.
        com_url -- Referer for the first 'Next' request; later requests use
                   the paging URL itself as Referer.
        name    -- path of the file written by GetComm().
        """
        post_url = self._BASE_URL + 'CommentsPaging.do'
        post_data = urllib.urlencode({'command': 'Next'})
        referer = com_url
        try:
            while True:
                html = self._fetch(post_url, post_data,
                                   referer=referer, no_cache=True)
                with open(name, 'a') as f:
                    f.write(html)
                if 'value="Next"' not in html:
                    break
                referer = post_url
        except Exception as e:
            print('%s PostComm error %s' % (e, Con_url))
            print('Put in queue: %s' % Con_url)
            self.queue.put(Con_url)
def _cell_text(value):
    """Return a spreadsheet cell value as stripped text.

    xlrd returns numeric cells as floats, so an all-digit user name or
    password such as 123456 would otherwise be rendered as '123456.0'.
    Whole-number floats are therefore converted to int first.
    """
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value).strip()


def readxls():
    """Read the Westpac accounts from 'Follow Up.xlsx' in the current directory.

    Scans the sheet named 'account'; a row is used when its first column is
    exactly 'Westpac' and both the second (user name) and third (password)
    columns are non-empty after stripping.

    Returns a list of ``(username, password)`` string tuples.
    """
    workbook_path = os.path.join(os.getcwd(), 'Follow Up.xlsx')
    workbook = xlrd.open_workbook(workbook_path)
    sheet = workbook.sheet_by_name('account')
    unps_list = []
    for row in range(sheet.nrows):
        if sheet.cell(row, 0).value != 'Westpac':
            continue
        un = _cell_text(sheet.cell(row, 1).value)
        pw = _cell_text(sheet.cell(row, 2).value)
        if un and pw:
            unps_list.append((un, pw))
    return unps_list
def main_html(username, password):
    """Download every application page for one account.

    Logs in, collects the application list, starts 10 TigerMail worker
    threads, queues all application URLs and waits until the queue has been
    fully processed.  A fresh module-level ``exit_flag`` event is created
    for the run and set once the queue is drained.  Any exception is
    reported with a traceback instead of being propagated.
    """
    global exit_flag
    try:
        exit_flag = threading.Event()
        Tiger1 = TigerMail(queue, username, password)
        if Tiger1.login_flag != 'OK':
            print('%s login failed !' % username)
            return
        print('%s login success!' % username)
        Tiger1.GetList()
        # The list holds relative links; turn them into absolute URLs.
        Tiger1.mainlist = [
            ('https://www.introducer.westpac.net.au/inn/secure/' + x, y)
            for x, y in Tiger1.mainlist]
        print('%s have %s files' % (username, len(Tiger1.mainlist)))
        if not Tiger1.mainlist:
            print('No files found!')
            return
        for _ in range(10):
            # Each worker logs in on its own while it is constructed.
            t = TigerMail(queue, username, password)
            t.start()
        for x, _ in Tiger1.mainlist:
            queue.put(x)
        queue.join()
        exit_flag.set()
        print('%s download finished.' % username)
    except Exception:
        traceback.print_exc()
def main():
    """Process every account returned by readxls(), then end the process.

    Accounts are handled one after another with a 30 second pause after
    each.  The process is ended with os._exit(0); because os._exit() does
    not flush stdio buffers, stdout and stderr are flushed explicitly first
    so the final messages are not lost when output is redirected.
    """
    up_list = readxls()
    try:
        for u, p in up_list:
            main_html(u, p)
            time.sleep(30)
        print('All account done, programme will exit after 10s.')
        time.sleep(10)
        sys.stdout.flush()
        sys.stderr.flush()
        os._exit(0)
    except Exception:
        traceback.print_exc()
# Start the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()
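# [hosting-page residue, not part of the program] 此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
# [hosting-page residue, not part of the program] 如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。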