Fetch the repository succeeded.
This action will force synchronization from 梁新斌/Scrpay, which will overwrite any changes you have made since you forked the repository. This cannot be undone!
The synchronization will run in the background, and the page will refresh once it finishes. Please be patient.
'''
国泰基金官网基金业绩数据抓取
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import time
from bs4 import BeautifulSoup
from pymongo import MongoClient
import os
import tool
def get_url(fund_id='001790'):
    """Build the GTFund product detail page URL.

    Args:
        fund_id: Fund product code appended to the URL path.
            Defaults to '001790', the fund this script originally targeted.

    Returns:
        The product page URL as a string.
    """
    return 'https://e.gtfund.com/Etrade/Jijin/view/id/' + fund_id
def get_index(sdate, edate, url):
    """Scrape the fund NAV history table between two dates and persist it.

    Drives a Chrome browser to the fund product page, fills in the date
    range, submits the query, then walks every result page and saves each
    parsed row to MongoDB via tool.save_to_mongo.

    Args:
        sdate: Query start date string, e.g. '2016-01-01'.
        edate: Query end date string, e.g. '2018-01-01'.
        url: Product page URL (see get_url()).
    """
    browser = webdriver.Chrome()
    try:
        wait = WebDriverWait(browser, 30)
        browser.get(url)
        browser.maximize_window()
        startdate = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, '#startdate_jingzhi')))
        enddate = wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, '#enddate_jingzhi')))
        # "Query" (查询) submit button
        submit_sel = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body > div.main.gt-wrapper > div.gt-content.jijin-content > div > div.jijin-tabs > div.J_jijin_tab_5.jijin-tab-content.jijin-tab-5.active > div.fenfhong_choice.J_fenfhong_choice > table > tbody > tr > td:nth-child(7) > button')))
        time.sleep(2)
        startdate.clear()
        enddate.clear()
        startdate.send_keys(sdate)
        enddate.send_keys(edate)
        time.sleep(2)
        submit_sel.click()
        time.sleep(2)
        # Pager link whose data-page attribute holds the last page number.
        zys = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body > div.main.gt-wrapper > div.gt-content.jijin-content > div > div.jijin-tabs > div.J_jijin_tab_5.jijin-tab-content.jijin-tab-5.active > div.tab-content.tab-chart.J_chart_tab_marketlist > div.page_report.J_fenhong_page_num > a:nth-child(8)')))
        last_page = int(zys.get_attribute('data-page'))
        for page in range(1, last_page + 1):
            if page > 1:
                # "Next page" pager button
                submit_fy = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body > div.main.gt-wrapper > div.gt-content.jijin-content > div > div.jijin-tabs > div.J_jijin_tab_5.jijin-tab-content.jijin-tab-5.active > div.tab-content.tab-chart.J_chart_tab_marketlist > div.page_report.J_fenhong_page_num > a.next')))
                time.sleep(1)
                submit_fy.click()
            # Crude wait for the table to re-render after load/pagination.
            time.sleep(2)
            # BUG FIX: the original called parse_html() on page 1 without
            # consuming the generator, so the first page was never actually
            # parsed or saved (generators execute lazily).
            for result in parse_html(browser.page_source, page):
                tool.save_to_mongo('test', 'prod_fund', result)
    finally:
        # Always release the browser, even if a wait times out mid-scrape.
        browser.quit()
def parse_html(html, page, prod='001790'):
    """Parse one result page of the NAV table and yield one dict per row.

    Args:
        html: Full page source of the fund detail page.
        page: 1-based page number, used only for the progress message.
        prod: Fund product code stored with every row. Defaults to
            '001790' to preserve the original hard-coded behavior.

    Yields:
        dict with keys 'prod', 'date', 'ljjz', 'dwjz', 'rzzl'
        (presumably accumulated NAV, unit NAV and daily growth rate --
        the pinyin abbreviations suggest so; confirm against the site).
    """
    print('正在处理第' + str(page) + '页')
    soup = BeautifulSoup(html, 'lxml')
    tables = soup.find_all(name='div', class_='nav-table J_jingzhilist J_shouyilist gt-padding-v')
    for table in tables:
        rows = table.find_all(name='tr')
        # rows[0] is the header row -- skip it instead of re-running
        # find_all() on every iteration as the original did.
        for row in rows[1:]:
            text = row.get_text().replace('\n', ',')
            # Trim the separators left at both ends by the newline
            # replacement, then split into individual cell values.
            cells = text[1:-2].split(',')
            yield {
                'prod': prod,
                'date': cells[0],
                'ljjz': cells[1],
                'dwjz': cells[2],
                'rzzl': cells[3],
            }
if __name__ == '__main__':
    # Scrape the 2016-2018 NAV history for the default fund.
    get_index('2016-01-01', '2018-01-01', get_url())
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。