1 Star 1 Fork 5

zhenghua/Scrpay

forked from 梁新斌/Scrpay 
Create your Gitee Account
Explore and code with more than 12 million developers — free private repositories!
Sign up
文件
This repository doesn't specify a license. Please pay attention to the specific project description and its upstream code dependencies when using it.
Clone or Download
GtFund.py 3.29 KB
Copy Edit Raw Blame History
'''
国泰基金官网基金业绩数据抓取
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import time
from bs4 import BeautifulSoup
from pymongo import MongoClient
import os
import tool
def get_url(fund_id='001790'):
    """Build the Guotai Fund (gtfund.com) detail-page URL for a fund.

    Args:
        fund_id: Fund product code as it appears in the site URL.
            Defaults to '001790' to preserve the original behavior.

    Returns:
        The full fund detail page URL as a string.
    """
    return 'https://e.gtfund.com/Etrade/Jijin/view/id/' + fund_id
def get_index(sdate, edate, url):
    """Drive a Chrome browser to query fund NAV history and persist every page.

    Opens the fund page, fills in the start/end dates, submits the query,
    then walks every result page and stores each parsed row into MongoDB
    (database 'test', collection 'prod_fund') via ``tool.save_to_mongo``.

    Bug fixed vs. the original: ``parse_html`` is a generator, and the
    original page-1 branch created it without iterating, so page 1 was
    never actually parsed or saved. All pages are now consumed uniformly.

    Args:
        sdate: Query start date, e.g. '2016-01-01'.
        edate: Query end date, e.g. '2018-01-01'.
        url: Fund detail page URL (see ``get_url``).
    """
    browers = webdriver.Chrome()
    wait = WebDriverWait(browers, 30)
    try:
        browers.get(url)
        browers.maximize_window()
        startdate = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#startdate_jingzhi')))
        enddate = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#enddate_jingzhi')))
        # Query (submit) button
        submit_sel = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body > div.main.gt-wrapper > div.gt-content.jijin-content > div > div.jijin-tabs > div.J_jijin_tab_5.jijin-tab-content.jijin-tab-5.active > div.fenfhong_choice.J_fenfhong_choice > table > tbody > tr > td:nth-child(7) > button')))
        time.sleep(2)
        startdate.clear()
        enddate.clear()
        startdate.send_keys(sdate)
        enddate.send_keys(edate)
        time.sleep(2)
        submit_sel.click()
        time.sleep(2)
        # Pager link carrying the last page number in its 'data-page' attribute
        zys = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body > div.main.gt-wrapper > div.gt-content.jijin-content > div > div.jijin-tabs > div.J_jijin_tab_5.jijin-tab-content.jijin-tab-5.active > div.tab-content.tab-chart.J_chart_tab_marketlist > div.page_report.J_fenhong_page_num > a:nth-child(8)')))
        last_page = int(zys.get_attribute('data-page'))
        for page in range(1, last_page + 1):
            if page > 1:
                # "Next page" button; page 1 is already displayed after submit
                submit_fy = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'body > div.main.gt-wrapper > div.gt-content.jijin-content > div > div.jijin-tabs > div.J_jijin_tab_5.jijin-tab-content.jijin-tab-5.active > div.tab-content.tab-chart.J_chart_tab_marketlist > div.page_report.J_fenhong_page_num > a.next')))
                time.sleep(1)
                submit_fy.click()
            # Crude wait for the AJAX table refresh — TODO: replace with an
            # explicit wait on the table contents changing.
            time.sleep(2)
            for result in parse_html(browers.page_source, page):
                tool.save_to_mongo('test', 'prod_fund', result)
    finally:
        # Fix of a resource leak: the original never closed the browser.
        browers.quit()
def parse_html(html, page, prod='001790'):
    """Parse one result page of the fund NAV table and yield row dicts.

    Each non-header table row is flattened to comma-separated text and split
    into four fields: date, cumulative NAV, unit NAV, daily growth rate.

    Args:
        html: Full page source of the fund page.
        page: 1-based page number (used only for the progress message).
        prod: Fund product code stamped into every record; defaults to
            '001790' to preserve the original behavior.

    Yields:
        dict with keys 'prod', 'date', 'ljjz', 'dwjz', 'rzzl'.
    """
    print('正在处理第' + str(page) + '页')
    soup = BeautifulSoup(html, 'lxml')
    tables = soup.find_all(name='div', class_='nav-table J_jingzhilist J_shouyilist gt-padding-v')
    for table in tables:
        # Materialize the rows once; the original re-ran find_all('tr') on
        # every iteration just to detect the header row (accidental O(n^2)).
        rows = table.find_all(name='tr')
        for row in rows[1:]:  # rows[0] is the header row — skip it
            # Flatten cell text to 'date,ljjz,dwjz,rzzl'; [1:-2] trims the
            # leading separator and trailing ',\n' artifacts of get_text().
            fields = row.get_text().replace('\n', ',')[1:-2].split(',')
            yield {
                'prod': prod,
                'date': fields[0],
                'ljjz': fields[1],
                'dwjz': fields[2],
                'rzzl': fields[3],
            }
if __name__ == '__main__':
    # Scrape NAV history for the default fund over a fixed two-year window.
    target_url = get_url()
    get_index('2016-01-01', '2018-01-01', target_url)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/zhenghua0501/Scrpay.git
git@gitee.com:zhenghua0501/Scrpay.git
zhenghua0501
Scrpay
Scrpay
master

Search

0d507c66 1850385 C8b1a773 1850385