代码拉取完成,页面将自动刷新
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# NOTE(review): module-level list that starts out empty; nothing in the visible
# code reads or fills it. Presumably the crawler's seed URLs ("STAR" may be a
# typo for "START") -- confirm against whatever imports this module.
STAR_URL = []
def urlFilter(url):
    """Return True when *url* should be treated as a Zhihu question page.

    A URL is accepted when it contains the question prefix
    ``http://www.zhihu.com/question/`` and does not contain the substring
    ``answer`` anywhere (presumably to skip per-answer permalinks -- confirm
    with the crawler that calls this).

    Args:
        url: the URL string to test.

    Returns:
        bool: True if the URL passes both checks, False otherwise.
    """
    return 'http://www.zhihu.com/question/' in url and 'answer' not in url
def pickQuestion(html_tree, extra=None):
    """Yield a single dict describing the question contained in *html_tree*.

    Args:
        html_tree: parsed page object exposing ``xpath()`` (presumably an
            lxml tree -- confirm with the caller).
        extra: unused here; kept so the signature matches ``pickAnswer``.

    Yields:
        dict with the keys:
            ``qTitle``      -- stripped text of the question title heading.
            ``qID``         -- ``data-urltoken`` attribute of the page wrapper.
            ``qContent``    -- ``.text`` of the first detail div, or ``''``
                               when that is None.
            ``qCommentNum`` -- comment count as a string (see below).

    Raises:
        IndexError: if any of the expected nodes is missing from the page.
            Because this is a generator, the error surfaces on first
            iteration, not at call time.
    """
    question = {}

    title_nodes = html_tree.xpath("//div[@id='zh-question-title']/h2/text()")
    question["qTitle"] = title_nodes[0].strip()

    page_node = html_tree.xpath('//div[@id="zh-single-question-page"]')[0]
    question['qID'] = page_node.get('data-urltoken')

    detail_nodes = html_tree.xpath('//div[@id="zh-question-detail"]/div')
    detail_text = detail_nodes[0].text
    question['qContent'] = '' if detail_text is None else detail_text

    link_texts = html_tree.xpath("//div[@id='zh-question-meta-wrap']/div[1]/a[1]/text()")
    # The second text node of the link carries the label. When it has no
    # space it is treated as "no comments yet"; otherwise the token before
    # the first space is taken as the count (presumably "N <label>").
    label_parts = link_texts[1].split(' ')
    question['qCommentNum'] = '0' if len(label_parts) == 1 else label_parts[0]

    yield question
def pickAnswer(html_tree, extra=None):
    """Yield one dict per answer block found in *html_tree*.

    Args:
        html_tree: parsed page object exposing ``xpath()`` (presumably an
            lxml tree -- confirm with the caller).
        extra: mapping that must provide ``'qID'``; its value is copied into
            every yielded item. The default of None only works for pages
            with no answer nodes, since the lookup happens per answer.

    Yields:
        dict with the keys:
            ``aWriter``  -- joined text of the author link, or the
                            placeholder ``'anoymous'`` when it is empty.
            ``qID``      -- ``extra['qID']``.
            ``aID``      -- ``data-aid`` attribute of the answer node.
            ``aContent`` -- stripped string value of the node's third div.
            ``aVoteNum`` -- first text node of the vote-count span.

    Raises:
        TypeError: if *extra* is None while at least one answer node exists.
        KeyError: if an answer node has no ``data-aid`` attribute.
        IndexError: if the vote-count span is missing.
    """
    for answer in html_tree.xpath('//div[@id="zh-question-answer-wrap"]/div'):
        writer = ''.join(answer.xpath('./div[2]/div[1]/h3/a[2]/text()'))
        yield {
            # NOTE(review): 'anoymous' is misspelled but is emitted data;
            # kept unchanged so existing consumers/stored rows still match.
            'aWriter': writer or 'anoymous',
            'qID': extra['qID'],
            'aID': answer.attrib['data-aid'],
            # string(...) already evaluates to one string, so no join needed.
            'aContent': answer.xpath('string(./div[3])').strip(),
            'aVoteNum': answer.xpath('./div[1]/button[1]/span[2]/text()')[0],
        }
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。