# practical_python_tools/spider_yihaodian.py

"""
    爬取一号店的内容
"""

import requests
from lxml import html

def spider(SN, book_list=None, timeout=10):
    """Scrape Yihaodian (yhd.com) search results for an ISBN.

    Fetches the search page for ``SN``, prints every book found and appends
    one dict per book to ``book_list``.

    Args:
        SN: ISBN (or other keyword) interpolated into the search URL.
        book_list: Optional list to append results to; it is mutated in
            place. A fresh list is created when omitted, so results never
            leak from one call into the next.
        timeout: Seconds to wait for the HTTP response; passed straight to
            ``requests.get`` so a stalled server cannot hang the caller.

    Returns:
        The list the results were appended to. Each entry is a dict with
        the keys ``'title'``, ``'price'``, ``'link'`` (first XPath match of
        each) and ``'store'`` (the raw XPath result list, empty when the
        item carries no store name).
    """
    # A mutable default would be shared by every call; create it per call.
    if book_list is None:
        book_list = []

    # e.g. http://search.yhd.com/c0-0/k9787115428028
    url = 'http://search.yhd.com/c0-0/k{isbn}'.format(isbn=SN)

    # Fetch the page and build an element tree that can be queried by XPath.
    resp = requests.get(url, timeout=timeout)
    selector = html.fromstring(resp.text)

    # One child <div> per search result.
    items = selector.xpath('//div[@id="itemSearchList"]/div')
    print(len(items))

    for item in items:
        title = item.xpath('div//p[@class="proName clearfix"]/a/@title')  # book title
        link = item.xpath('div//p[@class="proName clearfix"]/a/@href')  # book link
        price = item.xpath('div//p[@class="proPrice"]/em/@yhdprice')  # book price
        store = item.xpath('div//p[@class="storeName limit_width"]/a/text()')  # seller name

        # Some result divs lack one of the mandatory nodes; indexing [0]
        # on an empty match would raise IndexError and abort the whole run.
        if not (title and link and price):
            continue

        print(title[0])
        print(link[0])
        print('一号店售价：¥', price[0])
        print(store[0] if store else '未知商家')
        print('------------------')

        book_list.append({
            'title': title[0],
            'price': price[0],
            'link': link[0],
            # NOTE(review): kept as the raw XPath result list (unlike the
            # other fields) so existing consumers see the same shape.
            'store': store,
        })

    return book_list


if __name__ == "__main__":
    # Demo run when executed as a script: search for one sample ISBN.
    SN = "9787115428028"
    spider(SN)