# demo.py — zhouzizhen / Python网络爬虫与信息提取 (Python web crawlers and information extraction)
# Repo: https://gitee.com/zhouzizhen/Python-Web-crawlers-and-information-extraction.git
# -*- coding: utf-8 -*-
# Course: https://www.icourse163.org/learn/BIT-1001870001?tid=1206093223#/learn/content
# import scrapy
#
# class DemoSpider(scrapy.Spider):
#     name = "demo"
#     # allowed_domains = ["python123.io"]
#     start_urls = ['https://python123.io/ws/demo.html']
#
#     def parse(self, response):
#         # Save the response body under the last path segment of the URL
#         fname = response.url.split('/')[-1]
#         with open(fname, 'wb') as f:
#             f.write(response.body)
#         self.log('Saved file %s.' % fname)
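The `parse` callback above names the saved file after the last path segment of the URL. A minimal sketch of that one step on its own (pure stdlib; the URL is the one from `start_urls`):

```python
from urllib.parse import urlparse

def filename_from_url(url: str) -> str:
    """Return the last path segment of a URL, e.g. 'demo.html'."""
    # urlparse drops any query string, so '?tid=1' never ends up in the filename
    return urlparse(url).path.split('/')[-1]

print(filename_from_url('https://python123.io/ws/demo.html'))  # demo.html
```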
import requests
# # Fetch a JD product page
# url = "https://item.jd.com/2967929.html"
# try:
#     r = requests.get(url)
#     r.raise_for_status()
#     r.encoding = r.apparent_encoding
#     print(r.text[:1000])
# except requests.RequestException:
#     print("crawl failed")
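The try / `raise_for_status` / `apparent_encoding` sequence above is the general-purpose crawl pattern this course repeats for every page; wrapped as a reusable function (a sketch — the timeout value and the empty-string fallback are choices of mine, not from the original):

```python
import requests

def get_html_text(url: str, timeout: float = 30) -> str:
    """Fetch a page, raise on HTTP errors, decode with the guessed encoding."""
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()              # turn 4xx/5xx responses into exceptions
        r.encoding = r.apparent_encoding  # guess the encoding from the body
        return r.text
    except requests.RequestException:
        return ""                         # signal failure with an empty string
```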
# url = "https://www.amazon.cn/gp/product/B01M8L5Z3Y"
# try:
#     # Modify the headers field to impersonate a Mozilla browser
#     kv = {'user-agent': 'Mozilla/5.0'}
#     r = requests.get(url, headers=kv)
#     print(r.request.headers)
#     r.raise_for_status()
#     print(r.encoding)
#     r.encoding = r.apparent_encoding
#     print(r.encoding)
#     print(r.text[1000:2000])
# except requests.RequestException:
#     print("crawl failed")
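What `headers=kv` actually changes can be inspected without hitting Amazon at all: `requests.Request(...).prepare()` builds the outgoing request locally, so no network traffic happens. A sketch using the same URL and header as above:

```python
import requests

kv = {'user-agent': 'Mozilla/5.0'}
req = requests.Request('GET',
                       'https://www.amazon.cn/gp/product/B01M8L5Z3Y',
                       headers=kv).prepare()
# The header is attached to the prepared request; lookup is case-insensitive
print(req.headers['User-Agent'])  # Mozilla/5.0
```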
# # Keyword search on Baidu: the keyword goes in the 'wd' query parameter
# keyword = "Python"
# url = "http://www.baidu.com/s"
# try:
#     kv = {'wd': keyword}
#     r = requests.get(url, params=kv)
#     print(r.request.url)
#     r.raise_for_status()
#     print(len(r.text))
# except requests.RequestException:
#     print("crawl failed")
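What `params=kv` does to the URL can be reproduced with the stdlib alone: the dict is percent-encoded and appended as a query string, which is exactly what `print(r.request.url)` shows. A sketch:

```python
from urllib.parse import urlencode

keyword = "Python"
kv = {'wd': keyword}
url = "http://www.baidu.com/s" + "?" + urlencode(kv)
print(url)  # http://www.baidu.com/s?wd=Python

# Non-ASCII keywords get percent-encoded as UTF-8 bytes, the same way requests does it
print(urlencode({'wd': '爬虫'}))  # wd=%E7%88%AC%E8%99%AB
```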
#
# # Keyword search on so.com: here the query parameter is 'q'
# keyword = "Python"
# url = "http://www.so.com/s"
# try:
#     kv = {'q': keyword}
#     r = requests.get(url, params=kv)
#     print(r.request.url)
#     r.raise_for_status()
#     print(len(r.text))
# except requests.RequestException:
#     print("crawl failed")
# import os
#
# # Download an image and save it under a local directory
# url = "http://image.nationalgeographic.com.cn/2017/0211/20170211061910157.jpg"
# root = "E:/pics/"
# path = root + url.split('/')[-1]
# try:
#     if not os.path.exists(root):
#         os.mkdir(root)
#     if not os.path.exists(path):
#         r = requests.get(url)
#         with open(path, 'wb') as f:
#             f.write(r.content)
#         print("file saved")
#     else:
#         print("file already exists")
# except (requests.RequestException, OSError):
#     print("crawl failed")
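The directory-and-file bookkeeping above can be exercised without any download by factoring it out; `os.makedirs(..., exist_ok=True)` replaces the exists/mkdir pair, and the `with` block already closes the file, so no explicit `close()` is needed. A sketch (the function name and return strings are stand-ins of mine):

```python
import os

def save_bytes(root: str, url: str, content: bytes) -> str:
    """Save content under root, named after the URL's last segment; skip if present."""
    os.makedirs(root, exist_ok=True)  # create root (and parents) if missing
    path = os.path.join(root, url.split('/')[-1])
    if os.path.exists(path):
        return "file already exists"
    with open(path, 'wb') as f:       # the with-block closes f automatically
        f.write(content)
    return "file saved"
```

With a real crawl, `content` would be `r.content` from `requests.get(url)`.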
# # IP lookup via ip138
# url = "http://m.ip138.com/ip.asp?ip="
# try:
#     r = requests.get(url + '202.204.80.112')
#     r.raise_for_status()
#     r.encoding = r.apparent_encoding
#     print(r.text[-500:])
# except requests.RequestException:
#     print("crawl failed")