# Source: machine-study/Xpath.py (branch: master)

from lxml import etree
import requests

# Minimal sample XML document. It is deliberately small and shaped to match
# the XPath expressions demonstrated below: a <book> root, a <name> child,
# <nick> elements at several depths, and `id` / `class` attributes.
# (A whitespace-only string cannot be parsed: etree.XML raises
# "Document is empty".)
xml = """
<book>
    <id>1</id>
    <name>Sample Book</name>
    <nick>root-nick</nick>
    <author>
        <nick id="10086">first</nick>
        <nick id="10010">second</nick>
        <nick class="jay">Jay</nick>
        <div>
            <nick>nested</nick>
        </div>
    </author>
    <partner>
        <nick id="p1">partner-one</nick>
    </partner>
</book>
"""
# Minimal sample HTML page: one <ul> whose <li> items each wrap a single link.
html = """
<html>
<head><title>Links</title></head>
<body>
    <ul>
        <li><a href="https://example.com/one">one</a></li>
        <li><a href="https://example.com/two">two</a></li>
        <li><a href="https://example.com/three">three</a></li>
    </ul>
</body>
</html>
"""
# Parse the XML sample; `et` is the root <book> element.
et = etree.XML(xml)
# Uncomment one query at a time (together with the print) to try it out.
# result = et.xpath("/book")  # a leading "/" starts at the document root
# result = et.xpath("/book/name")  # "/" between steps selects direct children
# result = et.xpath("/book/name/text()")[0]  # text() yields the text content
# result = et.xpath("/book//nick")  # "//" selects descendants at any depth
# result = et.xpath("/book/*/nick/text()")  # "*" is a wildcard: any element
# result = et.xpath("/book/author/nick[@class='jay']/text()")  # filter by attribute
# result = et.xpath("/book/author/nick/@id")  # id values of every nick directly under author
# print(result)

# Parse the HTML sample; `et` is now the root <html> element.
et = etree.HTML(html)
# Text of the <a> inside the second <li> of the <ul> directly under <body>.
# XPath positions are 1-based, and xpath() returns a list of matches.
result = et.xpath("/html/body/ul/li[2]/a/text()")
# Select the <li> elements themselves (xpath() returns a list, not an
# iterator) so each one can act as the context node for the queries below.
result = et.xpath("//li")
for item in result:
    # A relative path ("./...") is evaluated against the element xpath() is
    # called on, so it must run on `item`; running it on the root `et` would
    # look for an <a> directly under <html> and match nothing.
    # NOTE: `herf` / `conent` (sic) keep their original spelling in case code
    # further down reads them.
    herf = item.xpath("./a/@href")[0]  # link target of this <li>'s <a>
    conent = item.xpath("./a/text()")[0]  # link text of this <li>'s <a>