"""Demonstrate basic Beautiful Soup queries on a small HTML document.

Created on 2017-06-06

@author: v_huxiaoting

The script parses ``html_doc`` and prints, in order: every ``<a>`` link,
the link whose ``href`` is exactly ``http://example.com/lacie``, the first
link whose ``href`` matches the regular expression ``ill``, and the
``<p class="title">`` paragraph.
"""

import re

from bs4 import BeautifulSoup

# NOTE(review): the HTML tags of this literal were missing from the copy under
# review (only the visible text survived). The markup below is restored from
# the sample document in the Beautiful Soup documentation, which matches the
# visible text and the URLs/classes queried further down -- confirm against
# the original file. The visible text characters are kept as they were.
html_doc = """
<html><head><title>The Dormouse’s story</title></head>
<body>
<p class="title"><b>The Dormouse’s story</b></p>

<p class="story">Once upon a time there were three little sisters; and their names were
<a href="http://example.com/elsie" class="sister" id="link1">Elsie</a>,
<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
and they lived at the bottom of a well.</p>

<p class="story">…</p>
"""

# Parse with the parser bundled with the standard library.
soup = BeautifulSoup(html_doc, "html.parser")

# Every <a> tag: tag name, target URL and link text.
print("获取所有的链接")
links = soup.find_all("a")
for link in links:
    print(link.name, link["href"], link.get_text())

# The single link whose href equals the given URL exactly.
print("获取Lacie的链接")
link_node = soup.find("a", href="http://example.com/lacie")
print(link_node.name, link_node["href"], link_node.get_text())

# The first link whose href matches the regular expression.
print("正则匹配")
link_node1 = soup.find("a", href=re.compile(r"ill"))
# Fixed: the href printed here used to come from link_node (the Lacie link
# found above) instead of the node matched by the regular expression.
print(link_node1.name, link_node1["href"], link_node1.get_text())

# The paragraph carrying class "title"; `class_` is used because `class` is a
# reserved word in Python.
print("获取p段落文字")
p_node = soup.find("p", class_="title")
print(p_node.name, p_node.get_text())