BeautifulSoup
安装
打开 cmd 命令行,执行以下命令:
pip3 install beautifulsoup4
(下面的案例还用到了 requests 库和 lxml 解析器,如未安装可执行:pip3 install requests lxml)
案例一
# Third-party dependencies: ``requests`` downloads the page and
# ``BeautifulSoup`` parses the returned HTML.
import requests
from bs4 import BeautifulSoup
def get_html(url, timeout=10):
    """Download *url* and return the page text, or ``None`` on failure.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The decoded response body when the server answers with HTTP 200.
        ``None`` when the request fails or any other status code is
        returned; an error message is printed in that case.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a
        # non-200 answer, so callers only need to check for ``None``.
        print("网络访问出错")
        return None

    if response.status_code != 200:
        print("网络访问出错")
        return None

    # Decode with the encoding guessed from the body rather than the one
    # declared in the HTTP headers; only done for successful responses.
    response.encoding = response.apparent_encoding
    return response.text
def parse_html(html):
    """Parse *html* and print a handful of BeautifulSoup lookups.

    Prints, in order: every ``<a>`` matched by the CSS selector
    ``#syncad_1 a``, the ``<title>`` tag, the title's string, the first
    ``<p>`` tag, that tag's name, and the name of the first ``<a>`` tag's
    parent. ``None`` is printed for any value whose tag is missing from the
    document.

    Args:
        html: HTML source text to parse.
    """
    soup = BeautifulSoup(html, 'lxml')

    for link in soup.select('#syncad_1 a'):
        print(link)

    # Attribute-style lookups such as ``soup.p`` give None when the tag does
    # not exist, so each one is checked before reading an attribute from it.
    title = soup.title
    first_p = soup.p
    first_a = soup.a
    parent = first_a.parent if first_a is not None else None

    print(title)
    print(title.string if title is not None else None)
    print(first_p)
    print(first_p.name if first_p is not None else None)
    print(parent.name if parent is not None else None)
if __name__ == "__main__":
    # Script entry point: fetch the page and parse it only when the
    # download returned something.
    page_source = get_html("http://news.sina.com.cn")
    if page_source is not None:
        parse_html(page_source)
案例二
# Third-party dependencies: ``requests`` downloads the page and
# ``BeautifulSoup`` parses the returned HTML.
import requests
from bs4 import BeautifulSoup
def get_html(url, timeout=10):
    """Download *url* and return the page text, or ``None`` on failure.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The decoded response body when the server answers with HTTP 200.
        ``None`` when the request fails or any other status code is
        returned; an error message is printed in that case.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts are reported the same way as a
        # non-200 answer, so callers only need to check for ``None``.
        print("网络访问出错")
        return None

    if response.status_code != 200:
        print("网络访问出错")
        return None

    # Decode with the encoding guessed from the body rather than the one
    # declared in the HTTP headers; only done for successful responses.
    response.encoding = response.apparent_encoding
    return response.text
def parse_html(html):
    """Yield the child count of the first ``<div>``, then each child.

    Args:
        html: HTML source text to parse.

    Yields:
        First the length of the first ``<div>``'s ``contents`` list (an
        ``int``), then every item from that tag's ``children`` iterator.
        Nothing is yielded when the document has no ``<div>``.
    """
    soup = BeautifulSoup(html, 'lxml')

    # ``soup.div`` is None when the page has no <div>. Look it up once and
    # stop early instead of failing with AttributeError.
    first_div = soup.div
    if first_div is None:
        return

    yield len(first_div.contents)
    yield from first_div.children
if __name__ == "__main__":
    # Script entry point: fetch the page and, when the download returned
    # something, print every value produced by the parser.
    page_source = get_html("http://music.baidu.com")
    if page_source is not None:
        for item in parse_html(page_source):
            print(item)