采用requests包和beautiful soup
注意：requests 的 get 请求返回的响应需要先将其 encoding 设置为 "utf-8"，再读取 text 得到 html，否则中文可能出现乱码
得到的数据结果为新闻标题+url
# coding=utf-8
from bs4 import BeautifulSoup
import requests
import time
# Fetch the Yicai "cbndata" news listing page and print, for every matching
# headline element, its text followed by the URL of its link.

# Request headers: a desktop-browser User-Agent string sent with the GET.
h1 = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
}

# NOTE(review): hard-coded proxy address; presumably a public proxy that may
# no longer be reachable -- confirm before relying on it.
o_g = ['113.200.159.155:9999']
# Only plain-http traffic is routed through the proxy.
pro = {'http': o_g[0]}

response = requests.get(
    'http://www.yicai.com/news/cbndata/',
    timeout=20,
    headers=h1,
    proxies=pro,
)
# Fail loudly on an HTTP error status instead of parsing an error page and
# silently printing nothing.
response.raise_for_status()
# Force UTF-8 decoding of the body before reading it as text.
response.encoding = "utf-8"
html = response.text

soup = BeautifulSoup(html, 'lxml')
# Headline entries are the <h3> tags carrying exactly this class string.
con = soup.find_all('h3', class_="f-ff1 f-fwn f-fs22")
for item in con:
    link = item.find('a')
    # Skip headings without a usable link so title/URL output stays paired
    # and a single malformed entry does not abort the whole run.
    if link is None or not link.get('href'):
        continue
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(item.get_text())
    print(link['href'])
转载请注明原文地址: https://www.6miu.com/read-42660.html