抓取中国最好大学排名-社会声誉排名2018(requests,bs4,csv)

xiaoxiao2021-02-28  38

import csv
import requests
from bs4 import BeautifulSoup

# Request headers: send a browser-style User-Agent with the page request.
headers = {'User-Agent': "Opera/9.80(WindowsNT6.1;U;en)Presto/2.8.131Version/11.11"}


def _cell_text(row, index):
    """Return the text of the ``index``-th (1-based) ``td`` in ``row``, or '' if absent."""
    cells = row.select('td:nth-of-type(%d)' % index)
    return cells[0].text if cells else ''


def getInfo(url, output_path='ranking.csv', timeout=10):
    """Scrape the ranking table at ``url`` and write it to a CSV file.

    The header row is taken from the ``thead th`` cells and one data row is
    written for every ``tr.alt`` element, using its first five ``td`` cells
    (rank, school name, province/city, social donation income in thousand
    yuan, overall rank). The header and every data row are also printed.

    Args:
        url: Address of the ranking page to download.
        output_path: Destination CSV file; it is overwritten if it exists.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Fetch the page source; a timeout keeps a stalled server from hanging
    # the script, and an error status is reported instead of being parsed.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    html = response.content.decode('utf-8', 'ignore')
    soup = BeautifulSoup(html, 'lxml')

    # Table header.
    theadList = [head.text for head in soup.select('thead th')]
    print(theadList)

    # Open the file once for the whole run. newline='' is what the csv module
    # requires to avoid blank lines between rows on Windows, and an explicit
    # encoding keeps the Chinese text independent of the platform locale.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(theadList)

        # School rows.
        for school in soup.select('tr.alt'):
            ranking = _cell_text(school, 1)        # rank
            schoolName = _cell_text(school, 2)     # school name
            schoolAddress = _cell_text(school, 3)  # province / city
            socialIncome = _cell_text(school, 4)   # social donation income (thousand yuan)
            compreRanking = _cell_text(school, 5)  # overall rank
            if not compreRanking:
                # Placeholder written when the overall rank cell is empty.
                compreRanking = '暂无数据'
            data = [ranking, schoolName, schoolAddress, socialIncome, compreRanking]
            print(data)
            writer.writerow(data)


if __name__ == '__main__':
    url = 'http://www.zuihaodaxue.com/shehuishengyupaiming2018.html'
    getInfo(url)
转载请注明原文地址: https://www.6miu.com/read-2613692.html

最新回复(0)