Python 正则表达式的小练习

xiaoxiao2021-02-28  12

"""Regular-expression exercise: scrape one Qiushibaike "hot" page.

Fetches a single listing page, pulls (author, text) pairs out of the HTML
with one regular expression, and prints them as numbered entries.
"""
import re
import urllib.request

URL = "https://www.qiushibaike.com/hot/page/1"

# Browser-like headers wrapped into the Request. For some sites a bare
# urlopen(url) is enough; when the connection is refused, sending a Request
# that carries headers like these is the workaround.
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/45.0.2454.85 Safari/537.36 115Browser/6.0.3"
    ),
    "Referer": "https://www.qiushibaike.com/",
    "Connection": "keep-alive",
}

# `.*?` lazily matches any run of characters; each `(.*?)` is a capture group.
# Group 1 is the <h2> text inside the "author" block, group 2 is the <span>
# text inside the "content" block. re.S lets `.` cross line breaks, which is
# required because one entry spans many lines of HTML.
JOKE_PATTERN = re.compile(
    r'<div.*?class="author.*?>.*?<img.*?<h2>(.*?)'
    r'</h2>.*?</a>.*?="content".*?<span>(.*?)</span>.*?="stats"',
    re.S,
)


def fetch_page(url=URL, headers=None, timeout=10):
    """Download *url* and return its body decoded as UTF-8 text.

    Args:
        url: Page address to request.
        headers: HTTP headers to send; defaults to ``HEADERS``.
        timeout: Seconds to wait on the socket before giving up, so a stalled
            server cannot hang the script forever.

    Raises:
        urllib.error.URLError: If the request fails or times out.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    request = urllib.request.Request(
        url, headers=HEADERS if headers is None else headers
    )
    # Read everything inside the `with` so the connection is released before
    # any parsing or printing happens.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode("utf-8")  # bytes -> str


def extract_jokes(html):
    """Return a list of ``(author, text)`` tuples found in *html*.

    The captured strings are returned exactly as they appear between the
    tags (no whitespace stripping). An empty list means nothing matched.
    """
    return JOKE_PATTERN.findall(html)


def print_jokes(jokes):
    """Print each ``(author, text)`` pair as a numbered entry, starting at 1."""
    for number, (author, text) in enumerate(jokes, start=1):
        print("第", number, "个段子")
        print(author, text)
        print("----------------")


def main():
    """Fetch the configured page and print every joke found on it."""
    print_jokes(extract_jokes(fetch_page()))


if __name__ == "__main__":
    main()
转载请注明原文地址: https://www.6miu.com/read-2250220.html

最新回复(0)