Python 爬取网络中的 QQ 号码

xiaoxiao  2022-06-11  27

import urllib.request
import ssl
import re
import os

# Blog: https://blog.csdn.net/qq_36374896

# Candidate QQ number: 5 to 10 digits, first digit non-zero.
# re.ASCII keeps \d limited to 0-9 now that the page is matched as decoded
# text (without it, \d would also match non-ASCII Unicode digits).
_QQ_PATTERN = re.compile(r"[1-9]\d{4,9}", re.ASCII)


def writeFile1Bytes(htmlBytes, toPath):
    """Write raw bytes to the file at ``toPath``, replacing existing content.

    Args:
        htmlBytes: Bytes-like payload to store.
        toPath: Destination file path.
    """
    with open(toPath, "wb") as f:
        f.write(htmlBytes)


def writeFile1Str(htmlBytes, toPath):
    """Write page content given as text (or bytes) to the file at ``toPath``.

    The original implementation was a copy of ``writeFile1Bytes`` and raised
    ``TypeError`` when handed an actual ``str``. A ``str`` is now encoded as
    UTF-8 before being written; bytes-like input is still written unchanged,
    so existing callers that pass bytes keep working.

    Args:
        htmlBytes: ``str`` or bytes-like payload (the parameter name is kept
            for backward compatibility).
        toPath: Destination file path.
    """
    if isinstance(htmlBytes, str):
        data = htmlBytes.encode("utf-8")
    else:
        data = htmlBytes
    with open(toPath, "wb") as f:
        f.write(data)


def getHtmlBytes(url):
    """Fetch ``url`` with a browser-like User-Agent and return the raw body.

    Args:
        url: Address to download.

    Returns:
        The response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36"
    }
    req = urllib.request.Request(url, headers=headers)
    # NOTE(security): certificate verification is disabled here, exactly as in
    # the original script. Switch to ssl.create_default_context() if the
    # target site's certificate should be checked.
    context = ssl._create_unverified_context()
    # The context manager closes the response once the body has been read.
    with urllib.request.urlopen(req, context=context) as response:
        return response.read()


def extractQQNumbers(htmlStr):
    """Return the distinct QQ-number candidates found in ``htmlStr``.

    Args:
        htmlStr: Decoded page text.

    Returns:
        A list of digit strings without duplicates, in order of first
        appearance.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # result is deterministic (list(set(...)) had arbitrary ordering).
    return list(dict.fromkeys(_QQ_PATTERN.findall(htmlStr)))


def qqCrawler(url, toPath):
    """Download ``url``, print the QQ-number candidates found and their count.

    Args:
        url: Page to crawl.
        toPath: Currently unused; accepted so existing callers keep working.
            NOTE(review): presumably meant to be the output file for the
            results -- confirm before wiring it up.

    Returns:
        The list of distinct candidates (the original returned ``None``).
    """
    htmlBytes = getHtmlBytes(url)
    # Debugging aid: the raw page can be dumped to disk with
    # writeFile1Bytes(htmlBytes, path) or writeFile1Str(htmlBytes, path).

    # Decode instead of str(htmlBytes): str() of a bytes object is its repr
    # ("b'...'"), in which non-ASCII bytes appear as \xNN escapes whose hex
    # digits can merge with following digits into bogus matches. Undecodable
    # bytes become U+FFFD, which still separates neighbouring digit runs.
    htmlStr = htmlBytes.decode("utf-8", errors="replace")
    qqsList = extractQQNumbers(htmlStr)
    print(qqsList)
    print(len(qqsList))
    return qqsList


if __name__ == "__main__":
    # Guarded so that importing this module does not trigger a network request.
    url = "http://tieba.baidu.com/p/5471533241?traceid="
    toPath = r"C:\Users\admin\Desktop\360学习\爬虫\image\qq.txt"
    qqCrawler(url, toPath)

代码年代久远,注释当时没写,现在懒得写了

转载请注明原文地址: https://www.6miu.com/read-4931326.html

最新回复(0)