Python爬取内涵段子（JSON文件处理）

xiaoxiao2021-02-28 50

# Crawl joke texts from the neihanshequ.com JSON feed and append them to a
# local text file.
#
# Every feed response carries a "max_time" value. That value is appended to
# the feed URL to request the next page, and crawling continues for as long
# as the returned "max_time" is a number.
import time

import requests

# Feed endpoint; the "max_time" value is appended to select a page.
BASE_URL = "http://neihanshequ.com/joke/?is_json=1&app_name=neihanshequ_web&max_time="
OUTPUT_PATH = 'C:\\Users\\wwxy\\Desktop\\内涵段子\\data.txt'
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests may block forever
CRAWL_DELAY = 3  # seconds to wait before each request inside the loop

# NOTE(review): the Cookie / X-CSRFToken values look like they were copied
# from a browser session and are presumably short-lived -- confirm and
# refresh them before running.
headers = {
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Cookie": 'csrftoken=39d061a82943a131cbd937c4055726a9; tt_webid=6532073659451803139; uuid="w:d8f38aaaa8eb4c419b86571d6e8fd0a4"; _ga=GA1.2.884661281.1520866917; _gid=GA1.2.322191654.1521025498',
    "Host": "neihanshequ.com",
    "Pragma": "no-cache",
    "Referer": "http://neihanshequ.com/",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36",
    "X-CSRFToken": "39d061a82943a131cbd937c4055726a9",
    "X-Requested-With": "XMLHttpRequest",
}


def fetch_feed(page_url):
    """Request one feed page and return the object stored under "data".

    Args:
        page_url: Full feed URL, including the ``max_time`` query value.

    Returns:
        The decoded ``"data"`` member of the JSON response. The script reads
        its ``"max_time"`` and ``"data"`` entries.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response has no ``"data"`` member.
    """
    response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTP error instead of an opaque JSON decoding error.
    response.raise_for_status()
    # Decode the body once; callers reuse the parsed object.
    return response.json()["data"]


url = BASE_URL + "1521106836"
feed = fetch_feed(url)
timestamp = feed["max_time"]
print(timestamp)

# Keep crawling while the server keeps returning a numeric "max_time".
# bool is excluded explicitly because it is a subclass of int.
while isinstance(timestamp, (int, float)) and not isinstance(timestamp, bool):
    time.sleep(CRAWL_DELAY)
    # The first pass requests the same URL as the initial fetch above; the
    # original request sequence is kept as-is.
    feed = fetch_feed(url)
    timestamp = feed["max_time"]
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
        for item in feed["data"]:
            # One joke per entry, followed by a blank separator line.
            f.write(item["group"]["content"] + "\n\n")
    url = BASE_URL + str(timestamp)
    print("已完成一次爬取", timestamp)

转载请注明原文地址: https://www.6miu.com/read-2630720.html

技术

最新回复(0)