Indiegogo众筹产品数据抓取(Python)

xiaoxiao2021-02-28  74

之前写过一篇小米众筹产品数据抓取的文章,今天分享一下Indiegogo众筹产品的数据抓取。两个网站的请求方式不一样,而且Indiegogo对数据抓取的限制更加严格,请求头里还需要添加Cookie属性。发现这一点花了我不少时间,好在最后还是找到了问题所在。这次没有用到新的东西,好了,废话不多说,直接上源码:

"""Scrape Indiegogo "Tech & Innovation" campaigns and their perks.

The script pages through the private explore API, prints a fixed set of
fields for every campaign, then fetches and prints the perks of each
campaign found.
"""
import json

import requests

# Per the accompanying article, the private API only answers requests that
# carry a browser Cookie, so these headers replay a captured browser session.
# The cookie values belong to that session; replace them with ones captured
# from your own browser.
#
# The captured 'If-None-Match' header is intentionally not replayed: with a
# fixed ETag the request becomes conditional, and a match yields a
# "304 Not Modified" response with no body, which cannot be parsed as JSON.
headers = {
    'Accept': 'application/json, text/plain, */*',
    'Accept-Encoding': 'gzip, deflate, sdch, br',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'Host': 'www.indiegogo.com',
    'Referer': 'https://www.indiegogo.com/explore/home',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36'
    ),
    'Cookie': (
        'ki_t=1482987544992;1487728858942;1487729104639;4;16; '
        'ki_r=; '
        'D_SID=113.104.195.55:YisvIcHF4Z24s3TB2TKbitxWEwrmGlyymljvxklDswE; '
        '__ar_v4=MNA52ZMPS5A5HHGLKMZP3O:20170602:3'
        '|ZIENZZYANBHC5MQ4WYNKZI:20170602:3'
        '|6RP73TXU3VCT7KLJC3P7EZ:20170602:3; '
        '__stripe_mid=35ce441a-7019-4221-b482-b0cba67f7fd4; '
        'romref=sch-baid; '
        'romref_referer_host=www.baidu.com; '
        'D_IID=02B91A75-51CD-3F94-A882-07B72A5961C2; '
        'D_UID=8506F25A-0F30-31AC-81E7-2864983C64A4; '
        'D_ZID=FDDDFB6A-67FA-340D-9C89-564B5A0B9899; '
        'D_ZUID=5D0201DE-2FF3-3A22-AD4E-8F28FE310CF0; '
        'D_HID=2FCF09D9-15AF-3626-BC88-5C189ABE314C; '
        '_ga=GA1.2.378922805.1482987529; '
        '_gid=GA1.2.772188293.1496800270; '
        '_ceg.s=or7ggq; '
        '_ceg.u=or7ggq; '
        '__hstc=223492548.37066d182471c31df2df71226efcf1aa'
        '.1482987568028.1496806912327.1496885823487.12; '
        '__hssrc=1; '
        '__hssc=223492548.1.1496885823487; '
        'hubspotutk=37066d182471c31df2df71226efcf1aa; '
        'locale=en; '
        'cohort=www.baidu.com|sch-baid|shr-pica|sch-baid|shr-pica|sch-baid; '
        'visitor_id='
        'bba58762b2348bf353961ae7353f1e7ab5ff697484ba0a55993771e32402d354; '
        'analytics_session_id='
        '2cab9c7316a54d91e73058b25c9e12748d7b3fda0ea06d63e02fb536b212e3d6; '
        'recent_project_ids=2001745&2132365&2115568&2107867&1905844&1637253'
        '&2115621&1931378&2063864&1787929&2024430&1978687&1993728&2016897'
        '&2023232&1994449&2017684&2022194&1319420&1625355; '
        '_session_id=742dc34425f25f0f10f5962f23becc4f'
    ),
}

EXPLORE_URL = "https://www.indiegogo.com/private_api/explore"
PERKS_URL_TEMPLATE = "https://www.indiegogo.com/private_api/campaigns/%s/perks"

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Query string of the explore endpoint. It is passed through `params` so the
# "&" inside the category name is percent-encoded; written literally in the
# URL it would terminate the parameter and the filter would be "Tech " only.
EXPLORE_PARAMS = {
    'filter_category': 'Tech & Innovation',
    'filter_funding': '',
    'filter_percent_funded': '',
    'filter_quick': 'popular_all',
    'filter_status': '',
    'per_page': 100,
}

# Campaign fields printed for every campaign, in output order.
CAMPAIGN_FIELDS = (
    'id',                    # campaign id
    'title',                 # campaign title
    'currency_code',         # currency code
    'balance',               # amount raised so far
    'in_forever_funding',    # whether the campaign is in forever funding
    'url',                   # campaign path, relative to www.indiegogo.com
    'compressed_image_url',  # cover image URL
    'amt_time_left',         # time left
    'collected_percentage',  # funding progress
)

# Perk fields printed for every perk, in output order.
PERK_FIELDS = (
    'id',                # perk id
    'amount',            # perk price
    'label',             # perk label
    'description',       # perk description
    'number_available',  # number of units offered
    'number_claimed',    # number of units claimed by backers
)

SEPARATOR = "===================================="


def fetch_json(session, url, params=None):
    """Return the decoded JSON body of a GET request.

    Args:
        session: ``requests.Session`` used to send the request.
        url: Endpoint to query.
        params: Optional mapping of query-string parameters.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        ValueError: If the response body is not valid JSON.
    """
    response = session.get(url, params=params, timeout=REQUEST_TIMEOUT)
    # Fail with the HTTP status rather than an opaque JSON error on an
    # error page.
    response.raise_for_status()
    # Parse the raw bytes so the json module detects the UTF encoding itself.
    return json.loads(response.content)


def iter_campaigns(session):
    """Yield every campaign of the category, page by page.

    Pages are requested starting at ``pg_num=1`` and iteration stops at the
    first page whose ``campaigns`` list is empty.
    """
    page = 0
    while True:
        page += 1
        query = dict(EXPLORE_PARAMS, pg_num=page)
        campaigns = fetch_json(session, EXPLORE_URL, query)['campaigns']
        if not campaigns:
            return
        yield from campaigns


def print_record(record, fields):
    """Print the given fields of *record*, one per line, then a separator."""
    for field in fields:
        print(record[field])
    print(SEPARATOR)


def main():
    """Print all campaigns first, then the perks of each campaign."""
    # One session reuses the connection and applies the headers to every call.
    with requests.Session() as session:
        session.headers.update(headers)

        # Campaign summaries come first; the perk URLs are collected on the
        # way and fetched afterwards.
        urls = []
        for campaign in iter_campaigns(session):
            print_record(campaign, CAMPAIGN_FIELDS)
            urls.append(PERKS_URL_TEMPLATE % campaign['id'])

        for url in urls:
            print(url)
            for perk in fetch_json(session, url)['response']:
                print_record(perk, PERK_FIELDS)


if __name__ == "__main__":
    main()
转载请注明原文地址: https://www.6miu.com/read-84668.html

最新回复(0)