【2018-8-20更新(第3版)】筛选7变胆小傲云苍龙宝宝

xiaoxiao2021-02-28  49

#!/usr/bin/python #coding=UTF-8 import urllib import re import sys import time import urllib2 import os import chardet import time reload(sys) sys.setdefaultencoding("utf-8") def getHtml(url): req = urllib2.Request(url) req.add_header('User-Agent','Mozilla/5.0') html = urllib2.urlopen(req).read() return html def getGoodsURL(url):#获取当前页面的商品URL html = getHtml(url) reg = r'<dt class="title"><a href="(.+?)" target' goodsre = re.compile(reg) urllist = re.findall(goodsre,html) urllist = list(set(urllist)) return urllist def GetMiddleStr(content,startStr,endStr):#获取指定的部分字符串 startIndex = content.index(startStr) if startIndex>=0: startIndex += len(startStr) endIndex = content.index(endStr) return content[startIndex:endIndex] def getDict(goodurl):#根据商品url,返回一个字典 start_str= "charObj = " end_str = ',"items"' html = getHtml(goodurl) myjson = GetMiddleStr(html,start_str,end_str)+"}" mydict = eval(myjson) return mydict def file_edit(wr_str):# 写入txt文件 f1 = open(r'D:\pet.txt','a') f1.write(wr_str) f1.close() def getPet(goodurl,pet,level):#商品url是否有指定的宝宝指定的变异等级 roledata = getDict(goodurl) petList = roledata["petList"] if petList: for i in range(0,len(petList)): petname = petList[i]["petVarLevelExplain"] #珍兽名称 petvarlevel = petList[i]["petVarLevel"] #变异等级 xingge = petList[i]["petXingGe"] #性格 0:胆小, 1:谨慎, 2:忠诚,3:精明,4:勇猛 if (pet in petname) and (petvarlevel==level): return "yes" def goods(url,m,p):# 获取第m页至第p页的所有商品url all_goods = [] for n in range(m,p+1): goodsList = getGoodsURL(url + str(n)) all_goods = all_goods + goodsList return all_goods if __name__ == "__main__": #等级110以上,逍遥,价格111--2222元,公示商品 url = "http://tl.cyg.changyou.com/goods/public?world_id=0&profession=8&price=111-2222&level=110-119&have_chosen=profession*8 price*111-2222 level*110-119&page_num=" g_List = goods(url,1,15) for i in range(0,len(g_List)): sys.stdout.write(str(i)+"/"+str(len(g_List))+"\r") sys.stdout.flush() if getPet(g_List[i],"傲云苍龙",7)=="yes": print i,"--",g_List[i] print "---end----"

更新一版。相比上一版,执行效率有所提升,原因在于使用了 BeautifulSoup(html, 'lxml')。另外,这一版把 Python 环境升级到了 Python 3.6.4。

#coding=utf-8
"""Scan every page of a public character listing for a wanted pet (Python 3)."""
import urllib
import urllib.request  # "import urllib" alone does not load the request submodule
from bs4 import BeautifulSoup
import re
import sys


def get_html(url):
    """Download *url* and return the response body decoded as UTF-8.

    Spaces in the URL are sent as %20; current http.client versions reject
    request targets that contain a raw space.
    """
    req = urllib.request.Request(url.replace(" ", "%20"))
    req.add_header('User-Agent', 'Mozilla/5.0')
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(req) as response:
        return response.read().decode('utf-8')


def get_goods(url):
    """Return the goods detail URLs on the search-result page at *url*.

    Each result is a <dt class="title"> element whose first <a> carries the
    link (the original author expected 20 results per page).
    """
    soup = BeautifulSoup(get_html(url), 'lxml')
    return [dt.a.get('href') for dt in soup.find_all('dt', attrs={'class': 'title'})]


def get_all_pages(url):
    """Return the URL of every result page for the listing *url*.

    *url* must end in "page_num="; page numbers are appended to it. The
    page count is the largest number among the <a class="num"> pagination
    links of page 1, or 1 when the page has no such links.
    """
    soup = BeautifulSoup(get_html(url + str(1)), 'lxml')
    page_numbers = [int(a.string) for a in soup.find_all('a', attrs={'class': 'num'})]
    max_page = max(page_numbers, default=1)
    return [url + str(i) for i in range(1, max_page + 1)]


def pet_info(good_url, pet_name="傲云苍龙", min_var_level=7):
    """Check whether the character at *good_url* owns the wanted pet.

    Returns "no_pet" when the character's pet list is empty, "matching"
    when some pet's "petVarLevelExplain" equals *pet_name* and its
    "petVarLevel" is at least *min_var_level*, and None otherwise.

    Raises ValueError when the page does not contain the embedded
    character data.
    """
    start_str = "charObj = "
    end_str = ',"items"'
    html = get_html(good_url)
    start_index = html.index(start_str) + len(start_str)
    # Look for the end marker only after the start marker.
    end_index = html.index(end_str, start_index)
    role_json = html[start_index:end_index] + "}"
    # SECURITY NOTE(review): eval() executes text downloaded from a remote
    # server. Kept to preserve behaviour; json.loads would be the safe
    # choice once the payload is confirmed to be strict JSON.
    role_dict = eval(role_json)
    pet_list = role_dict["petList"]
    if not pet_list:
        return "no_pet"
    for pet in pet_list:
        if pet["petVarLevelExplain"] == pet_name and pet["petVarLevel"] >= min_var_level:
            return "matching"
    return None


if __name__ == "__main__":
    url = "http://tl.cyg.changyou.com/goods/public?world_id=0&profession=8&price=11-2345&level=110-119&have_chosen=profession*8 price*11-2345 level*110-119&page_num="
    pages_list = get_all_pages(url)
    print("共:", len(pages_list), "页")
    for page_no, page_url in enumerate(pages_list, start=1):
        p_goods = get_goods(page_url)
        print("开始分析第", page_no, "页,共有", len(p_goods), "个商品")
        for goods_no, goods_url in enumerate(p_goods, start=1):
            if pet_info(goods_url) == "matching":
                print(goods_no, "--match--", goods_url)
            else:
                # Progress indicator rewritten in place on one console line.
                sys.stdout.write(str(goods_no) + "/" + str(len(p_goods)) + ",NO match" + "\r")
                sys.stdout.flush()

今天又更新一版。

#coding=utf-8
"""Scan every page of a filtered character listing for a wanted pet (Python 3)."""
from urllib import request, parse
from bs4 import BeautifulSoup
import re
import sys
import time


def get_html(url):
    """Download *url*, wait five seconds, and return the body as UTF-8 text.

    The pause runs after every request, throttling the crawl.
    """
    req = request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0')
    # The context manager closes the connection even if read() fails.
    with request.urlopen(req) as response:
        html = response.read().decode('utf-8')
    time.sleep(5)
    return html


def get_goods(url):
    """Return the character-goods detail URLs on the result page at *url*.

    Each result is a <dt class="title"> element whose first <a> carries the
    link (the original author expected 20 results per page).
    """
    soup = BeautifulSoup(get_html(url), 'lxml')
    return [dt.a.get('href') for dt in soup.find_all('dt', attrs={'class': 'title'})]


def get_all_pages(url):
    """Return the URL of every result page for the listing *url*.

    *url* must end in "page_num="; page numbers are appended to it. The
    page count is the largest number among the <a class="num"> pagination
    links of page 1, or 1 when the page has no such links.
    """
    soup = BeautifulSoup(get_html(url + str(1)), 'lxml')
    page_numbers = [int(a.string) for a in soup.find_all('a', attrs={'class': 'num'})]
    max_page = max(page_numbers, default=1)
    return [url + str(i) for i in range(1, max_page + 1)]


def pet_info(good_url, pet_name="傲云苍龙", var_level=7, xingge=None):
    """Check whether the character at *good_url* owns the wanted pet.

    Returns "no_pet" when the character's pet list is empty, "matching"
    when some pet's "petVarLevelExplain" equals *pet_name* and its
    "petVarLevel" equals *var_level*, and None otherwise. Per the original
    author's note, variation level 0 is a baby and 9 an adult.

    When *xingge* is given, the pet's "petXingGe" (personality) must equal
    it too; the author's note lists 0: timid, 1: cautious, 2: loyal,
    3: shrewd, 4: brave (assumed to be stored as integers -- TODO confirm).

    Raises ValueError when the page does not contain the embedded
    character data.
    """
    start_str = "charObj = "
    end_str = ',"items"'
    html = get_html(good_url)
    start_index = html.index(start_str) + len(start_str)
    # Look for the end marker only after the start marker.
    end_index = html.index(end_str, start_index)
    role_json = html[start_index:end_index] + "}"
    # SECURITY NOTE(review): eval() executes text downloaded from a remote
    # server. Kept to preserve behaviour; json.loads would be the safe
    # choice once the payload is confirmed to be strict JSON.
    role_dict = eval(role_json)
    pet_list = role_dict["petList"]
    if not pet_list:
        return "no_pet"
    for pet in pet_list:
        if pet["petVarLevelExplain"] != pet_name:
            continue
        if pet["petVarLevel"] != var_level:
            continue
        if xingge is not None and pet["petXingGe"] != xingge:
            continue
        return "matching"
    return None


if __name__ == "__main__":
    host_url = "http://tl.cyg.changyou.com/goods/public?world_id=0&"
    # Search filters sent as query parameters.
    info_dict = {
        'profession': 8,
        'price': '222-2333',
        'level': '110-119',
        'xinfa': '4001-0',
        'xiulian': '10001-0',
        'jinjiexiulian': '3001-0',
        'equipscore': '100001-400000'
    }
    tt = parse.urlencode(info_dict)
    # NOTE(review): this value is already percent-encoded here and is
    # encoded again by the urlencode call below -- confirm the server
    # expects (or ignores) the doubly encoded have_chosen parameter.
    have_chosen = parse.quote(tt.encode('GBK'))
    info_dict['have_chosen'] = have_chosen
    url = host_url + parse.urlencode(info_dict) + "&page_num="
    pages_list = get_all_pages(url)
    print("共:", len(pages_list), "页")
    for page_no, page_url in enumerate(pages_list, start=1):
        p_goods = get_goods(page_url)
        print("开始分析第", page_no, "页,共有", len(p_goods), "个商品")
        for goods_no, goods_url in enumerate(p_goods, start=1):
            if pet_info(goods_url) == "matching":
                print(goods_no, "--match--", goods_url)
            else:
                # Progress indicator rewritten in place on one console line.
                sys.stdout.write(str(goods_no) + "/" + str(len(p_goods)) + ",NO match" + "\r")
                sys.stdout.flush()
转载请注明原文地址: https://www.6miu.com/read-78572.html

最新回复(0)