#!/usr/bin/python
import os
import time
import datetime
import codecs
#from lxml import etree
from selenium import webdriver
#import csv
from bs4 import BeautifulSoup as bp
import re
import io
import sys
# Re-wrap stdout with the GB18030 encoding so Chinese text can be printed in
# the Windows cmd console; per the original note this setting is required when
# the script is run from cmd without a pop-up window.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer,encoding='gb18030')
def zsk():
    r"""Scrape China Merchants Bank credit-card names and write a dated report.

    Fetches catalogue pages 1-19 with PhantomJS, collects the stripped text of
    every ``<h2 class="cardinfotitle">`` element and appends the full list and
    its count to ``D:\招商银行信用卡<YYYY-MM-DD>.doc``.  When the baseline file
    ``D:\招商银行信用卡.doc`` exists, every scraped name that is not a line of
    that file is additionally reported as a new card.

    Returns:
        None.  Results are printed to stdout and appended to the report file.
    """
    rule = '_____________________________________________________'
    zs = []
    # The page range is hard-coded; pages past the real last page presumably
    # contain no matching titles -- TODO confirm against the site.
    for i in range(1, 20):
        url = 'http://ccclub.cmbchina.com/ccproduct/cardlist.aspx?PageNo=%s' % i
        driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
        try:
            driver.get(url)
            driver.maximize_window()
            # Wait BEFORE reading page_source so client-side rendering has a
            # chance to finish; sleeping afterwards cannot affect the snapshot.
            time.sleep(5)
            html = driver.page_source
        finally:
            # Always stop the PhantomJS process, even when the page load fails.
            driver.quit()
        print(rule + '对应html是' + rule, html)
        for tag in bp(html, 'html.parser').find_all('h2', {'class': 'cardinfotitle'}):
            content = tag.text.strip()
            print(rule, content)
            zs.append(content)
    print('最终结果的列表' + rule, zs)
    for k in zs:
        print('信用卡有' + rule, k)

    # Read the clock once so the timestamp and the file-name date always agree.
    now = time.localtime()
    xrtime = time.strftime('%Y-%m-%d %H:%M:%S', now)
    xrtime0 = time.strftime('%Y-%m-%d', now)
    string0 = '\n\n————————————招商银行信用卡有————————————:\n' + xrtime + '\n\n'
    string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' + xrtime + '\n\n'
    # Raw strings: the backslash before a non-ASCII character is not a valid
    # escape sequence in a normal literal.
    baseline_path = r'D:\招商银行信用卡.doc'
    report_path = r'D:\招商银行信用卡%s.doc' % xrtime0
    listing = ''.join(k + '\n' for k in zs) + '\n\n卡数目为:' + str(len(zs))

    if os.path.exists(baseline_path):
        # Baseline exists: compare against it and report the additions.
        with open(baseline_path, 'r', encoding='utf-8') as f:
            known = {line.strip() for line in f}
        xinka = [k for k in zs if k not in known]
        print(xinka)
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write(string0)
            f.write(listing)
            if xinka:
                # ``url`` is the last page that was fetched.
                f.write('本信用卡产品所在的网页链接是:' + url)
                f.write(string1)
                f.write(''.join(k + '\n' for k in xinka))
                f.write('\n\n新增卡数目为:')
                f.write(str(len(xinka)))
            else:
                f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')
    else:
        # No baseline yet: just record the current catalogue.
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write('本信用卡产品所在的网页链接是:' + url)
            f.write(string0)
            f.write(listing)
def zxk():
    r"""Scrape China CITIC Bank credit-card names and write a dated report.

    Visits six category pages with PhantomJS, collects the stripped text of
    every ``<p class="card_name">`` element on each page (minus the last two
    entries of each page), merges the categories without duplicates and
    appends the result to ``D:\中信银行信用卡<YYYY-MM-DD>.doc``.  When the
    baseline file ``D:\中信银行信用卡.doc`` exists, every name that is not a
    line of that file is additionally reported as a new card.

    Returns:
        None.  Results are printed to stdout and appended to the report file.
    """
    rule = '_____________________________________________________'
    # (category page URL, label used for the per-category console output)
    categories = [
        ('https://creditcard.ecitic.com/shenqing/tesezhutika.shtml', '特色信用卡有'),
        ('https://creditcard.ecitic.com/shenqing/index.shtml', 'top10卡有'),
        ('https://creditcard.ecitic.com/shenqing/biaozhunka2.shtml', '标准卡有'),
        ('https://creditcard.ecitic.com/shenqing/hangkongshanglv.shtml', '航空商旅卡有'),
        ('https://creditcard.ecitic.com/shenqing/gaoduanbaijin.shtml', '高端白金卡有'),
        ('https://creditcard.ecitic.com/shenqing/chuguozhuanxiang.shtml', '出国专享卡有'),
    ]
    zx = []
    seen = set()
    for url, label in categories:
        driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
        try:
            driver.get(url)
            driver.maximize_window()
            # Wait BEFORE reading page_source so client-side rendering has a
            # chance to finish; sleeping afterwards cannot affect the snapshot.
            time.sleep(5)
            html = driver.page_source
        finally:
            # Always stop the PhantomJS process, even when the page load fails.
            driver.quit()
        print(rule + '对应html是' + rule, html)
        names = []
        for tag in bp(html, 'html.parser').find_all('p', {'class': 'card_name'}):
            content = tag.text.strip()
            print(rule, content)
            names.append(content)
        # Drop the last two matches (presumably non-card entries at the end of
        # the page -- TODO confirm).  Slice deletion does not raise when the
        # page yielded fewer than two matches.
        del names[-2:]
        print('最终结果的列表' + rule, names)
        for name in names:
            print(label + rule, name)
            # Merge into the overall list, keeping first-seen order.
            if name not in seen:
                seen.add(name)
                zx.append(name)

    # Read the clock once so the timestamp and the file-name date always agree.
    now = time.localtime()
    xrtime = time.strftime('%Y-%m-%d %H:%M:%S', now)
    xrtime0 = time.strftime('%Y-%m-%d', now)
    string0 = '\n\n————————————中信银行信用卡有————————————:\n' + xrtime + '\n\n'
    string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' + xrtime + '\n\n'
    # Raw strings: the backslash before a non-ASCII character is not a valid
    # escape sequence in a normal literal.
    baseline_path = r'D:\中信银行信用卡.doc'
    report_path = r'D:\中信银行信用卡%s.doc' % xrtime0
    listing = ''.join(k + '\n' for k in zx) + '\n\n卡数目为:' + str(len(zx))

    if os.path.exists(baseline_path):
        # Baseline exists: compare against it and report the additions.
        with open(baseline_path, 'r', encoding='utf-8') as f:
            known = {line.strip() for line in f}
        xinka = [k for k in zx if k not in known]
        print(xinka)
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write(string0)
            f.write(listing)
            if xinka:
                # ``url`` is the last category page that was fetched.
                f.write('本信用卡产品所在的网页链接是:' + url)
                f.write(string1)
                f.write(''.join(k + '\n' for k in xinka))
                f.write('\n\n新增卡数目为:')
                f.write(str(len(xinka)))
            else:
                f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')
    else:
        # No baseline yet: just record the current catalogue.
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write('本信用卡产品所在的网页链接是:' + url)
            f.write(string0)
            f.write(listing)
def pfk():
    r"""Scrape SPD Bank (Pudong Development) credit-card names and write a report.

    Loads the card page with PhantomJS, collects the stripped text of every
    ``<div class="ttd">`` element followed by every ``<div class="ttdf">``
    element, and appends the list and its count to
    ``D:\浦发银行信用卡<YYYY-MM-DD>.doc``.  When the baseline file
    ``D:\浦发银行信用卡.doc`` exists, every name that is not a line of that
    file is additionally reported as a new card.

    Returns:
        None.  Results are printed to stdout and appended to the report file.
    """
    rule = '_____________________________________________________'
    url = 'http://ccc.spdb.com.cn/apply_for_credit_cards/card/'
    driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
    try:
        driver.get(url)
        driver.maximize_window()
        # Wait BEFORE reading page_source so client-side rendering has a
        # chance to finish; sleeping afterwards cannot affect the snapshot.
        time.sleep(5)
        html = driver.page_source
    finally:
        # Always stop the PhantomJS process, even when the page load fails.
        driver.quit()
    print(rule + '对应html是' + rule, html)

    pf = []
    soup = bp(html, 'html.parser')
    # Two CSS classes carry card names; keep the "ttd" matches first.
    for css_class in ('ttd', 'ttdf'):
        for tag in soup.find_all('div', {'class': css_class}):
            content = tag.text.strip()
            print(rule, content)
            pf.append(content)
    print('最终结果的列表' + rule, pf)
    for i in pf:
        print('信用卡有' + rule, i)

    # Read the clock once so the timestamp and the file-name date always agree.
    now = time.localtime()
    xrtime = time.strftime('%Y-%m-%d %H:%M:%S', now)
    xrtime0 = time.strftime('%Y-%m-%d', now)
    string0 = '\n\n————————————浦发银行信用卡有————————————:\n' + xrtime + '\n\n'
    string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' + xrtime + '\n\n'
    # Raw strings: the backslash before a non-ASCII character is not a valid
    # escape sequence in a normal literal.
    baseline_path = r'D:\浦发银行信用卡.doc'
    report_path = r'D:\浦发银行信用卡%s.doc' % xrtime0
    listing = ''.join(k + '\n' for k in pf) + '\n\n卡数目为:' + str(len(pf))

    if os.path.exists(baseline_path):
        # Baseline exists: compare against it and report the additions.
        with open(baseline_path, 'r', encoding='utf-8') as f:
            known = {line.strip() for line in f}
        xinka = [k for k in pf if k not in known]
        print(xinka)
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write(string0)
            f.write(listing)
            if xinka:
                f.write('本信用卡产品所在的网页链接是:' + url)
                f.write(string1)
                f.write(''.join(k + '\n' for k in xinka))
                f.write('\n\n新增卡数目为:')
                f.write(str(len(xinka)))
            else:
                f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')
    else:
        # No baseline yet: just record the current catalogue.
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write('本信用卡产品所在的网页链接是:' + url)
            f.write(string0)
            f.write(listing)
def msk():
    r"""Scrape China Minsheng Bank credit-card names and write a dated report.

    Fetches channel pages 1-4 with PhantomJS, collects the stripped text of
    every ``<h3>`` element whose class is
    ``ms_home_page_product_content_li_title ms-textEllipsis`` and appends the
    list and its count to ``D:\民生银行信用卡<YYYY-MM-DD>.doc``.  When the
    baseline file ``D:\民生银行信用卡.doc`` exists, every name that is not a
    line of that file is additionally reported as a new card.

    Returns:
        None.  Results are printed to stdout and appended to the report file.
    """
    rule = '_____________________________________________________'
    title_class = 'ms_home_page_product_content_li_title ms-textEllipsis'
    ms = []
    for i in range(1, 5):
        url = 'https://creditcard.cmbc.com.cn/fe/Channel/14349?page=%s' % i
        driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
        try:
            driver.get(url)
            driver.maximize_window()
            # Wait BEFORE reading page_source so client-side rendering has a
            # chance to finish; sleeping afterwards cannot affect the snapshot.
            time.sleep(5)
            html = driver.page_source
        finally:
            # Always stop the PhantomJS process, even when the page load fails.
            driver.quit()
        print(rule + '对应html是' + rule, html)
        for tag in bp(html, 'html.parser').find_all('h3', {'class': title_class}):
            content = tag.text.strip()
            print(rule, content)
            ms.append(content)
    print('最终结果的列表' + rule, ms)
    for k in ms:
        print('信用卡有' + rule, k)

    # Read the clock once so the timestamp and the file-name date always agree.
    now = time.localtime()
    xrtime = time.strftime('%Y-%m-%d %H:%M:%S', now)
    xrtime0 = time.strftime('%Y-%m-%d', now)
    string0 = '\n\n————————————民生银行信用卡有————————————:\n' + xrtime + '\n\n'
    string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' + xrtime + '\n\n'
    # Raw strings: the backslash before a non-ASCII character is not a valid
    # escape sequence in a normal literal.
    baseline_path = r'D:\民生银行信用卡.doc'
    report_path = r'D:\民生银行信用卡%s.doc' % xrtime0
    listing = ''.join(k + '\n' for k in ms) + '\n\n卡数目为:' + str(len(ms))

    if os.path.exists(baseline_path):
        # Baseline exists: compare against it and report the additions.
        with open(baseline_path, 'r', encoding='utf-8') as f:
            known = {line.strip() for line in f}
        xinka = [k for k in ms if k not in known]
        print(xinka)
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write(string0)
            f.write(listing)
            if xinka:
                # ``url`` is the last page that was fetched.
                f.write('本信用卡产品所在的网页链接是:' + url)
                f.write(string1)
                f.write(''.join(k + '\n' for k in xinka))
                f.write('\n\n新增卡数目为:')
                f.write(str(len(xinka)))
            else:
                f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')
    else:
        # No baseline yet: just record the current catalogue.
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write('本信用卡产品所在的网页链接是:' + url)
            f.write(string0)
            f.write(listing)
def xyk():
    r"""Scrape Industrial Bank (CIB) credit-card names and write a dated report.

    Loads one product page with PhantomJS, collects the stripped text of the
    ``<a>`` elements selected by the two class filters below (without
    duplicates, in first-seen order) and appends the list and its count to
    ``D:\兴业银行信用卡<YYYY-MM-DD>.doc``.  When the baseline file
    ``D:\兴业银行信用卡.doc`` exists, every name that is not a line of that
    file is additionally reported as a new card.

    Returns:
        None.  Results are printed to stdout and appended to the report file.
    """
    rule = '_____________________________________________________'
    url = 'http://creditcard.cib.com.cn/apply/products/BJseries/xing1.html'
    driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
    try:
        driver.get(url)
        driver.maximize_window()
        # Wait BEFORE reading page_source so client-side rendering has a
        # chance to finish; sleeping afterwards cannot affect the snapshot.
        time.sleep(5)
        html = driver.page_source
    finally:
        # Always stop the PhantomJS process, even when the page load fails.
        driver.quit()
    print(rule + '对应html是' + rule, html)

    xy = []
    seen = set()
    soup = bp(html, 'html.parser')
    # NOTE(review): both class values end with a space; confirm that they
    # still match with the installed BeautifulSoup version.
    for css_class in ('third level ', 'four level '):
        for tag in soup.find_all('a', {'class': css_class}):
            content = tag.text.strip()
            print(rule, content)
            if content not in seen:
                seen.add(content)
                xy.append(content)
    print('最终结果的列表' + rule, xy)
    for i in xy:
        print('信用卡有' + rule, i)

    # Read the clock once so the timestamp and the file-name date always agree.
    now = time.localtime()
    xrtime = time.strftime('%Y-%m-%d %H:%M:%S', now)
    xrtime0 = time.strftime('%Y-%m-%d', now)
    string0 = '\n\n————————————兴业银行信用卡有————————————:\n' + xrtime + '\n\n'
    string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' + xrtime + '\n\n'
    # Raw strings: the backslash before a non-ASCII character is not a valid
    # escape sequence in a normal literal.
    baseline_path = r'D:\兴业银行信用卡.doc'
    report_path = r'D:\兴业银行信用卡%s.doc' % xrtime0
    listing = ''.join(k + '\n' for k in xy) + '\n\n卡数目为:' + str(len(xy))

    if os.path.exists(baseline_path):
        # Baseline exists: compare against it and report the additions.
        with open(baseline_path, 'r', encoding='utf-8') as f:
            known = {line.strip() for line in f}
        xinka = [k for k in xy if k not in known]
        print(xinka)
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write(string0)
            f.write(listing)
            if xinka:
                f.write('本信用卡产品所在的网页链接是:' + url)
                f.write(string1)
                f.write(''.join(k + '\n' for k in xinka))
                f.write('\n\n新增卡数目为:')
                f.write(str(len(xinka)))
            else:
                f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')
    else:
        # No baseline yet: just record the current catalogue.
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write('本信用卡产品所在的网页链接是:' + url)
            f.write(string0)
            f.write(listing)
def gdk():
    r"""Scrape China Everbright Bank credit-card names and write a dated report.

    Loads the product index with PhantomJS, collects the stripped text of
    every ``<h4>`` element without duplicates, discards the first three and
    last seven collected headings, and appends the remaining list and its
    count to ``D:\光大银行信用卡<YYYY-MM-DD>.doc``.  When the baseline file
    ``D:\光大银行信用卡.doc`` exists, every name that is not a line of that
    file is additionally reported as a new card.

    Returns:
        None.  Results are printed to stdout and appended to the report file.
    """
    rule = '_____________________________________________________'
    url = 'http://xyk.cebbank.com/home/ps/index.htm'
    driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
    try:
        driver.get(url)
        driver.maximize_window()
        # Wait BEFORE reading page_source so client-side rendering has a
        # chance to finish; sleeping afterwards cannot affect the snapshot.
        time.sleep(5)
        html = driver.page_source
    finally:
        # Always stop the PhantomJS process, even when the page load fails.
        driver.quit()
    print(rule + '对应html是' + rule, html)

    gd = []
    seen = set()
    for tag in bp(html, 'html.parser').find_all('h4'):
        content = tag.text.strip()
        print(rule, content)
        if content not in seen:
            seen.add(content)
            gd.append(content)
    print('没有去掉冗余的结果是:==============================================', gd)
    # Trim the first three and last seven headings (presumably page chrome
    # rather than card names -- TODO confirm).  Slicing yields an empty list
    # instead of raising when fewer than ten headings were found.
    gd = gd[3:-7]
    print('最终结果的列表' + rule, gd)
    for i in gd:
        print('信用卡有' + rule, i)

    # Read the clock once so the timestamp and the file-name date always agree.
    now = time.localtime()
    xrtime = time.strftime('%Y-%m-%d %H:%M:%S', now)
    xrtime0 = time.strftime('%Y-%m-%d', now)
    string0 = '\n\n————————————光大银行信用卡有————————————:\n' + xrtime + '\n\n'
    string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' + xrtime + '\n\n'
    # Raw strings: the backslash before a non-ASCII character is not a valid
    # escape sequence in a normal literal.
    baseline_path = r'D:\光大银行信用卡.doc'
    report_path = r'D:\光大银行信用卡%s.doc' % xrtime0
    listing = ''.join(k + '\n' for k in gd) + '\n\n卡数目为:' + str(len(gd))

    if os.path.exists(baseline_path):
        # Baseline exists: compare against it and report the additions.
        with open(baseline_path, 'r', encoding='utf-8') as f:
            known = {line.strip() for line in f}
        xinka = [k for k in gd if k not in known]
        print(xinka)
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write(string0)
            f.write(listing)
            if xinka:
                f.write('本信用卡产品所在的网页链接是:' + url)
                f.write(string1)
                f.write(''.join(k + '\n' for k in xinka))
                f.write('\n\n新增卡数目为:')
                f.write(str(len(xinka)))
            else:
                f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')
    else:
        # No baseline yet: just record the current catalogue.
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write('本信用卡产品所在的网页链接是:' + url)
            f.write(string0)
            f.write(listing)
def gfk():
    r"""Scrape China Guangfa Bank credit-card names and write a dated report.

    Visits six channel pages with PhantomJS.  On each page the ``title``
    attribute of the first ``<a>`` inside every ``<div class="text">`` is
    taken as a card name; divs without an anchor or without a title are
    skipped.  The categories are merged without duplicates and appended to
    ``D:\广发银行信用卡<YYYY-MM-DD>.doc``.  When the baseline file
    ``D:\广发银行信用卡.doc`` exists, every name that is not a line of that
    file is additionally reported as a new card.

    Returns:
        None.  Results are printed to stdout and appended to the report file.
    """
    rule = '_____________________________________________________'
    # (channel page URL, label used for the per-category console output)
    categories = [
        ('http://card.cgbchina.com.cn/Channel/11712350', '推荐信用卡有'),
        ('http://card.cgbchina.com.cn/Channel/11714125', '都会白领卡有'),
        ('http://card.cgbchina.com.cn/Channel/11713864', '爱车达人卡有'),
        ('http://card.cgbchina.com.cn/Channel/11713032', '商旅优悦卡有'),
        ('http://card.cgbchina.com.cn/Channel/11712561', '至尊精英卡有'),
        ('http://card.cgbchina.com.cn/Channel/15354893', '联名卡有'),
    ]
    collected = []
    for url, label in categories:
        driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
        try:
            driver.get(url)
            driver.maximize_window()
            # Wait BEFORE reading page_source so client-side rendering has a
            # chance to finish; sleeping afterwards cannot affect the snapshot.
            time.sleep(5)
            html = driver.page_source
        finally:
            # Always stop the PhantomJS process, even when the page load fails.
            driver.quit()
        print(rule + '对应html是' + rule, html)
        names = []
        # Filter on the class attribute with an explicit dict.
        for block in bp(html, 'html.parser').find_all('div', {'class': 'text'}):
            anchor = block.find('a')
            if anchor is None:
                # A text block without a link carries no card name.
                continue
            print('不知道这个打印出来是什么============================================', anchor)
            content = anchor.get('title')
            print(content)
            print(type(content))
            if content is None:
                # No title attribute: skip it, because a None entry would
                # make the report write fail later.
                continue
            print(rule, content)
            names.append(content)
        print('最终结果的列表' + rule, names)
        for name in names:
            print(label + rule, name)
        collected.extend(names)

    # Merge all categories, keeping first-seen order and dropping duplicates.
    gf = []
    seen = set()
    for name in collected:
        if name not in seen:
            seen.add(name)
            gf.append(name)
            print(rule, name)

    # Read the clock once so the timestamp and the file-name date always agree.
    now = time.localtime()
    xrtime = time.strftime('%Y-%m-%d %H:%M:%S', now)
    xrtime0 = time.strftime('%Y-%m-%d', now)
    string0 = '\n\n————————————广发银行信用卡有————————————:\n' + xrtime + '\n\n'
    string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n' + xrtime + '\n\n'
    # Raw strings: the backslash before a non-ASCII character is not a valid
    # escape sequence in a normal literal.
    baseline_path = r'D:\广发银行信用卡.doc'
    report_path = r'D:\广发银行信用卡%s.doc' % xrtime0
    listing = ''.join(k + '\n' for k in gf) + '\n\n卡数目为:' + str(len(gf))

    if os.path.exists(baseline_path):
        # Baseline exists: compare against it and report the additions.
        with open(baseline_path, 'r', encoding='utf-8') as f:
            known = {line.strip() for line in f}
        xinka = [k for k in gf if k not in known]
        print(xinka)
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write(string0)
            f.write(listing)
            if xinka:
                # ``url`` is the last channel page that was fetched.
                f.write('本信用卡产品所在的网页链接是:' + url)
                f.write(string1)
                f.write(''.join(k + '\n' for k in xinka))
                f.write('\n\n新增卡数目为:')
                f.write(str(len(xinka)))
            else:
                f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')
    else:
        # No baseline yet: just record the current catalogue.
        with open(report_path, 'a+', encoding='utf-8') as f:
            f.write('本信用卡产品所在的网页链接是:' + url)
            f.write(string0)
            f.write(listing)
def hxk(): #抓取华夏各种类信用卡
hx = []
#华夏白金系列信用卡____________________________________________________________________
url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/platinum/list.shtml'
driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
html = driver.get(url)
driver.maximize_window() # 将浏览器最大化显示
html = driver.page_source
time.sleep(5)
print('_____________________________________________________对应html是_____________________________________________________',html)
#driver.save_screenshot('hxbjk.png') #截图保存
driver.quit()
bj = []
soup = bp(html,'html.parser')
soup = soup.find_all('div',{'class':'Product_img_text'})
for soup in soup:
soup = soup.find_all('a')[1]
#print('有什么————————————————:',soup)
for content in soup:
content = content.string.strip()
content = ''.join(content)
bj.append(content)
print('最终结果的列表_____________________________________________________',bj)
for i in bj:
print('白金信用卡有_____________________________________________________',i)
# 华夏银行钛金系列信用卡_________________________________________________________
url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/titanium/list.shtml'
driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
html = driver.get(url)
driver.maximize_window() # 将浏览器最大化显示
html = driver.page_source
time.sleep(5)
print('_____________________________________________________对应html是_____________________________________________________',html)
#driver.save_screenshot('hxtjk.png') #截图保存
driver.quit()
tj = []
soup = bp(html,'html.parser')
soup = soup.find_all('div',{'class':'Product_img_text'})
for soup in soup:
soup = soup.find_all('a')[1]
#print('class = text有什么————————————————:',soup)
for content in soup:
content = content.string.strip()
content = ''.join(content)
tj.append(content)
print('最终结果的列表_____________________________________________________',tj)
for i in tj:
print('钛金系列卡有_____________________________________________________',i)
# 华夏银行财智系列信用卡_________________________________________________________
url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/wisdom/list.shtml'
driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
html = driver.get(url)
driver.maximize_window() # 将浏览器最大化显示
html = driver.page_source
time.sleep(5)
print('_____________________________________________________对应html是_____________________________________________________',html)
#driver.save_screenshot('hxczk.png') #截图保存
driver.quit()
cz = []
soup = bp(html,'html.parser')
soup = soup.find_all('div',{'class':'Product_img_text'})
for soup in soup:
soup = soup.find_all('a')[1]
#print('class = text有什么————————————————:',soup)
for content in soup:
content = content.string.strip()
content = ''.join(content)
cz.append(content)
print('最终结果的列表_____________________________________________________',cz)
for i in cz:
print('财智卡有_____________________________________________________',i)
# 华夏银行标准系列信用卡_________________________________________________________
url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/standard/list.shtml'
driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
html = driver.get(url)
driver.maximize_window() # 将浏览器最大化显示
html = driver.page_source
time.sleep(5)
print('_____________________________________________________对应html是_____________________________________________________',html)
#driver.save_screenshot('hxbzk.png') #截图保存
driver.quit()
bz = []
soup = bp(html,'html.parser')
soup = soup.find_all('div',{'class':'Product_img_text'})
for soup in soup:
soup = soup.find_all('a')[1]
#print('class = text有什么————————————————:',soup)
for content in soup:
content = content.string.strip()
content = ''.join(content)
bz.append(content)
print('最终结果的列表_____________________________________________________',bz)
for i in bz:
print('标准系列卡有_____________________________________________________',i)
# 华夏银行联名系列信用卡_________________________________________________________
url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/joint/list.shtml'
driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
html = driver.get(url)
driver.maximize_window() # 将浏览器最大化显示
html = driver.page_source
time.sleep(5)
print('_____________________________________________________对应html是_____________________________________________________',html)
#driver.save_screenshot('hxsmk.png') #截图保存
driver.quit()
lm = []
soup = bp(html,'html.parser')
soup = soup.find_all('div',{'class':'Product_img_text'})
for soup in soup:
soup = soup.find_all('a')[1]
#print('class = text有什么————————————————:',soup)
for content in soup:
content = content.string.strip()
content = ''.join(content)
lm.append(content)
print('最终结果的列表_____________________________________________________',lm)
for i in lm:
print('联名系列卡有_____________________________________________________',i)
# 华夏银行公务系列信用卡_________________________________________________________
url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/official/list.shtml'
driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
html = driver.get(url)
driver.maximize_window() # 将浏览器最大化显示
html = driver.page_source
time.sleep(5)
print('_____________________________________________________对应html是_____________________________________________________',html)
#driver.save_screenshot('hxgwk.png') #截图保存
driver.quit()
gw = []
soup = bp(html,'html.parser')
soup = soup.find_all('div',{'class':'Product_img_text'})
for soup in soup:
soup = soup.find_all('a')[1]
#print('class = text有什么————————————————:',soup)
for content in soup:
content = content.string.strip()
content = ''.join(content)
gw.append(content)
print('最终结果的列表_____________________________________________________',gw)
for i in gw:
print('公务系列卡有_____________________________________________________',i)
# 华夏银行缤纷系列信用卡_________________________________________________________
bf = []
for n in range(1,5):
url = 'http://creditcard.hxb.com.cn/card/cn/productfunc/productintro/colorful/list_%s.shtml'%n
driver = webdriver.PhantomJS(executable_path='D:/phantomjs/bin/phantomjs.exe')
html = driver.get(url)
driver.maximize_window() # 将浏览器最大化显示
html = driver.page_source
time.sleep(5)
print('_____________________________________________________对应html是_____________________________________________________',html)
#driver.save_screenshot('hxbfk.png') #截图保存
driver.quit()
soup = bp(html,'html.parser')
soup = soup.find_all('div',{'class':'Product_img_text'})
for soup in soup:
soup = soup.find_all('a')[1]
#print('class = text有什么————————————————:',soup)
for content in soup:
content = content.string.strip()
content = ''.join(content)
bf.append(content)
print('最终结果的列表_____________________________________________________',bf)
for i in bf:
print('缤纷系列卡有_____________________________________________________',i)
for i in (bj + tj + cz + bz + lm + gw + bf) :
if i not in hx:
hx.append(i)
print('_____________________________________________________',i)
list1 = []
string0 = '\n\n————————————华夏银行信用卡有————————————:\n'
string1 = '\n\n————————————以下是较上次运行产生的新信用卡————————————:\n'
xrtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
xrtime = ''.join(xrtime)
string1 = string1 + xrtime + '\n\n'
string0 = string0 + xrtime + '\n\n'
#-------------------新增的文件需要的时间------------------
xrtime0 = time.strftime('%Y-%m-%d',time.localtime())
xrtime0 = ''.join(xrtime0)
if (os.path.exists('D:\华夏银行信用卡.doc')): #是否存在信用卡文件 命名为不带时间后缀的名字
with open('D:\华夏银行信用卡.doc','r+',encoding='utf-8') as f: #存在即打开,读入
lines = f.readlines()
#print('为什么未读进?!!!!!!!!!',lines)
for line in lines:
line = line.strip()
list1.append(line)
#print('原来的卡打印一下!!!!!!!!!!!!!!!',line)
#print('原来的的卡列表是》》》》》》》》》》》》》》》》》》',list1)
with open('D:\华夏银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f: #将本次与上一次比较,有新增则写入,本次命名为当前时间
f.write(string0)
for k in hx:
f.write(k)
f.write('\n')
f.write('\n\n卡数目为:')
f.write(str(len(hx))) #得到卡数目,并写入卡数目
xinka = []
for k in hx:
if k not in list1:
#print('有没有将新增的卡筛选出来????????????????????',k)
xinka.append(k)
print(xinka)
if xinka != [] : #判断本次是否有新卡
f.write('本信用卡产品所在的网页链接是:' + url)
f.write(string1)
for k in xinka:
f.write(k)
f.write('\n')
f.write('\n\n新增卡数目为:')
f.write(str(len(xinka)))
else: #若不存在显示本次运行不存在新卡
f.write('\n\n————————————本时间点未产生新的信用卡产品!————————————\n')
else: #若不存在文件则创建
with open('D:\华夏银行信用卡%s.doc'%xrtime0,'a+',encoding='utf-8') as f:
f.write('本信用卡产品所在的网页链接是:' + url)
f.write(string0)
for k in hx:
f.write(k)
f.write('\n')
f.write('\n\n卡数目为:')
f.write(str(len(hx))) #得到卡数目,并写入卡数目
def parse_bank_names(raw):
    """Split the user's answer into a list of clean bank names.

    Both the ASCII comma and the full-width separators produced by a Chinese
    input method (U+FF0C ',' and U+3001 '、') are accepted. Surrounding
    whitespace is stripped from every name and empty tokens (blank input,
    doubled or trailing separators) are dropped.

    Args:
        raw: The text typed at the prompt.

    Returns:
        The non-empty bank names in the order they were typed; an empty list
        when the answer contained no name at all.
    """
    # Normalise every accepted separator to an ASCII comma before splitting.
    for separator in (',', '、'):
        raw = raw.replace(separator, ',')
    return [name for name in (part.strip() for part in raw.split(',')) if name]


if __name__ == '__main__':
    # Bank display name -> scraper function, listed in the order a full run
    # executes them.
    scrapers = (
        ('招商银行', zsk),
        ('浦发银行', pfk),
        ('中信银行', zxk),
        ('民生银行', msk),
        ('兴业银行', xyk),
        ('广发银行', gfk),
        ('光大银行', gdk),
        ('华夏银行', hxk),
    )
    scraper_by_bank = dict(scrapers)

    def scrape_all():
        """Run every bank scraper once, in table order."""
        for _, scrape in scrapers:
            scrape()

    requested = parse_bank_names(input('请输入希望搜寻信用卡产品的银行:'))
    if not requested:
        # Blank input keeps its original meaning: scrape every bank.
        scrape_all()
    for bank_name in requested:
        scrape = scraper_by_bank.get(bank_name)
        if scrape is None:
            # An unrecognised name falls back to a full run, as before.
            scrape_all()
        else:
            scrape()
    # Keep the console window open until the user confirms; the answer is
    # discarded so the built-in `exit` is no longer shadowed.
    input("运行完毕!请输入任意键退出……")