一、什么是urllib
urllib是Python内置的HTTP请求库,不需要通过pip安装,直接使用即可。它主要包含四个模块:urllib.request(请求模块)、urllib.error(异常处理模块)、urllib.parse(URL解析模块)、urllib.robotparser(robots.txt解析模块)。
二、urllib用法
1.访问网页
response = urllib.request.urlopen("http://www.baidu.com") print(response.read().decode("utf-8"))
2.设置请求参数
response =urllib.request.urlopen("http://www.baidu.com",data=data) print(response.read().decode("utf-8"))
3.设置请求超时
try: response = urllib.request.urlopen("http://www.baidu.com", timeout=0.1) print(response.status) print(response.getheaders()) print(response.info()) except urllib.error.URLError as e: if isinstance(e.reason, socket.timeout): print('TIME OUT') else: print(response.read().decode("utf-8"))
4.设置请求头
headers = { 'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)', 'Host': 'httpbin.org'} req = request.Request(url=url, headers=headers) response = request.urlopen(req) print(response.read().decode('utf-8'))
5.设置请求方式和请求参数
url = 'http://httpbin.org/post' dict = { 'name': 'Germey' } data = bytes(parse.urlencode(dict), encoding='utf8') req = request.Request(url=url, data=data, method='POST') response = request.urlopen(req) print(response.read().decode('utf-8'))
6.设置代理
#代理可以去西刺上搜索 proxy_handler = urllib.request.ProxyHandler({ 'http': 'http://127.0.0.1:9743', }) opener = urllib.request.build_opener(proxy_handler) response = opener.open('http://www.baidu.com') print(response.read().decode('utf-8'))
7.cookie
cookie = http.cookiejar.CookieJar() handler = urllib.request.HTTPCookieProcessor(cookie) opener = urllib.request.build_opener(handler) response = opener.open("http://www.baidu.com") for item in cookie: print(item.name) print(item.value)
8.保存cookie到本地
filename = 'cookie.txt' cookie = http.cookiejar.LWPCookieJar(filename) handler = urllib.request.HTTPCookieProcessor(cookie) opener = urllib.request.build_opener(handler) response = opener.open('http://www.baidu.com') cookie.save(ignore_discard=True, ignore_expires=True)
9.从本地加载cookie
cookie = http.cookiejar.LWPCookieJar() cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True) handler = urllib.request.HTTPCookieProcessor(cookie) opener = urllib.request.build_opener(handler) response = opener.open('http://www.baidu.com') print(response.read().decode('utf-8'))
10.异常处理
try: response =urllib.request.urlopen('http://cuiqingcai.com/index.htm') except urllib.error.HTTPError as e: print(e.reason, e.code, e.headers, sep='\n') except urllib.error.URLError as e: print(e.reason) else: print('Request Successfully') try: response = urllib.request.urlopen('http://cuiqingcai.com/index.htm') except urllib.error.URLError as e: if hasattr(e, "code"): print(e.code) if hasattr(e, "reason"): print(e.reason)
11.url解析
result = urllib.parse.urlparse('www.baidu.com/index.html;user?id=5#comment', scheme='https') print(result)
12.构造url
data = ['http', 'www.baidu.com', 'index.html', 'user', 'a=6', 'comment'] print(urllib.parse.urlunparse(data))