python httplib urllib urllib2区别(一撇)

xiaoxiao2021-02-28  114

目录:

urlencode & quote & unquote (url 中带中文参数)

python httplib urllib urllib2区别(一撇)

python post请求实例 & json -- str互相转化(application/x-www-form-urlencoded \ multipart/form-data)

1, 前言:

python提供了很多种非常友好的访问网页内容的方法。python2.x:如 httplib、urllib 和 urllib2;python3.x:标准库将它们整合为 http.client、urllib.request 和 urllib.parse,此外还有常用的第三方库 requests。同时,每种方法下面又分为 get、post、put、delete 等 method。

一时间江湖上充斥着“五门八派”的各种用法,令初学者眼花缭乱,不知如何下手,从何学起。

但是,有一点需要提醒的是:无论哪一种方案或方法,存在即有其合理性;哪一种方法用着顺手就用哪一种,得心应手才是王道!

2,  下面我们比较一下python2.x 中的三种方法,先上实例,之后分析

(1)实例

import json import sys import hashlib import urllib import httplib ### none using now def generate_json_list(): reload(sys) sys.setdefaultencoding('gbk') print "[", flag=False for line in sys.stdin: if flag: print ",", else: flag=True line=line.strip() items=line.split("\t") out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""} out["createdAt"]=items[0] out["scale"]=items[1] out["channel"]=items[2] out["word"]=items[3] print json.dumps(out,encoding="gbk").decode("unicode-escape"), print "]" import urllib2 def import_out_hotwords(key, json_str, out): HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle" #HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin" #print "2--", json_str value={"configKey":key,"configValue":json_str} data=urllib.urlencode(value) print >> sys.stderr, "### 3params", value, data req = urllib2.Request(HOST, data) req.add_header("content-type", "application/x-www-form-urlencoded") req.get_method = lambda : 'PUT' response = None try: response = urllib2.urlopen(req, timeout=5) if response.code == 200: print "insertSingle Succ: ", out["word"], out["channel"], out["key"] response.close() except urllib2.URLError as e: if hasattr(e, 'code'): print 'Error code:',e.code elif hasattr(e, 'reason'): print 'Reason:',e.reason finally: if response: response.close() def import_out_hotwords_2(key, json_str, out): HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle" #HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin" #print "2--", json_str value={"configKey":key,"configValue":json_str} data=urllib.urlencode(value) print >> sys.stderr, "## 2params", value, data req = urllib2.Request(HOST, data) req.add_header("content-type", "application/x-www-form-urlencoded") req.get_method = lambda : 'PUT' response = None try: response = urllib2.urlopen(req, timeout=5) if response.code == 200: print "insertSingle Succ: ", out["word"], out["channel"], out["key"] response.close() except 
urllib2.URLError as e: if hasattr(e, 'code'): print 'Error code:',e.code elif hasattr(e, 'reason'): print 'Reason:',e.reason finally: if response: response.close() def import_out_hotwords_old(key, json_str, out): HOST = "10.129.232.109:5005" conn = httplib.HTTPConnection(HOST) #print "2--", json_str value={"configKey":key,"configValue":json_str} data=urllib.urlencode(value) #print data headers = { 'content-type': 'application/x-www-form-urlencoded', 'cache-control': 'no-cache' } conn.request("PUT", "/api/externalHotWords/insertSingle", body=data, headers=headers) handler = conn.getresponse() if handler.status == 200: print "insertSingle Succ: ", out["word"], out["channel"], out["key"] #if handler.read().decode('utf8').encode('gbk')[0] == "OK": # print "insertSingle Succ: ", json_str conn.close() def generate_json(): reload(sys) sys.setdefaultencoding('gbk') for line in sys.stdin: line=line.strip() items=line.split("\t") if len(items) < 4: continue out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""} out["createdAt"]=items[0] #out["scale"]=items[1] out["channel"]=items[2] out["word"]=items[3] key = hashlib.md5((items[3] + items[2])).hexdigest() key = "externalHotWords_" + key out["key"] = key json_str = json.dumps(out,encoding="gbk")#.decode("unicode-escape") #import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 'ignore').encode('utf8')), out) import_out_hotwords_2(key, json_str, out) def generate_json_old(): reload(sys) sys.setdefaultencoding('gbk') for line in sys.stdin: line=line.strip() items=line.split("\t") if len(items) < 4: continue out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""} out["createdAt"]=items[0] #out["scale"]=items[1] out["channel"]=items[2] out["word"]=items[3] key = hashlib.md5((items[3] + items[2])).hexdigest() out["key"] = "externalHotWords_" + key json_str = json.dumps(out,encoding="gbk").decode("unicode-escape") #json_str = out #print "1--", json_str ## return 'req=' + 
urllib.quote(reqinfo.decode('gbk', 'ignore').encode('utf8')) import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 'ignore').encode('utf8')), out) #import_out_hotwords(key, json_str) if __name__=="__main__": #generate_json_list() generate_json()

下面的实例存在一个小问题:二次编码问题。首先对 out 进行 json.dumps() 得到 json_str(正确),之后对 json_str 进行 urllib.quote()(第一次编码);最后在

value={"configKey":key,"configValue":json_str} 之后又调用了 urllib.urlencode()(第二次编码)。 格式一:configValue={'scale':+'',+'word':+'\xb2\xe2\xca%5Cxd4soso',+'channel':+'360_\xca\xb5\xca\xb1\xc8\xc8\xb5\xe3',+'key':+'externalHotWords_ed9f4ea3b7ff116c67366f7a576bcb08',+'type%27:+'',+'createdAt':+'2017-06-07+11:22:32'}&configKey=ed9f4ea3b7ff116c67366f7a576bcb08 格式二:configValue=%7B%22scale%22%3A%20%22%22%2C%20%22word%22%3A%20%22%E6%B5%8B%E8%AF%95soso%22%252C%20%22channel%22%3A%20%22360_%E5%AE%9E%E6%97%B6%E7%83%AD%E7%82%B9%22%2C%20%22key%22%3A%20%22externalHotWords_ed9f4ea3b7ff116c67366f7a576bcb08%22%2C%20%22type%22%3A%20%22%22%2C%20%22createdAt%22%3A%20%222017-06-07%2011%3A22%3A32%22%7D&configKey=ed9f4ea3b7ff116c67366f7a576bcb08 显然格式二是对格式一再次进行了编码(因为 { --> %7B;% --> %25)。

import json import sys import hashlib import urllib import httplib ### none using now def generate_json_list(): reload(sys) sys.setdefaultencoding('gbk') print "[", flag=False for line in sys.stdin: if flag: print ",", else: flag=True line=line.strip() items=line.split("\t") out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""} out["createdAt"]=items[0] out["scale"]=items[1] out["channel"]=items[2] out["word"]=items[3] print json.dumps(out,encoding="gbk").decode("unicode-escape"), print "]" import urllib2 def import_out_hotwords(key, json_str, out): HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSingle" #HOST = "http://10.129.232.109:5005/api/externalHotWords/insertSin" #print "2--", json_str value={"configKey":key,"configValue":json_str} data=urllib.urlencode(value) req = urllib2.Request(HOST, data) req.add_header("content-type", "application/x-www-form-urlencoded") req.get_method = lambda : 'PUT' response = None try: response = urllib2.urlopen(req, timeout=5) if response.code == 200: print "insertSingle Succ: ", out["word"], out["channel"], out["key"] response.close() except urllib2.URLError as e: if hasattr(e, 'code'): print 'Error code:',e.code elif hasattr(e, 'reason'): print 'Reason:',e.reason finally: if response: response.close() def import_out_hotwords_old(key, json_str, out): HOST = "10.129.232.109:5005" conn = httplib.HTTPConnection(HOST) #print "2--", json_str value={"configKey":key,"configValue":json_str} data=urllib.urlencode(value) #print data headers = { 'content-type': 'application/x-www-form-urlencoded', 'cache-control': 'no-cache' } conn.request("PUT", "/api/externalHotWords/insertSingle", body=data, headers=headers) handler = conn.getresponse() if handler.status == 200: print "insertSingle Succ: ", out["word"], out["channel"], out["key"] #if handler.read().decode('utf8').encode('gbk')[0] == "OK": # print "insertSingle Succ: ", json_str conn.close() def generate_json(): reload(sys) sys.setdefaultencoding('gbk') 
for line in sys.stdin: line=line.strip() items=line.split("\t") if len(items) < 4: continue out={"key":"","createdAt":"","word":"","channel":"","type":"","scale":""} out["createdAt"]=items[0] #out["scale"]=items[1] out["channel"]=items[2] out["word"]=items[3] key = hashlib.md5((items[3] + items[2])).hexdigest() out["key"] = "externalHotWords_" + key json_str = json.dumps(out,encoding="gbk").decode("unicode-escape") #json_str = out #print "1--", json_str ## return 'req=' + urllib.quote(reqinfo.decode('gbk', 'ignore').encode('utf8')) import_out_hotwords(key, urllib.quote(json_str.decode('gbk', 'ignore').encode('utf8')), out) #import_out_hotwords(key, json_str) if __name__=="__main__": #generate_json_list() generate_json()cat

CMD: cat tmp | python generate_json2.py 

[@10.134.105.160 HotRankingLoggers]# vi tmp  2017-06-07 11:22:32 6964    360_实时热点    测试APP  2017-06-07 11:22:32 6498    360_实时热点    测试soso

(2)分析(参考《python的httplib、urllib和urllib2的区别及用法》)

urllib和urllib2

urllib 和urllib2都是接受URL请求的相关模块,但是urllib2可以接受一个Request类的实例来设置URL请求的headers,urllib仅可以接受URL。

这意味着,你不可以伪装你的User Agent字符串等。

urllib提供urlencode方法用来产生GET查询字符串,而urllib2没有。这就是urllib常和urllib2一起使用的原因。

目前的大部分http请求都是通过urllib2来访问的

httplib

httplib实现了HTTP和HTTPS的客户端协议,一般不直接使用,在python更高层的封装模块中(urllib,urllib2)使用了它的http实现。

(3)详解

urllib简单用法

1.     google = urllib.urlopen('http://www.google.com')  

2.    print 'http header:\n', google.info()  

3.     print 'http status:', google.getcode()  

4.    print 'url:', google.geturl()  

5.     for line in google:  # 就像在操作本地文件  

6.        print line,  

7.     google.close()  

urllib2简单用法

1.     import urllib2  

2.       response=urllib2.urlopen('http://www.douban.com')  

3.        html=response.read()  

实际步骤:

1、urllib2.Request()的功能是构造一个请求信息,返回的req就是一个构造好的请求    

2、urllib2.urlopen()的功能是发送刚刚构造好的请求req,并返回一个文件类的对象response,包括了所有的返回信息。    

3、通过response.read()可以读取到response里面的html,通过response.info()可以读到一些额外的信息。如下:

1.     #!/usr/bin/env python  

2.        import urllib2  

3.         req = urllib2.Request("http://www.douban.com")  

4.        response = urllib2.urlopen(req)  

5.         html = response.read()  

6.        print html  

有时你会碰到,程序也对,但是服务器拒绝你的访问。这是为什么呢?问题出在请求中的头信息(header)。 有的服务端有洁癖,不喜欢程序来触摸它。这个时候你需要将你的程序伪装成浏览器来发出请求。请求的方式就包含在header中。常见的情形:

1.     import urllib  

2.    import urllib2  

3.     url = 'http://www.someserver.com/cgi-bin/register.cgi'  

4.    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'  # 将user_agent写入头信息  

5.     values = {'name' : 'who','password':'123456'}  

6.    headers = { 'User-Agent' : user_agent }  

7.     data = urllib.urlencode(values)  

8.    req = urllib2.Request(url, data, headers)  

9.     response = urllib2.urlopen(req)  

10.  the_page = response.read()  

values是post数据

GET方法

例如百度:

百度是通过 http://www.baidu.com/s?wd=XXX 来进行查询的,这样我们需要将 {'wd': 'xxx'} 这个字典进行urlencode

1.    #coding:utf-8  

2.    import urllib   

3.     import urllib2    

4.    url = 'http://www.baidu.com/s'   

5.    values = {'wd':'D_in'}     

6.    data = urllib.urlencode(values)  

7.     print data   

8.    url2 = url+'?'+data  

9.    response = urllib2.urlopen(url2)    

10.  the_page = response.read()   

11.   print the_page  

POST方法

1.    import urllib  

2.    import urllib2  

3.    url = 'http://www.someserver.com/cgi-bin/register.cgi'  

4.    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'  # 将user_agent写入头信息  

5.    values = {'name' : 'who','password':'123456'}      # post数据  

6.    headers = { 'User-Agent' : user_agent }  

7.    data = urllib.urlencode(values)                   # 对post数据进行url编码  

8.    req = urllib2.Request(url, data, headers)  

9.    response = urllib2.urlopen(req)  

10.  the_page = response.read()  

urllib2带cookie的使用

1.    #coding:utf-8  

2.    import urllib2,urllib  

3.     import cookielib  

4.       

5.    url = r'http://www.renren.com/ajaxLogin'  

6.       

7.     #创建一个cj的cookie的容器  

8.    cj = cookielib.CookieJar()  

9.    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))  

10.  #将要POST出去的数据进行编码  

11.  data = urllib.urlencode({"email":email,"password":password})  # email、password 为你自己的账号和密码(pass 是关键字,不能用作变量名)  

12.  r = opener.open(url,data)  

13.   print cj  

httplib简单用法

1.     #!/usr/bin/env python      

2.    # -*- coding: utf-8 -*-      

3.     import httplib    

4.    import urllib    

5.         

6.    def sendhttp():    

7.         data = urllib.urlencode({'@number': 12524, '@type': 'issue', '@action': 'show'})       

8.        headers = {"Content-type": "application/x-www-form-urlencoded",    

9.                    "Accept": "text/plain"}    

10.      conn = httplib.HTTPConnection('bugs.python.org')    

11.       conn.request('POST', '/', data, headers)    

12.      httpres = conn.getresponse()    

13.       print httpres.status    

14.      print httpres.reason    

15.       print httpres.read()               

16.                    

17.   if __name__ == '__main__':      

18.      sendhttp() 

3,get put post delete 方法,参考自 python urllib2对http的get,put,post,delete

#GET:

#!/usr/bin/env python

# -*- coding:utf-8 -*-

import urllib2

def get():
    """Fetch the Baidu home page with an HTTP GET and return the response body."""
    # urllib2 needs the scheme; a bare 'www.baidu.com' raises "unknown url type".
    URL = 'http://www.baidu.com'  # address of the page
    response = urllib2.urlopen(URL)  # no data argument, so this is a GET request
    try:
        return response.read()  # page content returned by the server
    finally:
        response.close()

#POST:

#!/usr/bin/env python

# -*- coding:utf-8 -*-

import urllib

import urllib2

def post():

    URL ='http://umbra.nascom.nasa.gov/cgi-bin/eit-catalog.cgi' #页面的地址

    values ={'obs_year':'2011','obs_month':'March',   #post的值

             'obs_day':'8','start_year':'2011'

             ,'start_month':'March','start_day':'8'

             ,'start_hour':'All Hours','stop_year':'2011'

             ,'stop_month':'March','stop_day':'8'

             ,'stop_hour':'All Hours','xsize':'All'

             ,'ysize':'All','wave':'all'

             ,'filter':'all','object':'all'

              ,'xbin':'all','ybin':'all'  

             ,'highc':'all'}

    data =urllib.urlencode(values)    #适用urllib对数据进行格式化编码

    printdata    #输出查看编码后的数据格式

    req =urllib2.Request(URL, data)    #生成页面请求的完整数据

    response =urllib2.urlopen(req)     #发送页面请求

    returnresponse.read()    #获取服务器返回的页面信息

#PUT

import urllib2

# Build the request with its body and Content-Type header in one call.
request = urllib2.Request(
    'http://example.org',
    data='your_put_data',
    headers={'Content-Type': 'your/contenttype'},
)
# Override the method getter so the request is sent as PUT.
request.get_method = lambda: 'PUT'
response = urllib2.urlopen(request)

#DELETE

import urllib2

# Placeholder target: replace with the URI of the resource to delete.
uri = 'http://example.org'
request = urllib2.Request(uri)
# Override the method getter so the request is sent as DELETE.
request.get_method = lambda: 'DELETE'
response = urllib2.urlopen(request)

转载请注明原文地址: https://www.6miu.com/read-40815.html

最新回复(0)