urllib2.urlopen() — urllib2 is a package for opening URLs over a variety of protocols. The simplest way to use it is to call the urlopen function, for example:
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):

Unlike urllib.urlopen, the third parameter here is timeout rather than proxies, so a proxy has to be configured outside the call (for example through an opener or environment variables, as shown later).
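For instance, a request that gives up after ten seconds can pass timeout directly; this is a minimal sketch, and the URL is just a placeholder:

import urllib2

# Abort the request if no response arrives within 10 seconds.
f = urllib2.urlopen('http://www.baidu.com/', timeout=10)
print f.read()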
import urllib2

content_stream = urllib2.urlopen('http://www.baidu.com/')
content = content_stream.read()
print content

To send custom headers or a request body, build a Request object instead:
data = '<root>Hello</root>'
request = urllib2.Request(
    url = 'http://www.ideawu.net/?act=send',
    headers = {'Content-Type' : 'text/xml'},
    data = data)

#!/usr/bin/python
# -*- coding: utf-8 -*-
import httplib, urllib                     # load the modules

# Data to be submitted
params = urllib.urlencode({'title': 'title', 'content': 'article body'})
# Request headers
headers = {"Content-Type": "application/x-www-form-urlencoded",
           "Connection": "Keep-Alive",
           "Referer": "http://mod.qlj.sh.cn/sing/post.php"}
# Open a connection to the site (host name only, no scheme or path)
conn = httplib.HTTPConnection("mod.qlj.sh.cn")
# Submit the data; a GET request could be used here as well
conn.request(method="POST", url="/sing/post.php", body=params, headers=headers)
# Read back the response
response = conn.getresponse()
# Check whether the post succeeded
if response.status == 302:
    print "Post succeeded!"
else:
    print "Post failed"
# Close the connection
conn.close()
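As the comment above notes, the same submission can also be made with GET; a minimal sketch using the same (hypothetical) host and path:

import httplib, urllib

params = urllib.urlencode({'title': 'title', 'content': 'article body'})
conn = httplib.HTTPConnection("mod.qlj.sh.cn")
# For a GET request the parameters go into the query string instead of the body.
conn.request("GET", "/sing/post.php?" + params)
response = conn.getresponse()
print response.status, response.reason
conn.close()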
Simple submission without cookies

import urllib2, urllib

data = {'name' : 'www', 'password' : '123456'}
f = urllib2.urlopen(
    url = 'http://www.ideawu.net/',
    data = urllib.urlencode(data)
    )
print f.read()     # read the entire response body
print f.info()     # all of the response headers

Suppose the headers look like this:
Date: Wed, 26 Aug 2009 08:46:03 GMT
Server: Apache/2.2.9 (Unix) PHP/5.2.6
X-Powered-By: PHP/5.2.6
X-Pingback: http://www.ideawu.net/index.php/XXXX
Content-Type: text/html
Connection: close
Content-Length: 31206

To read just one of the headers, such as 'Content-Type', use:
print f.info().getheader('Content-Type')

Using cookies (more complex)
import urllib2

cookies = urllib2.HTTPCookieProcessor()
opener = urllib2.build_opener(cookies)

f = opener.open('http://www.ideawu.net/?act=login&name=user01')

data = '<root>Hello</root>'
request = urllib2.Request(
    url = 'http://www.ideawu.net/?act=send',
    headers = {'Content-Type' : 'text/xml'},
    data = data)

opener.open(request)
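HTTPCookieProcessor keeps its cookies in a cookielib.CookieJar; passing one in explicitly lets you inspect the cookies afterwards. A minimal sketch, with the same placeholder URL as above:

import urllib2, cookielib

jar = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
opener.open('http://www.ideawu.net/?act=login&name=user01')

# Any cookies set by the server are now held in the jar.
for cookie in jar:
    print cookie.name, cookie.value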
A few small examples:

1. Open a web page and fetch all of its content

from urllib import urlopen

doc = urlopen("http://www.baidu.com").read()
print doc

2. Fetch the HTTP headers
from urllib import urlopen

doc = urlopen("http://www.baidu.com")
print doc.info()
print doc.info().getheader('Content-Type')

3. Use a proxy
1) Check the environment variables
print ""n".join(["%s=%s" % (k, v) for k, v in os.environ.items()]) print os.getenv("http_proxy")2 设置环境变量
import os

os.putenv("http_proxy", "http://proxyaddr:<port>")
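Note that os.putenv changes the process environment but does not update the os.environ mapping that urllib's proxy detection reads, so assigning through os.environ is usually the safer choice. A minimal sketch, with a placeholder proxy address and port:

import os

# Assigning to os.environ both updates the mapping urllib consults
# and calls putenv() under the hood (placeholder proxy address).
os.environ["http_proxy"] = "http://proxyaddr:8080"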
3) Use the proxy

import urllib

# Use http://www.someproxy.com:3128 for http proxying
proxies = {'http': 'http://www.someproxy.com:3128'}
filehandle = urllib.urlopen(some_url, proxies=proxies)

# Don't use any proxies
filehandle = urllib.urlopen(some_url, proxies={})

# Use proxies from the environment - both versions are equivalent
filehandle = urllib.urlopen(some_url, proxies=None)
filehandle = urllib.urlopen(some_url)

For more details see: http://www.jb51.net/article/15720.htm
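Since urllib2.urlopen has no proxies argument, the urllib2 equivalent is to install a ProxyHandler in an opener. A minimal sketch, with the same placeholder proxy address:

import urllib2

# Route all http traffic through the (placeholder) proxy.
proxy_handler = urllib2.ProxyHandler({'http': 'http://www.someproxy.com:3128'})
opener = urllib2.build_opener(proxy_handler)
f = opener.open('http://www.baidu.com/')
print f.read()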
urllib2 post
# -*- coding: cp936 -*-
import urllib2
import urllib

def postHttp(name=None, tel=None, address=None,
             price=None, num=None, paytype=None,
             posttype=None, other=None):
    url = "http://www.xxx.com/dog.php"
    # Data to submit
    postdata = dict(name=name, tel=tel, address=address, price=price,
                    num=num, paytype=paytype, posttype=posttype, other=other)
    # URL-encode the data
    postdata = urllib.urlencode(postdata)
    # Passing data to Request turns this into a POST request
    request = urllib2.Request(url, postdata)
    response = urllib2.urlopen(request)
    print response.read()
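A quick usage sketch; every value below is a made-up placeholder that simply mirrors the keyword arguments of postHttp above:

# Hypothetical call with placeholder values.
postHttp(name='user01', tel='12345678', address='somewhere',
         price='10', num='1', paytype='cash', posttype='express', other='')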
httplib and urllib

import httplib, urllib

params = urllib.urlencode({'@number': 12524, '@type': 'issue', '@action': 'show'})
headers = {"Content-type": "application/x-www-form-urlencoded",
           "Accept": "text/plain"}
conn = None
try:
    conn = httplib.HTTPConnection("bugs.python.org")
    conn.request("POST", "", params, headers)
    response = conn.getresponse()
    print response.status, response.reason
    data = response.read()
except Exception as ex:
    # Report the failure instead of swallowing it silently
    print "request failed:", ex
finally:
    # Only close the connection if it was actually created
    if conn is not None:
        conn.close()