最简单的例子
# Minimal example: fetch a URL and read the complete response body.
import urllib.request

# The with-statement closes the HTTP response once the body has been read.
with urllib.request.urlopen('http://python.org/') as response:
    html = response.read()  # response body as bytes
取回一个资源并临时保存
# Retrieve a resource into a temporary file, then read it back.
import urllib.request

# Called without a target filename, urlretrieve() stores the resource in a
# temporary file and returns that file's path together with the headers.
local_filename, headers = urllib.request.urlretrieve('http://python.org/')

# Read inside a with-block so the file handle is always closed; binary mode
# avoids depending on the platform's default text encoding.
with open(local_filename, 'rb') as html_file:
    html = html_file.read()

# Remove the temporary file(s) left behind by urlretrieve().
urllib.request.urlcleanup()
使用Request对象(例如请求 http://www.douban.com/)
# Build an explicit Request object and hand it to urlopen().
import urllib.request

req = urllib.request.Request('http://www.voidspace.org.uk')

# The with-statement closes the HTTP response once the body has been read.
with urllib.request.urlopen(req) as response:
    the_page = response.read()  # response body as bytes
使用POST方法发送数据, 数据需要先经过编码
# Send form data with POST: the data must be URL-encoded and then converted
# to bytes before it is attached to the request.
import urllib.parse
import urllib.request

url = 'http://www.someserver.com/cgi-bin/register.cgi'
values = {
    'name': 'Michael Foord',
    'location': 'Northampton',
    'language': 'Python',
}

data = urllib.parse.urlencode(values)  # 'name=Michael+Foord&location=...'
data = data.encode('ascii')  # data should be bytes

# Passing a data payload makes urllib issue a POST instead of a GET.
req = urllib.request.Request(url, data)
with urllib.request.urlopen(req) as response:
    the_page = response.read()  # response body as bytes
使用GET方法发送数据, 数据需先经过编码
# Send data with GET: URL-encode the values and append them to the URL as a
# query string.
import urllib.parse
import urllib.request

data = {}
data['name'] = 'Somebody Here'
data['location'] = 'Northampton'
data['language'] = 'Python'

url_values = urllib.parse.urlencode(data)
# Dicts keep insertion order on Python 3.7+, so this prints:
# name=Somebody+Here&location=Northampton&language=Python
print(url_values)

url = 'http://www.example.com/example.cgi'
full_url = url + '?' + url_values

# Use a separate name for the response (instead of rebinding `data`) and
# close it via the with-statement once the body has been read.
with urllib.request.urlopen(full_url) as response:
    the_page = response.read()  # response body as bytes
添加HTTP header, 指定user-agent
# Add HTTP headers to a request; here a custom User-Agent is sent together
# with POST data.
import urllib.parse
import urllib.request

url = 'http://www.someserver.com/cgi-bin/register.cgi'
user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'
headers = {'User-Agent': user_agent}
values = {
    'name': 'Michael Foord',
    'location': 'Northampton',
    'language': 'Python',
}

data = urllib.parse.urlencode(values)
data = data.encode('ascii')  # the request body must be bytes

# The third positional argument of Request is the dict of extra headers.
req = urllib.request.Request(url, data, headers)
with urllib.request.urlopen(req) as response:
    the_page = response.read()  # response body as bytes
处理异常
# Handle URLError, e.g. when the host name cannot be resolved.
import urllib.error
import urllib.request

req = urllib.request.Request('http://www.pretend_server.org')
try:
    response = urllib.request.urlopen(req)
except urllib.error.URLError as e:
    # Sample output: (4, 'getaddrinfo failed')
    print(e.reason)
else:
    # Only reached if the request unexpectedly succeeds; release the socket.
    response.close()
# Handle HTTPError: the server answered, but with an error status code.
import urllib.error
import urllib.request

req = urllib.request.Request('http://www.python.org/fish.html')
try:
    response = urllib.request.urlopen(req)
except urllib.error.HTTPError as e:
    # The exception carries the status code and can be read like a response.
    # Sample output:
    # 404
    # b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    #   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n\n\n<html
    #   ...
    #   <title>Page Not Found</title>\n
    #   ...
    print(e.code)
    print(e.read())
else:
    # Only reached if the page exists after all; release the socket.
    response.close()
原创文章,作者:ItWorker,如若转载,请注明出处:https://blog.ytso.com/8097.html