方法一通过python的requests包:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 |
import requests

# Fetch the page once; the cookies the server set are exposed on the response.
url = "https://fanyi.baidu.com"
res = requests.get(url)
cookie_jar = res.cookies

print(cookie_jar)
print(type(cookie_jar))
print(cookie_jar.keys())   # every cookie name in the jar, printed as a list
print(cookie_jar.items())

# Output:
# <RequestsCookieJar[<Cookie BAIDUID=3A6AD66348038CBCB3BB6927F4A2CD77:FG=1 for .baidu.com/>, <Cookie locale=zh for .baidu.com/>]>
# <class 'requests.cookies.RequestsCookieJar'>
# ['BAIDUID', 'locale']
# [('BAIDUID', '3A6AD66348038CBCB3BB6927F4A2CD77:FG=1'), ('locale', 'zh')]
获取cookies是通过response的cookies 属性,可以通过res.cookies["cookie_name"]的方式获取。
并且需要注意的是,这是一个RequestsCookieJar的实例,也就是说,在requests的操作里,cookie一般都是包装在RequestsCookieJar这个类里的。
看源码:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
class RequestsCookieJar(cookielib.CookieJar, collections.MutableMapping):
    """A ``cookielib.CookieJar`` that additionally offers mapping-style access."""

    def get(self, name, default=None, domain=None, path=None):
        """Look up cookie ``name`` and return ``default`` when the lookup raises ``KeyError``.

        ``domain`` and ``path`` are passed through to ``_find_no_duplicates``.
        """
        try:
            return self._find_no_duplicates(name, domain, path)
        except KeyError:
            return default

    def set(self, name, value, **kwargs):
        """Store a cookie and return it; a ``None`` value removes the cookie instead.

        The ``domain`` and ``path`` keyword arguments select which cookie is
        removed; for a regular value all keyword arguments go to ``create_cookie``.
        """
        # support client code that unsets cookies by assignment of a None value:
        if value is None:
            remove_cookie_by_name(
                self,
                name,
                domain=kwargs.get('domain'),
                path=kwargs.get('path'),
            )
            return

        # A Morsel already carries its own name/value, so it is converted directly.
        cookie = (
            morsel_to_cookie(value)
            if isinstance(value, Morsel)
            else create_cookie(name, value, **kwargs)
        )
        self.set_cookie(cookie)
        return cookie

    def iterkeys(self):
        """Yield the name of each cookie in the jar."""
        for cookie in iter(self):
            yield cookie.name

    def keys(self):
        """Return the cookie names as a list."""
        return list(self.iterkeys())

    def itervalues(self):
        """Yield the value of each cookie in the jar."""
        for cookie in iter(self):
            yield cookie.value

    def values(self):
        """Return the cookie values as a list."""
        return list(self.itervalues())

    def iteritems(self):
        """Yield a ``(name, value)`` pair for each cookie in the jar."""
        for cookie in iter(self):
            yield cookie.name, cookie.value

    def items(self):
        """Return the ``(name, value)`` pairs as a list."""
        return list(self.iteritems())
里面包括了很多的方法,可以根据需要进行使用,通常就是get、set、keys、items等,和字典很像!
重点
如果需要在请求中添加cookie,可以实例化一个RequestsCookieJar对象,然后把值set进去,最后在get、post方法里通过cookies参数传入就行了,如下:
0 1 2 3 4 5 6 7 8 9 10 11 |
import requests
from requests.cookies import RequestsCookieJar

url = "http://fanyi.baidu.com/v2transapi"

# Build the jar by hand, then attach it to the outgoing request via `cookies=`.
cookie_jar = RequestsCookieJar()
cookie_jar.set("BAIDUID", "B1CCDD4B4BC886BF99364C72C8AE1C01:FG=1", domain="baidu.com")

res = requests.get(url, cookies=cookie_jar)

# print() called with one argument is valid on both Python 2 and Python 3,
# unlike the Python 2-only `print res.status_code` statement.
print(res.status_code)

# Output: 200 means the request succeeded.
方法二使用python的cookielib包:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 |
# cookielib and urllib2 only exist on Python 2. On Python 3 the same classes
# live in http.cookiejar and urllib.request, so alias them to the old names and
# the rest of the snippet stays unchanged.
try:
    import cookielib
    import urllib2
except ImportError:
    import http.cookiejar as cookielib
    import urllib.request as urllib2

loginUrl = "https://fanyi.baidu.com"

# Route every urlopen() call through an opener that records cookies into `cj`.
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)

res = urllib2.urlopen(loginUrl)
res.close()  # only the cookies are needed, so release the response right away
opener.close()

for index, cookie in enumerate(cj):
    # Same text as the Python 2 statement `print '[', index, ']', cookie`.
    print('[ %d ] %s' % (index, cookie))

# Output:
# [ 0 ] <Cookie BAIDUID=43F65D52E7131E3C108A68537FD90752:FG=1 for .baidu.com/>
# [ 1 ] <Cookie locale=zh for .baidu.com/>
方法三使用python的httplib包:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
import sys

# httplib was renamed to http.client in Python 3; keep the old name so the
# snippet runs on both versions.
try:
    import httplib
except ImportError:
    import http.client as httplib

cj = ''
header = {
    'Host': 'fanyi.baidu.com',
    'Accept-Language': 'zh-CN',
    'Connection': 'Keep-Alive',
    'Accept-Encoding': 'gzip,deflate',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'User-Agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/28.0.1500.71 Chrome/28.0.1500.71 Safari/537.36',
}

# con = httplib.HTTPConnection('fanyi.baidu.com')
# The site is served over https, so with the plain-http connection above
# res.status comes back as a 301 redirect.
con = httplib.HTTPSConnection('fanyi.baidu.com')
try:
    # Request fanyi.baidu.com/v2transapi
    con.request(method='GET', url='/v2transapi', headers=header)
    res = con.getresponse()
    res.read()  # the body only becomes available once read() has been called
finally:
    # Close the connection even when the request itself raises.
    con.close()

print(res.status)  # status code

# Look the header up once instead of calling getheader() for every use.
set_cookie = res.getheader('Set-Cookie')
if set_cookie is not None:
    # A Set-Cookie header is present, so keep the cookie.
    print(res.getheaders())  # all response headers
    print(set_cookie)        # every cookie the server set, as one header string
    # Only the first `name=value` pair is kept ('locale=zh' in the sample
    # output below); any further cookies in the header are dropped.
    cj = set_cookie.split(';')[0]
    print(cj)
else:
    print('got no cookie')
    sys.exit()

# Output:
# 200
# [('content-length', '57'), ('content-encoding', 'gzip'), ('set-cookie', 'locale=zh; expires=Sat, 01-Feb-2020 03:26:10 GMT; path=/; domain=.baidu.com, BAIDUID=F3B1D486AEEF5CB69BCDBF801064CBEE:FG=1; expires=Mon, 06-Apr-20 03:26:10 GMT; max-age=31536000; path=/; domain=.baidu.com; version=1'), ('vary', 'Accept-Encoding'), ('server', 'Apache'), ('date', 'Sun, 07 Apr 2019 03:26:10 GMT'), ('p3p', 'CP=" OTI DSP COR IVA OUR IND COM "'), ('content-type', 'application/json')]
# locale=zh; expires=Sat, 01-Feb-2020 03:26:10 GMT; path=/; domain=.baidu.com, BAIDUID=F3B1D486AEEF5CB69BCDBF801064CBEE:FG=1; expires=Mon, 06-Apr-20 03:26:10 GMT; max-age=31536000; path=/; domain=.baidu.com; version=1
# locale=zh
方法四使用python的selenium包:
用的比较少的selenium包,用于模拟登陆并获取cookie。
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import random
import time
from urllib import request

import requests
from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import WebDriverException

driver = webdriver.Chrome(executable_path=r'/Applications/Google Chrome.app/chromedriver')
try:
    driver.get('http://www.renren.com/PLogin.do')
    time.sleep(2)

    driver.find_element_by_id('email').clear()
    driver.find_element_by_id('email').send_keys('myusername')  # enter the user name
    driver.find_element_by_id('password').clear()
    driver.find_element_by_id('password').send_keys('mypassword')  # enter the password

    # Save the captcha image locally so it can be read and typed in below.
    img_url = 'http://icode.renren.com/getcode.do?t=web_login&rnd=' + str(random.random())
    request.urlretrieve(img_url, 'renren_yzm.jpg')

    try:
        driver.find_element_by_id('icode').clear()
        # If a captcha is required, type it by hand or hand the image to a
        # captcha-solving service.
        img_res = input('输入验证码:')
        driver.find_element_by_id('icode').send_keys(img_res)
    except WebDriverException:
        # Best effort: skip the captcha when its field cannot be used. Only
        # selenium errors are ignored, so Ctrl-C and real bugs still surface.
        pass

    driver.find_element_by_id('autoLogin').click()  # tick "log in automatically"
    driver.find_element_by_id('login').click()  # submit the login form
    time.sleep(3)

    cookie_items = driver.get_cookies()  # read the browser's cookies
finally:
    # Always shut the browser down, even when a step above fails.
    driver.quit()

# Flatten selenium's list of cookie dicts into a name -> value mapping.
post = {cookie['name']: cookie['value'] for cookie in cookie_items}
if 't' not in post:
    raise SystemExit("login failed: cookie 't' was not set")
print(post['t'])  # the cookie renren uses to keep the session logged in

# ------------------------------------------------------------
url = 'http://www.renren.com/265025131/profile'
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Cookie': 't=' + post['t'],
}
response = requests.get(url, headers=headers)
print('-' * 50)

html = etree.HTML(response.text)
title = html.xpath('//title/text()')
print('目前得到的页面信息', title)
print(response.url)
还有很多方法可以实现获取cookie和设置cookie,有待学习研究。