python爬虫学习（十八）人人网cookie登录

import requests
from lxml import etree
from codeClass import YDMHttp#封装打码平台代码
# File name of the captcha image; matches the file the script below saves
# the downloaded captcha to (code.jpg in the working directory).
path = "code.jpg"
def getCodeText(imgPath, codeType):
    """Recognise the captcha image at *imgPath* via the YunDaMa service.

    Args:
        imgPath: Path of the captcha image file to upload.
        codeType: Captcha type id passed to the service (e.g. 1004 means a
            4-character alphanumeric code). Types are priced differently and
            a wrong type hurts accuracy; the full list is at
            http://www.yundama.com/price.html

    Returns:
        The recognition result from ``YDMHttp.decode`` (presumably the
        captcha text -- confirm against ``codeClass``), or ``None`` when the
        account parameters below were left at their placeholder values.
    """
    # Account user name (fill in your own).
    username = '********'
    # Account password (fill in your own).
    password = '******'
    # Software ID: required for the developer revenue share. Found under
    # "My software" in the developer console.
    appid = 9812
    # Software key: required for the developer revenue share. Found under
    # "My software" in the developer console.
    appkey = '3dfbf90******0d982ffb1c93'
    # Image file to recognise.
    filename = imgPath
    # Captcha type id (see the docstring).
    codetype = codeType
    # Timeout in seconds.
    timeout = 20
    result = None

    # Sanity check: refuse to run with the sample placeholder user name.
    if username == 'username':
        print('请设置好相关参数再测试')
    else:
        # Initialise the client.
        yundama = YDMHttp(username, password, appid, appkey)

        # Log in to YunDaMa.
        uid = yundama.login()
        print('uid: %s' % uid)

        # Query the account balance.
        balance = yundama.balance()
        print('余额: %s' % balance)

        # Start recognition: image path, captcha type id, timeout (seconds).
        cid, result = yundama.decode(filename, codetype, timeout)
        print('cid: %s, result: %s' % (cid, result))

    return result


# Create a session object.
session = requests.session()

# Step 1: fetch the login page, then capture and recognise the captcha image.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
}
url = "http://www.renren.com/SysHome.do"
# Every request goes through the session (not bare requests.get) so that any
# cookies set while serving the login page and the captcha image are sent
# along with the login POST below.
page_text = session.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
captcha_srcs = tree.xpath('//*[@id="verifyPic_login"]/@src')
if not captcha_srcs:
    # Fail with a clear message instead of a bare IndexError.
    raise RuntimeError('captcha image (id="verifyPic_login") not found on the login page')
code_img_src = captcha_srcs[0]
code_img_data = session.get(url=code_img_src, headers=headers).content
with open(path, 'wb') as fp:
    fp.write(code_img_data)

# Recognise the captcha with the YunDaMa service.
result = getCodeText(path, 5000)
print(result)

# Step 2: send the POST request (simulated login).
login_url = 'http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2020021132191'
data = {
    'email': '144******6@qq.com',
    'icode': result,
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'password': '7d279330f75******a27929432e3928dee4697792f3b0',
    'rkey': '8a9fbb66f55b09f87d64c8111173b047',
    'f': 'http%3A%2F%2Fwww.renren.com%2F422267891%2Fprofile',
}
# Send the POST through the session so the cookies returned by the login
# response are stored on it.
response = session.post(url=login_url, headers=headers, data=data)
print(response.status_code)

# Alternative kept for reference: save the login response page.
# login_page_text = response.text
# with open('renren.html', 'w', encoding='utf-8') as fp:
#     fp.write(login_page_text)

# Alternative kept for reference: fetch the user's profile page with a plain
# requests.get (no session, hence no login cookies).
# datali_url = 'http://www.renren.com/422******91/profile'
# datail_page_text = requests.get(url=datali_url, headers=headers).text
# with open('xinxi.html', 'w', encoding='utf_8') as fp:
#     fp.write(datail_page_text)

# Step 3: fetch the current user's profile page.
detail_url = "http://www.renren.com/422******91/profile"

# Alternative kept for reference: manual cookie handling -- copy the Cookie
# header from a packet-capture tool / browser developer tools into the
# request headers.
# headers = {
#     'Cookie': '<cookie string copied from the browser>',
# }

# Send the GET with the cookie-carrying session.
detail_page_text = session.get(url=detail_url, headers=headers).text
with open('bobo.html', 'w', encoding='utf-8') as fp:
    fp.write(detail_page_text)

python爬虫学习（十八）人人网cookie登录相关推荐

python爬虫学习之淘宝模拟登录
使用教程下载chrome浏览器查看chrome浏览器的版本号,对应版本号的chromedriver驱动 pip安装下列包 pip install selenium 登录微博,并通过微博绑定淘宝账 ...
Python爬虫学习（八）识别12306的验证码信息
安装、导入第三方模块 安装: pip3 install Pillow Image 模块: Image模块是Python PIL图像处理中常见的模块,主要用于对图像进行基本处理,它配合open.sa ...
Python爬虫学习5：使用cookie访问网页（以豆瓣为例）
1. 先在浏览器上登录豆瓣,登录成功后打开开发者工具,可以查看到Cookie. 第一次登录后,短时间内再次打开此页面时,会发现系统已经保存了cookie, 不用再重新登录.时间长了cookie会失效, ...
【Python爬虫学习】八、股票数据定向爬虫（2020年1月31日成功爬取中财网，百度股市通web版404了）
功能描述: 目标:获取股票的名称和交易信息输出:保存到文件中程序结构设计: 步骤1:从中财网http://quote.cfi.cn/stockList.aspx获取股票列表步骤2:根据股票列表获 ...
[Python爬虫] 之十八：Selenium +phantomjs 利用 pyquery抓取电视之家网数据
一.介绍本例子用Selenium +phantomjs爬取电视之家(http://www.tvhome.com/news/)的资讯信息,输入给定关键字抓取资讯信息. 给定关键字:数字:融合:电视抓 ...
Python 爬虫学习系列教程
Python爬虫 --- 中高级爬虫学习路线 :https://www.cnblogs.com/Eeyhan/p/14148832.html 看不清图时,可以把图片保存到本地在打开查看... Pyth ...
Python爬虫学习手册
like:128-Python 爬取落网音乐 like:127-[图文详解]python爬虫实战--5分钟做个图片自动下载器 like:97-用Python写一个简单的微博爬虫 like:87-爬虫抓 ...
Python爬虫学习系列教程
大家好哈,我呢最近在学习Python爬虫,感觉非常有意思,真的让生活可以方便很多.学习过程中我把一些学习的笔记总结下来,还记录了一些自己实际写的一些小爬虫,在这里跟大家一同分享,希望对Python爬虫 ...
Python爬虫学习系列教程-----------爬虫系列你值得收藏
静觅 » Python爬虫学习系列教程:http://cuiqingcai.com/1052.html 大家好哈,我呢最近在学习Python爬虫,感觉非常有意思,真的让生活可以方便很多.学习过程中我把 ...
python爬虫学习之路
python爬虫学习之路第一章:爬虫基础 1.爬虫前奏爬虫的实际例子: 1.搜索引擎(百度.谷歌.360搜索等.) 2.伯乐在线 3.惠惠购物助手 4.数据分析与研究(数据冰山知乎专栏). 5.抢 ...

python爬虫学习（十八）人人网cookie登录

python爬虫学习（十八）人人网cookie登录相关推荐

最新文章

热门文章