selenium 淘宝爬虫（需要扫码登录一下）

from selenium import webdriver
import time
import json
import re

# Scratch file the scraped page is written to and read back from.
_HTML_PATH = r'C:\Users\86666\Desktop\python文件处理\钓鱼.html'

# Each pattern captures the complete JSON string literal (quotes included)
# that follows the key, honouring backslash escapes inside the literal.
_RAW_TITLE_RE = re.compile(r'"raw_title"\s*:\s*("(?:[^"\\]|\\.)*")')
_VIEW_SALES_RE = re.compile(r'"view_sales"\s*:\s*("(?:[^"\\]|\\.)*")')


def gethtml(url, login_wait=15):
    """Open *url* in Chrome via selenium and return the rendered page source.

    The function clicks the corner icon of the ``#login`` panel, then sleeps
    so the user can finish the login by hand (the script expects a QR-code
    scan), scrolls the document down and grabs the page source.

    Args:
        url: Address of the page to load.
        login_wait: Seconds to sleep while the user logs in manually.

    Returns:
        The page source as reported by the driver after scrolling.
    """
    # Imported locally: only this function needs them, and the rest of the
    # module stays importable without these selenium submodules.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # find_element(By..., ...) replaces the find_element_by_* helpers,
        # which were removed from Selenium 4.
        login_icon = driver.find_element(
            By.CSS_SELECTOR,
            '#login > div.corner-icon-view.view-type-qrcode > i',
        )
        login_icon.click()
        time.sleep(login_wait)  # manual step: the user scans the QR code
        try:
            # Scroll down so that lazily loaded content gets rendered.
            driver.execute_script(
                'var q=document.documentElement.scrollTop=10000'
            )
        except WebDriverException as exc:
            # Scrolling is best effort; the page source is still useful.
            print('出现错误', exc)
        return driver.page_source
    finally:
        # Always release the browser, even when a step above failed.
        driver.quit()
        print('关闭浏览器')


def xieru(html, path=_HTML_PATH):
    """Write *html* to *path* and return the text read back from that file.

    The file is overwritten on every call.  The previous append mode made
    repeated calls (and repeated runs) accumulate several copies of the page,
    which duplicated every extracted product.

    Args:
        html: Page source to store.
        path: Destination file; defaults to the script's scratch file.

    Returns:
        The content of *path* after writing.
    """
    with open(path, 'wt', encoding='utf-8') as out_file:
        out_file.write(html)
        print('写入成功')
    with open(path, 'rt', encoding='utf-8') as in_file:
        return in_file.read()


def _decode_json_string(token):
    """Decode one quoted JSON string literal; fall back to its raw inner text."""
    try:
        return json.loads(token)
    except ValueError:
        return token[1:-1]


def tiqu(list, html2):
    """Extract product titles and sales texts from the page source.

    Looks for ``"raw_title":"..."`` and ``"view_sales":"..."`` fields in
    *html2* and pairs them up by position.

    SECURITY: the page is untrusted input.  The values are decoded with
    ``json.loads`` instead of ``eval`` so scraped text is never executed.

    Args:
        list: List that receives the results; it is mutated in place.
            (The name shadows the builtin but is kept for compatibility.)
        html2: Page source to search.

    Returns:
        The same *list* object.  Each appended entry has the shape
        ``[(title,), (sales,)]`` -- one-element tuples, the shape that
        ``print1`` indexes into.
    """
    titles = [_decode_json_string(m) for m in _RAW_TITLE_RE.findall(html2)]
    sales = [_decode_json_string(m) for m in _VIEW_SALES_RE.findall(html2)]
    if len(titles) == len(sales):
        for title, sold in zip(titles, sales):
            list.append([(title,), (sold,)])
    else:
        # Positional pairing would be wrong, so nothing is extracted.
        print('商品名称与付款人数的数量不一致，已跳过提取',
              len(titles), len(sales))
    print('打印列表')
    return list


def print1(list2):
    """Print the extracted rows as a two-column table.

    Args:
        list2: Entries shaped like the ones produced by ``tiqu``; the first
            column is ``entry[0][0]`` and the second is ``entry[1][0]``.
    """
    row_format = '{0:<35}\t\t\t\t\t{1:<12}'
    # Header order matches the data columns: product title, then sales.
    print(row_format.format('产品名称', '付款人数'), chr(12288))
    for entry in list2:
        print(row_format.format(str(entry[0][0]), str(entry[1][0])),
              chr(12288))


def main():
    """Scrape the search page, store it, then extract and print the products."""
    url = ('https://s.taobao.com/search?q=%E9%B1%BC%E7%AB%BF&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.2017.201856-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306')
    html = gethtml(url)
    # Store the page exactly once; a second call used to duplicate the data.
    html2 = xieru(html)
    list2 = tiqu([], html2)
    print(list2)
    print1(list2)


if __name__ == '__main__':
    main()

selenium 淘宝爬虫（需要扫码登录一下）相关推荐

淘宝app端扫码登录-解决异地登录-获取Cookie-延期Cookie
目前淘宝安全风控很高,异地扫码登录容易出现疑似账号被盗用.需要自助开通修改密码. 1. 扫码登录 - 授权 - 获取Cookie 这一步不难,需要前端动态生成一个二维码图片.请求淘宝生成二维码地址 ...
selenium淘宝爬虫
selenium淘宝爬虫使用selenium做淘宝商品爬虫 1.准备工作 2.页面分析 3.代码实现 3-1.模拟登陆 3-2.商品列表页 3-3.获取商品信息 3-4.数据库设计 3-5.爬虫执行 ...
scrapy淘宝爬虫（通过模拟登录获取cookie）获取价格信息评论
项目要求:爬取淘宝某领域下的商品名称,价格,评论. (我贼怂,如果阿里巴巴的朋友发现,请联系我,我立刻删帖,谢谢.) 一.构思流程模拟登陆获取cookie scrapy爬取数据二.分步分析 (一 ...
python爬虫淘宝实例-python 淘宝爬虫示例源码（抓取天猫数据）
[实例简介]爬取淘宝天猫网站数据 [实例截图] [核心代码] # -*- coding: utf-8 -*- #!/usr/bin/env python import datetime import ...
python爬虫代码实例源码_python 淘宝爬虫示例源码（抓取天猫数据）
爬取淘宝天猫网站数据# -*- coding: utf-8 -*- #!/usr/bin/env python import datetime import urlparse import sock ...
淘宝小程序扫码进入参数
目录进入方式官方的当面分享入参返回值示例代码取参生成二维码的分享官方方法自用方法入参夹带自用方法取参进入方式目前我所用过的扫码进入方式有两种,一种是右上角官方分享中的当面分享, ...
python爬虫笔记（六）网络爬虫之实战（1）——淘宝商品比价定向爬虫（解决淘宝爬虫限制：使用cookies）...
1. 淘宝商品信息定向爬虫链接: https://www.taobao.com/ 2. 实例编写 2.1 整体框架 # -*- coding: utf-8 -*-import requests i ...
一文详析微信和淘宝扫码登录背后的实现原理！
关注上方"深度学习技术前沿",选择"星标公众号", 资源干货,第一时间送达! 作者:imtech my.oschina.net/u/4231722/blog/3 ...
面试官：说说微信和淘宝扫码登录背后的实现原理？
点击上方蓝色"程序猿DD",选择"设为星标" 回复"资源"获取独家整理的学习资料! 来源 | my.oschina.net/u/423172 ...

selenium 淘宝爬虫（需要扫码登录一下）

selenium 淘宝爬虫（需要扫码登录一下）相关推荐

最新文章

热门文章