操作流程:

1. 通过selenium调起微信web页面;

2. 扫描二维码,登录;

3. 先遍历通讯录,把每个联系人/群聊加入聊天列表;再遍历聊天列表,抓取每个群聊的人数;

4. 将结果写入excel表格。

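注:代码依赖 Selenium 的 Java 库、Excel 操作库 jxl(JExcelApi)以及 chromedriver,请自行下载。jar 包需要加入工程的类路径;chromedriver.exe 需要放到代码中 driverDir 指定的位置(默认为 D:\bak\chromedriver.exe);结果文件默认输出到 c:\test\ 下按日期命名的目录(详见代码)。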

代码:

package com.yang.selenium;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.Keys;
import org.openqa.selenium.NoSuchElementException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.interactions.Actions;
import org.openqa.selenium.support.ui.ExpectedCondition;
import org.openqa.selenium.support.ui.WebDriverWait;

import jxl.Workbook;
import jxl.read.biff.BiffException;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;

public class Chat {

public static void main(String[] args) throws WriteException, IOException,
InterruptedException, BiffException {
// TODO Auto-generated method stub

WebDriver wd;

String resultDir = "c:\\test\\";
String driverDir = "D:\\bak\\" + "chromedriver.exe";
System.setProperty("webdriver.chrome.driver", driverDir);
wd = new ChromeDriver();
wd.get("https://web2.wechat.com/");
// wd.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
By chatSelector = By
.cssSelector("div[class='chat_item slide-left ng-scope']");

while (!checkWebElementExist(wd, chatSelector)) {
System.out.println("请扫码二维码,并等待加载完成后运行。。。");
Thread.sleep(1000);
}
By tabContactSelector = By
.cssSelector("div[class='tab_item no_extra']");
By sendMessageSelector = By.cssSelector("a[class='button']");
WebElement tabContact = wd.findElement(tabContactSelector);
tabContact.click(); // 点击通讯录tab(群聊,好友)
System.out.println("***********************************************");
System.out.println("**********  请不要改动代码,否则可能影响运行结果       **********");
System.out.println("**********  如运行结果有偏差,请联系技术人员排查       **********");
System.out.println("********  数据加载中,15秒(加载时间)之后将开始运行      ********");
System.out.println("***********************************************");
System.out.println();
System.out.println();
System.out.println();
Thread.sleep(15000);
SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
String startTime = df.format(new Date());
int counter = 20; // 每 counter个去判断下
System.out.println("开始运行: " + startTime);
System.out.println();

wd.findElement(By.cssSelector("h4[class='nickname ng-binding']"))
.click(); // 点击第一个
boolean up = false;
int bottom = 0, i = 0;
Thread.sleep(300);
String last_name = "-", latest_name = "-";
for (int p = 0; p < 2; p++) { // 上下移动,走遍通讯录,让通讯录加载完整
while (bottom < 9) {
last_name = latest_name;
move(wd, up);
latest_name = getValue(wd);
if (latest_name.trim().equals(last_name)) {
bottom++;
} else {
bottom = 0;
}
if (bottom == 9) {
up = true;
bottom = 0;
break;
}
}
}
System.out.println("通讯录 加载 完成时间: " + df.format(new Date()));

int j = 0;
String latest_value;
while (i < counter) { // 通讯录每个去点击一下,并添加到聊天列表里面去
i++;
doClick(wd, sendMessageSelector, tabContactSelector);
if (i == counter) {
Thread.sleep(1000);
latest_value = getValue(wd).trim();
doClick(wd, sendMessageSelector, tabContactSelector);
if (latest_value.equals(getValue(wd).trim())) {
j++;
} else {
i = 0;
j = 0;
}
if (j == 5) {
break;
}
}
}
System.out.println("通讯录 点击 完成时间: " + df.format(new Date()));

wd.findElement(By.cssSelector("i[class='web_wechat_tab_chat']"))
.click();
begainCount(wd, resultDir, counter);

/*
* for(String key : map.keySet()){ System.out.println(key + " : " +
* map.get(key)); }
*/

System.out.println("导出数据成功!!!");

String endTime = df.format(new Date());
System.out.println("***********************************************");
System.out.println("开始运行时间: " + startTime);
System.out.println("结束运行时间: " + endTime);
Thread.sleep(60000);
wd.close();
}

public static void begainCount(WebDriver wd, String resultDir, int counter)
throws InterruptedException, RowsExceededException, WriteException,
IOException, BiffException {
int k = 0, j = 0;
String latest_name = "-";
String title_name = "-", title_count = "0";
By title_countSelector = By
.cssSelector("span[class='title_count ng-binding ng-scope']");

HashMap<String, String> map = new HashMap<String, String>();
boolean up = false;
for (int p = 0; p < 2; p++) { // 聊天列表页,上下各走一遍,记录数据
while (k < counter) {
k++;
latest_name = title_name.trim();
JavascriptExecutor js = (JavascriptExecutor) wd;
if (checkWebElementExist(wd, title_countSelector)) {
title_count = (String) js
.executeScript("return document.getElementsByClassName(\"title_count ng-binding ng-scope\")[0].innerText;");
title_count = title_count.substring(
title_count.indexOf("(") + 1,
title_count.lastIndexOf(")"));
title_name = (String) js
.executeScript("return document.getElementsByClassName(\"title_name ng-binding\")[0].innerText");
System.out.println("群聊 -> " + title_name + " : "
+ title_count);
map.put(title_name, title_count);
}

if (k == counter) {
Thread.sleep(100);
title_name = (String) js
.executeScript("return document.getElementsByClassName(\"title_name ng-binding\")[0].innerText");
if (latest_name.equals(title_name.trim())) {
j++;
k = counter - 1;
} else {
j = 0;
k = 0;
}
if (j == 5) {
k = 0;
j = 0;
up = true;
break;
}
}
move(wd, up);
Thread.sleep(500);
}
}
writeExcel(resultDir, map);
}

public static String getValue(WebDriver wd) {
JavascriptExecutor js = (JavascriptExecutor) wd;
String value_name = (String) (String) js
.executeScript("return document.getElementsByClassName(\"value ng-binding\")[0].innerText");
return value_name;
}

public static String getName(WebDriver wd) {
// 获取群名
String title_name;
JavascriptExecutor js = (JavascriptExecutor) wd;
title_name = (String) js
.executeScript("return document.getElementsByClassName(\"title_name ng-binding\")[0].innerText");
return title_name;
}

public static String getCount(WebDriver wd) {
// 获取群人数
String title_count;
JavascriptExecutor js = (JavascriptExecutor) wd;
title_count = (String) js
.executeScript("return document.getElementsByClassName(\"title_count ng-binding ng-scope\")[0].innerText;");
title_count = title_count.substring(title_count.indexOf("(") + 1,
title_count.lastIndexOf(")"));
return title_count;
}

public static boolean checkWebElementExist(WebDriver wd, By selector) {
// 判断元素是否存在
try {
wd.findElement(selector);
return true;
} catch (NoSuchElementException e) {
return false;
}
}

public static void move(WebDriver wd, boolean up)
throws InterruptedException {
// 上下移动
if (up == true) {
(new Actions(wd)).sendKeys(Keys.ARROW_UP).build().perform();
} else {
(new Actions(wd)).sendKeys(Keys.ARROW_DOWN).build().perform();
}
Thread.sleep(30);
}

public static void writeExcel(String resultDir, HashMap<String, String> map)
throws InterruptedException, IOException, RowsExceededException,
WriteException, BiffException {
// 写excel表格数据
String fileName = mkDateDir(resultDir) + "\\"
+ new SimpleDateFormat("yyyy_MM_dd_HH_mm").format(new Date())
+ ".xls";
File file = new File(fileName);
WritableWorkbook book;
if (file.exists()) {
file.delete();
}
book = Workbook.createWorkbook(file);
WritableSheet sheet = book.createSheet("群聊人数统计", 0);
int m = 1, sum = 0;
Label xiaoqu_title = new Label(0, 0, "小区名");
Label renshu_title = new Label(1, 0, "人数");
Label xiaoqu, renshu;
sheet.addCell(xiaoqu_title);
sheet.addCell(renshu_title);

System.out.println("总结: ");
for (String key : map.keySet()) {
xiaoqu = new Label(0, m, key);
renshu = new Label(1, m, map.get(key));
sheet.addCell(renshu);
sheet.addCell(xiaoqu);
m++;
sum += Integer.parseInt(map.get(key));
System.out.println(key + " : " + map.get(key));
}
sheet.addCell(new Label(0, m, "总数"));
sheet.addCell(new Label(1, m, Integer.toString(sum)));

book.write();
book.close();
}

public static String mkDateDir(String reportDir) throws BiffException,
IOException {
// 创建报告存放目录,以日期为标记

String timeDir = new SimpleDateFormat("yyyy_MM_dd").format(new Date());
reportDir = reportDir + "\\\\" + timeDir;

File file = new File(reportDir);
if (!file.getParentFile().exists()) {
file.getParentFile().mkdir();
}
if (!file.exists() && !file.isDirectory()) {
file.mkdir();
}
return file.getAbsolutePath();
}

public static void doClick(WebDriver wd, final By sendMessageSelector,
By tabContactSelector) throws InterruptedException {
// 1. 点击发消息,跳到聊天列表; 2. 点击通讯录; 3。向下按
WebDriverWait wait = (new WebDriverWait(wd, 10));
wait.until(new ExpectedCondition<WebElement>() {
public WebElement apply(WebDriver d) {
return d.findElement(sendMessageSelector);
}
}).click();
// wd.findElement(sendMessageSelector).click();
// Thread.sleep(150);
wd.findElement(tabContactSelector).click();
Thread.sleep(50);
move(wd, false);
}
}

“抓取微信群聊人数,并保存到excel表格”的相关推荐

  1. Crawler:基于BeautifulSoup库+requests库+伪装浏览器的方式实现爬取14年所有的福彩网页的福彩3D相关信息,并将其保存到Excel表格中

    Crawler:Python爬取14年所有的福彩信息,利用requests库和BeautifulSoup模块来抓取中彩网页福彩3D相关的信息,并将其保存到Excel表格中 目录 输出结果 核心代码 输 ...

  2. Python爬取URP教务系统课程表并保存到excel

    Python爬取URP教务系统课程表并保存到excel 爬取URP教务系统课程表最终结果如图所示: 接下来开始操作: 首先打开教务系统->按F12->点击Network->刷新一下界 ...

  3. Python爬取中国大学排名,并且保存到excel中

    前言 以下文章来源于数据分析和Python ,作者冈坂日川 今天发的是python爬虫爬取中国大学排名,并且保存到excel中,当然这个代码很简单,我用了半小时就写完了,我的整体框架非常清晰,可以直接 ...

  4. python爬取京东畅销榜(计算机类)图书信息(书名,作者,价格),并保存到excel表格

    爬虫新手小白的第一次"半独立"爬虫,为什么是"半独立"呢?因为基本的代码块是从其他博客借鉴过来的,在此基础上加入了自己的思考和实现. (后面的价格获取感觉自己走 ...

  5. 对豆瓣进行爬虫来获取相关数据(分别保存到Excel表格和sqlite中)

    1.存入Excel表格的代码: from bs4 import BeautifulSoup #网页解析,获取数据 import re #正则表达式,进行文字匹配 import urllib.reque ...

  6. Python爬虫鲁迅先生《经典语录》保存到Excel表格(附源码)

    Python爬虫鲁迅先生<经典语录>保存到Excel表格(附源码) 前言 今天用Python 爬取鲁迅先生<经典语录>,直接开整~ 代码运行效果展示 开发工具 Python版本 ...

  7. 爬取网易云在线课程并保存到Excel

    一.准备工作 1.打开网易云课堂,搜索Python相关课程,选择全部查看 2.打开谷歌浏览器,使用检查功能(F12)分析页面,在NetWork-XHR中发现所有课程信息都保存在"studyc ...

  8. 将爬取的数据保存到Excel表格

    第一步.导入模块 import xlwt # 导入写入excel需要的包 第二步.定义函数,将爬取好的数据保存到excel文件中,下面以保存python的关键词为例,介绍详细流程. def write ...

  9. 怎么把matlab中的图导出,matlab的数据能保存到excel表格-如何将matlab 中输出的图形保存到Excel中去,详细点...

    怎样将MATLAB中的数据输出到excel中 数据保存到excel文件 xlswrite(xlsfile, data, sheet, range); % sheet 和 range可以不指定 如: x ...

最新文章

  1. 同样都是调参,为什么人家的神经网络比我牛逼100倍?
  2. 前端开发工程师养成记
  3. android删除键监听,「React Native」Android返回键监听
  4. java+逆向工程怎么运行_MyBatis逆向工程的创建和使用
  5. Stanford CoreNLP遇到的问题
  6. 95-35-010-Topic-Topic 扩容 源码解析
  7. Chapter 8 Document Management(第8章 文档管理)—1 【中英文对照】
  8. MongoDB Hot Backup 测试及痛点
  9. ubuntu16.04系统上安装CAJViewer方法步骤教程详解
  10. Sublime Text实现代码自动生成,快速编写HTML/CSS代码
  11. 计算机无法启动printspooler,Win7系统下print spooler服务无法启动的解决方法
  12. 刘宇凡:我眼中的SEO思维
  13. csv to dbf java_CSV to DBF
  14. KALI虚拟机挂代理教程
  15. TCP连接大量CLOSE_WAIT状态问题排查
  16. 帮我写一段描写时间过得很快,但是自己又很不想时间过得那么快的小作文
  17. FAT32文件系统学习
  18. Symbian和C++ SDK入门之开发工具(转)
  19. 分享一个电气仿真软件
  20. 淘宝爆款返场什么意思?淘宝爆款返场怎么报名?

热门文章

  1. JAVA从入门到进阶(九)——集合类框架基础一
  2. 离开Facebook后,Caffe创始人贾扬清加入阿里巴巴
  3. 嵌入式培养是什么意思 看完秒懂
  4. 计算机的声音怎么设置在哪设置方法,Win7电脑声音设置的方法
  5. WebRTC[23]-Licode与Mediasoup的简单对比
  6. python -- shutil
  7. LINUX提权入门手册
  8. Ubuntu 11.10与“核高基画饼“
  9. 核高基专家许洪波:HTML5将赢得移动设备未来
  10. 2D转换综合写法(13)