1、导入依赖

        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.14.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.5</version>
        </dependency>

2、代码

package com.lxq.excel;import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.lxq.excel.util.HttpClientUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;/*** @Author lixiaoqiang* @Date 2023/2/22 16:18*/
public class GetCityCode {private static String[]  classSrcs=new String[]{"provincetr","citytr","countytr","towntr","villagetr"};private static int i = 0;static String fileName;public static void main(String[] args) {try {testProvince();} catch (Exception e) {// TODO Auto-generated catch blocke.printStackTrace();}}static void testCatch(){try {//Document doc = Jsoup.parse(new URL(url).openStream(), "GBK", url);// System.out.println(doc.toString());int a = 10/0;}catch (Exception e){System.out.println("catch");return;}System.out.println("last");}// 省public static void testProvince() throws Exception {String url = "http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2022/index.html";Document doc=Jsoup.connect(url).get();String s = HttpClientUtils.doGet("http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2022/index.html");System.out.println(s);//Document doc=Jsoup.parse(s);//Document doc = Jsoup.parse(new URL(url).openStream(), "gb2312", url);//System.out.println(doc.toString());Elements containers = doc.getElementsByClass("provincetr");Document containerDoc = Jsoup.parse(containers.toString());int size = containerDoc.select("a").size();for (int i = 0; i < size; i++) {String pH = containerDoc.select("a").get(i).attr("href");String pName = containerDoc.select("a").get(i).text();System.out.println(containerDoc.select("a").get(i).attr("href"));System.out.println(containerDoc.select("a").get(i).text());fileName = pName+".csv";if (i > 12) {int t = url.lastIndexOf("/");String cityUrl = url.substring(0, t + 1);testCity(cityUrl + pH);//resCrabData(cityUrl+pH,classSrcs[1]);}int t = url.lastIndexOf("/");String cityUrl = url.substring(0, t + 1);//testCity(cityUrl + pH,);}// System.out.println(containerDoc.toString());}public static String selectNextClassSrcByCurSrc(String curSrc){System.out.println("aaaaaaaaa=="+curSrc);for(int i =0;i<classSrcs.length;i++){String s = classSrcs[i];if(s.equals(curSrc) && i!= (classSrcs.length-1)){return classSrcs[i+1];}}return null;}// 市public static 
void testCity(String url) throws Exception {//Document doc = Jsoup.parse(new URL(url).openStream(), "GBK", url);// System.out.println(doc.toString());Document doc=Jsoup.connect(url).get();Elements containers = doc.getElementsByClass("citytr");int elementsSize = containers.size();// System.out.println(elementsSize);// System.out.println(containers.toString());for (int i = 0; i < elementsSize; i++) {Element e = containers.get(i);// System.out.println(e.toString()+"==");Document containerDoc = Jsoup.parse(e.toString());int size = containerDoc.select("a").size();// System.out.println(size);String cH = containerDoc.select("a").get(0).attr("href");String cCode = containerDoc.select("a").get(0).text();String cName = containerDoc.select("a").get(1).text();String countryUrl = url.substring(0, url.lastIndexOf("/") + 1);JSONObject json = new JSONObject();json.put("cityCode",cCode);json.put("cityName",cName);testCountry(countryUrl + cH,json.toJSONString());/** for(int j = 0;j<size;j++){ String* cH=containerDoc.select("a").get(j).attr("href"); String* cName=containerDoc.select("a").get(j).text();* System.out.println(cH+"===="); System.out.println(cName+"==="); }*/}// for(int i =0 ;i<size;i++){// String cH=containerDoc.select("a").get(i).attr("href");// String cName=containerDoc.select("a").get(i).text();// System.out.println(cH);// System.out.println(cName);// }}// 县public static void testCountry(String url,String j) throws Exception {//Document doc = Jsoup.parse(new URL(url).openStream(), "GBK", url);// System.out.println(doc.toString());Document doc=Jsoup.connect(url).get();Elements containers = doc.getElementsByClass("countytr");int elementsSize = containers.size();//System.out.println(elementsSize);// System.out.println(containers.toString());for (int i = 0; i < elementsSize; i++) {Element e = containers.get(i);// System.out.println(e.toString()+"==");Document containerDoc = Jsoup.parse(e.toString());int size = containerDoc.select("a").size();// System.out.println(size);if 
(size == 0) {// int ss=e.select("td").size();// System.out.println(ss);System.out.println("code===" + e.select("td").get(0).text());System.out.println("name===" + e.select("td").get(1).text());JSONObject json = JSON.parseObject(j);json.put("countryCode",e.select("td").get(0).text());json.put("countryName",e.select("td").get(1).text());write(json);// int ss=containerDoc.select("td").size();// System.out.println(ss);} else {String cH = containerDoc.select("a").get(0).attr("href");String cCode = containerDoc.select("a").get(0).text();String cName = containerDoc.select("a").get(1).text();JSONObject json = JSON.parseObject(j);json.put("countryCode",cCode);json.put("countryName",cName);String countryUrl = url.substring(0, url.lastIndexOf("/") + 1);testTown(countryUrl + cH,json.toJSONString());}}}// 乡、镇public static void testTown(String url,String j) throws Exception {List<JSONObject> list = new ArrayList<>();Document doc;try {//Document doc = Jsoup.parse(new URL(url).openStream(), "GBK", url);// System.out.println(doc.toString());doc = Jsoup.connect(url).get();}catch (Exception e){testTown(url,j);return;}Elements containers = doc.getElementsByClass("towntr");int elementsSize = containers.size();//System.out.println(elementsSize);// System.out.println(containers.toString());for (int i = 0; i < elementsSize; i++) {Element e = containers.get(i);// System.out.println(e.toString()+"==");Document containerDoc = Jsoup.parse(e.toString());int size = containerDoc.select("a").size();// System.out.println(size);if (size == 0) {// int ss=e.select("td").size();// System.out.println(ss);System.out.println("code===" + e.select("td").get(0).text());System.out.println("name===" + e.select("td").get(1).text());JSONObject json = JSON.parseObject(j);json.put("townCode",e.select("td").get(0).text());json.put("townName",e.select("td").get(1).text());write(json);// int ss=containerDoc.select("td").size();// System.out.println(ss);} else {String cH = 
containerDoc.select("a").get(0).attr("href");String cCode = containerDoc.select("a").get(0).text();String cName = containerDoc.select("a").get(1).text();JSONObject json = JSON.parseObject(j);json.put("townCode",cCode);json.put("townName",cName);String villageTrUrl = url.substring(0, url.lastIndexOf("/") + 1);testVillageTr(villageTrUrl + cH,json.toJSONString());}}}// 乡、镇public static void testTown2(String url,String j) throws Exception {//Document doc = Jsoup.parse(new URL(url).openStream(), "GBK", url);// System.out.println(doc.toString());Document doc=Jsoup.connect(url).get();Elements containers = doc.getElementsByClass("towntr");int elementsSize = containers.size();//System.out.println(elementsSize);// System.out.println(containers.toString());for (int i = 0; i < elementsSize; i++) {Element e = containers.get(i);// System.out.println(e.toString()+"==");Document containerDoc = Jsoup.parse(e.toString());int size = containerDoc.select("a").size();// System.out.println(size);if (size == 0) {// int ss=e.select("td").size();// System.out.println(ss);System.out.println("code===" + e.select("td").get(0).text());System.out.println("name===" + e.select("td").get(1).text());JSONObject json = JSON.parseObject(j);json.put("townCode",e.select("td").get(0).text());json.put("townName",e.select("td").get(1).text());write(json);// int ss=containerDoc.select("td").size();// System.out.println(ss);} else {String cH = containerDoc.select("a").get(0).attr("href");String cCode = containerDoc.select("a").get(0).text();String cName = containerDoc.select("a").get(1).text();JSONObject json = JSON.parseObject(j);json.put("townCode",cCode);json.put("townName",cName);String villageTrUrl = url.substring(0, url.lastIndexOf("/") + 1);testVillageTr(villageTrUrl + cH,json.toJSONString());}}}public static void testVillageTr(String url,String j)  {//Document doc = Jsoup.parse(new URL(url).openStream(), "GBK", url);// System.out.println(doc.toString());List<JSONObject> list = new ArrayList<>();try 
{Document doc=Jsoup.connect(url).get();Elements containers = doc.getElementsByClass("villagetr");int elementsSize = containers.size();System.err.println(elementsSize);// System.out.println(containers.toString());for (int i = 0; i < elementsSize; i++) {Element e = containers.get(i);// System.out.println(e.toString()+"==");Document containerDoc = Jsoup.parse(e.toString());int size = containerDoc.select("a").size();// System.out.println(size);if (size == 0) {int ss = e.select("td").size();System.out.println(ss);String cCode = e.select("td").get(0).text();String cName = e.select("td").get(2).text();JSONObject json = JSON.parseObject(j);json.put("villageTrCode",cCode);json.put("villageTrName",cName);list.add(json);} else {String cCode = containerDoc.select("a").get(0).text();String cName = containerDoc.select("a").get(2).text();JSONObject json = JSON.parseObject(j);json.put("villageTrCode",cCode);json.put("villageTrName",cName);list.add(json);}}}catch (Exception e){list = new ArrayList<>();System.err.println("url"+url);testVillageTr(url,j);}list.forEach(a->{write(a);});}// 村public static void testVillageTr2(String url,String j) throws Exception {//Document doc = Jsoup.parse(new URL(url).openStream(), "GBK", url);// System.out.println(doc.toString());Document doc=Jsoup.connect(url).get();Elements containers = doc.getElementsByClass("villagetr");int elementsSize = containers.size();System.out.println(elementsSize);// System.out.println(containers.toString());for (int i = 0; i < elementsSize; i++) {Element e = containers.get(i);// System.out.println(e.toString()+"==");Document containerDoc = Jsoup.parse(e.toString());int size = containerDoc.select("a").size();// System.out.println(size);if (size == 0) {int ss = e.select("td").size();System.out.println(ss);String cCode = e.select("td").get(0).text();String cName = e.select("td").get(2).text();JSONObject json = JSON.parseObject(j);json.put("villageTrCode",cCode);json.put("villageTrName",cName);write(json);} else {String 
cCode = containerDoc.select("a").get(0).text();String cName = containerDoc.select("a").get(2).text();JSONObject json = JSON.parseObject(j);json.put("villageTrCode",cCode);json.put("villageTrName",cName);write(json);}}}private static void write(JSONObject json){System.out.println(json.toJSONString());try {boolean newFile = false;String filePath = "E:\\md\\tmp\\"+fileName;// 输出的文件流File file = new File(filePath);if (!file.exists()){file.createNewFile();newFile = true;}FileOutputStream os = new FileOutputStream(file, true);if(newFile){os.write("cityCode,cityName,countryCode,countryName,townCode,townName,villageTrCode,villageTrName".getBytes());String newLine = System.getProperty("line.separator");os.write(newLine.getBytes());}String msg = json.getString("cityCode")+","+json.getString("cityName")+","+json.getString("countryCode")+","+json.getString("countryName")+","+json.getString("townCode")+","+json.getString("townName")+","+json.getString("villageTrCode")+","+json.getString("villageTrName");// 开始读取os.write(msg.getBytes());String newLine = System.getProperty("line.separator");os.write(newLine.getBytes());// 完毕,关闭所有链接os.close();}catch (IOException e){}}}

java爬取行政区划代码相关推荐

  1. 【Java】Java爬取国家统计局五级行政区划编码(省、市(州)、县(区)、乡(镇)、村)

    今天使用了idea+java爬取国家统计局12位行政区划编码,包括省、市(州)、县(区)、乡(镇)以及村委会/委员会等的行政编码和名称,将区划编码以及名称保存在数据库中。本文内容包括数据库数据效果 ...

  2. Java爬取解析去哪儿景点信息

    前言:这两周在做 Web 课的大作业,顺便琢磨了一下如何使用 Java 从网上获取一些数据,现在写这篇博客记录一下. PS:这里仅限交流学习用,如利用代码进行恶意攻击他网站,和作者无关!!! Java ...

  3. 用java爬取学校数据_Java爬取校内论坛新帖

    Java爬取校内论坛新帖 为了保持消息灵通,博主没事会上上校内论坛看看新帖,作为爬虫爱好者,博主萌生了写个爬虫自动下载的想法. 嗯,这次就选Java. 第三方库准备 Jsoup Jsoup是一款比较好 ...

  4. Jsoup:用Java也可以爬虫,怎么使用Java进行爬虫,用Java爬取网页数据,使用Jsoup爬取数据,爬虫举例:京东搜索

    Jsoup:用Java也可以爬虫,怎么使用Java进行爬虫,用Java爬取网页数据,使用Jsoup爬取数据,爬虫举例:京东搜索 一.资源 为什么接下来的代码中要使用el.getElementsByTa ...

  5. java爬取网页内容 简单例子(2)——附jsoup的select用法详解

    [背景] 在上一篇博文 java爬取网页内容 简单例子(1)--使用正则表达式 里面,介绍了如何使用正则表达式去解析网页的内容,虽然该正则表达式比较通用,但繁琐,代码量多,现实中想要想出一条简单的正则 ...

  6. Java爬取校内论坛新帖

    Java爬取校内论坛新帖 为了保持消息灵通,博主没事会上上校内论坛看看新帖,作为爬虫爱好者,博主萌生了写个爬虫自动下载的想法. 嗯,这次就选Java. 第三方库准备 Jsoup Jsoup是一款比较好 ...

  7. Java爬取并下载酷狗音乐

    本文方法及代码仅供学习,仅供学习. 案例: 下载酷狗TOP500歌曲,代码用到的代码库包含:Jsoup.HttpClient.fastJson等. 正文: 1.分析是否可以获取到TOP500歌单 打开 ...

  8. java爬取单张图片

    我们经常需要在网页上获取一些图片,有的图片我们是可以直接下载使用,有的图片需要我们登陆账号甚至付费下载,所以在此我写了一个使用Java爬取任意网页单张图片的爬虫. 代码解析 1.图片的网络位置 2.进 ...

  9. java爬取当当网所有分类的图书信息(ISBN,作者,出版社,价格,所属分类等)

    java爬取当当网所有分类的图书信息(ISBN,作者,出版社,价格,所属分类等) 顺手写的,没有建立新项目,放我自己的项目的一个文件夹里了,有兴趣的朋友可以拉下来试试 https://gitee.co ...

最新文章

  1. java面向对象——包+继承+多态(一)
  2. 用python实现图书管理系统
  3. 缓存系统MemCached的Java客户端优化历程
  4. sklearn集合算法预测泰坦尼克号幸存者
  5. 信号量使用例子_用信号量锁定:一个例子
  6. LeetCode刷题(34)-Rotate List
  7. mysql将俩个SQL查询出来的不一样的结果横向拼接成一行数据
  8. 用c语言编声光报警子程序,C语言编程的智能火灾报警监测系统
  9. 数据结构 7并查集(DISJOINT SET)
  10. C语言基础教程之强制类型转换
  11. 【转】C语言条件编译及编译预处理阶段
  12. Android ThreadPool
  13. NYOJ水题--最短街区问题
  14. HDOJ 2870 Largest Submatrix
  15. 上下定高 中间自适应_B站微服务框架Kratos详细教程(3)中间件
  16. 串口服务器485转以太网
  17. tomcat 热部署的实现
  18. 安徽建立“库长制” 千余名库长保粮食安全
  19. 刻录linux安装光盘,如何将红旗Linux5的两个ISO安装光盘镜像刻录到一张DVD光盘上,做成安装光盘[原创]...
  20. 2012 九月十月百度人搜,阿里巴巴,腾讯华为笔试面试八十题(第331-410题)

热门文章

  1. 3Dmax动画导出 unity导入 自用
  2. [nlp] 负采样 nce_loss
  3. 属性导出FusionCharts图表控件中文版使用手册
  4. 象花儿一样怒放的生命
  5. 汪峰-怒放的生命 MP3试听,歌词下载
  6. CeGui+Ogre
  7. centos配置ipv6
  8. excel教程自学网_台湾超级推荐朋友圈广告投放美工教程自学网
  9. 双高斯拟合,差距啊,继续努力吧!
  10. iOS 仿支付宝首页样式