html获取网站标题,批量获取网站标题
【实例简介】
批量获取网站的标题
【实例截图】
【核心代码】
using HttpCodeLib;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;
namespace 批量获取网站标题
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form. All setup is delegated to InitializeComponent, which is
/// defined in another part of this partial class (not visible in this file).
/// </summary>
public Form1()
{
InitializeComponent();
}
/// <summary>
/// The complete set of rows backing the list view. It is assigned from the
/// <c>items</c> field whenever rows are added, imported or cleared.
/// </summary>
/// <remarks>
/// The element type is ListViewItem: the list is filled with ListViewItem
/// instances and its elements are handed to RetrieveVirtualItemEventArgs.Item.
/// </remarks>
protected List<ListViewItem> ItemsSource
{
    get;
    private set;
}
/// <summary>
/// The rows currently served to the virtual-mode list view; the
/// RetrieveVirtualItem handler indexes into this list.
/// </summary>
protected List<ListViewItem> CurrentCacheItemsSource
{
    get;
    private set;
}
/// <summary>
/// Starts a title lookup for every row in the list, one background thread per
/// row, keeping at most <c>maxThCount</c> threads running at the same time.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    stop = 0;
    logg = 0;
    if (listView1.Items.Count == 0)
    {
        return;
    }

    // Lock the editing buttons and reveal the pause/stop buttons and progress bar.
    button2.Enabled = false;
    button5.Enabled = false;
    button1.Enabled = false;
    button3.Enabled = false;
    button4.Enabled = false;
    button6.Visible = true;
    button7.Visible = true;
    progressBar1.Value = 0;
    progressBar1.Visible = true;
    progressBar1.Maximum = listView1.Items.Count;

    for (int index = 0; index < listView1.Items.Count && stop == 0; index++)
    {
        // The worker receives the row index and the URL as a two-element string list.
        List<string> args = new List<string>();
        args.Add(index.ToString());
        args.Add(listView1.Items[index].SubItems[1].Text);

        Thread worker = new Thread(get_biaoti);
        worker.IsBackground = true;

        // Count the thread before it starts so that a worker finishing
        // immediately cannot decrement the counter first. The counter is also
        // written from worker threads, hence the atomic update.
        Interlocked.Increment(ref thCount);
        worker.Start(args);

        // Throttle: wait while the thread limit is reached or the run is paused,
        // pumping messages so the pause/stop buttons stay responsive. Checking
        // the stop flag here keeps "stop while paused" from spinning forever.
        while (stop == 0 && (thCount >= maxThCount || logg == 1))
        {
            Thread.Sleep(10);
            Application.DoEvents();
        }
    }
}
// Number of lookup threads currently counted as running; get_biaoti
// decrements it when a lookup finishes, and ThreadCallBack displays it.
int thCount = 0;
// Upper bound on concurrently running lookup threads (see the wait loop in button2_Click).
int maxThCount = 20;
// Pause flag: button6_Click_1 sets it to 1 to pause and back to 0 to resume.
int logg = 0;
// Stop flag: button7_Click sets it to 1; button2_Click resets it to 0 on start.
int stop = 0;
/// <summary>
/// Thread entry point: fetches the title for one row and posts the result
/// back to the UI thread through <c>ThreadCallBack</c>.
/// </summary>
/// <param name="obj">
/// A <c>List&lt;string&gt;</c> with the row index at position 0 and the URL at
/// position 1, as built by button2_Click.
/// </param>
private void get_biaoti(object obj)
{
    List<string> args = (List<string>)obj;
    int i = int.Parse(args[0]);
    string url = args[1];
    string res = string.Empty;

    if (jiekou == 1)
    {
        // Mode 1: ask the link114 web service for the title.
        url = string.Format(@"http://www.link114.cn/get.php?title&{0}&398666685656", url);
        res = get_html(url);
        if (string.IsNullOrEmpty(res))
        {
            // Split never returns an empty array, so an empty reply has to be
            // detected on the raw response to report a timeout.
            res = "超时";
        }
        else
        {
            string[] result = res.Split(new char[] { ':' });
            // Only a reply with exactly two ':'-separated parts is decoded;
            // any other shape is shown as received.
            if (result.Length == 2)
            {
                res = result[1];
                if (res == "-1")
                    res = "网址错误";
                if (res == "")
                    res = "无法访问";
            }
        }
    }

    if (jiekou == 2)
    {
        // Mode 2: download the page itself and extract its title.
        res = get_html(url);
        string title = GetRegexStr(res ?? string.Empty);
        res = title.Length == 0 ? "超时" : title;
    }

    // This runs on a worker thread while the UI thread updates the same
    // counter, so the decrement must be atomic.
    Interlocked.Decrement(ref thCount);
    this.BeginInvoke(new Action(() => ThreadCallBack(i, res)));
}
/// <summary>
/// Records the outcome of one finished lookup: refreshes the running-thread
/// label, advances the progress bar, writes the result into the row's third
/// column and scrolls that row into view. When the progress bar reaches its
/// maximum the buttons are returned to their idle state.
/// </summary>
/// <param name="i">Index of the row in listView1 the result belongs to.</param>
/// <param name="str">Text to show in the row's result column.</param>
private void ThreadCallBack(int i, string str)
{
    label2.Text = thCount.ToString();
    progressBar1.PerformStep();

    // Store the result in the third column and repaint the list.
    this.listView1.Items[i].SubItems[2].Text = str.ToString();
    listView1.Invalidate();

    bool allDone = progressBar1.Value == progressBar1.Maximum;
    if (allDone)
    {
        // Everything processed: hide the progress bar and restore the buttons.
        progressBar1.Visible = false;

        button2.Enabled = true;
        button5.Enabled = true;
        button1.Enabled = true;
        button3.Enabled = true;
        button4.Enabled = true;

        button6.Visible = false;
        button7.Visible = false;
    }

    listView1.Items[i].EnsureVisible();
}
/// <summary>
/// Downloads the content of a page through the HttpCodeLib helper classes.
/// </summary>
/// <param name="url">Address to request.</param>
/// <returns>
/// The <c>Html</c> value of the response. If any exception is thrown, the
/// exception's text (<c>ToString()</c>) is returned instead.
/// </returns>
public string get_html(string url)
{
    try
    {
        HttpHelpers http = new HttpHelpers();
        // Allocated up front as in the original code; GetHtml's result replaces it below.
        HttpResults response = new HttpResults();
        string cookie = "";

        // A fresh request object is built for every call.
        HttpItems request = new HttpItems();
        request.URL = url;
        request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:17.0) Gecko/20100101 Firefox/17.0";
        request.Cookie = cookie;
        // Follow redirects automatically.
        request.Allowautoredirect = true;
        request.ContentType = "application/x-www-form-urlencoded";

        response = http.GetHtml(request, ref cookie);
        return response.Html;
    }
    catch (Exception e)
    {
        return e.ToString();
    }
}
// Usage: call GetRegexStr("text to search").
// Matches the text between <title> and </title>. Built once and shared:
// GetRegexStr is called from several worker threads, and a Regex instance is
// safe to use for matching from multiple threads.
static readonly Regex reg = new Regex(
    "(?<=<title>).*?(?=</title>)",
    RegexOptions.IgnoreCase | RegexOptions.Singleline);

/// <summary>
/// Extracts the page title (the text between the title tags) from HTML.
/// </summary>
/// <param name="reString">HTML text to search; may be null or empty.</param>
/// <returns>
/// The title with surrounding whitespace removed, or an empty string when the
/// input is null/empty or contains no title element.
/// </returns>
private string GetRegexStr(string reString)
{
    if (string.IsNullOrEmpty(reString))
    {
        return string.Empty;
    }

    // Tag case is ignored and the title may span several lines, so trim the
    // line breaks and indentation that can surround it.
    return reg.Match(reString).Value.Trim();
}
// Sequence number shown in the first column of the next row to be added;
// reset to 1 when the list is cleared.
int i = 1;

/// <summary>
/// Adds the URL typed into textBox1 as a new row in the "waiting" state and
/// scrolls the list to it. Does nothing when the text box is empty.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    if (textBox1.Text == "")
    {
        return;
    }

    ListViewItem item = new ListViewItem();
    item.SubItems[0].Text = (i++).ToString();
    item.SubItems.Add(textBox1.Text);
    item.SubItems.Add("等待检测");
    items.Add(item);

    this.ItemsSource = items;
    this.CurrentCacheItemsSource = this.ItemsSource;
    LoadListViewItems(this.CurrentCacheItemsSource);

    // Scroll to the row just appended. The index comes from the list itself,
    // not from the sequence counter, so it cannot drift.
    listView1.Items[items.Count - 1].EnsureVisible();
}
/// <summary>
/// Terminates the whole process with exit code 0 as soon as the form starts closing.
/// </summary>
/// <param name="sender">The form being closed (unused).</param>
/// <param name="e">Closing event data (unused).</param>
private void Form1_FormClosing(object sender, FormClosingEventArgs e)
{
    const int exitCode = 0;
    Environment.Exit(exitCode);
}
// Backing store for every row shown in the list view. Rows are appended by
// button3_Click and daoru and removed by button5_Click; the list is published
// to the view through ItemsSource / CurrentCacheItemsSource.
List<ListViewItem> items = new List<ListViewItem>();
/// <summary>
/// Imports URLs from text files (one URL per line), adding each as a row in
/// the "waiting" state. After each file, the list view is refreshed on the UI
/// thread.
/// </summary>
/// <param name="fName">A <c>string[]</c> of file paths to read.</param>
private void daoru(object fName)
{
    // NOTE(review): this method is queued on the thread pool by button1_Click
    // and mutates `items` and `i` from there — confirm nothing else touches
    // them while an import is running.
    string[] files = (string[])fName;
    foreach (string file in files)
    {
        string[] lines = File.ReadAllLines(file, Encoding.ASCII);
        foreach (string line in lines)
        {
            // Blank lines (e.g. a trailing newline) are not URLs; skipping
            // them avoids rows that could never be resolved.
            string url = line.Trim();
            if (url.Length == 0)
            {
                continue;
            }

            ListViewItem item = new ListViewItem();
            item.Text = (i++).ToString();
            item.SubItems.Add(url);
            item.SubItems.Add("等待检测");
            items.Add(item);
        }

        // Refresh the view once per file, on the UI thread.
        this.BeginInvoke(new Action(() => ThreadCallBack_2()));
    }
}
/// <summary>
/// Publishes the current <c>items</c> list as the view's data source and
/// reloads the list view from it.
/// </summary>
private void ThreadCallBack_2()
{
    ItemsSource = items;
    CurrentCacheItemsSource = ItemsSource;

    LoadListViewItems(CurrentCacheItemsSource);
}
/// <summary>
/// Lets the user pick files and, if the dialog is confirmed, queues their
/// import (<c>daoru</c>) on the thread pool.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    DialogResult choice = openFileDialog1.ShowDialog();
    if (choice != DialogResult.OK)
    {
        return;
    }

    string[] selectedFiles = openFileDialog1.FileNames;
    WaitCallback import = new WaitCallback(daoru);
    ThreadPool.QueueUserWorkItem(import, selectedFiles);
}
/// <summary>
/// Exports every row of the list view to a time-stamped text file in the
/// application's startup directory, one row per line. Shows an error message
/// when the list is empty.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    if (listView1.Items.Count == 0)
    {
        MessageBox.Show("列表为空,无法导出", "错误");
        return;
    }

    // "HH" (24-hour clock) keeps morning and afternoon exports from producing
    // the same file name; the previous "hh" stamp was ambiguous.
    string fileName = "导出文件" + DateTime.Now.ToString("yyyy年MM月dd日HH时mm分ss秒") + ".txt";
    string stFilePath = Path.Combine(Application.StartupPath.Trim(), fileName);

    // StreamWriter creates the file or overwrites an existing one, so no
    // File.Exists branch is needed; `using` closes it even if a write throws.
    using (StreamWriter swStream = new StreamWriter(stFilePath))
    {
        for (int row = 0; row < listView1.Items.Count; row++)
        {
            ListViewItem item = listView1.Items[row];
            for (int col = 0; col < item.SubItems.Count; col++)
            {
                swStream.Write(item.SubItems[col].Text);

                // Column separators: "." after the sequence number.
                if (col == 0)
                    swStream.Write(".");
                // NOTE(review): the separator after the URL column is an empty
                // string in the source, so URL and title are written back to
                // back — confirm whether a delimiter was intended here.
                if (col == 1)
                    swStream.Write("");
            }
            swStream.WriteLine();
        }
    }
}
/// <summary>
/// Form start-up: turns off WinForms' illegal cross-thread call checking and
/// selects the first radio button.
/// </summary>
/// <param name="sender">The form (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_Load(object sender, EventArgs e)
{
    // Disable the debugging check for controls being touched from other threads.
    Control.CheckForIllegalCrossThreadCalls = false;

    radioButton1.Checked = true;
}
/// <summary>
/// Puts listView1 into virtual mode, sized to the given list, with
/// <c>listView_RetrieveVirtualItem</c> supplying the rows on demand.
/// </summary>
/// <param name="items">Rows to display; null is treated as an empty list.</param>
private void LoadListViewItems(List<ListViewItem> items)
{
    // This method runs on every add/import/clear. Detach before attaching so
    // the handler is subscribed exactly once instead of piling up per call.
    listView1.RetrieveVirtualItem -= listView_RetrieveVirtualItem;
    listView1.RetrieveVirtualItem += listView_RetrieveVirtualItem;

    listView1.FullRowSelect = true;
    listView1.VirtualListSize = items == null ? 0 : items.Count;
    listView1.VirtualMode = true;
}
/// <summary>
/// Virtual-mode callback: supplies the row at the requested index from
/// <c>CurrentCacheItemsSource</c>. Leaves <c>e.Item</c> unset when there is no
/// data.
/// </summary>
/// <param name="sender">The list view requesting the item (unused).</param>
/// <param name="e">Carries the requested index and receives the item.</param>
void listView_RetrieveVirtualItem(object sender, RetrieveVirtualItemEventArgs e)
{
    var cache = this.CurrentCacheItemsSource;
    if (cache == null || cache.Count == 0)
    {
        return;
    }

    // The former "ItemIndex == Count" branch that cleared the cache could
    // never run: indexing at Count throws before the check is reached.
    e.Item = cache[e.ItemIndex];
}
/// <summary>
/// Clears every row, reloads the now-empty list view and restarts the row
/// numbering at 1.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button5_Click(object sender, EventArgs e)
{
    items.Clear();

    ItemsSource = items;
    CurrentCacheItemsSource = ItemsSource;
    LoadListViewItems(CurrentCacheItemsSource);

    // The next row added gets sequence number 1 again.
    i = 1;
}
/// <summary>
/// Empty click handler; its body contains only disabled code. The working
/// pause/resume logic is in button6_Click_1.
/// NOTE(review): which of the two handlers is attached to button6 is decided
/// in the designer file, which is not visible here — confirm before removing.
/// </summary>
private void button6_Click(object sender, EventArgs e)
{
//logg = true;
//ThreadPool.SetMaxThreads(0, 0);
}
/// <summary>
/// Empty DoWork handler; no background work is performed here.
/// </summary>
private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
{
}
// Selected lookup mode read by get_biaoti: 1 = query the link114 web service,
// 2 = download the page and extract its title.
int jiekou = 1;

/// <summary>
/// Keeps <c>jiekou</c> in step with radioButton1: 1 when it is checked, 2 otherwise.
/// </summary>
/// <param name="sender">The radio button (unused).</param>
/// <param name="e">Event data (unused).</param>
private void radioButton1_CheckedChanged(object sender, EventArgs e)
{
    jiekou = radioButton1.Checked ? 1 : 2;
}
/// <summary>
/// Toggles between paused and running: sets the pause flag <c>logg</c> and
/// flips the button caption between "pause" and "resume".
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button6_Click_1(object sender, EventArgs e)
{
    // The caption shows the action the button will perform next, so a
    // "pause" caption means the run is currently active.
    bool pauseRequested = button6.Text == "暂 停";

    logg = pauseRequested ? 1 : 0;
    button6.Text = pauseRequested ? "继 续" : "暂 停";
}
/// <summary>
/// Stops the run: raises the stop flag and returns the label, progress bar
/// and buttons to their idle state.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button7_Click(object sender, EventArgs e)
{
    // Tell the dispatch loop in button2_Click not to start further lookups.
    stop = 1;

    label2.Text = "0";
    progressBar1.Visible = false;

    // Re-enable the editing buttons.
    button2.Enabled = true;
    button5.Enabled = true;
    button1.Enabled = true;
    button3.Enabled = true;
    button4.Enabled = true;

    // Reset and hide the pause/stop controls.
    button6.Text = "暂 停";
    button6.Visible = false;
    button7.Visible = false;
}
}
}
html获取网站标题,批量获取网站标题相关推荐
- 微信小程序获取formId (批量获取)
针对微信小程序发送模板消息需要的formId,本人表示有的时候很头疼,所以给大家提供了一个收集formId的简单方法: <form bindsubmit="form_submit" ...
- Tushare批量获取各城市股票日度行情数据
Tushare批量获取各股票行情数据 tushare接口基本设置 1.tushare注册 2.软件准备 3.调用数据库 批量获取股票收盘价 ID:447607 tushare为广大金融分析人员提供了一 ...
- 【批量获取文件名及批量文件重命名】
批量获取文件名及批量文件重命名 批量获取文件名 1.批量获取文件名 批量文件重命名 2.批量文件重命名 本次操作所用文件路径及该目录下所有文件如下图: 批量获取文件名 1.批量获取文件名 步骤一:新建 ...
- 批量提取html title,怎样批量提取网站的标题和链接呢?
前段时间,我把Blog重新升级和更换了程序,对于SEO来说,当你的网站结构发生了变化,为了不影响收录,最好把对应的链接提交到百度站长平台上,也就是A文章原来的连接要和新的连接要对上.但是连接怎样提出来 ...
- 用EXCEL批量获取网页标题的方法
这段时间准备做淘宝,但不知道卖什么产品,因此想从一些B2B 网站上扒拉一些产品词下来挨个研究,但一个一个的打开网页查看产品太慢太费事,但想到这些产品词都存在于网页标题上,因此想到了用excel来批量获 ...
- 浏览器批量采集网站标题 保存Excel表格
有一批网址,不知道是什么网站,怎样才能快速获得网站标题呢?方法很简单,就是使用浏览器一个个地打开,记录下网站标题就可以了,关键是找一个具有自动完成功能的浏览器.具体步骤如下: 1.先把网址整理一下,保 ...
- Python爬虫1:批量获取电影标题和剧照
1.爬取某电影网站的电影名称 下面展示一些 代码. import requests from lxml import etreedef dianying(number):url ='https://w ...
- python爬虫下载链接_【Python项目】简单爬虫批量获取资源网站的下载链接
简单爬虫批量获取资源网站的下载链接 1 由来 自己在收集剧集资源的时候,这些网站的下载链接还要手动一个一个复制到百度云离线下载里,稍微懂了一点编程就不想做这种无意义的劳动了.于是就写了一个这样的一个小 ...
- 什么是网站描述?如何批量获取网站描述?网站优化推广跟网站描述有什么关系?
在建立网站时,用户可能会想,是否要在百度优化选项中对网站页面进行描述. 此时,用户需要了解,网站描述实际上是对网站页面内容的简要总结.如果描述与网站页面内容相匹配,搜索引擎将把描述作为网站介绍或摘要的 ...
最新文章
- SAP WM初阶之TO报表LX10 - Evaluation of movements per storage type
- oracle错误:1067进程意外终止
- SAP CRM WebClient UI BSP customized theme storage table
- springboot取yml中的值_@Value拜拜:更优雅的获取springboot yml中的值
- qt for android 图片可拉伸,qt实现九宫格布局,图片拉伸
- bootstrap文件不能被识别_如何使用npm安装bootstrap
- K8S精华问答 | Kubernetes集群不能正常工作,难道是防火墙问题?
- 消息推送平台高可用实践(下)
- http的“无连接”指的是_头条一面:HTTP协议无状态中的 "状态" 到底指的是什么?...
- oracle ola_Ola Hallengren SQL Server维护解决方案–数据库完整性检查
- SpringBoot+Shiro,java开发面试问题大全及答案大全
- db2怎么限定查询条数_查询数据限制显示条数
- 集合差集操作:a - b 的含义为在集合a中,但不在b中的元素集合。
- 说的特别好的一句话,送给每一个热爱编程的人
- 广州岑村科目二a考场,第一次考满分通过
- python使用openCV图像加载(转化为灰度图像)、平滑图像处理就是将每个像素的值变换为其相邻元素的平均值、可视化平滑处理之后的图像(Blurring Images)
- 一种高效的Polar码冻结比特编译码方法
- 对数函数 (logarithmic function)
- 【C/C++】scanf,printf 函数
- Python与数据库