C#爬虫selenium webdriver使用火狐浏览器截取全屏并转成pdf
- 对于网页页面很长的,使用webdriver自带的方法进行截屏时,默认的最大截取宽度是16384,超过最大宽度就要对页面进行裁剪拼接
/// <summary>
/// Captures the whole page by taking one screenshot per viewport-sized tile,
/// scrolling between tiles, and stitching the tiles into a single bitmap.
/// </summary>
/// <remarks>
/// The first tile is captured at the page's current scroll position and every
/// later move is a relative <c>window.scrollBy</c>, so the page should be
/// scrolled to its origin before this method is called.
/// </remarks>
/// <returns>
/// The stitched bitmap; the caller owns and must dispose it. If a failure occurs
/// part-way through, the partially filled bitmap is returned, or <c>null</c> when
/// the failure happened before the bitmap was allocated.
/// </returns>
public Bitmap GetFullScreenshot()
{
    Bitmap stitchedImage = null;
    try
    {
        IJavaScriptExecutor scriptExecutor = (IJavaScriptExecutor)this.webDriver;

        // Script results arrive boxed as object. Convert.ToInt32 accepts any boxed
        // numeric type, whereas an unboxing cast to long throws if the driver
        // hands back a double.
        int totalWidth = Convert.ToInt32(scriptExecutor.ExecuteScript("return document.body.offsetWidth"));
        int totalHeight = Convert.ToInt32(scriptExecutor.ExecuteScript("return document.body.parentNode.scrollHeight"));

        // Size of the visible viewport.
        int viewportWidth = Convert.ToInt32(scriptExecutor.ExecuteScript("return document.body.clientWidth"));
        int viewportHeight = Convert.ToInt32(scriptExecutor.ExecuteScript("return window.innerHeight"));

        // A zero step would make the tiling loops below spin forever.
        if (viewportWidth <= 0 || viewportHeight <= 0)
        {
            throw new InvalidOperationException(
                "Viewport size must be positive, but was " + viewportWidth + "x" + viewportHeight + ".");
        }

        // Split the page into viewport-sized tiles, row by row.
        List<Rectangle> rectangles = new List<Rectangle>();
        for (int top = 0; top < totalHeight; top += viewportHeight)
        {
            // The last row may be shorter than the viewport.
            int tileHeight = Math.Min(viewportHeight, totalHeight - top);

            for (int left = 0; left < totalWidth; left += viewportWidth)
            {
                // The last column may be narrower than the viewport.
                int tileWidth = Math.Min(viewportWidth, totalWidth - left);
                rectangles.Add(new Rectangle(left, top, tileWidth, tileHeight));
            }
        }

        stitchedImage = new Bitmap(totalWidth, totalHeight);

        // One Graphics object is enough for all tiles.
        using (Graphics g = Graphics.FromImage(stitchedImage))
        {
            Rectangle previous = Rectangle.Empty;
            foreach (Rectangle rectangle in rectangles)
            {
                // Scroll from the previous tile to this one (nothing to do for the first tile).
                if (previous != Rectangle.Empty)
                {
                    int xDiff = rectangle.Right - previous.Right;
                    int yDiff = rectangle.Bottom - previous.Bottom;
                    scriptExecutor.ExecuteScript(String.Format("window.scrollBy({0}, {1})", xDiff, yDiff));

                    // Give the page a moment to finish scrolling and repainting.
                    System.Threading.Thread.Sleep(200);
                }

                var screenshot = ((ITakesScreenshot)this.webDriver).GetScreenshot();

                // Image.FromStream needs the stream to stay open for as long as the
                // image is used, so both are kept alive until the tile is drawn.
                using (MemoryStream memStream = new MemoryStream(screenshot.AsByteArray))
                using (Image screenshotImage = Image.FromStream(memStream))
                {
                    // For a partial tile the scroll distance was shorter than the
                    // viewport, so the new content sits at the bottom/right edge of
                    // the screenshot.
                    Rectangle sourceRectangle = new Rectangle(
                        viewportWidth - rectangle.Width,
                        viewportHeight - rectangle.Height,
                        rectangle.Width,
                        rectangle.Height);

                    g.DrawImage(screenshotImage, rectangle, sourceRectangle, GraphicsUnit.Pixel);
                }

                previous = rectangle;
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep returning whatever has been stitched so far, but leave a
        // trace instead of hiding the failure completely.
        System.Diagnostics.Trace.TraceError("GetFullScreenshot failed: " + ex);
    }

    return stitchedImage;
}
- 截取图片前要确保网页上的内容都已加载完成
/// <summary>
/// Runs a JavaScript snippet in the current page.
/// </summary>
/// <param name="js">The script source to execute.</param>
/// <remarks>
/// The script is only executed for the IE, Chrome, Firefox and Safari browser
/// types; any other browser type is silently ignored.
/// </remarks>
public void ExecuteJS(string js)
{
    switch (this.browserType)
    {
        case BrowsersType.IE:
        case BrowsersType.Chrome:
        case BrowsersType.Firefox:
        case BrowsersType.Safari:
            // Go through the IJavaScriptExecutor interface, as GetFullScreenshot
            // does, instead of casting to each concrete driver class.
            ((IJavaScriptExecutor)this.webDriver).ExecuteScript(js, null);
            break;
    }
}
/// <summary>
/// Navigates the wrapped web driver to the given address.
/// </summary>
/// <param name="url">The address to load.</param>
public void GoToUrl(string url)
{
    var navigation = this.webDriver.Navigate();
    navigation.GoToUrl(url);
}
上面方法都是对webdriver进行二次封装的,封装在OneDriver 类里面
下面是调用截取全屏的方法
OneDriver driver = new OneDriver(BrowsersType.Firefox, "");
string pngFilename = filename.Replace(".pdf", ".png");
try
{
    try
    {
        driver.GoToUrl(url);
    }
    catch (System.Exception)
    {
        // Retry over HTTPS, but only when the address really uses plain HTTP.
        // A blanket Replace("http", "https") would turn "https://" into
        // "httpss://" and also rewrite any later "http" inside the URL.
        const string httpPrefix = "http://";
        if (!url.StartsWith(httpPrefix, System.StringComparison.OrdinalIgnoreCase))
        {
            throw;
        }

        driver.GoToUrl("https://" + url.Substring(httpPrefix.Length));
    }

    Thread.Sleep(2000);

    // Scroll down step by step so that lazily loaded content gets a chance to
    // load (the number of steps may need tuning for very long pages).
    const int scrollSteps = 20;
    for (int i = 1; i <= scrollSteps; i++)
    {
        string jsCode = "window.scrollTo({top: document.body.scrollHeight / " + scrollSteps + " * " + i + ", behavior: \"smooth\"});";
        driver.ExecuteJS(jsCode);

        // Wait for the smooth scroll and any triggered loading to settle.
        Thread.Sleep(1000);
    }

    // Reset the scroll position before capturing; otherwise the stitched image
    // repeats the region that was visible when capturing started.
    driver.ExecuteJS("var q=document.documentElement.scrollTop=0");

    // The returned bitmap is owned by this code and must be disposed.
    using (var screenshot = driver.GetFullScreenshot())
    {
        if (screenshot != null)
        {
            screenshot.Save(pngFilename);
        }
        else
        {
            System.Diagnostics.Trace.TraceError("No screenshot could be captured for " + url);
        }
    }
}
catch (System.Exception ex)
{
    // Best effort: a failed capture must not abort the caller, but it should
    // not disappear without a trace either.
    System.Diagnostics.Trace.TraceError("Capturing " + url + " failed: " + ex);
}
finally
{
    driver.Cleanup();
}
备注:截屏前如果不将滚动条复位到初始位置,拼接的图片会出现一些异常,容易把未复位时的可视区域截取多次
- 使用itextsharp将png图片插入到pdf中。因itextsharp每页的最大尺寸为14400*14400,超过该尺寸的图片需要先拆分成多张,再逐页插入到pdf中
/// <summary>
/// Copies a rectangular region out of an image file onto a white canvas and
/// saves the canvas as a JPEG.
/// </summary>
/// <param name="sourcePath">Path of the image to cut from.</param>
/// <param name="savePath">Path of the output file; an existing file is replaced.</param>
/// <param name="startX">Left edge of the region in the source image, in pixels.</param>
/// <param name="startY">Top edge of the region in the source image, in pixels.</param>
/// <param name="saveWidth">Width of both the region and the output image, in pixels.</param>
/// <param name="saveHeight">Height of both the region and the output image, in pixels.</param>
/// <param name="endX">Left edge of the pasted region inside the output image.</param>
/// <param name="endY">Top edge of the pasted region inside the output image.</param>
/// <returns>
/// <c>true</c> when the file was written; <c>false</c> when cutting or saving failed.
/// Failures to open <paramref name="sourcePath"/> are not caught and propagate to the caller.
/// </returns>
private bool SaveCutPic(string sourcePath, string savePath, int startX, int startY, int saveWidth, int saveHeight, int endX, int endY)
{
    using (System.Drawing.Image originalImg = System.Drawing.Image.FromFile(sourcePath))
    {
        try
        {
            using (System.Drawing.Bitmap partImg = new System.Drawing.Bitmap(saveWidth, saveHeight))
            {
                // A single Graphics object both clears and draws. The region is
                // copied 1:1 (source and destination have the same size), so no
                // interpolation or smoothing settings are needed.
                using (System.Drawing.Graphics graphics = System.Drawing.Graphics.FromImage(partImg))
                {
                    // Where the region lands inside the output image.
                    System.Drawing.Rectangle destRect = new System.Drawing.Rectangle(
                        new System.Drawing.Point(endX, endY),
                        new System.Drawing.Size(saveWidth, saveHeight));

                    // Which region of the source image is copied.
                    System.Drawing.Rectangle origRect = new System.Drawing.Rectangle(
                        new System.Drawing.Point(startX, startY),
                        new System.Drawing.Size(saveWidth, saveHeight));

                    graphics.Clear(System.Drawing.Color.White);
                    graphics.DrawImage(originalImg, destRect, origRect, System.Drawing.GraphicsUnit.Pixel);
                }

                // Release the source file before touching savePath, in case both
                // paths refer to the same file (Image.FromFile keeps it locked).
                originalImg.Dispose();

                if (File.Exists(savePath))
                {
                    File.SetAttributes(savePath, FileAttributes.Normal);
                    File.Delete(savePath);
                }

                // NOTE(review): the output is always JPEG-encoded, whatever
                // extension savePath carries.
                partImg.Save(savePath, System.Drawing.Imaging.ImageFormat.Jpeg);
            }
        }
        catch
        {
            return false;
        }
    }

    return true;
}
全屏截取后,按照图片宽度进行拆分,再将拆分后的图片插入到PDF中
// Tallest image that is written as a single PDF page. The same value is used
// for the "needs splitting" check and for the slice height, so no image
// height can fall between the two.
const int maxPageHeight = 14400;

string pngFilename = filename.Replace(".pdf", ".png");
iTextSharp.text.Image image = iTextSharp.text.Image.GetInstance(pngFilename);

// Files that become the PDF pages, in order.
string[] pageImagePaths;
if (image.Height > maxPageHeight)
{
    // Read the pixel size once instead of reloading the full screenshot for
    // every slice.
    int sourceWidth;
    int sourceHeight;
    using (System.Drawing.Image sourceImage = System.Drawing.Image.FromFile(pngFilename))
    {
        sourceWidth = sourceImage.Width;
        sourceHeight = sourceImage.Height;
    }

    // Ceiling division: the last slice holds whatever is left over.
    int counter = (sourceHeight + maxPageHeight - 1) / maxPageHeight;
    pageImagePaths = new string[counter];
    for (int i = 1; i <= counter; i++)
    {
        string slicePath = pngFilename.Replace(".png", i.ToString() + ".png");
        int offsetY = (i - 1) * maxPageHeight;
        int remaining = sourceHeight - offsetY;
        int sliceHeight = remaining < maxPageHeight ? remaining : maxPageHeight;

        // Stop here rather than embed a missing or stale slice file.
        if (!SaveCutPic(pngFilename, slicePath, 0, offsetY, sourceWidth, sliceHeight, 0, 0))
        {
            throw new System.InvalidOperationException("Could not write screenshot slice " + slicePath);
        }

        pageImagePaths[i - 1] = slicePath;
    }
}
else
{
    // Small enough to go onto one page as it is.
    pageImagePaths = new string[] { pngFilename };
}

using (FileStream fs = new FileStream(filename, FileMode.Create, FileAccess.Write, FileShare.None))
{
    using (Document doc = new Document())
    {
        using (PdfWriter writer = PdfWriter.GetInstance(doc, fs))
        {
            writer.SetFullCompression();
            writer.SetPdfVersion(iTextSharp.text.pdf.PdfWriter.PDF_VERSION_1_7);

            for (int pageIndex = 0; pageIndex < pageImagePaths.Length; pageIndex++)
            {
                iTextSharp.text.Image pageImage = iTextSharp.text.Image.GetInstance(pageImagePaths[pageIndex]);
                pageImage.SetAbsolutePosition(0, 0);

                // Each page is exactly as large as the image placed on it. The
                // size is set before the page is started: before Open() for the
                // first page, before NewPage() for every later one.
                doc.SetPageSize(new iTextSharp.text.Rectangle(0, 0, pageImage.Width, pageImage.Height, 0));
                if (pageIndex == 0)
                {
                    doc.Open();
                }
                else
                {
                    doc.NewPage();
                }

                writer.DirectContent.AddImage(pageImage, false);
            }

            doc.Close();
        }
    }
}
C#爬虫selenium webdriver使用火狐浏览器截取全屏并转成pdf相关推荐
- 火狐浏览器的全屏兼容问题 allowfullscreen=true
一般开发后台系统都用到 iframe 来嵌套内容区域, <iframe allowfullscreen="true" .... 加上allowfullscreen=" ...
- Chrome浏览器截取全屏(无需安装任何插件)
Mac系统操作,截取整个网页,方法如下: (1)在待截取网页鼠标右键,点击"检查". (2)按"shift+command+p"组合健,如下图所示: (3)输入 ...
- 包括edge,Chrome,火狐、百度,360等浏览器怎么全屏_如何在Microsoft Edge中启用和禁用全屏模式以及解决浏览器无法开启或关闭全屏的问题
文章目录 1. 引出问题 2. 解决问题 2.1 使用缩放菜单 2.2 Win + Shift + Enter窍门 2.3 最大化与全屏模式 2.4 以全屏模式观看网络视频 3. 重要总结 4. 解决 ...
- 前端js使浏览器窗口全屏与退出----浏览器全屏时 监测通过esc按键退出全屏 (退出全屏时有页面上的相关处理)
vue项目 项目分为顶部导航.侧边导航.以及右边mainContent区域 需求是 让项目的其中一个页面有全屏功能 并且在全屏时隐藏掉顶部的顶导航栏 实现: 1.在state里 定义一个全局 ...
- jquery实现浏览器全屏和浏览器退出全屏
2019独角兽企业重金招聘Python工程师标准>>> js方法如下: /** 浏览器全屏*/ function fullScreen() {var el = document.do ...
- 兼容IE浏览器的全屏机制
由于IE浏览器较某些版本不支持H5的全屏机制,因此需要对全屏做兼容处理,以方便人们的使用. 首先我们要介绍两个方法: 1. 请求进入全屏模式 element.requestFullscreen() 该 ...
- JS全屏代码,解决PDF.js在iframe中部分浏览器全屏功能错误
JS全屏代码,解决PDF.js在iframe中部分浏览器全屏功能错误 PDF.js在iframe模式在火狐浏览器中按钮被屏蔽 经分析,viewer.js 发现了判断逻辑 debug发现 第二个判断后为 ...
- 浏览器的全屏功能小结
原文地址: 浏览器的全屏功能小结 背景 浏览器页面在视频播放, 图片浏览, 编辑文本等场景, 会遇到增大页面的可查看和可交互区域的需求. 对于这种场景, 一般做法是提供一个全屏按钮, 用户可以选择点击 ...
- 使用JavaScript使浏览器进入全屏或退出全屏
使用JavaScript使浏览器进入全屏或退出全屏 首先使用fullscreenElement判断浏览器是否在全屏状态. 如果是:则调用exitFullscreen函数退出全屏,否则调用request ...
最新文章
- 资深程序员感叹:表妹成绩好却无奈辍学开理发店,月入6万,上大学没用!网友:那是你没用!...
- 在Visual Studio中使用序列图描述对象之间的互动
- StringBuider 的效率一定高吗?要看你怎么使用
- keymap in ubuntu
- Linux群常见问题整理(一)[转]
- Linux 线程调度与优先级
- JS 判断是否为IP格式
- 文件系统EXT3,EXT4和XFS的区别
- php远程下载头像,Laravel 项目中 远程把图片下载到本地
- Android 网络质量 + 下载测速 (facebook/network-connection-class 使用)
- CSS中absolute和relative
- 在腾讯的八年,我的职业思考!
- Linux下常用压缩 解压命令和压缩比率对比
- 电脑蓝屏了怎么办修复,电脑蓝屏解决修复方法
- 陳三甲网络笔记:即使你摆正了姿势,也不一定能赚到钱!
- 线性代数基础2--齐次线性方程组的解及方程组解的总结
- 面试必备:冒泡,选择,插入,希尔,归并,快速排序大合集
- 最新推荐 | 清华NLP图神经网络GNN论文分门别类,16大应用200+篇论文
- python 如何获取百度热点内容
- python colormap 顺滑_平滑Matplotlib Colormap