C#爬虫selenium webdriver使用火狐浏览器截取全屏并转成pdf

对于网页页面很长的，使用webdriver自带的方法进行截屏时，默认的最大截取宽度是16384，超过最大宽度就要对页面进行裁剪拼接

/// <summary>
/// Captures the whole page by scrolling the browser viewport tile by tile and
/// stitching the per-viewport screenshots into a single bitmap.
/// </summary>
/// <returns>
/// The stitched bitmap; the caller owns it and should dispose it. When an error
/// occurs the error is traced and the result is either <c>null</c> (failure before
/// the bitmap was allocated) or a bitmap that is only partially drawn.
/// </returns>
public Bitmap GetFullScreenshot()
{
    Bitmap stitchedImage = null;
    try
    {
        IJavaScriptExecutor scriptExecutor = (IJavaScriptExecutor)this.webDriver;

        // Total size of the page.
        int totalWidth = (int)(long)scriptExecutor.ExecuteScript("return document.body.offsetWidth");
        int totalHeight = (int)(long)scriptExecutor.ExecuteScript("return  document.body.parentNode.scrollHeight");

        // Size of the viewport.
        int viewportWidth = (int)(long)scriptExecutor.ExecuteScript("return document.body.clientWidth");
        int viewportHeight = (int)(long)scriptExecutor.ExecuteScript("return window.innerHeight");

        // A zero-sized viewport would make the tiling loops below never advance.
        if (viewportWidth <= 0 || viewportHeight <= 0)
        {
            throw new InvalidOperationException(
                "Viewport size reported by the browser is not positive: " + viewportWidth + "x" + viewportHeight);
        }

        // Split the page into viewport-sized rectangles, row by row.
        List<Rectangle> rectangles = new List<Rectangle>();
        for (int top = 0; top < totalHeight; top += viewportHeight)
        {
            // The last row may be shorter than the viewport.
            int tileHeight = Math.Min(viewportHeight, totalHeight - top);

            for (int left = 0; left < totalWidth; left += viewportWidth)
            {
                // The last column may be narrower than the viewport.
                int tileWidth = Math.Min(viewportWidth, totalWidth - left);
                rectangles.Add(new Rectangle(left, top, tileWidth, tileHeight));
            }
        }

        // Build the target image and draw every tile into it with one Graphics object.
        stitchedImage = new Bitmap(totalWidth, totalHeight);
        using (Graphics g = Graphics.FromImage(stitchedImage))
        {
            Rectangle previous = Rectangle.Empty;
            foreach (Rectangle rectangle in rectangles)
            {
                // Scroll relative to the previous tile (nothing to do for the first one).
                if (previous != Rectangle.Empty)
                {
                    int xDiff = rectangle.Right - previous.Right;
                    int yDiff = rectangle.Bottom - previous.Bottom;

                    // Invariant culture: the numbers are JavaScript source text, so the
                    // minus sign must not depend on the current thread culture.
                    scriptExecutor.ExecuteScript(String.Format(
                        System.Globalization.CultureInfo.InvariantCulture,
                        "window.scrollBy({0}, {1})", xDiff, yDiff));

                    // Give the browser a moment to finish scrolling and repaint.
                    System.Threading.Thread.Sleep(200);
                }

                var screenshot = ((ITakesScreenshot)this.webDriver).GetScreenshot();

                // GDI+ needs the backing stream to stay open for as long as the image is
                // used, so both are kept alive until the tile has been drawn.
                using (MemoryStream memStream = new MemoryStream(screenshot.AsByteArray))
                using (Image screenshotImage = Image.FromStream(memStream))
                {
                    // Full tiles start at (0, 0); a shorter/narrower trailing tile is taken
                    // from the bottom/right part of the screenshot (presumably because the
                    // browser cannot scroll past the end of the page).
                    // NOTE(review): assumes one screenshot pixel per CSS pixel - confirm on
                    // displays with a device pixel ratio other than 1.
                    Rectangle sourceRectangle = new Rectangle(
                        viewportWidth - rectangle.Width,
                        viewportHeight - rectangle.Height,
                        rectangle.Width,
                        rectangle.Height);

                    g.DrawImage(screenshotImage, rectangle, sourceRectangle, GraphicsUnit.Pixel);
                }

                previous = rectangle;
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep returning whatever was produced so far, but leave a trace
        // instead of hiding the failure completely.
        System.Diagnostics.Trace.TraceError("GetFullScreenshot failed: " + ex);
    }

    return stitchedImage;
}

截取图片前要确保网页上的内容都已加载完成

        /// <summary>
        /// Runs a JavaScript snippet in the page currently loaded by the wrapped driver.
        /// </summary>
        /// <param name="js">JavaScript source to execute.</param>
        /// <remarks>
        /// Uses the <c>IJavaScriptExecutor</c> interface instead of casting to one concrete
        /// driver class per browser type, so every script-capable driver is handled by the
        /// same code path. If the driver cannot execute scripts, a warning is traced and
        /// the call does nothing.
        /// </remarks>
        public void ExecuteJS(string js)
        {
            IJavaScriptExecutor executor = this.webDriver as IJavaScriptExecutor;
            if (executor == null)
            {
                System.Diagnostics.Trace.TraceWarning(
                    "ExecuteJS skipped: the current driver does not support script execution.");
                return;
            }

            // The original call passed a null argument list; keep it that way.
            executor.ExecuteScript(js, null);
        }

/// <summary>
/// Loads the given address in the wrapped browser.
/// </summary>
/// <param name="url">Address to navigate to.</param>
public void GoToUrl(string url)
{
    var navigation = this.webDriver.Navigate();
    navigation.GoToUrl(url);
}

上面方法都是对webdriver进行二次封装的，封装在OneDriver 类里面

下面是调用截取全屏的方法

// Open the page in Firefox, scroll through it so lazily loaded content appears,
// then save a full-page screenshot as a PNG next to the target PDF.
OneDriver driver = new OneDriver(BrowsersType.Firefox, "");
string pngFilename = filename.Replace(".pdf", ".png");
try
{
    try
    {
        driver.GoToUrl(url);
    }
    catch
    {
        // Retry over HTTPS only when the address really uses plain HTTP. Replacing
        // every "http" in the string would turn "https://" into "httpss://" and
        // could also rewrite the path or query.
        if (!url.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase))
        {
            throw;
        }

        driver.GoToUrl("https://" + url.Substring("http://".Length));
    }

    Thread.Sleep(2000);

    // Scroll down in 20 steps so that the page content gets loaded
    // (the number of steps may need tuning for very long pages).
    for (int i = 1; i <= 20; i++)
    {
        string jsCode = "window.scrollTo({top: document.body.scrollHeight / 20 * " + i + ", behavior: \"smooth\"});";
        driver.ExecuteJS(jsCode);

        // Wait for the smooth scroll and any triggered loading to finish.
        Thread.Sleep(1000);
    }

    // Reset the scroll position before capturing; otherwise the stitched image
    // contains the visible area from the non-reset position more than once.
    driver.ExecuteJS("var q=document.documentElement.scrollTop=0");

    using (var screenshot = driver.GetFullScreenshot())
    {
        if (screenshot == null)
        {
            System.Diagnostics.Trace.TraceError("No screenshot was produced for " + url);
        }
        else
        {
            screenshot.Save(pngFilename);
        }
    }
}
catch (System.Exception ex)
{
    // Still best effort, but leave a trace instead of silently ignoring the failure.
    System.Diagnostics.Trace.TraceError("Capturing " + url + " failed: " + ex);
}
finally
{
    driver.Cleanup();
}

备注：截屏前如果不将滚动条复位到初始位置，拼接的图片会出现一些异常，容易把未复位时的可视区域截取多次

使用 iTextSharp 将 PNG 图片插入到 PDF 中。由于 PDF 单页的最大尺寸为 14400×14400，超出该尺寸的图片需要先裁剪成多段，再逐页插入

/// <summary>
/// Copies a rectangular region out of an image file and saves it as a new image file.
/// </summary>
/// <param name="sourcePath">Path of the image to cut from.</param>
/// <param name="savePath">Path of the output file; an existing file is replaced.</param>
/// <param name="startX">Left edge of the region in the source image, in pixels.</param>
/// <param name="startY">Top edge of the region in the source image, in pixels.</param>
/// <param name="saveWidth">Width of the region and of the output image, in pixels.</param>
/// <param name="saveHeight">Height of the region and of the output image, in pixels.</param>
/// <param name="endX">Left offset inside the output image at which the region is drawn.</param>
/// <param name="endY">Top offset inside the output image at which the region is drawn.</param>
/// <returns>
/// <c>true</c> when the output file was written; <c>false</c> when cutting or saving failed
/// (the error is traced). Failures while opening <paramref name="sourcePath"/> still throw.
/// </returns>
private bool SaveCutPic(string sourcePath, string savePath, int startX, int startY, int saveWidth, int saveHeight, int endX, int endY)
{
    using (System.Drawing.Image originalImg = System.Drawing.Image.FromFile(sourcePath))
    {
        try
        {
            using (System.Drawing.Bitmap partImg = new System.Drawing.Bitmap(saveWidth, saveHeight))
            {
                using (System.Drawing.Graphics graphics = System.Drawing.Graphics.FromImage(partImg))
                {
                    // Target position inside the output image.
                    System.Drawing.Rectangle destRect = new System.Drawing.Rectangle(
                        new System.Drawing.Point(endX, endY),
                        new System.Drawing.Size(saveWidth, saveHeight));

                    // Region of the source image; it has the same size as the target,
                    // so the copy is 1:1 and no interpolation settings are needed.
                    System.Drawing.Rectangle origRect = new System.Drawing.Rectangle(
                        new System.Drawing.Point(startX, startY),
                        new System.Drawing.Size(saveWidth, saveHeight));

                    // White background for any area the copied region does not cover.
                    graphics.Clear(System.Drawing.Color.White);
                    graphics.DrawImage(originalImg, destRect, origRect, System.Drawing.GraphicsUnit.Pixel);
                }

                // Release the source file before touching savePath, in case both paths
                // point at the same file (disposing again at the end of the using is harmless).
                originalImg.Dispose();

                if (File.Exists(savePath))
                {
                    // Clear read-only/hidden flags so the old file can be deleted.
                    File.SetAttributes(savePath, FileAttributes.Normal);
                    File.Delete(savePath);
                }

                // NOTE(review): the output is always JPEG-encoded, whatever extension
                // savePath has - confirm that this is intended.
                partImg.Save(savePath, System.Drawing.Imaging.ImageFormat.Jpeg);
            }
        }
        catch (System.Exception ex)
        {
            System.Diagnostics.Trace.TraceError("SaveCutPic failed for " + savePath + ": " + ex);
            return false;
        }
    }

    return true;
}

全屏截取后，按照图片宽度进行拆分，再将拆分后的图片插入到PDF中

// Convert the full-page screenshot into a PDF. Screenshots taller than one PDF page
// allows are cut into horizontal tiles and written as one page per tile.
string pngFilename = filename.Replace(".pdf", ".png");
iTextSharp.text.Image image = iTextSharp.text.Image.GetInstance(pngFilename);

// Maximum page height; the same value is used as split threshold and as tile height,
// so a screenshot between the two old limits (14400 and 14440) is now split as well.
// NOTE(review): a screenshot wider than this limit is not handled here.
const int maxPageHeight = 14400;

bool needsSplit = image.Height > maxPageHeight;
string[] pagePaths;
if (needsSplit)
{
    // Read the pixel size once; the file is released again before SaveCutPic opens it.
    int sourceWidth;
    int sourceHeight;
    using (System.Drawing.Image sourceImage = System.Drawing.Image.FromFile(pngFilename))
    {
        sourceWidth = sourceImage.Width;
        sourceHeight = sourceImage.Height;
    }

    // Number of tiles, rounded up.
    int counter = (sourceHeight + maxPageHeight - 1) / maxPageHeight;
    pagePaths = new string[counter];
    for (int i = 1; i <= counter; i++)
    {
        string tilePath = pngFilename.Replace(".png", i.ToString() + ".png");
        int top = (i - 1) * maxPageHeight;

        // The last tile only gets the remaining rows.
        int tileHeight = System.Math.Min(maxPageHeight, sourceHeight - top);

        if (!SaveCutPic(pngFilename, tilePath, 0, top, sourceWidth, tileHeight, 0, 0))
        {
            // Fail here instead of later picking up a missing or stale tile file.
            throw new System.InvalidOperationException("Could not write screenshot tile " + tilePath);
        }

        pagePaths[i - 1] = tilePath;
    }
}
else
{
    // Small enough to be inserted as a single page.
    pagePaths = new string[] { pngFilename };
}

using (FileStream fs = new FileStream(filename, FileMode.Create, FileAccess.Write, FileShare.None))
{
    using (Document doc = new Document())
    {
        using (PdfWriter writer = PdfWriter.GetInstance(doc, fs))
        {
            writer.SetFullCompression();
            writer.SetPdfVersion(iTextSharp.text.pdf.PdfWriter.PDF_VERSION_1_7);

            for (int page = 0; page < pagePaths.Length; page++)
            {
                if (needsSplit)
                {
                    image = iTextSharp.text.Image.GetInstance(pagePaths[page]);
                }

                image.SetAbsolutePosition(0, 0);

                // Each page is exactly as large as its image. The size of the first page
                // is set before the document is opened so that it applies to that page.
                doc.SetPageSize(new iTextSharp.text.Rectangle(0, 0, image.Width, image.Height, 0));
                if (page == 0)
                {
                    doc.Open();
                }
                else
                {
                    doc.NewPage();
                }

                writer.DirectContent.AddImage(image, false);
            }

            doc.Close();
        }
    }
}

C#爬虫selenium webdriver使用火狐浏览器截取全屏并转成pdf相关推荐

火狐浏览器的全屏兼容问题 allowfullscreen=true
一般开发后台系统都用到 iframe 来嵌套内容区域, <iframe allowfullscreen="true" .... 加上allowfullscreen=" ...
Chrome浏览器截取全屏（无需安装任何插件）
Mac系统操作,截取整个网页,方法如下: (1)在待截取网页鼠标右键,点击"检查". (2)按"shift+command+p"组合健,如下图所示: (3)输入 ...
包括edge，Chrome，火狐、百度，360等浏览器怎么全屏_如何在Microsoft Edge中启用和禁用全屏模式以及解决浏览器无法开启或关闭全屏的问题
文章目录 1. 引出问题 2. 解决问题 2.1 使用缩放菜单 2.2 Win + Shift + Enter窍门 2.3 最大化与全屏模式 2.4 以全屏模式观看网络视频 3. 重要总结 4. 解决 ...
前端js使浏览器窗口全屏与退出----浏览器全屏时监测通过esc按键退出全屏（退出全屏时有页面上的相关处理）
vue项目项目分为顶部导航.侧边导航.以及右边mainContent区域需求是让项目的其中一个页面有全屏功能并且在全屏时隐藏掉顶部的顶导航栏实现: 1.在state里定义一个全局 ...
jquery实现浏览器全屏和浏览器退出全屏
2019独角兽企业重金招聘Python工程师标准>>> js方法如下: /** 浏览器全屏*/ function fullScreen() {var el = document.do ...
兼容IE浏览器的全屏机制
由于IE浏览器较某些版本不支持H5的全屏机制,因此需要对全屏做兼容处理,以方便人们的使用. 首先我们要介绍两个方法: 1. 请求进入全屏模式 element.requestFullscreen() 该 ...
JS全屏代码，解决PDF.js在iframe中部分浏览器全屏功能错误
JS全屏代码,解决PDF.js在iframe中部分浏览器全屏功能错误 PDF.js在iframe模式在火狐浏览器中按钮被屏蔽经分析,viewer.js 发现了判断逻辑 debug发现第二个判断后为 ...
浏览器的全屏功能小结
原文地址: 浏览器的全屏功能小结背景浏览器页面在视频播放, 图片浏览, 编辑文本等场景, 会遇到增大页面的可查看和可交互区域的需求. 对于这种场景, 一般做法是提供一个全屏按钮, 用户可以选择点击 ...
使用JavaScript使浏览器进入全屏或退出全屏
使用JavaScript使浏览器进入全屏或退出全屏首先使用fullscreenElement判断浏览器是否在全屏状态. 如果是:则调用exitFullscreen函数退出全屏,否则调用request ...

C#爬虫selenium webdriver使用火狐浏览器截取全屏并转成pdf

下面是调用截取全屏的方法

C#爬虫selenium webdriver使用火狐浏览器截取全屏并转成pdf相关推荐

最新文章

热门文章