java如何解析word大纲_java读取word并解析
java POI3.8处理word模板,文字 图片 表格
将word模板里面的特殊标签(占位符)替换成文字、图片。
以下是处理的代码
特殊标签最好整体复制粘贴到word模板里面,因为手动敲入时哪怕有一点细小的差别,都可能导致这个标签被拆分成多个小块(chunk/run),而不是完整的一块,
这样就无法识别。设置文字样式时也最好只选中特殊标签本身整体进行设置,尽量不要多选(例如把标签后面的空格一起选中)。
这里的替换包含了文字样式的替换,图片的替换
-------------------------------------------------------------------------------------------------------------------------------------
package com.util.export;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.xwpf.usermodel.ParagraphAlignment;
import org.apache.poi.xwpf.usermodel.UnderlinePatterns;
import org.apache.poi.xwpf.usermodel.VerticalAlign;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.poi.xwpf.usermodel.XWPFTableRow;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlToken;
import org.openxmlformats.schemas.drawingml.x2006.main.CTNonVisualDrawingProps;
import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D;
import org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline;
public class WordCompileReport {
/**
 * Fills a .docx template and writes the result to a new file.
 *
 * <p>Steps, in order: replace text placeholders inside tables, replace text
 * placeholders in body paragraphs, append one fixed demo paragraph, replace
 * image placeholders (each image is rendered at 200x200 pixels), then save.
 *
 * <p>Any exception is printed to stderr and otherwise swallowed, so callers
 * get no failure signal from this method.
 *
 * @param srcPath  path of the template document to read
 * @param destPath path of the document to write
 * @param map      placeholder text to replacement text
 * @param mapImage placeholder text to image file path
 */
public static void searchAndReplace(String srcPath, String destPath,
        Map<String, String> map, Map<String, String> mapImage) {
    try {
        XWPFDocument document = new XWPFDocument(
                POIXMLDocument.openPackage(srcPath));
        // Replace placeholders inside tables.
        checkTables(document, map);
        // Replace placeholders in body paragraphs.
        checkParagraphs(document, map);
        // Append a paragraph of text at the end of the document.
        addParagraphToWord(document, "这里添加文字", 30, 0, "#EB9074", true);
        // Replace image placeholders.
        replaceTextToImage(document, mapImage, 200, 200);

        FileOutputStream outStream = new FileOutputStream(destPath);
        try {
            document.write(outStream);
        } finally {
            // Close even when write() fails so the file handle is not leaked.
            outStream.close();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Replaces text placeholders that occur inside table cells.
 *
 * <p>A placeholder is only replaced when it makes up the entire text of a
 * single run. The replacement run copies the style of the placeholder run
 * and is written non-bold.
 *
 * @param document document whose tables are scanned and modified in place
 * @param map      placeholder text to replacement text
 */
public static void checkTables(XWPFDocument document, Map<String, String> map) {
    Iterator<XWPFTable> tables = document.getTablesIterator();
    while (tables.hasNext()) {
        XWPFTable table = tables.next();
        int rowCount = table.getNumberOfRows();
        for (int i = 0; i < rowCount; i++) {
            XWPFTableRow row = table.getRow(i);
            for (XWPFTableCell cell : row.getTableCells()) {
                // The paragraph list does not depend on the entry, so read it once per cell.
                List<XWPFParagraph> paragraphs = cell.getParagraphs();
                for (Entry<String, String> e : map.entrySet()) {
                    for (XWPFParagraph paragraph : paragraphs) {
                        replaceInCellParagraph(paragraph, e.getKey(), e.getValue());
                    }
                }
            }
        }
    }
}

/** Swaps every run of {@code paragraph} whose text equals {@code key} for a run holding {@code value}. */
private static void replaceInCellParagraph(XWPFParagraph paragraph, String key, String value) {
    if (paragraph.getText().indexOf(key) == -1) {
        return;
    }
    // The run list is re-read on every access because runs are removed and
    // inserted while iterating; the run count stays the same.
    for (int c = 0; c < paragraph.getRuns().size(); c++) {
        XWPFRun run = paragraph.getRuns().get(c);
        // key.equals(...) rather than getText(0).equals(key): getText(0) can
        // be null for a run that carries no text element.
        if (!key.equals(run.getText(0))) {
            continue;
        }
        Map<String, Object> style = getWordXWPFRunStyle(run);
        paragraph.removeRun(c);
        XWPFRun newRun = paragraph.insertNewRun(c);
        if (newRun != null) {
            setWordXWPFRunStyle(newRun, style, value, false);
        }
    }
}
/**
 * Replaces text placeholders in the document's body paragraphs.
 *
 * <p>A placeholder is only replaced when it makes up the entire text of a
 * single run. The replacement run copies the style of the placeholder run
 * and is written bold.
 *
 * @param document document whose paragraphs are scanned and modified in place
 * @param map      placeholder text to replacement text
 */
public static void checkParagraphs(XWPFDocument document, Map<String, String> map) {
    List<XWPFParagraph> paragraphs = document.getParagraphs();
    for (int sa = 0; sa < paragraphs.size(); sa++) {
        XWPFParagraph paragraph = paragraphs.get(sa);
        for (Entry<String, String> e : map.entrySet()) {
            String key = e.getKey();
            if (paragraph.getText().indexOf(key) == -1) {
                continue;
            }
            // The run list is re-read on every access because runs are
            // removed and inserted while iterating.
            for (int p = 0; p < paragraph.getRuns().size(); p++) {
                XWPFRun runOld = paragraph.getRuns().get(p);
                if (!key.equals(runOld.toString())) {
                    continue;
                }
                // Capture the placeholder's formatting before removing it.
                Map<String, Object> style = getWordXWPFRunStyle(runOld);
                paragraph.removeRun(p);
                // Put the replacement text at the same position.
                XWPFRun runNew = paragraph.insertNewRun(p);
                if (runNew != null) {
                    setWordXWPFRunStyle(runNew, style, e.getValue(), true);
                }
            }
        }
    }
}
/**
 * Captures the formatting of a run so it can be applied to a replacement run.
 *
 * <p>Keys written: {@code "Color"}, {@code "FontSize"}, {@code "Subscript"},
 * {@code "Underline"}, {@code "FontFamily"}. A font size reported as
 * {@code -1} is stored as 12.
 *
 * @param runOld run to read the formatting from
 * @return a new mutable map holding the captured attributes
 */
public static Map<String, Object> getWordXWPFRunStyle(XWPFRun runOld) {
    Map<String, Object> mapAttr = new HashMap<String, Object>();
    mapAttr.put("Color", runOld.getColor());
    int fontSize = runOld.getFontSize();
    // -1 is what the run reports when it has no explicit size; fall back to 12.
    mapAttr.put("FontSize", fontSize == -1 ? 12 : fontSize);
    mapAttr.put("Subscript", runOld.getSubscript());
    mapAttr.put("Underline", runOld.getUnderline());
    mapAttr.put("FontFamily", runOld.getFontFamily());
    return mapAttr;
}
/**
 * Applies previously captured formatting to a run and sets its text.
 *
 * <p>Reads the keys {@code "Color"}, {@code "FontSize"}, {@code "Underline"},
 * {@code "Subscript"} and {@code "FontFamily"}. An attribute whose value is
 * missing or {@code null} is left untouched on the run, except the font
 * size, which falls back to 12 when missing or {@code -1}.
 *
 * @param runNew  run to format
 * @param mapAttr attributes, typically produced by {@code getWordXWPFRunStyle}
 * @param text    text to put into the run
 * @param flag    whether the run is bold
 * @return {@code runNew}, for chaining
 */
public static XWPFRun setWordXWPFRunStyle(XWPFRun runNew, Map<String, ?> mapAttr,
        String text, boolean flag) {
    Object color = mapAttr.get("Color");
    if (color != null) {
        runNew.setColor((String) color);
    }

    Object fontSize = mapAttr.get("FontSize");
    // A missing size or -1 (run without an explicit size) both mean 12.
    if (fontSize == null || "-1".equals(fontSize.toString())) {
        runNew.setFontSize(12);
    } else {
        runNew.setFontSize((Integer) fontSize);
    }

    runNew.setBold(flag);

    Object underline = mapAttr.get("Underline");
    if (underline != null) {
        runNew.setUnderline((UnderlinePatterns) underline);
    }

    runNew.setText(text);

    Object subscript = mapAttr.get("Subscript");
    if (subscript != null) {
        runNew.setSubscript((VerticalAlign) subscript);
    }

    Object fontFamily = mapAttr.get("FontFamily");
    if (fontFamily != null) {
        runNew.setFontFamily((String) fontFamily);
    }
    return runNew;
}
public static void updatePicture(XWPFDocument
document,int id, int width, int height) {
if(id==0){
id = document.getAllPictures().size()-1;
}
final int EMU = 9525;
width *= EMU;
height *= EMU;
String blipId =
document.getAllPictures().get(id).getPackageRelationship()
.getId();
CTInline inline =
document.createParagraph().createRun().getCTR()
.addNewDrawing().addNewInline();
String picXml = ""
+ ""
+ " "
+
"
"
+
"
" +
"
+ id
+ "\" name=\"Generated\"/>"
+
"
"
+
"
"
+
"
"
+
"
+ blipId
+ "\"
xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"/>"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
+ width
+ "\" cy=\""
+ height
+ "\"/>"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
"
+ " " +
"";
// CTGraphicalObjectData graphicData =
inline.addNewGraphic().addNewGraphicData();
XmlToken xmlToken = null;
try {
xmlToken = XmlToken.Factory.parse(picXml);
} catch (XmlException xe) {
xe.printStackTrace();
}
inline.set(xmlToken);
// graphicData.set(xmlToken);
inline.setDistT(0);
inline.setDistB(0);
inline.setDistL(0);
inline.setDistR(0);
CTPositiveSize2D extent =
inline.addNewExtent();
extent.setCx(width);
extent.setCy(height);
CTNonVisualDrawingProps docPr =
inline.addNewDocPr();
docPr.setId(id);
docPr.setName("IMG_" + id);
docPr.setDescr("IMG_" + id);
}
/**
 * Loads an image file into the document and appends it in a new paragraph.
 *
 * <p>If the image cannot be loaded, the error is printed and the document is
 * left without a new picture paragraph.
 *
 * @param document  document to modify
 * @param imagePath path of the image file
 * @param imageType one of the {@code XWPFDocument.PICTURE_TYPE_*} constants;
 *                  {@code 0} means JPEG
 * @param width     display width in pixels; non-positive values fall back to 400
 * @param height    display height in pixels; non-positive values fall back to 400
 */
public static void addPictureToWord(XWPFDocument document, String imagePath,
        int imageType, int width, int height) {
    if (0 == imageType) {
        imageType = XWPFDocument.PICTURE_TYPE_JPEG;
    }
    FileInputStream in = null;
    try {
        in = new FileInputStream(imagePath);
        document.addPictureData(in, imageType);
    } catch (InvalidFormatException e) {
        e.printStackTrace();
        // Nothing was added, so there is no new picture to place.
        return;
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return;
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // The picture bytes are already read; a close failure is harmless.
            }
        }
    }
    // Honour the requested size; 400 was the fixed size used previously and
    // remains the fallback for callers that pass no meaningful dimension.
    int w = width > 0 ? width : 400;
    int h = height > 0 ? height : 400;
    updatePicture(document, document.getAllPictures().size() - 1, w, h);
}
/**
 * Appends a paragraph containing a single run of text to the document.
 *
 * @param document  document to modify
 * @param text      paragraph text
 * @param fontSize  font size passed to the run
 * @param alignment {@code 1} = left, {@code 2} = center, {@code 3} = right;
 *                  any other value is used as the paragraph's left indentation
 * @param RGBColor  hex colour, with or without a leading {@code '#'};
 *                  {@code null} leaves the colour unset
 * @param isBold    whether the text is bold
 */
public static void addParagraphToWord(XWPFDocument document, String text,
        int fontSize, int alignment, String RGBColor, boolean isBold) {
    XWPFParagraph paragraph = document.createParagraph();
    switch (alignment) {
        case 1:
            // Was CENTER, identical to case 2; LEFT is the evident intent.
            paragraph.setAlignment(ParagraphAlignment.LEFT);
            break;
        case 2:
            paragraph.setAlignment(ParagraphAlignment.CENTER);
            break;
        case 3:
            paragraph.setAlignment(ParagraphAlignment.RIGHT);
            break;
        default:
            paragraph.setIndentationLeft(alignment);
            break;
    }

    XWPFRun runOne = paragraph.createRun();
    runOne.setText(text);
    runOne.setBold(isBold);
    runOne.setFontSize(fontSize);
    if (RGBColor != null) {
        // The run expects the bare hex digits, so drop a leading '#'.
        runOne.setColor(RGBColor.startsWith("#") ? RGBColor.substring(1) : RGBColor);
    }
}
/**
 * Appends a run of text to an existing paragraph.
 *
 * <p>No strikethrough is applied: the {@code setStrike(true)} call in the
 * earlier version was a commented-out line that a line wrap had made live.
 *
 * @param paragraph paragraph to append to
 * @param text      run text
 * @param fontSize  font size passed to the run
 * @param RGBColor  hex colour, with or without a leading {@code '#'};
 *                  {@code null} leaves the colour unset
 * @param isBold    whether the text is bold
 * @param isWrap    whether to add a line break after the text
 */
public static void addRunToParagraph(XWPFParagraph paragraph, String text,
        int fontSize, String RGBColor, boolean isBold, boolean isWrap) {
    XWPFRun runText = paragraph.createRun();
    runText.setBold(isBold);
    if (RGBColor != null) {
        // Same colour handling as addParagraphToWord: bare hex digits only.
        runText.setColor(RGBColor.startsWith("#") ? RGBColor.substring(1) : RGBColor);
    }
    runText.setFontSize(fontSize);
    runText.setText(text);
    if (isWrap) {
        runText.addBreak();
    }
}
/**
 * Replaces image placeholders in body paragraphs with inline pictures.
 *
 * <p>A placeholder is only replaced when it makes up the entire text of a
 * single run. The picture run is inserted at the position the placeholder
 * occupied. Failures to load an image are printed and skipped.
 *
 * @param document document to modify in place
 * @param mapImage placeholder text to image file path
 * @param width    display width in pixels
 * @param height   display height in pixels
 */
public static void replaceTextToImage(XWPFDocument document,
        Map<String, String> mapImage, int width, int height) {
    List<XWPFParagraph> paragraphs = document.getParagraphs();
    for (int sa = 0; sa < paragraphs.size(); sa++) {
        XWPFParagraph paragraph = paragraphs.get(sa);
        for (Entry<String, String> e : mapImage.entrySet()) {
            String key = e.getKey();
            if (paragraph.getText().indexOf(key) == -1) {
                continue;
            }
            // The run list is re-read on every access because runs are
            // removed and inserted while iterating.
            for (int p = 0; p < paragraph.getRuns().size(); p++) {
                if (!key.equals(paragraph.getRuns().get(p).toString())) {
                    continue;
                }
                // Remove the placeholder and put the picture run in its
                // place (previously the picture was appended to the end of
                // the paragraph via createRun()).
                paragraph.removeRun(p);
                XWPFRun pictureRun = paragraph.insertNewRun(p);
                if (pictureRun == null) {
                    pictureRun = paragraph.createRun();
                }
                CTInline inline = pictureRun.getCTR().addNewDrawing().addNewInline();
                try {
                    insertPicture(document, e.getValue(), inline, width, height);
                } catch (InvalidFormatException e1) {
                    e1.printStackTrace();
                } catch (FileNotFoundException e1) {
                    e1.printStackTrace();
                }
            }
        }
    }
}
public static void insertPicture(XWPFDocument
document,String filePath,CTInline inline,int width, int height)
throws InvalidFormatException, FileNotFoundException{
String ind = document.addPictureData(new
FileInputStream(filePath), 5);
int id =
document.getAllPictures().size()-1;
final int EMU = 9525;
width *= EMU;
height *= EMU;
String blipId =
document.getAllPictures().get(id).getPackageRelationship()
.getId();
String picXml = ""
+ ""
+ " "
+
"
"
+
"
" +
"
+ id
+ "\" name=\"Generated\"/>"
+
"
"
+
"
"
+
"
"
+
"
+ blipId
+ "\"
xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\"/>"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
+ width
+ "\" cy=\""
+ height
+ "\"/>"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
"
+
"
"
+ " " +
"";
inline.addNewGraphic().addNewGraphicData();
XmlToken xmlToken = null;
try {
xmlToken = XmlToken.Factory.parse(picXml);
} catch (XmlException xe) {
xe.printStackTrace();
}
inline.set(xmlToken);
inline.setDistT(0);
inline.setDistB(0);
inline.setDistL(0);
inline.setDistR(0);
CTPositiveSize2D extent =
inline.addNewExtent();
extent.setCx(width);
extent.setCy(height);
CTNonVisualDrawingProps docPr =
inline.addNewDocPr();
docPr.setId(id);
docPr.setName("IMG_" + id);
docPr.setDescr("IMG_" + id);
}
/**
 * Demo entry point: fills a sample template with two text values and two
 * images, using hard-coded Windows paths.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Text placeholders and their replacement values.
    Map<String, String> map = new HashMap<String, String>();
    map.put("${name}$", "02");
    map.put("${userIDs}$", "5201314");

    // Image placeholders and the image files that replace them.
    Map<String, String> mapImage = new HashMap<String, String>();
    mapImage.put("${image1}$", "F:\\A.jpg");
    mapImage.put("${image2}$", "F:\\B.jpg");

    String srcPath = "c:\\zhenli\\cc.docx";
    String destPath = "c:\\zhenli\\输出模版.docx";
    searchAndReplace(srcPath, destPath, map, mapImage);
}
}
java如何解析word大纲_java读取word并解析相关推荐
- php识别word语言,PHP读取word文档
在PHP中读取和写入WORD文档的代码 php // 建立一个指向新COM组件的索引 $word = new COM("word.application") or die(&quo ...
- python导入word转换的html,python如何转换word格式、读取word内容、转成html
# python如何转换word格式.读取word内容.转成html? import docx from win32com import client as wc # 首先将doc转换成docx wo ...
- java如何解析word大纲_java解析word文件
POI是Apache的一个开源项目,可以到Apache网站下载相应的jar包文件,及其源文件. POI提供了提取一些非TXT文本中文本内容的API,比如提取Word,Excel等,使用起来非常方便. ...
- java读取word文件并设置其字体样式_Java读取word文件,字体,颜色(示例代码)
在Android读取Word文件时,在网上查看时可以用tm-extractors,但好像没有提到怎么读取Word文档中字体的颜色,字体,上下标等相关的属性.但由于需要,要把doc文档中的内容(字体,下 ...
- java word文本框_Java 读取Word文本框中的文本、图片、表格
Word可插入文本框,文本框中可嵌入文本.图片.表格等内容.对文档中的已有文本框,也可以读取其中的内容.本文以Java程序代码来展示如何读取文本框,包括读取文本框中的文本.图片以及表格等. [程序环境 ...
- java读取带格式word文档_Java读取word文档解决方案
java读取word文档时,虽然网上介绍了很多插件poi.java2Word.jacob.itext等等,poi无法读取格式(新的API估计行好像还在处于研发阶段,不太稳定,做项目不太敢用):java ...
- java 读取word 带格式_java读取word带格式_游戏下载_游戏攻略资讯_尽在搜狗爱玩
Started learning Java and think you're ready to move on to the next level? Find out if you have a go ...
- java 取商_Java读取word文档,转换为网页
public classTest3 {/*** *@parampath *@parampaths *@paramsavepaths*/ public static voidchange(String ...
- java word插件开发_java生成word的几种方案
1. Jacob是Java-COM Bridge的缩写,它在Java与微软的COM组件之间构建一座桥梁.使用Jacob自带的DLL动态链接库,并通过JNI的方式实现了在Java平台上对COM程序的调用 ...
最新文章
- employee.java,Java基础系列(六):对象与类(上)
- k8s的ingress使用
- 【Android开发】【数据库】Realm For Android
- ios 给网页传值_ios常见的页面传值方式
- Visual Studio 添加外部工具
- 91.(leaflet篇)leaflet态势标绘-进攻方向绘制
- 数据结构与算法之并查集
- 计算机信息安全专业代码0839,全国网络空间安全学科专业分布
- Xposed框架详解
- 《供应链管理》(一)-刘宝红
- 使用ROS提取udacity .bag文件中的压缩图片
- 罗振宇2019-2020“时间的朋友”跨年演讲精华版全文
- 30个专业配色网站, 让你配色从此更专业
- Android开发wifi功能(附近Wi-Fi,输入密码,链接Wi-Fi)
- Qt 纯代码模仿 WPS 登录界面
- 自动配音软件下载与使用
- 写作真的可以赚钱吗?
- 浅谈电弧光保护在10kV变电站高压室的应用方案
- 【C】 取一个整数 a 从右端开始的 4~7 位
- BAT批处理基本命令