一个excel(20M)就能干趴你的poi,你信吗(附源码)?
点击上方“阿拉奇学Java”,选择“置顶或者星标”
优质文章第一时间送达!
链接: www.cnblogs.com/rongdi/p/11872810.html
推荐阅读 | Java 的 JSP 已经被淘汰了吗?
推荐阅读 | 知乎高赞:本科生如何才能进入腾讯、阿里等一流互联网大厂?
package com.example.utils;
import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;import org.apache.poi.xssf.eventusermodel.XSSFReader;import org.apache.poi.xssf.usermodel.XSSFRichTextString;import org.xml.sax.Attributes;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.DefaultHandler;import org.xml.sax.helpers.XMLReaderFactory;
import java.io.File;import java.io.InputStream;import java.sql.SQLException;import java.util.ArrayList;import java.util.Iterator;import java.util.List;
/** * 百度上直接copy过来的 * XSSF and SAX (Event API) */public abstract class BigDataParseExcelUtil extends DefaultHandler { private ReadOnlySharedStringsTable sst; private String lastContents; private boolean nextIsString; private int sheetIndex = -1; private List<String> rowlist = new ArrayList<String>(); private int curRow = 0; // 当前行 private int curCol = 0; // 当前列索引 private int preCol = 0; // 上一列列索引 private int titleRow = 0; // 标题行,一般情况下为0 private int rowsize = 0; // 列数 private List excelList = new ArrayList(); //excel全部转换为list
// excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型
public abstract void optRows(int sheetIndex, int curRow, List<String> rowlist, List excelList) throws SQLException, Exception;
// 只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3
/** * @param filename * @param sheetId sheetId为要遍历的sheet索引,从1开始,1-3 * @throws Exception */ public void processOneSheet(String filename, int sheetId) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg); XMLReader parser = fetchSheetParser(strings); // rId2 found by processing the Workbook // 根据 rId# 或 rSheet# 查找sheet InputStream sheet2 = r.getSheet("rId" + sheetId); sheetIndex++; InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); sheet2.close(); }
@Override public void characters(char[] ch, int start, int length) throws SAXException { // 得到单元格内容的值 lastContents += new String(ch, start, length); }
public void process(InputStream inputStream) throws Exception { OPCPackage pkg = OPCPackage.open(inputStream); XSSFReader r = new XSSFReader(pkg); ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg); XMLReader parser = fetchSheetParser(strings); Iterator<InputStream> sheets = r.getSheetsData(); while (sheets.hasNext()) { curRow = 0; sheetIndex++; InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); } }
/** * 遍历 excel 文件 */ public void process(File file) throws Exception { OPCPackage pkg = OPCPackage.open(file); XSSFReader r = new XSSFReader(pkg); ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg); XMLReader parser = fetchSheetParser(strings); Iterator<InputStream> sheets = r.getSheetsData(); while (sheets.hasNext()) { curRow = 0; sheetIndex++; InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); } }
public XMLReader fetchSheetParser(ReadOnlySharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader(); // .createXMLReader("org.apache.xerces.parsers.SAXParser"); this.sst = sst; parser.setContentHandler(this); return parser; }
@Override public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => 单元格 if (name.equals("c")) { // 如果下一个元素是 SST 的索引,则将nextIsString标记为true String cellType = attributes.getValue("t"); String rowStr = attributes.getValue("r"); curCol = this.getRowIndex(rowStr); if (cellType != null && cellType.equals("s")) { nextIsString = true; } else { nextIsString = false; } } // 置空 lastContents = ""; }
@Override public void endElement(String uri, String localName, String name) throws SAXException { // 根据SST的索引值的到单元格的真正要存储的字符串 // 这时characters()方法可能会被调用多次 if (nextIsString) { try { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)) .toString(); } catch (Exception e) { } } // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引 // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符 if (name.equals("v")) { String value = lastContents.trim(); value = value.equals("") ? " " : value; int cols = curCol - preCol; if (cols > 1) { for (int i = 0; i < cols - 1; i++) { rowlist.add(preCol, ""); } } preCol = curCol; rowlist.add(curCol - 1, value); } else { // 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法 if (name.equals("row")) { int tmpCols = rowlist.size(); if (curRow > this.titleRow && tmpCols < this.rowsize) { for (int i = 0; i < this.rowsize - tmpCols; i++) { rowlist.add(rowlist.size(), ""); } } try { optRows(sheetIndex, curRow, rowlist, excelList); } catch (SQLException e) { e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } if (curRow == this.titleRow) { this.rowsize = rowlist.size(); } rowlist.clear(); curRow++; curCol = 0; preCol = 0; } } }
// 得到列索引,每一列c元素的r属性构成为字母加数字的形式,字母组合为列索引,数字组合为行索引, // 如AB45,表示为第(A-A+1)*26+(B-A+1)*26列,45行 public int getRowIndex(String rowStr) { rowStr = rowStr.replaceAll("[^A-Z]", ""); byte[] rowAbc = rowStr.getBytes(); int len = rowAbc.length; float num = 0; for (int i = 0; i < len; i++) { num += (rowAbc[i] - 'A' + 1) * Math.pow(26, len - i - 1); } return (int) num; }
}
package com.example.service;
import com.example.utils.BigDataParseExcelUtil;import org.springframework.stereotype.Service;
import java.io.InputStream;import java.sql.SQLException;import java.util.List;
/**
 * Service that imports an Excel workbook with the SAX-based
 * {@link BigDataParseExcelUtil} and prints every parsed row to standard output.
 *
 * @author rongdi
 */
@Service
public class ExcelService {

    /**
     * Parses all sheets of the workbook read from the given stream.
     *
     * @param inputStream stream containing the .xlsx data; the caller remains
     *                    responsible for closing it
     * @throws Exception if the workbook cannot be opened or parsed
     */
    public void import1(InputStream inputStream) throws Exception {
        BigDataParseExcelUtil xlx = new BigDataParseExcelUtil() {
            @Override
            public void optRows(int sheetIndex, int curRow, List<String> rowlist, List excelList)
                    throws SQLException {
                System.out.println(rowlist);
            }
        };
        xlx.process(inputStream);
    }
}
package com.example.controller;
import com.example.service.ExcelService;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.stereotype.Controller;import org.springframework.web.bind.annotation.RequestMapping;import org.springframework.web.bind.annotation.RequestParam;import org.springframework.web.bind.annotation.ResponseBody;import org.springframework.web.multipart.MultipartFile;
/**
 * HTTP entry point for the Excel import examples.
 *
 * @author rongdi
 */
@Controller
public class ExcelController {

    @Autowired
    private ExcelService excelService;

    /**
     * Imports the uploaded workbook by streaming it directly into the parser.
     *
     * @param multipartFile the uploaded .xlsx file (request parameter {@code file})
     * @return the literal {@code "ok"} once the import has finished
     * @throws Exception if the upload cannot be read or parsed
     */
    @RequestMapping("/excel/import1")
    @ResponseBody
    public String import1(@RequestParam("file") MultipartFile multipartFile) throws Exception {
        // The stream returned by MultipartFile is owned by the caller, so close it here.
        try (java.io.InputStream in = multipartFile.getInputStream()) {
            excelService.import1(in);
        }
        return "ok";
    }
}
使用postman等工具,导入上面说的20M的文件22.xlsx,报错如下:
那我们优化一下不使用inputStream,直接使用一个File传入看看
/**
 * Parses all sheets of the given workbook file and prints every row to
 * standard output.
 *
 * @param file the .xlsx file to read
 * @throws Exception if the workbook cannot be opened or parsed
 */
public void import2(File file) throws Exception {
    // Anonymous parser whose row callback simply dumps the row.
    new BigDataParseExcelUtil() {
        @Override
        public void optRows(int sheetIndex, int curRow, List<String> rowlist, List excelList)
                throws SQLException {
            System.out.println(rowlist);
        }
    }.process(file);
}
@RequestMapping("/excel/import2") @ResponseBody public String import2(@RequestParam("file") MultipartFile multipartFile) throws Exception { // 延迟解析比率 ZipSecureFile.setMinInflateRatio(-1.0d); File tmp = Files.createTempFile("tmp-", ".xlsx").toFile(); Files.copy(multipartFile.getInputStream(), Paths.get(tmp.getPath()), StandardCopyOption.REPLACE_EXISTING); excelService.import2(tmp); return "ok"; }
我们是不是可以把往strings里添加字符串和获取字符串的那两处替换掉,不再使用strings这个集合存储所有字符串?但是excel被设计成使用一个sharedStrings.xml存放公共的字符串,而不是像csv格式那样每次读一行取一行数据就好了。那么这个sharedStrings.xml中的数据总要解析出来,总要有个地方存储里面的数据,不然怎么结合sheet.xml的格式获取到每一行的数据呢?所以这里就很尴尬了:总不能解析sharedStrings.xml时不保存结果,等每次需要获取字符串的时候再去重新解析一遍这个xml吧?如果按本文章的文件来看,要重复解析25W次,效率极其低。现在问题可以简化成:我们需要把sharedStrings.xml解析出的所有字符串放在一个地方,还要能方便地读取,由于怕内存溢出,肯定不能全部放在内存中了。那么这里就有一些选择,比如把解析出的字符串按加入strings集合的顺序放入数据库、文件、外部存储或者缓存(限制内存大小,多余的写入文件)中,然后使用的时候按照索引位置idx去一一取出。本文章先使用临时文件来放这些数据,因为不想搞那么复杂:导入任务不管在多复杂的系统中,最终执行的都会是一个单节点,在单节点中使用本机资源这种就近资源是最方便的。如下直接先复制源码,然后修改上述说的两个地方。
package com.example.utils;
import org.apache.poi.ooxml.util.SAXHelper;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.openxml4j.opc.PackagePart;import org.apache.poi.ss.usermodel.RichTextString;import org.apache.poi.util.Removal;import org.apache.poi.xssf.model.SharedStrings;import org.apache.poi.xssf.usermodel.XSSFRelation;import org.apache.poi.xssf.usermodel.XSSFRichTextString;import org.xml.sax.Attributes;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.ParserConfigurationException;import java.io.File;import java.io.FileNotFoundException;import java.io.FileOutputStream;import java.io.FileReader;import java.io.IOException;import java.io.InputStream;import java.io.LineNumberReader;import java.io.PushbackInputStream;import java.nio.file.Files;import java.util.ArrayList;import java.util.LinkedHashMap;import java.util.List;import java.util.Map;
import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;
public class ReadOnlySharedStringsTable extends DefaultHandler implements SharedStrings {
protected final boolean includePhoneticRuns;
/** * An integer representing the total count of strings in the workbook. This count does not * include any numbers, it counts only the total of text strings in the workbook. */ protected int count;
/** * An integer representing the total count of unique strings in the Shared String Table. * A string is unique even if it is a copy of another string, but has different formatting applied * at the character level. */ protected int uniqueCount;
/** * The shared strings table. */ private List<String> strings;
private File tmp = null;
FileOutputStream fos = null;
private int counts;
private Map<Integer,String> map = new LinkedHashMap<Integer,String>();
public ReadOnlySharedStringsTable(OPCPackage pkg) throws IOException, SAXException { this(pkg, true); }
public ReadOnlySharedStringsTable(OPCPackage pkg, boolean includePhoneticRuns) throws IOException, SAXException { this.includePhoneticRuns = includePhoneticRuns; ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
// Some workbooks have no shared strings table. if (parts.size() > 0) { PackagePart sstPart = parts.get(0); readFrom(sstPart.getInputStream()); } }
/** * Like POIXMLDocumentPart constructor * * Calls {@link #ReadOnlySharedStringsTable(PackagePart, boolean)}, with a * value of <code>true</code> to include phonetic runs. * * @since POI 3.14-Beta1 */ public ReadOnlySharedStringsTable(PackagePart part) throws IOException, SAXException { this(part, true); }
/** * Like POIXMLDocumentPart constructor * * @since POI 3.14-Beta3 */ public ReadOnlySharedStringsTable(PackagePart part, boolean includePhoneticRuns) throws IOException, SAXException { this.includePhoneticRuns = includePhoneticRuns; readFrom(part.getInputStream()); }
/** * Read this shared strings table from an XML file. * * @param is The input stream containing the XML document. * @throws IOException if an error occurs while reading. * @throws SAXException if parsing the XML data fails. */ public void readFrom(InputStream is) throws IOException, SAXException { // test if the file is empty, otherwise parse it PushbackInputStream pis = new PushbackInputStream(is, 1); int emptyTest = pis.read(); if (emptyTest > -1) { pis.unread(emptyTest); InputSource sheetSource = new InputSource(pis); try { XMLReader sheetParser = SAXHelper.newXMLReader(); sheetParser.setContentHandler(this); sheetParser.parse(sheetSource); } catch(ParserConfigurationException e) { throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); } } }
/** * Return an integer representing the total count of strings in the workbook. This count does not * include any numbers, it counts only the total of text strings in the workbook. * * @return the total count of strings in the workbook */ @Override public int getCount() { return this.count; }
/** * Returns an integer representing the total count of unique strings in the Shared String Table. * A string is unique even if it is a copy of another string, but has different formatting applied * at the character level. * * @return the total count of unique strings in the workbook */ @Override public int getUniqueCount() { return this.uniqueCount; }
/** * Return the string at a given index. * Formatting is ignored. * * @param idx index of item to return. * @return the item at the specified position in this Shared String table. * @deprecated use <code>getItemAt</code> instead */ @Removal(version = "4.2") @Deprecated public String getEntryAt(int idx) { /** * 这里就是修改部分了,直接从按行存储的临时文件读取需要的字符串 */ String value = map.get(idx + 1); if(value == null) {
return readString(idx,1000,this.uniqueCount); } else { return value; }
}
/** * 从指定位置读取size个字符串,这里是使用局部性原理,每次读取size个字符串, * 以免每次需要读取文件,性能极低 * @return */ private String readString(int idx,int size,int numbers) { map.clear(); int currNumber = idx + 1; if (currNumber < 0 || currNumber > numbers) { return null; } try { FileReader in = new FileReader(tmp); LineNumberReader reader = new LineNumberReader(in); try { String line = ""; for(int i = 1;i <= numbers;i ++) { line = reader.readLine(); if(i >= currNumber && i < currNumber + size) { map.put(i, line); } } } finally { reader.close(); in.close(); } } catch (Exception e) { System.out.println(e.getMessage()); } return map.get(idx + 1); }
/** * Returns all the strings. * Formatting is ignored. * * @return a list with all the strings * @deprecated use <code>getItemAt</code> instead */ @Removal(version = "4.2") @Deprecated public List<String> getItems() { return strings; }
@Override public RichTextString getItemAt(int idx) { return new XSSFRichTextString(getEntryAt(idx)); }
ContentHandler methods
private StringBuilder characters; private boolean tIsOpen; private boolean inRPh;
@Override public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { if (uri != null && ! uri.equals(NS_SPREADSHEETML)) { return; }
if ("sst".equals(localName)) { String count = attributes.getValue("count"); if(count != null) this.count = Integer.parseInt(count); String uniqueCount = attributes.getValue("uniqueCount"); if(uniqueCount != null) this.uniqueCount = Integer.parseInt(uniqueCount); try { tmp = Files.createTempFile("tmp-", ".xlsx").toFile(); } catch (IOException e) { e.printStackTrace(); } // this.strings = new ArrayList<>(this.uniqueCount); characters = new StringBuilder(64); try { fos = new FileOutputStream(tmp,true); } catch (FileNotFoundException e) { e.printStackTrace(); } } else if ("si".equals(localName)) { characters.setLength(0); } else if ("t".equals(localName)) { tIsOpen = true; } else if ("rPh".equals(localName)) { inRPh = true; //append space...this assumes that rPh always comes after regular <t> if (includePhoneticRuns && characters.length() > 0) { characters.append(" "); } } }
@Override public void endElement(String uri, String localName, String name) throws SAXException { if (uri != null && ! uri.equals(NS_SPREADSHEETML)) { return; }
if ("si".equals(localName)) { // strings.add(characters.toString().intern()); try { /** * 这里就是修改的一部分,这里直接把字符串按行存入临时文件 */ counts ++; fos.write((characters.toString() + "\n").getBytes()); if(counts == this.uniqueCount) { fos.close(); } } catch (IOException e) { e.printStackTrace(); } } else if ("t".equals(localName)) { tIsOpen = false; } else if ("rPh".equals(localName)) { inRPh = false; } }
/** * Captures characters only if a t(ext) element is open. */ @Override public void characters(char[] ch, int start, int length) throws SAXException { if (tIsOpen) { if (inRPh && includePhoneticRuns) { characters.append(ch, start, length); } else if (! inRPh){ characters.append(ch, start, length); } } }
}
package com.example.advanceevent;
import com.example.utils.FileUtils;import org.ehcache.Cache;import org.ehcache.CacheManager;import org.ehcache.config.CacheConfiguration;import org.ehcache.config.builders.CacheConfigurationBuilder;import org.ehcache.config.builders.CacheManagerBuilder;import org.ehcache.config.builders.ResourcePoolsBuilder;import org.ehcache.config.units.MemoryUnit;import org.ehcache.core.Ehcache;import org.slf4j.Logger;import org.slf4j.LoggerFactory;
import java.io.File;import java.util.HashMap;import java.util.UUID;
/**
 * Two-level cache for shared strings, keyed by their sequential index.
 *
 * <p>Strings are grouped into batches of {@value #BATCH_COUNT}; each full
 * batch is stored in a disk-backed Ehcache ("file cache"). Lookups first try
 * a heap-limited Ehcache ("active cache") and fall back to the file cache,
 * promoting the batch into the active cache on a miss.
 *
 * <p>Instances are not thread-safe. Call {@link #destroy()} when done to
 * remove the per-instance caches from the shared cache managers.
 *
 * @author rongdi
 */
public class ReadCache {

    private static final Logger LOGGER = LoggerFactory.getLogger(ReadCache.class);

    /** Number of strings stored per cache entry. */
    private static final int BATCH_COUNT = 1000;
    /** Initial HashMap capacity that holds one batch without resizing (BATCH_COUNT / 0.75 + 1). */
    private static final int BATCH_MAP_CAPACITY = 1334;

    private static CacheManager fileCacheManager;
    private static CacheConfiguration<Integer, HashMap> fileCacheConfiguration;
    private static CacheManager activeCacheManager;

    static {
        File cacheFile = FileUtils.createCacheTmpFile();
        fileCacheManager = CacheManagerBuilder.newCacheManagerBuilder()
                .with(CacheManagerBuilder.persistence(cacheFile))
                .build(true);
        activeCacheManager = CacheManagerBuilder.newCacheManagerBuilder().build(true);
        fileCacheConfiguration = CacheConfigurationBuilder
                .newCacheConfigurationBuilder(Integer.class, HashMap.class,
                        ResourcePoolsBuilder.newResourcePoolsBuilder().disk(10L, MemoryUnit.GB))
                .withSizeOfMaxObjectGraph(1000000L)
                .withSizeOfMaxObjectSize(10L, MemoryUnit.GB)
                .build();
    }

    /** Index that the next {@link #put(String)} will use. */
    private int index = 0;
    /** Batch currently being filled; not yet visible in either cache. */
    private HashMap<Integer, String> dataMap = new HashMap<Integer, String>(BATCH_MAP_CAPACITY);
    private CacheConfiguration<Integer, HashMap> activeCacheConfiguration;
    private Cache<Integer, HashMap> fileCache;
    private Cache<Integer, HashMap> activeCache;
    private String cacheAlias;
    private int cacheMiss = 0;

    /**
     * Creates a cache whose in-memory tier is limited to the given size.
     *
     * @param maxCacheActivateSize heap budget of the active cache in megabytes
     */
    public ReadCache(int maxCacheActivateSize) {
        this.activeCacheConfiguration = CacheConfigurationBuilder
                .newCacheConfigurationBuilder(Integer.class, HashMap.class,
                        ResourcePoolsBuilder.newResourcePoolsBuilder()
                                .heap((long) maxCacheActivateSize, MemoryUnit.MB))
                .withSizeOfMaxObjectGraph(1000000L)
                .withSizeOfMaxObjectSize((long) maxCacheActivateSize, MemoryUnit.MB)
                .build();
        init();
    }

    /** Registers this instance's caches under a random alias in the shared managers. */
    private void init() {
        this.cacheAlias = UUID.randomUUID().toString();
        this.fileCache = fileCacheManager.createCache(this.cacheAlias, fileCacheConfiguration);
        this.activeCache = activeCacheManager.createCache(this.cacheAlias, this.activeCacheConfiguration);
    }

    /**
     * Appends a string; it receives the next sequential index.
     *
     * @param value the string to store
     */
    public void put(String value) {
        this.dataMap.put(this.index, value);
        if ((this.index + 1) % BATCH_COUNT == 0) {
            // Batch is full: hand it to the disk cache and start a new one.
            this.fileCache.put(this.index / BATCH_COUNT, this.dataMap);
            this.dataMap = new HashMap<Integer, String>(BATCH_MAP_CAPACITY);
        }

        ++this.index;
        if (LOGGER.isDebugEnabled() && this.index % 1000000 == 0) {
            LOGGER.debug("Already put :{}", this.index);
        }
    }

    /**
     * Looks up a string by index.
     *
     * @param key zero-based index assigned by {@link #put(String)}
     * @return the string, or {@code null} if the key is {@code null}, negative
     *         or has never been stored
     */
    @SuppressWarnings({"unchecked", "rawtypes"}) // caches are declared with raw HashMap values
    public String get(Integer key) {
        if (key == null || key < 0) {
            return null;
        }
        int route = key / BATCH_COUNT;
        HashMap<Integer, String> batch = (HashMap) this.activeCache.get(route);
        if (batch == null) {
            batch = (HashMap) this.fileCache.get(route);
            if (batch == null) {
                // Unknown batch: Ehcache rejects null values, so do not try to promote it.
                // The key may still live in the batch that has not been flushed yet.
                return route == this.index / BATCH_COUNT ? this.dataMap.get(key) : null;
            }
            this.activeCache.put(route, batch);
            if (LOGGER.isDebugEnabled() && this.cacheMiss++ % 1000 == 0) {
                LOGGER.debug("Cache misses count:{}", this.cacheMiss);
            }
        }
        return batch.get(key);
    }

    /** Flushes the last, partially filled batch to the file cache. */
    public void putFinished() {
        if (this.dataMap != null && !this.dataMap.isEmpty()) {
            this.fileCache.put(this.index / BATCH_COUNT, this.dataMap);
        }
    }

    /** Removes this instance's caches from the shared cache managers. */
    public void destroy() {
        fileCacheManager.removeCache(this.cacheAlias);
        activeCacheManager.removeCache(this.cacheAlias);
    }
}
package com.example.advanceevent;
import org.apache.poi.ooxml.util.SAXHelper;import org.apache.poi.openxml4j.opc.OPCPackage;import org.apache.poi.openxml4j.opc.PackagePart;import org.apache.poi.ss.usermodel.RichTextString;import org.apache.poi.util.Removal;import org.apache.poi.xssf.model.SharedStrings;import org.apache.poi.xssf.usermodel.XSSFRelation;import org.apache.poi.xssf.usermodel.XSSFRichTextString;import org.xml.sax.Attributes;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.ParserConfigurationException;import java.io.IOException;import java.io.InputStream;import java.io.PushbackInputStream;import java.util.ArrayList;import java.util.List;
import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;
public class ReadOnlySharedStringsTable extends DefaultHandler implements SharedStrings {
protected final boolean includePhoneticRuns;
/** * An integer representing the total count of strings in the workbook. This count does not * include any numbers, it counts only the total of text strings in the workbook. */ protected int count;
/** * An integer representing the total count of unique strings in the Shared String Table. * A string is unique even if it is a copy of another string, but has different formatting applied * at the character level. */ protected int uniqueCount;
/** * 缓存 */ ReadCache readCache = new ReadCache(100);
private int counts;
public ReadOnlySharedStringsTable(OPCPackage pkg) throws IOException, SAXException { this(pkg, true); }
public ReadOnlySharedStringsTable(OPCPackage pkg, boolean includePhoneticRuns) throws IOException, SAXException { this.includePhoneticRuns = includePhoneticRuns; ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
// Some workbooks have no shared strings table. if (parts.size() > 0) { PackagePart sstPart = parts.get(0); readFrom(sstPart.getInputStream()); } }
/** * Like POIXMLDocumentPart constructor * * Calls {@link #ReadOnlySharedStringsTable(PackagePart, boolean)}, with a * value of <code>true</code> to include phonetic runs. * * @since POI 3.14-Beta1 */ public ReadOnlySharedStringsTable(PackagePart part) throws IOException, SAXException { this(part, true); }
/** * Like POIXMLDocumentPart constructor * * @since POI 3.14-Beta3 */ public ReadOnlySharedStringsTable(PackagePart part, boolean includePhoneticRuns) throws IOException, SAXException { this.includePhoneticRuns = includePhoneticRuns; readFrom(part.getInputStream()); }
/** * Read this shared strings table from an XML file. * * @param is The input stream containing the XML document. * @throws IOException if an error occurs while reading. * @throws SAXException if parsing the XML data fails. */ public void readFrom(InputStream is) throws IOException, SAXException { // test if the file is empty, otherwise parse it PushbackInputStream pis = new PushbackInputStream(is, 1); int emptyTest = pis.read(); if (emptyTest > -1) { pis.unread(emptyTest); InputSource sheetSource = new InputSource(pis); try { XMLReader sheetParser = SAXHelper.newXMLReader(); sheetParser.setContentHandler(this); sheetParser.parse(sheetSource); } catch(ParserConfigurationException e) { throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); } } }
/** * Return an integer representing the total count of strings in the workbook. This count does not * include any numbers, it counts only the total of text strings in the workbook. * * @return the total count of strings in the workbook */ @Override public int getCount() { return this.count; }
/** * Returns an integer representing the total count of unique strings in the Shared String Table. * A string is unique even if it is a copy of another string, but has different formatting applied * at the character level. * * @return the total count of unique strings in the workbook */ @Override public int getUniqueCount() { return this.uniqueCount; }
/** * Return the string at a given index. * Formatting is ignored. * * @param idx index of item to return. * @return the item at the specified position in this Shared String table. * @deprecated use <code>getItemAt</code> instead */ @Removal(version = "4.2") @Deprecated public String getEntryAt(int idx) { /** * 这里就是修改部分了,直接从按行存储的临时文件读取需要的字符串 */ return readCache.get(idx);
}
/** * Returns all the strings. * Formatting is ignored. * * @return a list with all the strings * @deprecated use <code>getItemAt</code> instead */ @Removal(version = "4.2") @Deprecated public List<String> getItems() { return null; }
@Override public RichTextString getItemAt(int idx) { return new XSSFRichTextString(getEntryAt(idx)); }
ContentHandler methods
private StringBuilder characters; private boolean tIsOpen; private boolean inRPh;
@Override public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { if (uri != null && ! uri.equals(NS_SPREADSHEETML)) { return; }
if ("sst".equals(localName)) { String count = attributes.getValue("count"); if(count != null) this.count = Integer.parseInt(count); String uniqueCount = attributes.getValue("uniqueCount"); if(uniqueCount != null) this.uniqueCount = Integer.parseInt(uniqueCount); // this.strings = new ArrayList<>(this.uniqueCount); characters = new StringBuilder(64); } else if ("si".equals(localName)) { characters.setLength(0); } else if ("t".equals(localName)) { tIsOpen = true; } else if ("rPh".equals(localName)) { inRPh = true; //append space...this assumes that rPh always comes after regular <t> if (includePhoneticRuns && characters.length() > 0) { characters.append(" "); } } }
@Override public void endElement(String uri, String localName, String name) throws SAXException { if (uri != null && ! uri.equals(NS_SPREADSHEETML)) { return; }
if ("si".equals(localName)) { // strings.add(characters.toString().intern()); readCache.put(characters.toString()); /** * 这里就是修改的一部分,这里直接把字符串按行存入临时文件 */ counts ++; if(counts == this.uniqueCount) { readCache.putFinished(); } } else if ("t".equals(localName)) { tIsOpen = false; } else if ("rPh".equals(localName)) { inRPh = false; } }
/** * Captures characters only if a t(ext) element is open. */ @Override public void characters(char[] ch, int start, int length) throws SAXException { if (tIsOpen) { if (inRPh && includePhoneticRuns) { characters.append(ch, start, length); } else if (! inRPh){ characters.append(ch, start, length); } } }
}
至此代码效率有了相当大的提高,而且内存溢出问题也得到解决。详细测试代码:https://github.com/rongdi/poi-example.git
看到这里啦,说明你对这篇文章感兴趣,帮忙转发一下或者点击文章右下角在看。感谢啦!关注公众号,回复「进群」即可进入无广告技术交流群。同时送上250本电子书+学习视频作为见面礼!
有你想看的 精彩
Java 的 JSP 已经被淘汰了吗?
知乎高赞:本科生如何才能进入腾讯、阿里等一流互联网大厂?
ArrayList集合为什么不能使用foreach增加、删除、修改元素
有一种幸福,叫娶个女项目经理做老婆
互联网公司忽悠员工的黑话
面试字节跳动,被怼的体无完肤!
别在 Java 代码里乱打日志了,这才是正确的打日志姿势
有了这套模板,女朋友再也不用担心我刷不动 LeetCode 了
支付宝架构师眼中的高并发架构
20M文件从30秒压缩到1秒,我是如何做到的(附源码)?
39 个奇葩代码注释,看完笑哭了。
一个excel(20M)就能干趴你的poi,你信吗(附源码)?相关推荐
- apache poi 修改docx表格_一个excel(20M)就能干趴你的poi,你信吗(附源码)?
点击上方"阿拉奇学Java",选择"置顶或者星标" 优质文章第一时间送达! 链接: www.cnblogs.com/rongdi/p/11872810.html ...
- 用Java写了一个类QQ界面聊天小项目,可在线聊天(附源码)
作者: AC路上 blog.csdn.net/weixin_44048140/article/details/109612049 1.功能实现 1.修改功能(密码.昵称.个性签名) 2.添加好友.删除 ...
- Java 中如何解决 POI 读写 excel 几万行数据时内存溢出的问题?(附源码)
>>号外:关注"Java精选"公众号,菜单栏->聚合->干货分享,回复关键词领取视频资料.开源项目. 1. Excel2003与Excel2007 两个版本 ...
- 用python爬小说_今天分享一个用Python来爬取小说的小脚本!(附源码)
本文的文字及图片来源于网络,仅供学习.交流使用,不具有任何商业用途,如有问题请及时联系我们以作处理. 以下文章天气预报数据分析与统计之美 ,作者:❦大头雪糕❦ Python GUI制作小说下载器教学讲 ...
- 今天分享一个用Python来爬取小说的小脚本!(附源码)
本文的文字及图片来源于网络,仅供学习.交流使用,不具有任何商业用途,如有问题请及时联系我们以作处理. 以下文章来源于大邓和他的Python ,作者:大邓 Python爬取知乎数据案例讲解视频 http ...
- 太赞了,用Java写了一个类QQ界面聊天小项目,可在线聊天(附源码)~
点击上方 "后端架构师"关注, 星标或置顶一起成长 后台回复"大礼包"有惊喜礼包! 关注订阅号「后端架构师」,收看更多精彩内容 每日英文 Sometime yo ...
- Python数据分析实战-提取DataFrame(Excel)某列(字段)最全操作(附源码和实现效果)
实现功能: Python数据分析实战-提取DataFrame(Excel)某列(字段)最全操作,代码演示了单列提取和多列提取两种情况,其中单列提取有返回series格式和dataframe两种情况,在 ...
- python爬取天气预报数据并保存为txt格式_今天分享一个用Python来爬取小说的小脚本!(附源码)...
本文的文字及图片来源于网络,仅供学习.交流使用,不具有任何商业用途,如有问题请及时联系我们以作处理. 以下文章天气预报数据分析与统计之美 ,作者:❦大头雪糕❦ Python GUI制作小说下载器教学讲 ...
- 手把手教你用Python批量实现在Excel后里面新加一列,并且内容为excel 表名(附源码)...
点击上方"Python爬虫与数据挖掘",进行关注 回复"书籍"即可获赠Python从入门到进阶共10本电子书 今 日 鸡 汤 打起黄莺儿,莫教枝上啼. 大家好, ...
最新文章
- 【bat脚本】使用ffmpeg工具剥离某一个声道
- 如何垂直居中一个浮动元素
- ssl双向认证_详解TLS/SSL运行机制
- SC15 供应商质量管理工程师(武汉)
- python软件不用买吗_Python 3.3+中的软件包不需要__init__.py吗
- 海南关于推荐扬尘监控系统的通知_实时监管!定州对44家混凝土搅拌企业实施远程视频监控...
- android netty导入_Netty在Android中使用
- 企业信息化与BI系统建设规划
- 本科生毕业论文中期检查表
- Java用while求100以内奇数和
- git branch -d和-D
- Exception Details: System.Data.OleDb.OleDbException: 操作必须使用一个可更新的查询。
- AttributeError: module ‘win32com.gen_py.00020905-0000-0000-C000-000000000046x0x8x7‘ has no attribute
- 第四周项目3--单链表应用之递增判断
- 如何准备项目启动大会
- java实现图片滚动_怎么用Java代码使图片自行滚动浏览
- MAX7219产品级驱动分享
- suse linux VNC配置
- ProxyCap v4.12
- eCharts实现多图表切换