diff --git a/src/examples/src/org/apache/poi/xssf/streaming/examples/HybridStreaming.java b/src/examples/src/org/apache/poi/xssf/streaming/examples/HybridStreaming.java
new file mode 100644
index 0000000000..99af0356a1
--- /dev/null
+++ b/src/examples/src/org/apache/poi/xssf/streaming/examples/HybridStreaming.java
@@ -0,0 +1,91 @@
+package org.apache.poi.xssf.streaming.examples;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
+import org.apache.poi.xssf.usermodel.XSSFComment;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
+import org.xml.sax.SAXException;
+
+/**
+ * This demonstrates how a hybrid approach to workbook read can be taken, using
+ * the DOM-based {@link XSSFWorkbook} for most of the workbook while keeping one
+ * large sheet out of the DOM parse so that it can be streamed instead.
+ */
+public class HybridStreaming {
+
+    /** Name of the sheet that is excluded from the DOM parse. */
+    private static final String SHEET_TO_STREAM = "large sheet";
+
+    /**
+     * Opens the workbook, skipping the DOM parse of {@link #SHEET_TO_STREAM}, and
+     * sets up the event-model objects used to stream that sheet.
+     *
+     * @param args unused
+     * @throws IOException if the workbook cannot be read or closed
+     * @throws SAXException declared for the event-model set-up calls below
+     */
+    public static void main(String[] args) throws IOException, SAXException {
+        InputStream sourceBytes = new FileInputStream("/path/too/workbook.xlsx");
+        try {
+            XSSFWorkbook workbook = new XSSFWorkbook(sourceBytes) {
+                /** Avoid DOM parse of large sheet */
+                @Override
+                public void parseSheet(java.util.Map<String, XSSFSheet> shIdMap, CTSheet ctSheet) {
+                    // Every other sheet must still go through the regular parse,
+                    // otherwise the workbook would end up with no sheets at all.
+                    if (!SHEET_TO_STREAM.equals(ctSheet.getName())) {
+                        super.parseSheet(shIdMap, ctSheet);
+                    }
+                }
+            };
+            try {
+                // Having avoided a DOM-based parse of the sheet, we can stream it instead.
+                ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(workbook.getPackage());
+                // NOTE(review): the handler is only constructed here; it presumably still
+                // has to be driven by a SAX parser over the sheet's XML to see any rows.
+                new XSSFSheetXMLHandler(workbook.getStylesSource(), strings, createSheetContentsHandler(), false);
+            } finally {
+                // Release the workbook even if setting up the streaming objects fails.
+                workbook.close();
+            }
+        } finally {
+            // We opened the stream, so we close it, whatever happened above.
+            sourceBytes.close();
+        }
+    }
+
+    /**
+     * Creates the callback that receives the streamed sheet contents. All
+     * callbacks are deliberately empty; a real application fills them in.
+     *
+     * @return a no-op {@link SheetContentsHandler}
+     */
+    private static SheetContentsHandler createSheetContentsHandler() {
+        return new SheetContentsHandler() {
+
+            @Override
+            public void startRow(int rowNum) {
+            }
+
+            @Override
+            public void headerFooter(String text, boolean isHeader, String tagName) {
+            }
+
+            @Override
+            public void endRow(int rowNum) {
+            }
+
+            @Override
+            public void cell(String cellReference, String formattedValue, XSSFComment comment) {
+            }
+        };
+    }
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java
index cb9ba9a5fe..4b4a30e0b6 100644
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java
@@ -401,7 +401,11 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Iterable shIdMap, CTSheet ctSheet) {
+    /**
+     * Not normally to be called externally, but possibly to be overridden to avoid
+     * the DOM based parse of large sheets (see examples).
+     */
+    public void parseSheet(Map<String, XSSFSheet> shIdMap, CTSheet ctSheet) {
         XSSFSheet sh = shIdMap.get(ctSheet.getId());
         if(sh == null) {
             logger.log(POILogger.WARN, "Sheet with name " + ctSheet.getName() + " and r:id " + ctSheet.getId()+ " was defined, but didn't exist in package, skipping");