mirror of https://github.com/apache/poi.git
make xssf streaming code more extensible
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1836795 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3014d51f51
commit
543c2e3ca9
|
@ -59,10 +59,10 @@ import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class makes it easy to get at individual parts
|
* This class makes it easy to get at individual parts
|
||||||
* of an OOXML .xlsx file, suitable for low memory sax
|
* of an OOXML .xlsx file, suitable for low memory sax
|
||||||
* parsing or similar.
|
* parsing or similar.
|
||||||
* It makes up the core part of the EventUserModel support
|
* It makes up the core part of the EventUserModel support
|
||||||
* for XSSF.
|
* for XSSF.
|
||||||
*/
|
*/
|
||||||
public class XSSFReader {
|
public class XSSFReader {
|
||||||
|
|
||||||
|
@ -90,7 +90,7 @@ public class XSSFReader {
|
||||||
// strict OOXML likely not fully supported, see #57699
|
// strict OOXML likely not fully supported, see #57699
|
||||||
// this code is similar to POIXMLDocumentPart.getPartFromOPCPackage(), but I could not combine it
|
// this code is similar to POIXMLDocumentPart.getPartFromOPCPackage(), but I could not combine it
|
||||||
// easily due to different return values
|
// easily due to different return values
|
||||||
if(coreDocRelationship == null) {
|
if (coreDocRelationship == null) {
|
||||||
if (this.pkg.getRelationshipsByType(
|
if (this.pkg.getRelationshipsByType(
|
||||||
PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0) != null) {
|
PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0) != null) {
|
||||||
throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
|
throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
|
||||||
|
@ -106,27 +106,27 @@ public class XSSFReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Opens up the Shared Strings Table, parses it, and
|
* Opens up the Shared Strings Table, parses it, and
|
||||||
* returns a handy object for working with
|
* returns a handy object for working with
|
||||||
* shared strings.
|
* shared strings.
|
||||||
*/
|
*/
|
||||||
public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException {
|
public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException {
|
||||||
ArrayList<PackagePart> parts = pkg.getPartsByContentType( XSSFRelation.SHARED_STRINGS.getContentType());
|
ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
|
||||||
return parts.size() == 0 ? null : new SharedStringsTable(parts.get(0));
|
return parts.size() == 0 ? null : new SharedStringsTable(parts.get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Opens up the Styles Table, parses it, and
|
* Opens up the Styles Table, parses it, and
|
||||||
* returns a handy object for working with cell styles
|
* returns a handy object for working with cell styles
|
||||||
*/
|
*/
|
||||||
public StylesTable getStylesTable() throws IOException, InvalidFormatException {
|
public StylesTable getStylesTable() throws IOException, InvalidFormatException {
|
||||||
ArrayList<PackagePart> parts = pkg.getPartsByContentType( XSSFRelation.STYLES.getContentType());
|
ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFRelation.STYLES.getContentType());
|
||||||
if(parts.size() == 0) return null;
|
if (parts.size() == 0) return null;
|
||||||
|
|
||||||
// Create the Styles Table, and associate the Themes if present
|
// Create the Styles Table, and associate the Themes if present
|
||||||
StylesTable styles = new StylesTable(parts.get(0));
|
StylesTable styles = new StylesTable(parts.get(0));
|
||||||
parts = pkg.getPartsByContentType( XSSFRelation.THEME.getContentType());
|
parts = pkg.getPartsByContentType(XSSFRelation.THEME.getContentType());
|
||||||
if(parts.size() != 0) {
|
if (parts.size() != 0) {
|
||||||
styles.setTheme(new ThemesTable(parts.get(0)));
|
styles.setTheme(new ThemesTable(parts.get(0)));
|
||||||
}
|
}
|
||||||
return styles;
|
return styles;
|
||||||
}
|
}
|
||||||
|
@ -134,7 +134,7 @@ public class XSSFReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an InputStream to read the contents of the
|
* Returns an InputStream to read the contents of the
|
||||||
* shared strings table.
|
* shared strings table.
|
||||||
*/
|
*/
|
||||||
public InputStream getSharedStringsData() throws IOException, InvalidFormatException {
|
public InputStream getSharedStringsData() throws IOException, InvalidFormatException {
|
||||||
return XSSFRelation.SHARED_STRINGS.getContents(workbookPart);
|
return XSSFRelation.SHARED_STRINGS.getContents(workbookPart);
|
||||||
|
@ -142,7 +142,7 @@ public class XSSFReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an InputStream to read the contents of the
|
* Returns an InputStream to read the contents of the
|
||||||
* styles table.
|
* styles table.
|
||||||
*/
|
*/
|
||||||
public InputStream getStylesData() throws IOException, InvalidFormatException {
|
public InputStream getStylesData() throws IOException, InvalidFormatException {
|
||||||
return XSSFRelation.STYLES.getContents(workbookPart);
|
return XSSFRelation.STYLES.getContents(workbookPart);
|
||||||
|
@ -150,7 +150,7 @@ public class XSSFReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an InputStream to read the contents of the
|
* Returns an InputStream to read the contents of the
|
||||||
* themes table.
|
* themes table.
|
||||||
*/
|
*/
|
||||||
public InputStream getThemesData() throws IOException, InvalidFormatException {
|
public InputStream getThemesData() throws IOException, InvalidFormatException {
|
||||||
return XSSFRelation.THEME.getContents(workbookPart);
|
return XSSFRelation.THEME.getContents(workbookPart);
|
||||||
|
@ -158,8 +158,8 @@ public class XSSFReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an InputStream to read the contents of the
|
* Returns an InputStream to read the contents of the
|
||||||
* main Workbook, which contains key overall data for
|
* main Workbook, which contains key overall data for
|
||||||
* the file, including sheet definitions.
|
* the file, including sheet definitions.
|
||||||
*/
|
*/
|
||||||
public InputStream getWorkbookData() throws IOException, InvalidFormatException {
|
public InputStream getWorkbookData() throws IOException, InvalidFormatException {
|
||||||
return workbookPart.getInputStream();
|
return workbookPart.getInputStream();
|
||||||
|
@ -167,18 +167,19 @@ public class XSSFReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an InputStream to read the contents of the
|
* Returns an InputStream to read the contents of the
|
||||||
* specified Sheet.
|
* specified Sheet.
|
||||||
|
*
|
||||||
* @param relId The relationId of the sheet, from a r:id on the workbook
|
* @param relId The relationId of the sheet, from a r:id on the workbook
|
||||||
*/
|
*/
|
||||||
public InputStream getSheet(String relId) throws IOException, InvalidFormatException {
|
public InputStream getSheet(String relId) throws IOException, InvalidFormatException {
|
||||||
PackageRelationship rel = workbookPart.getRelationship(relId);
|
PackageRelationship rel = workbookPart.getRelationship(relId);
|
||||||
if(rel == null) {
|
if (rel == null) {
|
||||||
throw new IllegalArgumentException("No Sheet found with r:id " + relId);
|
throw new IllegalArgumentException("No Sheet found with r:id " + relId);
|
||||||
}
|
}
|
||||||
|
|
||||||
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
|
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
|
||||||
PackagePart sheet = pkg.getPart(relName);
|
PackagePart sheet = pkg.getPart(relName);
|
||||||
if(sheet == null) {
|
if (sheet == null) {
|
||||||
throw new IllegalArgumentException("No data found for Sheet with r:id " + relId);
|
throw new IllegalArgumentException("No data found for Sheet with r:id " + relId);
|
||||||
}
|
}
|
||||||
return sheet.getInputStream();
|
return sheet.getInputStream();
|
||||||
|
@ -186,10 +187,10 @@ public class XSSFReader {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an Iterator which will let you get at all the
|
* Returns an Iterator which will let you get at all the
|
||||||
* different Sheets in turn.
|
* different Sheets in turn.
|
||||||
* Each sheet's InputStream is only opened when fetched
|
* Each sheet's InputStream is only opened when fetched
|
||||||
* from the Iterator. It's up to you to close the
|
* from the Iterator. It's up to you to close the
|
||||||
* InputStreams when done with each one.
|
* InputStreams when done with each one.
|
||||||
*/
|
*/
|
||||||
public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
|
public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
|
||||||
return new SheetIterator(workbookPart);
|
return new SheetIterator(workbookPart);
|
||||||
|
@ -201,7 +202,7 @@ public class XSSFReader {
|
||||||
public static class SheetIterator implements Iterator<InputStream> {
|
public static class SheetIterator implements Iterator<InputStream> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maps relId and the corresponding PackagePart
|
* Maps relId and the corresponding PackagePart
|
||||||
*/
|
*/
|
||||||
private final Map<String, PackagePart> sheetMap;
|
private final Map<String, PackagePart> sheetMap;
|
||||||
|
|
||||||
|
@ -232,7 +233,7 @@ public class XSSFReader {
|
||||||
sheetMap = new HashMap<>();
|
sheetMap = new HashMap<>();
|
||||||
OPCPackage pkg = wb.getPackage();
|
OPCPackage pkg = wb.getPackage();
|
||||||
Set<String> worksheetRels = getSheetRelationships();
|
Set<String> worksheetRels = getSheetRelationships();
|
||||||
for(PackageRelationship rel : wb.getRelationships()){
|
for (PackageRelationship rel : wb.getRelationships()) {
|
||||||
String relType = rel.getRelationshipType();
|
String relType = rel.getRelationshipType();
|
||||||
if (worksheetRels.contains(relType)) {
|
if (worksheetRels.contains(relType)) {
|
||||||
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
|
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
|
||||||
|
@ -242,7 +243,7 @@ public class XSSFReader {
|
||||||
//step 2. Read array of CTSheet elements, wrap it in a LinkedList
|
//step 2. Read array of CTSheet elements, wrap it in a LinkedList
|
||||||
//and construct an iterator
|
//and construct an iterator
|
||||||
sheetIterator = createSheetIteratorFromWB(wb);
|
sheetIterator = createSheetIteratorFromWB(wb);
|
||||||
} catch (InvalidFormatException e){
|
} catch (InvalidFormatException e) {
|
||||||
throw new POIXMLException(e);
|
throw new POIXMLException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -311,7 +312,7 @@ public class XSSFReader {
|
||||||
try {
|
try {
|
||||||
PackagePart sheetPkg = sheetMap.get(sheetId);
|
PackagePart sheetPkg = sheetMap.get(sheetId);
|
||||||
return sheetPkg.getInputStream();
|
return sheetPkg.getInputStream();
|
||||||
} catch(IOException e) {
|
} catch (IOException e) {
|
||||||
throw new POIXMLException(e);
|
throw new POIXMLException(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -324,67 +325,63 @@ public class XSSFReader {
|
||||||
public String getSheetName() {
|
public String getSheetName() {
|
||||||
return xssfSheetRef.getName();
|
return xssfSheetRef.getName();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the comments associated with this sheet,
|
* Returns the comments associated with this sheet,
|
||||||
* or null if there aren't any
|
* or null if there aren't any
|
||||||
*/
|
*/
|
||||||
public CommentsTable getSheetComments() {
|
public CommentsTable getSheetComments() {
|
||||||
PackagePart sheetPkg = getSheetPart();
|
PackagePart sheetPkg = getSheetPart();
|
||||||
|
|
||||||
// Do we have a comments relationship? (Only ever one if so)
|
// Do we have a comments relationship? (Only ever one if so)
|
||||||
try {
|
try {
|
||||||
PackageRelationshipCollection commentsList =
|
PackageRelationshipCollection commentsList =
|
||||||
sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
|
sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
|
||||||
if(commentsList.size() > 0) {
|
if (commentsList.size() > 0) {
|
||||||
PackageRelationship comments = commentsList.getRelationship(0);
|
PackageRelationship comments = commentsList.getRelationship(0);
|
||||||
PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
|
PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
|
||||||
PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
|
PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
|
||||||
return new CommentsTable(commentsPart);
|
return new CommentsTable(commentsPart);
|
||||||
}
|
}
|
||||||
} catch (InvalidFormatException e) {
|
} catch (InvalidFormatException|IOException e) {
|
||||||
return null;
|
LOGGER.log(POILogger.WARN, e);
|
||||||
} catch (IOException e) {
|
return null;
|
||||||
return null;
|
}
|
||||||
}
|
return null;
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the shapes associated with this sheet,
|
* Returns the shapes associated with this sheet,
|
||||||
* an empty list or null if there is an exception
|
* an empty list or null if there is an exception
|
||||||
*/
|
*/
|
||||||
public List<XSSFShape> getShapes() {
|
public List<XSSFShape> getShapes() {
|
||||||
PackagePart sheetPkg = getSheetPart();
|
PackagePart sheetPkg = getSheetPart();
|
||||||
List<XSSFShape> shapes= new LinkedList<>();
|
List<XSSFShape> shapes = new LinkedList<>();
|
||||||
// Do we have a comments relationship? (Only ever one if so)
|
// Do we have a comments relationship? (Only ever one if so)
|
||||||
try {
|
try {
|
||||||
PackageRelationshipCollection drawingsList = sheetPkg.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation());
|
PackageRelationshipCollection drawingsList = sheetPkg.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation());
|
||||||
for (int i = 0; i < drawingsList.size(); i++){
|
for (int i = 0; i < drawingsList.size(); i++) {
|
||||||
PackageRelationship drawings = drawingsList.getRelationship(i);
|
PackageRelationship drawings = drawingsList.getRelationship(i);
|
||||||
PackagePartName drawingsName = PackagingURIHelper.createPartName(drawings.getTargetURI());
|
PackagePartName drawingsName = PackagingURIHelper.createPartName(drawings.getTargetURI());
|
||||||
PackagePart drawingsPart = sheetPkg.getPackage().getPart(drawingsName);
|
PackagePart drawingsPart = sheetPkg.getPackage().getPart(drawingsName);
|
||||||
if (drawingsPart == null) {
|
if (drawingsPart == null) {
|
||||||
//parts can go missing; Excel ignores them silently -- TIKA-2134
|
//parts can go missing; Excel ignores them silently -- TIKA-2134
|
||||||
LOGGER.log(POILogger.WARN, "Missing drawing: "+drawingsName +". Skipping it.");
|
LOGGER.log(POILogger.WARN, "Missing drawing: " + drawingsName + ". Skipping it.");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
XSSFDrawing drawing = new XSSFDrawing(drawingsPart);
|
XSSFDrawing drawing = new XSSFDrawing(drawingsPart);
|
||||||
shapes.addAll(drawing.getShapes());
|
shapes.addAll(drawing.getShapes());
|
||||||
}
|
}
|
||||||
} catch (XmlException e){
|
} catch (XmlException|InvalidFormatException|IOException e) {
|
||||||
return null;
|
LOGGER.log(POILogger.WARN, e);
|
||||||
} catch (InvalidFormatException e) {
|
return null;
|
||||||
return null;
|
}
|
||||||
} catch (IOException e) {
|
return shapes;
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return shapes;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public PackagePart getSheetPart() {
|
public PackagePart getSheetPart() {
|
||||||
String sheetId = xssfSheetRef.getId();
|
String sheetId = xssfSheetRef.getId();
|
||||||
return sheetMap.get(sheetId);
|
return sheetMap.get(sheetId);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -58,16 +58,16 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
|
|
||||||
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFEventBasedExcelExtractor.class);
|
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFEventBasedExcelExtractor.class);
|
||||||
|
|
||||||
private OPCPackage container;
|
protected OPCPackage container;
|
||||||
private POIXMLProperties properties;
|
protected POIXMLProperties properties;
|
||||||
|
|
||||||
private Locale locale;
|
protected Locale locale;
|
||||||
private boolean includeTextBoxes = true;
|
protected boolean includeTextBoxes = true;
|
||||||
private boolean includeSheetNames = true;
|
protected boolean includeSheetNames = true;
|
||||||
private boolean includeCellComments;
|
protected boolean includeCellComments;
|
||||||
private boolean includeHeadersFooters = true;
|
protected boolean includeHeadersFooters = true;
|
||||||
private boolean formulasNotResults;
|
protected boolean formulasNotResults;
|
||||||
private boolean concatenatePhoneticRuns = true;
|
protected boolean concatenatePhoneticRuns = true;
|
||||||
|
|
||||||
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
||||||
this(OPCPackage.open(path));
|
this(OPCPackage.open(path));
|
||||||
|
@ -254,7 +254,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected SharedStrings createSharedStringsTable(OPCPackage container, boolean concatenatePhoneticRuns)
|
protected SharedStrings createSharedStringsTable(XSSFReader xssfReader, OPCPackage container)
|
||||||
throws IOException, SAXException {
|
throws IOException, SAXException {
|
||||||
return new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
|
return new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
|
||||||
}
|
}
|
||||||
|
@ -264,8 +264,8 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||||
*/
|
*/
|
||||||
public String getText() {
|
public String getText() {
|
||||||
try {
|
try {
|
||||||
SharedStrings strings = createSharedStringsTable(container, concatenatePhoneticRuns);
|
|
||||||
XSSFReader xssfReader = new XSSFReader(container);
|
XSSFReader xssfReader = new XSSFReader(container);
|
||||||
|
SharedStrings strings = createSharedStringsTable(xssfReader, container);
|
||||||
StylesTable styles = xssfReader.getStylesTable();
|
StylesTable styles = xssfReader.getStylesTable();
|
||||||
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
|
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
|
||||||
StringBuilder text = new StringBuilder(64);
|
StringBuilder text = new StringBuilder(64);
|
||||||
|
|
Loading…
Reference in New Issue