mirror of https://github.com/apache/poi.git
The ReadOnlySharedStringsTable is re-usable for other event-based things, so copy it out to its own class
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@903170 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4c1c3a3ae3
commit
675b6ad976
|
@ -21,7 +21,6 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.parsers.SAXParser;
|
||||
|
@ -30,15 +29,11 @@ import javax.xml.parsers.SAXParserFactory;
|
|||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackageAccess;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.ss.usermodel.BuiltinFormats;
|
||||
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFReader;
|
||||
import org.apache.poi.xssf.model.StylesTable;
|
||||
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.InputSource;
|
||||
|
@ -84,192 +79,6 @@ public class XLSX2CSV {
|
|||
NUMBER,
|
||||
}
|
||||
|
||||
/**
|
||||
* Each cell is enclosed in "si". Each cell can have multiple "t" elements.
|
||||
* Example input
|
||||
*
|
||||
* <pre>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
|
||||
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="2" uniqueCount="2">
|
||||
<si>
|
||||
<r>
|
||||
<rPr>
|
||||
<b />
|
||||
<sz val="11" />
|
||||
<color theme="1" />
|
||||
<rFont val="Calibri" />
|
||||
<family val="2" />
|
||||
<scheme val="minor" />
|
||||
</rPr>
|
||||
<t>This:</t>
|
||||
</r>
|
||||
<r>
|
||||
<rPr>
|
||||
<sz val="11" />
|
||||
<color theme="1" />
|
||||
<rFont val="Calibri" />
|
||||
<family val="2" />
|
||||
<scheme val="minor" />
|
||||
</rPr>
|
||||
<t xml:space="preserve">Causes Problems</t>
|
||||
</r>
|
||||
</si>
|
||||
<si>
|
||||
<t>This does not</t>
|
||||
</si>
|
||||
</sst>
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
static class ReadonlySharedStringsTable extends DefaultHandler {
|
||||
|
||||
/**
|
||||
* An integer representing the total count of strings in the workbook. This count does not
|
||||
* include any numbers, it counts only the total of text strings in the workbook.
|
||||
*/
|
||||
private int count;
|
||||
|
||||
/**
|
||||
* An integer representing the total count of unique strings in the Shared String Table.
|
||||
* A string is unique even if it is a copy of another string, but has different formatting applied
|
||||
* at the character level.
|
||||
*/
|
||||
private int uniqueCount;
|
||||
|
||||
/**
|
||||
* The shared strings table.
|
||||
*/
|
||||
private String[] strings;
|
||||
|
||||
/**
|
||||
* @param pkg
|
||||
* @throws IOException
|
||||
* @throws SAXException
|
||||
* @throws ParserConfigurationException
|
||||
*/
|
||||
public ReadonlySharedStringsTable(OPCPackage pkg)
|
||||
throws IOException, SAXException, ParserConfigurationException {
|
||||
ArrayList<PackagePart> parts =
|
||||
pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
|
||||
|
||||
// Some workbooks have no shared strings table.
|
||||
if (parts.size() > 0) {
|
||||
PackagePart sstPart = parts.get(0);
|
||||
readFrom(sstPart.getInputStream());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Like POIXMLDocumentPart constructor
|
||||
*
|
||||
* @param part
|
||||
* @param rel_ignored
|
||||
* @throws IOException
|
||||
*/
|
||||
public ReadonlySharedStringsTable(PackagePart part, PackageRelationship rel_ignored)
|
||||
throws IOException, SAXException, ParserConfigurationException {
|
||||
readFrom(part.getInputStream());
|
||||
}
|
||||
|
||||
/**
|
||||
* Read this shared strings table from an XML file.
|
||||
*
|
||||
* @param is The input stream containing the XML document.
|
||||
* @throws IOException if an error occurs while reading.
|
||||
* @throws SAXException
|
||||
* @throws ParserConfigurationException
|
||||
*/
|
||||
public void readFrom(InputStream is) throws IOException, SAXException, ParserConfigurationException {
|
||||
InputSource sheetSource = new InputSource(is);
|
||||
SAXParserFactory saxFactory = SAXParserFactory.newInstance();
|
||||
SAXParser saxParser = saxFactory.newSAXParser();
|
||||
XMLReader sheetParser = saxParser.getXMLReader();
|
||||
sheetParser.setContentHandler(this);
|
||||
sheetParser.parse(sheetSource);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an integer representing the total count of strings in the workbook. This count does not
|
||||
* include any numbers, it counts only the total of text strings in the workbook.
|
||||
*
|
||||
* @return the total count of strings in the workbook
|
||||
*/
|
||||
public int getCount() {
|
||||
return this.count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an integer representing the total count of unique strings in the Shared String Table.
|
||||
* A string is unique even if it is a copy of another string, but has different formatting applied
|
||||
* at the character level.
|
||||
*
|
||||
* @return the total count of unique strings in the workbook
|
||||
*/
|
||||
public int getUniqueCount() {
|
||||
return this.uniqueCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a string item by index
|
||||
*
|
||||
* @param idx index of item to return.
|
||||
* @return the item at the specified position in this Shared String table.
|
||||
*/
|
||||
public String getEntryAt(int idx) {
|
||||
return strings[idx];
|
||||
}
|
||||
|
||||
//// ContentHandler methods ////
|
||||
|
||||
private StringBuffer characters;
|
||||
private boolean tIsOpen;
|
||||
private int index;
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
* @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
|
||||
*/
|
||||
public void startElement(String uri, String localName, String name,
|
||||
Attributes attributes) throws SAXException {
|
||||
if ("sst".equals(name)) {
|
||||
String count = attributes.getValue("count");
|
||||
String uniqueCount = attributes.getValue("uniqueCount");
|
||||
this.count = Integer.parseInt(count);
|
||||
this.uniqueCount = Integer.parseInt(uniqueCount);
|
||||
this.strings = new String[this.uniqueCount];
|
||||
index = 0;
|
||||
characters = new StringBuffer();
|
||||
} else if ("si".equals(name)) {
|
||||
characters.setLength(0);
|
||||
} else if ("t".equals(name)) {
|
||||
tIsOpen = true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
* @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
|
||||
*/
|
||||
public void endElement(String uri, String localName, String name)
|
||||
throws SAXException {
|
||||
if ("si".equals(name)) {
|
||||
strings[index] = characters.toString();
|
||||
++index;
|
||||
} else if ("t".equals(name)) {
|
||||
tIsOpen = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Captures characters only if a t(ext?) element is open.
|
||||
*/
|
||||
public void characters(char[] ch, int start, int length)
|
||||
throws SAXException {
|
||||
if (tIsOpen)
|
||||
characters.append(ch, start, length);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Derived from http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
|
||||
|
@ -289,7 +98,7 @@ public class XLSX2CSV {
|
|||
/**
|
||||
* Table with unique strings
|
||||
*/
|
||||
private ReadonlySharedStringsTable sharedStringsTable;
|
||||
private ReadOnlySharedStringsTable sharedStringsTable;
|
||||
|
||||
/**
|
||||
* Destination for data
|
||||
|
@ -330,7 +139,7 @@ public class XLSX2CSV {
|
|||
*/
|
||||
public MyXSSFSheetHandler(
|
||||
StylesTable styles,
|
||||
ReadonlySharedStringsTable strings,
|
||||
ReadOnlySharedStringsTable strings,
|
||||
int cols,
|
||||
PrintStream target) {
|
||||
this.stylesTable = styles;
|
||||
|
@ -384,12 +193,8 @@ public class XLSX2CSV {
|
|||
else if ("str".equals(cellType))
|
||||
nextDataType = xssfDataType.FORMULA;
|
||||
else if (cellStyleStr != null) {
|
||||
/*
|
||||
* It's a number, but possibly has a style and/or special format.
|
||||
* Nick Burch said to use org.apache.poi.ss.usermodel.BuiltinFormats,
|
||||
* and I see javadoc for that at apache.org, but it's not in the
|
||||
* POI 3.5 Beta 5 jars. Scheduled to appear in 3.5 beta 6.
|
||||
*/
|
||||
// It's a number, but almost certainly one
|
||||
// with a special style or format
|
||||
int styleIndex = Integer.parseInt(cellStyleStr);
|
||||
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
|
||||
this.formatIndex = style.getDataFormat();
|
||||
|
@ -553,7 +358,7 @@ public class XLSX2CSV {
|
|||
*/
|
||||
public void processSheet(
|
||||
StylesTable styles,
|
||||
ReadonlySharedStringsTable strings,
|
||||
ReadOnlySharedStringsTable strings,
|
||||
InputStream sheetInputStream)
|
||||
throws IOException, ParserConfigurationException, SAXException {
|
||||
|
||||
|
@ -577,7 +382,7 @@ public class XLSX2CSV {
|
|||
public void process()
|
||||
throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
|
||||
|
||||
ReadonlySharedStringsTable strings = new ReadonlySharedStringsTable(this.xlsxPackage);
|
||||
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
|
||||
XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
|
||||
StylesTable styles = xssfReader.getStylesTable();
|
||||
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
|
||||
|
|
|
@ -0,0 +1,221 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xssf.eventusermodel;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.parsers.SAXParser;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.XMLReader;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
/**
|
||||
* <p>This is a lightweight way to process the Shared Strings
|
||||
* table. Most of the text cells will reference something
|
||||
* from in here.
|
||||
* <p>Note that each SI entry can have multiple T elements, if the
|
||||
* string is made up of bits with different formatting.
|
||||
* <p>Example input:
|
||||
* <pre>
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
|
||||
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="2" uniqueCount="2">
|
||||
<si>
|
||||
<r>
|
||||
<rPr>
|
||||
<b />
|
||||
<sz val="11" />
|
||||
<color theme="1" />
|
||||
<rFont val="Calibri" />
|
||||
<family val="2" />
|
||||
<scheme val="minor" />
|
||||
</rPr>
|
||||
<t>This:</t>
|
||||
</r>
|
||||
<r>
|
||||
<rPr>
|
||||
<sz val="11" />
|
||||
<color theme="1" />
|
||||
<rFont val="Calibri" />
|
||||
<family val="2" />
|
||||
<scheme val="minor" />
|
||||
</rPr>
|
||||
<t xml:space="preserve">Causes Problems</t>
|
||||
</r>
|
||||
</si>
|
||||
<si>
|
||||
<t>This does not</t>
|
||||
</si>
|
||||
</sst>
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
public class ReadOnlySharedStringsTable extends DefaultHandler {
|
||||
/**
|
||||
* An integer representing the total count of strings in the workbook. This count does not
|
||||
* include any numbers, it counts only the total of text strings in the workbook.
|
||||
*/
|
||||
private int count;
|
||||
|
||||
/**
|
||||
* An integer representing the total count of unique strings in the Shared String Table.
|
||||
* A string is unique even if it is a copy of another string, but has different formatting applied
|
||||
* at the character level.
|
||||
*/
|
||||
private int uniqueCount;
|
||||
|
||||
/**
|
||||
* The shared strings table.
|
||||
*/
|
||||
private String[] strings;
|
||||
|
||||
/**
|
||||
* @param pkg
|
||||
* @throws IOException
|
||||
* @throws SAXException
|
||||
* @throws ParserConfigurationException
|
||||
*/
|
||||
public ReadOnlySharedStringsTable(OPCPackage pkg)
|
||||
throws IOException, SAXException {
|
||||
ArrayList<PackagePart> parts =
|
||||
pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
|
||||
|
||||
// Some workbooks have no shared strings table.
|
||||
if (parts.size() > 0) {
|
||||
PackagePart sstPart = parts.get(0);
|
||||
readFrom(sstPart.getInputStream());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Like POIXMLDocumentPart constructor
|
||||
*
|
||||
* @param part
|
||||
* @param rel_ignored
|
||||
* @throws IOException
|
||||
*/
|
||||
public ReadOnlySharedStringsTable(PackagePart part, PackageRelationship rel_ignored)
|
||||
throws IOException, SAXException {
|
||||
readFrom(part.getInputStream());
|
||||
}
|
||||
|
||||
/**
|
||||
* Read this shared strings table from an XML file.
|
||||
*
|
||||
* @param is The input stream containing the XML document.
|
||||
* @throws IOException if an error occurs while reading.
|
||||
* @throws SAXException
|
||||
* @throws ParserConfigurationException
|
||||
*/
|
||||
public void readFrom(InputStream is) throws IOException, SAXException {
|
||||
InputSource sheetSource = new InputSource(is);
|
||||
SAXParserFactory saxFactory = SAXParserFactory.newInstance();
|
||||
try {
|
||||
SAXParser saxParser = saxFactory.newSAXParser();
|
||||
XMLReader sheetParser = saxParser.getXMLReader();
|
||||
sheetParser.setContentHandler(this);
|
||||
sheetParser.parse(sheetSource);
|
||||
} catch(ParserConfigurationException e) {
|
||||
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an integer representing the total count of strings in the workbook. This count does not
|
||||
* include any numbers, it counts only the total of text strings in the workbook.
|
||||
*
|
||||
* @return the total count of strings in the workbook
|
||||
*/
|
||||
public int getCount() {
|
||||
return this.count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an integer representing the total count of unique strings in the Shared String Table.
|
||||
* A string is unique even if it is a copy of another string, but has different formatting applied
|
||||
* at the character level.
|
||||
*
|
||||
* @return the total count of unique strings in the workbook
|
||||
*/
|
||||
public int getUniqueCount() {
|
||||
return this.uniqueCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the string at a given index.
|
||||
* Formatting is ignored.
|
||||
*
|
||||
* @param idx index of item to return.
|
||||
* @return the item at the specified position in this Shared String table.
|
||||
*/
|
||||
public String getEntryAt(int idx) {
|
||||
return strings[idx];
|
||||
}
|
||||
|
||||
//// ContentHandler methods ////
|
||||
|
||||
private StringBuffer characters;
|
||||
private boolean tIsOpen;
|
||||
private int index;
|
||||
|
||||
public void startElement(String uri, String localName, String name,
|
||||
Attributes attributes) throws SAXException {
|
||||
if ("sst".equals(name)) {
|
||||
String count = attributes.getValue("count");
|
||||
String uniqueCount = attributes.getValue("uniqueCount");
|
||||
this.count = Integer.parseInt(count);
|
||||
this.uniqueCount = Integer.parseInt(uniqueCount);
|
||||
this.strings = new String[this.uniqueCount];
|
||||
index = 0;
|
||||
characters = new StringBuffer();
|
||||
} else if ("si".equals(name)) {
|
||||
characters.setLength(0);
|
||||
} else if ("t".equals(name)) {
|
||||
tIsOpen = true;
|
||||
}
|
||||
}
|
||||
|
||||
public void endElement(String uri, String localName, String name)
|
||||
throws SAXException {
|
||||
if ("si".equals(name)) {
|
||||
strings[index] = characters.toString();
|
||||
++index;
|
||||
} else if ("t".equals(name)) {
|
||||
tIsOpen = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Captures characters only if a t(ext) element is open.
|
||||
*/
|
||||
public void characters(char[] ch, int start, int length)
|
||||
throws SAXException {
|
||||
if (tIsOpen)
|
||||
characters.append(ch, start, length);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue