mirror of https://github.com/apache/poi.git
Fix for SharedStringsTable in ooxml excel support, and related test updates now we have more to test
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@610506 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e1b5e01f8b
commit
715329aa5e
|
@ -49,7 +49,6 @@ public class HSSFXML extends HXFDocument {
|
|||
public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
|
||||
|
||||
private WorkbookDocument workbookDoc;
|
||||
|
||||
private SharedStringsTable sharedStrings;
|
||||
|
||||
public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
|
||||
|
@ -92,8 +91,14 @@ public class HSSFXML extends HXFDocument {
|
|||
WorksheetDocument.Factory.parse(sheetPart.getInputStream());
|
||||
return sheetDoc.getWorksheet();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the shared string at the given index
|
||||
*/
|
||||
public String getSharedString(int index) {
|
||||
return this.sharedStrings.get(index);
|
||||
}
|
||||
protected SharedStringsTable _getSharedStringsTable() {
|
||||
return sharedStrings;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,64 +18,61 @@
|
|||
package org.apache.poi.hssf.model;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.LinkedList;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxml4j.opc.PackagePart;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument;
|
||||
|
||||
|
||||
public class SharedStringsTable extends LinkedList<String> {
|
||||
|
||||
private static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
|
||||
|
||||
/** XXX: should have been using an XMLBeans object, but it cannot parse the sharedStrings schema, so we'll use DOM temporarily.
|
||||
CTSst sst;
|
||||
*/
|
||||
|
||||
public static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
|
||||
|
||||
private SstDocument doc;
|
||||
private PackagePart part;
|
||||
|
||||
private DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
|
||||
|
||||
private DocumentBuilder parser;
|
||||
|
||||
public SharedStringsTable(PackagePart part) throws IOException {
|
||||
public SharedStringsTable(PackagePart part) throws IOException, XmlException {
|
||||
this.part = part;
|
||||
InputStream is = part.getInputStream();
|
||||
try {
|
||||
builderFactory.setNamespaceAware(true);
|
||||
this.parser = builderFactory.newDocumentBuilder();
|
||||
readFrom(is);
|
||||
} catch (ParserConfigurationException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (SAXException e) {
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
if (is != null) is.close();
|
||||
}
|
||||
|
||||
|
||||
doc = SstDocument.Factory.parse(
|
||||
part.getInputStream()
|
||||
);
|
||||
read();
|
||||
}
|
||||
|
||||
public void readFrom(InputStream is) throws IOException, SAXException {
|
||||
Document doc = parser.parse(is);
|
||||
Element root = doc.getDocumentElement();
|
||||
NodeList sis = root.getElementsByTagNameNS(MAIN_SML_NS_URI, "si");
|
||||
for (int i = 0 ; i < sis.getLength() ; ++i) {
|
||||
Element si = (Element) sis.item(i);
|
||||
NodeList ts = si.getElementsByTagNameNS(MAIN_SML_NS_URI, "t");
|
||||
String t = "";
|
||||
if (ts.getLength() > 0 && ts.item(0).getFirstChild() != null) {
|
||||
t = ts.item(0).getFirstChild().getNodeValue();
|
||||
add(t);
|
||||
}
|
||||
}
|
||||
private void read() {
|
||||
CTRst[] sts = doc.getSst().getSiArray();
|
||||
for (int i = 0; i < sts.length; i++) {
|
||||
add(sts[i].getT());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the current shared strings table into
|
||||
* the associated OOXML PackagePart
|
||||
*/
|
||||
public void write() throws IOException {
|
||||
CTSst sst = doc.getSst();
|
||||
|
||||
// Remove the old list
|
||||
for(int i=sst.sizeOfSiArray() - 1; i>=0; i--) {
|
||||
sst.removeSi(i);
|
||||
}
|
||||
|
||||
// Add the new one
|
||||
for(String s : this) {
|
||||
sst.addNewSi().setT(s);
|
||||
}
|
||||
|
||||
// Update the counts
|
||||
sst.setCount(this.size());
|
||||
sst.setUniqueCount(this.size());
|
||||
|
||||
// Write out
|
||||
OutputStream out = part.getOutputStream();
|
||||
doc.save(out);
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,6 +42,8 @@ public class HSSFXMLCell {
|
|||
switch (cell.getT().intValue()) {
|
||||
case STCellType.INT_S:
|
||||
return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
|
||||
case STCellType.INT_INLINE_STR:
|
||||
return cell.getV();
|
||||
case STCellType.INT_N:
|
||||
return cell.getV();
|
||||
// TODO: support other types
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.poi.hssf;
|
|||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.poi.hssf.model.SharedStringsTable;
|
||||
import org.apache.poi.hxf.HXFDocument;
|
||||
import org.openxml4j.opc.Package;
|
||||
import org.openxml4j.opc.PackagePart;
|
||||
|
@ -124,4 +125,36 @@ public class TestHSSFXML extends TestCase {
|
|||
assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
|
||||
assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
|
||||
}
|
||||
|
||||
public void testSharedStringBasics() throws Exception {
|
||||
HSSFXML xml = new HSSFXML(
|
||||
HXFDocument.openPackage(sampleFile)
|
||||
);
|
||||
assertNotNull(xml._getSharedStringsTable());
|
||||
|
||||
SharedStringsTable sst = xml._getSharedStringsTable();
|
||||
assertEquals(10, sst.size());
|
||||
|
||||
assertEquals("Lorem", sst.get(0));
|
||||
for(int i=0; i<sst.size(); i++) {
|
||||
assertEquals(sst.get(i), xml.getSharedString(i));
|
||||
}
|
||||
|
||||
// Add a few more, then save and reload, checking
|
||||
// changes have been kept
|
||||
sst.add("Foo");
|
||||
sst.add("Bar");
|
||||
sst.set(0, "LoremLorem");
|
||||
|
||||
sst.write();
|
||||
|
||||
xml = new HSSFXML(xml.getPackage());
|
||||
sst = xml._getSharedStringsTable();
|
||||
assertEquals(12, sst.size());
|
||||
|
||||
assertEquals("LoremLorem", sst.get(0));
|
||||
for(int i=0; i<sst.size(); i++) {
|
||||
assertEquals(sst.get(i), xml.getSharedString(i));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -101,32 +101,32 @@ public class TestHXFExcelExtractor extends TestCase {
|
|||
extractor.setIncludeSheetNames(false);
|
||||
text = extractor.getText();
|
||||
assertEquals(
|
||||
"0\t111\n" +
|
||||
"1\t222\n" +
|
||||
"2\t333\n" +
|
||||
"3\t444\n" +
|
||||
"4\t555\n" +
|
||||
"5\t666\n" +
|
||||
"6\t777\n" +
|
||||
"7\t888\n" +
|
||||
"8\t999\n" +
|
||||
"9\t4995\n" +
|
||||
"Lorem\t111\n" +
|
||||
"ipsum\t222\n" +
|
||||
"dolor\t333\n" +
|
||||
"sit\t444\n" +
|
||||
"amet\t555\n" +
|
||||
"consectetuer\t666\n" +
|
||||
"adipiscing\t777\n" +
|
||||
"elit\t888\n" +
|
||||
"Nunc\t999\n" +
|
||||
"at\t4995\n" +
|
||||
"\n\n", text);
|
||||
|
||||
// Now get formulas not their values
|
||||
extractor.setFormulasNotResults(true);
|
||||
text = extractor.getText();
|
||||
assertEquals(
|
||||
"0\t111\n" +
|
||||
"1\t222\n" +
|
||||
"2\t333\n" +
|
||||
"3\t444\n" +
|
||||
"4\t555\n" +
|
||||
"5\t666\n" +
|
||||
"6\t777\n" +
|
||||
"7\t888\n" +
|
||||
"8\t999\n" +
|
||||
"9\tSUM(B1:B9)\n" +
|
||||
"Lorem\t111\n" +
|
||||
"ipsum\t222\n" +
|
||||
"dolor\t333\n" +
|
||||
"sit\t444\n" +
|
||||
"amet\t555\n" +
|
||||
"consectetuer\t666\n" +
|
||||
"adipiscing\t777\n" +
|
||||
"elit\t888\n" +
|
||||
"Nunc\t999\n" +
|
||||
"at\tSUM(B1:B9)\n" +
|
||||
"\n\n", text);
|
||||
|
||||
// With sheet names too
|
||||
|
@ -134,16 +134,16 @@ public class TestHXFExcelExtractor extends TestCase {
|
|||
text = extractor.getText();
|
||||
assertEquals(
|
||||
"Sheet1\n" +
|
||||
"0\t111\n" +
|
||||
"1\t222\n" +
|
||||
"2\t333\n" +
|
||||
"3\t444\n" +
|
||||
"4\t555\n" +
|
||||
"5\t666\n" +
|
||||
"6\t777\n" +
|
||||
"7\t888\n" +
|
||||
"8\t999\n" +
|
||||
"9\tSUM(B1:B9)\n\n" +
|
||||
"Lorem\t111\n" +
|
||||
"ipsum\t222\n" +
|
||||
"dolor\t333\n" +
|
||||
"sit\t444\n" +
|
||||
"amet\t555\n" +
|
||||
"consectetuer\t666\n" +
|
||||
"adipiscing\t777\n" +
|
||||
"elit\t888\n" +
|
||||
"Nunc\t999\n" +
|
||||
"at\tSUM(B1:B9)\n\n" +
|
||||
"Sheet2\n\n" +
|
||||
"Sheet3\n"
|
||||
, text);
|
||||
|
@ -161,9 +161,10 @@ public class TestHXFExcelExtractor extends TestCase {
|
|||
assertTrue(text.length() > 0);
|
||||
|
||||
// Might not have all formatting it should do!
|
||||
// TODO decide if we should really have the "null" in there
|
||||
assertTrue(text.startsWith(
|
||||
"Avgtxfull\n" +
|
||||
"3\t13\t3\t2\t2\t3\t2\t"
|
||||
"null\t(iii) AVERAGE TAX RATES ON ANNUAL"
|
||||
));
|
||||
}
|
||||
|
||||
|
@ -184,8 +185,8 @@ public class TestHXFExcelExtractor extends TestCase {
|
|||
POITextExtractor extractor = extractors[i];
|
||||
|
||||
String text = extractor.getText().replaceAll("[\r\t]", "");
|
||||
System.out.println(text.length());
|
||||
System.out.println(text);
|
||||
//System.out.println(text.length());
|
||||
//System.out.println(text);
|
||||
assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
|
||||
Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
|
||||
Matcher m = pattern.matcher(text);
|
||||
|
|
Loading…
Reference in New Issue