Fix SharedStringsTable in the OOXML Excel support, and update the related tests now that we have more to test

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@610506 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-01-09 18:46:30 +00:00
parent e1b5e01f8b
commit 715329aa5e
5 changed files with 121 additions and 83 deletions

View File

@ -49,7 +49,6 @@ public class HSSFXML extends HXFDocument {
public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
private WorkbookDocument workbookDoc;
private SharedStringsTable sharedStrings;
public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
@ -92,8 +91,14 @@ public class HSSFXML extends HXFDocument {
WorksheetDocument.Factory.parse(sheetPart.getInputStream());
return sheetDoc.getWorksheet();
}
/**
 * Looks up a single entry of this workbook's shared strings table.
 *
 * @param index zero-based position of the wanted entry in the table
 * @return the string stored at that position
 */
public String getSharedString(int index) {
    final String value = sharedStrings.get(index);
    return value;
}
/**
 * Exposes the underlying shared strings table itself (not a copy),
 * so changes made through the returned object affect this workbook.
 *
 * @return the shared strings table held by this workbook
 */
protected SharedStringsTable _getSharedStringsTable() {
    return this.sharedStrings;
}
}

View File

@ -18,64 +18,61 @@
package org.apache.poi.hssf.model;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.LinkedList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.opc.PackagePart;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument;
public class SharedStringsTable extends LinkedList<String> {
private static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
/** XXX: should have been using an XMLBeans object, but it cannot parse the sharedStrings schema, so we'll use DOM temporarily.
CTSst sst;
*/
public static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
private SstDocument doc;
private PackagePart part;
private DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
private DocumentBuilder parser;
public SharedStringsTable(PackagePart part) throws IOException {
public SharedStringsTable(PackagePart part) throws IOException, XmlException {
this.part = part;
InputStream is = part.getInputStream();
try {
builderFactory.setNamespaceAware(true);
this.parser = builderFactory.newDocumentBuilder();
readFrom(is);
} catch (ParserConfigurationException e) {
throw new RuntimeException(e);
} catch (SAXException e) {
throw new RuntimeException(e);
} finally {
if (is != null) is.close();
}
doc = SstDocument.Factory.parse(
part.getInputStream()
);
read();
}
public void readFrom(InputStream is) throws IOException, SAXException {
Document doc = parser.parse(is);
Element root = doc.getDocumentElement();
NodeList sis = root.getElementsByTagNameNS(MAIN_SML_NS_URI, "si");
for (int i = 0 ; i < sis.getLength() ; ++i) {
Element si = (Element) sis.item(i);
NodeList ts = si.getElementsByTagNameNS(MAIN_SML_NS_URI, "t");
String t = "";
if (ts.getLength() > 0 && ts.item(0).getFirstChild() != null) {
t = ts.item(0).getFirstChild().getNodeValue();
add(t);
}
}
/**
 * Appends the text of every string item of the parsed
 * document to this list, in array order.
 */
private void read() {
    CTSst table = doc.getSst();
    for (CTRst item : table.getSiArray()) {
        add(item.getT());
    }
}
/**
 * Writes the current shared strings table into
 * the associated OOXML PackagePart.
 * <p>
 * The string items held by the underlying document are replaced by
 * the current contents of this list, the count attributes are updated
 * to the list size, and the document is saved to the part's output
 * stream. The stream is always closed, even if saving fails.
 *
 * @throws IOException if the part's output stream cannot be
 *  obtained, written to or closed
 */
public void write() throws IOException {
    CTSst sst = doc.getSst();

    // Remove the old list, going backwards so that the
    //  indexes still to be removed stay valid
    for (int i = sst.sizeOfSiArray() - 1; i >= 0; i--) {
        sst.removeSi(i);
    }

    // Add the new one
    for (String s : this) {
        sst.addNewSi().setT(s);
    }

    // Update the counts
    // NOTE(review): both attributes are set to the list size; confirm
    //  whether count is meant to differ from uniqueCount
    sst.setCount(this.size());
    sst.setUniqueCount(this.size());

    // Write out, making sure the stream is released
    //  even when the save itself throws
    OutputStream out = part.getOutputStream();
    try {
        doc.save(out);
    } finally {
        out.close();
    }
}
}

View File

@ -42,6 +42,8 @@ public class HSSFXMLCell {
switch (cell.getT().intValue()) {
case STCellType.INT_S:
return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
case STCellType.INT_INLINE_STR:
return cell.getV();
case STCellType.INT_N:
return cell.getV();
// TODO: support other types

View File

@ -18,6 +18,7 @@ package org.apache.poi.hssf;
import java.io.File;
import org.apache.poi.hssf.model.SharedStringsTable;
import org.apache.poi.hxf.HXFDocument;
import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackagePart;
@ -124,4 +125,36 @@ public class TestHSSFXML extends TestCase {
assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
}
/**
 * Checks that the shared strings table of the sample file can be
 * read, changed, written back to the package and then read again
 * with the changes intact.
 */
public void testSharedStringBasics() throws Exception {
    HSSFXML workbook = new HSSFXML(HXFDocument.openPackage(sampleFile));

    SharedStringsTable table = workbook._getSharedStringsTable();
    assertNotNull(table);

    assertEquals(10, table.size());
    assertEquals("Lorem", table.get(0));

    // The workbook level accessor must agree with the table
    int idx = 0;
    for (String expected : table) {
        assertEquals(expected, workbook.getSharedString(idx));
        idx++;
    }

    // Append two entries and change the first one, then
    //  save and re-open to check the changes were kept
    table.add("Foo");
    table.add("Bar");
    table.set(0, "LoremLorem");
    table.write();

    workbook = new HSSFXML(workbook.getPackage());
    table = workbook._getSharedStringsTable();

    assertEquals(12, table.size());
    assertEquals("LoremLorem", table.get(0));

    idx = 0;
    for (String expected : table) {
        assertEquals(expected, workbook.getSharedString(idx));
        idx++;
    }
}
}

View File

@ -101,32 +101,32 @@ public class TestHXFExcelExtractor extends TestCase {
extractor.setIncludeSheetNames(false);
text = extractor.getText();
assertEquals(
"0\t111\n" +
"1\t222\n" +
"2\t333\n" +
"3\t444\n" +
"4\t555\n" +
"5\t666\n" +
"6\t777\n" +
"7\t888\n" +
"8\t999\n" +
"9\t4995\n" +
"Lorem\t111\n" +
"ipsum\t222\n" +
"dolor\t333\n" +
"sit\t444\n" +
"amet\t555\n" +
"consectetuer\t666\n" +
"adipiscing\t777\n" +
"elit\t888\n" +
"Nunc\t999\n" +
"at\t4995\n" +
"\n\n", text);
// Now get formulas not their values
extractor.setFormulasNotResults(true);
text = extractor.getText();
assertEquals(
"0\t111\n" +
"1\t222\n" +
"2\t333\n" +
"3\t444\n" +
"4\t555\n" +
"5\t666\n" +
"6\t777\n" +
"7\t888\n" +
"8\t999\n" +
"9\tSUM(B1:B9)\n" +
"Lorem\t111\n" +
"ipsum\t222\n" +
"dolor\t333\n" +
"sit\t444\n" +
"amet\t555\n" +
"consectetuer\t666\n" +
"adipiscing\t777\n" +
"elit\t888\n" +
"Nunc\t999\n" +
"at\tSUM(B1:B9)\n" +
"\n\n", text);
// With sheet names too
@ -134,16 +134,16 @@ public class TestHXFExcelExtractor extends TestCase {
text = extractor.getText();
assertEquals(
"Sheet1\n" +
"0\t111\n" +
"1\t222\n" +
"2\t333\n" +
"3\t444\n" +
"4\t555\n" +
"5\t666\n" +
"6\t777\n" +
"7\t888\n" +
"8\t999\n" +
"9\tSUM(B1:B9)\n\n" +
"Lorem\t111\n" +
"ipsum\t222\n" +
"dolor\t333\n" +
"sit\t444\n" +
"amet\t555\n" +
"consectetuer\t666\n" +
"adipiscing\t777\n" +
"elit\t888\n" +
"Nunc\t999\n" +
"at\tSUM(B1:B9)\n\n" +
"Sheet2\n\n" +
"Sheet3\n"
, text);
@ -161,9 +161,10 @@ public class TestHXFExcelExtractor extends TestCase {
assertTrue(text.length() > 0);
// Might not have all formatting it should do!
// TODO decide if we should really have the "null" in there
assertTrue(text.startsWith(
"Avgtxfull\n" +
"3\t13\t3\t2\t2\t3\t2\t"
"null\t(iii) AVERAGE TAX RATES ON ANNUAL"
));
}
@ -184,8 +185,8 @@ public class TestHXFExcelExtractor extends TestCase {
POITextExtractor extractor = extractors[i];
String text = extractor.getText().replaceAll("[\r\t]", "");
System.out.println(text.length());
System.out.println(text);
//System.out.println(text.length());
//System.out.println(text);
assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
Matcher m = pattern.matcher(text);