Fix bug #49446 - Don't treat field codes (w:instrText, spec section 17.16.23) as part of the paragraph's text

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@958965 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-06-29 13:38:24 +00:00
parent b75c47e1e5
commit f69404de8d
4 changed files with 21 additions and 1 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.7-beta2" date="2010-??-??">
<action dev="POI-DEVELOPERS" type="fix">49446 - Don't treat field codes (w:instrText, spec section 17.16.23) as part of the paragraph's text</action>
<action dev="POI-DEVELOPERS" type="fix">XSLFSlideShow shouldn't break on .thmx (theme) files. Support for them is still very limited though</action>
</release>
<release version="3.7-beta1" date="2010-06-20">

View File

@ -133,8 +133,14 @@ public class XWPFParagraph implements IBodyElement{
while (c.toNextSelection()) {
XmlObject o = c.getObject();
if (o instanceof CTText) {
String tagName = o.getDomNode().getNodeName();
// Field Codes (w:instrText, defined in spec sec. 17.16.23)
// come up as instances of CTText, but we don't want them
// in the normal text output
if (!"w:instrText".equals(tagName)) {
text.append(((CTText) o).getStringValue());
}
}
if (o instanceof CTPTab) {
text.append("\t");
}

View File

@ -237,4 +237,17 @@ public class TestXWPFWordExtractor extends TestCase {
// Now check the first paragraph in total
assertTrue(extractor.getText().contains("a\tb\n"));
}
/**
* The output should not contain field codes, e.g. those specified in the
* w:instrText tag (spec sec. 17.16.23)
*/
public void testNoFieldCodes() {
    // Extract the text of the sample document in one go
    XWPFWordExtractor extractor = new XWPFWordExtractor(
            XWPFTestDataSamples.openSampleDocument("FieldCodes.docx"));
    String extracted = extractor.getText();

    // Guard against a trivially passing test: some text must come out
    assertTrue(extracted.length() > 0);

    // Field code instructions that must not appear in the extracted text
    String[] fieldCodes = { "AUTHOR", "CREATEDATE" };
    for (String fieldCode : fieldCodes) {
        assertFalse(extracted.contains(fieldCode));
    }
}
}

Binary file not shown.