mirror of https://github.com/apache/poi.git
Bug 55966: Include content control text in Word text extraction even when the content control is part of a paragraph
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1875802 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
456dc4d368
commit
da2afc19e2
|
@ -90,7 +90,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should we concatenate phonetic runs in extraction. Default is <code>true</code>
|
* Should we concatenate phonetic runs in extraction. Default is <code>true</code>
|
||||||
* @param concatenatePhoneticRuns
|
* @param concatenatePhoneticRuns If phonetic runs should be concatenated
|
||||||
*/
|
*/
|
||||||
public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
|
public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
|
||||||
this.concatenatePhoneticRuns = concatenatePhoneticRuns;
|
this.concatenatePhoneticRuns = concatenatePhoneticRuns;
|
||||||
|
@ -138,9 +138,10 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
||||||
extractHeaders(text, headerFooterPolicy);
|
extractHeaders(text, headerFooterPolicy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (IRunElement run : paragraph.getIRuns()) {
|
||||||
for (IRunElement run : paragraph.getRuns()) {
|
if (run instanceof XWPFSDT) {
|
||||||
if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
|
text.append(((XWPFSDT) run).getContent().getText());
|
||||||
|
} else if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
|
||||||
text.append(((XWPFRun)run).text());
|
text.append(((XWPFRun)run).text());
|
||||||
} else {
|
} else {
|
||||||
text.append(run);
|
text.append(run);
|
||||||
|
|
|
@ -17,6 +17,16 @@
|
||||||
|
|
||||||
package org.apache.poi.xwpf.extractor;
|
package org.apache.poi.xwpf.extractor;
|
||||||
|
|
||||||
|
import org.apache.poi.util.StringUtil;
|
||||||
|
import org.apache.poi.xwpf.XWPFTestDataSamples;
|
||||||
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import static org.apache.poi.POITestCase.assertContains;
|
import static org.apache.poi.POITestCase.assertContains;
|
||||||
import static org.apache.poi.POITestCase.assertEndsWith;
|
import static org.apache.poi.POITestCase.assertEndsWith;
|
||||||
import static org.apache.poi.POITestCase.assertNotContained;
|
import static org.apache.poi.POITestCase.assertNotContained;
|
||||||
|
@ -25,16 +35,6 @@ import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import org.apache.poi.util.StringUtil;
|
|
||||||
import org.apache.poi.xwpf.XWPFTestDataSamples;
|
|
||||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests for HXFWordExtractor
|
* Tests for HXFWordExtractor
|
||||||
*/
|
*/
|
||||||
|
@ -460,4 +460,21 @@ public class TestXWPFWordExtractor {
|
||||||
assertContains(txt, "footer 1");
|
assertContains(txt, "footer 1");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void bug55966() throws IOException {
|
||||||
|
try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55966.docx")) {
|
||||||
|
String expected = "Content control within a paragraph is here text content from within a paragraph second control with a new\n" +
|
||||||
|
"line\n" +
|
||||||
|
"\n" +
|
||||||
|
"Content control that is the entire paragraph\n";
|
||||||
|
|
||||||
|
XWPFWordExtractor extractedDoc = new XWPFWordExtractor(doc);
|
||||||
|
|
||||||
|
String actual = extractedDoc.getText();
|
||||||
|
|
||||||
|
extractedDoc.close();
|
||||||
|
assertEquals(expected, actual);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue