mirror of https://github.com/apache/poi.git
Bug 55966: Include content control text in word extraction also if it is part of a paragraph
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1875802 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
456dc4d368
commit
da2afc19e2
|
@ -90,7 +90,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||
|
||||
/**
|
||||
* Should we concatenate phonetic runs in extraction? Default is <code>true</code>.
|
||||
* @param concatenatePhoneticRuns
|
||||
* @param concatenatePhoneticRuns If phonetic runs should be concatenated
|
||||
*/
|
||||
public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
|
||||
this.concatenatePhoneticRuns = concatenatePhoneticRuns;
|
||||
|
@ -138,9 +138,10 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
|
|||
extractHeaders(text, headerFooterPolicy);
|
||||
}
|
||||
|
||||
|
||||
for (IRunElement run : paragraph.getRuns()) {
|
||||
if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
|
||||
for (IRunElement run : paragraph.getIRuns()) {
|
||||
if (run instanceof XWPFSDT) {
|
||||
text.append(((XWPFSDT) run).getContent().getText());
|
||||
} else if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
|
||||
text.append(((XWPFRun)run).text());
|
||||
} else {
|
||||
text.append(run);
|
||||
|
|
|
@ -17,6 +17,16 @@
|
|||
|
||||
package org.apache.poi.xwpf.extractor;
|
||||
|
||||
import org.apache.poi.util.StringUtil;
|
||||
import org.apache.poi.xwpf.XWPFTestDataSamples;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static org.apache.poi.POITestCase.assertContains;
|
||||
import static org.apache.poi.POITestCase.assertEndsWith;
|
||||
import static org.apache.poi.POITestCase.assertNotContained;
|
||||
|
@ -25,16 +35,6 @@ import static org.junit.Assert.assertEquals;
|
|||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.poi.util.StringUtil;
|
||||
import org.apache.poi.xwpf.XWPFTestDataSamples;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests for XWPFWordExtractor
|
||||
*/
|
||||
|
@ -460,4 +460,21 @@ public class TestXWPFWordExtractor {
|
|||
assertContains(txt, "footer 1");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void bug55966() throws IOException {
|
||||
try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55966.docx")) {
|
||||
String expected = "Content control within a paragraph is here text content from within a paragraph second control with a new\n" +
|
||||
"line\n" +
|
||||
"\n" +
|
||||
"Content control that is the entire paragraph\n";
|
||||
|
||||
XWPFWordExtractor extractedDoc = new XWPFWordExtractor(doc);
|
||||
|
||||
String actual = extractedDoc.getText();
|
||||
|
||||
extractedDoc.close();
|
||||
assertEquals(expected, actual);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue