mirror of https://github.com/apache/poi.git
[bug-63575] support capitalized text in XWPFWordExtractor
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1903729 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ab5cb372e5
commit
80f89a3674
|
@ -27,6 +27,7 @@ import java.math.RoundingMode;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
import javax.xml.namespace.QName;
|
import javax.xml.namespace.QName;
|
||||||
|
|
||||||
|
@ -35,10 +36,7 @@ import org.apache.poi.ooxml.POIXMLException;
|
||||||
import org.apache.poi.ooxml.util.DocumentHelper;
|
import org.apache.poi.ooxml.util.DocumentHelper;
|
||||||
import org.apache.poi.ooxml.util.POIXMLUnits;
|
import org.apache.poi.ooxml.util.POIXMLUnits;
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
import org.apache.poi.util.HexDump;
|
import org.apache.poi.util.*;
|
||||||
import org.apache.poi.util.Internal;
|
|
||||||
import org.apache.poi.util.Removal;
|
|
||||||
import org.apache.poi.util.Units;
|
|
||||||
import org.apache.poi.wp.usermodel.CharacterRun;
|
import org.apache.poi.wp.usermodel.CharacterRun;
|
||||||
import org.apache.xmlbeans.*;
|
import org.apache.xmlbeans.*;
|
||||||
import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl;
|
import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl;
|
||||||
|
@ -1381,7 +1379,13 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
|
||||||
// come up as instances of CTText, but we don't want them
|
// come up as instances of CTText, but we don't want them
|
||||||
// in the normal text output
|
// in the normal text output
|
||||||
if (!("instrText".equals(node.getLocalName()) && XWPFDocument.NS_OOXML_WP_MAIN.equals(node.getNamespaceURI()))) {
|
if (!("instrText".equals(node.getLocalName()) && XWPFDocument.NS_OOXML_WP_MAIN.equals(node.getNamespaceURI()))) {
|
||||||
text.append(((CTText) o).getStringValue());
|
String textValue = ((CTText) o).getStringValue();
|
||||||
|
if (textValue != null) {
|
||||||
|
if (isCapitalized() || isSmallCaps()) {
|
||||||
|
textValue = textValue.toUpperCase(LocaleUtil.getUserLocale());
|
||||||
|
}
|
||||||
|
text.append(textValue);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1391,7 +1395,9 @@ public class XWPFRun implements ISDTContents, IRunElement, CharacterRun {
|
||||||
if (ctfldChar.getFldCharType() == STFldCharType.BEGIN) {
|
if (ctfldChar.getFldCharType() == STFldCharType.BEGIN) {
|
||||||
if (ctfldChar.getFfData() != null) {
|
if (ctfldChar.getFfData() != null) {
|
||||||
for (CTFFCheckBox checkBox : ctfldChar.getFfData().getCheckBoxList()) {
|
for (CTFFCheckBox checkBox : ctfldChar.getFfData().getCheckBoxList()) {
|
||||||
text.append((checkBox.getDefault() != null && POIXMLUnits.parseOnOff(checkBox.getDefault().xgetVal())) ? "|X|" : "|_|");
|
String textValue = checkBox.getDefault() != null && POIXMLUnits.parseOnOff(checkBox.getDefault().xgetVal()) ?
|
||||||
|
"|X|" : "|_|";
|
||||||
|
text.append(textValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -478,4 +478,13 @@ class TestXWPFWordExtractor {
|
||||||
assertEquals(expected, actual);
|
assertEquals(expected, actual);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testCapitalizedFlag() throws IOException {
|
||||||
|
try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("capitalized.docx");
|
||||||
|
XWPFWordExtractor extractor = new XWPFWordExtractor(doc)) {
|
||||||
|
String txt = extractor.getText();
|
||||||
|
assertEquals( "The following word is: CAPITALIZED.", txt.trim());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue