[bug-63576] support capitalized text in WordExtractor (HWPF)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1903738 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2022-08-28 14:16:01 +00:00
parent 25f00ba7c2
commit 913d1eecf5
3 changed files with 13 additions and 0 deletions
poi-scratchpad/src
main/java/org/apache/poi/hwpf/converter
test/java/org/apache/poi/hwpf/extractor
test-data/document

View File

@ -52,6 +52,7 @@ import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.util.Beta; import org.apache.poi.util.Beta;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.LocaleUtil;
import org.apache.poi.util.StringUtil; import org.apache.poi.util.StringUtil;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;
@ -445,6 +446,10 @@ public abstract class AbstractWordConverter {
continue; continue;
} }
if (characterRun.isCapitalized() || characterRun.isSmallCaps()) {
text = text.toUpperCase(LocaleUtil.getUserLocale());
}
if (characterRun.isSpecialCharacter()) { if (characterRun.isSpecialCharacter()) {
if (text.charAt(0) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE if (text.charAt(0) == SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE
&& (wordDocument instanceof HWPFDocument)) { && (wordDocument instanceof HWPFDocument)) {

View File

@ -402,6 +402,14 @@ public final class TestWordExtractor {
} }
} }
/**
 * Opens {@code capitalized.doc}, extracts its text and checks that, once
 * surrounding whitespace is trimmed, it equals the expected sentence whose
 * final word is in upper case.
 */
@Test
void testCapitalized() throws Exception {
    final String expected = "The following word is: CAPITALIZED.";
    try (WordExtractor extractor = openExtractor("capitalized.doc")) {
        // Trim so leading/trailing whitespace around the extracted text is ignored.
        final String extracted = extractor.getText();
        assertEquals(expected, extracted.trim());
    }
}
private WordExtractor openExtractor(String fileName) throws IOException { private WordExtractor openExtractor(String fileName) throws IOException {
try (InputStream is = docTests.openResourceAsStream(fileName)) { try (InputStream is = docTests.openResourceAsStream(fileName)) {
return new WordExtractor(is); return new WordExtractor(is);

Binary file not shown.