mirror of https://github.com/apache/poi.git
bug 50955 - incorporate info from the DocumentSummaryInformation for
guessing the encoding. Back off to the old method if DocSummInfo is not available. Thanks to Andreas Beeker for recommending this direction. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1791002 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
97c5c54eb7
commit
4657756a12
src/scratchpad/src/org/apache/poi/hwpf
|
@ -19,8 +19,12 @@ package org.apache.poi.hwpf;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import org.apache.poi.hpsf.CustomProperties;
|
||||
import org.apache.poi.hpsf.DocumentSummaryInformation;
|
||||
import org.apache.poi.hpsf.Section;
|
||||
import org.apache.poi.hwmf.record.HwmfFont;
|
||||
import org.apache.poi.hwpf.model.ComplexFileTable;
|
||||
import org.apache.poi.hwpf.model.FontTable;
|
||||
|
@ -188,7 +192,32 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
|||
* @return The detected Charset from the old font table
|
||||
*/
|
||||
private Charset guessCodePage(OldFontTable fontTable) {
|
||||
|
||||
//try to get it out of the overall document summary information
|
||||
DocumentSummaryInformation summaryInformation = getDocumentSummaryInformation();
|
||||
if (summaryInformation != null) {
|
||||
CustomProperties customProperties = summaryInformation.getCustomProperties();
|
||||
if (customProperties != null) {
|
||||
int codePage = customProperties.getCodepage();
|
||||
try {
|
||||
return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
//swallow
|
||||
}
|
||||
}
|
||||
//for now, try to get first valid code page in a valid section
|
||||
for (Section section : summaryInformation.getSections()) {
|
||||
if (section.getOffset() < 0) {
|
||||
continue;
|
||||
}
|
||||
int codePage = section.getCodepage();
|
||||
try {
|
||||
return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
//swallow
|
||||
}
|
||||
}
|
||||
}
|
||||
//if that still doesn't work, pick the first non-default non symbol charset
|
||||
for (OldFfn oldFfn : fontTable.getFontNames()) {
|
||||
HwmfFont.WmfCharset wmfCharset = HwmfFont.WmfCharset.valueOf(oldFfn.getChs()& 0xff);
|
||||
if (wmfCharset != null &&
|
||||
|
|
Loading…
Reference in New Issue