mirror of https://github.com/apache/poi.git
Bug 60677 -- handle multibyte encodings correctly in HwmfGraphics' drawString. Thanks to Dominik Stadler for finding this bug and a triggering doc via large scale regression testing.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1781593 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
62e584e656
commit
d73819d8da
|
@ -338,13 +338,44 @@ public class HwmfGraphics {
|
||||||
if (dx == null || dx.length == 0) {
|
if (dx == null || dx.length == 0) {
|
||||||
addAttributes(as, font);
|
addAttributes(as, font);
|
||||||
} else {
|
} else {
|
||||||
for (int i=0; i<len; i++) {
|
int[] dxNormed = dx;
|
||||||
|
//for multi-byte encodings (e.g. Shift_JIS), the byte length
|
||||||
|
//might not equal the string length().
|
||||||
|
//The x information is stored in dx[], an array parallel to the
|
||||||
|
//byte array text[]. dx[] stores the x info in the
|
||||||
|
//first byte of a multibyte character, but dx[] stores 0
|
||||||
|
//for the other bytes in that character.
|
||||||
|
//We need to map this information to the String offsets
|
||||||
|
//dx[0] = 13 text[0] = -125
|
||||||
|
//dx[1] = 0 text[1] = 118
|
||||||
|
//dx[2] = 14 text[2] = -125
|
||||||
|
//dx[3] = 0 text[3] = -115
|
||||||
|
// needs to be remapped as:
|
||||||
|
//dxNormed[0] = 13 textString.get(0) = U+30D7
|
||||||
|
//dxNormed[1] = 14 textString.get(1) = U+30ED
|
||||||
|
if (textString.length() != text.length) {
|
||||||
|
int codePoints = textString.codePointCount(0, textString.length());
|
||||||
|
dxNormed = new int[codePoints];
|
||||||
|
int dxPosition = 0;
|
||||||
|
for (int offset = 0; offset < textString.length(); ) {
|
||||||
|
dxNormed[offset] = dx[dxPosition];
|
||||||
|
int[] chars = new int[1];
|
||||||
|
int cp = textString.codePointAt(offset);
|
||||||
|
chars[0] = cp;
|
||||||
|
//now figure out how many bytes it takes to encode that
|
||||||
|
//code point in the charset
|
||||||
|
int byteLength = new String(chars, 0, chars.length).getBytes(charset).length;
|
||||||
|
dxPosition += byteLength;
|
||||||
|
offset += Character.charCount(cp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < dxNormed.length; i++) {
|
||||||
addAttributes(as, font);
|
addAttributes(as, font);
|
||||||
// Tracking works as a prefix/advance space on characters whereas
|
// Tracking works as a prefix/advance space on characters whereas
|
||||||
// dx[...] is the complete width of the current char
|
// dx[...] is the complete width of the current char
|
||||||
// therefore we need to add the additional/suffix width to the next char
|
// therefore we need to add the additional/suffix width to the next char
|
||||||
if (i<len-1) {
|
if (i < dxNormed.length - 1) {
|
||||||
as.addAttribute(TextAttribute.TRACKING, (dx[i]-fontW)/fontH, i+1, i+2);
|
as.addAttribute(TextAttribute.TRACKING, (dxNormed[i] - fontW) / fontH, i + 1, i + 2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -222,4 +222,30 @@ public class TestHwmfParsing {
|
||||||
assertTrue(txt.contains("\u0411\u0430\u043B\u0430\u043D\u0441"));
|
assertTrue(txt.contains("\u0411\u0430\u043B\u0430\u043D\u0441"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Ignore("If we decide we can use the common crawl file attached to Bug 60677, " +
|
||||||
|
"we can turn this back on")
|
||||||
|
public void testShift_JIS() throws Exception {
|
||||||
|
//TODO: move test file to framework and fix this
|
||||||
|
File f = new File("C:/data/file8.wmf");
|
||||||
|
HwmfPicture wmf = new HwmfPicture(new FileInputStream(f));
|
||||||
|
|
||||||
|
Charset charset = LocaleUtil.CHARSET_1252;
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
//this is pure hackery for specifying the font
|
||||||
|
//this happens to work on this test file, but you need to
|
||||||
|
//do what Graphics does by maintaining the stack, etc.!
|
||||||
|
for (HwmfRecord r : wmf.getRecords()) {
|
||||||
|
if (r.getRecordType().equals(HwmfRecordType.createFontIndirect)) {
|
||||||
|
HwmfFont font = ((HwmfText.WmfCreateFontIndirect)r).getFont();
|
||||||
|
charset = (font.getCharSet().getCharset() == null) ? LocaleUtil.CHARSET_1252 : font.getCharSet().getCharset();
|
||||||
|
}
|
||||||
|
if (r.getRecordType().equals(HwmfRecordType.extTextOut)) {
|
||||||
|
HwmfText.WmfExtTextOut textOut = (HwmfText.WmfExtTextOut)r;
|
||||||
|
sb.append(textOut.getText(charset)).append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
String txt = sb.toString();
|
||||||
|
assertTrue(txt.contains("\u822A\u7A7A\u60C5\u5831\u696D\u52D9\u3078\u306E\uFF27\uFF29\uFF33"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue