mirror of https://github.com/apache/poi.git
Support for escaped unicode characters in Shared String Table, see bug #49653
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@979952 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e46e2c44a7
commit
8ddb1b6dbd
|
@ -34,10 +34,11 @@
|
|||
|
||||
<changes>
|
||||
<release version="3.7-beta2" date="2010-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">49579 - prevent ArrayIndexOutOfBoundsException in UnknownEscherRecord</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">49593 - preserve leading and trailing white spaces in XWPFRun</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49653 - Support for escaped unicode characters in Shared String Table</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49579 - prevent ArrayIndexOutOfBoundsException in UnknownEscherRecord</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49593 - preserve leading and trailing white spaces in XWPFRun</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">49455 - Insert the content of fldSimple fields into the XWPFWordTextExtractor output</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">49640 - Fixed parsing formulas containing defined names beginning with an underscore</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49640 - Fixed parsing formulas containing defined names beginning with an underscore</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">49538 - Added implementation for POISSON()</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">49524 - Support for setting cell text to be vertically rotated, via style.setRotation(0xff)</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49609 - Case insensitive matching of OOXML part names</action>
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
package org.apache.poi.xssf.usermodel;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
|
||||
|
@ -75,6 +77,8 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.STXstring;
|
|||
* @author Yegor Kozlov
|
||||
*/
|
||||
public class XSSFRichTextString implements RichTextString {
|
||||
// Matches the OOXML escape _xHHHH_ and captures the four hex digits.
// Hex digits are accepted in either case so that e.g. _x000d_ is decoded as well as _x000D_.
private static final Pattern utfPtrn = Pattern.compile("_x([0-9A-Fa-f]{4})_");
|
||||
|
||||
private CTRst st;
|
||||
private StylesTable styles;
|
||||
|
||||
|
@ -337,13 +341,13 @@ public class XSSFRichTextString implements RichTextString {
|
|||
*/
|
||||
public String getString() {
|
||||
if(st.sizeOfRArray() == 0) {
|
||||
return st.getT();
|
||||
return utfDecode(st.getT());
|
||||
}
|
||||
StringBuffer buf = new StringBuffer();
|
||||
for(CTRElt r : st.getRList()){
|
||||
buf.append(r.getT());
|
||||
}
|
||||
return buf.toString();
|
||||
return utfDecode(buf.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -490,4 +494,39 @@ public class XSSFRichTextString implements RichTextString {
|
|||
c.dispose();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For all characters which cannot be represented in XML as defined by the XML 1.0 specification,
|
||||
* the characters are escaped using the Unicode numerical character representation escape character
|
||||
* format _xHHHH_, where H represents a hexadecimal character in the character's value.
|
||||
* <p>
|
||||
* Example: The Unicode character 0D is invalid in an XML 1.0 document,
|
||||
* so it shall be escaped as <code>_x000D_</code>.
|
||||
* </p>
|
||||
* See section 3.18.9 in the OOXML spec.
|
||||
*
|
||||
* @param value the string to decode
|
||||
* @return the decoded string
|
||||
*/
|
||||
static String utfDecode(String value){
|
||||
if(value == null) return null;
|
||||
|
||||
StringBuffer buf = new StringBuffer();
|
||||
Matcher m = utfPtrn.matcher(value);
|
||||
int idx = 0;
|
||||
while(m.find()) {
|
||||
int pos = m.start();
|
||||
if( pos > idx) {
|
||||
buf.append(value.substring(idx, pos));
|
||||
}
|
||||
|
||||
String code = m.group(1);
|
||||
int icode = Integer.decode("0x" + code);
|
||||
buf.append((char)icode);
|
||||
|
||||
idx = m.end();
|
||||
}
|
||||
buf.append(value.substring(idx));
|
||||
return buf.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -130,4 +130,16 @@ public final class TestXSSFRichTextString extends TestCase {
|
|||
assertEquals("<xml-fragment xml:space=\"preserve\"> Apache</xml-fragment>", xs.xmlText());
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* test that unicode representation_ xHHHH_ is properly processed
|
||||
*/
|
||||
public void testUtfDecode() {
|
||||
CTRst st = CTRst.Factory.newInstance();
|
||||
st.setT("abc_x000D_2ef_x000D_");
|
||||
XSSFRichTextString rt = new XSSFRichTextString(st);
|
||||
//_x000D_ is converted into carriage return
|
||||
assertEquals("abc\r2ef\r", rt.getString());
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue