mirror of https://github.com/apache/poi.git
Support for escaped unicode characters in Shared String Table, see bug #49653
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@979952 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e46e2c44a7
commit
8ddb1b6dbd
|
@ -34,10 +34,11 @@
|
||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.7-beta2" date="2010-??-??">
|
<release version="3.7-beta2" date="2010-??-??">
|
||||||
<action dev="POI-DEVELOPERS" type="add">49579 - prevent ArrayIndexOutOfBoundException in UnknowEscherRecord</action>
|
<action dev="POI-DEVELOPERS" type="fix">49653 - Support for escaped unicode characters in Shared String Table</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">49593 - preserve leading and trailing white spaces in XWPFRun</action>
|
<action dev="POI-DEVELOPERS" type="fix">49579 - prevent ArrayIndexOutOfBoundException in UnknowEscherRecord</action>
|
||||||
|
<action dev="POI-DEVELOPERS" type="fix">49593 - preserve leading and trailing white spaces in XWPFRun</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">49455 - Insert the content of fldSimple fields into the XWPFWordTextExtractor output</action>
|
<action dev="POI-DEVELOPERS" type="add">49455 - Insert the content of fldSimple fields into the XWPFWordTextExtractor output</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">49640 - Fixed parsing formulas containing defined names beginning with an underscore</action>
|
<action dev="POI-DEVELOPERS" type="fix">49640 - Fixed parsing formulas containing defined names beginning with an underscore</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">49538 - Added implementation for POISSON()</action>
|
<action dev="POI-DEVELOPERS" type="add">49538 - Added implementation for POISSON()</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">49524 - Support for setting cell text to be vertically rotated, via style.setRotation(0xff)</action>
|
<action dev="POI-DEVELOPERS" type="add">49524 - Support for setting cell text to be vertically rotated, via style.setRotation(0xff)</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">49609 - Case insensitive matching of OOXML part names</action>
|
<action dev="POI-DEVELOPERS" type="fix">49609 - Case insensitive matching of OOXML part names</action>
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
package org.apache.poi.xssf.usermodel;
|
package org.apache.poi.xssf.usermodel;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
|
||||||
import javax.xml.namespace.QName;
|
import javax.xml.namespace.QName;
|
||||||
|
|
||||||
|
@ -75,6 +77,8 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.STXstring;
|
||||||
* @author Yegor Kozlov
|
* @author Yegor Kozlov
|
||||||
*/
|
*/
|
||||||
public class XSSFRichTextString implements RichTextString {
|
public class XSSFRichTextString implements RichTextString {
|
||||||
|
private static final Pattern utfPtrn = Pattern.compile("_x([0-9A-F]{4})_");
|
||||||
|
|
||||||
private CTRst st;
|
private CTRst st;
|
||||||
private StylesTable styles;
|
private StylesTable styles;
|
||||||
|
|
||||||
|
@ -337,13 +341,13 @@ public class XSSFRichTextString implements RichTextString {
|
||||||
*/
|
*/
|
||||||
public String getString() {
|
public String getString() {
|
||||||
if(st.sizeOfRArray() == 0) {
|
if(st.sizeOfRArray() == 0) {
|
||||||
return st.getT();
|
return utfDecode(st.getT());
|
||||||
}
|
}
|
||||||
StringBuffer buf = new StringBuffer();
|
StringBuffer buf = new StringBuffer();
|
||||||
for(CTRElt r : st.getRList()){
|
for(CTRElt r : st.getRList()){
|
||||||
buf.append(r.getT());
|
buf.append(r.getT());
|
||||||
}
|
}
|
||||||
return buf.toString();
|
return utfDecode(buf.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -490,4 +494,39 @@ public class XSSFRichTextString implements RichTextString {
|
||||||
c.dispose();
|
c.dispose();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For all characters which cannot be represented in XML as defined by the XML 1.0 specification,
|
||||||
|
* the characters are escaped using the Unicode numerical character representation escape character
|
||||||
|
* format _xHHHH_, where H represents a hexadecimal character in the character's value.
|
||||||
|
* <p>
|
||||||
|
* Example: The Unicode character 0D is invalid in an XML 1.0 document,
|
||||||
|
* so it shall be escaped as <code>_x000D_</code>.
|
||||||
|
* </p>
|
||||||
|
* See section 3.18.9 in the OOXML spec.
|
||||||
|
*
|
||||||
|
* @param value the string to decode
|
||||||
|
* @return the decoded string
|
||||||
|
*/
|
||||||
|
static String utfDecode(String value){
|
||||||
|
if(value == null) return null;
|
||||||
|
|
||||||
|
StringBuffer buf = new StringBuffer();
|
||||||
|
Matcher m = utfPtrn.matcher(value);
|
||||||
|
int idx = 0;
|
||||||
|
while(m.find()) {
|
||||||
|
int pos = m.start();
|
||||||
|
if( pos > idx) {
|
||||||
|
buf.append(value.substring(idx, pos));
|
||||||
|
}
|
||||||
|
|
||||||
|
String code = m.group(1);
|
||||||
|
int icode = Integer.decode("0x" + code);
|
||||||
|
buf.append((char)icode);
|
||||||
|
|
||||||
|
idx = m.end();
|
||||||
|
}
|
||||||
|
buf.append(value.substring(idx));
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -130,4 +130,16 @@ public final class TestXSSFRichTextString extends TestCase {
|
||||||
assertEquals("<xml-fragment xml:space=\"preserve\"> Apache</xml-fragment>", xs.xmlText());
|
assertEquals("<xml-fragment xml:space=\"preserve\"> Apache</xml-fragment>", xs.xmlText());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* test that unicode representation_ xHHHH_ is properly processed
|
||||||
|
*/
|
||||||
|
public void testUtfDecode() {
|
||||||
|
CTRst st = CTRst.Factory.newInstance();
|
||||||
|
st.setT("abc_x000D_2ef_x000D_");
|
||||||
|
XSSFRichTextString rt = new XSSFRichTextString(st);
|
||||||
|
//_x000D_ is converted into carriage return
|
||||||
|
assertEquals("abc\r2ef\r", rt.getString());
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue