Bug 60289: Fix handling of unicode escapes with lowercase hex-chars

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1766065 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2016-10-21 16:31:51 +00:00
parent dcae0284ea
commit 8b69cfda06
3 changed files with 22 additions and 9 deletions

View File

@ -75,7 +75,7 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.STXstring;
* </blockquote> * </blockquote>
*/ */
public class XSSFRichTextString implements RichTextString { public class XSSFRichTextString implements RichTextString {
private static final Pattern utfPtrn = Pattern.compile("_x([0-9A-F]{4})_"); private static final Pattern utfPtrn = Pattern.compile("_x([0-9A-Fa-f]{4})_");
private CTRst st; private CTRst st;
private StylesTable styles; private StylesTable styles;
@ -244,12 +244,13 @@ public class XSSFRichTextString implements RichTextString {
if(ctFont.sizeOfShadowArray() > 0) pr.addNewShadow().setVal(ctFont.getShadowArray(0).getVal()); if(ctFont.sizeOfShadowArray() > 0) pr.addNewShadow().setVal(ctFont.getShadowArray(0).getVal());
if(ctFont.sizeOfStrikeArray() > 0) pr.addNewStrike().setVal(ctFont.getStrikeArray(0).getVal()); if(ctFont.sizeOfStrikeArray() > 0) pr.addNewStrike().setVal(ctFont.getStrikeArray(0).getVal());
} }
/** /**
* Does this string have any explicit formatting applied, or is * Does this string have any explicit formatting applied, or is
* it just text in the default style? * it just text in the default style?
*/ */
public boolean hasFormatting() { public boolean hasFormatting() {
//noinspection deprecation - for performance reasons!
CTRElt[] rs = st.getRArray(); CTRElt[] rs = st.getRArray();
if (rs == null || rs.length == 0) { if (rs == null || rs.length == 0) {
return false; return false;
@ -311,6 +312,7 @@ public class XSSFRichTextString implements RichTextString {
return utfDecode(st.getT()); return utfDecode(st.getT());
} }
StringBuilder buf = new StringBuilder(); StringBuilder buf = new StringBuilder();
//noinspection deprecation - for performance reasons!
for(CTRElt r : st.getRArray()){ for(CTRElt r : st.getRArray()){
buf.append(r.getT()); buf.append(r.getT());
} }
@ -381,6 +383,7 @@ public class XSSFRichTextString implements RichTextString {
public XSSFFont getFontAtIndex( int index ) { public XSSFFont getFontAtIndex( int index ) {
final ThemesTable themes = getThemesTable(); final ThemesTable themes = getThemesTable();
int pos = 0; int pos = 0;
//noinspection deprecation - for performance reasons!
for(CTRElt r : st.getRArray()){ for(CTRElt r : st.getRArray()){
final int length = r.getT().length(); final int length = r.getT().length();
if(index >= pos && index < pos + length) { if(index >= pos && index < pos + length) {
@ -406,6 +409,7 @@ public class XSSFRichTextString implements RichTextString {
protected void setStylesTableReference(StylesTable tbl){ protected void setStylesTableReference(StylesTable tbl){
styles = tbl; styles = tbl;
if(st.sizeOfRArray() > 0) { if(st.sizeOfRArray() > 0) {
//noinspection deprecation - for performance reasons!
for (CTRElt r : st.getRArray()) { for (CTRElt r : st.getRArray()) {
CTRPrElt pr = r.getRPr(); CTRPrElt pr = r.getRPr();
if(pr != null && pr.sizeOfRFontArray() > 0){ if(pr != null && pr.sizeOfRFontArray() > 0){
@ -556,6 +560,7 @@ public class XSSFRichTextString implements RichTextString {
TreeMap<Integer, CTRPrElt> getFormatMap(CTRst entry){ TreeMap<Integer, CTRPrElt> getFormatMap(CTRst entry){
int length = 0; int length = 0;
TreeMap<Integer, CTRPrElt> formats = new TreeMap<Integer, CTRPrElt>(); TreeMap<Integer, CTRPrElt> formats = new TreeMap<Integer, CTRPrElt>();
//noinspection deprecation - for performance reasons!
for (CTRElt r : entry.getRArray()) { for (CTRElt r : entry.getRArray()) {
String txt = r.getT(); String txt = r.getT();
CTRPrElt fmt = r.getRPr(); CTRPrElt fmt = r.getRPr();

View File

@ -229,13 +229,18 @@ public final class TestXSSFRichTextString extends TestCase {
/** /**
* test that unicode representation _xHHHH_ is properly processed * test that unicode representation _xHHHH_ is properly processed
*/ */
public void testUtfDecode() { public void testUtfDecode() throws IOException {
CTRst st = CTRst.Factory.newInstance(); CTRst st = CTRst.Factory.newInstance();
st.setT("abc_x000D_2ef_x000D_"); st.setT("abc_x000D_2ef_x000D_");
XSSFRichTextString rt = new XSSFRichTextString(st); XSSFRichTextString rt = new XSSFRichTextString(st);
//_x000D_ is converted into carriage return //_x000D_ is converted into carriage return
assertEquals("abc\r2ef\r", rt.getString()); assertEquals("abc\r2ef\r", rt.getString());
// Test Lowercase case
CTRst st2 = CTRst.Factory.newInstance();
st2.setT("abc_x000d_2ef_x000d_");
XSSFRichTextString rt2 = new XSSFRichTextString(st2);
assertEquals("abc\r2ef\r", rt2.getString());
} }
public void testApplyFont_lowlevel(){ public void testApplyFont_lowlevel(){
@ -382,6 +387,7 @@ public final class TestXSSFRichTextString extends TestCase {
public void testLineBreaks_bug48877() throws IOException{ public void testLineBreaks_bug48877() throws IOException{
XSSFFont font = new XSSFFont(); XSSFFont font = new XSSFFont();
//noinspection deprecation
font.setBoldweight(XSSFFont.BOLDWEIGHT_BOLD); font.setBoldweight(XSSFFont.BOLDWEIGHT_BOLD);
font.setFontHeightInPoints((short) 14); font.setFontHeightInPoints((short) 14);
XSSFRichTextString str; XSSFRichTextString str;
@ -423,8 +429,7 @@ public final class TestXSSFRichTextString extends TestCase {
str.applyFont(0, 4, font); str.applyFont(0, 4, font);
t1 = str.getCTRst().getRArray(0).xgetT(); t1 = str.getCTRst().getRArray(0).xgetT();
t2 = str.getCTRst().getRArray(1).xgetT(); t2 = str.getCTRst().getRArray(1).xgetT();
// YK: don't know why, but XmlBeans converts leading tab characters to spaces assertEquals("<xml-fragment xml:space=\"preserve\">Tab\t</xml-fragment>", t1.xmlText());
//assertEquals("<xml-fragment>Tab\t</xml-fragment>", t1.xmlText());
assertEquals("<xml-fragment xml:space=\"preserve\">separated\n</xml-fragment>", t2.xmlText()); assertEquals("<xml-fragment xml:space=\"preserve\">separated\n</xml-fragment>", t2.xmlText());
str = new XSSFRichTextString("\n\n\nNew Line\n\n"); str = new XSSFRichTextString("\n\n\nNew Line\n\n");
@ -439,8 +444,6 @@ public final class TestXSSFRichTextString extends TestCase {
assertEquals("<xml-fragment xml:space=\"preserve\">\n\n</xml-fragment>", t3.xmlText()); assertEquals("<xml-fragment xml:space=\"preserve\">\n\n</xml-fragment>", t3.xmlText());
} }
@Test
public void testBug56511() { public void testBug56511() {
XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("56511.xlsx"); XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("56511.xlsx");
for (Sheet sheet : wb) { for (Sheet sheet : wb) {
@ -470,7 +473,6 @@ public final class TestXSSFRichTextString extends TestCase {
} }
} }
@Test
public void testBug56511_values() { public void testBug56511_values() {
XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("56511.xlsx"); XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("56511.xlsx");
Sheet sheet = wb.getSheetAt(0); Sheet sheet = wb.getSheetAt(0);
@ -533,4 +535,10 @@ public final class TestXSSFRichTextString extends TestCase {
assertEquals(font, rts.getFontAtIndex(s2-1)); assertEquals(font, rts.getFontAtIndex(s2-1));
assertEquals("<xml-fragment/>", rts.getFontAtIndex(s3-1).toString()); assertEquals("<xml-fragment/>", rts.getFontAtIndex(s3-1).toString());
} }
/**
 * Regression test for bug 60289: reads the rich-text value of the cell at
 * row 1, column 1 of the first sheet in the sample workbook
 * {@code 60289.xlsx} and expects it to be {@code "Rich Text\r\nTest"}.
 *
 * @throws IOException if closing the workbook fails
 */
public void test60289UtfDecode() throws IOException {
    XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("60289.xlsx");
    try {
        assertEquals("Rich Text\r\nTest", wb.getSheetAt(0).getRow(1).getCell(1).getRichStringCellValue().getString());
    } finally {
        // release the workbook even when the assertion above fails
        wb.close();
    }
}
} }

Binary file not shown.