mirror of https://github.com/apache/poi.git
[Bug 61246] fix issue where SXSSF sheet data has unicode surrogate chars replaced by '?'
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1800705 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dc0a33679c
commit
2f417e17bd
|
@ -122,12 +122,16 @@ public class SheetDataWriter {
|
||||||
* flush and close the temp data writer.
|
* flush and close the temp data writer.
|
||||||
* This method <em>must</em> be invoked before calling {@link #getWorksheetXMLInputStream()}
|
* This method <em>must</em> be invoked before calling {@link #getWorksheetXMLInputStream()}
|
||||||
*/
|
*/
|
||||||
public void close() throws IOException{
|
public void close() throws IOException {
|
||||||
_out.flush();
|
flush();
|
||||||
_out.close();
|
_out.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void flush() throws IOException {
|
||||||
|
_out.flush();
|
||||||
|
}
|
||||||
|
|
||||||
protected File getTempFile(){
|
protected File getTempFile() {
|
||||||
return _fd;
|
return _fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -329,7 +333,7 @@ public class SheetDataWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
//Taken from jdk1.3/src/javax/swing/text/html/HTMLWriter.java
|
//Taken from jdk1.3/src/javax/swing/text/html/HTMLWriter.java
|
||||||
protected void outputQuotedString(String s) throws IOException {
|
protected void outputQuotedString(String s) throws IOException {
|
||||||
if (s == null || s.length() == 0) {
|
if (s == null || s.length() == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -393,15 +397,21 @@ public class SheetDataWriter {
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
// YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks.
|
// YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks.
|
||||||
// the same rule applies to unicode surrogates and "not a character" symbols.
|
// the same rule applies to the "not a character" code points U+FFFE and U+FFFF.
|
||||||
if( c < ' ' || Character.isLowSurrogate(c) || Character.isHighSurrogate(c) ||
|
if (replaceWithQuestionMark(c)) {
|
||||||
('\uFFFE' <= c && c <= '\uFFFF')) {
|
|
||||||
if (counter > last) {
|
if (counter > last) {
|
||||||
_out.write(chars, last, counter - last);
|
_out.write(chars, last, counter - last);
|
||||||
}
|
}
|
||||||
_out.write('?');
|
_out.write('?');
|
||||||
last = counter + 1;
|
last = counter + 1;
|
||||||
}
|
}
|
||||||
|
else if (Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
|
||||||
|
if (counter > last) {
|
||||||
|
_out.write(chars, last, counter - last);
|
||||||
|
}
|
||||||
|
_out.write(c);
|
||||||
|
last = counter + 1;
|
||||||
|
}
|
||||||
else if (c > 127) {
|
else if (c > 127) {
|
||||||
if (counter > last) {
|
if (counter > last) {
|
||||||
_out.write(chars, last, counter - last);
|
_out.write(chars, last, counter - last);
|
||||||
|
@ -421,6 +431,10 @@ public class SheetDataWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static boolean replaceWithQuestionMark(char c) {
|
||||||
|
return c < ' ' || ('\uFFFE' <= c && c <= '\uFFFF');
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Deletes the temporary file that backed this sheet on disk.
|
* Deletes the temporary file that backed this sheet on disk.
|
||||||
* @return true if the file was deleted, false if it wasn't.
|
* @return true if the file was deleted, false if it wasn't.
|
||||||
|
|
|
@ -0,0 +1,75 @@
|
||||||
|
/*
|
||||||
|
* ====================================================================
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
* ====================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.poi.xssf.streaming;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.poi.util.IOUtils;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public final class TestSheetDataWriter {
|
||||||
|
|
||||||
|
final String unicodeSurrogates = "\uD835\uDF4A\uD835\uDF4B\uD835\uDF4C\uD835\uDF4D\uD835\uDF4E"
|
||||||
|
+ "\uD835\uDF4F\uD835\uDF50\uD835\uDF51\uD835\uDF52\uD835\uDF53\uD835\uDF54\uD835"
|
||||||
|
+ "\uDF55\uD835\uDF56\uD835\uDF57\uD835\uDF58\uD835\uDF59\uD835\uDF5A\uD835\uDF5B"
|
||||||
|
+ "\uD835\uDF5C\uD835\uDF5D\uD835\uDF5E\uD835\uDF5F\uD835\uDF60\uD835\uDF61\uD835"
|
||||||
|
+ "\uDF62\uD835\uDF63\uD835\uDF64\uD835\uDF65\uD835\uDF66\uD835\uDF67\uD835\uDF68"
|
||||||
|
+ "\uD835\uDF69\uD835\uDF6A\uD835\uDF6B\uD835\uDF6C\uD835\uDF6D\uD835\uDF6E\uD835"
|
||||||
|
+ "\uDF6F\uD835\uDF70\uD835\uDF71\uD835\uDF72\uD835\uDF73\uD835\uDF74\uD835\uDF75"
|
||||||
|
+ "\uD835\uDF76\uD835\uDF77\uD835\uDF78\uD835\uDF79\uD835\uDF7A";
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReplaceWithQuestionMark() {
|
||||||
|
for(int i = 0; i < unicodeSurrogates.length(); i++) {
|
||||||
|
assertFalse(SheetDataWriter.replaceWithQuestionMark(unicodeSurrogates.charAt(i)));
|
||||||
|
}
|
||||||
|
assertTrue(SheetDataWriter.replaceWithQuestionMark('\uFFFE'));
|
||||||
|
assertTrue(SheetDataWriter.replaceWithQuestionMark('\uFFFF'));
|
||||||
|
assertTrue(SheetDataWriter.replaceWithQuestionMark('\u0000'));
|
||||||
|
assertTrue(SheetDataWriter.replaceWithQuestionMark('\u000F'));
|
||||||
|
assertTrue(SheetDataWriter.replaceWithQuestionMark('\u001F'));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWriteUnicodeSurrogates() throws IOException {
|
||||||
|
SheetDataWriter writer = new SheetDataWriter();
|
||||||
|
try {
|
||||||
|
writer.outputQuotedString(unicodeSurrogates);
|
||||||
|
writer.flush();
|
||||||
|
File file = writer.getTempFile();
|
||||||
|
FileInputStream is = new FileInputStream(file);
|
||||||
|
String text;
|
||||||
|
try {
|
||||||
|
text = new String(IOUtils.toByteArray(is), "UTF-8");
|
||||||
|
} finally {
|
||||||
|
is.close();
|
||||||
|
}
|
||||||
|
assertEquals(unicodeSurrogates, text);
|
||||||
|
} finally {
|
||||||
|
writer.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue