mirror of https://github.com/apache/poi.git
[Bug 61246] fix issue where SXSSF sheet data has unicode surrogate chars replaced by '?'
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1800705 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dc0a33679c
commit
2f417e17bd
|
@ -123,10 +123,14 @@ public class SheetDataWriter {
|
|||
* This method <em>must</em> be invoked before calling {@link #getWorksheetXMLInputStream()}
|
||||
*/
|
||||
public void close() throws IOException {
|
||||
_out.flush();
|
||||
flush();
|
||||
_out.close();
|
||||
}
|
||||
|
||||
protected void flush() throws IOException {
|
||||
_out.flush();
|
||||
}
|
||||
|
||||
protected File getTempFile() {
|
||||
return _fd;
|
||||
}
|
||||
|
@ -393,15 +397,21 @@ public class SheetDataWriter {
|
|||
break;
|
||||
default:
|
||||
// YK: XmlBeans silently replaces all ISO control characters ( < 32) with question marks.
|
||||
// the same rule applies to unicode surrogates and "not a character" symbols.
|
||||
if( c < ' ' || Character.isLowSurrogate(c) || Character.isHighSurrogate(c) ||
|
||||
('\uFFFE' <= c && c <= '\uFFFF')) {
|
||||
// the same rule applies to "not a character" symbols.
|
||||
if (replaceWithQuestionMark(c)) {
|
||||
if (counter > last) {
|
||||
_out.write(chars, last, counter - last);
|
||||
}
|
||||
_out.write('?');
|
||||
last = counter + 1;
|
||||
}
|
||||
else if (Character.isHighSurrogate(c) || Character.isLowSurrogate(c)) {
|
||||
if (counter > last) {
|
||||
_out.write(chars, last, counter - last);
|
||||
}
|
||||
_out.write(c);
|
||||
last = counter + 1;
|
||||
}
|
||||
else if (c > 127) {
|
||||
if (counter > last) {
|
||||
_out.write(chars, last, counter - last);
|
||||
|
@ -421,6 +431,10 @@ public class SheetDataWriter {
|
|||
}
|
||||
}
|
||||
|
||||
static boolean replaceWithQuestionMark(char c) {
|
||||
return c < ' ' || ('\uFFFE' <= c && c <= '\uFFFF');
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes the temporary file that backed this sheet on disk.
|
||||
* @return true if the file was deleted, false if it wasn't.
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* ====================================================================
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ====================================================================
|
||||
*/
|
||||
|
||||
package org.apache.poi.xssf.streaming;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
public final class TestSheetDataWriter {
|
||||
|
||||
final String unicodeSurrogates = "\uD835\uDF4A\uD835\uDF4B\uD835\uDF4C\uD835\uDF4D\uD835\uDF4E"
|
||||
+ "\uD835\uDF4F\uD835\uDF50\uD835\uDF51\uD835\uDF52\uD835\uDF53\uD835\uDF54\uD835"
|
||||
+ "\uDF55\uD835\uDF56\uD835\uDF57\uD835\uDF58\uD835\uDF59\uD835\uDF5A\uD835\uDF5B"
|
||||
+ "\uD835\uDF5C\uD835\uDF5D\uD835\uDF5E\uD835\uDF5F\uD835\uDF60\uD835\uDF61\uD835"
|
||||
+ "\uDF62\uD835\uDF63\uD835\uDF64\uD835\uDF65\uD835\uDF66\uD835\uDF67\uD835\uDF68"
|
||||
+ "\uD835\uDF69\uD835\uDF6A\uD835\uDF6B\uD835\uDF6C\uD835\uDF6D\uD835\uDF6E\uD835"
|
||||
+ "\uDF6F\uD835\uDF70\uD835\uDF71\uD835\uDF72\uD835\uDF73\uD835\uDF74\uD835\uDF75"
|
||||
+ "\uD835\uDF76\uD835\uDF77\uD835\uDF78\uD835\uDF79\uD835\uDF7A";
|
||||
|
||||
@Test
|
||||
public void testReplaceWithQuestionMark() {
|
||||
for(int i = 0; i < unicodeSurrogates.length(); i++) {
|
||||
assertFalse(SheetDataWriter.replaceWithQuestionMark(unicodeSurrogates.charAt(i)));
|
||||
}
|
||||
assertTrue(SheetDataWriter.replaceWithQuestionMark('\uFFFE'));
|
||||
assertTrue(SheetDataWriter.replaceWithQuestionMark('\uFFFF'));
|
||||
assertTrue(SheetDataWriter.replaceWithQuestionMark('\u0000'));
|
||||
assertTrue(SheetDataWriter.replaceWithQuestionMark('\u000F'));
|
||||
assertTrue(SheetDataWriter.replaceWithQuestionMark('\u001F'));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWriteUnicodeSurrogates() throws IOException {
|
||||
SheetDataWriter writer = new SheetDataWriter();
|
||||
try {
|
||||
writer.outputQuotedString(unicodeSurrogates);
|
||||
writer.flush();
|
||||
File file = writer.getTempFile();
|
||||
FileInputStream is = new FileInputStream(file);
|
||||
String text;
|
||||
try {
|
||||
text = new String(IOUtils.toByteArray(is), "UTF-8");
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
assertEquals(unicodeSurrogates, text);
|
||||
} finally {
|
||||
writer.close();
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue