mirror of https://github.com/apache/poi.git
[github-198] Remove jdk.charsets module dependency for spreadsheets generation. Thanks to Robert Marcano. This closes #198
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1884631 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8ff37c2a8a
commit
d20fa44305
|
@ -31,8 +31,6 @@ import java.util.Set;
|
||||||
public class CodePageUtil
|
public class CodePageUtil
|
||||||
{
|
{
|
||||||
|
|
||||||
public static final Set<Charset> DOUBLE_BYTE_CHARSETS = Collections.singleton(StringUtil.BIG5);
|
|
||||||
|
|
||||||
/** <p>Codepage 037, a special case</p> */
|
/** <p>Codepage 037, a special case</p> */
|
||||||
public static final int CP_037 = 37;
|
public static final int CP_037 = 37;
|
||||||
|
|
||||||
|
@ -446,27 +444,4 @@ public class CodePageUtil
|
||||||
return "cp" + codepage;
|
return "cp" + codepage;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* This tries to convert a LE byte array in cp950
|
|
||||||
* (Microsoft's dialect of Big5) to a String.
|
|
||||||
* We know MS zero-padded ascii, and we drop those.
|
|
||||||
* There may be areas for improvement in this.
|
|
||||||
*
|
|
||||||
* @param data
|
|
||||||
* @param offset
|
|
||||||
* @param lengthInBytes
|
|
||||||
* @return Decoded String
|
|
||||||
*/
|
|
||||||
public static String cp950ToString(byte[] data, int offset, int lengthInBytes) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
LittleEndianCP950Reader reader = new LittleEndianCP950Reader(data, offset, lengthInBytes);
|
|
||||||
int c = reader.read();
|
|
||||||
while (c != -1) {
|
|
||||||
sb.append((char)c);
|
|
||||||
c = reader.read();
|
|
||||||
}
|
|
||||||
reader.close();
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,6 @@ public final class StringUtil {
|
||||||
public static final Charset UTF16LE = StandardCharsets.UTF_16LE;
|
public static final Charset UTF16LE = StandardCharsets.UTF_16LE;
|
||||||
public static final Charset UTF8 = StandardCharsets.UTF_8;
|
public static final Charset UTF8 = StandardCharsets.UTF_8;
|
||||||
public static final Charset WIN_1252 = Charset.forName("cp1252");
|
public static final Charset WIN_1252 = Charset.forName("cp1252");
|
||||||
public static final Charset BIG5 = Charset.forName("Big5");
|
|
||||||
|
|
||||||
private StringUtil() {
|
private StringUtil() {
|
||||||
// no instances of this class
|
// no instances of this class
|
||||||
|
|
Binary file not shown.
|
@ -82,6 +82,7 @@ module org.apache.poi.scratchpad {
|
||||||
exports org.apache.poi.hemf.hemfplus.extractor to junit;
|
exports org.apache.poi.hemf.hemfplus.extractor to junit;
|
||||||
exports org.apache.poi.hslf to junit;
|
exports org.apache.poi.hslf to junit;
|
||||||
exports org.apache.poi.hwmf to junit;
|
exports org.apache.poi.hwmf to junit;
|
||||||
|
exports org.apache.poi.hwpf.util to junit;
|
||||||
|
|
||||||
opens org.apache.poi.hwpf.model to org.mockito;
|
opens org.apache.poi.hwpf.model to org.mockito;
|
||||||
opens org.apache.poi.hwpf.model.types to org.mockito;
|
opens org.apache.poi.hwpf.model.types to org.mockito;
|
||||||
|
|
|
@ -37,7 +37,7 @@ import org.apache.poi.hwpf.model.TextPieceTable;
|
||||||
import org.apache.poi.hwpf.usermodel.Range;
|
import org.apache.poi.hwpf.usermodel.Range;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.util.CodePageUtil;
|
import org.apache.poi.hwpf.util.DoubleByteUtil;
|
||||||
import org.apache.poi.util.IOUtils;
|
import org.apache.poi.util.IOUtils;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.NotImplemented;
|
import org.apache.poi.util.NotImplemented;
|
||||||
|
@ -176,7 +176,7 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
||||||
_fib.getFibBase().getFcMac()-_fib.getFibBase().getFcMin(), MAX_RECORD_LENGTH);
|
_fib.getFibBase().getFcMac()-_fib.getFibBase().getFcMin(), MAX_RECORD_LENGTH);
|
||||||
|
|
||||||
int numChars = textData.length;
|
int numChars = textData.length;
|
||||||
if (CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) {
|
if (DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) {
|
||||||
numChars /= 2;
|
numChars /= 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ import java.nio.charset.Charset;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
||||||
import org.apache.poi.util.CodePageUtil;
|
import org.apache.poi.hwpf.util.DoubleByteUtil;
|
||||||
import org.apache.poi.util.IOUtils;
|
import org.apache.poi.util.IOUtils;
|
||||||
import org.apache.poi.util.Internal;
|
import org.apache.poi.util.Internal;
|
||||||
|
|
||||||
|
@ -73,7 +73,7 @@ public class OldTextPieceTable extends TextPieceTable {
|
||||||
boolean unicode = pieces[x].isUnicode();
|
boolean unicode = pieces[x].isUnicode();
|
||||||
int multiple = 1;
|
int multiple = 1;
|
||||||
if (unicode ||
|
if (unicode ||
|
||||||
(charset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(charset))) {
|
(charset != null && DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(charset))) {
|
||||||
multiple = 2;
|
multiple = 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,7 +106,7 @@ public class OldTextPieceTable extends TextPieceTable {
|
||||||
@Override
|
@Override
|
||||||
protected int getEncodingMultiplier(TextPiece textPiece) {
|
protected int getEncodingMultiplier(TextPiece textPiece) {
|
||||||
Charset charset = textPiece.getPieceDescriptor().getCharset();
|
Charset charset = textPiece.getPieceDescriptor().getCharset();
|
||||||
if (charset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(charset)) {
|
if (charset != null && DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(charset)) {
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.poi.hwpf.model;
|
||||||
|
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
|
||||||
import org.apache.poi.util.CodePageUtil;
|
import org.apache.poi.hwpf.util.DoubleByteUtil;
|
||||||
import org.apache.poi.util.Internal;
|
import org.apache.poi.util.Internal;
|
||||||
import org.apache.poi.util.StringUtil;
|
import org.apache.poi.util.StringUtil;
|
||||||
|
|
||||||
|
@ -77,8 +77,8 @@ public class TextPiece extends PropertyNode<TextPiece> {
|
||||||
* Create the StringBuilder from the text and unicode flag
|
* Create the StringBuilder from the text and unicode flag
|
||||||
*/
|
*/
|
||||||
private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd) {
|
private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd) {
|
||||||
if (StringUtil.BIG5.equals(pd.getCharset())) {
|
if (DoubleByteUtil.BIG5.equals(pd.getCharset())) {
|
||||||
return new StringBuilder(CodePageUtil.cp950ToString(text, 0, text.length));
|
return new StringBuilder(DoubleByteUtil.cp950ToString(text, 0, text.length));
|
||||||
}
|
}
|
||||||
|
|
||||||
String str = new String(text, 0, text.length, (pd.isUnicode()) ? StringUtil.UTF16LE : pd.getCharset());
|
String str = new String(text, 0, text.length, (pd.isUnicode()) ? StringUtil.UTF16LE : pd.getCharset());
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
package org.apache.poi.hwpf.util;
|
||||||
|
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utilities for working with double byte CodePages.
|
||||||
|
*
|
||||||
|
* <p>Provides the Big5 charset constant, the set of double byte charsets,
|
||||||
|
* and a helper that decodes little-endian cp950 bytes into a String.</p>
|
||||||
|
*/
|
||||||
|
public class DoubleByteUtil
|
||||||
|
{
|
||||||
|
|
||||||
|
public static final Charset BIG5 = Charset.forName("Big5");
|
||||||
|
|
||||||
|
public static final Set<Charset> DOUBLE_BYTE_CHARSETS = Collections.singleton(BIG5);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This tries to convert a LE byte array in cp950
|
||||||
|
* (Microsoft's dialect of Big5) to a String.
|
||||||
|
* Microsoft pads ASCII characters with a zero byte; those padding bytes are dropped.
|
||||||
|
* There may be areas for improvement in this.
|
||||||
|
*
|
||||||
|
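* @param data byte array containing the little-endian cp950 text
|
||||||
|
* @param offset index in {@code data} at which decoding starts
|
||||||
|
* @param lengthInBytes number of bytes of {@code data} to decode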
|
||||||
|
* @return Decoded String
|
||||||
|
*/
|
||||||
|
public static String cp950ToString(byte[] data, int offset, int lengthInBytes) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
LittleEndianCP950Reader reader = new LittleEndianCP950Reader(data, offset, lengthInBytes);
|
||||||
|
int c = reader.read();
|
||||||
|
while (c != -1) {
|
||||||
|
sb.append((char)c);
|
||||||
|
c = reader.read();
|
||||||
|
}
|
||||||
|
reader.close();
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -15,13 +15,18 @@
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
==================================================================== */
|
==================================================================== */
|
||||||
|
|
||||||
package org.apache.poi.util;
|
package org.apache.poi.hwpf.util;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.CharBuffer;
|
import java.nio.CharBuffer;
|
||||||
import java.nio.charset.CharsetDecoder;
|
import java.nio.charset.CharsetDecoder;
|
||||||
|
|
||||||
|
import org.apache.poi.util.Internal;
|
||||||
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
import org.apache.poi.util.POILogger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stream that converts CP950 (MSOffice's dialect of Big5), with
|
* Stream that converts CP950 (MSOffice's dialect of Big5), with
|
||||||
* zero-byte padding for ASCII and in LittleEndianOrder.
|
* zero-byte padding for ASCII and in LittleEndianOrder.
|
||||||
|
@ -31,11 +36,10 @@ public class LittleEndianCP950Reader extends Reader {
|
||||||
|
|
||||||
private static final POILogger LOGGER = POILogFactory.getLogger(LittleEndianCP950Reader.class);
|
private static final POILogger LOGGER = POILogFactory.getLogger(LittleEndianCP950Reader.class);
|
||||||
|
|
||||||
|
|
||||||
private static final char UNMAPPABLE = '?';
|
private static final char UNMAPPABLE = '?';
|
||||||
private final ByteBuffer doubleByteBuffer = ByteBuffer.allocate(2);
|
private final ByteBuffer doubleByteBuffer = ByteBuffer.allocate(2);
|
||||||
private final CharBuffer charBuffer = CharBuffer.allocate(2);
|
private final CharBuffer charBuffer = CharBuffer.allocate(2);
|
||||||
private final CharsetDecoder decoder = StringUtil.BIG5.newDecoder();
|
private final CharsetDecoder decoder = DoubleByteUtil.BIG5.newDecoder();
|
||||||
|
|
||||||
//https://en.wikipedia.org/wiki/Code_page_950
|
//https://en.wikipedia.org/wiki/Code_page_950
|
||||||
//see private use area
|
//see private use area
|
|
@ -15,8 +15,7 @@
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
==================================================================== */
|
==================================================================== */
|
||||||
|
|
||||||
package org.apache.poi.util;
|
package org.apache.poi.hwpf.util;
|
||||||
|
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
Loading…
Reference in New Issue