mirror of https://github.com/apache/poi.git
[github-198] Remove jdk.charset module dependency for spreadsheets generation. Thanks to Robert Marcano. This closes #198
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1884631 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8ff37c2a8a
commit
d20fa44305
|
@ -31,8 +31,6 @@ import java.util.Set;
|
|||
public class CodePageUtil
|
||||
{
|
||||
|
||||
public static final Set<Charset> DOUBLE_BYTE_CHARSETS = Collections.singleton(StringUtil.BIG5);
|
||||
|
||||
/** <p>Codepage 037, a special case</p> */
|
||||
public static final int CP_037 = 37;
|
||||
|
||||
|
@ -446,27 +444,4 @@ public class CodePageUtil
|
|||
return "cp" + codepage;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This tries to convert a LE byte array in cp950
|
||||
* (Microsoft's dialect of Big5) to a String.
|
||||
* We know MS zero-padded ascii, and we drop those.
|
||||
* There may be areas for improvement in this.
|
||||
*
|
||||
* @param data
|
||||
* @param offset
|
||||
* @param lengthInBytes
|
||||
* @return Decoded String
|
||||
*/
|
||||
public static String cp950ToString(byte[] data, int offset, int lengthInBytes) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
LittleEndianCP950Reader reader = new LittleEndianCP950Reader(data, offset, lengthInBytes);
|
||||
int c = reader.read();
|
||||
while (c != -1) {
|
||||
sb.append((char)c);
|
||||
c = reader.read();
|
||||
}
|
||||
reader.close();
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,7 +34,6 @@ public final class StringUtil {
|
|||
public static final Charset UTF16LE = StandardCharsets.UTF_16LE;
|
||||
public static final Charset UTF8 = StandardCharsets.UTF_8;
|
||||
public static final Charset WIN_1252 = Charset.forName("cp1252");
|
||||
public static final Charset BIG5 = Charset.forName("Big5");
|
||||
|
||||
private StringUtil() {
|
||||
// no instances of this class
|
||||
|
|
Binary file not shown.
|
@ -82,6 +82,7 @@ module org.apache.poi.scratchpad {
|
|||
exports org.apache.poi.hemf.hemfplus.extractor to junit;
|
||||
exports org.apache.poi.hslf to junit;
|
||||
exports org.apache.poi.hwmf to junit;
|
||||
exports org.apache.poi.hwpf.util to junit;
|
||||
|
||||
opens org.apache.poi.hwpf.model to org.mockito;
|
||||
opens org.apache.poi.hwpf.model.types to org.mockito;
|
||||
|
|
|
@ -37,7 +37,7 @@ import org.apache.poi.hwpf.model.TextPieceTable;
|
|||
import org.apache.poi.hwpf.usermodel.Range;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.CodePageUtil;
|
||||
import org.apache.poi.hwpf.util.DoubleByteUtil;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.NotImplemented;
|
||||
|
@ -176,7 +176,7 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
|||
_fib.getFibBase().getFcMac()-_fib.getFibBase().getFcMin(), MAX_RECORD_LENGTH);
|
||||
|
||||
int numChars = textData.length;
|
||||
if (CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) {
|
||||
if (DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(guessedCharset)) {
|
||||
numChars /= 2;
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ import java.nio.charset.Charset;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.poi.util.CodePageUtil;
|
||||
import org.apache.poi.hwpf.util.DoubleByteUtil;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.Internal;
|
||||
|
||||
|
@ -73,7 +73,7 @@ public class OldTextPieceTable extends TextPieceTable {
|
|||
boolean unicode = pieces[x].isUnicode();
|
||||
int multiple = 1;
|
||||
if (unicode ||
|
||||
(charset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(charset))) {
|
||||
(charset != null && DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(charset))) {
|
||||
multiple = 2;
|
||||
}
|
||||
|
||||
|
@ -106,7 +106,7 @@ public class OldTextPieceTable extends TextPieceTable {
|
|||
@Override
|
||||
protected int getEncodingMultiplier(TextPiece textPiece) {
|
||||
Charset charset = textPiece.getPieceDescriptor().getCharset();
|
||||
if (charset != null && CodePageUtil.DOUBLE_BYTE_CHARSETS.contains(charset)) {
|
||||
if (charset != null && DoubleByteUtil.DOUBLE_BYTE_CHARSETS.contains(charset)) {
|
||||
return 2;
|
||||
}
|
||||
return 1;
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.poi.hwpf.model;
|
|||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import org.apache.poi.util.CodePageUtil;
|
||||
import org.apache.poi.hwpf.util.DoubleByteUtil;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.StringUtil;
|
||||
|
||||
|
@ -77,8 +77,8 @@ public class TextPiece extends PropertyNode<TextPiece> {
|
|||
* Create the StringBuilder from the text and unicode flag
|
||||
*/
|
||||
private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd) {
|
||||
if (StringUtil.BIG5.equals(pd.getCharset())) {
|
||||
return new StringBuilder(CodePageUtil.cp950ToString(text, 0, text.length));
|
||||
if (DoubleByteUtil.BIG5.equals(pd.getCharset())) {
|
||||
return new StringBuilder(DoubleByteUtil.cp950ToString(text, 0, text.length));
|
||||
}
|
||||
|
||||
String str = new String(text, 0, text.length, (pd.isUnicode()) ? StringUtil.UTF16LE : pd.getCharset());
|
||||
|
|
|
@ -0,0 +1,59 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hwpf.util;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Utilities for working with double byte CodePages.
|
||||
*
|
||||
* <p>Provides constants for understanding numeric codepages,
|
||||
* along with utilities to translate these into Java Character Sets.</p>
|
||||
*/
|
||||
public class DoubleByteUtil
|
||||
{
|
||||
|
||||
public static final Charset BIG5 = Charset.forName("Big5");
|
||||
|
||||
public static final Set<Charset> DOUBLE_BYTE_CHARSETS = Collections.singleton(BIG5);
|
||||
|
||||
/**
|
||||
* This tries to convert a LE byte array in cp950
|
||||
* (Microsoft's dialect of Big5) to a String.
|
||||
* We know MS zero-padded ascii, and we drop those.
|
||||
* There may be areas for improvement in this.
|
||||
*
|
||||
* @param data
|
||||
* @param offset
|
||||
* @param lengthInBytes
|
||||
* @return Decoded String
|
||||
*/
|
||||
public static String cp950ToString(byte[] data, int offset, int lengthInBytes) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
LittleEndianCP950Reader reader = new LittleEndianCP950Reader(data, offset, lengthInBytes);
|
||||
int c = reader.read();
|
||||
while (c != -1) {
|
||||
sb.append((char)c);
|
||||
c = reader.read();
|
||||
}
|
||||
reader.close();
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
|
@ -15,13 +15,18 @@
|
|||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.util;
|
||||
package org.apache.poi.hwpf.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
/**
|
||||
* Stream that converts CP950 (MSOffice's dialect of Big5), with
|
||||
* zero-byte padding for ASCII and in LittleEndianOrder.
|
||||
|
@ -31,11 +36,10 @@ public class LittleEndianCP950Reader extends Reader {
|
|||
|
||||
private static final POILogger LOGGER = POILogFactory.getLogger(LittleEndianCP950Reader.class);
|
||||
|
||||
|
||||
private static final char UNMAPPABLE = '?';
|
||||
private final ByteBuffer doubleByteBuffer = ByteBuffer.allocate(2);
|
||||
private final CharBuffer charBuffer = CharBuffer.allocate(2);
|
||||
private final CharsetDecoder decoder = StringUtil.BIG5.newDecoder();
|
||||
private final CharsetDecoder decoder = DoubleByteUtil.BIG5.newDecoder();
|
||||
|
||||
//https://en.wikipedia.org/wiki/Code_page_950
|
||||
//see private use area
|
|
@ -15,8 +15,7 @@
|
|||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.util;
|
||||
|
||||
package org.apache.poi.hwpf.util;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
Loading…
Reference in New Issue