Charset.forName() for known encodings makes catching UnsupportedEncodingException obsolete

Unify UTF-16LE conversion to StringUtil
BugFix for RecordInputStream.readFully in combination with continuing records
BugFix for integration tests - fix pathname for handler/exclude lookup on windows

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1648032 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2014-12-27 01:33:28 +00:00
parent 70d7ec0e69
commit b91e480006
31 changed files with 256 additions and 306 deletions

View File

@ -16,3 +16,8 @@ This product contains the Piccolo XML Parser for Java
This product contains the chunks_parse_cmds.tbl file from the vsdump program.
Copyright (C) 2006-2007 Valek Filippov (frob@df.ru)
This product contains parts of the eID Applet project
(http://eid-applet.googlecode.com). Copyright (c) 2009-2014
FedICT (federal ICT department of Belgium), e-Contract.be BVBA (https://www.e-contract.be),
Bart Hanssens from FedICT

View File

@ -22,8 +22,8 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.Charset;
import org.apache.poi.hwpf.model.StyleDescription;
import org.apache.poi.hwpf.model.StyleSheet;
@ -37,10 +37,9 @@ public final class Word2Forrest
HWPFDocument _doc;
@SuppressWarnings("unused")
public Word2Forrest(HWPFDocument doc, OutputStream stream)
throws IOException, UnsupportedEncodingException
public Word2Forrest(HWPFDocument doc, OutputStream stream) throws IOException
{
OutputStreamWriter out = new OutputStreamWriter (stream, "UTF-8");
OutputStreamWriter out = new OutputStreamWriter (stream, Charset.forName("UTF-8"));
_out = out;
_doc = doc;

View File

@ -20,6 +20,7 @@ package org.apache.poi;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
@ -153,7 +154,6 @@ public class TestAllFiles {
private static final Set<String> EXPECTED_FAILURES = new HashSet<String>();
static {
// password protected files
EXPECTED_FAILURES.add("poifs/protect.xlsx");
EXPECTED_FAILURES.add("spreadsheet/password.xls");
EXPECTED_FAILURES.add("spreadsheet/51832.xls");
EXPECTED_FAILURES.add("document/PasswordProtected.doc");
@ -161,10 +161,14 @@ public class TestAllFiles {
EXPECTED_FAILURES.add("slideshow/Password_Protected-56-hello.ppt");
EXPECTED_FAILURES.add("slideshow/Password_Protected-np-hello.ppt");
EXPECTED_FAILURES.add("slideshow/cryptoapi-proc2356.ppt");
EXPECTED_FAILURES.add("document/bug53475-password-is-pass.docx");
EXPECTED_FAILURES.add("document/bug53475-password-is-solrcell.docx");
//EXPECTED_FAILURES.add("document/bug53475-password-is-pass.docx");
//EXPECTED_FAILURES.add("document/bug53475-password-is-solrcell.docx");
EXPECTED_FAILURES.add("spreadsheet/xor-encryption-abc.xls");
EXPECTED_FAILURES.add("spreadsheet/35897-type4.xls");
//EXPECTED_FAILURES.add("poifs/protect.xlsx");
//EXPECTED_FAILURES.add("poifs/protected_sha512.xlsx");
//EXPECTED_FAILURES.add("poifs/extenxls_pwd123.xlsx");
//EXPECTED_FAILURES.add("poifs/protected_agile.docx");
// TODO: fails XMLExportTest, is this ok?
EXPECTED_FAILURES.add("spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx");
@ -178,15 +182,12 @@ public class TestAllFiles {
// TODO: good to ignore?
EXPECTED_FAILURES.add("spreadsheet/sample-beta.xlsx");
EXPECTED_FAILURES.add("spreadsheet/49931.xls");
EXPECTED_FAILURES.add("poifs/protected_sha512.xlsx");
EXPECTED_FAILURES.add("poifs/extenxls_pwd123.xlsx");
EXPECTED_FAILURES.add("openxml4j/ContentTypeHasParameters.ooxml");
// This is actually a spreadsheet!
EXPECTED_FAILURES.add("hpsf/TestRobert_Flaherty.doc");
// some files that are broken, Excel 5.0/95, Word 95, ...
EXPECTED_FAILURES.add("poifs/protected_agile.docx");
EXPECTED_FAILURES.add("spreadsheet/43493.xls");
EXPECTED_FAILURES.add("spreadsheet/46904.xls");
EXPECTED_FAILURES.add("document/56880.doc");
@ -231,6 +232,7 @@ public class TestAllFiles {
List<Object[]> files = new ArrayList<Object[]>();
for(String file : scanner.getIncludedFiles()) {
file = file.replace('\\', '/'); // ... failures/handlers lookup doesn't work on windows otherwise
files.add(new Object[] { file, HANDLERS.get(getExtension(file)) });
}
@ -246,7 +248,7 @@ public class TestAllFiles {
@Test
public void testAllFiles() throws Exception {
assertNotNull("Unknown file extension for file: " + file + ": " + getExtension(file), handler);
InputStream stream = new FileInputStream(new File("test-data", file));
InputStream stream = new BufferedInputStream(new FileInputStream(new File("test-data", file)),100);
try {
handler.handleFile(stream);

View File

@ -18,7 +18,11 @@ package org.apache.poi.stress;
import static org.junit.Assert.assertNotNull;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public final class POIXMLDocumentHandler {
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception {
@ -28,4 +32,15 @@ public final class POIXMLDocumentHandler {
assertNotNull(doc.getProperties());
assertNotNull(doc.getRelations());
}
/**
 * Tells whether the given stream is an OLE2 (POIFS) container that carries an
 * {@code EncryptedPackage} entry, i.e. a password protected OOXML file.
 *
 * <p>A stream without a POIFS header is reported as not encrypted. A stream
 * with a POIFS header but without the {@code EncryptedPackage} entry is
 * rejected, because it is not an OOXML package at all.</p>
 *
 * <p>NOTE(review): the header probe presumably needs a stream that can be
 * rewound (mark/reset or push-back) so later readers still see the first
 * bytes - confirm against {@code POIFSFileSystem.hasPOIFSHeader}.</p>
 *
 * @param stream the stream to inspect
 * @return {@code true} if an {@code EncryptedPackage} entry was found,
 *         {@code false} if the stream has no POIFS header
 * @throws IOException if the stream is a POIFS container without an
 *         {@code EncryptedPackage} entry, or if reading fails
 */
protected static boolean isEncrypted(InputStream stream) throws IOException {
// plain (zip based) OOXML files have no OLE2 header - nothing to skip
if (!POIFSFileSystem.hasPOIFSHeader(stream)) {
return false;
}

POIFSFileSystem fileSystem = new POIFSFileSystem(stream);
boolean hasEncryptedPackage = fileSystem.getRoot().hasEntry("EncryptedPackage");
if (!hasEncryptedPackage) {
// an OLE2 container without the encrypted payload is no OOXML document
throw new IOException("wrong file format or file extension for OO XML file");
}
return true;
}
}

View File

@ -28,6 +28,9 @@ import org.junit.Test;
public class XSLFFileHandler implements FileHandler {
@Override
public void handleFile(InputStream stream) throws Exception {
// ignore password protected files
if (POIXMLDocumentHandler.isEncrypted(stream)) return;
XSLFSlideShow slide = new XSLFSlideShow(OPCPackage.open(stream));
assertNotNull(slide.getPresentation());
assertNotNull(slide.getSlideMasterReferences());

View File

@ -32,6 +32,9 @@ import org.xml.sax.SAXException;
public class XSSFFileHandler extends SpreadsheetHandler {
@Override
public void handleFile(InputStream stream) throws Exception {
// ignore password protected files
if (POIXMLDocumentHandler.isEncrypted(stream)) return;
XSSFWorkbook wb = new XSSFWorkbook(stream);
// use the combined handler for HSSF/XSSF

View File

@ -25,6 +25,9 @@ import org.junit.Test;
public class XWPFFileHandler implements FileHandler {
@Override
public void handleFile(InputStream stream) throws Exception {
// ignore password protected files
if (POIXMLDocumentHandler.isEncrypted(stream)) return;
XWPFDocument doc = new XWPFDocument(stream);
new POIXMLDocumentHandler().handlePOIXMLDocument(doc);

View File

@ -285,9 +285,30 @@ public final class RecordInputStream implements LittleEndianInput {
}
public void readFully(byte[] buf, int off, int len) {
checkRecordPosition(len);
_dataInput.readFully(buf, off, len);
_currentDataOffset+=len;
int origLen = len;
if (buf == null) {
throw new NullPointerException();
} else if (off < 0 || len < 0 || len > buf.length - off) {
throw new IndexOutOfBoundsException();
}
while (len > 0) {
int nextChunk = Math.min(available(),len);
if (nextChunk == 0) {
if (!hasNextRecord()) {
throw new RecordFormatException("Can't read the remaining "+len+" bytes of the requested "+origLen+" bytes. No further record exists.");
} else {
nextRecord();
nextChunk = Math.min(available(),len);
assert(nextChunk > 0);
}
}
checkRecordPosition(nextChunk);
_dataInput.readFully(buf, off, nextChunk);
_currentDataOffset+=nextChunk;
off += nextChunk;
len -= nextChunk;
}
}
public String readString() {
@ -362,6 +383,7 @@ public final class RecordInputStream implements LittleEndianInput {
nextRecord();
// note - the compressed flag may change on the fly
byte compressFlag = readByte();
assert(compressFlag == 0 || compressFlag == 1);
isCompressedEncoding = (compressFlag == 0);
}
}

View File

@ -19,8 +19,6 @@ package org.apache.poi.hssf.usermodel;
import java.awt.Dimension;
import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import org.apache.poi.ddf.DefaultEscherRecordFactory;
import org.apache.poi.ddf.EscherBSERecord;
@ -40,11 +38,13 @@ import org.apache.poi.ss.usermodel.Picture;
import org.apache.poi.ss.util.ImageUtils;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.StringUtil;
/**
* Represents a escher picture. Eg. A GIF, JPEG etc...
*/
public class HSSFPicture extends HSSFSimpleShape implements Picture {
@SuppressWarnings("unused")
private static POILogger logger = POILogFactory.getLogger(HSSFPicture.class);
public static final int PICTURE_TYPE_EMF = HSSFWorkbook.PICTURE_TYPE_EMF; // Windows Enhanced Metafile
@ -226,16 +226,14 @@ public class HSSFPicture extends HSSFSimpleShape implements Picture {
EscherProperties.BLIP__BLIPFILENAME);
return (null == propFile)
? ""
: new String(propFile.getComplexData(), Charset.forName("UTF-16LE")).trim();
: StringUtil.getFromUnicodeLE(propFile.getComplexData()).trim();
}
public void setFileName(String data){
try {
EscherComplexProperty prop = new EscherComplexProperty(EscherProperties.BLIP__BLIPFILENAME, true, data.getBytes("UTF-16LE"));
// TODO: add trailing \u0000?
byte bytes[] = StringUtil.getToUnicodeLE(data);
EscherComplexProperty prop = new EscherComplexProperty(EscherProperties.BLIP__BLIPFILENAME, true, bytes);
setPropertyValue(prop);
} catch (UnsupportedEncodingException e) {
logger.log( POILogger.ERROR, "Unsupported encoding: UTF-16LE");
}
}
@Override

View File

@ -35,6 +35,7 @@ import javax.crypto.spec.RC2ParameterSpec;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
import org.apache.poi.util.StringUtil;
/**
* Helper functions used for standard and agile encryption
@ -100,7 +101,7 @@ public class CryptoFunctions {
MessageDigest hashAlg = getMessageDigest(hashAlgorithm);
hashAlg.update(salt);
byte[] hash = hashAlg.digest(getUtf16LeString(password));
byte[] hash = hashAlg.digest(StringUtil.getToUnicodeLE(password));
byte[] iterator = new byte[LittleEndianConsts.INT_SIZE];
byte[] first = (iteratorFirst ? iterator : hash);
@ -266,10 +267,6 @@ public class CryptoFunctions {
return result;
}
public static byte[] getUtf16LeString(String str) {
return str.getBytes(Charset.forName("UTF-16LE"));
}
public static MessageDigest getMessageDigest(HashAlgorithm hashAlgorithm) {
try {
if (hashAlgorithm.needsBouncyCastle) {

View File

@ -18,7 +18,7 @@
package org.apache.poi.poifs.crypt;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.poifs.crypt.standard.EncryptionRecord;
@ -30,6 +30,7 @@ import org.apache.poi.util.LittleEndianByteArrayOutputStream;
import org.apache.poi.util.LittleEndianConsts;
import org.apache.poi.util.LittleEndianInput;
import org.apache.poi.util.LittleEndianOutput;
import org.apache.poi.util.StringUtil;
public class DataSpaceMapUtils {
public static void addDefaultDataSpace(DirectoryEntry dir) throws IOException {
@ -302,32 +303,30 @@ public class DataSpaceMapUtils {
public static String readUnicodeLPP4(LittleEndianInput is) {
int length = is.readInt();
byte data[] = new byte[length];
is.readFully(data);
if (length%2 != 0) {
throw new EncryptedDocumentException(
"UNICODE-LP-P4 structure is a multiple of 4 bytes. "
+ "If Padding is present, it MUST be exactly 2 bytes long");
}
String result = StringUtil.readUnicodeLE(is, length/2);
if (length%4==2) {
// Padding (variable): A set of bytes that MUST be of the correct size such that the size of the
// UNICODE-LP-P4 structure is a multiple of 4 bytes. If Padding is present, it MUST be exactly
// 2 bytes long, and each byte MUST be 0x00.
is.readShort();
}
try {
return new String(data, 0, data.length, "UTF-16LE");
} catch (UnsupportedEncodingException e) {
throw new EncryptedDocumentException(e);
}
return result;
}
public static void writeUnicodeLPP4(LittleEndianOutput os, String str) {
try {
byte buf[] = str.getBytes("UTF-16LE");
public static void writeUnicodeLPP4(LittleEndianOutput os, String string) {
byte buf[] = StringUtil.getToUnicodeLE(string);
os.writeInt(buf.length);
os.write(buf);
if (buf.length%4==2) {
os.writeShort(0);
}
} catch (UnsupportedEncodingException e) {
throw new EncryptedDocumentException(e);
}
}
public static String readUtf8LPP4(LittleEndianInput is) {
@ -352,11 +351,8 @@ public class DataSpaceMapUtils {
is.readByte();
}
}
try {
return new String(data, 0, data.length, "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new EncryptedDocumentException(e);
}
return new String(data, 0, data.length, Charset.forName("UTF-8"));
}
public static void writeUtf8LPP4(LittleEndianOutput os, String str) {
@ -364,8 +360,7 @@ public class DataSpaceMapUtils {
os.writeInt(str == null ? 0 : 4);
os.writeInt(0);
} else {
try {
byte buf[] = str.getBytes("UTF-8");
byte buf[] = str.getBytes(Charset.forName("UTF-8"));
os.writeInt(buf.length);
os.write(buf);
int scratchBytes = buf.length%4;
@ -374,9 +369,6 @@ public class DataSpaceMapUtils {
os.writeByte(0);
}
}
} catch (UnsupportedEncodingException e) {
throw new EncryptedDocumentException(e);
}
}
}

View File

@ -22,14 +22,17 @@ import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.security.MessageDigest;
import java.util.Arrays;
import javax.crypto.Cipher;
import javax.crypto.SecretKey;
import javax.crypto.spec.SecretKeySpec;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.poifs.crypt.*;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.StringUtil;
public class BinaryRC4Decryptor extends Decryptor {
private long _length = -1L;
@ -99,7 +102,7 @@ public class BinaryRC4Decryptor extends Decryptor {
password = password.substring(0, 255);
HashAlgorithm hashAlgo = ver.getHashAlgorithm();
MessageDigest hashAlg = CryptoFunctions.getMessageDigest(hashAlgo);
byte hash[] = hashAlg.digest(CryptoFunctions.getUtf16LeString(password));
byte hash[] = hashAlg.digest(StringUtil.getToUnicodeLE(password));
byte salt[] = ver.getSalt();
hashAlg.reset();
for (int i = 0; i < 16; i++) {

View File

@ -21,7 +21,6 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.security.GeneralSecurityException;
import java.security.MessageDigest;
import java.util.Arrays;
@ -48,6 +47,7 @@ import org.apache.poi.util.BoundedInputStream;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianInputStream;
import org.apache.poi.util.StringUtil;
public class CryptoAPIDecryptor extends Decryptor {
@ -185,7 +185,7 @@ public class CryptoAPIDecryptor extends Decryptor {
HashAlgorithm hashAlgo = ver.getHashAlgorithm();
MessageDigest hashAlg = CryptoFunctions.getMessageDigest(hashAlgo);
hashAlg.update(ver.getSalt());
byte hash[] = hashAlg.digest(CryptoFunctions.getUtf16LeString(password));
byte hash[] = hashAlg.digest(StringUtil.getToUnicodeLE(password));
SecretKey skey = new SecretKeySpec(hash, ver.getCipherAlgorithm().jceId);
return skey;
}
@ -224,9 +224,7 @@ public class CryptoAPIDecryptor extends Decryptor {
entry.flags = leis.readUByte();
boolean isStream = StreamDescriptorEntry.flagStream.isSet(entry.flags);
entry.reserved2 = leis.readInt();
byte nameBuf[] = new byte[nameSize * 2];
leis.read(nameBuf);
entry.streamName = new String(nameBuf, Charset.forName("UTF-16LE"));
entry.streamName = StringUtil.readUnicodeLE(leis, nameSize);
leis.readShort();
assert(entry.streamName.length() == nameSize);
}

View File

@ -21,7 +21,6 @@ import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.security.GeneralSecurityException;
import java.security.MessageDigest;
import java.security.SecureRandom;
@ -49,6 +48,7 @@ import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianByteArrayOutputStream;
import org.apache.poi.util.StringUtil;
public class CryptoAPIEncryptor extends Encryptor {
private final CryptoAPIEncryptionInfoBuilder builder;
@ -164,7 +164,7 @@ public class CryptoAPIEncryptor extends Encryptor {
bos.write(buf, 0, 1);
LittleEndian.putUInt(buf, 0, sde.reserved2);
bos.write(buf, 0, 4);
byte nameBytes[] = sde.streamName.getBytes(Charset.forName("UTF-16LE"));
byte nameBytes[] = StringUtil.getToUnicodeLE(sde.streamName);
bos.write(nameBytes, 0, nameBytes.length);
LittleEndian.putShort(buf, 0, (short)0); // null-termination
bos.write(buf, 0, 2);

View File

@ -16,7 +16,6 @@
==================================================================== */
package org.apache.poi.poifs.crypt.standard;
import static org.apache.poi.poifs.crypt.CryptoFunctions.getUtf16LeString;
import static org.apache.poi.poifs.crypt.EncryptionInfo.flagAES;
import static org.apache.poi.poifs.crypt.EncryptionInfo.flagCryptoAPI;
@ -32,6 +31,7 @@ import org.apache.poi.util.LittleEndianByteArrayOutputStream;
import org.apache.poi.util.LittleEndianConsts;
import org.apache.poi.util.LittleEndianInput;
import org.apache.poi.util.LittleEndianOutput;
import org.apache.poi.util.StringUtil;
public class StandardEncryptionHeader extends EncryptionHeader implements EncryptionRecord {
@ -103,7 +103,7 @@ public class StandardEncryptionHeader extends EncryptionHeader implements Encryp
bos.writeInt(0); // reserved2
String cspName = getCspName();
if (cspName == null) cspName = getCipherProvider().cipherProviderName;
bos.write(getUtf16LeString(cspName));
bos.write(StringUtil.getToUnicodeLE(cspName));
bos.writeShort(0);
int headerSize = bos.getWriteIndex()-startIdx-LittleEndianConsts.INT_SIZE;
sizeOutput.writeInt(headerSize);

View File

@ -17,7 +17,7 @@
package org.apache.poi.util;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.text.FieldPosition;
import java.text.NumberFormat;
import java.util.Iterator;
@ -26,16 +26,17 @@ import org.apache.poi.hssf.record.RecordInputStream;
/**
* Title: String Utility Description: Collection of string handling utilities<p/>
*
* Note - none of the methods in this class deals with {@link org.apache.poi.hssf.record.ContinueRecord}s. For such
* functionality, consider using {@link RecordInputStream
} *
* Note - none of the methods in this class deals with {@link org.apache.poi.hssf.record.ContinueRecord}s.
* For such functionality, consider using {@link RecordInputStream}
*
*
*@author Andrew C. Oliver
*@author Sergei Kozello (sergeikozello at mail.ru)
*@author Toshiaki Kamoshida (kamoshida.toshiaki at future dot co dot jp)
*/
public class StringUtil {
private static final String ENCODING_ISO_8859_1 = "ISO-8859-1";
private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
private static final Charset UTF16LE = Charset.forName("UTF-16LE");
private StringUtil() {
// no instances of this class
@ -73,11 +74,7 @@ public class StringUtil {
throw new IllegalArgumentException("Illegal length " + len);
}
try {
return new String(string, offset, len * 2, "UTF-16LE");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
return new String(string, offset, len * 2, UTF16LE);
}
/**
@ -95,6 +92,16 @@ public class StringUtil {
return getFromUnicodeLE(string, 0, string.length / 2);
}
/**
 * Encodes the given string into its UTF-16LE byte representation.
 *
 * @param string the text to encode; passing {@code null} results in a
 *        {@link NullPointerException}
 * @return a newly encoded byte array holding the string in UTF-16LE
 */
public static byte[] getToUnicodeLE(String string) {
final byte[] encoded = string.getBytes(UTF16LE);
return encoded;
}
/**
* Read 8 bit data (in ISO-8859-1 codepage) into a (unicode) Java
* String and return.
@ -109,20 +116,16 @@ public class StringUtil {
final byte[] string,
final int offset,
final int len) {
try {
int len_to_use = Math.min(len, string.length - offset);
return new String(string, offset, len_to_use, ENCODING_ISO_8859_1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
return new String(string, offset, len_to_use, ISO_8859_1);
}
public static String readCompressedUnicode(LittleEndianInput in, int nChars) {
char[] buf = new char[nChars];
for (int i = 0; i < buf.length; i++) {
buf[i] = (char) in.readUByte();
}
return new String(buf);
byte[] buf = new byte[nChars];
in.readFully(buf);
return new String(buf, ISO_8859_1);
}
/**
* InputStream <tt>in</tt> is expected to contain:
* <ol>
@ -225,21 +228,12 @@ public class StringUtil {
* when written
*/
public static void putCompressedUnicode(String input, byte[] output, int offset) {
byte[] bytes;
try {
bytes = input.getBytes(ENCODING_ISO_8859_1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
byte[] bytes = input.getBytes(ISO_8859_1);
System.arraycopy(bytes, 0, output, offset, bytes.length);
}
public static void putCompressedUnicode(String input, LittleEndianOutput out) {
byte[] bytes;
try {
bytes = input.getBytes(ENCODING_ISO_8859_1);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
byte[] bytes = input.getBytes(ISO_8859_1);
out.write(bytes);
}
@ -253,30 +247,18 @@ public class StringUtil {
* @param offset the offset to start writing into the byte array
*/
public static void putUnicodeLE(String input, byte[] output, int offset) {
byte[] bytes;
try {
bytes = input.getBytes("UTF-16LE");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
byte[] bytes = input.getBytes(UTF16LE);
System.arraycopy(bytes, 0, output, offset, bytes.length);
}
public static void putUnicodeLE(String input, LittleEndianOutput out) {
byte[] bytes;
try {
bytes = input.getBytes("UTF-16LE");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
byte[] bytes = input.getBytes(UTF16LE);
out.write(bytes);
}
public static String readUnicodeLE(LittleEndianInput in, int nChars) {
char[] buf = new char[nChars];
for (int i = 0; i < buf.length; i++) {
buf[i] = (char) in.readUShort();
}
return new String(buf);
byte[] bytes = new byte[nChars*2];
in.readFully(bytes);
return new String(bytes, UTF16LE);
}
/**
@ -358,7 +340,7 @@ public class StringUtil {
* @return the encoding we want to use, currently hardcoded to ISO-8859-1
*/
public static String getPreferredEncoding() {
return ENCODING_ISO_8859_1;
return ISO_8859_1.name();
}
/**
@ -386,12 +368,7 @@ public class StringUtil {
* @return true if string needs Unicode to be represented.
*/
public static boolean isUnicodeString(final String value) {
try {
return !value.equals(new String(value.getBytes(ENCODING_ISO_8859_1),
ENCODING_ISO_8859_1));
} catch (UnsupportedEncodingException e) {
return true;
}
return !value.equals(new String(value.getBytes(ISO_8859_1), ISO_8859_1));
}
/**

View File

@ -20,7 +20,7 @@ package org.apache.poi.openxml4j.opc;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.regex.Pattern;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
@ -748,13 +748,7 @@ public final class PackagingURIHelper {
int n = s.length();
if (n == 0) return s;
ByteBuffer bb;
try {
bb = ByteBuffer.wrap(s.getBytes("UTF-8"));
} catch (UnsupportedEncodingException e){
// should not happen
throw new RuntimeException(e);
}
ByteBuffer bb = ByteBuffer.wrap(s.getBytes(Charset.forName("UTF-8")));
StringBuilder sb = new StringBuilder();
while (bb.hasRemaining()) {
int b = bb.get() & 0xff;

View File

@ -18,7 +18,7 @@
package org.apache.poi.xssf.usermodel;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Date;
import junit.framework.TestCase;
@ -66,9 +66,10 @@ public final class TestUnfixedBugs extends TestCase {
verifyBug54084Unicode(wbStreamingWritten);
}
private void verifyBug54084Unicode(Workbook wb) throws UnsupportedEncodingException {
private void verifyBug54084Unicode(Workbook wb) {
// expected data is stored in UTF-8 in a text-file
String testData = new String(HSSFTestDataSamples.getTestDataFileContent("54084 - Greek - beyond BMP.txt"), "UTF-8").trim();
byte data[] = HSSFTestDataSamples.getTestDataFileContent("54084 - Greek - beyond BMP.txt");
String testData = new String(data, Charset.forName("UTF-8")).trim();
Sheet sheet = wb.getSheetAt(0);
Row row = sheet.getRow(0);

View File

@ -20,7 +20,7 @@ package org.apache.poi.hmef;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LZWDecompresser;
@ -73,6 +73,7 @@ public final class CompressedRTF extends LZWDecompresser {
compressedSize = LittleEndian.readInt(src);
decompressedSize = LittleEndian.readInt(src);
int compressionType = LittleEndian.readInt(src);
@SuppressWarnings("unused")
int dataCRC = LittleEndian.readInt(src);
// TODO - Handle CRC checking on the output side
@ -117,15 +118,11 @@ public final class CompressedRTF extends LZWDecompresser {
@Override
protected int populateDictionary(byte[] dict) {
try {
// Copy in the RTF constants
byte[] preload = LZW_RTF_PRELOAD.getBytes("US-ASCII");
byte[] preload = LZW_RTF_PRELOAD.getBytes(Charset.forName("US-ASCII"));
System.arraycopy(preload, 0, dict, 0, preload.length);
// Start adding new codes after the constants
return preload.length;
} catch(UnsupportedEncodingException e) {
throw new RuntimeException("Your JVM is broken as it doesn't support US ASCII");
}
}
}

View File

@ -17,7 +17,7 @@
package org.apache.poi.hmef.attribute;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import org.apache.poi.hmef.Attachment;
import org.apache.poi.hmef.HMEFMessage;
@ -41,11 +41,7 @@ public final class MAPIStringAttribute extends MAPIAttribute {
String tmpData = null;
if(type == Types.ASCII_STRING.getId()) {
try {
tmpData = new String(data, CODEPAGE);
} catch(UnsupportedEncodingException e) {
throw new RuntimeException("JVM Broken - core encoding " + CODEPAGE + " missing");
}
tmpData = new String(data, Charset.forName(CODEPAGE));
} else if(type == Types.UNICODE_STRING.getId()) {
tmpData = StringUtil.getFromUnicodeLE(data);
} else {

View File

@ -17,15 +17,26 @@
package org.apache.poi.hslf.model;
import org.apache.poi.ddf.*;
import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.util.LittleEndian;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import org.apache.poi.ddf.EscherClientDataRecord;
import org.apache.poi.ddf.EscherComplexProperty;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherOptRecord;
import org.apache.poi.ddf.EscherProperties;
import org.apache.poi.ddf.EscherRecord;
import org.apache.poi.ddf.EscherSpRecord;
import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.hslf.record.Document;
import org.apache.poi.hslf.record.ExControl;
import org.apache.poi.hslf.record.ExObjList;
import org.apache.poi.hslf.record.OEShapeAtom;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.RecordTypes;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.StringUtil;
/**
* Represents an ActiveX control in a PowerPoint document.
*
@ -155,14 +166,10 @@ public final class ActiveXShape extends Picture {
ExControl ctrl = getExControl();
ctrl.getExControlAtom().setSlideId(sheet._getSheetNumber());
try {
String name = ctrl.getProgId() + "-" + getControlIndex();
byte[] data = (name + '\u0000').getBytes("UTF-16LE");
String name = ctrl.getProgId() + "-" + getControlIndex() + '\u0000';
byte[] data = StringUtil.getToUnicodeLE(name);
EscherComplexProperty prop = new EscherComplexProperty(EscherProperties.GROUPSHAPE__SHAPENAME, false, data);
EscherOptRecord opt = getEscherOptRecord();
opt.addEscherProperty(prop);
} catch (UnsupportedEncodingException e){
throw new HSLFException(e);
}
}
}

View File

@ -17,23 +17,30 @@
package org.apache.poi.hslf.model;
import org.apache.poi.ddf.*;
import org.apache.poi.hslf.usermodel.PictureData;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.hslf.record.Document;
import org.apache.poi.hslf.blip.Bitmap;
import org.apache.poi.hslf.exceptions.HSLFException;
import org.apache.poi.util.POILogger;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.awt.*;
import java.awt.Graphics2D;
import java.awt.geom.AffineTransform;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.List;
import javax.imageio.ImageIO;
import org.apache.poi.ddf.EscherBSERecord;
import org.apache.poi.ddf.EscherComplexProperty;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherOptRecord;
import org.apache.poi.ddf.EscherProperties;
import org.apache.poi.ddf.EscherRecord;
import org.apache.poi.ddf.EscherSimpleProperty;
import org.apache.poi.ddf.EscherSpRecord;
import org.apache.poi.hslf.blip.Bitmap;
import org.apache.poi.hslf.record.Document;
import org.apache.poi.hslf.usermodel.PictureData;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.StringUtil;
/**
* Represents a picture in a PowerPoint document.
@ -199,7 +206,7 @@ public class Picture extends SimpleShape {
logger.log(POILogger.DEBUG, "EscherContainerRecord.BSTORE_CONTAINER was not found ");
return null;
}
List lst = bstore.getChildRecords();
List<EscherRecord> lst = bstore.getChildRecords();
int idx = getPictureIndex();
if (idx == 0){
logger.log(POILogger.DEBUG, "picture index was not found, returning ");
@ -216,17 +223,9 @@ public class Picture extends SimpleShape {
public String getPictureName(){
EscherOptRecord opt = getEscherOptRecord();
EscherComplexProperty prop = (EscherComplexProperty)getEscherProperty(opt, EscherProperties.BLIP__BLIPFILENAME);
String name = null;
if(prop != null){
try {
name = new String(prop.getComplexData(), "UTF-16LE");
int idx = name.indexOf('\u0000');
return idx == -1 ? name : name.substring(0, idx);
} catch (UnsupportedEncodingException e){
throw new HSLFException(e);
}
}
return name;
if (prop == null) return null;
String name = StringUtil.getFromUnicodeLE(prop.getComplexData());
return name.trim();
}
/**
@ -236,13 +235,9 @@ public class Picture extends SimpleShape {
*/
public void setPictureName(String name){
EscherOptRecord opt = getEscherOptRecord();
try {
byte[] data = (name + '\u0000').getBytes("UTF-16LE");
byte[] data = StringUtil.getToUnicodeLE(name + '\u0000');
EscherComplexProperty prop = new EscherComplexProperty(EscherProperties.BLIP__BLIPFILENAME, false, data);
opt.addEscherProperty(prop);
} catch (UnsupportedEncodingException e){
throw new HSLFException(e);
}
}
/**

View File

@ -17,10 +17,11 @@
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.StringUtil;
/**
* This atom corresponds exactly to a Windows Logical Font (LOGFONT) structure.
@ -77,21 +78,14 @@ public final class FontEntityAtom extends RecordAtom {
* @return font name
*/
public String getFontName(){
String name = null;
try {
int i = 0;
while(i < 64){
int maxLen = Math.min(_recdata.length,64);
for(int i = 0; i < maxLen; i+=2){
// loop until we find the two-byte null terminator that ends the font name
if(_recdata[i] == 0 && _recdata[i + 1] == 0) {
name = new String(_recdata, 0, i, "UTF-16LE");
break;
return StringUtil.getFromUnicodeLE(_recdata, 0, i/2);
}
i += 2;
}
} catch (UnsupportedEncodingException e){
throw new RuntimeException(e.getMessage(), e);
}
return name;
return null;
}
/**
@ -103,8 +97,8 @@ public final class FontEntityAtom extends RecordAtom {
*/
public void setFontName(String name){
// Add a null termination if required
if(! name.endsWith("\000")) {
name = name + "\000";
if(! name.endsWith("\u0000")) {
name += '\u0000';
}
// Ensure it's not now too long
@ -113,12 +107,8 @@ public final class FontEntityAtom extends RecordAtom {
}
// Everything's happy, so save the name
try {
byte[] bytes = name.getBytes("UTF-16LE");
byte[] bytes = StringUtil.getToUnicodeLE(name);
System.arraycopy(bytes, 0, _recdata, 0, bytes.length);
} catch (UnsupportedEncodingException e){
throw new RuntimeException(e.getMessage(), e);
}
}
public void setFontIndex(int idx){

View File

@ -19,7 +19,7 @@ package org.apache.poi.hsmf.datatypes;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -61,12 +61,8 @@ public class MessageSubmissionChunk extends Chunk {
public void readValue(InputStream value) throws IOException {
// Stored in the file as us-ascii
try {
byte[] data = IOUtils.toByteArray(value);
rawId = new String(data, "ASCII");
} catch(UnsupportedEncodingException e) {
throw new RuntimeException("Core encoding not found, JVM broken?", e);
}
rawId = new String(data, Charset.forName("ASCII"));
// Now process the date
String[] parts = rawId.split(";");
@ -97,12 +93,8 @@ public class MessageSubmissionChunk extends Chunk {
}
public void writeValue(OutputStream out) throws IOException {
try {
byte[] data = rawId.getBytes("ASCII");
byte[] data = rawId.getBytes(Charset.forName("ASCII"));
out.write(data);
} catch(UnsupportedEncodingException e) {
throw new RuntimeException("Core encoding not found, JVM broken?", e);
}
}
/**

View File

@ -20,7 +20,7 @@ package org.apache.poi.hsmf.datatypes;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import org.apache.poi.hsmf.datatypes.Types.MAPIType;
import org.apache.poi.util.IOUtils;
@ -98,14 +98,9 @@ public class StringChunk extends Chunk {
}
private void storeString() {
if (type == Types.ASCII_STRING) {
try {
rawValue = value.getBytes(encoding7Bit);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("Encoding not found - " + encoding7Bit, e);
}
rawValue = value.getBytes(Charset.forName(encoding7Bit));
} else if (type == Types.UNICODE_STRING) {
rawValue = new byte[value.length()*2];
StringUtil.putUnicodeLE(value, rawValue, 0);
rawValue = StringUtil.getToUnicodeLE(value);
} else {
throw new IllegalArgumentException("Invalid type " + type + " for String Chunk");
}
@ -149,10 +144,6 @@ public class StringChunk extends Chunk {
}
// Decode
try {
return new String(data, encoding);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException("Encoding not found - " + encoding, e);
}
return new String(data, Charset.forName(encoding));
}
}

View File

@ -16,18 +16,16 @@
==================================================================== */
package org.apache.poi.hwpf.model;
import java.io.IOException;
import org.apache.poi.util.Internal;
import org.apache.poi.util.StringUtil;
@Internal
public class SinglentonTextPiece extends TextPiece
{
public SinglentonTextPiece( StringBuilder buffer ) throws IOException
public SinglentonTextPiece( StringBuilder buffer )
{
super( 0, buffer.length(), buffer.toString().getBytes( "UTF-16LE" ),
new PieceDescriptor( new byte[8], 0 ) );
super( 0, buffer.length(), StringUtil.getToUnicodeLE(buffer.toString()), new PieceDescriptor( new byte[8], 0 ) );
}
@Override

View File

@ -17,7 +17,6 @@
package org.apache.poi.hwpf.model;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
@ -26,6 +25,7 @@ import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.StringUtil;
/**
* Comment me
@ -104,14 +104,7 @@ public final class StyleDescription implements HDFType
nameLength = std[nameStart];
}
try
{
_name = new String(std, nameStart, nameLength * multiplier, "UTF-16LE");
}
catch (UnsupportedEncodingException ignore)
{
// ignore
}
_name = StringUtil.getFromUnicodeLE(std, nameStart, (nameLength*multiplier)/2);
//length then null terminator.
int grupxStart = ((nameLength + 1) * multiplier) + nameStart;

View File

@ -18,7 +18,7 @@
package org.apache.poi.hwpf.model;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import org.apache.poi.util.Internal;
@ -81,16 +81,8 @@ public class TextPiece extends PropertyNode<TextPiece>
* Create the StringBuilder from the text and unicode flag
*/
private static StringBuilder buildInitSB(byte[] text, PieceDescriptor pd) {
String str;
try {
if(pd.isUnicode()) {
str = new String(text, "UTF-16LE");
} else {
str = new String(text, "Cp1252");
}
} catch(UnsupportedEncodingException e) {
throw new RuntimeException("Your Java is broken! It doesn't know about basic, required character encodings!");
}
String str = new String(text, Charset.forName(pd.isUnicode() ? "UTF-16LE" : "Cp1252"));
return new StringBuilder(str);
}
@ -120,12 +112,9 @@ public class TextPiece extends PropertyNode<TextPiece>
public byte[] getRawBytes()
{
try {
return ((CharSequence)_buf).toString().getBytes(_usesUnicode ?
"UTF-16LE" : "Cp1252");
} catch (UnsupportedEncodingException ignore) {
throw new RuntimeException("Your Java is broken! It doesn't know about basic, required character encodings!");
}
return ((CharSequence)_buf).toString().getBytes(
Charset.forName(_usesUnicode ? "UTF-16LE" : "Cp1252")
);
}
/**

View File

@ -392,7 +392,6 @@ public class Range { // TODO -instantiable superclass
*/
@Deprecated
public CharacterRun insertBefore(String text, CharacterProperties props)
// throws UnsupportedEncodingException
{
initAll();
PAPX papx = _paragraphs.get(_parStart);
@ -421,7 +420,6 @@ public class Range { // TODO -instantiable superclass
*/
@Deprecated
public CharacterRun insertAfter(String text, CharacterProperties props)
// throws UnsupportedEncodingException
{
initAll();
PAPX papx = _paragraphs.get(_parEnd - 1);
@ -448,7 +446,6 @@ public class Range { // TODO -instantiable superclass
*/
@Deprecated
public Paragraph insertBefore(ParagraphProperties props, int styleIndex)
// throws UnsupportedEncodingException
{
return this.insertBefore(props, styleIndex, "\r");
}
@ -471,7 +468,6 @@ public class Range { // TODO -instantiable superclass
*/
@Deprecated
protected Paragraph insertBefore(ParagraphProperties props, int styleIndex, String text)
// throws UnsupportedEncodingException
{
initAll();
StyleSheet ss = _doc.getStyleSheet();
@ -501,7 +497,6 @@ public class Range { // TODO -instantiable superclass
*/
@Deprecated
public Paragraph insertAfter(ParagraphProperties props, int styleIndex)
// throws UnsupportedEncodingException
{
return this.insertAfter(props, styleIndex, "\r");
}
@ -524,7 +519,6 @@ public class Range { // TODO -instantiable superclass
*/
@Deprecated
protected Paragraph insertAfter(ParagraphProperties props, int styleIndex, String text)
// throws UnsupportedEncodingException
{
initAll();
StyleSheet ss = _doc.getStyleSheet();

View File

@ -23,6 +23,7 @@ import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherOptRecord;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFTestDataSamples;
import org.apache.poi.util.StringUtil;
/**
* Test cases for {@link OfficeDrawing} and {@link OfficeDrawingsImpl} classes.
@ -48,7 +49,7 @@ public class TestOfficeDrawings extends TestCase
EscherComplexProperty gtextUNICODE = (EscherComplexProperty) officeArtFOPT
.lookup( 0x00c0 );
String text = new String( gtextUNICODE.getComplexData(), "UTF-16LE" );
String text = StringUtil.getFromUnicodeLE(gtextUNICODE.getComplexData());
assertEquals( "DRAFT CONTRACT\0", text );
}
}

View File

@ -17,13 +17,13 @@
package org.apache.poi.util;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.text.NumberFormat;
import org.apache.poi.util.StringUtil.StringsIterator;
import junit.framework.TestCase;
import org.apache.poi.util.StringUtil.StringsIterator;
/**
* Unit test for StringUtil
*
@ -61,12 +61,7 @@ public final class TestStringUtil extends TestCase {
(byte) 'o', (byte) ' ', (byte) 'W', (byte) 'o',
(byte) 'r', (byte) 'l', (byte) 'd', (byte) 0xAE
};
String input;
try {
input = new String( expected_output, StringUtil.getPreferredEncoding() );
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
String input = new String( expected_output, Charset.forName(StringUtil.getPreferredEncoding()) );
StringUtil.putCompressedUnicode( input, output, 0 );
for ( int j = 0; j < expected_output.length; j++ )