diff --git a/src/java/org/apache/poi/poifs/common/POIFSConstants.java b/src/java/org/apache/poi/poifs/common/POIFSConstants.java index 399f52be4b..ff2050274d 100644 --- a/src/java/org/apache/poi/poifs/common/POIFSConstants.java +++ b/src/java/org/apache/poi/poifs/common/POIFSConstants.java @@ -27,7 +27,11 @@ package org.apache.poi.poifs.common; public interface POIFSConstants { + /** Most files use 512 bytes as their big block size */ public static final int BIG_BLOCK_SIZE = 0x0200; + /** Some use 4096 bytes */ + public static final int LARGER_BIG_BLOCK_SIZE = 0x1000; + public static final int END_OF_CHAIN = -2; public static final int PROPERTY_SIZE = 0x0080; public static final int UNUSED_BLOCK = -1; diff --git a/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java b/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java index fe94b4aaf9..73911e6b0e 100644 --- a/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java +++ b/src/java/org/apache/poi/poifs/eventfilesystem/POIFSReader.java @@ -78,7 +78,7 @@ public class POIFSReader HeaderBlockReader header_block_reader = new HeaderBlockReader(stream); // read the rest of the stream into blocks - RawDataBlockList data_blocks = new RawDataBlockList(stream); + RawDataBlockList data_blocks = new RawDataBlockList(stream, header_block_reader.getBigBlockSize()); // set up the block allocation table (necessary for the // data_blocks to be manageable diff --git a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java index ef9acfe60b..61774dc676 100644 --- a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java @@ -33,6 +33,7 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.dev.POIFSViewable; import 
org.apache.poi.poifs.property.DirectoryProperty; import org.apache.poi.poifs.property.Property; @@ -63,7 +64,6 @@ public class POIFSFileSystem { private static final Log _logger = LogFactory.getLog(POIFSFileSystem.class); - private static final class CloseIgnoringInputStream extends InputStream { private final InputStream _is; @@ -91,11 +91,16 @@ public class POIFSFileSystem private PropertyTable _property_table; private List _documents; private DirectoryNode _root; + + /** + * What big block size the file uses. Most files + * use 512 bytes, but a few use 4096 + */ + private int bigBlockSize = POIFSConstants.BIG_BLOCK_SIZE; /** * Constructor, intended for writing */ - public POIFSFileSystem() { _property_table = new PropertyTable(); @@ -138,13 +143,15 @@ public class POIFSFileSystem this(); boolean success = false; - // read the header block from the stream HeaderBlockReader header_block_reader; - // read the rest of the stream into blocks RawDataBlockList data_blocks; try { + // read the header block from the stream header_block_reader = new HeaderBlockReader(stream); - data_blocks = new RawDataBlockList(stream); + bigBlockSize = header_block_reader.getBigBlockSize(); + + // read the rest of the stream into blocks + data_blocks = new RawDataBlockList(stream, bigBlockSize); success = true; } finally { closeInputStream(stream, success); @@ -307,7 +314,7 @@ public class POIFSFileSystem // create a list of BATManaged objects: the documents plus the // property table and the small block table - List bm_objects = new ArrayList(); + List bm_objects = new ArrayList(); bm_objects.addAll(_documents); bm_objects.add(_property_table); @@ -602,6 +609,13 @@ public class POIFSFileSystem return "POIFS FileSystem"; } + /** + * @return The Big Block size, normally 512 bytes, sometimes 4096 bytes + */ + public int getBigBlockSize() { + return bigBlockSize; + } + /* ********** END begin implementation of POIFSViewable ********** */ } // end public class POIFSFileSystem diff --git 
a/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java b/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java index 0d5bb817b4..b001b81058 100644 --- a/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java +++ b/src/java/org/apache/poi/poifs/storage/HeaderBlockReader.java @@ -21,8 +21,6 @@ package org.apache.poi.poifs.storage; import java.io.*; -import java.util.*; - import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.util.IOUtils; @@ -30,7 +28,6 @@ import org.apache.poi.util.IntegerField; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndianConsts; import org.apache.poi.util.LongField; -import org.apache.poi.util.ShortField; /** * The block containing the archive header @@ -41,6 +38,11 @@ import org.apache.poi.util.ShortField; public class HeaderBlockReader implements HeaderBlockConstants { + /** + * What big block size the file uses. Most files + * use 512 bytes, but a few use 4096 + */ + private int bigBlockSize = POIFSConstants.BIG_BLOCK_SIZE; // number of big block allocation table blocks (int) private IntegerField _bat_count; @@ -69,20 +71,27 @@ public class HeaderBlockReader public HeaderBlockReader(final InputStream stream) throws IOException { - _data = new byte[ POIFSConstants.BIG_BLOCK_SIZE ]; - int byte_count = IOUtils.readFully(stream, _data); - - if (byte_count != POIFSConstants.BIG_BLOCK_SIZE) - { - if (byte_count == -1) - //Cant have -1 bytes read in the error message! - byte_count = 0; - String type = " byte" + ((byte_count == 1) ? 
("") - : ("s")); - - throw new IOException("Unable to read entire header; " - + byte_count + type + " read; expected " - + POIFSConstants.BIG_BLOCK_SIZE + " bytes"); + // At this point, we don't know how big our + // block sizes are + // So, read the first 32 bytes to check, then + // read the rest of the block + byte[] blockStart = new byte[32]; + int bsCount = IOUtils.readFully(stream, blockStart); + if(bsCount != 32) { + alertShortRead(bsCount); + } + + // Figure out our block size + if(blockStart[30] == 12) { + bigBlockSize = POIFSConstants.LARGER_BIG_BLOCK_SIZE; + } + _data = new byte[ bigBlockSize ]; + System.arraycopy(blockStart, 0, _data, 0, blockStart.length); + + // Now we can read the rest of our header + int byte_count = IOUtils.readFully(stream, _data, blockStart.length, _data.length - blockStart.length); + if (byte_count+bsCount != bigBlockSize) { + alertShortRead(byte_count); } // verify signature @@ -110,13 +119,24 @@ public class HeaderBlockReader _xbat_start = new IntegerField(_xbat_start_offset, _data); _xbat_count = new IntegerField(_xbat_count_offset, _data); } + + private void alertShortRead(int read) throws IOException { + if (read == -1) + //Can't have -1 bytes read in the error message! + read = 0; + String type = " byte" + ((read == 1) ? 
("") + : ("s")); + + throw new IOException("Unable to read entire header; " + + read + type + " read; expected " + + bigBlockSize + " bytes"); + } /** * get start of Property Table * * @return the index of the first block of the Property Table */ - public int getPropertyStart() { return _property_start.get(); @@ -174,5 +194,12 @@ public class HeaderBlockReader { return _xbat_start.get(); } + + /** + * @return The Big Block size, normally 512 bytes, sometimes 4096 bytes + */ + public int getBigBlockSize() { + return bigBlockSize; + } } // end public class HeaderBlockReader diff --git a/src/java/org/apache/poi/poifs/storage/RawDataBlockList.java b/src/java/org/apache/poi/poifs/storage/RawDataBlockList.java index eed318fb55..76ab219562 100644 --- a/src/java/org/apache/poi/poifs/storage/RawDataBlockList.java +++ b/src/java/org/apache/poi/poifs/storage/RawDataBlockList.java @@ -37,19 +37,20 @@ public class RawDataBlockList * Constructor RawDataBlockList * * @param stream the InputStream from which the data will be read + * @param bigBlockSize The big block size, either 512 bytes or 4096 bytes * * @exception IOException on I/O errors, and if an incomplete * block is read */ - public RawDataBlockList(final InputStream stream) + public RawDataBlockList(final InputStream stream, int bigBlockSize) throws IOException { List blocks = new ArrayList(); while (true) { - RawDataBlock block = new RawDataBlock(stream); + RawDataBlock block = new RawDataBlock(stream, bigBlockSize); if (block.eof()) { diff --git a/src/testcases/org/apache/poi/poifs/property/TestPropertyTable.java b/src/testcases/org/apache/poi/poifs/property/TestPropertyTable.java index 895c40f704..008504fb00 100644 --- a/src/testcases/org/apache/poi/poifs/property/TestPropertyTable.java +++ b/src/testcases/org/apache/poi/poifs/property/TestPropertyTable.java @@ -25,6 +25,7 @@ import java.util.*; import junit.framework.*; +import org.apache.poi.poifs.common.POIFSConstants; import 
org.apache.poi.poifs.storage.BlockAllocationTableReader; import org.apache.poi.poifs.storage.RawDataBlockList; @@ -2598,7 +2599,7 @@ public class TestPropertyTable ( byte ) 0xFF, ( byte ) 0xFF, ( byte ) 0xFF, ( byte ) 0xFF }; RawDataBlockList data_blocks = - new RawDataBlockList(new ByteArrayInputStream(raw_data_array)); + new RawDataBlockList(new ByteArrayInputStream(raw_data_array), POIFSConstants.BIG_BLOCK_SIZE); int[] bat_array = { 15 diff --git a/src/testcases/org/apache/poi/poifs/storage/LocalRawDataBlockList.java b/src/testcases/org/apache/poi/poifs/storage/LocalRawDataBlockList.java index ed3e8d9259..21049ebf18 100644 --- a/src/testcases/org/apache/poi/poifs/storage/LocalRawDataBlockList.java +++ b/src/testcases/org/apache/poi/poifs/storage/LocalRawDataBlockList.java @@ -19,6 +19,8 @@ package org.apache.poi.poifs.storage; +import org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndianConsts; @@ -47,7 +49,7 @@ public class LocalRawDataBlockList public LocalRawDataBlockList() throws IOException { - super(new ByteArrayInputStream(new byte[ 0 ])); + super(new ByteArrayInputStream(new byte[ 0 ]), POIFSConstants.BIG_BLOCK_SIZE); _list = new ArrayList(); _array = null; } diff --git a/src/testcases/org/apache/poi/poifs/storage/TestRawDataBlockList.java b/src/testcases/org/apache/poi/poifs/storage/TestRawDataBlockList.java index ac6fc08c05..d151029762 100644 --- a/src/testcases/org/apache/poi/poifs/storage/TestRawDataBlockList.java +++ b/src/testcases/org/apache/poi/poifs/storage/TestRawDataBlockList.java @@ -21,6 +21,7 @@ package org.apache.poi.poifs.storage; import java.io.*; +import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.DummyPOILogger; import org.apache.poi.util.POILogFactory; @@ -69,7 +70,7 @@ public class TestRawDataBlockList { data[ j ] = ( byte ) j; } - new RawDataBlockList(new 
ByteArrayInputStream(data)); + new RawDataBlockList(new ByteArrayInputStream(data), POIFSConstants.BIG_BLOCK_SIZE); } /** @@ -81,7 +82,7 @@ public class TestRawDataBlockList public void testEmptyConstructor() throws IOException { - new RawDataBlockList(new ByteArrayInputStream(new byte[ 0 ])); + new RawDataBlockList(new ByteArrayInputStream(new byte[ 0 ]), POIFSConstants.BIG_BLOCK_SIZE); } /** @@ -108,7 +109,7 @@ public class TestRawDataBlockList // Check we logged the error logger.reset(); - new RawDataBlockList(new ByteArrayInputStream(data)); + new RawDataBlockList(new ByteArrayInputStream(data), POIFSConstants.BIG_BLOCK_SIZE); assertEquals(1, logger.logged.size()); } } diff --git a/src/testcases/org/apache/poi/poifs/storage/TestSmallBlockTableReader.java b/src/testcases/org/apache/poi/poifs/storage/TestSmallBlockTableReader.java index bb2e3c4c0e..4d4254a91e 100644 --- a/src/testcases/org/apache/poi/poifs/storage/TestSmallBlockTableReader.java +++ b/src/testcases/org/apache/poi/poifs/storage/TestSmallBlockTableReader.java @@ -25,6 +25,7 @@ import java.util.*; import junit.framework.*; +import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.property.PropertyTable; import org.apache.poi.poifs.property.RootProperty; @@ -2112,7 +2113,7 @@ public class TestSmallBlockTableReader ( byte ) 0xFF, ( byte ) 0xFF, ( byte ) 0xFF, ( byte ) 0xFF }; RawDataBlockList data_blocks = - new RawDataBlockList(new ByteArrayInputStream(raw_data_array)); + new RawDataBlockList(new ByteArrayInputStream(raw_data_array), POIFSConstants.BIG_BLOCK_SIZE); int[] bat_array = { 15