mirror of https://github.com/apache/poi.git
More NPOIFS BAT vs XBAT confusion fixes. Also fixes a recent POIFS regression on big files, and adds a POIFS unit test for files containing XBATs (previously there wasn't one).
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1053511 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ada898d0cf
commit
bf27275eb2
|
@ -45,6 +45,7 @@ import org.apache.poi.poifs.nio.FileBackedDataSource;
|
|||
import org.apache.poi.poifs.property.DirectoryProperty;
|
||||
import org.apache.poi.poifs.property.NPropertyTable;
|
||||
import org.apache.poi.poifs.storage.BATBlock;
|
||||
import org.apache.poi.poifs.storage.BlockAllocationTableReader;
|
||||
import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
|
||||
import org.apache.poi.poifs.storage.HeaderBlock;
|
||||
import org.apache.poi.poifs.storage.HeaderBlockConstants;
|
||||
|
@ -179,6 +180,9 @@ public class NPOIFSFileSystem extends BlockStore
|
|||
|
||||
// Have the header processed
|
||||
_header = new HeaderBlock(headerBuffer);
|
||||
|
||||
// Sanity check the block count
|
||||
BlockAllocationTableReader.sanityCheckBlockCount(_header.getBATCount());
|
||||
|
||||
// We need to buffer the whole file into memory when
|
||||
// working with an InputStream.
|
||||
|
@ -455,8 +459,8 @@ public class NPOIFSFileSystem extends BlockStore
|
|||
System.arraycopy(_header.getBATArray(), 0, newBATs, 0, newBATs.length-1);
|
||||
newBATs[newBATs.length-1] = offset;
|
||||
_header.setBATArray(newBATs);
|
||||
_header.setBATCount(newBATs.length);
|
||||
}
|
||||
_header.setBATCount(_bat_blocks.size());
|
||||
|
||||
// The current offset stores us, but the next one is free
|
||||
return offset+1;
|
||||
|
|
|
@ -233,39 +233,30 @@ public final class BATBlock extends BigBlock {
|
|||
|
||||
/**
|
||||
* Calculates the maximum size of a file which is addressable given the
|
||||
* number of FAT (BAT and XBAT) sectors specified.
|
||||
* number of FAT (BAT) sectors specified. (We don't care if those BAT
|
||||
* blocks come from the 109 in the header, or from header + XBATS, it
|
||||
* won't affect the calculation)
|
||||
*
|
||||
* For files with 109 or fewer BATs:
|
||||
* The actual file size will be between [size of fatCount-1 blocks] and
|
||||
* The actual file size will be between [size of fatCount-1 blocks] and
|
||||
* [size of fatCount blocks].
|
||||
* For 512 byte block sizes, this means we may over-estimate by up to 65kb.
|
||||
* For 4096 byte block sizes, this means we may over-estimate by up to 4mb
|
||||
*
|
||||
* For files with more than 109 BATs (i.e. has XBATs):
|
||||
* Each XBAT can hold 127/1023 BATs, which in turn address 128/1024 blocks.
|
||||
* For 512 byte block sizes, this means we may over-estimate by up to 8mb
|
||||
* For 4096 byte block sizes, this means we may over-estimate by up to 4gb,
|
||||
* but only for files of more than 436mb in size
|
||||
*/
|
||||
public static int calculateMaximumSize(final POIFSBigBlockSize bigBlockSize,
|
||||
final int numBAT, final int numXBAT) {
|
||||
final int numBATs) {
|
||||
int size = 1; // Header isn't FAT addressed
|
||||
|
||||
// The header contains up to 109 BATs, each of which can
|
||||
// address 128/1024 blocks
|
||||
size += (numBAT * bigBlockSize.getBATEntriesPerBlock());
|
||||
|
||||
// Each XBAT holds up to 127/1023 BATs, each of which can
|
||||
// address 128/1024 blocks
|
||||
size += (numXBAT * bigBlockSize.getXBATEntriesPerBlock() *
|
||||
bigBlockSize.getBATEntriesPerBlock());
|
||||
// The header has up to 109 BATs, and extra ones are referenced
|
||||
// from XBATs
|
||||
// However, all BATs can contain 128/1024 blocks
|
||||
size += (numBATs * bigBlockSize.getBATEntriesPerBlock());
|
||||
|
||||
// So far we've been in sector counts, turn into bytes
|
||||
return size * bigBlockSize.getBigBlockSize();
|
||||
}
|
||||
public static int calculateMaximumSize(final HeaderBlock header)
|
||||
{
|
||||
return calculateMaximumSize(header.getBigBlockSize(), header.getBATCount(), header.getXBATCount());
|
||||
return calculateMaximumSize(header.getBigBlockSize(), header.getBATCount());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -81,16 +81,7 @@ public final class BlockAllocationTableReader {
|
|||
int xbat_count, int xbat_index, BlockList raw_block_list) throws IOException {
|
||||
this(bigBlockSize);
|
||||
|
||||
if (block_count <= 0) {
|
||||
throw new IOException(
|
||||
"Illegal block count; minimum count is 1, got " + block_count
|
||||
+ " instead");
|
||||
}
|
||||
|
||||
if (block_count > MAX_BLOCK_COUNT) {
|
||||
throw new IOException("Block count " + block_count
|
||||
+ " is too high. POI maximum is " + MAX_BLOCK_COUNT + ".");
|
||||
}
|
||||
sanityCheckBlockCount(block_count);
|
||||
|
||||
// We want to get the whole of the FAT table
|
||||
// To do this:
|
||||
|
@ -186,6 +177,21 @@ public final class BlockAllocationTableReader {
|
|||
this.bigBlockSize = bigBlockSize;
|
||||
_entries = new IntList();
|
||||
}
|
||||
|
||||
public static void sanityCheckBlockCount(int block_count) throws IOException {
|
||||
if (block_count <= 0) {
|
||||
throw new IOException(
|
||||
"Illegal block count; minimum count is 1, got " +
|
||||
block_count + " instead"
|
||||
);
|
||||
}
|
||||
if (block_count > MAX_BLOCK_COUNT) {
|
||||
throw new IOException(
|
||||
"Block count " + block_count +
|
||||
" is too high. POI maximum is " + MAX_BLOCK_COUNT + "."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* walk the entries from a specified point and return the
|
||||
|
|
|
@ -50,8 +50,8 @@ public final class HeaderBlock implements HeaderBlockConstants {
|
|||
private final POIFSBigBlockSize bigBlockSize;
|
||||
|
||||
/**
|
||||
* number of big block allocation table blocks (int).
|
||||
* (Number of FAT Sectors in Microsoft parlance)
|
||||
* Number of big block allocation table blocks (int).
|
||||
* (Number of FAT Sectors in Microsoft parlance).
|
||||
*/
|
||||
private int _bat_count;
|
||||
|
||||
|
@ -159,13 +159,6 @@ public final class HeaderBlock implements HeaderBlockConstants {
|
|||
_sbat_count = new IntegerField(_sbat_block_count_offset, _data).get();
|
||||
_xbat_start = new IntegerField(_xbat_start_offset, _data).get();
|
||||
_xbat_count = new IntegerField(_xbat_count_offset, _data).get();
|
||||
|
||||
// Sanity check values
|
||||
if(_bat_count > _max_bats_in_header) {
|
||||
_logger.log(POILogger.WARN, "Too many BAT blocks listed in header, found "
|
||||
+ _bat_count + " but the maximum is " + _max_bats_in_header);
|
||||
_bat_count = _max_bats_in_header;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -306,7 +299,7 @@ public final class HeaderBlock implements HeaderBlockConstants {
|
|||
// Read them in
|
||||
int[] result = new int[ Math.min(_bat_count,_max_bats_in_header) ];
|
||||
int offset = _bat_array_offset;
|
||||
for (int j = 0; j < _bat_count; j++) {
|
||||
for (int j = 0; j < result.length; j++) {
|
||||
result[ j ] = LittleEndian.getInt(_data, offset);
|
||||
offset += LittleEndianConsts.INT_SIZE;
|
||||
}
|
||||
|
|
|
@ -343,7 +343,9 @@ public class IntList
|
|||
{
|
||||
if (index >= _limit)
|
||||
{
|
||||
throw new IndexOutOfBoundsException();
|
||||
throw new IndexOutOfBoundsException(
|
||||
index + " not accessible in a list of length " + _limit
|
||||
);
|
||||
}
|
||||
return _array[ index ];
|
||||
}
|
||||
|
|
|
@ -17,11 +17,11 @@
|
|||
|
||||
package org.apache.poi.poifs.filesystem;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Iterator;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
@ -29,6 +29,9 @@ import junit.framework.TestCase;
|
|||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||
import org.apache.poi.poifs.common.POIFSBigBlockSize;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.poifs.storage.BATBlock;
|
||||
import org.apache.poi.poifs.storage.BlockAllocationTableReader;
|
||||
import org.apache.poi.poifs.storage.HeaderBlock;
|
||||
import org.apache.poi.poifs.storage.RawDataBlockList;
|
||||
|
||||
|
@ -38,6 +41,8 @@ import org.apache.poi.poifs.storage.RawDataBlockList;
|
|||
* @author Josh Micich
|
||||
*/
|
||||
public final class TestPOIFSFileSystem extends TestCase {
|
||||
private POIDataSamples _samples = POIDataSamples.getPOIFSInstance();
|
||||
|
||||
|
||||
/**
|
||||
* Mock exception used to ensure correct error handling
|
||||
|
@ -98,7 +103,6 @@ public final class TestPOIFSFileSystem extends TestCase {
|
|||
* POIFSFileSystem was not closing the input stream.
|
||||
*/
|
||||
public void testAlwaysClose() {
|
||||
|
||||
TestIS testIS;
|
||||
|
||||
// Normal case - read until EOF and close
|
||||
|
@ -139,9 +143,7 @@ public final class TestPOIFSFileSystem extends TestCase {
|
|||
"ShortLastBlock.qwp", "ShortLastBlock.wps"
|
||||
};
|
||||
|
||||
POIDataSamples _samples = POIDataSamples.getPOIFSInstance();
|
||||
for(int i=0; i<files.length; i++) {
|
||||
|
||||
// Open the file up
|
||||
POIFSFileSystem fs = new POIFSFileSystem(
|
||||
_samples.openResourceAsStream(files[i])
|
||||
|
@ -161,8 +163,6 @@ public final class TestPOIFSFileSystem extends TestCase {
|
|||
* sectors that exist in the file.
|
||||
*/
|
||||
public void testFATandDIFATsectors() throws Exception {
|
||||
POIDataSamples _samples = POIDataSamples.getPOIFSInstance();
|
||||
|
||||
// Open the file up
|
||||
try {
|
||||
POIFSFileSystem fs = new POIFSFileSystem(
|
||||
|
@ -175,6 +175,68 @@ public final class TestPOIFSFileSystem extends TestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that we can write and read a file that contains XBATs
|
||||
* as well as regular BATs.
|
||||
* However, because a file needs to be at least 6.875mb big
|
||||
* to have an XBAT in it, we don't have a test one. So, generate it.
|
||||
*/
|
||||
public void testBATandXBAT() throws Exception {
|
||||
byte[] hugeStream = new byte[8*1024*1024];
|
||||
POIFSFileSystem fs = new POIFSFileSystem();
|
||||
fs.getRoot().createDocument(
|
||||
"BIG", new ByteArrayInputStream(hugeStream)
|
||||
);
|
||||
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
fs.writeFilesystem(baos);
|
||||
byte[] fsData = baos.toByteArray();
|
||||
|
||||
|
||||
// Check the header was written properly
|
||||
InputStream inp = new ByteArrayInputStream(fsData);
|
||||
HeaderBlock header = new HeaderBlock(inp);
|
||||
assertEquals(109+21, header.getBATCount());
|
||||
assertEquals(1, header.getXBATCount());
|
||||
|
||||
|
||||
// We should have 21 BATs in the XBAT
|
||||
ByteBuffer xbatData = ByteBuffer.allocate(512);
|
||||
xbatData.put(fsData, (1+header.getXBATIndex())*512, 512);
|
||||
xbatData.position(0);
|
||||
BATBlock xbat = BATBlock.createBATBlock(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, xbatData);
|
||||
for(int i=0; i<21; i++) {
|
||||
assertTrue(xbat.getValueAt(i) != POIFSConstants.UNUSED_BLOCK);
|
||||
}
|
||||
for(int i=21; i<127; i++) {
|
||||
assertEquals(POIFSConstants.UNUSED_BLOCK, xbat.getValueAt(i));
|
||||
}
|
||||
assertEquals(POIFSConstants.END_OF_CHAIN, xbat.getValueAt(127));
|
||||
|
||||
|
||||
// Load the blocks and check with that
|
||||
RawDataBlockList blockList = new RawDataBlockList(inp, POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS);
|
||||
assertEquals(fsData.length / 512, blockList.blockCount() + 1); // Header not counted
|
||||
new BlockAllocationTableReader(header.getBigBlockSize(),
|
||||
header.getBATCount(),
|
||||
header.getBATArray(),
|
||||
header.getXBATCount(),
|
||||
header.getXBATIndex(),
|
||||
blockList);
|
||||
assertEquals(fsData.length / 512, blockList.blockCount() + 1); // Header not counted
|
||||
|
||||
// Now load it and check
|
||||
fs = null;
|
||||
fs = new POIFSFileSystem(
|
||||
new ByteArrayInputStream(fsData)
|
||||
);
|
||||
|
||||
DirectoryNode root = fs.getRoot();
|
||||
assertEquals(1, root.getEntryCount());
|
||||
DocumentNode big = (DocumentNode)root.getEntry("BIG");
|
||||
assertEquals(hugeStream.length, big.getSize());
|
||||
}
|
||||
|
||||
/**
|
||||
* Most OLE2 files use 512byte blocks. However, a small number
|
||||
* use 4k blocks. Check that we can open these.
|
||||
|
|
|
@ -216,59 +216,60 @@ public final class TestBATBlock extends TestCase {
|
|||
// Zero fat blocks isn't technically valid, but it'd be header only
|
||||
assertEquals(
|
||||
512,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 0, 0)
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 0)
|
||||
);
|
||||
assertEquals(
|
||||
4096,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 0, 0)
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 0)
|
||||
);
|
||||
|
||||
// A single FAT block can address 128/1024 blocks
|
||||
assertEquals(
|
||||
512 + 512*128,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 1, 0)
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 1)
|
||||
);
|
||||
assertEquals(
|
||||
4096 + 4096*1024,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 1, 0)
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 1)
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
512 + 4*512*128,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 4, 0)
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 4)
|
||||
);
|
||||
assertEquals(
|
||||
4096 + 4*4096*1024,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 4, 0)
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 4)
|
||||
);
|
||||
|
||||
// One XBAT block holds 127/1023 individual BAT blocks, so they can address
|
||||
// a fairly hefty amount of space themselves
|
||||
// However, the BATs continue as before
|
||||
assertEquals(
|
||||
512 + 109*512*128,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 0)
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109)
|
||||
);
|
||||
assertEquals(
|
||||
4096 + 109*4096*1024,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 0)
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109)
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
512 + 109*512*128 + 512*127*128,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 1)
|
||||
512 + 110*512*128,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 110)
|
||||
);
|
||||
assertEquals(
|
||||
4096 + 109*4096*1024 + 4096*1023*1024,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 1)
|
||||
4096 + 110*4096*1024,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 110)
|
||||
);
|
||||
|
||||
assertEquals(
|
||||
512 + 109*512*128 + 3*512*127*128,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 3)
|
||||
512 + 112*512*128,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 112)
|
||||
);
|
||||
assertEquals(
|
||||
4096 + 109*4096*1024 + 3*4096*1023*1024,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 3)
|
||||
4096 + 112*4096*1024,
|
||||
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 112)
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -419,11 +419,7 @@ public final class TestBlockAllocationTableReader extends TestCase {
|
|||
hb.getXBATIndex(), dataBlocks);
|
||||
} catch (IOException e) {
|
||||
// expected during successful test
|
||||
assertEquals(
|
||||
"Your file contains 0 sectors, but the initial DIFAT array at index 0 referenced block # 538976288. This isn't allowed and your file is corrupt",
|
||||
e.getMessage()
|
||||
);
|
||||
// assertEquals("Block count 538976257 is too high. POI maximum is 65535.", e.getMessage());
|
||||
assertEquals("Block count 538976257 is too high. POI maximum is 65535.", e.getMessage());
|
||||
} catch (OutOfMemoryError e) {
|
||||
if (e.getStackTrace()[1].getMethodName().equals("testBadSectorAllocationTableSize")) {
|
||||
throw new AssertionFailedError("Identified bug 48085");
|
||||
|
|
Loading…
Reference in New Issue