More NPOIFS BAT vs XBAT confusion fixes. Also fixes a recent POIFS regression on big files, and adds a POIFS unit test for XBAT-containing files (previously there wasn't one)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1053511 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-12-29 03:00:46 +00:00
parent ada898d0cf
commit bf27275eb2
8 changed files with 124 additions and 69 deletions

View File

@ -45,6 +45,7 @@ import org.apache.poi.poifs.nio.FileBackedDataSource;
import org.apache.poi.poifs.property.DirectoryProperty; import org.apache.poi.poifs.property.DirectoryProperty;
import org.apache.poi.poifs.property.NPropertyTable; import org.apache.poi.poifs.property.NPropertyTable;
import org.apache.poi.poifs.storage.BATBlock; import org.apache.poi.poifs.storage.BATBlock;
import org.apache.poi.poifs.storage.BlockAllocationTableReader;
import org.apache.poi.poifs.storage.BlockAllocationTableWriter; import org.apache.poi.poifs.storage.BlockAllocationTableWriter;
import org.apache.poi.poifs.storage.HeaderBlock; import org.apache.poi.poifs.storage.HeaderBlock;
import org.apache.poi.poifs.storage.HeaderBlockConstants; import org.apache.poi.poifs.storage.HeaderBlockConstants;
@ -179,6 +180,9 @@ public class NPOIFSFileSystem extends BlockStore
// Have the header processed // Have the header processed
_header = new HeaderBlock(headerBuffer); _header = new HeaderBlock(headerBuffer);
// Sanity check the block count
BlockAllocationTableReader.sanityCheckBlockCount(_header.getBATCount());
// We need to buffer the whole file into memory when // We need to buffer the whole file into memory when
// working with an InputStream. // working with an InputStream.
@ -455,8 +459,8 @@ public class NPOIFSFileSystem extends BlockStore
System.arraycopy(_header.getBATArray(), 0, newBATs, 0, newBATs.length-1); System.arraycopy(_header.getBATArray(), 0, newBATs, 0, newBATs.length-1);
newBATs[newBATs.length-1] = offset; newBATs[newBATs.length-1] = offset;
_header.setBATArray(newBATs); _header.setBATArray(newBATs);
_header.setBATCount(newBATs.length);
} }
_header.setBATCount(_bat_blocks.size());
// The current offset stores us, but the next one is free // The current offset stores us, but the next one is free
return offset+1; return offset+1;

View File

@ -233,39 +233,30 @@ public final class BATBlock extends BigBlock {
/** /**
* Calculates the maximum size of a file which is addressable given the * Calculates the maximum size of a file which is addressable given the
* number of FAT (BAT and XBAT) sectors specified. * number of FAT (BAT) sectors specified. (We don't care if those BAT
* blocks come from the 109 in the header, or from header + XBATS, it
* won't affect the calculation)
* *
* For files with 109 or fewer BATs: * The actual file size will be between [size of fatCount-1 blocks] and
* The actual file size will be between [size of fatCount-1 blocks] and
* [size of fatCount blocks]. * [size of fatCount blocks].
* For 512 byte block sizes, this means we may over-estimate by up to 65kb. * For 512 byte block sizes, this means we may over-estimate by up to 65kb.
* For 4096 byte block sizes, this means we may over-estimate by up to 4mb * For 4096 byte block sizes, this means we may over-estimate by up to 4mb
*
* For files with more than 109 BATs (i.e. has XBATs):
* Each XBAT can hold 127/1023 BATs, which in turn address 128/1024 blocks.
* For 512 byte block sizes, this means we may over-estimate by up to 8mb
* For 4096 byte block sizes, this means we may over-estimate by up to 4gb,
* but only for files of more than 436mb in size
*/ */
public static int calculateMaximumSize(final POIFSBigBlockSize bigBlockSize, public static int calculateMaximumSize(final POIFSBigBlockSize bigBlockSize,
final int numBAT, final int numXBAT) { final int numBATs) {
int size = 1; // Header isn't FAT addressed int size = 1; // Header isn't FAT addressed
// The header contains up to 109 BATs, each of which can // The header has up to 109 BATs, and extra ones are referenced
// address 128/1024 blocks // from XBATs
size += (numBAT * bigBlockSize.getBATEntriesPerBlock()); // However, all BATs can contain 128/1024 blocks
size += (numBATs * bigBlockSize.getBATEntriesPerBlock());
// Each XBAT holds up to 127/1024 BATs, each of which can
// address 128/1024 blocks
size += (numXBAT * bigBlockSize.getXBATEntriesPerBlock() *
bigBlockSize.getBATEntriesPerBlock());
// So far we've been in sector counts, turn into bytes // So far we've been in sector counts, turn into bytes
return size * bigBlockSize.getBigBlockSize(); return size * bigBlockSize.getBigBlockSize();
} }
public static int calculateMaximumSize(final HeaderBlock header) public static int calculateMaximumSize(final HeaderBlock header)
{ {
return calculateMaximumSize(header.getBigBlockSize(), header.getBATCount(), header.getXBATCount()); return calculateMaximumSize(header.getBigBlockSize(), header.getBATCount());
} }
/** /**

View File

@ -81,16 +81,7 @@ public final class BlockAllocationTableReader {
int xbat_count, int xbat_index, BlockList raw_block_list) throws IOException { int xbat_count, int xbat_index, BlockList raw_block_list) throws IOException {
this(bigBlockSize); this(bigBlockSize);
if (block_count <= 0) { sanityCheckBlockCount(block_count);
throw new IOException(
"Illegal block count; minimum count is 1, got " + block_count
+ " instead");
}
if (block_count > MAX_BLOCK_COUNT) {
throw new IOException("Block count " + block_count
+ " is too high. POI maximum is " + MAX_BLOCK_COUNT + ".");
}
// We want to get the whole of the FAT table // We want to get the whole of the FAT table
// To do this: // To do this:
@ -186,6 +177,21 @@ public final class BlockAllocationTableReader {
this.bigBlockSize = bigBlockSize; this.bigBlockSize = bigBlockSize;
_entries = new IntList(); _entries = new IntList();
} }
/**
 * Checks that a block count is within the range POI is prepared
 * to work with.
 *
 * @param block_count the block count to validate
 * @throws IOException if the count is less than 1, or if it is
 *         greater than MAX_BLOCK_COUNT
 */
public static void sanityCheckBlockCount(int block_count) throws IOException {
    // Anything below a single block is rejected outright
    if (block_count < 1) {
        String tooFew = "Illegal block count; minimum count is 1, got "
            + block_count + " instead";
        throw new IOException(tooFew);
    }

    // Within the upper limit, so there is nothing left to complain about
    if (block_count <= MAX_BLOCK_COUNT) {
        return;
    }

    String tooMany = "Block count " + block_count
        + " is too high. POI maximum is " + MAX_BLOCK_COUNT + ".";
    throw new IOException(tooMany);
}
/** /**
* walk the entries from a specified point and return the * walk the entries from a specified point and return the

View File

@ -50,8 +50,8 @@ public final class HeaderBlock implements HeaderBlockConstants {
private final POIFSBigBlockSize bigBlockSize; private final POIFSBigBlockSize bigBlockSize;
/** /**
* number of big block allocation table blocks (int). * Number of big block allocation table blocks (int).
* (Number of FAT Sectors in Microsoft parlance) * (Number of FAT Sectors in Microsoft parlance).
*/ */
private int _bat_count; private int _bat_count;
@ -159,13 +159,6 @@ public final class HeaderBlock implements HeaderBlockConstants {
_sbat_count = new IntegerField(_sbat_block_count_offset, _data).get(); _sbat_count = new IntegerField(_sbat_block_count_offset, _data).get();
_xbat_start = new IntegerField(_xbat_start_offset, _data).get(); _xbat_start = new IntegerField(_xbat_start_offset, _data).get();
_xbat_count = new IntegerField(_xbat_count_offset, _data).get(); _xbat_count = new IntegerField(_xbat_count_offset, _data).get();
// Sanity check values
if(_bat_count > _max_bats_in_header) {
_logger.log(POILogger.WARN, "Too many BAT blocks listed in header, found "
+ _bat_count + " but the maximum is " + _max_bats_in_header);
_bat_count = _max_bats_in_header;
}
} }
/** /**
@ -306,7 +299,7 @@ public final class HeaderBlock implements HeaderBlockConstants {
// Read them in // Read them in
int[] result = new int[ Math.min(_bat_count,_max_bats_in_header) ]; int[] result = new int[ Math.min(_bat_count,_max_bats_in_header) ];
int offset = _bat_array_offset; int offset = _bat_array_offset;
for (int j = 0; j < _bat_count; j++) { for (int j = 0; j < result.length; j++) {
result[ j ] = LittleEndian.getInt(_data, offset); result[ j ] = LittleEndian.getInt(_data, offset);
offset += LittleEndianConsts.INT_SIZE; offset += LittleEndianConsts.INT_SIZE;
} }

View File

@ -343,7 +343,9 @@ public class IntList
{ {
if (index >= _limit) if (index >= _limit)
{ {
throw new IndexOutOfBoundsException(); throw new IndexOutOfBoundsException(
index + " not accessible in a list of length " + _limit
);
} }
return _array[ index ]; return _array[ index ];
} }

View File

@ -17,11 +17,11 @@
package org.apache.poi.poifs.filesystem; package org.apache.poi.poifs.filesystem;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.Iterator; import java.util.Iterator;
import junit.framework.TestCase; import junit.framework.TestCase;
@ -29,6 +29,9 @@ import junit.framework.TestCase;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.poifs.common.POIFSBigBlockSize; import org.apache.poi.poifs.common.POIFSBigBlockSize;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.storage.BATBlock;
import org.apache.poi.poifs.storage.BlockAllocationTableReader;
import org.apache.poi.poifs.storage.HeaderBlock; import org.apache.poi.poifs.storage.HeaderBlock;
import org.apache.poi.poifs.storage.RawDataBlockList; import org.apache.poi.poifs.storage.RawDataBlockList;
@ -38,6 +41,8 @@ import org.apache.poi.poifs.storage.RawDataBlockList;
* @author Josh Micich * @author Josh Micich
*/ */
public final class TestPOIFSFileSystem extends TestCase { public final class TestPOIFSFileSystem extends TestCase {
private POIDataSamples _samples = POIDataSamples.getPOIFSInstance();
/** /**
* Mock exception used to ensure correct error handling * Mock exception used to ensure correct error handling
@ -98,7 +103,6 @@ public final class TestPOIFSFileSystem extends TestCase {
* POIFSFileSystem was not closing the input stream. * POIFSFileSystem was not closing the input stream.
*/ */
public void testAlwaysClose() { public void testAlwaysClose() {
TestIS testIS; TestIS testIS;
// Normal case - read until EOF and close // Normal case - read until EOF and close
@ -139,9 +143,7 @@ public final class TestPOIFSFileSystem extends TestCase {
"ShortLastBlock.qwp", "ShortLastBlock.wps" "ShortLastBlock.qwp", "ShortLastBlock.wps"
}; };
POIDataSamples _samples = POIDataSamples.getPOIFSInstance();
for(int i=0; i<files.length; i++) { for(int i=0; i<files.length; i++) {
// Open the file up // Open the file up
POIFSFileSystem fs = new POIFSFileSystem( POIFSFileSystem fs = new POIFSFileSystem(
_samples.openResourceAsStream(files[i]) _samples.openResourceAsStream(files[i])
@ -161,8 +163,6 @@ public final class TestPOIFSFileSystem extends TestCase {
* sectors that exist in the file. * sectors that exist in the file.
*/ */
public void testFATandDIFATsectors() throws Exception { public void testFATandDIFATsectors() throws Exception {
POIDataSamples _samples = POIDataSamples.getPOIFSInstance();
// Open the file up // Open the file up
try { try {
POIFSFileSystem fs = new POIFSFileSystem( POIFSFileSystem fs = new POIFSFileSystem(
@ -175,6 +175,68 @@ public final class TestPOIFSFileSystem extends TestCase {
} }
} }
/**
 * Round-trips a filesystem that is big enough to need an XBAT in
 * addition to the regular BATs.
 * XBATs only come into play for BATs beyond the 109 that the header
 * can hold, so instead of relying on a sample file, a large
 * filesystem is generated in memory here.
 */
public void testBATandXBAT() throws Exception {
    final int blockSize = 512;
    final int headerBATs = 109;
    final int xbatBATs = 21;

    // Build a filesystem holding one 8mb document, and serialise it
    byte[] bigContents = new byte[8*1024*1024];
    POIFSFileSystem fs = new POIFSFileSystem();
    fs.getRoot().createDocument(
        "BIG", new ByteArrayInputStream(bigContents)
    );
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fs.writeFilesystem(out);
    byte[] written = out.toByteArray();
    int totalBlocks = written.length / blockSize;

    // The header is read from the start of the stream; the same
    // stream is later handed on to the block list
    InputStream stream = new ByteArrayInputStream(written);
    HeaderBlock hdr = new HeaderBlock(stream);
    assertEquals(headerBATs+xbatBATs, hdr.getBATCount());
    assertEquals(1, hdr.getXBATCount());

    // Copy the XBAT block out of the raw bytes (the +1 steps over the header)
    int xbatOffset = (1+hdr.getXBATIndex())*blockSize;
    ByteBuffer xbatBytes = ByteBuffer.allocate(blockSize);
    xbatBytes.put(written, xbatOffset, blockSize);
    xbatBytes.position(0);
    BATBlock xbatBlock = BATBlock.createBATBlock(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, xbatBytes);

    // First come the 21 BATs that are in use ...
    int slot = 0;
    while (slot < xbatBATs) {
        assertTrue(xbatBlock.getValueAt(slot) != POIFSConstants.UNUSED_BLOCK);
        slot++;
    }
    // ... then unused entries up to index 126 ...
    while (slot < 127) {
        assertEquals(POIFSConstants.UNUSED_BLOCK, xbatBlock.getValueAt(slot));
        slot++;
    }
    // ... and the final entry (index 127) ends the chain
    assertEquals(POIFSConstants.END_OF_CHAIN, xbatBlock.getValueAt(slot));

    // Read the remaining blocks, and run the BAT reader over them
    RawDataBlockList blocks = new RawDataBlockList(stream, POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS);
    assertEquals(totalBlocks, blocks.blockCount() + 1); // Header not counted
    new BlockAllocationTableReader(
        hdr.getBigBlockSize(),
        hdr.getBATCount(),
        hdr.getBATArray(),
        hdr.getXBATCount(),
        hdr.getXBATIndex(),
        blocks
    );
    assertEquals(totalBlocks, blocks.blockCount() + 1); // Header not counted

    // Finally, open the written bytes as a filesystem and check the contents
    fs = null;
    fs = new POIFSFileSystem(
        new ByteArrayInputStream(written)
    );
    DirectoryNode rootDir = fs.getRoot();
    assertEquals(1, rootDir.getEntryCount());
    DocumentNode bigDoc = (DocumentNode)rootDir.getEntry("BIG");
    assertEquals(bigContents.length, bigDoc.getSize());
}
/** /**
* Most OLE2 files use 512byte blocks. However, a small number * Most OLE2 files use 512byte blocks. However, a small number
* use 4k blocks. Check that we can open these. * use 4k blocks. Check that we can open these.

View File

@ -216,59 +216,60 @@ public final class TestBATBlock extends TestCase {
// Zero fat blocks isn't technically valid, but it'd be header only // Zero fat blocks isn't technically valid, but it'd be header only
assertEquals( assertEquals(
512, 512,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 0, 0) BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 0)
); );
assertEquals( assertEquals(
4096, 4096,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 0, 0) BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 0)
); );
// A single FAT block can address 128/1024 blocks // A single FAT block can address 128/1024 blocks
assertEquals( assertEquals(
512 + 512*128, 512 + 512*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 1, 0) BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 1)
); );
assertEquals( assertEquals(
4096 + 4096*1024, 4096 + 4096*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 1, 0) BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 1)
); );
assertEquals( assertEquals(
512 + 4*512*128, 512 + 4*512*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 4, 0) BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 4)
); );
assertEquals( assertEquals(
4096 + 4*4096*1024, 4096 + 4*4096*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 4, 0) BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 4)
); );
// One XBAT block holds 127/1023 individual BAT blocks, so they can address // One XBAT block holds 127/1023 individual BAT blocks, so they can address
// a fairly hefty amount of space themselves // a fairly hefty amount of space themselves
// However, the BATs continue as before
assertEquals( assertEquals(
512 + 109*512*128, 512 + 109*512*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 0) BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109)
); );
assertEquals( assertEquals(
4096 + 109*4096*1024, 4096 + 109*4096*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 0) BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109)
); );
assertEquals( assertEquals(
512 + 109*512*128 + 512*127*128, 512 + 110*512*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 1) BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 110)
); );
assertEquals( assertEquals(
4096 + 109*4096*1024 + 4096*1023*1024, 4096 + 110*4096*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 1) BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 110)
); );
assertEquals( assertEquals(
512 + 109*512*128 + 3*512*127*128, 512 + 112*512*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 3) BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 112)
); );
assertEquals( assertEquals(
4096 + 109*4096*1024 + 3*4096*1023*1024, 4096 + 112*4096*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 3) BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 112)
); );
} }

View File

@ -419,11 +419,7 @@ public final class TestBlockAllocationTableReader extends TestCase {
hb.getXBATIndex(), dataBlocks); hb.getXBATIndex(), dataBlocks);
} catch (IOException e) { } catch (IOException e) {
// expected during successful test // expected during successful test
assertEquals( assertEquals("Block count 538976257 is too high. POI maximum is 65535.", e.getMessage());
"Your file contains 0 sectors, but the initial DIFAT array at index 0 referenced block # 538976288. This isn't allowed and your file is corrupt",
e.getMessage()
);
// assertEquals("Block count 538976257 is too high. POI maximum is 65535.", e.getMessage());
} catch (OutOfMemoryError e) { } catch (OutOfMemoryError e) {
if (e.getStackTrace()[1].getMethodName().equals("testBadSectorAllocationTableSize")) { if (e.getStackTrace()[1].getMethodName().equals("testBadSectorAllocationTableSize")) {
throw new AssertionFailedError("Identified bug 48085"); throw new AssertionFailedError("Identified bug 48085");