mirror of https://github.com/apache/poi.git
Lots more documentation on how we read in POIFS files and process the sectors/blocks and FATs. Also add a test that shows that bug #46391 is invalid
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@933663 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e53e3a376e
commit
350df80600
|
@ -32,10 +32,21 @@ public interface POIFSConstants
|
||||||
/** Some use 4096 bytes */
|
/** Some use 4096 bytes */
|
||||||
public static final int LARGER_BIG_BLOCK_SIZE = 0x1000;
|
public static final int LARGER_BIG_BLOCK_SIZE = 0x1000;
|
||||||
|
|
||||||
public static final int END_OF_CHAIN = -2;
|
|
||||||
public static final int PROPERTY_SIZE = 0x0080;
|
public static final int PROPERTY_SIZE = 0x0080;
|
||||||
|
|
||||||
|
/** The highest sector number you're allowed, 0xFFFFFFFA */
|
||||||
|
public static final int LARGEST_REGULAR_SECTOR_NUMBER = -5;
|
||||||
|
|
||||||
|
/** Indicates the sector holds a DIFAT block (0xFFFFFFFC) */
|
||||||
|
public static final int DIFAT_SECTOR_BLOCK = -4;
|
||||||
|
/** Indicates the sector holds a FAT block (0xFFFFFFFD) */
|
||||||
|
public static final int FAT_SECTOR_BLOCK = -3;
|
||||||
|
/** Indicates the sector is the end of a chain (0xFFFFFFFE) */
|
||||||
|
public static final int END_OF_CHAIN = -2;
|
||||||
|
/** Indicates the sector is not used (0xFFFFFFFF) */
|
||||||
public static final int UNUSED_BLOCK = -1;
|
public static final int UNUSED_BLOCK = -1;
|
||||||
|
|
||||||
|
/** The first 4 bytes of an OOXML file, used in detection */
|
||||||
public static final byte[] OOXML_FILE_HEADER =
|
public static final byte[] OOXML_FILE_HEADER =
|
||||||
new byte[] { 0x50, 0x4b, 0x03, 0x04 };
|
new byte[] { 0x50, 0x4b, 0x03, 0x04 };
|
||||||
} // end public interface POIFSConstants;
|
} // end public interface POIFSConstants;
|
||||||
|
|
|
@ -89,17 +89,35 @@ public final class BlockAllocationTableReader {
|
||||||
+ " is too high. POI maximum is " + MAX_BLOCK_COUNT + ".");
|
+ " is too high. POI maximum is " + MAX_BLOCK_COUNT + ".");
|
||||||
}
|
}
|
||||||
|
|
||||||
// acquire raw data blocks containing the BAT block data
|
// We want to get the whole of the FAT table
|
||||||
RawDataBlock blocks[] = new RawDataBlock[ block_count ];
|
// To do this:
|
||||||
|
// * Work through raw_block_list, which points to the
|
||||||
|
// first (up to) 109 BAT blocks
|
||||||
|
// * Jump to the XBAT offset, and read in XBATs which
|
||||||
|
// point to more BAT blocks
|
||||||
int limit = Math.min(block_count, block_array.length);
|
int limit = Math.min(block_count, block_array.length);
|
||||||
int block_index;
|
int block_index;
|
||||||
|
|
||||||
|
// This will hold all of the BAT blocks in order
|
||||||
|
RawDataBlock blocks[] = new RawDataBlock[ block_count ];
|
||||||
|
|
||||||
|
// Process the first (up to) 109 BAT blocks
|
||||||
for (block_index = 0; block_index < limit; block_index++)
|
for (block_index = 0; block_index < limit; block_index++)
|
||||||
{
|
{
|
||||||
|
// Check that the sector number of the BAT block is a valid one
|
||||||
|
int nextOffset = block_array[ block_index ];
|
||||||
|
if(nextOffset > raw_block_list.blockCount()) {
|
||||||
|
throw new IOException("Your file contains " + raw_block_list.blockCount() +
|
||||||
|
" sectors, but the initial DIFAT array at index " + block_index +
|
||||||
|
" referenced block # " + nextOffset + ". This isn't allowed and " +
|
||||||
|
" your file is corrupt");
|
||||||
|
}
|
||||||
|
// Record the sector number of this BAT block
|
||||||
blocks[ block_index ] =
|
blocks[ block_index ] =
|
||||||
( RawDataBlock ) raw_block_list
|
( RawDataBlock ) raw_block_list.remove(nextOffset);
|
||||||
.remove(block_array[ block_index ]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Process additional BAT blocks via the XBATs
|
||||||
if (block_index < block_count)
|
if (block_index < block_count)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -113,6 +131,9 @@ public final class BlockAllocationTableReader {
|
||||||
int max_entries_per_block = BATBlock.entriesPerXBATBlock();
|
int max_entries_per_block = BATBlock.entriesPerXBATBlock();
|
||||||
int chain_index_offset = BATBlock.getXBATChainOffset();
|
int chain_index_offset = BATBlock.getXBATChainOffset();
|
||||||
|
|
||||||
|
// Each XBAT block contains either:
|
||||||
|
// (maximum number of sector indexes) + index of next XBAT
|
||||||
|
// or: some sector indexes + FREE sectors to max # + EndOfChain
|
||||||
for (int j = 0; j < xbat_count; j++)
|
for (int j = 0; j < xbat_count; j++)
|
||||||
{
|
{
|
||||||
limit = Math.min(block_count - block_index,
|
limit = Math.min(block_count - block_index,
|
||||||
|
@ -139,8 +160,8 @@ public final class BlockAllocationTableReader {
|
||||||
throw new IOException("Could not find all blocks");
|
throw new IOException("Could not find all blocks");
|
||||||
}
|
}
|
||||||
|
|
||||||
// now that we have all of the raw data blocks, go through and
|
// Now that we have all of the raw data blocks which make
|
||||||
// create the indices
|
// up the FAT, go through and create the indices
|
||||||
setEntries(blocks, raw_block_list);
|
setEntries(blocks, raw_block_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -79,5 +79,7 @@ public interface BlockList
|
||||||
|
|
||||||
public void setBAT(final BlockAllocationTableReader bat)
|
public void setBAT(final BlockAllocationTableReader bat)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
public int blockCount();
|
||||||
} // end public interface BlockList
|
} // end public interface BlockList
|
||||||
|
|
||||||
|
|
|
@ -138,4 +138,21 @@ abstract class BlockListImpl implements BlockList {
|
||||||
}
|
}
|
||||||
_bat = bat;
|
_bat = bat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the count of the number of blocks
|
||||||
|
*/
|
||||||
|
public int blockCount() {
|
||||||
|
return _blocks.length;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Returns the number of remaining blocks
|
||||||
|
*/
|
||||||
|
protected int remainingBlocks() {
|
||||||
|
int c = 0;
|
||||||
|
for(int i=0; i<_blocks.length; i++) {
|
||||||
|
if(_blocks[i] != null) c++;
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,11 @@ public interface HeaderBlockConstants
|
||||||
(POIFSConstants.BIG_BLOCK_SIZE - _bat_array_offset)
|
(POIFSConstants.BIG_BLOCK_SIZE - _bat_array_offset)
|
||||||
/ LittleEndianConsts.INT_SIZE;
|
/ LittleEndianConsts.INT_SIZE;
|
||||||
|
|
||||||
|
// Note - in Microsoft terms:
|
||||||
|
// BAT ~= FAT
|
||||||
|
// SBAT ~= MiniFAT
|
||||||
|
// XBAT ~= DIFat
|
||||||
|
|
||||||
// useful offsets
|
// useful offsets
|
||||||
public static final int _signature_offset = 0;
|
public static final int _signature_offset = 0;
|
||||||
public static final int _bat_count_offset = 0x2C;
|
public static final int _bat_count_offset = 0x2C;
|
||||||
|
|
|
@ -22,6 +22,7 @@ import static org.apache.poi.poifs.storage.HeaderBlockConstants._bat_count_offse
|
||||||
import static org.apache.poi.poifs.storage.HeaderBlockConstants._max_bats_in_header;
|
import static org.apache.poi.poifs.storage.HeaderBlockConstants._max_bats_in_header;
|
||||||
import static org.apache.poi.poifs.storage.HeaderBlockConstants._property_start_offset;
|
import static org.apache.poi.poifs.storage.HeaderBlockConstants._property_start_offset;
|
||||||
import static org.apache.poi.poifs.storage.HeaderBlockConstants._sbat_start_offset;
|
import static org.apache.poi.poifs.storage.HeaderBlockConstants._sbat_start_offset;
|
||||||
|
import static org.apache.poi.poifs.storage.HeaderBlockConstants._sbat_block_count_offset;
|
||||||
import static org.apache.poi.poifs.storage.HeaderBlockConstants._signature;
|
import static org.apache.poi.poifs.storage.HeaderBlockConstants._signature;
|
||||||
import static org.apache.poi.poifs.storage.HeaderBlockConstants._signature_offset;
|
import static org.apache.poi.poifs.storage.HeaderBlockConstants._signature_offset;
|
||||||
import static org.apache.poi.poifs.storage.HeaderBlockConstants._xbat_count_offset;
|
import static org.apache.poi.poifs.storage.HeaderBlockConstants._xbat_count_offset;
|
||||||
|
@ -49,21 +50,37 @@ public final class HeaderBlockReader {
|
||||||
*/
|
*/
|
||||||
private final int bigBlockSize;
|
private final int bigBlockSize;
|
||||||
|
|
||||||
/** number of big block allocation table blocks (int) */
|
/**
|
||||||
|
* number of big block allocation table blocks (int).
|
||||||
|
* (Number of FAT Sectors in Microsoft parlance)
|
||||||
|
*/
|
||||||
private final int _bat_count;
|
private final int _bat_count;
|
||||||
|
|
||||||
/** start of the property set block (int index of the property set
|
/**
|
||||||
* chain's first big block)
|
* Start of the property set block (int index of the property set
|
||||||
|
* chain's first big block).
|
||||||
*/
|
*/
|
||||||
private final int _property_start;
|
private final int _property_start;
|
||||||
|
|
||||||
/** start of the small block allocation table (int index of small
|
/**
|
||||||
|
* start of the small block allocation table (int index of small
|
||||||
* block allocation table's first big block)
|
* block allocation table's first big block)
|
||||||
*/
|
*/
|
||||||
private final int _sbat_start;
|
private final int _sbat_start;
|
||||||
|
/**
|
||||||
|
* Number of small block allocation table blocks (int)
|
||||||
|
* (Number of MiniFAT Sectors in Microsoft parlance)
|
||||||
|
*/
|
||||||
|
private final int _sbat_count;
|
||||||
|
|
||||||
/** big block index for extension to the big block allocation table */
|
/**
|
||||||
|
* Big block index for extension to the big block allocation table
|
||||||
|
*/
|
||||||
private final int _xbat_start;
|
private final int _xbat_start;
|
||||||
|
/**
|
||||||
|
* Number of extended big block allocation table (XBAT) blocks (int)
|
||||||
|
* (Number of DIFAT Sectors in Microsoft parlance)
|
||||||
|
*/
|
||||||
private final int _xbat_count;
|
private final int _xbat_count;
|
||||||
private final byte[] _data;
|
private final byte[] _data;
|
||||||
|
|
||||||
|
@ -132,6 +149,7 @@ public final class HeaderBlockReader {
|
||||||
_bat_count = getInt(_bat_count_offset, _data);
|
_bat_count = getInt(_bat_count_offset, _data);
|
||||||
_property_start = getInt(_property_start_offset, _data);
|
_property_start = getInt(_property_start_offset, _data);
|
||||||
_sbat_start = getInt(_sbat_start_offset, _data);
|
_sbat_start = getInt(_sbat_start_offset, _data);
|
||||||
|
_sbat_count = getInt(_sbat_block_count_offset, _data);
|
||||||
_xbat_start = getInt(_xbat_start_offset, _data);
|
_xbat_start = getInt(_xbat_start_offset, _data);
|
||||||
_xbat_count = getInt(_xbat_count_offset, _data);
|
_xbat_count = getInt(_xbat_count_offset, _data);
|
||||||
}
|
}
|
||||||
|
@ -169,11 +187,14 @@ public final class HeaderBlockReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return start of small block allocation table
|
* @return start of small block (MiniFAT) allocation table
|
||||||
*/
|
*/
|
||||||
public int getSBATStart() {
|
public int getSBATStart() {
|
||||||
return _sbat_start;
|
return _sbat_start;
|
||||||
}
|
}
|
||||||
|
public int getSBATCount() {
|
||||||
|
return _sbat_count;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return number of BAT blocks
|
* @return number of BAT blocks
|
||||||
|
@ -183,7 +204,10 @@ public final class HeaderBlockReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return BAT array
|
* Returns the offsets to the first (up to) 109
|
||||||
|
* BAT sectors.
|
||||||
|
* Any additional BAT sectors are referenced from the XBAT (DIFAT) blocks.
|
||||||
|
* @return BAT offset array
|
||||||
*/
|
*/
|
||||||
public int[] getBATArray() {
|
public int[] getBATArray() {
|
||||||
int[] result = new int[ _max_bats_in_header ];
|
int[] result = new int[ _max_bats_in_header ];
|
||||||
|
@ -197,14 +221,14 @@ public final class HeaderBlockReader {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return XBAT count
|
* @return XBAT (DIFAT) count
|
||||||
*/
|
*/
|
||||||
public int getXBATCount() {
|
public int getXBATCount() {
|
||||||
return _xbat_count;
|
return _xbat_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return XBAT index
|
* @return XBAT (DIFAT) index
|
||||||
*/
|
*/
|
||||||
public int getXBATIndex() {
|
public int getXBATIndex() {
|
||||||
return _xbat_start;
|
return _xbat_start;
|
||||||
|
|
|
@ -111,6 +111,10 @@ public class RawDataBlock
|
||||||
public boolean hasData() {
|
public boolean hasData() {
|
||||||
return _hasData;
|
return _hasData;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "RawDataBlock of size " + _data.length;
|
||||||
|
}
|
||||||
|
|
||||||
/* ********** START implementation of ListManagedBlock ********** */
|
/* ********** START implementation of ListManagedBlock ********** */
|
||||||
|
|
||||||
|
|
|
@ -148,6 +148,26 @@ public final class TestPOIFSFileSystem extends TestCase {
|
||||||
// Check sizes
|
// Check sizes
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check that we do the right thing when the list of which
|
||||||
|
* sectors are BAT blocks points beyond the end of the list of
|
||||||
|
* sectors that exist in the file.
|
||||||
|
*/
|
||||||
|
public void testFATandDIFATsectors() throws Exception {
|
||||||
|
POIDataSamples _samples = POIDataSamples.getPOIFSInstance();
|
||||||
|
|
||||||
|
// Open the file up
|
||||||
|
try {
|
||||||
|
POIFSFileSystem fs = new POIFSFileSystem(
|
||||||
|
_samples.openResourceAsStream("ReferencesInvalidSectors.mpp")
|
||||||
|
);
|
||||||
|
fail("File is corrupt and shouldn't have been opened");
|
||||||
|
} catch(IOException e) {
|
||||||
|
String msg = e.getMessage();
|
||||||
|
assertTrue(msg.startsWith("Your file contains 695 sectors"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static InputStream openSampleStream(String sampleFileName) {
|
private static InputStream openSampleStream(String sampleFileName) {
|
||||||
return HSSFTestDataSamples.openSampleFileStream(sampleFileName);
|
return HSSFTestDataSamples.openSampleFileStream(sampleFileName);
|
||||||
|
|
|
@ -181,4 +181,8 @@ public final class LocalRawDataBlockList extends RawDataBlockList {
|
||||||
_array = _list.toArray(new RawDataBlock[ 0 ]);
|
_array = _list.toArray(new RawDataBlock[ 0 ]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int blockCount() {
|
||||||
|
return _list.size();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue