[bug-65184] Improve performance of POFSMiniStore getBlockAt. Thanks to sits

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1887604 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2021-03-13 18:09:13 +00:00
parent 5758145ce8
commit c70b649e73
1 changed files with 135 additions and 137 deletions

View File

@ -1,4 +1,3 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
@ -33,141 +32,140 @@ import org.apache.poi.poifs.storage.HeaderBlock;
/**
* This class handles the MiniStream (small block store)
* in the NIO case for {@link POIFSFileSystem}
* in the NIO case for {@link POIFSFileSystem}
*/
public class POIFSMiniStore extends BlockStore
{
public class POIFSMiniStore extends BlockStore {
private final POIFSFileSystem _filesystem;
private POIFSStream _mini_stream;
private final List<BATBlock> _sbat_blocks;
private final HeaderBlock _header;
private final RootProperty _root;
private final List<BATBlock> _sbat_blocks;
private final HeaderBlock _header;
private final RootProperty _root;
POIFSMiniStore(POIFSFileSystem filesystem, RootProperty root,
List<BATBlock> sbats, HeaderBlock header)
{
this._filesystem = filesystem;
this._sbat_blocks = sbats;
this._header = header;
this._root = root;
List<BATBlock> sbats, HeaderBlock header) {
this._filesystem = filesystem;
this._sbat_blocks = sbats;
this._header = header;
this._root = root;
this._mini_stream = new POIFSStream(filesystem, root.getStartBlock());
this._mini_stream = new POIFSStream(filesystem, root.getStartBlock());
}
/**
* Load the block at the given offset.
*/
protected ByteBuffer getBlockAt(final int offset) {
// Which big block is this?
int byteOffset = offset * POIFSConstants.SMALL_BLOCK_SIZE;
int bigBlockNumber = byteOffset / _filesystem.getBigBlockSize();
int bigBlockOffset = byteOffset % _filesystem.getBigBlockSize();
// Which big block is this?
int byteOffset = offset * POIFSConstants.SMALL_BLOCK_SIZE;
int bigBlockNumber = byteOffset / _filesystem.getBigBlockSize();
int bigBlockOffset = byteOffset % _filesystem.getBigBlockSize();
// Now locate the data block for it
Iterator<ByteBuffer> it = _mini_stream.getBlockIterator();
for(int i=0; i<bigBlockNumber; i++) {
it.next();
}
ByteBuffer dataBlock = it.next();
assert(dataBlock != null);
// Now locate the data block for it
Iterator<ByteBuffer> it = _mini_stream.getBlockIterator();
for (int i = 0; i < bigBlockNumber; i++) {
it.next();
}
ByteBuffer dataBlock = it.next();
assert (dataBlock != null);
// Position ourselves, and take a slice
dataBlock.position(
dataBlock.position() + bigBlockOffset
);
ByteBuffer miniBuffer = dataBlock.slice();
miniBuffer.limit(POIFSConstants.SMALL_BLOCK_SIZE);
return miniBuffer;
// Position ourselves, and take a slice
dataBlock.position(
dataBlock.position() + bigBlockOffset
);
ByteBuffer miniBuffer = dataBlock.slice();
miniBuffer.limit(POIFSConstants.SMALL_BLOCK_SIZE);
return miniBuffer;
}
/**
* Load the block, extending the underlying stream if needed
*/
protected ByteBuffer createBlockIfNeeded(final int offset) throws IOException {
boolean firstInStore = false;
if (_mini_stream.getStartBlock() == POIFSConstants.END_OF_CHAIN) {
firstInStore = true;
}
boolean firstInStore = false;
if (_mini_stream.getStartBlock() == POIFSConstants.END_OF_CHAIN) {
firstInStore = true;
}
// Try to get it without extending the stream
if (! firstInStore) {
try {
return getBlockAt(offset);
} catch(NoSuchElementException ignored) {}
}
// Try to get it without extending the stream
if (!firstInStore) {
try {
return getBlockAt(offset);
} catch (NoSuchElementException ignored) {
}
}
// Need to extend the stream
// TODO Replace this with proper append support
// For now, do the extending by hand...
// Need to extend the stream
// TODO Replace this with proper append support
// For now, do the extending by hand...
// Ask for another block
int newBigBlock = _filesystem.getFreeBlock();
_filesystem.createBlockIfNeeded(newBigBlock);
// Ask for another block
int newBigBlock = _filesystem.getFreeBlock();
_filesystem.createBlockIfNeeded(newBigBlock);
// If we are the first block to be allocated, initialise the stream
if (firstInStore) {
_filesystem._get_property_table().getRoot().setStartBlock(newBigBlock);
_mini_stream = new POIFSStream(_filesystem, newBigBlock);
} else {
// Tack it onto the end of our chain
ChainLoopDetector loopDetector = _filesystem.getChainLoopDetector();
int block = _mini_stream.getStartBlock();
while(true) {
loopDetector.claim(block);
int next = _filesystem.getNextBlock(block);
if(next == POIFSConstants.END_OF_CHAIN) {
break;
}
block = next;
}
_filesystem.setNextBlock(block, newBigBlock);
}
// If we are the first block to be allocated, initialise the stream
if (firstInStore) {
_filesystem._get_property_table().getRoot().setStartBlock(newBigBlock);
_mini_stream = new POIFSStream(_filesystem, newBigBlock);
} else {
// Tack it onto the end of our chain
ChainLoopDetector loopDetector = _filesystem.getChainLoopDetector();
int block = _mini_stream.getStartBlock();
while (true) {
loopDetector.claim(block);
int next = _filesystem.getNextBlock(block);
if (next == POIFSConstants.END_OF_CHAIN) {
break;
}
block = next;
}
_filesystem.setNextBlock(block, newBigBlock);
}
// This is now the new end
_filesystem.setNextBlock(newBigBlock, POIFSConstants.END_OF_CHAIN);
// This is now the new end
_filesystem.setNextBlock(newBigBlock, POIFSConstants.END_OF_CHAIN);
// Now try again, to get the real small block
return createBlockIfNeeded(offset);
// Now try again, to get the real small block
return createBlockIfNeeded(offset);
}
/**
* Returns the BATBlock that handles the specified offset,
* and the relative index within it
* and the relative index within it
*/
protected BATBlockAndIndex getBATBlockAndIndex(final int offset) {
return BATBlock.getSBATBlockAndIndex(
offset, _header, _sbat_blocks
);
return BATBlock.getSBATBlockAndIndex(
offset, _header, _sbat_blocks
);
}
/**
* Works out what block follows the specified one.
*/
protected int getNextBlock(final int offset) {
BATBlockAndIndex bai = getBATBlockAndIndex(offset);
return bai.getBlock().getValueAt( bai.getIndex() );
BATBlockAndIndex bai = getBATBlockAndIndex(offset);
return bai.getBlock().getValueAt(bai.getIndex());
}
/**
* Changes the record of what block follows the specified one.
*/
protected void setNextBlock(final int offset, final int nextBlock) {
BATBlockAndIndex bai = getBATBlockAndIndex(offset);
bai.getBlock().setValueAt(
bai.getIndex(), nextBlock
);
BATBlockAndIndex bai = getBATBlockAndIndex(offset);
bai.getBlock().setValueAt(
bai.getIndex(), nextBlock
);
}
/**
* Finds a free block, and returns its offset.
* This method will extend the file if needed, and if doing
* so, allocate new FAT blocks to address the extra space.
* so, allocate new FAT blocks to address the extra space.
*/
protected int getFreeBlock() throws IOException {
int sectorsPerSBAT = _filesystem.getBigBlockSizeDetails().getBATEntriesPerBlock();
int sectorsPerSBAT = _filesystem.getBigBlockSizeDetails().getBATEntriesPerBlock();
// First up, do we have any spare ones?
int offset = 0;
// First up, do we have any spare ones?
int offset = 0;
for (BATBlock sbat : _sbat_blocks) {
// Check this one
if (sbat.hasFreeSectors()) {
@ -185,79 +183,79 @@ public class POIFSMiniStore extends BlockStore
offset += sectorsPerSBAT;
}
// If we get here, then there aren't any
// free sectors in any of the SBATs
// So, we need to extend the chain and add another
// If we get here, then there aren't any
// free sectors in any of the SBATs
// So, we need to extend the chain and add another
// Create a new BATBlock
BATBlock newSBAT = BATBlock.createEmptyBATBlock(_filesystem.getBigBlockSizeDetails(), false);
int batForSBAT = _filesystem.getFreeBlock();
newSBAT.setOurBlockIndex(batForSBAT);
// Create a new BATBlock
BATBlock newSBAT = BATBlock.createEmptyBATBlock(_filesystem.getBigBlockSizeDetails(), false);
int batForSBAT = _filesystem.getFreeBlock();
newSBAT.setOurBlockIndex(batForSBAT);
// Are we the first SBAT?
if(_header.getSBATCount() == 0) {
// Tell the header that we've got our first SBAT there
_header.setSBATStart(batForSBAT);
_header.setSBATBlockCount(1);
} else {
// Find the end of the SBAT stream, and add the sbat in there
ChainLoopDetector loopDetector = _filesystem.getChainLoopDetector();
int batOffset = _header.getSBATStart();
while(true) {
loopDetector.claim(batOffset);
int nextBat = _filesystem.getNextBlock(batOffset);
if(nextBat == POIFSConstants.END_OF_CHAIN) {
break;
}
batOffset = nextBat;
}
// Are we the first SBAT?
if (_header.getSBATCount() == 0) {
// Tell the header that we've got our first SBAT there
_header.setSBATStart(batForSBAT);
_header.setSBATBlockCount(1);
} else {
// Find the end of the SBAT stream, and add the sbat in there
ChainLoopDetector loopDetector = _filesystem.getChainLoopDetector();
int batOffset = _header.getSBATStart();
while (true) {
loopDetector.claim(batOffset);
int nextBat = _filesystem.getNextBlock(batOffset);
if (nextBat == POIFSConstants.END_OF_CHAIN) {
break;
}
batOffset = nextBat;
}
// Add it in at the end
_filesystem.setNextBlock(batOffset, batForSBAT);
// Add it in at the end
_filesystem.setNextBlock(batOffset, batForSBAT);
// And update the count
_header.setSBATBlockCount(
_header.getSBATCount() + 1
);
}
// And update the count
_header.setSBATBlockCount(
_header.getSBATCount() + 1
);
}
// Finish allocating
_filesystem.setNextBlock(batForSBAT, POIFSConstants.END_OF_CHAIN);
_sbat_blocks.add(newSBAT);
// Finish allocating
_filesystem.setNextBlock(batForSBAT, POIFSConstants.END_OF_CHAIN);
_sbat_blocks.add(newSBAT);
// Return our first spot
return offset;
// Return our first spot
return offset;
}
@Override
protected ChainLoopDetector getChainLoopDetector() {
return new ChainLoopDetector( _root.getSize() );
return new ChainLoopDetector(_root.getSize());
}
protected int getBlockStoreBlockSize() {
return POIFSConstants.SMALL_BLOCK_SIZE;
return POIFSConstants.SMALL_BLOCK_SIZE;
}
/**
* Writes the SBATs to their backing blocks, and updates
* the mini-stream size in the properties. Stream size is
* based on full blocks used, not the data within the streams
* the mini-stream size in the properties. Stream size is
* based on full blocks used, not the data within the streams
*/
void syncWithDataSource() throws IOException {
int blocksUsed = 0;
for (BATBlock sbat : _sbat_blocks) {
ByteBuffer block = _filesystem.getBlockAt(sbat.getOurBlockIndex());
sbat.writeData(block);
int blocksUsed = 0;
for (BATBlock sbat : _sbat_blocks) {
ByteBuffer block = _filesystem.getBlockAt(sbat.getOurBlockIndex());
sbat.writeData(block);
if (!sbat.hasFreeSectors()) {
blocksUsed += _filesystem.getBigBlockSizeDetails().getBATEntriesPerBlock();
} else {
blocksUsed += sbat.getOccupiedSize();
}
}
// Set the size on the root in terms of the number of SBAT blocks
// RootProperty.setSize does the sbat -> bytes conversion for us
_filesystem._get_property_table().getRoot().setSize(blocksUsed);
if (!sbat.hasFreeSectors()) {
blocksUsed += _filesystem.getBigBlockSizeDetails().getBATEntriesPerBlock();
} else {
blocksUsed += sbat.getOccupiedSize();
}
}
// Set the size on the root in terms of the number of SBAT blocks
// RootProperty.setSize does the sbat -> bytes conversion for us
_filesystem._get_property_table().getRoot().setSize(blocksUsed);
}
@Override