Bug 56437 - [PATCH] Streaming write support in NPOIFS

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1588887 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2014-04-21 14:37:33 +00:00
parent f566b57be0
commit 059cba52ab
6 changed files with 262 additions and 111 deletions

View File

@ -412,8 +412,12 @@ public class DirectoryNode
final POIFSWriterListener writer)
throws IOException
{
if(_nfilesystem != null) {
return createDocument(new NPOIFSDocument(name, size, _nfilesystem, writer));
} else {
return createDocument(new POIFSDocument(name, size, _path, writer));
}
}
/**
* create a new DirectoryEntry

View File

@ -17,10 +17,11 @@
package org.apache.poi.poifs.filesystem;
import java.io.ByteArrayInputStream;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.Iterator;
@ -30,7 +31,6 @@ import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.dev.POIFSViewable;
import org.apache.poi.poifs.property.DocumentProperty;
import org.apache.poi.util.HexDump;
import org.apache.poi.util.IOUtils;
/**
* This class manages a document in the NIO POIFS filesystem.
@ -72,21 +72,12 @@ public final class NPOIFSDocument implements POIFSViewable {
{
this._filesystem = filesystem;
// Buffer the contents into memory. This is a bit icky...
// TODO Replace with a buffer up to the mini stream size, then streaming write
byte[] contents;
if(stream instanceof ByteArrayInputStream) {
ByteArrayInputStream bais = (ByteArrayInputStream)stream;
contents = new byte[bais.available()];
bais.read(contents);
} else {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
IOUtils.copy(stream, baos);
contents = baos.toByteArray();
}
final int bigBlockSize = POIFSConstants.BIG_BLOCK_MINIMUM_DOCUMENT_SIZE;
BufferedInputStream bis = new BufferedInputStream(stream, bigBlockSize+1);
bis.mark(bigBlockSize);
// Do we need to store as a mini stream or a full one?
if(contents.length <= POIFSConstants.BIG_BLOCK_MINIMUM_DOCUMENT_SIZE) {
if(bis.skip(bigBlockSize) < bigBlockSize) {
_stream = new NPOIFSStream(filesystem.getMiniStore());
_block_size = _filesystem.getMiniStore().getBlockStoreBlockSize();
} else {
@ -94,11 +85,46 @@ public final class NPOIFSDocument implements POIFSViewable {
_block_size = _filesystem.getBlockStoreBlockSize();
}
// start from the beginning
bis.reset();
// Store it
_stream.updateContents(contents);
OutputStream os = _stream.getOutputStream();
byte buf[] = new byte[1024];
int length = 0;
for (int readBytes; (readBytes = bis.read(buf)) != -1; length += readBytes) {
os.write(buf, 0, readBytes);
}
// And build the property for it
this._property = new DocumentProperty(name, contents.length);
this._property = new DocumentProperty(name, length);
_property.setStartBlock(_stream.getStartBlock());
}
public NPOIFSDocument(String name, int size, NPOIFSFileSystem filesystem, POIFSWriterListener writer)
throws IOException
{
this._filesystem = filesystem;
if (size < POIFSConstants.BIG_BLOCK_MINIMUM_DOCUMENT_SIZE) {
_stream = new NPOIFSStream(filesystem.getMiniStore());
_block_size = _filesystem.getMiniStore().getBlockStoreBlockSize();
} else {
_stream = new NPOIFSStream(filesystem);
_block_size = _filesystem.getBlockStoreBlockSize();
}
OutputStream innerOs = _stream.getOutputStream();
DocumentOutputStream os = new DocumentOutputStream(innerOs, size);
POIFSDocumentPath path = new POIFSDocumentPath(name.split("\\\\"));
String docName = path.getComponent(path.length()-1);
POIFSWriterEvent event = new POIFSWriterEvent(os, path, docName, size);
writer.processPOIFSWriterEvent(event);
innerOs.close();
// And build the property for it
this._property = new DocumentProperty(name, size);
_property.setStartBlock(_stream.getStartBlock());
}

View File

@ -117,12 +117,14 @@ public class NPOIFSFileSystem extends BlockStore
// Mark us as having a single empty BAT at offset 0
_header.setBATCount(1);
_header.setBATArray(new int[] { 0 });
_bat_blocks.add(BATBlock.createEmptyBATBlock(bigBlockSize, false));
setNextBlock(0, POIFSConstants.FAT_SECTOR_BLOCK);
BATBlock bb = BATBlock.createEmptyBATBlock(bigBlockSize, false);
bb.setOurBlockIndex(0);
_bat_blocks.add(bb);
// Now associate the properties with the empty block
_property_table.setStartBlock(1);
setNextBlock(0, POIFSConstants.FAT_SECTOR_BLOCK);
setNextBlock(1, POIFSConstants.END_OF_CHAIN);
_property_table.setStartBlock(POIFSConstants.END_OF_CHAIN);
}
/**
@ -483,13 +485,11 @@ public class NPOIFSFileSystem extends BlockStore
*/
@Override
protected int getFreeBlock() throws IOException {
// First up, do we have any spare ones?
int offset = 0;
for(int i=0; i<_bat_blocks.size(); i++) {
int numSectors = bigBlockSize.getBATEntriesPerBlock();
// Check this one
BATBlock bat = _bat_blocks.get(i);
// First up, do we have any spare ones?
int offset = 0;
for (BATBlock bat : _bat_blocks) {
if(bat.hasFreeSectors()) {
// Claim one of them and return it
for(int j=0; j<numSectors; j++) {
@ -699,8 +699,13 @@ public class NPOIFSFileSystem extends BlockStore
* Has our in-memory objects write their state
* to their backing blocks
*/
private void syncWithDataSource() throws IOException
{
private void syncWithDataSource() throws IOException {
// Properties
NPOIFSStream propStream = new NPOIFSStream(this, _header.getPropertyStart());
_property_table.preWrite();
_property_table.write(propStream);
// _header.setPropertyStart has been updated on write ...
// HeaderBlock
HeaderBlockWriter hbw = new HeaderBlockWriter(_header);
hbw.writeBlock( getBlockAt(-1) );
@ -713,11 +718,6 @@ public class NPOIFSFileSystem extends BlockStore
// SBATs
_mini_store.syncWithDataSource();
// Properties
_property_table.write(
new NPOIFSStream(this, _header.getPropertyStart())
);
}
/**

View File

@ -20,6 +20,7 @@
package org.apache.poi.poifs.filesystem;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Iterator;
@ -47,6 +48,7 @@ public class NPOIFSStream implements Iterable<ByteBuffer>
{
private BlockStore blockStore;
private int startBlock;
private OutputStream outStream;
/**
* Constructor for an existing stream. It's up to you
@ -100,62 +102,16 @@ public class NPOIFSStream implements Iterable<ByteBuffer>
* need to update the size in the property yourself
*/
public void updateContents(byte[] contents) throws IOException {
// How many blocks are we going to need?
int blockSize = blockStore.getBlockStoreBlockSize();
int blocks = (int)Math.ceil( ((double)contents.length) / blockSize );
// Make sure we don't encounter a loop whilst overwriting
// the existing blocks
ChainLoopDetector loopDetector = blockStore.getChainLoopDetector();
// Start writing
int prevBlock = POIFSConstants.END_OF_CHAIN;
int nextBlock = startBlock;
for(int i=0; i<blocks; i++) {
int thisBlock = nextBlock;
// Allocate a block if needed, otherwise figure
// out what the next block will be
if(thisBlock == POIFSConstants.END_OF_CHAIN) {
thisBlock = blockStore.getFreeBlock();
loopDetector.claim(thisBlock);
// We're on the end of the chain
nextBlock = POIFSConstants.END_OF_CHAIN;
// Mark the previous block as carrying on to us if needed
if(prevBlock != POIFSConstants.END_OF_CHAIN) {
blockStore.setNextBlock(prevBlock, thisBlock);
}
blockStore.setNextBlock(thisBlock, POIFSConstants.END_OF_CHAIN);
// If we've just written the first block on a
// new stream, save the start block offset
if(this.startBlock == POIFSConstants.END_OF_CHAIN) {
this.startBlock = thisBlock;
}
} else {
loopDetector.claim(thisBlock);
nextBlock = blockStore.getNextBlock(thisBlock);
OutputStream os = getOutputStream();
os.write(contents);
os.close();
}
// Write it
ByteBuffer buffer = blockStore.createBlockIfNeeded(thisBlock);
int startAt = i*blockSize;
int endAt = Math.min(contents.length - startAt, blockSize);
buffer.put(contents, startAt, endAt);
// Update pointers
prevBlock = thisBlock;
public OutputStream getOutputStream() throws IOException {
if (outStream == null) {
outStream = new StreamBlockByteBuffer();
}
int lastBlock = prevBlock;
// If we're overwriting, free any remaining blocks
NPOIFSStream toFree = new NPOIFSStream(blockStore, nextBlock);
toFree.free(loopDetector);
// Mark the end of the stream
blockStore.setNextBlock(lastBlock, POIFSConstants.END_OF_CHAIN);
return outStream;
}
// TODO Streaming write support
@ -222,5 +178,87 @@ public class NPOIFSStream implements Iterable<ByteBuffer>
throw new UnsupportedOperationException();
}
}
protected class StreamBlockByteBuffer extends OutputStream {
byte oneByte[] = new byte[1];
ByteBuffer buffer;
// Make sure we don't encounter a loop whilst overwriting
// the existing blocks
ChainLoopDetector loopDetector;
int prevBlock, nextBlock;
protected StreamBlockByteBuffer() throws IOException {
loopDetector = blockStore.getChainLoopDetector();
prevBlock = POIFSConstants.END_OF_CHAIN;
nextBlock = startBlock;
}
protected void createBlockIfNeeded() throws IOException {
if (buffer != null && buffer.hasRemaining()) return;
int thisBlock = nextBlock;
// Allocate a block if needed, otherwise figure
// out what the next block will be
if(thisBlock == POIFSConstants.END_OF_CHAIN) {
thisBlock = blockStore.getFreeBlock();
loopDetector.claim(thisBlock);
// We're on the end of the chain
nextBlock = POIFSConstants.END_OF_CHAIN;
// Mark the previous block as carrying on to us if needed
if(prevBlock != POIFSConstants.END_OF_CHAIN) {
blockStore.setNextBlock(prevBlock, thisBlock);
}
blockStore.setNextBlock(thisBlock, POIFSConstants.END_OF_CHAIN);
// If we've just written the first block on a
// new stream, save the start block offset
if(startBlock == POIFSConstants.END_OF_CHAIN) {
startBlock = thisBlock;
}
} else {
loopDetector.claim(thisBlock);
nextBlock = blockStore.getNextBlock(thisBlock);
}
buffer = blockStore.createBlockIfNeeded(thisBlock);
// Update pointers
prevBlock = thisBlock;
}
public void write(int b) throws IOException {
oneByte[0] = (byte)(b & 0xFF);
write(oneByte);
}
public void write(byte[] b, int off, int len) throws IOException {
if ((off < 0) || (off > b.length) || (len < 0) ||
((off + len) > b.length) || ((off + len) < 0)) {
throw new IndexOutOfBoundsException();
} else if (len == 0) {
return;
}
do {
createBlockIfNeeded();
int writeBytes = Math.min(buffer.remaining(), len);
buffer.put(b, off, writeBytes);
off += writeBytes;
len -= writeBytes;
} while (len > 0);
}
public void close() throws IOException {
// If we're overwriting, free any remaining blocks
NPOIFSStream toFree = new NPOIFSStream(blockStore, nextBlock);
toFree.free(loopDetector);
// Mark the end of the stream
blockStore.setNextBlock(prevBlock, POIFSConstants.END_OF_CHAIN);
}
}
}

View File

@ -17,8 +17,8 @@
package org.apache.poi.poifs.property;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
@ -123,18 +123,35 @@ public final class NPropertyTable extends PropertyTableBase {
return (int)Math.ceil(size / _bigBigBlockSize.getBigBlockSize());
}
/**
* Prepare to be written
*/
public void preWrite() {
List<Property> pList = new ArrayList<Property>();
// give each property its index
int i=0;
for (Property p : _properties) {
// only handle non-null properties
if (p == null) continue;
p.setIndex(i++);
pList.add(p);
}
// prepare each property for writing
for (Property p : pList) p.preWrite();
}
/**
* Writes the properties out into the given low-level stream
*/
public void write(NPOIFSStream stream) throws IOException {
// TODO - Use a streaming write
ByteArrayOutputStream baos = new ByteArrayOutputStream();
OutputStream os = stream.getOutputStream();
for(Property property : _properties) {
if(property != null) {
property.writeData(baos);
property.writeData(os);
}
}
stream.updateContents(baos.toByteArray());
os.close();
// Update the start position if needed
if(getStartBlock() != stream.getStartBlock()) {

View File

@ -17,13 +17,18 @@
package org.apache.poi.poifs.filesystem;
import static org.hamcrest.core.IsEqual.equalTo;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.fail;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
@ -33,14 +38,17 @@ import org.apache.poi.poifs.property.NPropertyTable;
import org.apache.poi.poifs.property.Property;
import org.apache.poi.poifs.property.RootProperty;
import org.apache.poi.poifs.storage.HeaderBlock;
import org.apache.poi.util.IOUtils;
import org.junit.Test;
/**
* Tests for the new NIO POIFSFileSystem implementation
*/
public final class TestNPOIFSFileSystem extends TestCase {
public final class TestNPOIFSFileSystem {
private static final POIDataSamples _inst = POIDataSamples.getPOIFSInstance();
public void testBasicOpen() throws Exception {
@Test
public void basicOpen() throws Exception {
NPOIFSFileSystem fsA, fsB;
// With a simple 512 block file
@ -58,7 +66,8 @@ public final class TestNPOIFSFileSystem extends TestCase {
}
}
public void testPropertiesAndFatOnRead() throws Exception {
@Test
public void propertiesAndFatOnRead() throws Exception {
NPOIFSFileSystem fsA, fsB;
// With a simple 512 block file
@ -196,7 +205,8 @@ public final class TestNPOIFSFileSystem extends TestCase {
* Check that for a given block, we can correctly figure
* out what the next one is
*/
public void testNextBlock() throws Exception {
@Test
public void nextBlock() throws Exception {
NPOIFSFileSystem fsA = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi"));
NPOIFSFileSystem fsB = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi"));
for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB}) {
@ -253,7 +263,8 @@ public final class TestNPOIFSFileSystem extends TestCase {
/**
* Check we get the right data back for each block
*/
public void testGetBlock() throws Exception {
@Test
public void getBlock() throws Exception {
NPOIFSFileSystem fsA = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi"));
NPOIFSFileSystem fsB = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi"));
for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB}) {
@ -322,7 +333,8 @@ public final class TestNPOIFSFileSystem extends TestCase {
* Ask for free blocks where there are some already
* to be had from the FAT
*/
public void testGetFreeBlockWithSpare() throws Exception {
@Test
public void getFreeBlockWithSpare() throws Exception {
NPOIFSFileSystem fs = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi"));
// Our first BAT block has spares
@ -349,7 +361,8 @@ public final class TestNPOIFSFileSystem extends TestCase {
* Ask for free blocks where no free ones exist, and so the
* file needs to be extended and another BAT/XBAT added
*/
public void testGetFreeBlockWithNoneSpare() throws Exception {
@Test
public void getFreeBlockWithNoneSpare() throws Exception {
NPOIFSFileSystem fs = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi"));
int free;
@ -479,7 +492,8 @@ public final class TestNPOIFSFileSystem extends TestCase {
* Test that we can correctly get the list of directory
* entries, and the details on the files in them
*/
public void testListEntries() throws Exception {
@Test
public void listEntries() throws Exception {
NPOIFSFileSystem fsA = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi"));
NPOIFSFileSystem fsB = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi"));
NPOIFSFileSystem fsC = new NPOIFSFileSystem(_inst.getFile("BlockSize4096.zvi"));
@ -519,7 +533,8 @@ public final class TestNPOIFSFileSystem extends TestCase {
* Tests that we can get the correct contents for
* a document in the filesystem
*/
public void testGetDocumentEntry() throws Exception {
@Test
public void getDocumentEntry() throws Exception {
NPOIFSFileSystem fsA = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi"));
NPOIFSFileSystem fsB = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi"));
NPOIFSFileSystem fsC = new NPOIFSFileSystem(_inst.getFile("BlockSize4096.zvi"));
@ -552,7 +567,8 @@ public final class TestNPOIFSFileSystem extends TestCase {
* Read a file, write it and read it again.
* Then, alter+add some streams, write and read
*/
public void testReadWriteRead() throws Exception {
@Test
public void readWriteRead() throws Exception {
// TODO
// TODO
}
@ -561,7 +577,8 @@ public final class TestNPOIFSFileSystem extends TestCase {
* Create a new file, write it and read it again
* Then, add some streams, write and read
*/
public void testCreateWriteRead() throws Exception {
@Test
public void createWriteRead() throws Exception {
NPOIFSFileSystem fs = new NPOIFSFileSystem();
// Initially has a BAT but not SBAT
@ -577,10 +594,11 @@ public final class TestNPOIFSFileSystem extends TestCase {
fs.writeFilesystem(baos);
fs = new NPOIFSFileSystem(new ByteArrayInputStream(baos.toByteArray()));
// Check it's still like that
// Property table entries have been added to the blocks
assertEquals(POIFSConstants.FAT_SECTOR_BLOCK, fs.getNextBlock(0));
assertEquals(POIFSConstants.END_OF_CHAIN, fs.getNextBlock(1));
assertEquals(POIFSConstants.UNUSED_BLOCK, fs.getNextBlock(2));
assertEquals(POIFSConstants.END_OF_CHAIN, fs.getNextBlock(2));
assertEquals(POIFSConstants.UNUSED_BLOCK, fs.getNextBlock(3));
assertEquals(POIFSConstants.END_OF_CHAIN, fs.getRoot().getProperty().getStartBlock());
// Now add a normal stream and a mini stream
@ -589,5 +607,53 @@ public final class TestNPOIFSFileSystem extends TestCase {
// TODO The rest of the test
}
@Test
public void writPOIFSWriterListener() throws Exception {
File testFile = POIDataSamples.getSpreadSheetInstance().getFile("Simple.xls");
NPOIFSFileSystem src = new NPOIFSFileSystem(testFile);
byte wbDataExp[] = IOUtils.toByteArray(src.createDocumentInputStream("Workbook"));
NPOIFSFileSystem nfs = new NPOIFSFileSystem();
copy(src.getRoot(), nfs.getRoot());
src.close();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
nfs.writeFilesystem(bos);
nfs.close();
POIFSFileSystem pfs = new POIFSFileSystem(new ByteArrayInputStream(bos.toByteArray()));
byte wbDataAct[] = IOUtils.toByteArray(pfs.createDocumentInputStream("Workbook"));
assertThat(wbDataExp, equalTo(wbDataAct));
}
private static void copy(final DirectoryNode src, final DirectoryNode dest) throws IOException {
Iterator<Entry> srcIter = src.getEntries();
while(srcIter.hasNext()) {
Entry entry = srcIter.next();
if (entry.isDirectoryEntry()) {
DirectoryNode srcDir = (DirectoryNode)entry;
DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName());
destDir.setStorageClsid(src.getStorageClsid());
copy(srcDir, destDir);
} else {
final DocumentNode srcDoc = (DocumentNode)entry;
// dest.createDocument(srcDoc.getName(), src.createDocumentInputStream(srcDoc));
dest.createDocument(srcDoc.getName(), srcDoc.getSize(), new POIFSWriterListener() {
public void processPOIFSWriterEvent(POIFSWriterEvent event) {
try {
DocumentInputStream dis = src.createDocumentInputStream(srcDoc);
IOUtils.copy(dis, event.getStream());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
}
}
}
// TODO Directory/Document write tests
}