Bug 64322: Optimize performance of reading ole2 files

Remember channel-size to no re-read it for every read-access,
but reset it if we extend the size of the file
profiling indicates Channel.size() sometimes has similar runtime
overhead as position() or read()!

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1877816 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2020-05-16 13:06:07 +00:00
parent ff919eb0e4
commit 3cad9e6851
3 changed files with 96 additions and 13 deletions

View File

@ -39,6 +39,8 @@ public class FileBackedDataSource extends DataSource {
private final static POILogger logger = POILogFactory.getLogger(FileBackedDataSource.class); private final static POILogger logger = POILogFactory.getLogger(FileBackedDataSource.class);
private final FileChannel channel; private final FileChannel channel;
private Long channelSize;
private final boolean writable; private final boolean writable;
// remember file base, which needs to be closed too // remember file base, which needs to be closed too
private RandomAccessFile srcFile; private RandomAccessFile srcFile;
@ -97,8 +99,9 @@ public class FileBackedDataSource extends DataSource {
// remember this buffer for cleanup // remember this buffer for cleanup
buffersToClean.put(dst,dst); buffersToClean.put(dst,dst);
} else { } else {
// allocate the buffer on the heap if we cannot map the data in directly
channel.position(position); channel.position(position);
// allocate the buffer on the heap if we cannot map the data in directly
dst = ByteBuffer.allocate(length); dst = ByteBuffer.allocate(length);
// Read the contents and check that we could read some data // Read the contents and check that we could read some data
@ -118,6 +121,11 @@ public class FileBackedDataSource extends DataSource {
@Override @Override
public void write(ByteBuffer src, long position) throws IOException { public void write(ByteBuffer src, long position) throws IOException {
channel.write(src, position); channel.write(src, position);
// we have to re-read size if we write "after" the recorded one
if(channelSize != null && position >= channelSize) {
channelSize = null;
}
} }
@Override @Override
@ -131,7 +139,13 @@ public class FileBackedDataSource extends DataSource {
@Override @Override
public long size() throws IOException { public long size() throws IOException {
return channel.size(); // this is called often and profiling showed that channel.size()
// was taking a large part of processing-time, so we only read it
// once
if(channelSize == null) {
channelSize = channel.size();
}
return channelSize;
} }
public void releaseBuffer(ByteBuffer buffer) { public void releaseBuffer(ByteBuffer buffer) {

View File

@ -24,11 +24,18 @@ import static org.junit.Assert.fail;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.HashMap;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.hpsf.NoPropertySetStreamException;
import org.apache.poi.hpsf.Property;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.Section;
import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.poifs.common.POIFSBigBlockSize; import org.apache.poi.poifs.common.POIFSBigBlockSize;
import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.common.POIFSConstants;
@ -177,7 +184,7 @@ public final class TestPOIFSFileSystem {
fail("File is corrupt and shouldn't have been opened"); fail("File is corrupt and shouldn't have been opened");
} }
} }
/** /**
* Tests that we can write and read a file that contains XBATs * Tests that we can write and read a file that contains XBATs
* as well as regular BATs. * as well as regular BATs.
@ -191,19 +198,19 @@ public final class TestPOIFSFileSystem {
fs.getRoot().createDocument( fs.getRoot().createDocument(
"BIG", new ByteArrayInputStream(hugeStream) "BIG", new ByteArrayInputStream(hugeStream)
); );
ByteArrayOutputStream baos = new ByteArrayOutputStream(); ByteArrayOutputStream baos = new ByteArrayOutputStream();
fs.writeFilesystem(baos); fs.writeFilesystem(baos);
byte[] fsData = baos.toByteArray(); byte[] fsData = baos.toByteArray();
// Check the header was written properly // Check the header was written properly
InputStream inp = new ByteArrayInputStream(fsData); InputStream inp = new ByteArrayInputStream(fsData);
HeaderBlock header = new HeaderBlock(inp); HeaderBlock header = new HeaderBlock(inp);
assertEquals(109+21, header.getBATCount()); assertEquals(109+21, header.getBATCount());
assertEquals(1, header.getXBATCount()); assertEquals(1, header.getXBATCount());
// We should have 21 BATs in the XBAT // We should have 21 BATs in the XBAT
ByteBuffer xbatData = ByteBuffer.allocate(512); ByteBuffer xbatData = ByteBuffer.allocate(512);
xbatData.put(fsData, (1+header.getXBATIndex())*512, 512); xbatData.put(fsData, (1+header.getXBATIndex())*512, 512);
@ -216,19 +223,19 @@ public final class TestPOIFSFileSystem {
assertEquals(POIFSConstants.UNUSED_BLOCK, xbat.getValueAt(i)); assertEquals(POIFSConstants.UNUSED_BLOCK, xbat.getValueAt(i));
} }
assertEquals(POIFSConstants.END_OF_CHAIN, xbat.getValueAt(127)); assertEquals(POIFSConstants.END_OF_CHAIN, xbat.getValueAt(127));
// Now load it and check // Now load it and check
fs = new POIFSFileSystem( fs = new POIFSFileSystem(
new ByteArrayInputStream(fsData) new ByteArrayInputStream(fsData)
); );
DirectoryNode root = fs.getRoot(); DirectoryNode root = fs.getRoot();
assertEquals(1, root.getEntryCount()); assertEquals(1, root.getEntryCount());
DocumentNode big = (DocumentNode)root.getEntry("BIG"); DocumentNode big = (DocumentNode)root.getEntry("BIG");
assertEquals(hugeStream.length, big.getSize()); assertEquals(hugeStream.length, big.getSize());
} }
/** /**
* Most OLE2 files use 512byte blocks. However, a small number * Most OLE2 files use 512byte blocks. However, a small number
* use 4k blocks. Check that we can open these. * use 4k blocks. Check that we can open these.
@ -293,4 +300,66 @@ public final class TestPOIFSFileSystem {
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("foobaa".getBytes(UTF_8))); assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("foobaa".getBytes(UTF_8)));
} }
@Test
public void test64322() throws NoPropertySetStreamException, IOException {
try (POIFSFileSystem poiFS = new POIFSFileSystem(_samples.getFile("64322.ole2"))) {
int count = recurseDir(poiFS.getRoot());
assertEquals("Expecting a fixed number of entries being found in the test-document",
1285, count);
}
}
@Test
public void test64322a() throws NoPropertySetStreamException, IOException {
try (POIFSFileSystem poiFS = new POIFSFileSystem(_samples.openResourceAsStream("64322.ole2"))) {
int count = recurseDir(poiFS.getRoot());
assertEquals("Expecting a fixed number of entries being found in the test-document",
1285, count);
}
}
private static int recurseDir(DirectoryEntry dir) throws IOException, NoPropertySetStreamException {
int count = 0;
for (Entry entry : dir) {
count++;
if (entry instanceof DirectoryEntry) {
count += recurseDir((DirectoryEntry) entry);
}
if (entry instanceof DocumentEntry) {
DocumentEntry de = (DocumentEntry) entry;
HashMap<String, String> props = new HashMap<>();
try (DocumentInputStream dis = new DocumentInputStream(de)) {
props.put("name", de.getName());
if (PropertySet.isPropertySetStream(dis)) {
dis.mark(10000000);
PropertySet ps = null;
try {
ps = new PropertySet(dis);
} catch (UnsupportedEncodingException e) {
// ignore
}
if (ps != null) {
for (Section section : ps.getSections()) {
for (Property p : section.getProperties()) {
String prop = section.getDictionary() != null
? section.getDictionary().get(p.getID())
: String.valueOf(p.getID());
if (p.getValue() != null)
props.put("property_" + prop, p.getValue().toString());
}
}
}
dis.reset();
}
}
assertTrue(props.size() > 0);
}
}
return count;
}
} }

BIN
test-data/poifs/64322.ole2 Normal file

Binary file not shown.