mirror of https://github.com/apache/poi.git
Bug 64322: Optimize performance of reading ole2 files
Remember channel-size to no re-read it for every read-access, but reset it if we extend the size of the file profiling indicates Channel.size() sometimes has similar runtime overhead as position() or read()! git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1877816 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ff919eb0e4
commit
3cad9e6851
|
@ -39,6 +39,8 @@ public class FileBackedDataSource extends DataSource {
|
||||||
private final static POILogger logger = POILogFactory.getLogger(FileBackedDataSource.class);
|
private final static POILogger logger = POILogFactory.getLogger(FileBackedDataSource.class);
|
||||||
|
|
||||||
private final FileChannel channel;
|
private final FileChannel channel;
|
||||||
|
private Long channelSize;
|
||||||
|
|
||||||
private final boolean writable;
|
private final boolean writable;
|
||||||
// remember file base, which needs to be closed too
|
// remember file base, which needs to be closed too
|
||||||
private RandomAccessFile srcFile;
|
private RandomAccessFile srcFile;
|
||||||
|
@ -97,8 +99,9 @@ public class FileBackedDataSource extends DataSource {
|
||||||
// remember this buffer for cleanup
|
// remember this buffer for cleanup
|
||||||
buffersToClean.put(dst,dst);
|
buffersToClean.put(dst,dst);
|
||||||
} else {
|
} else {
|
||||||
// allocate the buffer on the heap if we cannot map the data in directly
|
|
||||||
channel.position(position);
|
channel.position(position);
|
||||||
|
|
||||||
|
// allocate the buffer on the heap if we cannot map the data in directly
|
||||||
dst = ByteBuffer.allocate(length);
|
dst = ByteBuffer.allocate(length);
|
||||||
|
|
||||||
// Read the contents and check that we could read some data
|
// Read the contents and check that we could read some data
|
||||||
|
@ -118,6 +121,11 @@ public class FileBackedDataSource extends DataSource {
|
||||||
@Override
|
@Override
|
||||||
public void write(ByteBuffer src, long position) throws IOException {
|
public void write(ByteBuffer src, long position) throws IOException {
|
||||||
channel.write(src, position);
|
channel.write(src, position);
|
||||||
|
|
||||||
|
// we have to re-read size if we write "after" the recorded one
|
||||||
|
if(channelSize != null && position >= channelSize) {
|
||||||
|
channelSize = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -131,7 +139,13 @@ public class FileBackedDataSource extends DataSource {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long size() throws IOException {
|
public long size() throws IOException {
|
||||||
return channel.size();
|
// this is called often and profiling showed that channel.size()
|
||||||
|
// was taking a large part of processing-time, so we only read it
|
||||||
|
// once
|
||||||
|
if(channelSize == null) {
|
||||||
|
channelSize = channel.size();
|
||||||
|
}
|
||||||
|
return channelSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void releaseBuffer(ByteBuffer buffer) {
|
public void releaseBuffer(ByteBuffer buffer) {
|
||||||
|
|
|
@ -24,11 +24,18 @@ import static org.junit.Assert.fail;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.ByteArrayInputStream;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
import org.apache.poi.POIDataSamples;
|
import org.apache.poi.POIDataSamples;
|
||||||
|
import org.apache.poi.hpsf.NoPropertySetStreamException;
|
||||||
|
import org.apache.poi.hpsf.Property;
|
||||||
|
import org.apache.poi.hpsf.PropertySet;
|
||||||
|
import org.apache.poi.hpsf.Section;
|
||||||
import org.apache.poi.hssf.HSSFTestDataSamples;
|
import org.apache.poi.hssf.HSSFTestDataSamples;
|
||||||
import org.apache.poi.poifs.common.POIFSBigBlockSize;
|
import org.apache.poi.poifs.common.POIFSBigBlockSize;
|
||||||
import org.apache.poi.poifs.common.POIFSConstants;
|
import org.apache.poi.poifs.common.POIFSConstants;
|
||||||
|
@ -177,7 +184,7 @@ public final class TestPOIFSFileSystem {
|
||||||
fail("File is corrupt and shouldn't have been opened");
|
fail("File is corrupt and shouldn't have been opened");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that we can write and read a file that contains XBATs
|
* Tests that we can write and read a file that contains XBATs
|
||||||
* as well as regular BATs.
|
* as well as regular BATs.
|
||||||
|
@ -191,19 +198,19 @@ public final class TestPOIFSFileSystem {
|
||||||
fs.getRoot().createDocument(
|
fs.getRoot().createDocument(
|
||||||
"BIG", new ByteArrayInputStream(hugeStream)
|
"BIG", new ByteArrayInputStream(hugeStream)
|
||||||
);
|
);
|
||||||
|
|
||||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||||
fs.writeFilesystem(baos);
|
fs.writeFilesystem(baos);
|
||||||
byte[] fsData = baos.toByteArray();
|
byte[] fsData = baos.toByteArray();
|
||||||
|
|
||||||
|
|
||||||
// Check the header was written properly
|
// Check the header was written properly
|
||||||
InputStream inp = new ByteArrayInputStream(fsData);
|
InputStream inp = new ByteArrayInputStream(fsData);
|
||||||
HeaderBlock header = new HeaderBlock(inp);
|
HeaderBlock header = new HeaderBlock(inp);
|
||||||
assertEquals(109+21, header.getBATCount());
|
assertEquals(109+21, header.getBATCount());
|
||||||
assertEquals(1, header.getXBATCount());
|
assertEquals(1, header.getXBATCount());
|
||||||
|
|
||||||
|
|
||||||
// We should have 21 BATs in the XBAT
|
// We should have 21 BATs in the XBAT
|
||||||
ByteBuffer xbatData = ByteBuffer.allocate(512);
|
ByteBuffer xbatData = ByteBuffer.allocate(512);
|
||||||
xbatData.put(fsData, (1+header.getXBATIndex())*512, 512);
|
xbatData.put(fsData, (1+header.getXBATIndex())*512, 512);
|
||||||
|
@ -216,19 +223,19 @@ public final class TestPOIFSFileSystem {
|
||||||
assertEquals(POIFSConstants.UNUSED_BLOCK, xbat.getValueAt(i));
|
assertEquals(POIFSConstants.UNUSED_BLOCK, xbat.getValueAt(i));
|
||||||
}
|
}
|
||||||
assertEquals(POIFSConstants.END_OF_CHAIN, xbat.getValueAt(127));
|
assertEquals(POIFSConstants.END_OF_CHAIN, xbat.getValueAt(127));
|
||||||
|
|
||||||
|
|
||||||
// Now load it and check
|
// Now load it and check
|
||||||
fs = new POIFSFileSystem(
|
fs = new POIFSFileSystem(
|
||||||
new ByteArrayInputStream(fsData)
|
new ByteArrayInputStream(fsData)
|
||||||
);
|
);
|
||||||
|
|
||||||
DirectoryNode root = fs.getRoot();
|
DirectoryNode root = fs.getRoot();
|
||||||
assertEquals(1, root.getEntryCount());
|
assertEquals(1, root.getEntryCount());
|
||||||
DocumentNode big = (DocumentNode)root.getEntry("BIG");
|
DocumentNode big = (DocumentNode)root.getEntry("BIG");
|
||||||
assertEquals(hugeStream.length, big.getSize());
|
assertEquals(hugeStream.length, big.getSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Most OLE2 files use 512byte blocks. However, a small number
|
* Most OLE2 files use 512byte blocks. However, a small number
|
||||||
* use 4k blocks. Check that we can open these.
|
* use 4k blocks. Check that we can open these.
|
||||||
|
@ -293,4 +300,66 @@ public final class TestPOIFSFileSystem {
|
||||||
|
|
||||||
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("foobaa".getBytes(UTF_8)));
|
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("foobaa".getBytes(UTF_8)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test64322() throws NoPropertySetStreamException, IOException {
|
||||||
|
try (POIFSFileSystem poiFS = new POIFSFileSystem(_samples.getFile("64322.ole2"))) {
|
||||||
|
int count = recurseDir(poiFS.getRoot());
|
||||||
|
|
||||||
|
assertEquals("Expecting a fixed number of entries being found in the test-document",
|
||||||
|
1285, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test64322a() throws NoPropertySetStreamException, IOException {
|
||||||
|
try (POIFSFileSystem poiFS = new POIFSFileSystem(_samples.openResourceAsStream("64322.ole2"))) {
|
||||||
|
int count = recurseDir(poiFS.getRoot());
|
||||||
|
|
||||||
|
assertEquals("Expecting a fixed number of entries being found in the test-document",
|
||||||
|
1285, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int recurseDir(DirectoryEntry dir) throws IOException, NoPropertySetStreamException {
|
||||||
|
int count = 0;
|
||||||
|
for (Entry entry : dir) {
|
||||||
|
count++;
|
||||||
|
if (entry instanceof DirectoryEntry) {
|
||||||
|
count += recurseDir((DirectoryEntry) entry);
|
||||||
|
}
|
||||||
|
if (entry instanceof DocumentEntry) {
|
||||||
|
DocumentEntry de = (DocumentEntry) entry;
|
||||||
|
HashMap<String, String> props = new HashMap<>();
|
||||||
|
try (DocumentInputStream dis = new DocumentInputStream(de)) {
|
||||||
|
props.put("name", de.getName());
|
||||||
|
|
||||||
|
if (PropertySet.isPropertySetStream(dis)) {
|
||||||
|
dis.mark(10000000);
|
||||||
|
PropertySet ps = null;
|
||||||
|
try {
|
||||||
|
ps = new PropertySet(dis);
|
||||||
|
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
// ignore
|
||||||
|
}
|
||||||
|
if (ps != null) {
|
||||||
|
for (Section section : ps.getSections()) {
|
||||||
|
for (Property p : section.getProperties()) {
|
||||||
|
String prop = section.getDictionary() != null
|
||||||
|
? section.getDictionary().get(p.getID())
|
||||||
|
: String.valueOf(p.getID());
|
||||||
|
if (p.getValue() != null)
|
||||||
|
props.put("property_" + prop, p.getValue().toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dis.reset();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertTrue(props.size() > 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue