mirror of https://github.com/apache/poi.git
Optimisation of RecordInputStream - removed intermediate 8K byte buffer. Expected performance gain was not realised immediately, so LittleEndianInput stuff has been pushed down into DocumentInputStream to help.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@707778 13f79535-47bb-0310-9956-ffa450edef68
parent 37d5592c3f
commit e2f22b4b0e
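The loop below is a minimal sketch of the consumer-side contract that the RecordInputStream changes in this commit preserve: callers still iterate with hasNextRecord()/nextRecord() and pull typed fields, but the bytes now come straight from the underlying LittleEndianInput instead of an intermediate 8K copy. The four-byte stream (a single EOF record, sid 0x000A) and the demo class name are illustrative assumptions, not part of the commit.

    import java.io.ByteArrayInputStream;

    import org.apache.poi.hssf.record.RecordInputStream;

    public final class RecordStreamDemo {
        public static void main(String[] args) {
            // A minimal BIFF fragment: one EOF record (sid 0x000A, data length 0).
            byte[] biff = { 0x0A, 0x00, 0x00, 0x00 };
            RecordInputStream ris = new RecordInputStream(new ByteArrayInputStream(biff));
            while (ris.hasNextRecord()) {
                ris.nextRecord();
                System.out.println("sid=0x" + Integer.toHexString(ris.getSid())
                        + " dataLen=" + ris.remaining());
                // typed reads (readUShort, readInt, readDouble, ...) consume the
                // record body directly from the underlying stream
            }
        }
    }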
@@ -1,4 +1,3 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with

@@ -15,11 +14,9 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.contrib.poibrowser;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

@@ -160,17 +157,7 @@ public class TreeReaderListener implements POIFSReaderListener
throw new RuntimeException(t.getMessage());
}

try
{
is.close();
}
catch (IOException ex)
{
System.err.println
("Unexpected exception while closing " +
event.getName() + " in " + event.getPath().toString());
ex.printStackTrace(System.err);
}
is.close();

final MutableTreeNode parentNode = getNode(d.path, filename, rootNode);
final MutableTreeNode nameNode = new DefaultMutableTreeNode(d.name);

@@ -17,12 +17,13 @@

package org.apache.poi.hssf.record;

import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianInput;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ByteArrayOutputStream;

import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianInput;
import org.apache.poi.util.LittleEndianInputStream;

/**
* Title: Record Input Stream<P>

@@ -34,106 +35,131 @@ public final class RecordInputStream extends InputStream implements LittleEndian
/** Maximum size of a single record (minus the 4 byte header) without a continue*/
public final static short MAX_RECORD_DATA_SIZE = 8224;
private static final int INVALID_SID_VALUE = -1;
private static final int DATA_LEN_NEEDS_TO_BE_READ = -1;
private static final byte[] EMPTY_BYTE_ARRAY = { };

private InputStream in;
private short currentSid;
private short currentLength = -1;
private short nextSid;
private final InputStream _in;
/** {@link LittleEndianInput} facet of field {@link #_in} */
private final LittleEndianInput _le;
private int currentSid;
private int _currentDataLength;
private int nextSid;
private int recordOffset;
private boolean autoContinue; // TODO - remove this

private final byte[] data = new byte[MAX_RECORD_DATA_SIZE];
private short recordOffset;
private long pos;
public RecordInputStream(InputStream in) throws RecordFormatException {
_in = in;
if (in instanceof LittleEndianInput) {
// accessing directly is an optimisation
_le = (LittleEndianInput) in;
} else {
// less optimal, but should work OK just the same. Often occurs in junit tests.
_le = new LittleEndianInputStream(in);
}
try {
if (_in.available() < LittleEndian.SHORT_SIZE) {
nextSid = INVALID_SID_VALUE;
} else {
nextSid = LittleEndian.readShort(in);
}
} catch (IOException ex) {
throw new RecordFormatException("Error reading bytes", ex);
}
_currentDataLength = DATA_LEN_NEEDS_TO_BE_READ;
autoContinue = true;
}

private boolean autoContinue = true;

public RecordInputStream(InputStream in) throws RecordFormatException {
this.in = in;
try {
nextSid = LittleEndian.readShort(in);
//Don't increment the pos just yet (technically we are at the start of
//the record stream until nextRecord is called).
} catch (IOException ex) {
throw new RecordFormatException("Error reading bytes", ex);
}
}

/** This method will read a byte from the current record*/
public int read() {
checkRecordPosition(LittleEndian.BYTE_SIZE);

byte result = data[recordOffset];
recordOffset += LittleEndian.BYTE_SIZE;
pos += LittleEndian.BYTE_SIZE;
return result;
return _le.readUByte();
}
public int read(byte[] b, int off, int len) {
int limit = Math.min(len, remaining());
if (limit == 0) {
return 0;
}
readFully(b, off,limit);
return limit;
}

public short getSid() {
return currentSid;
return (short) currentSid;
}

public short getLength() {
return currentLength;
public short getLength() { // TODO - remove
return (short) _currentDataLength;
}

public short getRecordOffset() {
return recordOffset;
}

public long getPos() {
return pos;
}
/**
* Note - this method is expected to be called only when completed reading the current BIFF record.
* Calling this before reaching the end of the current record will cause all remaining data to be
* discarded
*/
public boolean hasNextRecord() {
if (_currentDataLength != -1 && _currentDataLength != recordOffset) {
System.out.println("WARN. Unread "+remaining()+" bytes of record 0x"+Integer.toHexString(currentSid));
// discard unread data
while (recordOffset < _currentDataLength) {
readByte();
}
}
if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ) {
nextSid = readNextSid();
_currentDataLength = DATA_LEN_NEEDS_TO_BE_READ;
}
return nextSid != INVALID_SID_VALUE;
}

public boolean hasNextRecord() {
return nextSid != INVALID_SID_VALUE;
}
/**
*
* @return the sid of the next record or {@link #INVALID_SID_VALUE} if at end of stream
*/
private int readNextSid() {
int nAvailable;
try {
nAvailable = _in.available();
} catch (IOException e) {
throw new RecordFormatException("Error checking stream available bytes", e);
}
if (nAvailable < EOFRecord.ENCODED_SIZE) {
if (nAvailable > 0) {
// some scrap left over?
// ex45582-22397.xls has one extra byte after the last record
// Excel reads that file OK
}
return INVALID_SID_VALUE;
}
int result = _le.readUShort();
if (result == INVALID_SID_VALUE) {
throw new RecordFormatException("Found invalid sid (" + result + ")");
}
return result;
}

/** Moves to the next record in the stream.
*
* <i>Note: The auto continue flag is reset to true</i>
*/
public void nextRecord() throws RecordFormatException {
if ((currentLength != -1) && (currentLength != recordOffset)) {
System.out.println("WARN. Unread "+remaining()+" bytes of record 0x"+Integer.toHexString(currentSid));
}
currentSid = nextSid;
pos += LittleEndian.SHORT_SIZE;
autoContinue = true;
try {
recordOffset = 0;
currentLength = LittleEndian.readShort(in);
if (currentLength > MAX_RECORD_DATA_SIZE)
throw new RecordFormatException("The content of an excel record cannot exceed "+MAX_RECORD_DATA_SIZE+" bytes");
pos += LittleEndian.SHORT_SIZE;
in.read(data, 0, currentLength);

//Read the Sid of the next record
if (in.available() < EOFRecord.ENCODED_SIZE) {
if (in.available() > 0) {
// some scrap left over?
// ex45582-22397.xls has one extra byte after the last record
// Excel reads that file OK
}
nextSid = INVALID_SID_VALUE;
} else {
nextSid = LittleEndian.readShort(in);
if (nextSid == INVALID_SID_VALUE) {
throw new RecordFormatException("Found sid " + nextSid + " after record with sid 0x"
+ Integer.toHexString(currentSid).toUpperCase());
}
}
} catch (IOException ex) {
throw new RecordFormatException("Error reading bytes", ex);
}
}
/** Moves to the next record in the stream.
*
* <i>Note: The auto continue flag is reset to true</i>
*/
public void nextRecord() throws RecordFormatException {
if (nextSid == INVALID_SID_VALUE) {
throw new IllegalStateException("EOF - next record not available");
}
currentSid = nextSid;
autoContinue = true;
recordOffset = 0;
_currentDataLength = _le.readUShort();
if (_currentDataLength > MAX_RECORD_DATA_SIZE) {
throw new RecordFormatException("The content of an excel record cannot exceed "
+ MAX_RECORD_DATA_SIZE + " bytes");
}
}

public void setAutoContinue(boolean enable) {
this.autoContinue = enable;
}

public boolean getAutoContinue() {
return autoContinue;
}

private void checkRecordPosition(int requiredByteCount) {

if (remaining() < requiredByteCount) {

@@ -150,11 +176,8 @@ public final class RecordInputStream extends InputStream implements LittleEndian
*/
public byte readByte() {
checkRecordPosition(LittleEndian.BYTE_SIZE);

byte result = data[recordOffset];
recordOffset += LittleEndian.BYTE_SIZE;
pos += LittleEndian.BYTE_SIZE;
return result;
return _le.readByte();
}

/**

@@ -162,29 +185,20 @@ public final class RecordInputStream extends InputStream implements LittleEndian
*/
public short readShort() {
checkRecordPosition(LittleEndian.SHORT_SIZE);

short result = LittleEndian.getShort(data, recordOffset);
recordOffset += LittleEndian.SHORT_SIZE;
pos += LittleEndian.SHORT_SIZE;
return result;
return _le.readShort();
}

public int readInt() {
checkRecordPosition(LittleEndian.INT_SIZE);

int result = LittleEndian.getInt(data, recordOffset);
recordOffset += LittleEndian.INT_SIZE;
pos += LittleEndian.INT_SIZE;
return result;
return _le.readInt();
}

public long readLong() {
checkRecordPosition(LittleEndian.LONG_SIZE);

long result = LittleEndian.getLong(data, recordOffset);
recordOffset += LittleEndian.LONG_SIZE;
pos += LittleEndian.LONG_SIZE;
return result;
return _le.readLong();
}

/**

@@ -200,22 +214,18 @@ public final class RecordInputStream extends InputStream implements LittleEndian
*/
public int readUShort() {
checkRecordPosition(LittleEndian.SHORT_SIZE);

int result = LittleEndian.getUShort(data, recordOffset);
recordOffset += LittleEndian.SHORT_SIZE;
pos += LittleEndian.SHORT_SIZE;
return result;
return _le.readUShort();
}

public double readDouble() {
checkRecordPosition(LittleEndian.DOUBLE_SIZE);
long valueLongBits = LittleEndian.getLong(data, recordOffset);
recordOffset += LittleEndian.DOUBLE_SIZE;
long valueLongBits = _le.readLong();
double result = Double.longBitsToDouble(valueLongBits);
if (Double.isNaN(result)) {
throw new RuntimeException("Did not expect to read NaN"); // (Because Excel typically doesn't write NaN
}
recordOffset += LittleEndian.DOUBLE_SIZE;
pos += LittleEndian.DOUBLE_SIZE;
return result;
}
public void readFully(byte[] buf) {

@@ -224,9 +234,8 @@ public final class RecordInputStream extends InputStream implements LittleEndian

public void readFully(byte[] buf, int off, int len) {
checkRecordPosition(len);
System.arraycopy(data, recordOffset, buf, off, len);
_le.readFully(buf, off, len);
recordOffset+=len;
pos+=len;
}

public String readString() {

@@ -315,18 +324,19 @@ public final class RecordInputStream extends InputStream implements LittleEndian
return new UnicodeString(this);
}

/** Returns the remaining bytes for the current record.
*
* @return The remaining bytes of the current record.
*/
public byte[] readRemainder() {
int size = remaining();
byte[] result = new byte[size];
System.arraycopy(data, recordOffset, result, 0, size);
recordOffset += size;
pos += size;
return result;
}
/** Returns the remaining bytes for the current record.
*
* @return The remaining bytes of the current record.
*/
public byte[] readRemainder() {
int size = remaining();
if (size ==0) {
return EMPTY_BYTE_ARRAY;
}
byte[] result = new byte[size];
readFully(result);
return result;
}

/** Reads all byte data for the current record, including any
* that overlaps into any following continue records.

@@ -350,19 +360,29 @@ public final class RecordInputStream extends InputStream implements LittleEndian
return out.toByteArray();
}

/** The remaining number of bytes in the <i>current</i> record.
*
* @return The number of bytes remaining in the current record
*/
public int remaining() {
return (currentLength - recordOffset);
}
/** The remaining number of bytes in the <i>current</i> record.
*
* @return The number of bytes remaining in the current record
*/
public int remaining() {
if (_currentDataLength == DATA_LEN_NEEDS_TO_BE_READ) {
// already read sid of next record. so current one is finished
return 0;
}
return (_currentDataLength - recordOffset);
}

/** Returns true iif a Continue record is next in the excel stream
*
* @return True when a ContinueRecord is next.
*/
public boolean isContinueNext() {
return (nextSid == ContinueRecord.sid);
}
/**
*
* @return <code>true</code> when a {@link ContinueRecord} is next.
*/
public boolean isContinueNext() {
if (_currentDataLength != DATA_LEN_NEEDS_TO_BE_READ && recordOffset != _currentDataLength) {
throw new IllegalStateException("Should never be called before end of current record");
}
if (!hasNextRecord()) {
return false;
}
return nextSid == ContinueRecord.sid;
}
}

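A generic restatement of the facet selection used in the new RecordInputStream constructor above, relying only on the LittleEndianInput and LittleEndianInputStream types already imported by the diff; the helper class and method names are illustrative:

    import java.io.InputStream;

    import org.apache.poi.util.LittleEndianInput;
    import org.apache.poi.util.LittleEndianInputStream;

    final class LittleEndianFacet {
        private LittleEndianFacet() {}

        /** Reuse the stream's own little-endian view when it has one, otherwise wrap it. */
        static LittleEndianInput of(InputStream in) {
            if (in instanceof LittleEndianInput) {
                return (LittleEndianInput) in;      // e.g. DocumentInputStream after this commit
            }
            return new LittleEndianInputStream(in); // plain streams, common in junit tests
        }
    }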
@@ -1,4 +1,3 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with

@@ -15,437 +14,312 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.poifs.filesystem;

import java.io.*;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.poifs.storage.DataInputBlock;
import org.apache.poi.util.LittleEndianInput;

/**
* This class provides methods to read a DocumentEntry managed by a
* Filesystem instance.
* {@link POIFSFileSystem} instance.
*
* @author Marc Johnson (mjohnson at apache dot org)
*/
public final class DocumentInputStream extends InputStream implements LittleEndianInput {
/** returned by read operations if we're at end of document */
private static final int EOF = -1;

public class DocumentInputStream
extends InputStream
{
private static final int SIZE_SHORT = 2;
private static final int SIZE_INT = 4;
private static final int SIZE_LONG = 8;

// current offset into the Document
private int _current_offset;
/** current offset into the Document */
private int _current_offset;

// current marked offset into the Document (used by mark and
// reset)
private int _marked_offset;
/** current marked offset into the Document (used by mark and reset) */
private int _marked_offset;

// the Document's size
private int _document_size;
/** the Document's size */
private int _document_size;

// have we been closed?
private boolean _closed;
/** have we been closed? */
private boolean _closed;

// the actual Document
private POIFSDocument _document;
/** the actual Document */
private POIFSDocument _document;

// buffer used to read one byte at a time
private byte[] _tiny_buffer;
/** the data block containing the current stream pointer */
private DataInputBlock _currentBlock;

// returned by read operations if we're at end of document
static private final int EOD = -1;
/**
* Create an InputStream from the specified DocumentEntry
*
* @param document the DocumentEntry to be read
*
* @exception IOException if the DocumentEntry cannot be opened (like, maybe it has
* been deleted?)
*/
public DocumentInputStream(DocumentEntry document) throws IOException {
if (!(document instanceof DocumentNode)) {
throw new IOException("Cannot open internal document storage");
}
_current_offset = 0;
_marked_offset = 0;
_document_size = document.getSize();
_closed = false;
_document = ((DocumentNode) document).getDocument();
_currentBlock = getDataInputBlock(0);
}

/**
* Create an InputStream from the specified DocumentEntry
*
* @param document the DocumentEntry to be read
*
* @exception IOException if the DocumentEntry cannot be opened
* (like, maybe it has been deleted?)
*/
/**
* Create an InputStream from the specified Document
*
* @param document the Document to be read
*/
public DocumentInputStream(POIFSDocument document) {
_current_offset = 0;
_marked_offset = 0;
_document_size = document.getSize();
_closed = false;
_document = document;
_currentBlock = getDataInputBlock(0);
}

public DocumentInputStream(final DocumentEntry document)
throws IOException
{
_current_offset = 0;
_marked_offset = 0;
_document_size = document.getSize();
_closed = false;
_tiny_buffer = null;
if (document instanceof DocumentNode)
{
_document = (( DocumentNode ) document).getDocument();
}
else
{
throw new IOException("Cannot open internal document storage");
}
}
public int available() throws IOException {
dieIfClosed();
return _document_size - _current_offset;
}

/**
* Create an InputStream from the specified Document
*
* @param document the Document to be read
*
* @exception IOException if the DocumentEntry cannot be opened
* (like, maybe it has been deleted?)
*/
public void close() {
_closed = true;
}

public DocumentInputStream(final POIFSDocument document)
throws IOException
{
_current_offset = 0;
_marked_offset = 0;
_document_size = document.getSize();
_closed = false;
_tiny_buffer = null;
_document = document;
}
public void mark(int ignoredReadlimit) {
_marked_offset = _current_offset;
}

/**
* Returns the number of bytes that can be read (or skipped over)
* from this input stream without blocking by the next caller of a
* method for this input stream. The next caller might be the same
* thread or or another thread.
*
* @return the number of bytes that can be read from this input
* stream without blocking.
*
* @exception IOException on error (such as the stream has been
* closed)
*/
/**
* Tests if this input stream supports the mark and reset methods.
*
* @return <code>true</code> always
*/
public boolean markSupported() {
return true;
}

public int available()
throws IOException
{
dieIfClosed();
return _document_size - _current_offset;
}
private DataInputBlock getDataInputBlock(int offset) {
return _document.getDataInputBlock(offset);
}

/**
* Closes this input stream and releases any system resources
* associated with the stream.
*
* @exception IOException
*/
public int read() throws IOException {
dieIfClosed();
if (atEOD()) {
return EOF;
}
int result = _currentBlock.readUByte();
_current_offset++;
if (_currentBlock.available() < 1) {
_currentBlock = getDataInputBlock(_current_offset);
}
return result;
}

public void close()
throws IOException
{
_closed = true;
}
public int read(byte[] b) throws IOException {
return read(b, 0, b.length);
}

/**
* Marks the current position in this input stream. A subsequent
* call to the reset method repositions this stream at the last
* marked position so that subsequent reads re-read the same
* bytes.
* <p>
* The readlimit arguments tells this input stream to allow that
* many bytes to be read before the mark position gets
* invalidated. This implementation, however, does not care.
* <p>
* The general contract of mark is that, if the method
* markSupported returns true, the stream somehow remembers all
* the bytes read after the call to mark and stands ready to
* supply those same bytes again if and whenever the method reset
* is called. However, the stream is not required to remember any
* data at all if more than readlimit bytes are read from the
* stream before reset is called. But this stream will.
*
* @param ignoredReadlimit the maximum limit of bytes that can be
* read before the mark position becomes
* invalid. Ignored by this
* implementation.
*/
public int read(byte[] b, int off, int len) throws IOException {
dieIfClosed();
if (b == null) {
throw new IllegalArgumentException("buffer must not be null");
}
if (off < 0 || len < 0 || b.length < off + len) {
throw new IndexOutOfBoundsException("can't read past buffer boundaries");
}
if (len == 0) {
return 0;
}
if (atEOD()) {
return EOF;
}
int limit = Math.min(available(), len);
readFully(b, off, limit);
return limit;
}

public void mark(int ignoredReadlimit)
{
_marked_offset = _current_offset;
}
/**
* Repositions this stream to the position at the time the mark() method was
* last called on this input stream. If mark() has not been called this
* method repositions the stream to its beginning.
*/
public void reset() {
_current_offset = _marked_offset;
_currentBlock = getDataInputBlock(_current_offset);
}

/**
* Tests if this input stream supports the mark and reset methods.
*
* @return true
*/
public long skip(long n) throws IOException {
dieIfClosed();
if (n < 0) {
return 0;
}
int new_offset = _current_offset + (int) n;

public boolean markSupported()
{
return true;
}
if (new_offset < _current_offset) {

/**
* Reads the next byte of data from the input stream. The value
* byte is returned as an int in the range 0 to 255. If no byte is
* available because the end of the stream has been reached, the
* value -1 is returned. The definition of this method in
* java.io.InputStream allows this method to block, but it won't.
*
* @return the next byte of data, or -1 if the end of the stream
* is reached.
*
* @exception IOException
*/
// wrap around in converting a VERY large long to an int
new_offset = _document_size;
} else if (new_offset > _document_size) {
new_offset = _document_size;
}
long rval = new_offset - _current_offset;

public int read()
throws IOException
{
dieIfClosed();
if (atEOD())
{
return EOD;
}
if (_tiny_buffer == null)
{
_tiny_buffer = new byte[ 1 ];
}
_document.read(_tiny_buffer, _current_offset++);
return ((int)_tiny_buffer[ 0 ]) & 0x000000FF;
}
_current_offset = new_offset;
_currentBlock = getDataInputBlock(_current_offset);
return rval;
}

/**
* Reads some number of bytes from the input stream and stores
* them into the buffer array b. The number of bytes actually read
* is returned as an integer. The definition of this method in
* java.io.InputStream allows this method to block, but it won't.
* <p>
* If b is null, a NullPointerException is thrown. If the length
* of b is zero, then no bytes are read and 0 is returned;
* otherwise, there is an attempt to read at least one byte. If no
* byte is available because the stream is at end of file, the
* value -1 is returned; otherwise, at least one byte is read and
* stored into b.
* <p>
* The first byte read is stored into element b[0], the next one
* into b[1], and so on. The number of bytes read is, at most,
* equal to the length of b. Let k be the number of bytes actually
* read; these bytes will be stored in elements b[0] through
* b[k-1], leaving elements b[k] through b[b.length-1] unaffected.
* <p>
* If the first byte cannot be read for any reason other than end
* of file, then an IOException is thrown. In particular, an
* IOException is thrown if the input stream has been closed.
* <p>
* The read(b) method for class InputStream has the same effect as:
* <p>
* <code>read(b, 0, b.length)</code>
*
* @param b the buffer into which the data is read.
*
* @return the total number of bytes read into the buffer, or -1
* if there is no more data because the end of the stream
* has been reached.
*
* @exception IOException
* @exception NullPointerException
*/
private void dieIfClosed() throws IOException {
if (_closed) {
throw new IOException("cannot perform requested operation on a closed stream");
}
}

public int read(final byte [] b)
throws IOException, NullPointerException
{
return read(b, 0, b.length);
}
private boolean atEOD() {
return _current_offset == _document_size;
}

/**
* Reads up to len bytes of data from the input stream into an
* array of bytes. An attempt is made to read as many as len
* bytes, but a smaller number may be read, possibly zero. The
* number of bytes actually read is returned as an integer.
* <p>
* The definition of this method in java.io.InputStream allows it
* to block, but it won't.
* <p>
* If b is null, a NullPointerException is thrown.
* <p>
* If off is negative, or len is negative, or off+len is greater
* than the length of the array b, then an
* IndexOutOfBoundsException is thrown.
* <p>
* If len is zero, then no bytes are read and 0 is returned;
* otherwise, there is an attempt to read at least one byte. If no
* byte is available because the stream is at end of file, the
* value -1 is returned; otherwise, at least one byte is read and
* stored into b.
* <p>
* The first byte read is stored into element b[off], the next one
* into b[off+1], and so on. The number of bytes read is, at most,
* equal to len. Let k be the number of bytes actually read; these
* bytes will be stored in elements b[off] through b[off+k-1],
* leaving elements b[off+k] through b[off+len-1] unaffected.
* <p>
* In every case, elements b[0] through b[off] and elements
* b[off+len] through b[b.length-1] are unaffected.
* <p>
* If the first byte cannot be read for any reason other than end
* of file, then an IOException is thrown. In particular, an
* IOException is thrown if the input stream has been closed.
*
* @param b the buffer into which the data is read.
* @param off the start offset in array b at which the data is
* written.
* @param len the maximum number of bytes to read.
*
* @return the total number of bytes read into the buffer, or -1
* if there is no more data because the end of the stream
* has been reached.
*
* @exception IOException
* @exception NullPointerException
* @exception IndexOutOfBoundsException
*/
private void checkAvaliable(int requestedSize) {
if (_closed) {
throw new RuntimeException("cannot perform requested operation on a closed stream");
}
if (requestedSize > _document_size - _current_offset) {
throw new RuntimeException("Buffer underrun - requested " + requestedSize
+ " bytes but " + (_document_size - _current_offset) + " was available");
}
}

public int read(final byte [] b, final int off, final int len)
throws IOException, NullPointerException, IndexOutOfBoundsException
{
dieIfClosed();
if (b == null)
{
throw new NullPointerException("buffer is null");
}
if ((off < 0) || (len < 0) || (b.length < (off + len)))
{
throw new IndexOutOfBoundsException(
"can't read past buffer boundaries");
}
if (len == 0)
{
return 0;
}
if (atEOD())
{
return EOD;
}
int limit = Math.min(available(), len);
public byte readByte() {
return (byte) readUByte();
}

if ((off == 0) && (limit == b.length))
{
_document.read(b, _current_offset);
}
else
{
byte[] buffer = new byte[ limit ];
public double readDouble() {
return Double.longBitsToDouble(readLong());
}

_document.read(buffer, _current_offset);
System.arraycopy(buffer, 0, b, off, limit);
}
_current_offset += limit;
return limit;
}
public void readFully(byte[] buf) {
readFully(buf, 0, buf.length);
}

/**
* Repositions this stream to the position at the time the mark
* method was last called on this input stream.
* <p>
* The general contract of reset is:
* <p>
* <ul>
* <li>
* If the method markSupported returns true, then:
* <ul>
* <li>
* If the method mark has not been called since the
* stream was created, or the number of bytes read
* from the stream since mark was last called is
* larger than the argument to mark at that last
* call, then an IOException might be thrown.
* </li>
* <li>
* If such an IOException is not thrown, then the
* stream is reset to a state such that all the
* bytes read since the most recent call to mark
* (or since the start of the file, if mark has not
* been called) will be resupplied to subsequent
* callers of the read method, followed by any
* bytes that otherwise would have been the next
* input data as of the time of the call to reset.
* </li>
* </ul>
* </li>
* <li>
* If the method markSupported returns false, then:
* <ul>
* <li>
* The call to reset may throw an IOException.
* </li>
* <li>
* If an IOException is not thrown, then the
* stream is reset to a fixed state that depends
* on the particular type of the input and how it
* was created. The bytes that will be supplied to
* subsequent callers of the read method depend on
* the particular type of the input stream.
* </li>
* </ul>
* </li>
* </ul>
* <p>
* All well and good ... this class's markSupported method returns
* true and this method does not care whether you've called mark
* at all, or whether you've exceeded the number of bytes
* specified in the last call to mark. We're basically walking a
* byte array ... mark and reset to your heart's content.
*/
public short readShort() {
return (short) readUShort();
}

public void reset()
{
_current_offset = _marked_offset;
}
public void readFully(byte[] buf, int off, int len) {
checkAvaliable(len);
int blockAvailable = _currentBlock.available();
if (blockAvailable > len) {
_currentBlock.readFully(buf, off, len);
_current_offset += len;
return;
}
// else read big amount in chunks
int remaining = len;
int writePos = off;
while (remaining > 0) {
boolean blockIsExpiring = remaining >= blockAvailable;
int reqSize;
if (blockIsExpiring) {
reqSize = blockAvailable;
} else {
reqSize = remaining;
}
_currentBlock.readFully(buf, writePos, reqSize);
remaining -= reqSize;
writePos += reqSize;
_current_offset += reqSize;
if (blockIsExpiring) {
if (_current_offset == _document_size) {
if (remaining > 0) {
throw new IllegalStateException(
"reached end of document stream unexpectedly");
}
_currentBlock = null;
break;
}
_currentBlock = getDataInputBlock(_current_offset);
blockAvailable = _currentBlock.available();
}
}
}

/**
* Skips over and discards n bytes of data from this input
* stream. The skip method may, for a variety of reasons, end up
* skipping over some smaller number of bytes, possibly 0. This
* may result from any of a number of conditions; reaching end of
* file before n bytes have been skipped is only one
* possibility. The actual number of bytes skipped is returned. If
* n is negative, no bytes are skipped.
*
* @param n the number of bytes to be skipped.
*
* @return the actual number of bytes skipped.
*
* @exception IOException
*/
public long readLong() {
checkAvaliable(SIZE_LONG);
int blockAvailable = _currentBlock.available();
long result;
if (blockAvailable > SIZE_LONG) {
result = _currentBlock.readLongLE();
} else {
DataInputBlock nextBlock = getDataInputBlock(_current_offset + blockAvailable);
if (blockAvailable == SIZE_LONG) {
result = _currentBlock.readLongLE();
} else {
result = nextBlock.readLongLE(_currentBlock, blockAvailable);
}
_currentBlock = nextBlock;
}
_current_offset += SIZE_LONG;
return result;
}

public long skip(final long n)
throws IOException
{
dieIfClosed();
if (n < 0)
{
return 0;
}
int new_offset = _current_offset + ( int ) n;
public int readInt() {
checkAvaliable(SIZE_INT);
int blockAvailable = _currentBlock.available();
int result;
if (blockAvailable > SIZE_INT) {
result = _currentBlock.readIntLE();
} else {
DataInputBlock nextBlock = getDataInputBlock(_current_offset + blockAvailable);
if (blockAvailable == SIZE_INT) {
result = _currentBlock.readIntLE();
} else {
result = nextBlock.readIntLE(_currentBlock, blockAvailable);
}
_currentBlock = nextBlock;
}
_current_offset += SIZE_INT;
return result;
}

if (new_offset < _current_offset)
{

// wrap around in converting a VERY large long to an int
new_offset = _document_size;
}
else if (new_offset > _document_size)
{
new_offset = _document_size;
}
long rval = new_offset - _current_offset;

_current_offset = new_offset;
return rval;
}

private void dieIfClosed()
throws IOException
{
if (_closed)
{
throw new IOException(
"cannot perform requested operation on a closed stream");
}
}

private boolean atEOD()
{
return _current_offset == _document_size;
}
} // end public class DocumentInputStream
public int readUShort() {
checkAvaliable(SIZE_SHORT);
int blockAvailable = _currentBlock.available();
int result;
if (blockAvailable > SIZE_SHORT) {
result = _currentBlock.readUShortLE();
} else {
DataInputBlock nextBlock = getDataInputBlock(_current_offset + blockAvailable);
if (blockAvailable == SIZE_SHORT) {
result = _currentBlock.readUShortLE();
} else {
result = nextBlock.readUShortLE(_currentBlock);
}
_currentBlock = nextBlock;
}
_current_offset += SIZE_SHORT;
return result;
}

public int readUByte() {
checkAvaliable(1);
int result = _currentBlock.readUByte();
_current_offset++;
if (_currentBlock.available() < 1) {
_currentBlock = getDataInputBlock(_current_offset);
}
return result;
}
}

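For context, a hedged usage sketch of what the reworked DocumentInputStream allows: reading little-endian primitives straight off a POIFS entry, block by block, with no whole-document copy. The POIFSFileSystem construction, the "Workbook" entry name and the file name are assumptions made for illustration, not taken from this diff.

    import java.io.FileInputStream;
    import java.io.IOException;

    import org.apache.poi.poifs.filesystem.DocumentInputStream;
    import org.apache.poi.poifs.filesystem.POIFSFileSystem;

    public final class PoifsReadDemo {
        public static void main(String[] args) throws IOException {
            POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream("sample.xls")); // assumed setup
            DocumentInputStream dis = fs.createDocumentInputStream("Workbook");          // assumed entry name
            int sid = dis.readUShort();      // little-endian reads served block by block
            int len = dis.readUShort();
            byte[] body = new byte[len];
            dis.readFully(body);
            System.out.println("first record sid=0x" + Integer.toHexString(sid) + " len=" + len);
            dis.close();
        }
    }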
@@ -31,6 +31,7 @@ import org.apache.poi.poifs.dev.POIFSViewable;
import org.apache.poi.poifs.property.DocumentProperty;
import org.apache.poi.poifs.property.Property;
import org.apache.poi.poifs.storage.BlockWritable;
import org.apache.poi.poifs.storage.DataInputBlock;
import org.apache.poi.poifs.storage.DocumentBlock;
import org.apache.poi.poifs.storage.ListManagedBlock;
import org.apache.poi.poifs.storage.RawDataBlock;

@@ -194,12 +195,62 @@ public final class POIFSDocument implements BATManaged, BlockWritable, POIFSView
*
* @param buffer the buffer to write to
* @param offset the offset into our storage to read from
* This method is currently (Oct 2008) only used by test code. Perhaps it can be deleted
*/
void read(byte[] buffer, int offset) {
int len = buffer.length;

DataInputBlock currentBlock = getDataInputBlock(offset);

int blockAvailable = currentBlock.available();
if (blockAvailable > len) {
currentBlock.readFully(buffer, 0, len);
return;
}
// else read big amount in chunks
int remaining = len;
int writePos = 0;
int currentOffset = offset;
while (remaining > 0) {
boolean blockIsExpiring = remaining >= blockAvailable;
int reqSize;
if (blockIsExpiring) {
reqSize = blockAvailable;
} else {
reqSize = remaining;
}
currentBlock.readFully(buffer, writePos, reqSize);
remaining-=reqSize;
writePos+=reqSize;
currentOffset += reqSize;
if (blockIsExpiring) {
if (currentOffset == _size) {
if (remaining > 0) {
throw new IllegalStateException("reached end of document stream unexpectedly");
}
currentBlock = null;
break;
}
currentBlock = getDataInputBlock(currentOffset);
blockAvailable = currentBlock.available();
}
}
}

/**
* @return <code>null</code> if <tt>offset</tt> points to the end of the document stream
*/
DataInputBlock getDataInputBlock(int offset) {
if (offset >= _size) {
if (offset > _size) {
throw new RuntimeException("Request for Offset " + offset + " doc size is " + _size);
}
return null;
}
if (_property.shouldUseSmallBlocks()) {
SmallDocumentBlock.read(_small_store.getBlocks(), buffer, offset);
return SmallDocumentBlock.getDataInputBlock(_small_store.getBlocks(), offset);
} else {
DocumentBlock.read(_big_store.getBlocks(), buffer, offset);
return DocumentBlock.getDataInputBlock(_big_store.getBlocks(), offset);
}
}

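The chunked loop in POIFSDocument.read above (and the readFully in DocumentInputStream) share the same shape. A stripped-down sketch with hypothetical interface and method names, shown only to make the hop-to-next-block logic explicit:

    public final class ChunkedReadSketch {
        interface BlockSource {
            Block blockAt(int offset);   // analogous to getDataInputBlock(offset)
        }
        interface Block {
            int available();
            void readFully(byte[] buf, int off, int len);
        }

        static void readAcrossBlocks(BlockSource doc, int offset, byte[] out) {
            int remaining = out.length;
            int writePos = 0;
            Block block = doc.blockAt(offset);
            while (remaining > 0) {
                // take what the current block still holds, up to what is left to read
                int chunk = Math.min(remaining, block.available());
                block.readFully(out, writePos, chunk);
                writePos += chunk;
                remaining -= chunk;
                offset += chunk;
                if (remaining > 0) {
                    block = doc.blockAt(offset); // hop to the block holding the next offset
                }
            }
        }
    }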
@@ -0,0 +1,186 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */

package org.apache.poi.poifs.storage;

/**
* Wraps a <tt>byte</tt> array and provides simple data input access.
* Internally, this class maintains a buffer read index, so that for the most part, primitive
* data can be read in a data-input-stream-like manner.<p/>
*
* Note - the calling class should call the {@link #available()} method to detect end-of-buffer
* and move to the next data block when the current is exhausted.
* For optimisation reasons, no error handling is performed in this class. Thus, mistakes in
* calling code ran may raise ugly exceptions here, like {@link ArrayIndexOutOfBoundsException},
* etc .<p/>
*
* The multi-byte primitive input methods ({@link #readUShortLE()}, {@link #readIntLE()} and
* {@link #readLongLE()}) have corresponding 'spanning read' methods which (when required) perform
* a read across the block boundary. These spanning read methods take the previous
* {@link DataInputBlock} as a parameter.
* Reads of larger amounts of data (into <tt>byte</tt> array buffers) must be managed by the caller
* since these could conceivably involve more than two blocks.
*
* @author Josh Micich
*/
public final class DataInputBlock {

/**
* Possibly any size (usually 512K or 64K). Assumed to be at least 8 bytes for all blocks
* before the end of the stream. The last block in the stream can be any size except zero.
*/
private final byte[] _buf;
private int _readIndex;
private int _maxIndex;

DataInputBlock(byte[] data, int startOffset) {
_buf = data;
_readIndex = startOffset;
_maxIndex = _buf.length;
}
public int available() {
return _maxIndex-_readIndex;
}

public int readUByte() {
return _buf[_readIndex++] & 0xFF;
}

/**
* Reads a <tt>short</tt> which was encoded in <em>little endian</em> format.
*/
public int readUShortLE() {
int i = _readIndex;

int b0 = _buf[i++] & 0xFF;
int b1 = _buf[i++] & 0xFF;
_readIndex = i;
return (b1 << 8) + (b0 << 0);
}

/**
* Reads a <tt>short</tt> which spans the end of <tt>prevBlock</tt> and the start of this block.
*/
public int readUShortLE(DataInputBlock prevBlock) {
// simple case - will always be one byte in each block
int i = prevBlock._buf.length-1;

int b0 = prevBlock._buf[i++] & 0xFF;
int b1 = _buf[_readIndex++] & 0xFF;
return (b1 << 8) + (b0 << 0);
}

/**
* Reads an <tt>int</tt> which was encoded in <em>little endian</em> format.
*/
public int readIntLE() {
int i = _readIndex;

int b0 = _buf[i++] & 0xFF;
int b1 = _buf[i++] & 0xFF;
int b2 = _buf[i++] & 0xFF;
int b3 = _buf[i++] & 0xFF;
_readIndex = i;
return (b3 << 24) + (b2 << 16) + (b1 << 8) + (b0 << 0);
}

/**
* Reads an <tt>int</tt> which spans the end of <tt>prevBlock</tt> and the start of this block.
*/
public int readIntLE(DataInputBlock prevBlock, int prevBlockAvailable) {
byte[] buf = new byte[4];

readSpanning(prevBlock, prevBlockAvailable, buf);
int b0 = buf[0] & 0xFF;
int b1 = buf[1] & 0xFF;
int b2 = buf[2] & 0xFF;
int b3 = buf[3] & 0xFF;
return (b3 << 24) + (b2 << 16) + (b1 << 8) + (b0 << 0);
}

/**
* Reads a <tt>long</tt> which was encoded in <em>little endian</em> format.
*/
public long readLongLE() {
int i = _readIndex;

int b0 = _buf[i++] & 0xFF;
int b1 = _buf[i++] & 0xFF;
int b2 = _buf[i++] & 0xFF;
int b3 = _buf[i++] & 0xFF;
int b4 = _buf[i++] & 0xFF;
int b5 = _buf[i++] & 0xFF;
int b6 = _buf[i++] & 0xFF;
int b7 = _buf[i++] & 0xFF;
_readIndex = i;
return (((long)b7 << 56) +
((long)b6 << 48) +
((long)b5 << 40) +
((long)b4 << 32) +
((long)b3 << 24) +
(b2 << 16) +
(b1 << 8) +
(b0 << 0));
}

/**
* Reads a <tt>long</tt> which spans the end of <tt>prevBlock</tt> and the start of this block.
*/
public long readLongLE(DataInputBlock prevBlock, int prevBlockAvailable) {
byte[] buf = new byte[8];

readSpanning(prevBlock, prevBlockAvailable, buf);

int b0 = buf[0] & 0xFF;
int b1 = buf[1] & 0xFF;
int b2 = buf[2] & 0xFF;
int b3 = buf[3] & 0xFF;
int b4 = buf[4] & 0xFF;
int b5 = buf[5] & 0xFF;
int b6 = buf[6] & 0xFF;
int b7 = buf[7] & 0xFF;
return (((long)b7 << 56) +
((long)b6 << 48) +
((long)b5 << 40) +
((long)b4 << 32) +
((long)b3 << 24) +
(b2 << 16) +
(b1 << 8) +
(b0 << 0));
}

/**
* Reads a small amount of data from across the boundary between two blocks.
* The {@link #_readIndex} of this (the second) block is updated accordingly.
* Note- this method (and other code) assumes that the second {@link DataInputBlock}
* always is big enough to complete the read without being exhausted.
*/
private void readSpanning(DataInputBlock prevBlock, int prevBlockAvailable, byte[] buf) {
System.arraycopy(prevBlock._buf, prevBlock._readIndex, buf, 0, prevBlockAvailable);
int secondReadLen = buf.length-prevBlockAvailable;
System.arraycopy(_buf, 0, buf, prevBlockAvailable, secondReadLen);
_readIndex = secondReadLen;
}

/**
* Reads <tt>len</tt> bytes from this block into the supplied buffer.
*/
public void readFully(byte[] buf, int off, int len) {
System.arraycopy(_buf, _readIndex, buf, off, len);
_readIndex += len;
}
}

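A small same-package sketch (the DataInputBlock constructor above is package-private) showing the spanning read reassembling an int whose four little-endian bytes are split across two blocks; the class name and byte values are illustrative, not part of the commit:

    package org.apache.poi.poifs.storage;

    public final class DataInputBlockSpanDemo {
        public static void main(String[] args) {
            // 0x11223344 stored little-endian (44 33 22 11), split 1 byte / 3 bytes
            DataInputBlock first  = new DataInputBlock(new byte[] { 0x44 }, 0);
            DataInputBlock second = new DataInputBlock(new byte[] { 0x33, 0x22, 0x11 }, 0);

            int value = second.readIntLE(first, first.available()); // spanning read
            System.out.println(Integer.toHexString(value));         // prints 11223344
        }
    }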
@@ -1,4 +1,3 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with

@@ -15,31 +14,27 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.poifs.storage;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import java.util.Arrays;

import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.IntegerField;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;

/**
* A block of document data.
*
* @author Marc Johnson (mjohnson at apache dot org)
*/
public final class DocumentBlock extends BigBlock {
private static final int BLOCK_SHIFT = 9;
private static final int BLOCK_SIZE = 1 << BLOCK_SHIFT;
private static final int BLOCK_MASK = BLOCK_SIZE-1;

public class DocumentBlock
extends BigBlock
{
private static final byte _default_value = ( byte ) 0xFF;
private byte[] _data;
private int _bytes_read;

@@ -161,45 +156,10 @@ public class DocumentBlock
return rval;
}

/**
* read data from an array of DocumentBlocks
*
* @param blocks the blocks to read from
* @param buffer the buffer to write the data into
* @param offset the offset into the array of blocks to read from
*/

public static void read(final DocumentBlock [] blocks,
final byte [] buffer, final int offset)
{
int firstBlockIndex = offset / POIFSConstants.BIG_BLOCK_SIZE;
int firstBlockOffset = offset % POIFSConstants.BIG_BLOCK_SIZE;
int lastBlockIndex = (offset + buffer.length - 1)
/ POIFSConstants.BIG_BLOCK_SIZE;

if (firstBlockIndex == lastBlockIndex)
{
System.arraycopy(blocks[ firstBlockIndex ]._data,
firstBlockOffset, buffer, 0, buffer.length);
}
else
{
int buffer_offset = 0;

System.arraycopy(blocks[ firstBlockIndex ]._data,
firstBlockOffset, buffer, buffer_offset,
POIFSConstants.BIG_BLOCK_SIZE
- firstBlockOffset);
buffer_offset += POIFSConstants.BIG_BLOCK_SIZE - firstBlockOffset;
for (int j = firstBlockIndex + 1; j < lastBlockIndex; j++)
{
System.arraycopy(blocks[ j ]._data, 0, buffer, buffer_offset,
POIFSConstants.BIG_BLOCK_SIZE);
buffer_offset += POIFSConstants.BIG_BLOCK_SIZE;
}
System.arraycopy(blocks[ lastBlockIndex ]._data, 0, buffer,
buffer_offset, buffer.length - buffer_offset);
}
public static DataInputBlock getDataInputBlock(DocumentBlock[] blocks, int offset) {
int firstBlockIndex = offset >> BLOCK_SHIFT;
int firstBlockOffset= offset & BLOCK_MASK;
return new DataInputBlock(blocks[firstBlockIndex]._data, firstBlockOffset);
}

/* ********** START extension of BigBlock ********** */

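The replacement of divide/modulo with shift/mask in getDataInputBlock above relies on the block size being a power of two; for non-negative offsets the two forms are identical. A tiny illustrative check (class name hypothetical):

    public final class BlockIndexMath {
        public static void main(String[] args) {
            int offset = 1234;
            int blockSize = 1 << 9;                                   // 512, as in DocumentBlock
            System.out.println((offset >> 9) == offset / blockSize);  // true (block index 2)
            System.out.println((offset & 511) == offset % blockSize); // true (offset in block 210)
        }
    }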
@@ -1,4 +1,3 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with

@@ -15,13 +14,15 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.poifs.storage;

import java.io.*;

import java.util.*;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.poi.poifs.common.POIFSConstants;

@@ -31,13 +32,14 @@ import org.apache.poi.poifs.common.POIFSConstants;
*
* @author Marc Johnson (mjohnson at apache dot org)
*/
public final class SmallDocumentBlock implements BlockWritable, ListManagedBlock {
private static final int BLOCK_SHIFT = 6;

public class SmallDocumentBlock
implements BlockWritable, ListManagedBlock
{
private byte[] _data;
private static final byte _default_fill = ( byte ) 0xff;
private static final int _block_size = 64;
private static final int _block_size = 1 << BLOCK_SHIFT;
private static final int BLOCK_MASK = _block_size-1;

private static final int _blocks_per_big_block =
POIFSConstants.BIG_BLOCK_SIZE / _block_size;

@@ -178,46 +180,10 @@ public class SmallDocumentBlock
return sdbs;
}

/**
* read data from an array of SmallDocumentBlocks
*
* @param blocks the blocks to read from
* @param buffer the buffer to write the data into
* @param offset the offset into the array of blocks to read from
*/

public static void read(final BlockWritable [] blocks,
final byte [] buffer, final int offset)
{
int firstBlockIndex = offset / _block_size;
int firstBlockOffset = offset % _block_size;
int lastBlockIndex = (offset + buffer.length - 1) / _block_size;

if (firstBlockIndex == lastBlockIndex)
{
System.arraycopy(
(( SmallDocumentBlock ) blocks[ firstBlockIndex ])._data,
firstBlockOffset, buffer, 0, buffer.length);
}
else
{
int buffer_offset = 0;

System.arraycopy(
(( SmallDocumentBlock ) blocks[ firstBlockIndex ])._data,
firstBlockOffset, buffer, buffer_offset,
_block_size - firstBlockOffset);
buffer_offset += _block_size - firstBlockOffset;
for (int j = firstBlockIndex + 1; j < lastBlockIndex; j++)
{
System.arraycopy((( SmallDocumentBlock ) blocks[ j ])._data,
0, buffer, buffer_offset, _block_size);
buffer_offset += _block_size;
}
System.arraycopy(
(( SmallDocumentBlock ) blocks[ lastBlockIndex ])._data, 0,
buffer, buffer_offset, buffer.length - buffer_offset);
}
public static DataInputBlock getDataInputBlock(SmallDocumentBlock[] blocks, int offset) {
int firstBlockIndex = offset >> BLOCK_SHIFT;
int firstBlockOffset= offset & BLOCK_MASK;
return new DataInputBlock(blocks[firstBlockIndex]._data, firstBlockOffset);
}

/**

@@ -1,4 +1,3 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with

@@ -15,25 +14,21 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.poifs.storage;

import java.io.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import java.util.*;

import junit.framework.*;
import junit.framework.TestCase;

/**
* Class to test DocumentBlock functionality
*
* @author Marc Johnson
*/

public class TestDocumentBlock
extends TestCase
{
public final class TestDocumentBlock extends TestCase {
static final private byte[] _testdata;

static

@@ -44,25 +39,10 @@ public class TestDocumentBlock
_testdata[ j ] = ( byte ) j;
}
}
;

/**
* Constructor TestDocumentBlock
*
* @param name
*/

public TestDocumentBlock(String name)
{
super(name);
}

/**
* Test the writing DocumentBlock constructor.
*
* @exception IOException
*/

public void testConstructor()
throws IOException
{

@@ -88,46 +68,10 @@ public class TestDocumentBlock
assertEquals(_testdata.length, size);
}

/**
* test static read method
*
* @exception IOException
*/

public void testRead()
throws IOException
{
DocumentBlock[] blocks = new DocumentBlock[ 4 ];
ByteArrayInputStream input = new ByteArrayInputStream(_testdata);

for (int j = 0; j < 4; j++)
{
blocks[ j ] = new DocumentBlock(input);
}
for (int j = 1; j <= 2000; j += 17)
{
byte[] buffer = new byte[ j ];
int offset = 0;

for (int k = 0; k < (2000 / j); k++)
{
DocumentBlock.read(blocks, buffer, offset);
for (int n = 0; n < buffer.length; n++)
{
assertEquals("checking byte " + (k * j) + n,
_testdata[ (k * j) + n ], buffer[ n ]);
}
offset += j;
}
}
}

/**
* Test 'reading' constructor
*
* @exception IOException
*/

public void testReadingConstructor()
throws IOException
{

@@ -164,17 +108,4 @@ public class TestDocumentBlock
assertEquals(( byte ) 0xFF, copy[ j ]);
}
}

/**
* main method to run the unit tests
*
* @param ignored_args
*/

public static void main(String [] ignored_args)
{
System.out
.println("Testing org.apache.poi.poifs.storage.DocumentBlock");
junit.textui.TestRunner.run(TestDocumentBlock.class);
}
}

@@ -1,4 +1,3 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with

@@ -15,25 +14,24 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.poifs.storage;

import java.io.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import java.util.*;

import junit.framework.*;
import junit.framework.TestCase;

/**
* Class to test SmallDocumentBlock functionality
*
* @author Marc Johnson
*/

public class TestSmallDocumentBlock
extends TestCase
{
public final class TestSmallDocumentBlock extends TestCase {
static final private byte[] _testdata;
static final private int _testdata_size = 2999;

@@ -45,25 +43,10 @@ public class TestSmallDocumentBlock
_testdata[ j ] = ( byte ) j;
}
}
;

/**
* constructor
*
* @param name
*/

public TestSmallDocumentBlock(String name)
{
super(name);
}

/**
* Test conversion from DocumentBlocks
*
* @exception IOException
*/

public void testConvert1()
throws IOException
{

@@ -113,12 +96,7 @@ public class TestSmallDocumentBlock

/**
* Test conversion from byte array
*
* @exception IOException;
*
* @exception IOException
*/

public void testConvert2()
throws IOException
{

@@ -154,57 +132,9 @@ public class TestSmallDocumentBlock
}
}

/**
* Test read method
*
* @exception IOException
*/

public void testRead()
throws IOException
{
ByteArrayInputStream stream = new ByteArrayInputStream(_testdata);
List documents = new ArrayList();

while (true)
{
DocumentBlock block = new DocumentBlock(stream);

documents.add(block);
if (block.partiallyRead())
{
break;
}
}
SmallDocumentBlock[] blocks =
SmallDocumentBlock
.convert(( BlockWritable [] ) documents
.toArray(new DocumentBlock[ 0 ]), _testdata_size);

for (int j = 1; j <= _testdata_size; j += 38)
{
byte[] buffer = new byte[ j ];
int offset = 0;

for (int k = 0; k < (_testdata_size / j); k++)
{
SmallDocumentBlock.read(blocks, buffer, offset);
for (int n = 0; n < buffer.length; n++)
{
assertEquals("checking byte " + (k * j) + n,
_testdata[ (k * j) + n ], buffer[ n ]);
}
offset += j;
}
}
}

/**
* test fill
*
* @exception IOException
*/

public void testFill()
throws IOException
{

@@ -294,17 +224,4 @@ public class TestSmallDocumentBlock
}
}
}

/**
* main method to run the unit tests
*
* @param ignored_args
*/

public static void main(String [] ignored_args)
{
System.out.println(
"Testing org.apache.poi.poifs.storage.SmallDocumentBlock");
junit.textui.TestRunner.run(TestSmallDocumentBlock.class);
}
}