mirror of https://github.com/apache/lucene.git
Optimize FST on-heap BytesReader (#12879)
* Move size() to FSTStore * Remove size() completely * Allow FST builder to use different DataOutput * access BytesStore byte[] directly for copying * Rename BytesStore * Change class to final * Reorder methods * Remove unused methods * Rename truncate to setPosition() and remove skipBytes() * Simplify the writing operations * Update comment * remove unused parameter * Simplify BytesStore operation * tidy code * Rename copyBytes to writeTo * Simplify BytesStore operations * Embed writeBytes() to FSTCompiler * Fix the write bytes method * Remove the default block bits constant * add assertion * Rename method parameter names * Move reverse to FSTCompiler * Revert setPosition call * Address comments * Return immediately when writing 0 bytes * Add comment & * Rename variables * Fix the compile error * Remove isReadable() * Remove isReadable() * Optimize ReadWriteDataOutput * tidy code * Freeze the DataOutput once finished() * Refactor * freeze the DataOutput before use * Improvement of ReadWriteDataOutput * tidy code * Address comments and add off-heap FST tests * Remove the hardcoded random * Ignore the Test2BFSTOffHeap test * Simplify ReadWriteDataOutput * Do not expose blockBits * tidy code * Remove 0 initialization * Add assertion and comment
This commit is contained in:
parent
7b8aece125
commit
4c883a414c
|
@ -16,8 +16,6 @@
|
|||
*/
|
||||
package org.apache.lucene.util.fst;
|
||||
|
||||
import static org.apache.lucene.store.ByteBuffersDataOutput.ALLOCATE_BB_ON_HEAP;
|
||||
import static org.apache.lucene.store.ByteBuffersDataOutput.NO_REUSE;
|
||||
import static org.apache.lucene.util.fst.FST.ARCS_FOR_BINARY_SEARCH;
|
||||
import static org.apache.lucene.util.fst.FST.ARCS_FOR_CONTINUOUS;
|
||||
import static org.apache.lucene.util.fst.FST.ARCS_FOR_DIRECT_ADDRESSING;
|
||||
|
@ -34,7 +32,6 @@ import static org.apache.lucene.util.fst.FST.getNumPresenceBytes;
|
|||
import java.io.IOException;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.ByteBuffersDataOutput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
@ -153,8 +150,7 @@ public class FSTCompiler<T> {
|
|||
* @return the DataOutput
|
||||
*/
|
||||
public static DataOutput getOnHeapReaderWriter(int blockBits) {
|
||||
return new ReadWriteDataOutput(
|
||||
new ByteBuffersDataOutput(blockBits, blockBits, ALLOCATE_BB_ON_HEAP, NO_REUSE));
|
||||
return new ReadWriteDataOutput(blockBits);
|
||||
}
|
||||
|
||||
private FSTCompiler(
|
||||
|
|
|
@ -16,8 +16,12 @@
|
|||
*/
|
||||
package org.apache.lucene.util.fst;
|
||||
|
||||
import static org.apache.lucene.store.ByteBuffersDataOutput.ALLOCATE_BB_ON_HEAP;
|
||||
import static org.apache.lucene.store.ByteBuffersDataOutput.NO_REUSE;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.store.ByteBuffersDataInput;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.store.ByteBuffersDataOutput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
|
||||
|
@ -28,13 +32,19 @@ import org.apache.lucene.store.DataOutput;
|
|||
final class ReadWriteDataOutput extends DataOutput implements FSTReader {
|
||||
|
||||
private final ByteBuffersDataOutput dataOutput;
|
||||
// the DataInput to read from once we finish writing
|
||||
private ByteBuffersDataInput dataInput;
|
||||
private final int blockBits;
|
||||
private final int blockSize;
|
||||
private final int blockMask;
|
||||
private List<ByteBuffer> byteBuffers;
|
||||
// whether this DataOutput is already frozen
|
||||
private boolean frozen;
|
||||
|
||||
public ReadWriteDataOutput(ByteBuffersDataOutput dataOutput) {
|
||||
this.dataOutput = dataOutput;
|
||||
public ReadWriteDataOutput(int blockBits) {
|
||||
this.dataOutput =
|
||||
new ByteBuffersDataOutput(blockBits, blockBits, ALLOCATE_BB_ON_HEAP, NO_REUSE);
|
||||
this.blockBits = blockBits;
|
||||
this.blockSize = 1 << blockBits;
|
||||
this.blockMask = blockSize - 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -56,14 +66,62 @@ final class ReadWriteDataOutput extends DataOutput implements FSTReader {
|
|||
|
||||
public void freeze() {
|
||||
frozen = true;
|
||||
// this operation are costly, so we want to compute it once and cache
|
||||
dataInput = dataOutput.toDataInput();
|
||||
// this operation is costly, so we want to compute it once and cache
|
||||
this.byteBuffers = dataOutput.toWriteableBufferList();
|
||||
// ensure the ByteBuffer internal array is accessible. The call to toWriteableBufferList() above
|
||||
// would ensure that it is accessible.
|
||||
assert byteBuffers.stream().allMatch(ByteBuffer::hasArray);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FST.BytesReader getReverseBytesReader() {
|
||||
assert dataInput != null; // freeze() must be called first
|
||||
return new ReverseRandomAccessReader(dataInput);
|
||||
assert byteBuffers != null; // freeze() must be called first
|
||||
if (byteBuffers.size() == 1) {
|
||||
// use a faster implementation for single-block case
|
||||
return new ReverseBytesReader(byteBuffers.get(0).array());
|
||||
}
|
||||
return new FST.BytesReader() {
|
||||
private byte[] current = byteBuffers.get(0).array();
|
||||
private int nextBuffer = -1;
|
||||
private int nextRead;
|
||||
|
||||
@Override
|
||||
public byte readByte() {
|
||||
if (nextRead == -1) {
|
||||
current = byteBuffers.get(nextBuffer--).array();
|
||||
nextRead = blockSize - 1;
|
||||
}
|
||||
return current[nextRead--];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skipBytes(long count) {
|
||||
setPosition(getPosition() - count);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readBytes(byte[] b, int offset, int len) {
|
||||
for (int i = 0; i < len; i++) {
|
||||
b[offset + i] = readByte();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getPosition() {
|
||||
return ((long) nextBuffer + 1) * blockSize + nextRead;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setPosition(long pos) {
|
||||
int bufferIndex = (int) (pos >> blockBits);
|
||||
if (nextBuffer != bufferIndex - 1) {
|
||||
nextBuffer = bufferIndex - 1;
|
||||
current = byteBuffers.get(bufferIndex).array();
|
||||
}
|
||||
nextRead = (int) (pos & blockMask);
|
||||
assert getPosition() == pos : "pos=" + pos + " getPos()=" + getPosition();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue