mirror of https://github.com/apache/lucene.git
Reduce overhead for FSTs in FieldReader (#13524)
We don't need to clone the index input we hold on to in OffHeapFSTStore since we only use it for slicing from known coordinates anyway. -> remove the cloning and add the infrastructure to initialize OffHeapFSTStore without seeking the input to the starting offset.
This commit is contained in:
parent
33a4c1d8ef
commit
126834c09e
|
@ -88,21 +88,15 @@ public final class FieldReader extends Terms {
|
|||
(new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong()
|
||||
>>> Lucene40BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;
|
||||
// Initialize FST always off-heap.
|
||||
final IndexInput clone = indexIn.clone();
|
||||
clone.seek(indexStartFP);
|
||||
final FST.FSTMetadata<BytesRef> fstMetadata;
|
||||
if (metaIn == indexIn) { // Only true before Lucene 8.6
|
||||
index =
|
||||
new FST<>(
|
||||
readMetadata(clone, ByteSequenceOutputs.getSingleton()),
|
||||
clone,
|
||||
new OffHeapFSTStore());
|
||||
final IndexInput clone = indexIn.clone();
|
||||
clone.seek(indexStartFP);
|
||||
fstMetadata = readMetadata(clone, ByteSequenceOutputs.getSingleton());
|
||||
} else {
|
||||
index =
|
||||
new FST<>(
|
||||
readMetadata(metaIn, ByteSequenceOutputs.getSingleton()),
|
||||
clone,
|
||||
new OffHeapFSTStore());
|
||||
fstMetadata = readMetadata(metaIn, ByteSequenceOutputs.getSingleton());
|
||||
}
|
||||
index = FST.fromFSTReader(fstMetadata, new OffHeapFSTStore(indexIn, indexStartFP, fstMetadata));
|
||||
/*
|
||||
if (false) {
|
||||
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
|
||||
|
|
|
@ -195,9 +195,10 @@ public class FSTTermsReader extends FieldsProducer {
|
|||
this.sumTotalTermFreq = sumTotalTermFreq;
|
||||
this.sumDocFreq = sumDocFreq;
|
||||
this.docCount = docCount;
|
||||
OffHeapFSTStore offHeapFSTStore = new OffHeapFSTStore();
|
||||
FSTTermOutputs outputs = new FSTTermOutputs(fieldInfo);
|
||||
this.dict = new FST<>(FST.readMetadata(in, outputs), in, offHeapFSTStore);
|
||||
final var fstMetadata = FST.readMetadata(in, outputs);
|
||||
OffHeapFSTStore offHeapFSTStore = new OffHeapFSTStore(in, in.getFilePointer(), fstMetadata);
|
||||
this.dict = FST.fromFSTReader(fstMetadata, offHeapFSTStore);
|
||||
in.skipBytes(offHeapFSTStore.size());
|
||||
}
|
||||
|
||||
|
|
|
@ -90,10 +90,15 @@ public class FSTDictionary implements IndexDictionary {
|
|||
}
|
||||
PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
|
||||
FST.FSTMetadata<Long> metadata = FST.readMetadata(fstDataInput, fstOutputs);
|
||||
FST<Long> fst =
|
||||
isFSTOnHeap
|
||||
? new FST<>(metadata, fstDataInput)
|
||||
: new FST<>(metadata, fstDataInput, new OffHeapFSTStore());
|
||||
FST<Long> fst;
|
||||
if (isFSTOnHeap) {
|
||||
fst = new FST<>(metadata, fstDataInput);
|
||||
} else {
|
||||
final IndexInput indexInput = (IndexInput) fstDataInput;
|
||||
fst =
|
||||
FST.fromFSTReader(
|
||||
metadata, new OffHeapFSTStore(indexInput, indexInput.getFilePointer(), metadata));
|
||||
}
|
||||
return new FSTDictionary(fst);
|
||||
}
|
||||
|
||||
|
|
|
@ -89,13 +89,8 @@ public final class FieldReader extends Terms {
|
|||
readVLongOutput(new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length))
|
||||
>>> Lucene90BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;
|
||||
// Initialize FST always off-heap.
|
||||
final IndexInput clone = indexIn.clone();
|
||||
clone.seek(indexStartFP);
|
||||
index =
|
||||
new FST<>(
|
||||
FST.readMetadata(metaIn, ByteSequenceOutputs.getSingleton()),
|
||||
clone,
|
||||
new OffHeapFSTStore());
|
||||
var metadata = FST.readMetadata(metaIn, ByteSequenceOutputs.getSingleton());
|
||||
index = FST.fromFSTReader(metadata, new OffHeapFSTStore(indexIn, indexStartFP, metadata));
|
||||
/*
|
||||
if (false) {
|
||||
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
|
||||
|
|
|
@ -417,16 +417,7 @@ public final class FST<T> implements Accountable {
|
|||
* maxBlockBits set to {@link #DEFAULT_MAX_BLOCK_BITS}
|
||||
*/
|
||||
public FST(FSTMetadata<T> metadata, DataInput in) throws IOException {
|
||||
this(metadata, in, new OnHeapFSTStore(DEFAULT_MAX_BLOCK_BITS));
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a previously saved FST with a metdata object and a FSTStore. If using {@link
|
||||
* OnHeapFSTStore}, setting maxBlockBits allows you to control the size of the byte[] pages used
|
||||
* to hold the FST bytes.
|
||||
*/
|
||||
public FST(FSTMetadata<T> metadata, DataInput in, FSTStore fstStore) throws IOException {
|
||||
this(metadata, fstStore.init(in, metadata.numBytes));
|
||||
this(metadata, new OnHeapFSTStore(DEFAULT_MAX_BLOCK_BITS, in, metadata.numBytes));
|
||||
}
|
||||
|
||||
/** Create the FST with a metadata object and a FSTReader. */
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.util.fst;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
|
||||
/** A type of {@link FSTReader} which needs data to be initialized before use */
|
||||
public interface FSTStore extends FSTReader {
|
||||
|
||||
/**
|
||||
* Initialize the FSTStore
|
||||
*
|
||||
* @param in the DataInput to read from
|
||||
* @param numBytes the number of bytes to read
|
||||
* @return this FSTStore
|
||||
* @throws IOException if exception occurred during reading the DataInput
|
||||
*/
|
||||
FSTStore init(DataInput in, long numBytes) throws IOException;
|
||||
}
|
|
@ -17,7 +17,6 @@
|
|||
package org.apache.lucene.util.fst;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
@ -28,27 +27,19 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class OffHeapFSTStore implements FSTStore {
|
||||
public final class OffHeapFSTStore implements FSTReader {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED =
|
||||
RamUsageEstimator.shallowSizeOfInstance(OffHeapFSTStore.class);
|
||||
|
||||
private IndexInput in;
|
||||
private long offset;
|
||||
private long numBytes;
|
||||
private final IndexInput in;
|
||||
private final long offset;
|
||||
private final long numBytes;
|
||||
|
||||
@Override
|
||||
public FSTStore init(DataInput in, long numBytes) throws IOException {
|
||||
if (in instanceof IndexInput) {
|
||||
this.in = (IndexInput) in;
|
||||
this.numBytes = numBytes;
|
||||
this.offset = this.in.getFilePointer();
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
"parameter:in should be an instance of IndexInput for using OffHeapFSTStore, not a "
|
||||
+ in.getClass().getName());
|
||||
}
|
||||
return this;
|
||||
public OffHeapFSTStore(IndexInput in, long offset, FST.FSTMetadata<?> metadata) {
|
||||
this.in = in;
|
||||
this.offset = offset;
|
||||
this.numBytes = metadata.numBytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public final class OnHeapFSTStore implements FSTStore {
|
||||
public final class OnHeapFSTStore implements FSTReader {
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED =
|
||||
RamUsageEstimator.shallowSizeOfInstance(OnHeapFSTStore.class);
|
||||
|
@ -40,31 +40,24 @@ public final class OnHeapFSTStore implements FSTStore {
|
|||
private ReadWriteDataOutput dataOutput;
|
||||
|
||||
/** Used at read time when the FST fits into a single byte[]. */
|
||||
private byte[] bytesArray;
|
||||
private final byte[] bytesArray;
|
||||
|
||||
private final int maxBlockBits;
|
||||
|
||||
public OnHeapFSTStore(int maxBlockBits) {
|
||||
public OnHeapFSTStore(int maxBlockBits, DataInput in, long numBytes) throws IOException {
|
||||
if (maxBlockBits < 1 || maxBlockBits > 30) {
|
||||
throw new IllegalArgumentException("maxBlockBits should be 1 .. 30; got " + maxBlockBits);
|
||||
}
|
||||
|
||||
this.maxBlockBits = maxBlockBits;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FSTStore init(DataInput in, long numBytes) throws IOException {
|
||||
if (numBytes > 1 << this.maxBlockBits) {
|
||||
if (numBytes > 1 << maxBlockBits) {
|
||||
// FST is big: we need multiple pages
|
||||
dataOutput = (ReadWriteDataOutput) getOnHeapReaderWriter(maxBlockBits);
|
||||
dataOutput.copyBytes(in, numBytes);
|
||||
dataOutput.freeze();
|
||||
bytesArray = null;
|
||||
} else {
|
||||
// FST fits into a single block: use ByteArrayBytesStoreReader for less overhead
|
||||
bytesArray = new byte[(int) numBytes];
|
||||
in.readBytes(bytesArray, 0, bytesArray.length);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -93,7 +93,10 @@ public class Test2BFSTOffHeap extends LuceneTestCase {
|
|||
FST.FSTMetadata<Object> fstMetadata = fstCompiler.compile();
|
||||
indexOutput.close();
|
||||
try (IndexInput indexInput = dir.openInput("fst", IOContext.DEFAULT)) {
|
||||
FST<Object> fst = new FST<>(fstMetadata, indexInput, new OffHeapFSTStore());
|
||||
FST<Object> fst =
|
||||
FST.fromFSTReader(
|
||||
fstMetadata,
|
||||
new OffHeapFSTStore(indexInput, indexInput.getFilePointer(), fstMetadata));
|
||||
|
||||
for (int verify = 0; verify < 2; verify++) {
|
||||
System.out.println(
|
||||
|
@ -181,7 +184,10 @@ public class Test2BFSTOffHeap extends LuceneTestCase {
|
|||
FST.FSTMetadata<BytesRef> fstMetadata = fstCompiler.compile();
|
||||
indexOutput.close();
|
||||
try (IndexInput indexInput = dir.openInput("fst", IOContext.DEFAULT)) {
|
||||
FST<BytesRef> fst = new FST<>(fstMetadata, indexInput, new OffHeapFSTStore());
|
||||
FST<BytesRef> fst =
|
||||
FST.fromFSTReader(
|
||||
fstMetadata,
|
||||
new OffHeapFSTStore(indexInput, indexInput.getFilePointer(), fstMetadata));
|
||||
for (int verify = 0; verify < 2; verify++) {
|
||||
|
||||
System.out.println(
|
||||
|
@ -266,7 +272,10 @@ public class Test2BFSTOffHeap extends LuceneTestCase {
|
|||
FST.FSTMetadata<Long> fstMetadata = fstCompiler.compile();
|
||||
indexOutput.close();
|
||||
try (IndexInput indexInput = dir.openInput("fst", IOContext.DEFAULT)) {
|
||||
FST<Long> fst = new FST<>(fstMetadata, indexInput, new OffHeapFSTStore());
|
||||
FST<Long> fst =
|
||||
FST.fromFSTReader(
|
||||
fstMetadata,
|
||||
new OffHeapFSTStore(indexInput, indexInput.getFilePointer(), fstMetadata));
|
||||
|
||||
for (int verify = 0; verify < 2; verify++) {
|
||||
|
||||
|
|
|
@ -341,11 +341,10 @@ public final class NRTSuggester implements Accountable {
|
|||
PairOutputs<Long, BytesRef> outputs =
|
||||
new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
|
||||
if (shouldLoadFSTOffHeap(input, fstLoadMode)) {
|
||||
OffHeapFSTStore store = new OffHeapFSTStore();
|
||||
IndexInput clone = input.clone();
|
||||
clone.seek(input.getFilePointer());
|
||||
fst = new FST<>(FST.readMetadata(clone, outputs), clone, store);
|
||||
input.seek(clone.getFilePointer() + store.size());
|
||||
final FST.FSTMetadata<Pair<Long, BytesRef>> fstMetadata = FST.readMetadata(input, outputs);
|
||||
OffHeapFSTStore store = new OffHeapFSTStore(input, input.getFilePointer(), fstMetadata);
|
||||
fst = FST.fromFSTReader(fstMetadata, store);
|
||||
input.skipBytes(store.size());
|
||||
} else {
|
||||
fst = new FST<>(FST.readMetadata(input, outputs), input);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue