mirror of https://github.com/apache/lucene.git
* Fix index out of bounds when writing FST to different metaOut (#12697) * Tidify code * Update CHANGES.txt * Re-add assertion
This commit is contained in:
parent
343a9e7100
commit
0d8a3e6c4f
|
@ -216,6 +216,8 @@ Bug Fixes
|
|||
|
||||
* GITHUB#11556: HTMLStripCharFilter fails on '>' or '<' characters in attribute values. (Elliot Lin)
|
||||
|
||||
* GITHUB#12698: Fix IndexOutOfBoundsException when saving FSTStore-backed FST with different DataOutput for metadata (Anh Dung Bui)
|
||||
|
||||
* GITHUB#12642: Ensure #finish only gets called once on the base collector during drill-sideways (Greg Miller)
|
||||
|
||||
Build
|
||||
|
|
|
@ -558,6 +558,8 @@ public final class FST<T> implements Accountable {
|
|||
bytes.writeTo(out);
|
||||
} else {
|
||||
assert fstStore != null;
|
||||
long numBytes = fstStore.size();
|
||||
metaOut.writeVLong(numBytes);
|
||||
fstStore.writeTo(out);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,13 +68,19 @@ public final class OnHeapFSTStore implements FSTStore {
|
|||
if (bytesArray != null) {
|
||||
return bytesArray.length;
|
||||
} else {
|
||||
return bytes.ramBytesUsed();
|
||||
return bytes.getPosition();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return BASE_RAM_BYTES_USED + size();
|
||||
long size = BASE_RAM_BYTES_USED;
|
||||
if (bytesArray != null) {
|
||||
size += bytesArray.length;
|
||||
} else {
|
||||
size += bytes.ramBytesUsed();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -89,12 +95,9 @@ public final class OnHeapFSTStore implements FSTStore {
|
|||
@Override
|
||||
public void writeTo(DataOutput out) throws IOException {
|
||||
if (bytes != null) {
|
||||
long numBytes = bytes.getPosition();
|
||||
out.writeVLong(numBytes);
|
||||
bytes.writeTo(out);
|
||||
} else {
|
||||
assert bytesArray != null;
|
||||
out.writeVLong(bytesArray.length);
|
||||
out.writeBytes(bytesArray, 0, bytesArray.length);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ import static org.apache.lucene.tests.util.fst.FSTTester.simpleRandomString;
|
|||
import static org.apache.lucene.tests.util.fst.FSTTester.toIntsRef;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.StringWriter;
|
||||
import java.io.Writer;
|
||||
|
@ -54,11 +55,13 @@ import org.apache.lucene.index.Terms;
|
|||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||
import org.apache.lucene.tests.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||
import org.apache.lucene.tests.store.MockDirectoryWrapper;
|
||||
|
@ -1193,6 +1196,47 @@ public class TestFSTs extends LuceneTestCase {
|
|||
assertTrue(w.toString().contains("[label=\"n\" style=\"bold\""));
|
||||
}
|
||||
|
||||
// https://github.com/apache/lucene/issues/12697
|
||||
// Make sure the FST can be saved and loaded with different DataOutput for metadata
|
||||
public void testSaveDifferentMetaOut() throws Exception {
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
|
||||
// first build the FST from scratch
|
||||
final IntsRefBuilder scratch = new IntsRefBuilder();
|
||||
fstCompiler.add(Util.toIntsRef(newBytesRef("aab"), scratch), 22L);
|
||||
fstCompiler.add(Util.toIntsRef(newBytesRef("aac"), scratch), 7L);
|
||||
fstCompiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), 17L);
|
||||
|
||||
FST<Long> fst = fstCompiler.compile();
|
||||
|
||||
// save the FST to DataOutput, here it would not matter whether we are saving to different
|
||||
// DataOutput for meta or not
|
||||
ByteArrayOutputStream outOS = new ByteArrayOutputStream();
|
||||
OutputStreamDataOutput out = new OutputStreamDataOutput(outOS);
|
||||
fst.save(out, out);
|
||||
|
||||
// load the FST, which will force it to use FSTStore instead of BytesStore
|
||||
ByteArrayDataInput in = new ByteArrayDataInput(outOS.toByteArray());
|
||||
FST<Long> loadedFST = new FST<>(in, in, outputs);
|
||||
|
||||
// now save the FST again, this time to different DataOutput for meta
|
||||
ByteArrayOutputStream metdataOS = new ByteArrayOutputStream();
|
||||
OutputStreamDataOutput metaOut = new OutputStreamDataOutput(metdataOS);
|
||||
ByteArrayOutputStream dataOS = new ByteArrayOutputStream();
|
||||
OutputStreamDataOutput dataOut = new OutputStreamDataOutput(dataOS);
|
||||
loadedFST.save(metaOut, dataOut);
|
||||
|
||||
// finally load it again
|
||||
ByteArrayDataInput metaIn = new ByteArrayDataInput(metdataOS.toByteArray());
|
||||
ByteArrayDataInput dataIn = new ByteArrayDataInput(dataOS.toByteArray());
|
||||
loadedFST = new FST<>(metaIn, dataIn, outputs);
|
||||
|
||||
assertEquals(22L, Util.get(loadedFST, Util.toIntsRef(newBytesRef("aab"), scratch)).longValue());
|
||||
assertEquals(7L, Util.get(loadedFST, Util.toIntsRef(newBytesRef("aac"), scratch)).longValue());
|
||||
assertEquals(17L, Util.get(loadedFST, Util.toIntsRef(newBytesRef("ax"), scratch)).longValue());
|
||||
}
|
||||
|
||||
// Make sure raw FST can differentiate between final vs
|
||||
// non-final end nodes
|
||||
public void testNonFinalStopNode() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue