mirror of https://github.com/apache/lucene.git
* Fix index out of bounds when writing FST to a different metaOut (#12697) * Tidy up code * Update CHANGES.txt * Re-add assertion
This commit is contained in:
parent
343a9e7100
commit
0d8a3e6c4f
|
@ -216,6 +216,8 @@ Bug Fixes
|
||||||
|
|
||||||
* GITHUB#11556: HTMLStripCharFilter fails on '>' or '<' characters in attribute values. (Elliot Lin)
|
* GITHUB#11556: HTMLStripCharFilter fails on '>' or '<' characters in attribute values. (Elliot Lin)
|
||||||
|
|
||||||
|
* GITHUB#12698: Fix IndexOutOfBoundsException when saving FSTStore-backed FST with different DataOutput for metadata (Anh Dung Bui)
|
||||||
|
|
||||||
* GITHUB#12642: Ensure #finish only gets called once on the base collector during drill-sideways (Greg Miller)
|
* GITHUB#12642: Ensure #finish only gets called once on the base collector during drill-sideways (Greg Miller)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
|
@ -558,6 +558,8 @@ public final class FST<T> implements Accountable {
|
||||||
bytes.writeTo(out);
|
bytes.writeTo(out);
|
||||||
} else {
|
} else {
|
||||||
assert fstStore != null;
|
assert fstStore != null;
|
||||||
|
long numBytes = fstStore.size();
|
||||||
|
metaOut.writeVLong(numBytes);
|
||||||
fstStore.writeTo(out);
|
fstStore.writeTo(out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,13 +68,19 @@ public final class OnHeapFSTStore implements FSTStore {
|
||||||
if (bytesArray != null) {
|
if (bytesArray != null) {
|
||||||
return bytesArray.length;
|
return bytesArray.length;
|
||||||
} else {
|
} else {
|
||||||
return bytes.ramBytesUsed();
|
return bytes.getPosition();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long ramBytesUsed() {
|
public long ramBytesUsed() {
|
||||||
return BASE_RAM_BYTES_USED + size();
|
long size = BASE_RAM_BYTES_USED;
|
||||||
|
if (bytesArray != null) {
|
||||||
|
size += bytesArray.length;
|
||||||
|
} else {
|
||||||
|
size += bytes.ramBytesUsed();
|
||||||
|
}
|
||||||
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -89,12 +95,9 @@ public final class OnHeapFSTStore implements FSTStore {
|
||||||
@Override
|
@Override
|
||||||
public void writeTo(DataOutput out) throws IOException {
|
public void writeTo(DataOutput out) throws IOException {
|
||||||
if (bytes != null) {
|
if (bytes != null) {
|
||||||
long numBytes = bytes.getPosition();
|
|
||||||
out.writeVLong(numBytes);
|
|
||||||
bytes.writeTo(out);
|
bytes.writeTo(out);
|
||||||
} else {
|
} else {
|
||||||
assert bytesArray != null;
|
assert bytesArray != null;
|
||||||
out.writeVLong(bytesArray.length);
|
|
||||||
out.writeBytes(bytesArray, 0, bytesArray.length);
|
out.writeBytes(bytesArray, 0, bytesArray.length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import static org.apache.lucene.tests.util.fst.FSTTester.simpleRandomString;
|
||||||
import static org.apache.lucene.tests.util.fst.FSTTester.toIntsRef;
|
import static org.apache.lucene.tests.util.fst.FSTTester.toIntsRef;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.io.Writer;
|
import java.io.Writer;
|
||||||
|
@ -54,11 +55,13 @@ import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.store.ByteArrayDataInput;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||||
import org.apache.lucene.tests.analysis.MockAnalyzer;
|
import org.apache.lucene.tests.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.tests.store.MockDirectoryWrapper;
|
import org.apache.lucene.tests.store.MockDirectoryWrapper;
|
||||||
|
@ -1193,6 +1196,47 @@ public class TestFSTs extends LuceneTestCase {
|
||||||
assertTrue(w.toString().contains("[label=\"n\" style=\"bold\""));
|
assertTrue(w.toString().contains("[label=\"n\" style=\"bold\""));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://github.com/apache/lucene/issues/12697
|
||||||
|
// Make sure the FST can be saved and loaded with a different DataOutput for metadata
|
||||||
|
public void testSaveDifferentMetaOut() throws Exception {
|
||||||
|
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||||
|
FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||||
|
|
||||||
|
// first build the FST from scratch
|
||||||
|
final IntsRefBuilder scratch = new IntsRefBuilder();
|
||||||
|
fstCompiler.add(Util.toIntsRef(newBytesRef("aab"), scratch), 22L);
|
||||||
|
fstCompiler.add(Util.toIntsRef(newBytesRef("aac"), scratch), 7L);
|
||||||
|
fstCompiler.add(Util.toIntsRef(newBytesRef("ax"), scratch), 17L);
|
||||||
|
|
||||||
|
FST<Long> fst = fstCompiler.compile();
|
||||||
|
|
||||||
|
// save the FST to a DataOutput; at this point it does not matter whether a different
|
||||||
|
// DataOutput is used for the metadata or not
|
||||||
|
ByteArrayOutputStream outOS = new ByteArrayOutputStream();
|
||||||
|
OutputStreamDataOutput out = new OutputStreamDataOutput(outOS);
|
||||||
|
fst.save(out, out);
|
||||||
|
|
||||||
|
// load the FST, which will force it to use FSTStore instead of BytesStore
|
||||||
|
ByteArrayDataInput in = new ByteArrayDataInput(outOS.toByteArray());
|
||||||
|
FST<Long> loadedFST = new FST<>(in, in, outputs);
|
||||||
|
|
||||||
|
// now save the FST again, this time to a different DataOutput for meta
|
||||||
|
ByteArrayOutputStream metdataOS = new ByteArrayOutputStream();
|
||||||
|
OutputStreamDataOutput metaOut = new OutputStreamDataOutput(metdataOS);
|
||||||
|
ByteArrayOutputStream dataOS = new ByteArrayOutputStream();
|
||||||
|
OutputStreamDataOutput dataOut = new OutputStreamDataOutput(dataOS);
|
||||||
|
loadedFST.save(metaOut, dataOut);
|
||||||
|
|
||||||
|
// finally load it again
|
||||||
|
ByteArrayDataInput metaIn = new ByteArrayDataInput(metdataOS.toByteArray());
|
||||||
|
ByteArrayDataInput dataIn = new ByteArrayDataInput(dataOS.toByteArray());
|
||||||
|
loadedFST = new FST<>(metaIn, dataIn, outputs);
|
||||||
|
|
||||||
|
assertEquals(22L, Util.get(loadedFST, Util.toIntsRef(newBytesRef("aab"), scratch)).longValue());
|
||||||
|
assertEquals(7L, Util.get(loadedFST, Util.toIntsRef(newBytesRef("aac"), scratch)).longValue());
|
||||||
|
assertEquals(17L, Util.get(loadedFST, Util.toIntsRef(newBytesRef("ax"), scratch)).longValue());
|
||||||
|
}
|
||||||
|
|
||||||
// Make sure raw FST can differentiate between final vs
|
// Make sure raw FST can differentiate between final vs
|
||||||
// non-final end nodes
|
// non-final end nodes
|
||||||
public void testNonFinalStopNode() throws Exception {
|
public void testNonFinalStopNode() throws Exception {
|
||||||
|
|
Loading…
Reference in New Issue