mirror of https://github.com/apache/lucene.git

Merge branch 'main' into java_21

commit 05b23abe92
@@ -67,6 +67,13 @@
     </maintainer>

     <!-- NOTE: please insert releases in numeric order, NOT chronologically. -->
+    <release>
+      <Version>
+        <name>lucene-9.9.0</name>
+        <created>2023-12-04</created>
+        <revision>9.9.0</revision>
+      </Version>
+    </release>
     <release>
       <Version>
         <name>lucene-9.8.0</name>
@@ -62,7 +62,8 @@ configure(project(":lucene:core")) {
     classpath = configurations.apiextractor
     mainClass = file("${resources}/ExtractJdkApis.java") as String
     systemProperties = [
-      'user.timezone': 'UTC'
+      'user.timezone': 'UTC',
+      'file.encoding': 'UTF-8',
     ]
     args = [
       jdkVersion,
@@ -189,7 +189,7 @@ public final class ExtractJdkApis {
      }

      @Override
      public void visitPermittedSubclass(String c) {
      }

    }
@@ -60,6 +60,9 @@ grant {
  permission java.lang.RuntimePermission "getFileStoreAttributes";
  permission java.lang.RuntimePermission "writeFileDescriptor";

+  // needed to check if C2 (implied by the presence of the CI env) is enabled
+  permission java.lang.RuntimePermission "getenv.CI";
+
  // TestLockFactoriesMultiJVM opens a random port on 127.0.0.1 (port 0 = ephemeral port range):
  permission java.net.SocketPermission "127.0.0.1:0", "accept,listen,resolve";
@@ -139,7 +139,7 @@ Larger heap size
By default tests run with a 512 MB max heap. But some tests (monster/nightly)
need more heap. Use "-Dtests.heapsize" for this:

-gradlew -p lucene/core test --tests "Test2BFST" -Dtest.heapsize=32g
+gradlew -p lucene/core test --tests "Test2BFST" -Dtests.heapsize=32g


Run GUI tests headlessly with Xvfb (Linux only)
@@ -171,7 +171,11 @@ API Changes

New Features
---------------------
-(No changes)
+
+* GITHUB#12679: Add support for similarity-based vector searches using [Byte|Float]VectorSimilarityQuery. Uses a new
+  VectorSimilarityCollector to find all vectors scoring above a `resultSimilarity` while traversing the HNSW graph till
+  better-scoring nodes are available, or the best candidate is below a score of `traversalSimilarity` in the lowest
+  level. (Aditya Prakash, Kaival Parikh)

Improvements
---------------------
@@ -191,11 +195,25 @@ Bug Fixes
* GITHUB#12558: Ensure #finish is called on all drill-sideways FacetsCollectors even when no hits are scored.
  (Greg Miller)

+* GITHUB#12920: Address bug in TestDrillSideways#testCollectionTerminated that could occasionally cause the test to
+  fail with certain random seeds. (Greg Miller)
+
Other
---------------------

* GITHUB#11023: Removing some dead code in CheckIndex. (Jakub Slowinski)

+* GITHUB#11023: Removing @lucene.experimental tags in testXXX methods in CheckIndex. (Jakub Slowinski)
+
+======================== Lucene 9.9.1 =======================
+
+Bug Fixes
+---------------------
+
+* GITHUB#12898: JVM SIGSEGV crash when compiling computeCommonPrefixLengthAndBuildHistogram (Chris Hegarty)
+
+* GITHUB#12900: Push and pop OutputAccumulator as IntersectTermsEnumFrames are pushed and popped (Guo Feng, Mike McCandless)
+
======================== Lucene 9.9.0 =======================

API Changes
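For orientation, a minimal usage sketch of the new query type described in the GITHUB#12679 entry above (the field name, target vector, and thresholds are illustrative; the constructor signatures appear in FloatVectorSimilarityQuery later in this diff):

    // Hypothetical usage of the new similarity-threshold search (names illustrative).
    // Collects every document whose "vector" field scores at least 0.8 against the
    // target; the HNSW graph is traversed with the looser 0.7 threshold.
    Query q = new FloatVectorSimilarityQuery("vector", new float[] {0.1f, 0.2f, 0.3f}, 0.7f, 0.8f);
    TopDocs hits = searcher.search(q, 10);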
@@ -528,7 +528,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
    assertTokenStreamContents(tf8, new String[] {"fußball"});
  }

-  public static interface MockRetainAttribute extends Attribute {
+  public interface MockRetainAttribute extends Attribute {
    void setRetain(boolean attr);

    boolean getRetain();
Binary file not shown.
Binary file not shown.
@@ -149,7 +149,7 @@ stored as a key and the record of key's transformation to its
respective stem. The transformation record is termed a patch command
(P-command). It must be ensured that P-commands are universal, and that
P-commands can transform any word to its stem. Our solution[6,8] is
-based on the Levenstein metric [10], which produces P-command as the
+based on the Levenshtein metric [10], which produces P-command as the
minimum cost path in a directed graph.<br>
<br>
One can imagine the P-command as an algorithm for an operator (editor)
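The Levenshtein reference above maps onto the standard dynamic program; a minimal sketch follows (illustrative only, not the stemmer's actual code — the stemmer additionally reads the minimum-cost path back out of the table to emit P-commands):

    // Classic Levenshtein distance DP; a P-command corresponds to a cheapest
    // edit path through this table from (0, 0) to (a.length(), b.length()).
    static int levenshtein(String a, String b) {
      int[][] dp = new int[a.length() + 1][b.length() + 1];
      for (int i = 0; i <= a.length(); i++) dp[i][0] = i; // deletions
      for (int j = 0; j <= b.length(); j++) dp[0][j] = j; // insertions
      for (int i = 1; i <= a.length(); i++) {
        for (int j = 1; j <= b.length(); j++) {
          int sub = a.charAt(i - 1) == b.charAt(j - 1) ? 0 : 1; // substitution cost
          dp[i][j] =
              Math.min(Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1), dp[i - 1][j - 1] + sub);
        }
      }
      return dp[a.length()][b.length()];
    }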
@@ -75,7 +75,11 @@ public final class Lucene90RWPostingsFormat extends PostingsFormat {
    try {
      FieldsConsumer ret =
          new Lucene90BlockTreeTermsWriter(
-             state, postingsWriter, minTermBlockSize, maxTermBlockSize);
+             state,
+             postingsWriter,
+             minTermBlockSize,
+             maxTermBlockSize,
+             Lucene90BlockTreeTermsReader.VERSION_START);
      success = true;
      return ret;
    } finally {
@@ -0,0 +1,148 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.backward_codecs.lucene90;

import static org.apache.lucene.backward_codecs.lucene90.Lucene90ScoreSkipReader.readImpacts;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.backward_codecs.lucene90.Lucene90ScoreSkipReader.MutableImpactList;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.lucene90.blocktree.FieldReader;
import org.apache.lucene.codecs.lucene90.blocktree.Stats;
import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99SkipWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Impact;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.BasePostingsFormatTestCase;
import org.apache.lucene.tests.util.TestUtil;

public class TestLucene90PostingsFormat extends BasePostingsFormatTestCase {
  private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene90RWPostingsFormat());

  @Override
  protected Codec getCodec() {
    return codec;
  }

  /** Make sure the final sub-block(s) are not skipped. */
  public void testFinalBlock() throws Exception {
    Directory d = newDirectory();
    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())));
    for (int i = 0; i < 25; i++) {
      Document doc = new Document();
      doc.add(newStringField("field", Character.toString((char) (97 + i)), Field.Store.NO));
      doc.add(newStringField("field", "z" + Character.toString((char) (97 + i)), Field.Store.NO));
      w.addDocument(doc);
    }
    w.forceMerge(1);

    DirectoryReader r = DirectoryReader.open(w);
    assertEquals(1, r.leaves().size());
    FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
    // We should see exactly two blocks: one root block (prefix empty string) and one block for z*
    // terms (prefix z):
    Stats stats = field.getStats();
    assertEquals(0, stats.floorBlockCount);
    assertEquals(2, stats.nonFloorBlockCount);
    r.close();
    w.close();
    d.close();
  }

  private void shouldFail(int minItemsInBlock, int maxItemsInBlock) {
    expectThrows(
        IllegalArgumentException.class,
        () -> {
          new Lucene99PostingsFormat(minItemsInBlock, maxItemsInBlock);
        });
  }

  public void testInvalidBlockSizes() throws Exception {
    shouldFail(0, 0);
    shouldFail(10, 8);
    shouldFail(-1, 10);
    shouldFail(10, -1);
    shouldFail(10, 12);
  }

  public void testImpactSerialization() throws IOException {
    // omit norms and omit freqs
    doTestImpactSerialization(Collections.singletonList(new Impact(1, 1L)));

    // omit freqs
    doTestImpactSerialization(Collections.singletonList(new Impact(1, 42L)));
    // omit freqs with very large norms
    doTestImpactSerialization(Collections.singletonList(new Impact(1, -100L)));

    // omit norms
    doTestImpactSerialization(Collections.singletonList(new Impact(30, 1L)));
    // omit norms with large freq
    doTestImpactSerialization(Collections.singletonList(new Impact(500, 1L)));

    // freqs and norms, basic
    doTestImpactSerialization(
        Arrays.asList(
            new Impact(1, 7L),
            new Impact(3, 9L),
            new Impact(7, 10L),
            new Impact(15, 11L),
            new Impact(20, 13L),
            new Impact(28, 14L)));

    // freqs and norms, high values
    doTestImpactSerialization(
        Arrays.asList(
            new Impact(2, 2L),
            new Impact(10, 10L),
            new Impact(12, 50L),
            new Impact(50, -100L),
            new Impact(1000, -80L),
            new Impact(1005, -3L)));
  }

  private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
    CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
    for (Impact impact : impacts) {
      acc.add(impact.freq, impact.norm);
    }
    try (Directory dir = newDirectory()) {
      try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
        Lucene99SkipWriter.writeImpacts(acc, out);
      }
      try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
        byte[] b = new byte[Math.toIntExact(in.length())];
        in.readBytes(b, 0, b.length);
        List<Impact> impacts2 = readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
        assertEquals(impacts, impacts2);
      }
    }
  }
}
@@ -109,6 +109,7 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -374,7 +375,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
    "9.7.0-cfs",
    "9.7.0-nocfs",
    "9.8.0-cfs",
-   "9.8.0-nocfs"
+   "9.8.0-nocfs",
+   "9.9.0-cfs",
+   "9.9.0-nocfs"
  };

  public static String[] getOldNames() {
@@ -392,7 +395,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
    "sorted.9.5.0",
    "sorted.9.6.0",
    "sorted.9.7.0",
-   "sorted.9.8.0"
+   "sorted.9.8.0",
+   "sorted.9.9.0"
  };

  public static String[] getOldSortedNames() {
@@ -2240,6 +2244,25 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
    }
  }

+  // #12895: test on a carefully crafted 9.8.0 index (from a small contiguous subset
+  // of wikibigall unique terms) that shows the read-time exception of
+  // IntersectTermsEnum (used by WildcardQuery)
+  public void testWildcardQueryExceptions990() throws IOException {
+    Path path = createTempDir("12895");
+
+    String name = "index.12895.9.8.0.zip";
+    InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name);
+    assertNotNull("missing zip file to reproduce #12895", resource);
+    TestUtil.unzip(resource, path);
+
+    try (Directory dir = newFSDirectory(path);
+        DirectoryReader reader = DirectoryReader.open(dir)) {
+      IndexSearcher searcher = new IndexSearcher(reader);
+
+      searcher.count(new WildcardQuery(new Term("field", "*qx*")));
+    }
+  }
+
  @Nightly
  public void testReadNMinusTwoCommit() throws IOException {
    for (String name : binarySupportedNames) {
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -120,6 +120,8 @@ final class IntersectTermsEnum extends BaseTermsEnum {
    assert setSavedStartTerm(startTerm);

    currentFrame = f;
+   outputAccumulator.push(currentFrame.arc.output());

    if (startTerm != null) {
      seekToStartTerm(startTerm);
    }
@@ -184,8 +186,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
    int idx = currentFrame.prefix;
    assert currentFrame.suffix > 0;

-   outputAccumulator.reset();
-   outputAccumulator.push(arc.output());
+   int initOutputCount = outputAccumulator.outputCount();
    while (idx < f.prefix) {
      final int target = term.bytes[idx] & 0xff;
      // TODO: we could be more efficient for the next()
@@ -198,9 +199,11 @@ final class IntersectTermsEnum extends BaseTermsEnum {
    }

    f.arc = arc;
+   f.outputNum = outputAccumulator.outputCount() - initOutputCount;
    assert arc.isFinal();
    outputAccumulator.push(arc.nextFinalOutput());
    f.load(outputAccumulator);
+   outputAccumulator.pop(arc.nextFinalOutput());
    return f;
  }

@@ -343,6 +346,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
      throw NoMoreTermsException.INSTANCE;
    }
    final long lastFP = currentFrame.fpOrig;
+   outputAccumulator.pop(currentFrame.outputNum);
    currentFrame = stack[currentFrame.ord - 1];
    currentTransition = currentFrame.transition;
    assert currentFrame.lastSubFP == lastFP;
@@ -429,6 +433,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
      currentFrame = null;
      return null;
    }
+   outputAccumulator.pop(currentFrame.outputNum);
    currentFrame = stack[currentFrame.ord - 1];
    currentTransition = currentFrame.transition;
    isSubBlock = popPushNext();
@@ -89,6 +89,8 @@ final class IntersectTermsEnumFrame {

  final ByteArrayDataInput bytesReader = new ByteArrayDataInput();

+ int outputNum;
+
  int startBytePos;
  int suffix;

@@ -238,6 +238,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
  final int maxDoc;
  final int minItemsInBlock;
  final int maxItemsInBlock;
+ final int version;

  final PostingsWriterBase postingsWriter;
  final FieldInfos fieldInfos;
@@ -255,10 +256,37 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
      int minItemsInBlock,
      int maxItemsInBlock)
      throws IOException {
    this(
        state,
        postingsWriter,
        minItemsInBlock,
        maxItemsInBlock,
        Lucene90BlockTreeTermsReader.VERSION_CURRENT);
  }

  /** Expert constructor that allows configuring the version, used for bw tests. */
  public Lucene90BlockTreeTermsWriter(
      SegmentWriteState state,
      PostingsWriterBase postingsWriter,
      int minItemsInBlock,
      int maxItemsInBlock,
      int version)
      throws IOException {
    validateSettings(minItemsInBlock, maxItemsInBlock);

    this.minItemsInBlock = minItemsInBlock;
    this.maxItemsInBlock = maxItemsInBlock;
    if (version < Lucene90BlockTreeTermsReader.VERSION_START
        || version > Lucene90BlockTreeTermsReader.VERSION_CURRENT) {
      throw new IllegalArgumentException(
          "Expected version in range ["
              + Lucene90BlockTreeTermsReader.VERSION_START
              + ", "
              + Lucene90BlockTreeTermsReader.VERSION_CURRENT
              + "], but got "
              + version);
    }
    this.version = version;

    this.maxDoc = state.segmentInfo.maxDoc();
    this.fieldInfos = state.fieldInfos;
@@ -276,7 +304,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
    CodecUtil.writeIndexHeader(
        termsOut,
        Lucene90BlockTreeTermsReader.TERMS_CODEC_NAME,
-       Lucene90BlockTreeTermsReader.VERSION_CURRENT,
+       version,
        state.segmentInfo.getId(),
        state.segmentSuffix);

@@ -289,7 +317,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
    CodecUtil.writeIndexHeader(
        indexOut,
        Lucene90BlockTreeTermsReader.TERMS_INDEX_CODEC_NAME,
-       Lucene90BlockTreeTermsReader.VERSION_CURRENT,
+       version,
        state.segmentInfo.getId(),
        state.segmentSuffix);
    // segment = state.segmentInfo.name;
@@ -303,7 +331,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
    CodecUtil.writeIndexHeader(
        metaOut,
        Lucene90BlockTreeTermsReader.TERMS_META_CODEC_NAME,
-       Lucene90BlockTreeTermsReader.VERSION_CURRENT,
+       version,
        state.segmentInfo.getId(),
        state.segmentSuffix);

@@ -451,7 +479,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
    scratchBytes.writeByte((byte) (((l >>> 57) & 0x7FL)));
  }

- private static final class PendingBlock extends PendingEntry {
+ private final class PendingBlock extends PendingEntry {
    public final BytesRef prefix;
    public final long fp;
    public FST<BytesRef> index;
@@ -494,7 +522,11 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
      assert scratchBytes.size() == 0;

      // write the leading vLong in MSB order for better outputs sharing in the FST
-     writeMSBVLong(encodeOutput(fp, hasTerms, isFloor), scratchBytes);
+     if (version >= Lucene90BlockTreeTermsReader.VERSION_MSB_VLONG_OUTPUT) {
+       writeMSBVLong(encodeOutput(fp, hasTerms, isFloor), scratchBytes);
+     } else {
+       scratchBytes.writeVLong(encodeOutput(fp, hasTerms, isFloor));
+     }
      if (isFloor) {
        scratchBytes.writeVInt(blocks.size() - 1);
        for (int i = 1; i < blocks.size(); i++) {
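For reference, a sketch of what MSB-first vLong encoding looks like — an assumption reconstructed around the writeByte line visible in the @@ -451 hunk above, not copied from the actual writeMSBVLong. Emitting the high-order 7-bit groups first means outputs with equal high bits stay byte-prefix-compatible, which lets the FST factor the shared prefix out:

    // Sketch: write l as 7-bit groups, most significant group first, with a
    // continuation bit (0x80) on every byte except the last. Assumes l >= 0.
    static void writeMSBVLong(long l, DataOutput out) throws IOException {
      assert l >= 0;
      int bytesNeeded = (Long.SIZE - Long.numberOfLeadingZeros(l) - 1) / 7 + 1;
      l <<= Long.SIZE - bytesNeeded * 7; // left-align the significant 7-bit groups
      for (int i = 1; i < bytesNeeded; i++) {
        out.writeByte((byte) (((l >>> 57) & 0x7FL) | 0x80));
        l <<= 7;
      }
      out.writeByte((byte) ((l >>> 57) & 0x7FL)); // final group, no continuation bit
    }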
@@ -522,12 +554,19 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
      int pageBits = Math.min(15, Math.max(6, estimateBitsRequired));

      final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
+     final int fstVersion;
+     if (version >= Lucene90BlockTreeTermsReader.VERSION_CURRENT) {
+       fstVersion = FST.VERSION_CURRENT;
+     } else {
+       fstVersion = FST.VERSION_90;
+     }
      final FSTCompiler<BytesRef> fstCompiler =
          new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
              // Disable suffixes sharing for block tree index because suffixes are mostly dropped
              // from the FST index and left in the term blocks.
              .suffixRAMLimitMB(0d)
              .dataOutput(getOnHeapReaderWriter(pageBits))
+             .setVersion(fstVersion)
              .build();
      // if (DEBUG) {
      //   System.out.println("  compile index for prefix=" + prefix);
@@ -495,7 +495,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
        targetUpto = 0;
        outputAccumulator.push(arc.nextFinalOutput());
        currentFrame = pushFrame(arc, 0);
-       outputAccumulator.pop();
+       outputAccumulator.pop(arc.nextFinalOutput());
      }

      // if (DEBUG) {
@@ -569,7 +569,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
        // if (DEBUG) System.out.println("    arc is final!");
        outputAccumulator.push(arc.nextFinalOutput());
        currentFrame = pushFrame(arc, targetUpto);
-       outputAccumulator.pop();
+       outputAccumulator.pop(arc.nextFinalOutput());
        // if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" +
        // currentFrame.hasTerms);
      }
@@ -767,7 +767,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
        targetUpto = 0;
        outputAccumulator.push(arc.nextFinalOutput());
        currentFrame = pushFrame(arc, 0);
-       outputAccumulator.pop();
+       outputAccumulator.pop(arc.nextFinalOutput());
      }

      // if (DEBUG) {
@@ -841,7 +841,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
        // if (DEBUG) System.out.println("    arc is final!");
        outputAccumulator.push(arc.nextFinalOutput());
        currentFrame = pushFrame(arc, targetUpto);
-       outputAccumulator.pop();
+       outputAccumulator.pop(arc.nextFinalOutput());
        // if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" +
        // currentFrame.hasTerms);
      }
@@ -1187,14 +1187,27 @@ final class SegmentTermsEnum extends BaseTermsEnum {

    void push(BytesRef output) {
      if (output != Lucene90BlockTreeTermsReader.NO_OUTPUT) {
        assert output.length > 0;
        outputs = ArrayUtil.grow(outputs, num + 1);
        outputs[num++] = output;
      }
    }

-   void pop() {
-     assert num > 0;
-     num--;
+   void pop(BytesRef output) {
+     if (output != Lucene90BlockTreeTermsReader.NO_OUTPUT) {
+       assert num > 0;
+       assert outputs[num - 1] == output;
+       num--;
+     }
    }

+   void pop(int cnt) {
+     assert num >= cnt;
+     num -= cnt;
+   }
+
+   int outputCount() {
+     return num;
+   }
+
    void reset() {
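A note on why pop now takes arguments (a hedged reading of the diff, consistent with the GITHUB#12900 entry): push(output) is a no-op for NO_OUTPUT, so the old unconditional pop() could remove an entry that was never pushed. The BytesRef overload mirrors push's condition, and pop(int cnt) lets IntersectTermsEnum discard exactly the outputNum entries a frame contributed:

    // Sketch of the intended pairing (names taken from the surrounding diff):
    outputAccumulator.push(arc.nextFinalOutput()); // skipped when output == NO_OUTPUT
    currentFrame = pushFrame(arc, targetUpto);
    outputAccumulator.pop(arc.nextFinalOutput()); // skipped under the same condition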
@@ -83,6 +83,11 @@ import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.SuppressForbidden;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.automaton.Automata;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Operations;

/**
 * Basic tool and API to check the health of an index and write a new segments file that removes
@@ -1149,11 +1154,7 @@ public final class CheckIndex implements Closeable {
    return segInfoStat;
  }

-  /**
-   * Tests index sort order.
-   *
-   * @lucene.experimental
-   */
+  /** Tests index sort order. */
  public static Status.IndexSortStatus testSort(
      CodecReader reader, Sort sort, PrintStream infoStream, boolean failFast) throws IOException {
    // This segment claims its documents are sorted according to the incoming sort ... let's make
@@ -1226,11 +1227,7 @@ public final class CheckIndex implements Closeable {
    return status;
  }

-  /**
-   * Test live docs.
-   *
-   * @lucene.experimental
-   */
+  /** Test live docs. */
  public static Status.LiveDocStatus testLiveDocs(
      CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
    long startNS = System.nanoTime();
@@ -1295,11 +1292,7 @@ public final class CheckIndex implements Closeable {
    return status;
  }

-  /**
-   * Test field infos.
-   *
-   * @lucene.experimental
-   */
+  /** Test field infos. */
  public static Status.FieldInfoStatus testFieldInfos(
      CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
    long startNS = System.nanoTime();
@@ -1336,11 +1329,7 @@ public final class CheckIndex implements Closeable {
    return status;
  }

-  /**
-   * Test field norms.
-   *
-   * @lucene.experimental
-   */
+  /** Test field norms. */
  public static Status.FieldNormStatus testFieldNorms(
      CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
    long startNS = System.nanoTime();
@@ -2314,6 +2303,33 @@ public final class CheckIndex implements Closeable {
          }
        }
      }
+
+     // Test Terms#intersect
+     TermsEnum allTerms = terms.iterator();
+     // An automaton that should match a good number of terms
+     Automaton a =
+         Operations.concatenate(
+             Arrays.asList(
+                 Automata.makeAnyBinary(),
+                 Automata.makeCharRange('a', 'e'),
+                 Automata.makeAnyBinary()));
+     a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
+     CompiledAutomaton ca = new CompiledAutomaton(a);
+     ByteRunAutomaton runAutomaton = new ByteRunAutomaton(a);
+     TermsEnum filteredTerms = terms.intersect(ca, null);
+     for (BytesRef term = allTerms.next(); term != null; term = allTerms.next()) {
+       if (runAutomaton.run(term.bytes, term.offset, term.length)) {
+         BytesRef filteredTerm = filteredTerms.next();
+         if (Objects.equals(term, filteredTerm) == false) {
+           throw new CheckIndexException(
+               "Expected next filtered term: " + term + ", but got " + filteredTerm);
+         }
+       }
+     }
+     BytesRef filteredTerm = filteredTerms.next();
+     if (filteredTerm != null) {
+       throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
+     }
    }
  }

@@ -2443,21 +2459,13 @@ public final class CheckIndex implements Closeable {
    }
  }

-  /**
-   * Test the term index.
-   *
-   * @lucene.experimental
-   */
+  /** Test the term index. */
  public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream)
      throws IOException {
    return testPostings(reader, infoStream, false, Level.MIN_LEVEL_FOR_SLOW_CHECKS, false);
  }

-  /**
-   * Test the term index.
-   *
-   * @lucene.experimental
-   */
+  /** Test the term index. */
  public static Status.TermIndexStatus testPostings(
      CodecReader reader, PrintStream infoStream, boolean verbose, int level, boolean failFast)
      throws IOException {
@@ -2510,11 +2518,7 @@ public final class CheckIndex implements Closeable {
    return status;
  }

-  /**
-   * Test the points index
-   *
-   * @lucene.experimental
-   */
+  /** Test the points index. */
  public static Status.PointsStatus testPoints(
      CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
    if (infoStream != null) {
@@ -2617,11 +2621,7 @@ public final class CheckIndex implements Closeable {
    return status;
  }

-  /**
-   * Test the vectors index
-   *
-   * @lucene.experimental
-   */
+  /** Test the vectors index. */
  public static Status.VectorValuesStatus testVectors(
      CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
    if (infoStream != null) {
@@ -3104,11 +3104,7 @@ public final class CheckIndex implements Closeable {
    }
  }

-  /**
-   * Test stored fields.
-   *
-   * @lucene.experimental
-   */
+  /** Test stored fields. */
  public static Status.StoredFieldStatus testStoredFields(
      CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
    long startNS = System.nanoTime();
@@ -3162,11 +3158,7 @@ public final class CheckIndex implements Closeable {
    return status;
  }

-  /**
-   * Test docvalues.
-   *
-   * @lucene.experimental
-   */
+  /** Test docvalues. */
  public static Status.DocValuesStatus testDocValues(
      CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
    long startNS = System.nanoTime();
@@ -3623,21 +3615,13 @@ public final class CheckIndex implements Closeable {
    }
  }

-  /**
-   * Test term vectors.
-   *
-   * @lucene.experimental
-   */
+  /** Test term vectors. */
  public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream)
      throws IOException {
    return testTermVectors(reader, infoStream, false, Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, false);
  }

-  /**
-   * Test term vectors.
-   *
-   * @lucene.experimental
-   */
+  /** Test term vectors. */
  public static Status.TermVectorStatus testTermVectors(
      CodecReader reader, PrintStream infoStream, boolean verbose, int level, boolean failFast)
      throws IOException {
@@ -112,7 +112,7 @@ public abstract sealed class IndexReader implements Closeable permits CompositeR
   *
   * @lucene.experimental
   */
-  public static interface CacheHelper {
+  public interface CacheHelper {

    /**
     * Get a key that the resource can be cached on. The given entry can be compared using identity,
@@ -139,7 +139,7 @@ public abstract sealed class IndexReader implements Closeable permits CompositeR
   * @lucene.experimental
   */
  @FunctionalInterface
-  public static interface ClosedListener {
+  public interface ClosedListener {
    /**
     * Invoked when the resource (segment core, or index reader) that is being cached on is closed.
     */
@@ -34,10 +34,10 @@ import org.apache.lucene.util.BytesRef;
public interface IndexableField {

  /** Field name */
-  public String name();
+  String name();

  /** {@link IndexableFieldType} describing the properties of this field. */
-  public IndexableFieldType fieldType();
+  IndexableFieldType fieldType();

  /**
   * Creates the TokenStream used for indexing this field. If appropriate, implementations should
@@ -52,13 +52,13 @@ public interface IndexableField {
   * @return TokenStream value for indexing the document. Should always return a non-null value if
   *     the field is to be indexed
   */
-  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse);
+  TokenStream tokenStream(Analyzer analyzer, TokenStream reuse);

  /** Non-null if this field has a binary value */
-  public BytesRef binaryValue();
+  BytesRef binaryValue();

  /** Non-null if this field has a string value */
-  public String stringValue();
+  String stringValue();

  /** Non-null if this field has a string value */
  default CharSequence getCharSequenceValue() {
@@ -66,20 +66,20 @@ public interface IndexableField {
  }

  /** Non-null if this field has a Reader value */
-  public Reader readerValue();
+  Reader readerValue();

  /** Non-null if this field has a numeric value */
-  public Number numericValue();
+  Number numericValue();

  /**
   * Stored value. This method is called to populate stored fields and must return a non-null value
   * if the field stored.
   */
-  public StoredValue storedValue();
+  StoredValue storedValue();

  /**
   * Describes how this field should be inverted. This must return a non-null value if the field
   * indexes terms and postings.
   */
-  public InvertableType invertableType();
+  InvertableType invertableType();
}
@@ -31,19 +31,19 @@ public interface TwoPhaseCommit {
   * method, but avoid actual committing changes. If the 2-phase commit fails, {@link #rollback()}
   * is called to discard all changes since last successful commit.
   */
-  public long prepareCommit() throws IOException;
+  long prepareCommit() throws IOException;

  /**
   * The second phase of a 2-phase commit. Implementations should ideally do very little work in
   * this method (following {@link #prepareCommit()}, and after it returns, the caller can assume
   * that the changes were successfully committed to the underlying storage.
   */
-  public long commit() throws IOException;
+  long commit() throws IOException;

  /**
   * Discards any changes that have occurred since the last commit. In a 2-phase commit algorithm,
   * where one of the objects failed to {@link #commit()} or {@link #prepareCommit()}, this method
   * is used to roll all other objects back to their previous state.
   */
-  public void rollback() throws IOException;
+  void rollback() throws IOException;
}
@@ -0,0 +1,288 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Objects;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits;

/**
 * Search for all (approximate) vectors above a similarity threshold.
 *
 * @lucene.experimental
 */
abstract class AbstractVectorSimilarityQuery extends Query {
  protected final String field;
  protected final float traversalSimilarity, resultSimilarity;
  protected final Query filter;

  /**
   * Search for all (approximate) vectors above a similarity threshold using {@link
   * VectorSimilarityCollector}. If a filter is applied, it traverses as many nodes as the cost of
   * the filter, and then falls back to exact search if results are incomplete.
   *
   * @param field a field that has been indexed as a vector field.
   * @param traversalSimilarity (lower) similarity score for graph traversal.
   * @param resultSimilarity (higher) similarity score for result collection.
   * @param filter a filter applied before the vector search.
   */
  AbstractVectorSimilarityQuery(
      String field, float traversalSimilarity, float resultSimilarity, Query filter) {
    if (traversalSimilarity > resultSimilarity) {
      throw new IllegalArgumentException("traversalSimilarity should be <= resultSimilarity");
    }
    this.field = Objects.requireNonNull(field, "field");
    this.traversalSimilarity = traversalSimilarity;
    this.resultSimilarity = resultSimilarity;
    this.filter = filter;
  }

  abstract VectorScorer createVectorScorer(LeafReaderContext context) throws IOException;

  protected abstract TopDocs approximateSearch(
      LeafReaderContext context, Bits acceptDocs, int visitLimit) throws IOException;

  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
      throws IOException {
    return new Weight(this) {
      final Weight filterWeight =
          filter == null
              ? null
              : searcher.createWeight(searcher.rewrite(filter), ScoreMode.COMPLETE_NO_SCORES, 1);

      @Override
      public Explanation explain(LeafReaderContext context, int doc) throws IOException {
        if (filterWeight != null) {
          Scorer filterScorer = filterWeight.scorer(context);
          if (filterScorer == null || filterScorer.iterator().advance(doc) > doc) {
            return Explanation.noMatch("Doc does not match the filter");
          }
        }

        VectorScorer scorer = createVectorScorer(context);
        if (scorer == null) {
          return Explanation.noMatch("Not indexed as the correct vector field");
        } else if (scorer.advanceExact(doc)) {
          float score = scorer.score();
          if (score >= resultSimilarity) {
            return Explanation.match(boost * score, "Score above threshold");
          } else {
            return Explanation.noMatch("Score below threshold");
          }
        } else {
          return Explanation.noMatch("No vector found for doc");
        }
      }

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        @SuppressWarnings("resource")
        LeafReader leafReader = context.reader();
        Bits liveDocs = leafReader.getLiveDocs();

        // If there is no filter
        if (filterWeight == null) {
          // Return exhaustive results
          TopDocs results = approximateSearch(context, liveDocs, Integer.MAX_VALUE);
          return VectorSimilarityScorer.fromScoreDocs(this, boost, results.scoreDocs);
        }

        Scorer scorer = filterWeight.scorer(context);
        if (scorer == null) {
          // If the filter does not match any documents
          return null;
        }

        BitSet acceptDocs;
        if (liveDocs == null && scorer.iterator() instanceof BitSetIterator bitSetIterator) {
          // If there are no deletions, and matching docs are already cached
          acceptDocs = bitSetIterator.getBitSet();
        } else {
          // Else collect all matching docs
          FilteredDocIdSetIterator filtered =
              new FilteredDocIdSetIterator(scorer.iterator()) {
                @Override
                protected boolean match(int doc) {
                  return liveDocs == null || liveDocs.get(doc);
                }
              };
          acceptDocs = BitSet.of(filtered, leafReader.maxDoc());
        }

        int cardinality = acceptDocs.cardinality();
        if (cardinality == 0) {
          // If there are no live matching docs
          return null;
        }

        // Perform an approximate search
        TopDocs results = approximateSearch(context, acceptDocs, cardinality);

        // If the limit was exhausted
        if (results.totalHits.relation == TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO) {
          // Return a lazy-loading iterator
          return VectorSimilarityScorer.fromAcceptDocs(
              this,
              boost,
              createVectorScorer(context),
              new BitSetIterator(acceptDocs, cardinality),
              resultSimilarity);
        } else {
          // Return an iterator over the collected results
          return VectorSimilarityScorer.fromScoreDocs(this, boost, results.scoreDocs);
        }
      }

      @Override
      public boolean isCacheable(LeafReaderContext ctx) {
        return true;
      }
    };
  }

  @Override
  public void visit(QueryVisitor visitor) {
    if (visitor.acceptField(field)) {
      visitor.visitLeaf(this);
    }
  }

  @Override
  public boolean equals(Object o) {
    return sameClassAs(o)
        && Objects.equals(field, ((AbstractVectorSimilarityQuery) o).field)
        && Float.compare(
                ((AbstractVectorSimilarityQuery) o).traversalSimilarity, traversalSimilarity)
            == 0
        && Float.compare(((AbstractVectorSimilarityQuery) o).resultSimilarity, resultSimilarity)
            == 0
        && Objects.equals(filter, ((AbstractVectorSimilarityQuery) o).filter);
  }

  @Override
  public int hashCode() {
    return Objects.hash(field, traversalSimilarity, resultSimilarity, filter);
  }

  private static class VectorSimilarityScorer extends Scorer {
    final DocIdSetIterator iterator;
    final float[] cachedScore;

    VectorSimilarityScorer(Weight weight, DocIdSetIterator iterator, float[] cachedScore) {
      super(weight);
      this.iterator = iterator;
      this.cachedScore = cachedScore;
    }

    static VectorSimilarityScorer fromScoreDocs(Weight weight, float boost, ScoreDoc[] scoreDocs) {
      // Sort in ascending order of docid
      Arrays.sort(scoreDocs, Comparator.comparingInt(scoreDoc -> scoreDoc.doc));

      float[] cachedScore = new float[1];
      DocIdSetIterator iterator =
          new DocIdSetIterator() {
            int index = -1;

            @Override
            public int docID() {
              if (index < 0) {
                return -1;
              } else if (index >= scoreDocs.length) {
                return NO_MORE_DOCS;
              } else {
                cachedScore[0] = boost * scoreDocs[index].score;
                return scoreDocs[index].doc;
              }
            }

            @Override
            public int nextDoc() {
              index++;
              return docID();
            }

            @Override
            public int advance(int target) {
              index =
                  Arrays.binarySearch(
                      scoreDocs,
                      new ScoreDoc(target, 0),
                      Comparator.comparingInt(scoreDoc -> scoreDoc.doc));
              if (index < 0) {
                index = -1 - index;
              }
              return docID();
            }

            @Override
            public long cost() {
              return scoreDocs.length;
            }
          };

      return new VectorSimilarityScorer(weight, iterator, cachedScore);
    }

    static VectorSimilarityScorer fromAcceptDocs(
        Weight weight,
        float boost,
        VectorScorer scorer,
        DocIdSetIterator acceptDocs,
        float threshold) {
      float[] cachedScore = new float[1];
      DocIdSetIterator iterator =
          new FilteredDocIdSetIterator(acceptDocs) {
            @Override
            protected boolean match(int doc) throws IOException {
              // Compute the dot product
              float score = scorer.score();
              cachedScore[0] = score * boost;
              return score >= threshold;
            }
          };

      return new VectorSimilarityScorer(weight, iterator, cachedScore);
    }

    @Override
    public int docID() {
      return iterator.docID();
    }

    @Override
    public DocIdSetIterator iterator() {
      return iterator;
    }

    @Override
    public float getMaxScore(int upTo) {
      return Float.POSITIVE_INFINITY;
    }

    @Override
    public float score() {
      return cachedScore[0];
    }
  }
}
@@ -0,0 +1,145 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.util.Bits;

/**
 * Search for all (approximate) byte vectors above a similarity threshold.
 *
 * @lucene.experimental
 */
public class ByteVectorSimilarityQuery extends AbstractVectorSimilarityQuery {
  private final byte[] target;

  /**
   * Search for all (approximate) byte vectors above a similarity threshold using {@link
   * VectorSimilarityCollector}. If a filter is applied, it traverses as many nodes as the cost of
   * the filter, and then falls back to exact search if results are incomplete.
   *
   * @param field a field that has been indexed as a {@link KnnByteVectorField}.
   * @param target the target of the search.
   * @param traversalSimilarity (lower) similarity score for graph traversal.
   * @param resultSimilarity (higher) similarity score for result collection.
   * @param filter a filter applied before the vector search.
   */
  public ByteVectorSimilarityQuery(
      String field,
      byte[] target,
      float traversalSimilarity,
      float resultSimilarity,
      Query filter) {
    super(field, traversalSimilarity, resultSimilarity, filter);
    this.target = Objects.requireNonNull(target, "target");
  }

  /**
   * Search for all (approximate) byte vectors above a similarity threshold using {@link
   * VectorSimilarityCollector}.
   *
   * @param field a field that has been indexed as a {@link KnnByteVectorField}.
   * @param target the target of the search.
   * @param traversalSimilarity (lower) similarity score for graph traversal.
   * @param resultSimilarity (higher) similarity score for result collection.
   */
  public ByteVectorSimilarityQuery(
      String field, byte[] target, float traversalSimilarity, float resultSimilarity) {
    this(field, target, traversalSimilarity, resultSimilarity, null);
  }

  /**
   * Search for all (approximate) byte vectors above a similarity threshold using {@link
   * VectorSimilarityCollector}. If a filter is applied, it traverses as many nodes as the cost of
   * the filter, and then falls back to exact search if results are incomplete.
   *
   * @param field a field that has been indexed as a {@link KnnByteVectorField}.
   * @param target the target of the search.
   * @param resultSimilarity similarity score for result collection.
   * @param filter a filter applied before the vector search.
   */
  public ByteVectorSimilarityQuery(
      String field, byte[] target, float resultSimilarity, Query filter) {
    this(field, target, resultSimilarity, resultSimilarity, filter);
  }

  /**
   * Search for all (approximate) byte vectors above a similarity threshold using {@link
   * VectorSimilarityCollector}.
   *
   * @param field a field that has been indexed as a {@link KnnByteVectorField}.
   * @param target the target of the search.
   * @param resultSimilarity similarity score for result collection.
   */
  public ByteVectorSimilarityQuery(String field, byte[] target, float resultSimilarity) {
    this(field, target, resultSimilarity, resultSimilarity, null);
  }

  @Override
  VectorScorer createVectorScorer(LeafReaderContext context) throws IOException {
    @SuppressWarnings("resource")
    FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
    if (fi == null || fi.getVectorEncoding() != VectorEncoding.BYTE) {
      return null;
    }
    return VectorScorer.create(context, fi, target);
  }

  @Override
  @SuppressWarnings("resource")
  protected TopDocs approximateSearch(LeafReaderContext context, Bits acceptDocs, int visitLimit)
      throws IOException {
    KnnCollector collector =
        new VectorSimilarityCollector(traversalSimilarity, resultSimilarity, visitLimit);
    context.reader().searchNearestVectors(field, target, collector, acceptDocs);
    return collector.topDocs();
  }

  @Override
  public String toString(String field) {
    return String.format(
        Locale.ROOT,
        "%s[field=%s target=[%d...] traversalSimilarity=%f resultSimilarity=%f filter=%s]",
        getClass().getSimpleName(),
        field,
        target[0],
        traversalSimilarity,
        resultSimilarity,
        filter);
  }

  @Override
  public boolean equals(Object o) {
    return sameClassAs(o)
        && super.equals(o)
        && Arrays.equals(target, ((ByteVectorSimilarityQuery) o).target);
  }

  @Override
  public int hashCode() {
    int result = super.hashCode();
    result = 31 * result + Arrays.hashCode(target);
    return result;
  }
}
@@ -0,0 +1,146 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.VectorUtil;

/**
 * Search for all (approximate) float vectors above a similarity threshold.
 *
 * @lucene.experimental
 */
public class FloatVectorSimilarityQuery extends AbstractVectorSimilarityQuery {
  private final float[] target;

  /**
   * Search for all (approximate) float vectors above a similarity threshold using {@link
   * VectorSimilarityCollector}. If a filter is applied, it traverses as many nodes as the cost of
   * the filter, and then falls back to exact search if results are incomplete.
   *
   * @param field a field that has been indexed as a {@link KnnFloatVectorField}.
   * @param target the target of the search.
   * @param traversalSimilarity (lower) similarity score for graph traversal.
   * @param resultSimilarity (higher) similarity score for result collection.
   * @param filter a filter applied before the vector search.
   */
  public FloatVectorSimilarityQuery(
      String field,
      float[] target,
      float traversalSimilarity,
      float resultSimilarity,
      Query filter) {
    super(field, traversalSimilarity, resultSimilarity, filter);
    this.target = VectorUtil.checkFinite(Objects.requireNonNull(target, "target"));
  }

  /**
   * Search for all (approximate) float vectors above a similarity threshold using {@link
   * VectorSimilarityCollector}.
   *
   * @param field a field that has been indexed as a {@link KnnFloatVectorField}.
   * @param target the target of the search.
   * @param traversalSimilarity (lower) similarity score for graph traversal.
   * @param resultSimilarity (higher) similarity score for result collection.
   */
  public FloatVectorSimilarityQuery(
      String field, float[] target, float traversalSimilarity, float resultSimilarity) {
    this(field, target, traversalSimilarity, resultSimilarity, null);
  }

  /**
   * Search for all (approximate) float vectors above a similarity threshold using {@link
   * VectorSimilarityCollector}. If a filter is applied, it traverses as many nodes as the cost of
   * the filter, and then falls back to exact search if results are incomplete.
   *
   * @param field a field that has been indexed as a {@link KnnFloatVectorField}.
   * @param target the target of the search.
   * @param resultSimilarity similarity score for result collection.
   * @param filter a filter applied before the vector search.
   */
  public FloatVectorSimilarityQuery(
      String field, float[] target, float resultSimilarity, Query filter) {
    this(field, target, resultSimilarity, resultSimilarity, filter);
  }

  /**
   * Search for all (approximate) float vectors above a similarity threshold using {@link
   * VectorSimilarityCollector}.
   *
   * @param field a field that has been indexed as a {@link KnnFloatVectorField}.
   * @param target the target of the search.
   * @param resultSimilarity similarity score for result collection.
   */
  public FloatVectorSimilarityQuery(String field, float[] target, float resultSimilarity) {
    this(field, target, resultSimilarity, resultSimilarity, null);
  }

  @Override
  VectorScorer createVectorScorer(LeafReaderContext context) throws IOException {
    @SuppressWarnings("resource")
    FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
    if (fi == null || fi.getVectorEncoding() != VectorEncoding.FLOAT32) {
      return null;
    }
    return VectorScorer.create(context, fi, target);
  }

  @Override
  @SuppressWarnings("resource")
  protected TopDocs approximateSearch(LeafReaderContext context, Bits acceptDocs, int visitLimit)
      throws IOException {
    KnnCollector collector =
        new VectorSimilarityCollector(traversalSimilarity, resultSimilarity, visitLimit);
    context.reader().searchNearestVectors(field, target, collector, acceptDocs);
    return collector.topDocs();
  }

  @Override
  public String toString(String field) {
    return String.format(
        Locale.ROOT,
        "%s[field=%s target=[%f...] traversalSimilarity=%f resultSimilarity=%f filter=%s]",
        getClass().getSimpleName(),
        field,
        target[0],
        traversalSimilarity,
        resultSimilarity,
        filter);
  }

  @Override
  public boolean equals(Object o) {
    return sameClassAs(o)
        && super.equals(o)
        && Arrays.equals(target, ((FloatVectorSimilarityQuery) o).target);
  }

  @Override
  public int hashCode() {
    int result = super.hashCode();
    result = 31 * result + Arrays.hashCode(target);
    return result;
  }
}
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Perform a similarity-based graph search.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
class VectorSimilarityCollector extends AbstractKnnCollector {
|
||||
private final float traversalSimilarity, resultSimilarity;
|
||||
private float maxSimilarity;
|
||||
private final List<ScoreDoc> scoreDocList;
|
||||
|
||||
/**
|
||||
* Perform a similarity-based graph search. The graph is traversed till better scoring nodes are
|
||||
* available, or the best candidate is below {@link #traversalSimilarity}. All traversed nodes
|
||||
* above {@link #resultSimilarity} are collected.
|
||||
*
|
||||
* @param traversalSimilarity (lower) similarity score for graph traversal.
|
||||
* @param resultSimilarity (higher) similarity score for result collection.
|
||||
* @param visitLimit limit on number of nodes to visit.
|
||||
*/
|
||||
public VectorSimilarityCollector(
|
||||
float traversalSimilarity, float resultSimilarity, long visitLimit) {
|
||||
super(1, visitLimit);
|
||||
if (traversalSimilarity > resultSimilarity) {
|
||||
throw new IllegalArgumentException("traversalSimilarity should be <= resultSimilarity");
|
||||
}
|
||||
this.traversalSimilarity = traversalSimilarity;
|
||||
this.resultSimilarity = resultSimilarity;
|
||||
this.maxSimilarity = Float.NEGATIVE_INFINITY;
|
||||
this.scoreDocList = new ArrayList<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean collect(int docId, float similarity) {
|
||||
maxSimilarity = Math.max(maxSimilarity, similarity);
|
||||
if (similarity >= resultSimilarity) {
|
||||
scoreDocList.add(new ScoreDoc(docId, similarity));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float minCompetitiveSimilarity() {
|
||||
return Math.min(traversalSimilarity, maxSimilarity);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TopDocs topDocs() {
|
||||
// Results are not returned in a sorted order to prevent unnecessary calculations (because we do
|
||||
// not need to maintain the topK)
|
||||
TotalHits.Relation relation =
|
||||
earlyTerminated()
|
||||
? TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO
|
||||
: TotalHits.Relation.EQUAL_TO;
|
||||
return new TopDocs(
|
||||
new TotalHits(visitedCount(), relation), scoreDocList.toArray(ScoreDoc[]::new));
|
||||
}
|
||||
}
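
A minimal usage sketch of the new query (illustrative only: the directory, field
name, vector values, and thresholds below are placeholders, not code from this
change). Note that matching documents come back unsorted, since the collector does
not maintain a top-K heap:

    // Collect every document whose "vector" field scores at least 0.8 against
    // the query vector; graph traversal keeps exploring while the best
    // candidate still scores above 0.6 (or better-scoring nodes run out).
    try (IndexReader reader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      float[] queryVector = {0.1f, 0.2f, 0.3f};
      Query query =
          new FloatVectorSimilarityQuery(
              "vector", queryVector, /* traversalSimilarity */ 0.6f, /* resultSimilarity */ 0.8f);
      TopDocs hits = searcher.search(query, 10);
    }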

@@ -121,12 +121,12 @@ public abstract class LMSimilarity extends SimilarityBase {
  }

  /** A strategy for computing the collection language model. */
  public static interface CollectionModel {
  public interface CollectionModel {
    /**
     * Computes the probability {@code p(w|C)} according to the language model strategy for the
     * current term.
     */
    public double computeProbability(BasicStats stats);
    double computeProbability(BasicStats stats);

    /** The name of the collection model strategy. */
    public String getName();

@@ -39,7 +39,7 @@ final class ByteBufferGuard {
   * this to allow unmapping of bytebuffers with private Java APIs.
   */
  @FunctionalInterface
  static interface BufferCleaner {
  interface BufferCleaner {
    void freeBuffer(String resourceDescription, ByteBuffer b) throws IOException;
  }

@@ -264,7 +264,7 @@ public class MMapDirectory extends FSDirectory {
   */
  public static final String UNMAP_NOT_SUPPORTED_REASON;

  static interface MMapIndexInputProvider {
  interface MMapIndexInputProvider {
    IndexInput openInput(Path path, IOContext context, int chunkSizePower, boolean preload)
        throws IOException;

@@ -26,14 +26,14 @@ import org.apache.lucene.util.BitUtil; // javadocs
public interface RandomAccessInput {

  /** The number of bytes in the file. */
  public long length();
  long length();

  /**
   * Reads a byte at the given position in the file
   *
   * @see DataInput#readByte
   */
  public byte readByte(long pos) throws IOException;
  byte readByte(long pos) throws IOException;

  /**
   * Reads a specified number of bytes starting at a given position into an array at the specified

@@ -53,7 +53,7 @@ public interface RandomAccessInput {
   * @see DataInput#readShort
   * @see BitUtil#VH_LE_SHORT
   */
  public short readShort(long pos) throws IOException;
  short readShort(long pos) throws IOException;

  /**
   * Reads an integer (LE byte order) at the given position in the file

@@ -61,7 +61,7 @@ public interface RandomAccessInput {
   * @see DataInput#readInt
   * @see BitUtil#VH_LE_INT
   */
  public int readInt(long pos) throws IOException;
  int readInt(long pos) throws IOException;

  /**
   * Reads a long (LE byte order) at the given position in the file

@@ -69,5 +69,5 @@ public interface RandomAccessInput {
   * @see DataInput#readLong
   * @see BitUtil#VH_LE_LONG
   */
  public long readLong(long pos) throws IOException;
  long readLong(long pos) throws IOException;
}

@@ -714,7 +714,7 @@ public final class ArrayUtil {

  /** Comparator for a fixed number of bytes. */
  @FunctionalInterface
  public static interface ByteArrayComparator {
  public interface ByteArrayComparator {

    /**
     * Compare bytes starting from the given offsets. The return value has the same contract as

@@ -30,5 +30,5 @@ public interface AttributeReflector {
   * method once using {@code org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class} as
   * attribute class, {@code "term"} as key and the actual value as a String.
   */
  public void reflect(Class<? extends Attribute> attClass, String key, Object value);
  void reflect(Class<? extends Attribute> attClass, String key, Object value);
}

@@ -32,7 +32,7 @@ public interface ClassLoaderUtils {
   * returned (this is fine, because if we get a {@code SecurityException} it is for sure no
   * parent).
   */
  public static boolean isParentClassLoader(final ClassLoader parent, final ClassLoader child) {
  static boolean isParentClassLoader(final ClassLoader parent, final ClassLoader child) {
    try {
      ClassLoader cl = child;
      while (cl != null) {

@@ -213,7 +213,16 @@ public abstract class MSBRadixSorter extends Sorter {
   *
   * @see #buildHistogram
   */
  // This method, and its namesakes, have been manually split to work around a JVM crash.
  // See https://github.com/apache/lucene/issues/12898
  private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
    int commonPrefixLength = computeInitialCommonPrefixLength(from, k);
    return computeCommonPrefixLengthAndBuildHistogramPart1(
        from, to, k, histogram, commonPrefixLength);
  }

  // This method, and its namesakes, have been manually split to work around a JVM crash.
  private int computeInitialCommonPrefixLength(int from, int k) {
    final int[] commonPrefix = this.commonPrefix;
    int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
    for (int j = 0; j < commonPrefixLength; ++j) {

@@ -224,7 +233,13 @@ public abstract class MSBRadixSorter extends Sorter {
        break;
      }
    }
    return commonPrefixLength;
  }

  // This method, and its namesakes, have been manually split to work around a JVM crash.
  private int computeCommonPrefixLengthAndBuildHistogramPart1(
      int from, int to, int k, int[] histogram, int commonPrefixLength) {
    final int[] commonPrefix = this.commonPrefix;
    int i;
    outer:
    for (i = from + 1; i < to; ++i) {

@@ -239,7 +254,13 @@ public abstract class MSBRadixSorter extends Sorter {
        }
      }
    }
    return computeCommonPrefixLengthAndBuildHistogramPart2(
        from, to, k, histogram, commonPrefixLength, i);
  }

  // This method, and its namesakes, have been manually split to work around a JVM crash.
  private int computeCommonPrefixLengthAndBuildHistogramPart2(
      int from, int to, int k, int[] histogram, int commonPrefixLength, int i) {
    if (i < to) {
      // the loop got broken because there is no common prefix
      assert commonPrefixLength == 0;

@@ -127,7 +127,7 @@ public final class NamedSPILoader<S extends NamedSPILoader.NamedSPI> implements
   *
   * <p>Names must be all ascii alphanumeric, and less than 128 characters in length.
   */
  public static interface NamedSPI {
  public interface NamedSPI {
    String getName();
  }
}

@@ -198,7 +198,16 @@ public abstract class RadixSelector extends Selector {
   *
   * @see #buildHistogram
   */
  // This method, and its namesakes, have been manually split to work around a JVM crash.
  // See https://github.com/apache/lucene/issues/12898
  private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
    int commonPrefixLength = computeInitialCommonPrefixLength(from, k);
    return computeCommonPrefixLengthAndBuildHistogramPart1(
        from, to, k, histogram, commonPrefixLength);
  }

  // This method, and its namesakes, have been manually split to work around a JVM crash.
  private int computeInitialCommonPrefixLength(int from, int k) {
    final int[] commonPrefix = this.commonPrefix;
    int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
    for (int j = 0; j < commonPrefixLength; ++j) {

@@ -209,7 +218,13 @@ public abstract class RadixSelector extends Selector {
        break;
      }
    }
    return commonPrefixLength;
  }

  // This method, and its namesakes, have been manually split to work around a JVM crash.
  private int computeCommonPrefixLengthAndBuildHistogramPart1(
      int from, int to, int k, int[] histogram, int commonPrefixLength) {
    final int[] commonPrefix = this.commonPrefix;
    int i;
    outer:
    for (i = from + 1; i < to; ++i) {

@@ -226,7 +241,13 @@ public abstract class RadixSelector extends Selector {
        }
      }
    }
    return computeCommonPrefixLengthAndBuildHistogramPart2(
        from, to, k, histogram, commonPrefixLength, i);
  }

  // This method, and its namesakes, have been manually split to work around a JVM crash.
  private int computeCommonPrefixLengthAndBuildHistogramPart2(
      int from, int to, int k, int[] histogram, int commonPrefixLength, int i) {
    if (i < to) {
      // the loop got broken because there is no common prefix
      assert commonPrefixLength == 0;

@@ -23,14 +23,14 @@ import java.io.InputStream;
public interface ResourceLoader {

  /** Opens a named resource */
  public InputStream openResource(String resource) throws IOException;
  InputStream openResource(String resource) throws IOException;

  /** Finds class of the name and expected type */
  public <T> Class<? extends T> findClass(String cname, Class<T> expectedType);
  <T> Class<? extends T> findClass(String cname, Class<T> expectedType);

  /** Creates an instance of the name and expected type */
  // TODO: fix exception handling
  public default <T> T newInstance(String cname, Class<T> expectedType) {
  default <T> T newInstance(String cname, Class<T> expectedType) {
    Class<? extends T> clazz = findClass(cname, expectedType);
    try {
      return clazz.getConstructor().newInstance();

@@ -24,8 +24,8 @@ package org.apache.lucene.util;
public abstract class RollingBuffer<T extends RollingBuffer.Resettable> {

  /** Implement to reset an instance */
  public static interface Resettable {
    public void reset();
  public interface Resettable {
    void reset();
  }

  @SuppressWarnings("unchecked")

@@ -29,7 +29,7 @@ public interface Unwrappable<T> {

  /** Unwraps all {@code Unwrappable}s around the given object. */
  @SuppressWarnings("unchecked")
  public static <T> T unwrapAll(T o) {
  static <T> T unwrapAll(T o) {
    while (o instanceof Unwrappable) {
      o = ((Unwrappable<T>) o).unwrap();
    }

@@ -45,5 +45,5 @@ public interface AutomatonProvider {
   * @return automaton
   * @throws IOException if errors occur
   */
  public Automaton getAutomaton(String name) throws IOException;
  Automaton getAutomaton(String name) throws IOException;
}

@@ -68,7 +68,7 @@ final class BKDUtil {

  /** Predicate for a fixed number of bytes. */
  @FunctionalInterface
  public static interface ByteArrayPredicate {
  public interface ByteArrayPredicate {

    /** Test bytes starting from the given offsets. */
    boolean test(byte[] a, int aOffset, byte[] b, int bOffset);

@@ -109,10 +109,23 @@ public final class FST<T> implements Accountable {

  // Increment version to change it
  private static final String FILE_FORMAT_NAME = "FST";
  private static final int VERSION_START = 6;

  /** First supported version, this is the version that was used when releasing Lucene 7.0. */
  public static final int VERSION_START = 6;

  // Version 7 introduced direct addressing for arcs, but it's not recorded here because it doesn't
  // need version checks on the read side, it uses new flag values on arcs instead.

  private static final int VERSION_LITTLE_ENDIAN = 8;
  private static final int VERSION_CONTINUOUS_ARCS = 9;
  static final int VERSION_CURRENT = VERSION_CONTINUOUS_ARCS;

  /** Version that started storing continuous arcs. */
  public static final int VERSION_CONTINUOUS_ARCS = 9;

  /** Current version. */
  public static final int VERSION_CURRENT = VERSION_CONTINUOUS_ARCS;

  /** Version that was used when releasing Lucene 9.0. */
  public static final int VERSION_90 = VERSION_LITTLE_ENDIAN;

  // Never serialized; just used to represent the virtual
  // final node w/ no arcs:

@@ -29,7 +29,6 @@ import static org.apache.lucene.util.fst.FST.BIT_STOP_NODE;
import static org.apache.lucene.util.fst.FST.BIT_TARGET_NEXT;
import static org.apache.lucene.util.fst.FST.FINAL_END_NODE;
import static org.apache.lucene.util.fst.FST.NON_FINAL_END_NODE;
import static org.apache.lucene.util.fst.FST.VERSION_CURRENT;
import static org.apache.lucene.util.fst.FST.getNumPresenceBytes;

import java.io.IOException;

@@ -135,6 +134,7 @@ public class FSTCompiler<T> {

  final boolean allowFixedLengthArcs;
  final float directAddressingMaxOversizingFactor;
  final int version;
  long directAddressingExpansionCredit;

  // the DataOutput to stream the FST bytes to

@@ -163,10 +163,12 @@ public class FSTCompiler<T> {
      Outputs<T> outputs,
      boolean allowFixedLengthArcs,
      DataOutput dataOutput,
      float directAddressingMaxOversizingFactor)
      float directAddressingMaxOversizingFactor,
      int version)
      throws IOException {
    this.allowFixedLengthArcs = allowFixedLengthArcs;
    this.directAddressingMaxOversizingFactor = directAddressingMaxOversizingFactor;
    this.version = version;
    // pad: ensure no node gets address 0 which is reserved to mean
    // the stop state w/ no arcs
    dataOutput.writeByte((byte) 0);

@@ -174,7 +176,7 @@ public class FSTCompiler<T> {
    this.dataOutput = dataOutput;
    fst =
        new FST<>(
            new FST.FSTMetadata<>(inputType, outputs, null, -1, VERSION_CURRENT, 0),
            new FST.FSTMetadata<>(inputType, outputs, null, -1, version, 0),
            toFSTReader(dataOutput));
    if (suffixRAMLimitMB < 0) {
      throw new IllegalArgumentException("ramLimitMB must be >= 0; got: " + suffixRAMLimitMB);

@@ -241,6 +243,7 @@ public class FSTCompiler<T> {
    private boolean allowFixedLengthArcs = true;
    private DataOutput dataOutput;
    private float directAddressingMaxOversizingFactor = DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR;
    private int version = FST.VERSION_CURRENT;

    /**
     * @param inputType The input type (transition labels). Can be anything from {@link INPUT_TYPE}

@@ -325,6 +328,21 @@ public class FSTCompiler<T> {
      return this;
    }

    /** Expert: Set the codec version. */
    public Builder<T> setVersion(int version) {
      if (version < FST.VERSION_90 || version > FST.VERSION_CURRENT) {
        throw new IllegalArgumentException(
            "Expected version in range ["
                + FST.VERSION_90
                + ", "
                + FST.VERSION_CURRENT
                + "], got "
                + version);
      }
      this.version = version;
      return this;
    }

    /** Creates a new {@link FSTCompiler}. */
    public FSTCompiler<T> build() throws IOException {
      // create a default DataOutput if not specified

@@ -337,7 +355,8 @@ public class FSTCompiler<T> {
          outputs,
          allowFixedLengthArcs,
          dataOutput,
          directAddressingMaxOversizingFactor);
          directAddressingMaxOversizingFactor,
          version);
    }
  }
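
A sketch of how the new builder option might be used to target an older supported
codec version (illustrative only: the outputs type and the version choice are
placeholders, and the call would still need IOException handling). FSTs built with
a version before VERSION_CONTINUOUS_ARCS skip the new continuous-arcs encoding, as
the change below shows:

    // Build an FST that stays readable by the Lucene 9.0 format.
    FSTCompiler<Long> compiler =
        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, PositiveIntOutputs.getSingleton())
            .setVersion(FST.VERSION_90)
            .build();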

@@ -517,7 +536,7 @@ public class FSTCompiler<T> {
    int labelRange = nodeIn.arcs[nodeIn.numArcs - 1].label - nodeIn.arcs[0].label + 1;
    assert labelRange > 0;
    boolean continuousLabel = labelRange == nodeIn.numArcs;
    if (continuousLabel) {
    if (continuousLabel && version >= FST.VERSION_CONTINUOUS_ARCS) {
      writeNodeForDirectAddressingOrContinuous(
          nodeIn, maxBytesPerArcWithoutLabel, labelRange, true);
      continuousNodeCount++;

@@ -120,7 +120,7 @@ public class PackedInts {
      throw new IllegalArgumentException("Unknown format id: " + id);
    }

    private Format(int id) {
    Format(int id) {
      this.id = id;
    }

@@ -231,7 +231,7 @@ public class PackedInts {
  }

  /** A decoder for packed integers. */
  public static interface Decoder {
  public interface Decoder {

    /**
     * The minimum number of long blocks to encode in a single iteration, when using long encoding.

@@ -299,7 +299,7 @@ public class PackedInts {
  }

  /** An encoder for packed integers. */
  public static interface Encoder {
  public interface Encoder {

    /**
     * The minimum number of long blocks to encode in a single iteration, when using long encoding.

@@ -400,7 +400,7 @@ public class PackedInts {
  }

  /** Run-once iterator interface, to decode previously saved PackedInts. */
  public static interface ReaderIterator {
  public interface ReaderIterator {
    /** Returns next value */
    long next() throws IOException;

@@ -0,0 +1,516 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.IntStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;

abstract class BaseVectorSimilarityQueryTestCase<
        V, F extends Field, Q extends AbstractVectorSimilarityQuery>
    extends LuceneTestCase {
  String vectorField, idField;
  VectorSimilarityFunction function;
  int numDocs, dim;

  abstract V getRandomVector(int dim);

  abstract float compare(V vector1, V vector2);

  abstract boolean checkEquals(V vector1, V vector2);

  abstract F getVectorField(String name, V vector, VectorSimilarityFunction function);

  abstract Q getVectorQuery(
      String field, V vector, float traversalSimilarity, float resultSimilarity, Query filter);

  abstract Q getThrowingVectorQuery(
      String field, V vector, float traversalSimilarity, float resultSimilarity, Query filter);

  public void testEquals() {
    String field1 = "f1", field2 = "f2";

    V vector1 = getRandomVector(dim);
    V vector2;
    do {
      vector2 = getRandomVector(dim);
    } while (checkEquals(vector1, vector2));

    float traversalSimilarity1 = 0.3f, traversalSimilarity2 = 0.4f;
    float resultSimilarity1 = 0.4f, resultSimilarity2 = 0.5f;

    Query filter1 = new TermQuery(new Term("t1", "v1"));
    Query filter2 = new TermQuery(new Term("t2", "v2"));

    Query query = getVectorQuery(field1, vector1, traversalSimilarity1, resultSimilarity1, filter1);

    // Everything is equal
    assertEquals(
        query, getVectorQuery(field1, vector1, traversalSimilarity1, resultSimilarity1, filter1));

    // Null check
    assertNotEquals(query, null);

    // Different field
    assertNotEquals(
        query, getVectorQuery(field2, vector1, traversalSimilarity1, resultSimilarity1, filter1));

    // Different vector
    assertNotEquals(
        query, getVectorQuery(field1, vector2, traversalSimilarity1, resultSimilarity1, filter1));

    // Different traversalSimilarity
    assertNotEquals(
        query, getVectorQuery(field1, vector1, traversalSimilarity2, resultSimilarity1, filter1));

    // Different resultSimilarity
    assertNotEquals(
        query, getVectorQuery(field1, vector1, traversalSimilarity1, resultSimilarity2, filter1));

    // Different filter
    assertNotEquals(
        query, getVectorQuery(field1, vector1, traversalSimilarity1, resultSimilarity1, filter2));
  }

  public void testEmptyIndex() throws IOException {
    // Do not index any vectors
    numDocs = 0;

    try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
        IndexReader reader = DirectoryReader.open(indexStore)) {
      IndexSearcher searcher = newSearcher(reader);

      Query query =
          getVectorQuery(
              vectorField,
              getRandomVector(dim),
              Float.NEGATIVE_INFINITY,
              Float.NEGATIVE_INFINITY,
              null);

      // Check that no vectors are found
      assertEquals(0, searcher.count(query));
    }
  }

  public void testExtremes() throws IOException {
    try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
        IndexReader reader = DirectoryReader.open(indexStore)) {
      IndexSearcher searcher = newSearcher(reader);

      // All vectors are above -Infinity
      Query query1 =
          getVectorQuery(
              vectorField,
              getRandomVector(dim),
              Float.NEGATIVE_INFINITY,
              Float.NEGATIVE_INFINITY,
              null);

      // Check that all vectors are found
      assertEquals(numDocs, searcher.count(query1));

      // No vectors are above +Infinity
      Query query2 =
          getVectorQuery(
              vectorField,
              getRandomVector(dim),
              Float.POSITIVE_INFINITY,
              Float.POSITIVE_INFINITY,
              null);

      // Check that no vectors are found
      assertEquals(0, searcher.count(query2));
    }
  }

  public void testRandomFilter() throws IOException {
    // Filter a sub-range from 0 to numDocs
    int startIndex = random().nextInt(numDocs);
    int endIndex = random().nextInt(startIndex, numDocs);
    Query filter = IntField.newRangeQuery(idField, startIndex, endIndex);

    try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
        IndexReader reader = DirectoryReader.open(indexStore)) {
      IndexSearcher searcher = newSearcher(reader);

      Query query =
          getVectorQuery(
              vectorField,
              getRandomVector(dim),
              Float.NEGATIVE_INFINITY,
              Float.NEGATIVE_INFINITY,
              filter);

      ScoreDoc[] scoreDocs = searcher.search(query, numDocs).scoreDocs;
      for (ScoreDoc scoreDoc : scoreDocs) {
        int id = getId(searcher, scoreDoc.doc);

        // Check that returned document is in selected range
        assertTrue(id >= startIndex && id <= endIndex);
      }
      // Check that all filtered vectors are found
      assertEquals(endIndex - startIndex + 1, scoreDocs.length);
    }
  }

  public void testFilterWithNoMatches() throws IOException {
    try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
        IndexReader reader = DirectoryReader.open(indexStore)) {
      IndexSearcher searcher = newSearcher(reader);

      // Non-existent field
      Query filter1 = new TermQuery(new Term("random_field", "random_value"));
      Query query1 =
          getVectorQuery(
              vectorField,
              getRandomVector(dim),
              Float.NEGATIVE_INFINITY,
              Float.NEGATIVE_INFINITY,
              filter1);

      // Check that no vectors are found
      assertEquals(0, searcher.count(query1));

      // Field exists, but value of -1 is not indexed
      Query filter2 = IntField.newExactQuery(idField, -1);
      Query query2 =
          getVectorQuery(
              vectorField,
              getRandomVector(dim),
              Float.NEGATIVE_INFINITY,
              Float.NEGATIVE_INFINITY,
              filter2);

      // Check that no vectors are found
      assertEquals(0, searcher.count(query2));
    }
  }

  public void testDimensionMismatch() throws IOException {
    // Different dimension
    int newDim = atLeast(dim + 1);

    try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
        IndexReader reader = DirectoryReader.open(indexStore)) {
      IndexSearcher searcher = newSearcher(reader);

      Query query =
          getVectorQuery(
              vectorField,
              getRandomVector(newDim),
              Float.NEGATIVE_INFINITY,
              Float.NEGATIVE_INFINITY,
              null);

      // Check that an exception for differing dimensions is thrown
      IllegalArgumentException e =
          expectThrows(IllegalArgumentException.class, () -> searcher.count(query));
      assertEquals(
          String.format(
              Locale.ROOT,
              "vector query dimension: %d differs from field dimension: %d",
              newDim,
              dim),
          e.getMessage());
    }
  }

  public void testNonVectorsField() throws IOException {
    try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
        IndexReader reader = DirectoryReader.open(indexStore)) {
      IndexSearcher searcher = newSearcher(reader);

      // Non-existent field
      Query query1 =
          getVectorQuery(
              "random_field",
              getRandomVector(dim),
              Float.NEGATIVE_INFINITY,
              Float.NEGATIVE_INFINITY,
              null);
      assertEquals(0, searcher.count(query1));

      // Indexed as int field
      Query query2 =
          getVectorQuery(
              idField,
              getRandomVector(dim),
              Float.NEGATIVE_INFINITY,
              Float.NEGATIVE_INFINITY,
              null);
      assertEquals(0, searcher.count(query2));
    }
  }

  public void testSomeDeletes() throws IOException {
    // Delete a sub-range from 0 to numDocs
    int startIndex = random().nextInt(numDocs);
    int endIndex = random().nextInt(startIndex, numDocs);
    Query delete = IntField.newRangeQuery(idField, startIndex, endIndex);

    try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
        IndexWriter w = new IndexWriter(indexStore, newIndexWriterConfig())) {

      w.deleteDocuments(delete);
      w.commit();

      try (IndexReader reader = DirectoryReader.open(indexStore)) {
        IndexSearcher searcher = newSearcher(reader);

        Query query =
            getVectorQuery(
                vectorField,
                getRandomVector(dim),
                Float.NEGATIVE_INFINITY,
                Float.NEGATIVE_INFINITY,
                null);

        ScoreDoc[] scoreDocs = searcher.search(query, numDocs).scoreDocs;
        for (ScoreDoc scoreDoc : scoreDocs) {
          int id = getId(searcher, scoreDoc.doc);

          // Check that returned document is not deleted
          assertFalse(id >= startIndex && id <= endIndex);
        }
        // Check that all live docs are returned
        assertEquals(numDocs - endIndex + startIndex - 1, scoreDocs.length);
      }
    }
  }

  public void testAllDeletes() throws IOException {
    try (Directory dir = getIndexStore(getRandomVectors(numDocs, dim));
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
      // Delete all documents
      w.deleteDocuments(new MatchAllDocsQuery());
      w.commit();

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);

        Query query =
            getVectorQuery(
                vectorField,
                getRandomVector(dim),
                Float.NEGATIVE_INFINITY,
                Float.NEGATIVE_INFINITY,
                null);

        // Check that no vectors are found
        assertEquals(0, searcher.count(query));
      }
    }
  }

  public void testBoostQuery() throws IOException {
    // Define the boost and allowed delta
    float boost = random().nextFloat(5, 10);
    float delta = 1e-3f;

    try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
        IndexReader reader = DirectoryReader.open(indexStore)) {
      IndexSearcher searcher = newSearcher(reader);

      Query query1 =
          getVectorQuery(
              vectorField,
              getRandomVector(dim),
              Float.NEGATIVE_INFINITY,
              Float.NEGATIVE_INFINITY,
              null);
      ScoreDoc[] scoreDocs1 = searcher.search(query1, numDocs).scoreDocs;

      Query query2 = new BoostQuery(query1, boost);
      ScoreDoc[] scoreDocs2 = searcher.search(query2, numDocs).scoreDocs;

      // Check that all docs are identical, with boosted scores
      assertEquals(scoreDocs1.length, scoreDocs2.length);
      for (int i = 0; i < scoreDocs1.length; i++) {
        assertEquals(scoreDocs1[i].doc, scoreDocs2[i].doc);
        assertEquals(boost * scoreDocs1[i].score, scoreDocs2[i].score, delta);
      }
    }
  }

  public void testVectorsAboveSimilarity() throws IOException {
    // Pick number of docs to accept
    int numAccepted = random().nextInt(numDocs / 3, numDocs / 2);
    float delta = 1e-3f;

    V[] vectors = getRandomVectors(numDocs, dim);
    V queryVector = getRandomVector(dim);

    // Find score above which we get (at least) numAccepted vectors
    float resultSimilarity = getSimilarity(vectors, queryVector, numAccepted);

    // Cache scores of vectors
    Map<Integer, Float> scores = new HashMap<>();
    for (int i = 0; i < numDocs; i++) {
      float score = compare(queryVector, vectors[i]);
      if (score >= resultSimilarity) {
        scores.put(i, score);
      }
    }

    try (Directory indexStore = getIndexStore(vectors);
        IndexReader reader = DirectoryReader.open(indexStore)) {
      IndexSearcher searcher = newSearcher(reader);

      Query query =
          getVectorQuery(vectorField, queryVector, Float.NEGATIVE_INFINITY, resultSimilarity, null);

      ScoreDoc[] scoreDocs = searcher.search(query, numDocs).scoreDocs;
      for (ScoreDoc scoreDoc : scoreDocs) {
        int id = getId(searcher, scoreDoc.doc);

        // Check that the collected result is above accepted similarity
        assertTrue(scores.containsKey(id));

        // Check that the score is correct
        assertEquals(scores.get(id), scoreDoc.score, delta);
      }

      // Check that all results are collected
      assertEquals(scores.size(), scoreDocs.length);
    }
  }

  public void testFallbackToExact() throws IOException {
    // Restrictive filter, along with similarity to visit a large number of nodes
    int numFiltered = random().nextInt(numDocs / 10, numDocs / 5);
    int targetVisited = random().nextInt(numFiltered * 2, numDocs);

    V[] vectors = getRandomVectors(numDocs, dim);
    V queryVector = getRandomVector(dim);

    float resultSimilarity = getSimilarity(vectors, queryVector, targetVisited);
    Query filter = IntField.newSetQuery(idField, getFiltered(numFiltered));

    try (Directory indexStore = getIndexStore(vectors);
        IndexReader reader = DirectoryReader.open(indexStore)) {
      IndexSearcher searcher = newSearcher(reader);

      Query query =
          getThrowingVectorQuery(
              vectorField, queryVector, resultSimilarity, resultSimilarity, filter);

      // Falls back to exact search
      expectThrows(UnsupportedOperationException.class, () -> searcher.count(query));
    }
  }

  public void testApproximate() throws IOException {
    // Non-restrictive filter, along with similarity to visit a small number of nodes
    int numFiltered = random().nextInt((numDocs * 4) / 5, numDocs);
    int targetVisited = random().nextInt(numFiltered / 10, numFiltered / 8);

    V[] vectors = getRandomVectors(numDocs, dim);
    V queryVector = getRandomVector(dim);

    float resultSimilarity = getSimilarity(vectors, queryVector, targetVisited);
    Query filter = IntField.newSetQuery(idField, getFiltered(numFiltered));

    try (Directory indexStore = getIndexStore(vectors);
        IndexWriter w = new IndexWriter(indexStore, newIndexWriterConfig())) {
      // Force merge because smaller segments have few filtered docs and often fall back to exact
      // search, making this test flaky
      w.forceMerge(1);
      w.commit();

      try (IndexReader reader = DirectoryReader.open(indexStore)) {
        IndexSearcher searcher = newSearcher(reader);

        Query query =
            getThrowingVectorQuery(
                vectorField, queryVector, resultSimilarity, resultSimilarity, filter);

        // Does not fall back to exact search
        assertTrue(searcher.count(query) <= numFiltered);
      }
    }
  }

  private float getSimilarity(V[] vectors, V queryVector, int targetVisited) {
    assertTrue(targetVisited >= 0 && targetVisited <= numDocs);
    if (targetVisited == 0) {
      return Float.POSITIVE_INFINITY;
    }

    float[] scores = new float[numDocs];
    for (int i = 0; i < numDocs; i++) {
      scores[i] = compare(queryVector, vectors[i]);
    }
    Arrays.sort(scores);

    return scores[numDocs - targetVisited];
  }

  private int[] getFiltered(int numFiltered) {
    Set<Integer> accepted = new HashSet<>();
    for (int i = 0; i < numFiltered; ) {
      int index = random().nextInt(numDocs);
      if (!accepted.contains(index)) {
        accepted.add(index);
        i++;
      }
    }
    return accepted.stream().mapToInt(Integer::intValue).toArray();
  }

  private int getId(IndexSearcher searcher, int doc) throws IOException {
    return Objects.requireNonNull(searcher.storedFields().document(doc).getField(idField))
        .numericValue()
        .intValue();
  }

  @SuppressWarnings("unchecked")
  V[] getRandomVectors(int numDocs, int dim) {
    return (V[]) IntStream.range(0, numDocs).mapToObj(i -> getRandomVector(dim)).toArray();
  }

  @SafeVarargs
  final Directory getIndexStore(V... vectors) throws IOException {
    Directory dir = newDirectory();
    try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
      for (int i = 0; i < vectors.length; ++i) {
        Document doc = new Document();
        doc.add(getVectorField(vectorField, vectors[i], function));
        doc.add(new IntField(idField, i, Field.Store.YES));
        writer.addDocument(doc);
      }
    }
    return dir;
  }
}

@@ -221,7 +221,7 @@ public class TestBoolean2 extends LuceneTestCase {
    bigSearcher = null;
  }

  private static String[] docFields = {
  private static final String[] docFields = {
    "w1 w2 w3 w4 w5", "w1 w3 w2 w3", "w1 xx w2 yy w3", "w1 w3 xx w2 yy mm"
  };

@@ -423,8 +423,8 @@ public class TestBoolean2 extends LuceneTestCase {

  // used to set properties or change every BooleanQuery
  // generated from randBoolQuery.
  public static interface Callback {
    public void postCreate(BooleanQuery.Builder q);
  public interface Callback {
    void postCreate(BooleanQuery.Builder q);
  }

  // Random rnd is passed in so that the exact same random query may be created

@@ -0,0 +1,85 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.util.Arrays;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.TestVectorUtil;
import org.junit.Before;

public class TestByteVectorSimilarityQuery
    extends BaseVectorSimilarityQueryTestCase<
        byte[], KnnByteVectorField, ByteVectorSimilarityQuery> {

  @Before
  public void setup() {
    vectorField = getClass().getSimpleName() + ":VectorField";
    idField = getClass().getSimpleName() + ":IdField";
    function = VectorSimilarityFunction.EUCLIDEAN;
    numDocs = atLeast(100);
    dim = atLeast(50);
  }

  @Override
  byte[] getRandomVector(int dim) {
    return TestVectorUtil.randomVectorBytes(dim);
  }

  @Override
  float compare(byte[] vector1, byte[] vector2) {
    return function.compare(vector1, vector2);
  }

  @Override
  boolean checkEquals(byte[] vector1, byte[] vector2) {
    return Arrays.equals(vector1, vector2);
  }

  @Override
  KnnByteVectorField getVectorField(String name, byte[] vector, VectorSimilarityFunction function) {
    return new KnnByteVectorField(name, vector, function);
  }

  @Override
  ByteVectorSimilarityQuery getVectorQuery(
      String field,
      byte[] vector,
      float traversalSimilarity,
      float resultSimilarity,
      Query filter) {
    return new ByteVectorSimilarityQuery(
        field, vector, traversalSimilarity, resultSimilarity, filter);
  }

  @Override
  ByteVectorSimilarityQuery getThrowingVectorQuery(
      String field,
      byte[] vector,
      float traversalSimilarity,
      float resultSimilarity,
      Query filter) {
    return new ByteVectorSimilarityQuery(
        field, vector, traversalSimilarity, resultSimilarity, filter) {
      @Override
      VectorScorer createVectorScorer(LeafReaderContext context) {
        throw new UnsupportedOperationException();
      }
    };
  }
}

@@ -0,0 +1,86 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.util.Arrays;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.TestVectorUtil;
import org.junit.Before;

public class TestFloatVectorSimilarityQuery
    extends BaseVectorSimilarityQueryTestCase<
        float[], KnnFloatVectorField, FloatVectorSimilarityQuery> {

  @Before
  public void setup() {
    vectorField = getClass().getSimpleName() + ":VectorField";
    idField = getClass().getSimpleName() + ":IdField";
    function = VectorSimilarityFunction.EUCLIDEAN;
    numDocs = atLeast(100);
    dim = atLeast(50);
  }

  @Override
  float[] getRandomVector(int dim) {
    return TestVectorUtil.randomVector(dim);
  }

  @Override
  float compare(float[] vector1, float[] vector2) {
    return function.compare(vector1, vector2);
  }

  @Override
  boolean checkEquals(float[] vector1, float[] vector2) {
    return Arrays.equals(vector1, vector2);
  }

  @Override
  KnnFloatVectorField getVectorField(
      String name, float[] vector, VectorSimilarityFunction function) {
    return new KnnFloatVectorField(name, vector, function);
  }

  @Override
  FloatVectorSimilarityQuery getVectorQuery(
      String field,
      float[] vector,
      float traversalSimilarity,
      float resultSimilarity,
      Query filter) {
    return new FloatVectorSimilarityQuery(
        field, vector, traversalSimilarity, resultSimilarity, filter);
  }

  @Override
  FloatVectorSimilarityQuery getThrowingVectorQuery(
      String field,
      float[] vector,
      float traversalSimilarity,
      float resultSimilarity,
      Query filter) {
    return new FloatVectorSimilarityQuery(
        field, vector, traversalSimilarity, resultSimilarity, filter) {
      @Override
      VectorScorer createVectorScorer(LeafReaderContext context) {
        throw new UnsupportedOperationException();
      }
    };
  }
}

@@ -0,0 +1,54 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import org.apache.lucene.tests.util.LuceneTestCase;

public class TestVectorSimilarityCollector extends LuceneTestCase {
  public void testResultCollection() {
    float traversalSimilarity = 0.3f, resultSimilarity = 0.5f;

    VectorSimilarityCollector collector =
        new VectorSimilarityCollector(traversalSimilarity, resultSimilarity, Integer.MAX_VALUE);
    int[] nodes = {1, 5, 10, 4, 8, 3, 2, 6, 7, 9};
    float[] scores = {0.1f, 0.2f, 0.3f, 0.5f, 0.2f, 0.6f, 0.9f, 0.3f, 0.7f, 0.8f};

    float[] minCompetitiveSimilarities = new float[nodes.length];
    for (int i = 0; i < nodes.length; i++) {
      collector.collect(nodes[i], scores[i]);
      minCompetitiveSimilarities[i] = collector.minCompetitiveSimilarity();
    }

    ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
    int[] resultNodes = new int[scoreDocs.length];
    float[] resultScores = new float[scoreDocs.length];
    for (int i = 0; i < scoreDocs.length; i++) {
      resultNodes[i] = scoreDocs[i].doc;
      resultScores[i] = scoreDocs[i].score;
    }

    // All nodes above resultSimilarity appear in order of collection
    assertArrayEquals(new int[] {4, 3, 2, 7, 9}, resultNodes);
    assertArrayEquals(new float[] {0.5f, 0.6f, 0.9f, 0.7f, 0.8f}, resultScores, 1e-3f);

    // Min competitive similarity is minimum of traversalSimilarity or best result encountered
    assertArrayEquals(
        new float[] {0.1f, 0.2f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f},
        minCompetitiveSimilarities,
        1e-3f);
  }
}

@@ -17,8 +17,14 @@
package org.apache.lucene.util.bkd;

import java.io.IOException;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.store.Directory;

@@ -28,6 +34,7 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.SuppressForbidden;

public class TestDocIdsWriter extends LuceneTestCase {

@@ -150,4 +157,28 @@ public class TestDocIdsWriter extends LuceneTestCase {
    }
    dir.deleteFile("tmp");
  }

  // This simple test tickles a JVM C2 JIT crash on JDKs less than 21.0.1
  // Crashes only when run with C2, so with the environment variable `CI` set.
  // Regardless of whether C2 is enabled or not, the test should never fail.
  public void testCrash() throws IOException {
    assumeTrue("Requires C2, which is only enabled when CI env is set", getCIEnv() != null);
    int itrs = atLeast(100);
    for (int i = 0; i < itrs; i++) {
      try (Directory dir = newDirectory();
          IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null))) {
        for (int d = 0; d < 20_000; d++) {
          iw.addDocument(
              List.of(new IntPoint("foo", 0), new SortedNumericDocValuesField("bar", 0)));
        }
      }
    }
  }

  @SuppressForbidden(reason = "needed to check if C2 is enabled")
  @SuppressWarnings("removal")
  private static String getCIEnv() {
    PrivilegedAction<String> pa = () -> System.getenv("CI");
    return AccessController.doPrivileged(pa);
  }
}
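
Assuming a Unix-like shell, and using the same test-selection syntax as the testing
help docs in this repository, the reproducer above could be run with C2 enabled by
setting the CI environment variable, for example:

    CI=true gradlew -p lucene/core test --tests "TestDocIdsWriter.testCrash"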

@@ -326,10 +326,6 @@ public class TestDrillSideways extends FacetTestCase {
      // termination occurs (i.e., #finish is properly called in that scenario):
      assertEquals(1, baseFC.getMatchingDocs().size());
      assertEquals(1, dimFC.getMatchingDocs().size());
      FacetsCollector.MatchingDocs baseMD = baseFC.getMatchingDocs().get(0);
      FacetsCollector.MatchingDocs dimMD = dimFC.getMatchingDocs().get(0);
      assertEquals(1, baseMD.totalHits);
      assertEquals(1, dimMD.totalHits);
    }
  }
}

@@ -27,7 +27,7 @@ import org.apache.lucene.search.SimpleCollector;
abstract class DocValuesTermsCollector<DV> extends SimpleCollector {

  @FunctionalInterface
  static interface Function<R> {
  interface Function<R> {
    R apply(LeafReader t) throws IOException;
  }

@@ -200,6 +200,7 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
      }
      toAdd.add(makeParent(new int[] {6, 7, 8, 9, 10}));
      w.addDocuments(toAdd);
      w.forceMerge(1);
    }
    try (IndexReader reader = DirectoryReader.open(d)) {
      assertEquals(1, reader.leaves().size());

@@ -41,7 +41,7 @@ public class TestQueryTreeBuilder extends LuceneTestCase {
    assertEquals("OK", result);
  }

  private static interface DummyQueryNodeInterface extends QueryNode {}
  private interface DummyQueryNodeInterface extends QueryNode {}

  private abstract static class AbstractDummyQueryNode extends QueryNodeImpl
      implements DummyQueryNodeInterface {}

@@ -80,17 +80,17 @@ public abstract class NumberRangePrefixTree extends SpatialPrefixTree {
   *
   * @lucene.experimental
   */
  public static interface NRShape extends Shape, Cloneable {
  public interface NRShape extends Shape, Cloneable {
    /** The result should be parseable by {@link #parseShape(String)}. */
    @Override
    abstract String toString();
    String toString();

    /**
     * Returns this shape rounded to the target level. If we are already more coarse than the level
     * then the shape is simply returned. The result may refer to internal state of the argument so
     * you may want to clone it.
     */
    public NRShape roundToLevel(int targetLevel);
    NRShape roundToLevel(int targetLevel);
  }

  //

@@ -234,7 +234,7 @@ public abstract class NumberRangePrefixTree extends SpatialPrefixTree {
   *
   * @lucene.experimental
   */
  public static interface UnitNRShape extends NRShape, Comparable<UnitNRShape> {
  public interface UnitNRShape extends NRShape, Comparable<UnitNRShape> {
    // note: formerly known as LevelledValue; thus some variables still use 'lv'

    /** Get the prefix tree level, the higher the more precise. 0 means the world (universe). */

@@ -1120,12 +1120,12 @@ class GeoComplexPolygon extends GeoBasePolygon {
   * into the traversal method of a tree, and each edge that matches will cause this object to be
   * called.
   */
  private static interface EdgeIterator {
  private interface EdgeIterator {
    /**
     * @param edge is the edge that matched.
     * @return true if the iteration should continue, false otherwise.
     */
    public boolean matches(final Edge edge);
    boolean matches(final Edge edge);
  }

  /**

@@ -1133,16 +1133,16 @@ class GeoComplexPolygon extends GeoBasePolygon {
   * implementing this interface into the traversal method of a tree, and each edge that matches
   * will cause this object to be called.
   */
  private static interface CountingEdgeIterator extends EdgeIterator {
  private interface CountingEdgeIterator extends EdgeIterator {
    /**
     * @return the number of edges that were crossed.
     */
    public int getCrossingCount();
    int getCrossingCount();

    /**
     * @return true if the endpoint was on an edge.
     */
    public boolean isOnEdge();
    boolean isOnEdge();
  }

  /**

@@ -100,8 +100,8 @@ public final class SuggestRebuildTestUtil {
   * Simple marker interface to allow {@link #testLookupsDuringReBuild} callbacks to throw
   * Exceptions
   */
  public static interface ExceptionalCallback {
    public void check(final Lookup suggester) throws Exception;
  public interface ExceptionalCallback {
    void check(final Lookup suggester) throws Exception;
  }

  /**

@@ -1200,7 +1200,7 @@ public class TestFuzzySuggester extends LuceneTestCase {
  // and tweaked to return the edit distance not the float
  // lucene measure

  /* Finds unicode (code point) Levenstein (edit) distance
  /* Finds unicode (code point) Levenshtein (edit) distance
   * between two strings, including transpositions. */
  public int getDistance(String target, String other, boolean allowTransposition) {
    IntsRef targetPoints;

@@ -90,7 +90,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   * Attribute that records if it was cleared or not. This is used for testing that
   * clearAttributes() was called correctly.
   */
  public static interface CheckClearAttributesAttribute extends Attribute {
  public interface CheckClearAttributesAttribute extends Attribute {
    boolean getAndResetClearCalled();
  }

@@ -129,7 +129,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {

    @Override
    public void copyTo(AttributeImpl target) {
      ((CheckClearAttributesAttributeImpl) target).clear();
      target.clear();
    }

    @Override

@@ -38,6 +38,7 @@ import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

@@ -47,6 +48,7 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;

@@ -1610,4 +1612,29 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
      }
    }
  }

  /** Test realistic data, which is often better at uncovering real bugs. */
  @Nightly // this test takes a few seconds
  public void testLineFileDocs() throws IOException {
    // Use a FS dir and a non-randomized IWC to not slow down indexing
    try (Directory dir = newFSDirectory(createTempDir())) {
      try (LineFileDocs docs = new LineFileDocs(random());
          IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
        final int numDocs = atLeast(10_000);
        for (int i = 0; i < numDocs; ++i) {
          // Only keep the body field, and don't index term vectors on it, we only care about
          // postings
          Document doc = docs.nextDoc();
          IndexableField body = doc.getField("body");
          assertNotNull(body);
          assertNotNull(body.stringValue());
          assertNotEquals(IndexOptions.NONE, body.fieldType().indexOptions());
          body = new TextField("body", body.stringValue(), Store.NO);
          w.addDocument(Collections.singletonList(body));
        }
        w.forceMerge(1);
      }
      TestUtil.checkIndex(dir);
    }
  }
}

@@ -30,7 +30,7 @@ import org.junit.runners.model.Statement;
 */
public final class TestRuleIgnoreTestSuites implements TestRule {
  /** Marker interface for nested suites that should be ignored if executed in stand-alone mode. */
  public static interface NestedTestSuite {}
  public interface NestedTestSuite {}

  /** A boolean system property indicating nested suites should be executed normally. */
  public static final String PROPERTY_RUN_NESTED = "tests.runnested";

@@ -140,7 +140,7 @@ public class TestRuleLimitSysouts extends TestRuleAdapter {
  /** Test failures from any tests or rules before. */
  private final TestRuleMarkFailure failureMarker;

  static interface LimitPredicate {
  interface LimitPredicate {
    void check(long before, long after) throws IOException;
  }