Merge branch 'main' into java_21

ChrisHegarty 2024-01-31 16:19:32 +00:00
commit 05b23abe92
65 changed files with 1929 additions and 155 deletions

View File

@@ -67,6 +67,13 @@
</maintainer>
<!-- NOTE: please insert releases in numeric order, NOT chronologically. -->
<release>
<Version>
<name>lucene-9.9.0</name>
<created>2023-12-04</created>
<revision>9.9.0</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-9.8.0</name>

View File

@@ -62,7 +62,8 @@ configure(project(":lucene:core")) {
classpath = configurations.apiextractor
mainClass = file("${resources}/ExtractJdkApis.java") as String
systemProperties = [
'user.timezone': 'UTC'
'user.timezone': 'UTC',
'file.encoding': 'UTF-8',
]
args = [
jdkVersion,

View File

@@ -189,7 +189,7 @@ public final class ExtractJdkApis {
}
@Override
public void visitPermittedSubclass(String c) {
}
}

View File

@@ -60,6 +60,9 @@ grant {
permission java.lang.RuntimePermission "getFileStoreAttributes";
permission java.lang.RuntimePermission "writeFileDescriptor";
// needed to check whether C2 is enabled (implied by the presence of the CI env)
permission java.lang.RuntimePermission "getenv.CI";
// TestLockFactoriesMultiJVM opens a random port on 127.0.0.1 (port 0 = ephemeral port range):
permission java.net.SocketPermission "127.0.0.1:0", "accept,listen,resolve";
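The added SocketPermission covers tests that bind an ephemeral port on the loopback interface. As a hedged illustration of what such a bind looks like (a standalone sketch, not the actual Lucene test):

import java.net.InetAddress;
import java.net.ServerSocket;

public class EphemeralPortSketch {
  public static void main(String[] args) throws Exception {
    // Port 0 asks the OS for a free port from the ephemeral range, which is
    // exactly the "127.0.0.1:0" pattern the policy entry above must permit.
    try (ServerSocket socket = new ServerSocket(0, 50, InetAddress.getLoopbackAddress())) {
      System.out.println("listening on 127.0.0.1:" + socket.getLocalPort());
    }
  }
}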

View File

@@ -139,7 +139,7 @@ Larger heap size
By default tests run with a 512 MB max heap. But some tests (monster/nightly)
need more heap. Use "-Dtests.heapsize" for this:
gradlew -p lucene/core test --tests "Test2BFST" -Dtest.heapsize=32g
gradlew -p lucene/core test --tests "Test2BFST" -Dtests.heapsize=32g
Run GUI tests headlessly with Xvfb (Linux only)

View File

@@ -171,7 +171,11 @@ API Changes
New Features
---------------------
(No changes)
* GITHUB#12679: Add support for similarity-based vector searches using [Byte|Float]VectorSimilarityQuery. Uses a new
VectorSimilarityCollector to find all vectors scoring above a `resultSimilarity` while traversing the HNSW graph till
better-scoring nodes are available, or the best candidate is below a score of `traversalSimilarity` in the lowest
level. (Aditya Prakash, Kaival Parikh)
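A minimal usage sketch of the new query (the field name "vector", the thresholds, and the topN are illustrative assumptions, not part of this entry):

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.FloatVectorSimilarityQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

public class VectorSimilaritySketch {
  static TopDocs searchAboveThreshold(Directory dir, float[] target) throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      // Keep traversing while candidates score >= 0.5f; collect hits scoring >= 0.7f.
      Query query = new FloatVectorSimilarityQuery("vector", target, 0.5f, 0.7f);
      return searcher.search(query, 10);
    }
  }
}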
Improvements
---------------------
@@ -191,11 +195,25 @@ Bug Fixes
* GITHUB#12558: Ensure #finish is called on all drill-sideways FacetsCollectors even when no hits are scored.
(Greg Miller)
* GITHUB#12920: Address bug in TestDrillSideways#testCollectionTerminated that could occasionally cause the test to
fail with certain random seeds. (Greg Miller)
Other
---------------------
* GITHUB#11023: Removing some dead code in CheckIndex. (Jakub Slowinski)
* GITHUB#11023: Removing @lucene.experimental tags in testXXX methods in CheckIndex. (Jakub Slowinski)
======================== Lucene 9.9.1 =======================
Bug Fixes
---------------------
* GITHUB#12898: JVM SIGSEGV crash when compiling computeCommonPrefixLengthAndBuildHistogram (Chris Hegarty)
* GITHUB#12900: Push and pop OutputAccumulator as IntersectTermsEnumFrames are pushed and popped (Guo Feng, Mike McCandless)
======================== Lucene 9.9.0 =======================
API Changes

View File

@@ -528,7 +528,7 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
assertTokenStreamContents(tf8, new String[] {"fußball"});
}
public static interface MockRetainAttribute extends Attribute {
public interface MockRetainAttribute extends Attribute {
void setRetain(boolean attr);
boolean getRetain();
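This cleanup, repeated across the commit, drops modifiers that the Java Language Specification already implies: members of an interface are implicitly public, and a member interface is implicitly static. A small illustration (not from the commit):

public interface Outer {
  // Redundant modifiers: 'public' and 'static' are implied on a member interface.
  public static interface Verbose {
    public void run();
  }

  // Identical semantics as the compiler sees it.
  interface Concise {
    void run();
  }
}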

View File

@@ -149,7 +149,7 @@ stored as a key and the record of key's transformation to its
respective stem. The transformation record is termed a patch command
(P-command). It must be ensured that P-commands are universal, and that
P-commands can transform any word to its stem. Our solution[6,8] is
based on the Levenstein metric [10], which produces P-command as the
based on the Levenshtein metric [10], which produces P-command as the
minimum cost path in a directed graph.<br>
<br>
One can imagine the P-command as an algorithm for an operator (editor)
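For readers unfamiliar with the metric: the Levenshtein distance is the minimum number of single-character insertions, deletions, and substitutions needed to turn one string into another, which is why the P-command falls out as a minimum-cost path in the edit graph. A compact sketch of the distance computation (illustrative only, not the Egothor code):

public class LevenshteinSketch {
  static int distance(String a, String b) {
    int[] prev = new int[b.length() + 1];
    int[] curr = new int[b.length() + 1];
    for (int j = 0; j <= b.length(); j++) {
      prev[j] = j; // cost of building b's prefix from an empty string
    }
    for (int i = 1; i <= a.length(); i++) {
      curr[0] = i; // cost of deleting a's first i characters
      for (int j = 1; j <= b.length(); j++) {
        int cost = a.charAt(i - 1) == b.charAt(j - 1) ? 0 : 1;
        curr[j] = Math.min(Math.min(curr[j - 1] + 1, prev[j] + 1), prev[j - 1] + cost);
      }
      int[] tmp = prev;
      prev = curr;
      curr = tmp;
    }
    return prev[b.length()]; // e.g. distance("running", "run") == 4
  }
}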

View File

@@ -75,7 +75,11 @@ public final class Lucene90RWPostingsFormat extends PostingsFormat {
try {
FieldsConsumer ret =
new Lucene90BlockTreeTermsWriter(
state, postingsWriter, minTermBlockSize, maxTermBlockSize);
state,
postingsWriter,
minTermBlockSize,
maxTermBlockSize,
Lucene90BlockTreeTermsReader.VERSION_START);
success = true;
return ret;
} finally {

View File

@@ -0,0 +1,148 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_codecs.lucene90;
import static org.apache.lucene.backward_codecs.lucene90.Lucene90ScoreSkipReader.readImpacts;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.backward_codecs.lucene90.Lucene90ScoreSkipReader.MutableImpactList;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.CompetitiveImpactAccumulator;
import org.apache.lucene.codecs.lucene90.blocktree.FieldReader;
import org.apache.lucene.codecs.lucene90.blocktree.Stats;
import org.apache.lucene.codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99SkipWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Impact;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.index.BasePostingsFormatTestCase;
import org.apache.lucene.tests.util.TestUtil;
public class TestLucene90PostingsFormat extends BasePostingsFormatTestCase {
private final Codec codec = TestUtil.alwaysPostingsFormat(new Lucene90RWPostingsFormat());
@Override
protected Codec getCodec() {
return codec;
}
/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
Directory d = newDirectory();
IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())));
for (int i = 0; i < 25; i++) {
Document doc = new Document();
doc.add(newStringField("field", Character.toString((char) (97 + i)), Field.Store.NO));
doc.add(newStringField("field", "z" + Character.toString((char) (97 + i)), Field.Store.NO));
w.addDocument(doc);
}
w.forceMerge(1);
DirectoryReader r = DirectoryReader.open(w);
assertEquals(1, r.leaves().size());
FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
// We should see exactly two blocks: one root block (prefix empty string) and one block for z*
// terms (prefix z):
Stats stats = field.getStats();
assertEquals(0, stats.floorBlockCount);
assertEquals(2, stats.nonFloorBlockCount);
r.close();
w.close();
d.close();
}
private void shouldFail(int minItemsInBlock, int maxItemsInBlock) {
expectThrows(
IllegalArgumentException.class,
() -> {
new Lucene99PostingsFormat(minItemsInBlock, maxItemsInBlock);
});
}
public void testInvalidBlockSizes() throws Exception {
shouldFail(0, 0);
shouldFail(10, 8);
shouldFail(-1, 10);
shouldFail(10, -1);
shouldFail(10, 12);
}
public void testImpactSerialization() throws IOException {
// omit norms and omit freqs
doTestImpactSerialization(Collections.singletonList(new Impact(1, 1L)));
// omit freqs
doTestImpactSerialization(Collections.singletonList(new Impact(1, 42L)));
// omit freqs with very large norms
doTestImpactSerialization(Collections.singletonList(new Impact(1, -100L)));
// omit norms
doTestImpactSerialization(Collections.singletonList(new Impact(30, 1L)));
// omit norms with large freq
doTestImpactSerialization(Collections.singletonList(new Impact(500, 1L)));
// freqs and norms, basic
doTestImpactSerialization(
Arrays.asList(
new Impact(1, 7L),
new Impact(3, 9L),
new Impact(7, 10L),
new Impact(15, 11L),
new Impact(20, 13L),
new Impact(28, 14L)));
// freqs and norms, high values
doTestImpactSerialization(
Arrays.asList(
new Impact(2, 2L),
new Impact(10, 10L),
new Impact(12, 50L),
new Impact(50, -100L),
new Impact(1000, -80L),
new Impact(1005, -3L)));
}
private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
for (Impact impact : impacts) {
acc.add(impact.freq, impact.norm);
}
try (Directory dir = newDirectory()) {
try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
Lucene99SkipWriter.writeImpacts(acc, out);
}
try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
byte[] b = new byte[Math.toIntExact(in.length())];
in.readBytes(b, 0, b.length);
List<Impact> impacts2 = readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
assertEquals(impacts, impacts2);
}
}
}
}

View File

@@ -109,6 +109,7 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -374,7 +375,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
"9.7.0-cfs",
"9.7.0-nocfs",
"9.8.0-cfs",
"9.8.0-nocfs"
"9.8.0-nocfs",
"9.9.0-cfs",
"9.9.0-nocfs"
};
public static String[] getOldNames() {
@@ -392,7 +395,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
"sorted.9.5.0",
"sorted.9.6.0",
"sorted.9.7.0",
"sorted.9.8.0"
"sorted.9.8.0",
"sorted.9.9.0"
};
public static String[] getOldSortedNames() {
@@ -2240,6 +2244,25 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
}
// #12895: test on a carefully crafted 9.8.0 index (from a small contiguous subset
// of wikibigall unique terms) that shows the read-time exception of
// IntersectTermsEnum (used by WildcardQuery)
public void testWildcardQueryExceptions990() throws IOException {
Path path = createTempDir("12895");
String name = "index.12895.9.8.0.zip";
InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name);
assertNotNull("missing zip file to reproduce #12895", resource);
TestUtil.unzip(resource, path);
try (Directory dir = newFSDirectory(path);
DirectoryReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = new IndexSearcher(reader);
searcher.count(new WildcardQuery(new Term("field", "*qx*")));
}
}
@Nightly
public void testReadNMinusTwoCommit() throws IOException {
for (String name : binarySupportedNames) {

View File

@@ -120,6 +120,8 @@ final class IntersectTermsEnum extends BaseTermsEnum {
assert setSavedStartTerm(startTerm);
currentFrame = f;
outputAccumulator.push(currentFrame.arc.output());
if (startTerm != null) {
seekToStartTerm(startTerm);
}
@@ -184,8 +186,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
int idx = currentFrame.prefix;
assert currentFrame.suffix > 0;
outputAccumulator.reset();
outputAccumulator.push(arc.output());
int initOutputCount = outputAccumulator.outputCount();
while (idx < f.prefix) {
final int target = term.bytes[idx] & 0xff;
// TODO: we could be more efficient for the next()
@@ -198,9 +199,11 @@ final class IntersectTermsEnum extends BaseTermsEnum {
}
f.arc = arc;
f.outputNum = outputAccumulator.outputCount() - initOutputCount;
assert arc.isFinal();
outputAccumulator.push(arc.nextFinalOutput());
f.load(outputAccumulator);
outputAccumulator.pop(arc.nextFinalOutput());
return f;
}
@@ -343,6 +346,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
throw NoMoreTermsException.INSTANCE;
}
final long lastFP = currentFrame.fpOrig;
outputAccumulator.pop(currentFrame.outputNum);
currentFrame = stack[currentFrame.ord - 1];
currentTransition = currentFrame.transition;
assert currentFrame.lastSubFP == lastFP;
@@ -429,6 +433,7 @@ final class IntersectTermsEnum extends BaseTermsEnum {
currentFrame = null;
return null;
}
outputAccumulator.pop(currentFrame.outputNum);
currentFrame = stack[currentFrame.ord - 1];
currentTransition = currentFrame.transition;
isSubBlock = popPushNext();

View File

@@ -89,6 +89,8 @@ final class IntersectTermsEnumFrame {
final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
int outputNum;
int startBytePos;
int suffix;

View File

@@ -238,6 +238,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
final int maxDoc;
final int minItemsInBlock;
final int maxItemsInBlock;
final int version;
final PostingsWriterBase postingsWriter;
final FieldInfos fieldInfos;
@@ -255,10 +256,37 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
int minItemsInBlock,
int maxItemsInBlock)
throws IOException {
this(
state,
postingsWriter,
minItemsInBlock,
maxItemsInBlock,
Lucene90BlockTreeTermsReader.VERSION_CURRENT);
}
/** Expert constructor that allows configuring the version, used for bw tests. */
public Lucene90BlockTreeTermsWriter(
SegmentWriteState state,
PostingsWriterBase postingsWriter,
int minItemsInBlock,
int maxItemsInBlock,
int version)
throws IOException {
validateSettings(minItemsInBlock, maxItemsInBlock);
this.minItemsInBlock = minItemsInBlock;
this.maxItemsInBlock = maxItemsInBlock;
if (version < Lucene90BlockTreeTermsReader.VERSION_START
|| version > Lucene90BlockTreeTermsReader.VERSION_CURRENT) {
throw new IllegalArgumentException(
"Expected version in range ["
+ Lucene90BlockTreeTermsReader.VERSION_START
+ ", "
+ Lucene90BlockTreeTermsReader.VERSION_CURRENT
+ "], but got "
+ version);
}
this.version = version;
this.maxDoc = state.segmentInfo.maxDoc();
this.fieldInfos = state.fieldInfos;
@@ -276,7 +304,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
CodecUtil.writeIndexHeader(
termsOut,
Lucene90BlockTreeTermsReader.TERMS_CODEC_NAME,
Lucene90BlockTreeTermsReader.VERSION_CURRENT,
version,
state.segmentInfo.getId(),
state.segmentSuffix);
@@ -289,7 +317,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
CodecUtil.writeIndexHeader(
indexOut,
Lucene90BlockTreeTermsReader.TERMS_INDEX_CODEC_NAME,
Lucene90BlockTreeTermsReader.VERSION_CURRENT,
version,
state.segmentInfo.getId(),
state.segmentSuffix);
// segment = state.segmentInfo.name;
@@ -303,7 +331,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
CodecUtil.writeIndexHeader(
metaOut,
Lucene90BlockTreeTermsReader.TERMS_META_CODEC_NAME,
Lucene90BlockTreeTermsReader.VERSION_CURRENT,
version,
state.segmentInfo.getId(),
state.segmentSuffix);
@@ -451,7 +479,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
scratchBytes.writeByte((byte) (((l >>> 57) & 0x7FL)));
}
private static final class PendingBlock extends PendingEntry {
private final class PendingBlock extends PendingEntry {
public final BytesRef prefix;
public final long fp;
public FST<BytesRef> index;
@@ -494,7 +522,11 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
assert scratchBytes.size() == 0;
// write the leading vLong in MSB order for better outputs sharing in the FST
writeMSBVLong(encodeOutput(fp, hasTerms, isFloor), scratchBytes);
if (version >= Lucene90BlockTreeTermsReader.VERSION_MSB_VLONG_OUTPUT) {
writeMSBVLong(encodeOutput(fp, hasTerms, isFloor), scratchBytes);
} else {
scratchBytes.writeVLong(encodeOutput(fp, hasTerms, isFloor));
}
if (isFloor) {
scratchBytes.writeVInt(blocks.size() - 1);
for (int i = 1; i < blocks.size(); i++) {
@@ -522,12 +554,19 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
int pageBits = Math.min(15, Math.max(6, estimateBitsRequired));
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final int fstVersion;
if (version >= Lucene90BlockTreeTermsReader.VERSION_CURRENT) {
fstVersion = FST.VERSION_CURRENT;
} else {
fstVersion = FST.VERSION_90;
}
final FSTCompiler<BytesRef> fstCompiler =
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
// Disable suffixes sharing for block tree index because suffixes are mostly dropped
// from the FST index and left in the term blocks.
.suffixRAMLimitMB(0d)
.dataOutput(getOnHeapReaderWriter(pageBits))
.setVersion(fstVersion)
.build();
// if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);

View File

@@ -495,7 +495,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
targetUpto = 0;
outputAccumulator.push(arc.nextFinalOutput());
currentFrame = pushFrame(arc, 0);
outputAccumulator.pop();
outputAccumulator.pop(arc.nextFinalOutput());
}
// if (DEBUG) {
@@ -569,7 +569,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) System.out.println(" arc is final!");
outputAccumulator.push(arc.nextFinalOutput());
currentFrame = pushFrame(arc, targetUpto);
outputAccumulator.pop();
outputAccumulator.pop(arc.nextFinalOutput());
// if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" +
// currentFrame.hasTerms);
}
@@ -767,7 +767,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
targetUpto = 0;
outputAccumulator.push(arc.nextFinalOutput());
currentFrame = pushFrame(arc, 0);
outputAccumulator.pop();
outputAccumulator.pop(arc.nextFinalOutput());
}
// if (DEBUG) {
@@ -841,7 +841,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
// if (DEBUG) System.out.println(" arc is final!");
outputAccumulator.push(arc.nextFinalOutput());
currentFrame = pushFrame(arc, targetUpto);
outputAccumulator.pop();
outputAccumulator.pop(arc.nextFinalOutput());
// if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" +
// currentFrame.hasTerms);
}
@@ -1187,14 +1187,27 @@ final class SegmentTermsEnum extends BaseTermsEnum {
void push(BytesRef output) {
if (output != Lucene90BlockTreeTermsReader.NO_OUTPUT) {
assert output.length > 0;
outputs = ArrayUtil.grow(outputs, num + 1);
outputs[num++] = output;
}
}
void pop() {
assert num > 0;
num--;
void pop(BytesRef output) {
if (output != Lucene90BlockTreeTermsReader.NO_OUTPUT) {
assert num > 0;
assert outputs[num - 1] == output;
num--;
}
}
void pop(int cnt) {
assert num >= cnt;
num -= cnt;
}
int outputCount() {
return num;
}
void reset() {

View File

@@ -83,6 +83,11 @@ import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.SuppressForbidden;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.Operations;
/**
* Basic tool and API to check the health of an index and write a new segments file that removes
@@ -1149,11 +1154,7 @@ public final class CheckIndex implements Closeable {
return segInfoStat;
}
/**
* Tests index sort order.
*
* @lucene.experimental
*/
/** Tests index sort order. */
public static Status.IndexSortStatus testSort(
CodecReader reader, Sort sort, PrintStream infoStream, boolean failFast) throws IOException {
// This segment claims its documents are sorted according to the incoming sort ... let's make
@@ -1226,11 +1227,7 @@ public final class CheckIndex implements Closeable {
return status;
}
/**
* Test live docs.
*
* @lucene.experimental
*/
/** Test live docs. */
public static Status.LiveDocStatus testLiveDocs(
CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
long startNS = System.nanoTime();
@@ -1295,11 +1292,7 @@ public final class CheckIndex implements Closeable {
return status;
}
/**
* Test field infos.
*
* @lucene.experimental
*/
/** Test field infos. */
public static Status.FieldInfoStatus testFieldInfos(
CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
long startNS = System.nanoTime();
@@ -1336,11 +1329,7 @@ public final class CheckIndex implements Closeable {
return status;
}
/**
* Test field norms.
*
* @lucene.experimental
*/
/** Test field norms. */
public static Status.FieldNormStatus testFieldNorms(
CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
long startNS = System.nanoTime();
@@ -2314,6 +2303,33 @@ public final class CheckIndex implements Closeable {
}
}
}
// Test Terms#intersect
TermsEnum allTerms = terms.iterator();
// An automaton that should match a good number of terms
Automaton a =
Operations.concatenate(
Arrays.asList(
Automata.makeAnyBinary(),
Automata.makeCharRange('a', 'e'),
Automata.makeAnyBinary()));
a = Operations.determinize(a, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
CompiledAutomaton ca = new CompiledAutomaton(a);
ByteRunAutomaton runAutomaton = new ByteRunAutomaton(a);
TermsEnum filteredTerms = terms.intersect(ca, null);
for (BytesRef term = allTerms.next(); term != null; term = allTerms.next()) {
if (runAutomaton.run(term.bytes, term.offset, term.length)) {
BytesRef filteredTerm = filteredTerms.next();
if (Objects.equals(term, filteredTerm) == false) {
throw new CheckIndexException(
"Expected next filtered term: " + term + ", but got " + filteredTerm);
}
}
}
BytesRef filteredTerm = filteredTerms.next();
if (filteredTerm != null) {
throw new CheckIndexException("Expected exhausted TermsEnum, but got " + filteredTerm);
}
}
}
@@ -2443,21 +2459,13 @@ public final class CheckIndex implements Closeable {
}
}
/**
* Test the term index.
*
* @lucene.experimental
*/
/** Test the term index. */
public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream)
throws IOException {
return testPostings(reader, infoStream, false, Level.MIN_LEVEL_FOR_SLOW_CHECKS, false);
}
/**
* Test the term index.
*
* @lucene.experimental
*/
/** Test the term index. */
public static Status.TermIndexStatus testPostings(
CodecReader reader, PrintStream infoStream, boolean verbose, int level, boolean failFast)
throws IOException {
@@ -2510,11 +2518,7 @@ public final class CheckIndex implements Closeable {
return status;
}
/**
* Test the points index
*
* @lucene.experimental
*/
/** Test the points index. */
public static Status.PointsStatus testPoints(
CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
if (infoStream != null) {
@@ -2617,11 +2621,7 @@ public final class CheckIndex implements Closeable {
return status;
}
/**
* Test the vectors index
*
* @lucene.experimental
*/
/** Test the vectors index. */
public static Status.VectorValuesStatus testVectors(
CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
if (infoStream != null) {
@@ -3104,11 +3104,7 @@ public final class CheckIndex implements Closeable {
}
}
/**
* Test stored fields.
*
* @lucene.experimental
*/
/** Test stored fields. */
public static Status.StoredFieldStatus testStoredFields(
CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
long startNS = System.nanoTime();
@@ -3162,11 +3158,7 @@ public final class CheckIndex implements Closeable {
return status;
}
/**
* Test docvalues.
*
* @lucene.experimental
*/
/** Test docvalues. */
public static Status.DocValuesStatus testDocValues(
CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
long startNS = System.nanoTime();
@@ -3623,21 +3615,13 @@ public final class CheckIndex implements Closeable {
}
}
/**
* Test term vectors.
*
* @lucene.experimental
*/
/** Test term vectors. */
public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream)
throws IOException {
return testTermVectors(reader, infoStream, false, Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, false);
}
/**
* Test term vectors.
*
* @lucene.experimental
*/
/** Test term vectors. */
public static Status.TermVectorStatus testTermVectors(
CodecReader reader, PrintStream infoStream, boolean verbose, int level, boolean failFast)
throws IOException {

View File

@@ -112,7 +112,7 @@ public abstract sealed class IndexReader implements Closeable permits CompositeR
*
* @lucene.experimental
*/
public static interface CacheHelper {
public interface CacheHelper {
/**
* Get a key that the resource can be cached on. The given entry can be compared using identity,
@@ -139,7 +139,7 @@ public abstract sealed class IndexReader implements Closeable permits CompositeR
* @lucene.experimental
*/
@FunctionalInterface
public static interface ClosedListener {
public interface ClosedListener {
/**
* Invoked when the resource (segment core, or index reader) that is being cached on is closed.
*/

View File

@@ -34,10 +34,10 @@ import org.apache.lucene.util.BytesRef;
public interface IndexableField {
/** Field name */
public String name();
String name();
/** {@link IndexableFieldType} describing the properties of this field. */
public IndexableFieldType fieldType();
IndexableFieldType fieldType();
/**
* Creates the TokenStream used for indexing this field. If appropriate, implementations should
@@ -52,13 +52,13 @@
* @return TokenStream value for indexing the document. Should always return a non-null value if
* the field is to be indexed
*/
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse);
TokenStream tokenStream(Analyzer analyzer, TokenStream reuse);
/** Non-null if this field has a binary value */
public BytesRef binaryValue();
BytesRef binaryValue();
/** Non-null if this field has a string value */
public String stringValue();
String stringValue();
/** Non-null if this field has a string value */
default CharSequence getCharSequenceValue() {
@@ -66,20 +66,20 @@
}
/** Non-null if this field has a Reader value */
public Reader readerValue();
Reader readerValue();
/** Non-null if this field has a numeric value */
public Number numericValue();
Number numericValue();
/**
* Stored value. This method is called to populate stored fields and must return a non-null value
* if the field is stored.
*/
public StoredValue storedValue();
StoredValue storedValue();
/**
* Describes how this field should be inverted. This must return a non-null value if the field
* indexes terms and postings.
*/
public InvertableType invertableType();
InvertableType invertableType();
}

View File

@@ -31,19 +31,19 @@ public interface TwoPhaseCommit {
* method, but avoid actual committing changes. If the 2-phase commit fails, {@link #rollback()}
* is called to discard all changes since last successful commit.
*/
public long prepareCommit() throws IOException;
long prepareCommit() throws IOException;
/**
* The second phase of a 2-phase commit. Implementations should ideally do very little work in
* this method (following {@link #prepareCommit()}), and after it returns, the caller can assume
* that the changes were successfully committed to the underlying storage.
*/
public long commit() throws IOException;
long commit() throws IOException;
/**
* Discards any changes that have occurred since the last commit. In a 2-phase commit algorithm,
* where one of the objects failed to {@link #commit()} or {@link #prepareCommit()}, this method
* is used to roll all other objects back to their previous state.
*/
public void rollback() throws IOException;
void rollback() throws IOException;
}
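The protocol this interface implies can be driven by a small coordinator: prepare every resource, commit only if all prepares succeed, and roll everything back otherwise. Lucene ships TwoPhaseCommitTool for this; the loop below is only an illustrative sketch:

import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.TwoPhaseCommit;

public class TwoPhaseCommitSketch {
  static void commitAll(List<TwoPhaseCommit> resources) throws IOException {
    boolean prepared = false;
    try {
      for (TwoPhaseCommit resource : resources) {
        resource.prepareCommit(); // phase one: may fail, nothing is visible yet
      }
      prepared = true;
    } finally {
      if (prepared == false) {
        for (TwoPhaseCommit resource : resources) {
          resource.rollback(); // undo everything since the last successful commit
        }
      }
    }
    for (TwoPhaseCommit resource : resources) {
      resource.commit(); // phase two: expected to be cheap and to succeed
    }
  }
}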

View File

@@ -0,0 +1,288 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Objects;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits;
/**
* Search for all (approximate) vectors above a similarity threshold.
*
* @lucene.experimental
*/
abstract class AbstractVectorSimilarityQuery extends Query {
protected final String field;
protected final float traversalSimilarity, resultSimilarity;
protected final Query filter;
/**
* Search for all (approximate) vectors above a similarity threshold using {@link
* VectorSimilarityCollector}. If a filter is applied, it traverses as many nodes as the cost of
* the filter, and then falls back to exact search if results are incomplete.
*
* @param field a field that has been indexed as a vector field.
* @param traversalSimilarity (lower) similarity score for graph traversal.
* @param resultSimilarity (higher) similarity score for result collection.
* @param filter a filter applied before the vector search.
*/
AbstractVectorSimilarityQuery(
String field, float traversalSimilarity, float resultSimilarity, Query filter) {
if (traversalSimilarity > resultSimilarity) {
throw new IllegalArgumentException("traversalSimilarity should be <= resultSimilarity");
}
this.field = Objects.requireNonNull(field, "field");
this.traversalSimilarity = traversalSimilarity;
this.resultSimilarity = resultSimilarity;
this.filter = filter;
}
abstract VectorScorer createVectorScorer(LeafReaderContext context) throws IOException;
protected abstract TopDocs approximateSearch(
LeafReaderContext context, Bits acceptDocs, int visitLimit) throws IOException;
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
return new Weight(this) {
final Weight filterWeight =
filter == null
? null
: searcher.createWeight(searcher.rewrite(filter), ScoreMode.COMPLETE_NO_SCORES, 1);
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
if (filterWeight != null) {
Scorer filterScorer = filterWeight.scorer(context);
if (filterScorer == null || filterScorer.iterator().advance(doc) > doc) {
return Explanation.noMatch("Doc does not match the filter");
}
}
VectorScorer scorer = createVectorScorer(context);
if (scorer == null) {
return Explanation.noMatch("Not indexed as the correct vector field");
} else if (scorer.advanceExact(doc)) {
float score = scorer.score();
if (score >= resultSimilarity) {
return Explanation.match(boost * score, "Score above threshold");
} else {
return Explanation.noMatch("Score below threshold");
}
} else {
return Explanation.noMatch("No vector found for doc");
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
@SuppressWarnings("resource")
LeafReader leafReader = context.reader();
Bits liveDocs = leafReader.getLiveDocs();
// If there is no filter
if (filterWeight == null) {
// Return exhaustive results
TopDocs results = approximateSearch(context, liveDocs, Integer.MAX_VALUE);
return VectorSimilarityScorer.fromScoreDocs(this, boost, results.scoreDocs);
}
Scorer scorer = filterWeight.scorer(context);
if (scorer == null) {
// If the filter does not match any documents
return null;
}
BitSet acceptDocs;
if (liveDocs == null && scorer.iterator() instanceof BitSetIterator bitSetIterator) {
// If there are no deletions, and matching docs are already cached
acceptDocs = bitSetIterator.getBitSet();
} else {
// Else collect all matching docs
FilteredDocIdSetIterator filtered =
new FilteredDocIdSetIterator(scorer.iterator()) {
@Override
protected boolean match(int doc) {
return liveDocs == null || liveDocs.get(doc);
}
};
acceptDocs = BitSet.of(filtered, leafReader.maxDoc());
}
int cardinality = acceptDocs.cardinality();
if (cardinality == 0) {
// If there are no live matching docs
return null;
}
// Perform an approximate search
TopDocs results = approximateSearch(context, acceptDocs, cardinality);
// If the limit was exhausted
if (results.totalHits.relation == TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO) {
// Return a lazy-loading iterator
return VectorSimilarityScorer.fromAcceptDocs(
this,
boost,
createVectorScorer(context),
new BitSetIterator(acceptDocs, cardinality),
resultSimilarity);
} else {
// Return an iterator over the collected results
return VectorSimilarityScorer.fromScoreDocs(this, boost, results.scoreDocs);
}
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;
}
};
}
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) {
visitor.visitLeaf(this);
}
}
@Override
public boolean equals(Object o) {
return sameClassAs(o)
&& Objects.equals(field, ((AbstractVectorSimilarityQuery) o).field)
&& Float.compare(
((AbstractVectorSimilarityQuery) o).traversalSimilarity, traversalSimilarity)
== 0
&& Float.compare(((AbstractVectorSimilarityQuery) o).resultSimilarity, resultSimilarity)
== 0
&& Objects.equals(filter, ((AbstractVectorSimilarityQuery) o).filter);
}
@Override
public int hashCode() {
return Objects.hash(field, traversalSimilarity, resultSimilarity, filter);
}
private static class VectorSimilarityScorer extends Scorer {
final DocIdSetIterator iterator;
final float[] cachedScore;
VectorSimilarityScorer(Weight weight, DocIdSetIterator iterator, float[] cachedScore) {
super(weight);
this.iterator = iterator;
this.cachedScore = cachedScore;
}
static VectorSimilarityScorer fromScoreDocs(Weight weight, float boost, ScoreDoc[] scoreDocs) {
// Sort in ascending order of docid
Arrays.sort(scoreDocs, Comparator.comparingInt(scoreDoc -> scoreDoc.doc));
float[] cachedScore = new float[1];
DocIdSetIterator iterator =
new DocIdSetIterator() {
int index = -1;
@Override
public int docID() {
if (index < 0) {
return -1;
} else if (index >= scoreDocs.length) {
return NO_MORE_DOCS;
} else {
cachedScore[0] = boost * scoreDocs[index].score;
return scoreDocs[index].doc;
}
}
@Override
public int nextDoc() {
index++;
return docID();
}
@Override
public int advance(int target) {
index =
Arrays.binarySearch(
scoreDocs,
new ScoreDoc(target, 0),
Comparator.comparingInt(scoreDoc -> scoreDoc.doc));
if (index < 0) {
index = -1 - index;
}
return docID();
}
@Override
public long cost() {
return scoreDocs.length;
}
};
return new VectorSimilarityScorer(weight, iterator, cachedScore);
}
static VectorSimilarityScorer fromAcceptDocs(
Weight weight,
float boost,
VectorScorer scorer,
DocIdSetIterator acceptDocs,
float threshold) {
float[] cachedScore = new float[1];
DocIdSetIterator iterator =
new FilteredDocIdSetIterator(acceptDocs) {
@Override
protected boolean match(int doc) throws IOException {
// Compute the dot product
float score = scorer.score();
cachedScore[0] = score * boost;
return score >= threshold;
}
};
return new VectorSimilarityScorer(weight, iterator, cachedScore);
}
@Override
public int docID() {
return iterator.docID();
}
@Override
public DocIdSetIterator iterator() {
return iterator;
}
@Override
public float getMaxScore(int upTo) {
return Float.POSITIVE_INFINITY;
}
@Override
public float score() {
return cachedScore[0];
}
}
}

View File

@@ -0,0 +1,145 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.util.Bits;
/**
* Search for all (approximate) byte vectors above a similarity threshold.
*
* @lucene.experimental
*/
public class ByteVectorSimilarityQuery extends AbstractVectorSimilarityQuery {
private final byte[] target;
/**
* Search for all (approximate) byte vectors above a similarity threshold using {@link
* VectorSimilarityCollector}. If a filter is applied, it traverses as many nodes as the cost of
* the filter, and then falls back to exact search if results are incomplete.
*
* @param field a field that has been indexed as a {@link KnnByteVectorField}.
* @param target the target of the search.
* @param traversalSimilarity (lower) similarity score for graph traversal.
* @param resultSimilarity (higher) similarity score for result collection.
* @param filter a filter applied before the vector search.
*/
public ByteVectorSimilarityQuery(
String field,
byte[] target,
float traversalSimilarity,
float resultSimilarity,
Query filter) {
super(field, traversalSimilarity, resultSimilarity, filter);
this.target = Objects.requireNonNull(target, "target");
}
/**
* Search for all (approximate) byte vectors above a similarity threshold using {@link
* VectorSimilarityCollector}.
*
* @param field a field that has been indexed as a {@link KnnByteVectorField}.
* @param target the target of the search.
* @param traversalSimilarity (lower) similarity score for graph traversal.
* @param resultSimilarity (higher) similarity score for result collection.
*/
public ByteVectorSimilarityQuery(
String field, byte[] target, float traversalSimilarity, float resultSimilarity) {
this(field, target, traversalSimilarity, resultSimilarity, null);
}
/**
* Search for all (approximate) byte vectors above a similarity threshold using {@link
* VectorSimilarityCollector}. If a filter is applied, it traverses as many nodes as the cost of
* the filter, and then falls back to exact search if results are incomplete.
*
* @param field a field that has been indexed as a {@link KnnByteVectorField}.
* @param target the target of the search.
* @param resultSimilarity similarity score for result collection.
* @param filter a filter applied before the vector search.
*/
public ByteVectorSimilarityQuery(
String field, byte[] target, float resultSimilarity, Query filter) {
this(field, target, resultSimilarity, resultSimilarity, filter);
}
/**
* Search for all (approximate) byte vectors above a similarity threshold using {@link
* VectorSimilarityCollector}.
*
* @param field a field that has been indexed as a {@link KnnByteVectorField}.
* @param target the target of the search.
* @param resultSimilarity similarity score for result collection.
*/
public ByteVectorSimilarityQuery(String field, byte[] target, float resultSimilarity) {
this(field, target, resultSimilarity, resultSimilarity, null);
}
@Override
VectorScorer createVectorScorer(LeafReaderContext context) throws IOException {
@SuppressWarnings("resource")
FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
if (fi == null || fi.getVectorEncoding() != VectorEncoding.BYTE) {
return null;
}
return VectorScorer.create(context, fi, target);
}
@Override
@SuppressWarnings("resource")
protected TopDocs approximateSearch(LeafReaderContext context, Bits acceptDocs, int visitLimit)
throws IOException {
KnnCollector collector =
new VectorSimilarityCollector(traversalSimilarity, resultSimilarity, visitLimit);
context.reader().searchNearestVectors(field, target, collector, acceptDocs);
return collector.topDocs();
}
@Override
public String toString(String field) {
return String.format(
Locale.ROOT,
"%s[field=%s target=[%d...] traversalSimilarity=%f resultSimilarity=%f filter=%s]",
getClass().getSimpleName(),
field,
target[0],
traversalSimilarity,
resultSimilarity,
filter);
}
@Override
public boolean equals(Object o) {
return sameClassAs(o)
&& super.equals(o)
&& Arrays.equals(target, ((ByteVectorSimilarityQuery) o).target);
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + Arrays.hashCode(target);
return result;
}
}

View File

@@ -0,0 +1,146 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Objects;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.VectorUtil;
/**
* Search for all (approximate) float vectors above a similarity threshold.
*
* @lucene.experimental
*/
public class FloatVectorSimilarityQuery extends AbstractVectorSimilarityQuery {
private final float[] target;
/**
* Search for all (approximate) float vectors above a similarity threshold using {@link
* VectorSimilarityCollector}. If a filter is applied, it traverses as many nodes as the cost of
* the filter, and then falls back to exact search if results are incomplete.
*
* @param field a field that has been indexed as a {@link KnnFloatVectorField}.
* @param target the target of the search.
* @param traversalSimilarity (lower) similarity score for graph traversal.
* @param resultSimilarity (higher) similarity score for result collection.
* @param filter a filter applied before the vector search.
*/
public FloatVectorSimilarityQuery(
String field,
float[] target,
float traversalSimilarity,
float resultSimilarity,
Query filter) {
super(field, traversalSimilarity, resultSimilarity, filter);
this.target = VectorUtil.checkFinite(Objects.requireNonNull(target, "target"));
}
/**
* Search for all (approximate) float vectors above a similarity threshold using {@link
* VectorSimilarityCollector}.
*
* @param field a field that has been indexed as a {@link KnnFloatVectorField}.
* @param target the target of the search.
* @param traversalSimilarity (lower) similarity score for graph traversal.
* @param resultSimilarity (higher) similarity score for result collection.
*/
public FloatVectorSimilarityQuery(
String field, float[] target, float traversalSimilarity, float resultSimilarity) {
this(field, target, traversalSimilarity, resultSimilarity, null);
}
/**
* Search for all (approximate) float vectors above a similarity threshold using {@link
* VectorSimilarityCollector}. If a filter is applied, it traverses as many nodes as the cost of
* the filter, and then falls back to exact search if results are incomplete.
*
* @param field a field that has been indexed as a {@link KnnFloatVectorField}.
* @param target the target of the search.
* @param resultSimilarity similarity score for result collection.
* @param filter a filter applied before the vector search.
*/
public FloatVectorSimilarityQuery(
String field, float[] target, float resultSimilarity, Query filter) {
this(field, target, resultSimilarity, resultSimilarity, filter);
}
/**
* Search for all (approximate) float vectors above a similarity threshold using {@link
* VectorSimilarityCollector}.
*
* @param field a field that has been indexed as a {@link KnnFloatVectorField}.
* @param target the target of the search.
* @param resultSimilarity similarity score for result collection.
*/
public FloatVectorSimilarityQuery(String field, float[] target, float resultSimilarity) {
this(field, target, resultSimilarity, resultSimilarity, null);
}
@Override
VectorScorer createVectorScorer(LeafReaderContext context) throws IOException {
@SuppressWarnings("resource")
FieldInfo fi = context.reader().getFieldInfos().fieldInfo(field);
if (fi == null || fi.getVectorEncoding() != VectorEncoding.FLOAT32) {
return null;
}
return VectorScorer.create(context, fi, target);
}
@Override
@SuppressWarnings("resource")
protected TopDocs approximateSearch(LeafReaderContext context, Bits acceptDocs, int visitLimit)
throws IOException {
KnnCollector collector =
new VectorSimilarityCollector(traversalSimilarity, resultSimilarity, visitLimit);
context.reader().searchNearestVectors(field, target, collector, acceptDocs);
return collector.topDocs();
}
@Override
public String toString(String field) {
return String.format(
Locale.ROOT,
"%s[field=%s target=[%f...] traversalSimilarity=%f resultSimilarity=%f filter=%s]",
getClass().getSimpleName(),
field,
target[0],
traversalSimilarity,
resultSimilarity,
filter);
}
@Override
public boolean equals(Object o) {
return sameClassAs(o)
&& super.equals(o)
&& Arrays.equals(target, ((FloatVectorSimilarityQuery) o).target);
}
@Override
public int hashCode() {
int result = super.hashCode();
result = 31 * result + Arrays.hashCode(target);
return result;
}
}

View File

@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.util.ArrayList;
import java.util.List;
/**
* Perform a similarity-based graph search.
*
* @lucene.experimental
*/
class VectorSimilarityCollector extends AbstractKnnCollector {
private final float traversalSimilarity, resultSimilarity;
private float maxSimilarity;
private final List<ScoreDoc> scoreDocList;
/**
* Perform a similarity-based graph search. The graph is traversed till better scoring nodes are
* available, or the best candidate is below {@link #traversalSimilarity}. All traversed nodes
* above {@link #resultSimilarity} are collected.
*
* @param traversalSimilarity (lower) similarity score for graph traversal.
* @param resultSimilarity (higher) similarity score for result collection.
* @param visitLimit limit on number of nodes to visit.
*/
public VectorSimilarityCollector(
float traversalSimilarity, float resultSimilarity, long visitLimit) {
super(1, visitLimit);
if (traversalSimilarity > resultSimilarity) {
throw new IllegalArgumentException("traversalSimilarity should be <= resultSimilarity");
}
this.traversalSimilarity = traversalSimilarity;
this.resultSimilarity = resultSimilarity;
this.maxSimilarity = Float.NEGATIVE_INFINITY;
this.scoreDocList = new ArrayList<>();
}
@Override
public boolean collect(int docId, float similarity) {
maxSimilarity = Math.max(maxSimilarity, similarity);
if (similarity >= resultSimilarity) {
scoreDocList.add(new ScoreDoc(docId, similarity));
}
return true;
}
@Override
public float minCompetitiveSimilarity() {
return Math.min(traversalSimilarity, maxSimilarity);
}
@Override
public TopDocs topDocs() {
// Results are not returned in a sorted order to prevent unnecessary calculations (because we do
// not need to maintain the topK)
TotalHits.Relation relation =
earlyTerminated()
? TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO
: TotalHits.Relation.EQUAL_TO;
return new TopDocs(
new TotalHits(visitedCount(), relation), scoreDocList.toArray(ScoreDoc[]::new));
}
}
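To make the two thresholds concrete, a hypothetical trace (it would have to live in org.apache.lucene.search, since the collector is package-private): a hit scoring between traversalSimilarity and resultSimilarity only raises maxSimilarity, while a hit at or above resultSimilarity is also recorded.

package org.apache.lucene.search;

class VectorSimilarityCollectorSketch {
  public static void main(String[] args) {
    VectorSimilarityCollector collector = new VectorSimilarityCollector(0.5f, 0.7f, 1_000);
    collector.collect(5, 0.65f); // raises maxSimilarity; below resultSimilarity, not recorded
    collector.collect(6, 0.80f); // at or above resultSimilarity, recorded as a ScoreDoc
    System.out.println(collector.topDocs().scoreDocs.length); // prints 1
  }
}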

View File

@@ -121,12 +121,12 @@ public abstract class LMSimilarity extends SimilarityBase {
}
/** A strategy for computing the collection language model. */
public static interface CollectionModel {
public interface CollectionModel {
/**
* Computes the probability {@code p(w|C)} according to the language model strategy for the
* current term.
*/
public double computeProbability(BasicStats stats);
double computeProbability(BasicStats stats);
/** The name of the collection model strategy. */
public String getName();

View File

@@ -39,7 +39,7 @@ final class ByteBufferGuard {
* this to allow unmapping of bytebuffers with private Java APIs.
*/
@FunctionalInterface
static interface BufferCleaner {
interface BufferCleaner {
void freeBuffer(String resourceDescription, ByteBuffer b) throws IOException;
}

View File

@@ -264,7 +264,7 @@ public class MMapDirectory extends FSDirectory {
*/
public static final String UNMAP_NOT_SUPPORTED_REASON;
static interface MMapIndexInputProvider {
interface MMapIndexInputProvider {
IndexInput openInput(Path path, IOContext context, int chunkSizePower, boolean preload)
throws IOException;

View File

@@ -26,14 +26,14 @@ import org.apache.lucene.util.BitUtil; // javadocs
public interface RandomAccessInput {
/** The number of bytes in the file. */
public long length();
long length();
/**
* Reads a byte at the given position in the file
*
* @see DataInput#readByte
*/
public byte readByte(long pos) throws IOException;
byte readByte(long pos) throws IOException;
/**
* Reads a specified number of bytes starting at a given position into an array at the specified
@@ -53,7 +53,7 @@
* @see DataInput#readShort
* @see BitUtil#VH_LE_SHORT
*/
public short readShort(long pos) throws IOException;
short readShort(long pos) throws IOException;
/**
* Reads an integer (LE byte order) at the given position in the file
@@ -61,7 +61,7 @@
* @see DataInput#readInt
* @see BitUtil#VH_LE_INT
*/
public int readInt(long pos) throws IOException;
int readInt(long pos) throws IOException;
/**
* Reads a long (LE byte order) at the given position in the file
@@ -69,5 +69,5 @@
* @see DataInput#readLong
* @see BitUtil#VH_LE_LONG
*/
public long readLong(long pos) throws IOException;
long readLong(long pos) throws IOException;
}
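Typical use, as a hedged sketch (the directory and file name are assumed): obtain a RandomAccessInput from IndexInput#randomAccessSlice, then read little-endian primitives at absolute positions without seeking.

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;

public class RandomAccessInputSketch {
  static long readFirstLong(Directory dir) throws IOException {
    try (IndexInput in = dir.openInput("data.bin", IOContext.DEFAULT)) {
      RandomAccessInput slice = in.randomAccessSlice(0, in.length());
      return slice.readLong(0); // LE byte order, positional read, no seek
    }
  }
}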

View File

@@ -714,7 +714,7 @@ public final class ArrayUtil {
/** Comparator for a fixed number of bytes. */
@FunctionalInterface
public static interface ByteArrayComparator {
public interface ByteArrayComparator {
/**
* Compare bytes starting from the given offsets. The return value has the same contract as

View File

@@ -30,5 +30,5 @@ public interface AttributeReflector {
* method once using {@code org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class} as
* attribute class, {@code "term"} as key and the actual value as a String.
*/
public void reflect(Class<? extends Attribute> attClass, String key, Object value);
void reflect(Class<? extends Attribute> attClass, String key, Object value);
}
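With the modifier gone the declaration is unchanged semantically, and since reflect is the single abstract method, a plain lambda satisfies the interface. A small sketch (the AttributeSource#reflectWith call site is assumed context, not part of this hunk):

import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.AttributeSource;

public class ReflectorSketch {
  static void dump(AttributeSource source) {
    AttributeReflector printer =
        (attClass, key, value) ->
            System.out.println(attClass.getSimpleName() + "#" + key + " = " + value);
    source.reflectWith(printer);
  }
}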

View File

@@ -32,7 +32,7 @@ public interface ClassLoaderUtils {
* returned (this is fine, because if we get a {@code SecurityException} it is for sure no
* parent).
*/
public static boolean isParentClassLoader(final ClassLoader parent, final ClassLoader child) {
static boolean isParentClassLoader(final ClassLoader parent, final ClassLoader child) {
try {
ClassLoader cl = child;
while (cl != null) {

View File

@@ -213,7 +213,16 @@ public abstract class MSBRadixSorter extends Sorter {
*
* @see #buildHistogram
*/
// This method, and its namesakes, have been manually split to work around a JVM crash.
// See https://github.com/apache/lucene/issues/12898
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
int commonPrefixLength = computeInitialCommonPrefixLength(from, k);
return computeCommonPrefixLengthAndBuildHistogramPart1(
from, to, k, histogram, commonPrefixLength);
}
// This method, and its namesakes, have been manually split to work around a JVM crash.
private int computeInitialCommonPrefixLength(int from, int k) {
final int[] commonPrefix = this.commonPrefix;
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
for (int j = 0; j < commonPrefixLength; ++j) {
@@ -224,7 +233,13 @@
break;
}
}
return commonPrefixLength;
}
// This method, and its namesakes, have been manually split to work around a JVM crash.
private int computeCommonPrefixLengthAndBuildHistogramPart1(
int from, int to, int k, int[] histogram, int commonPrefixLength) {
final int[] commonPrefix = this.commonPrefix;
int i;
outer:
for (i = from + 1; i < to; ++i) {
@@ -239,7 +254,13 @@
}
}
}
return computeCommonPrefixLengthAndBuildHistogramPart2(
from, to, k, histogram, commonPrefixLength, i);
}
// This method, and its namesakes, have been manually split to work around a JVM crash.
private int computeCommonPrefixLengthAndBuildHistogramPart2(
int from, int to, int k, int[] histogram, int commonPrefixLength, int i) {
if (i < to) {
// the loop got broken because there is no common prefix
assert commonPrefixLength == 0;

View File

@@ -127,7 +127,7 @@ public final class NamedSPILoader<S extends NamedSPILoader.NamedSPI> implements
*
* <p>Names must be all ascii alphanumeric, and less than 128 characters in length.
*/
public static interface NamedSPI {
public interface NamedSPI {
String getName();
}
}

View File

@@ -198,7 +198,16 @@ public abstract class RadixSelector extends Selector {
*
* @see #buildHistogram
*/
// This method, and its namesakes, have been manually split to work around a JVM crash.
// See https://github.com/apache/lucene/issues/12898
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
int commonPrefixLength = computeInitialCommonPrefixLength(from, k);
return computeCommonPrefixLengthAndBuildHistogramPart1(
from, to, k, histogram, commonPrefixLength);
}
// This method, and its namesakes, have been manually split to work around a JVM crash.
private int computeInitialCommonPrefixLength(int from, int k) {
final int[] commonPrefix = this.commonPrefix;
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
for (int j = 0; j < commonPrefixLength; ++j) {
@@ -209,7 +218,13 @@
break;
}
}
return commonPrefixLength;
}
// This method, and its namesakes, have been manually split to work around a JVM crash.
private int computeCommonPrefixLengthAndBuildHistogramPart1(
int from, int to, int k, int[] histogram, int commonPrefixLength) {
final int[] commonPrefix = this.commonPrefix;
int i;
outer:
for (i = from + 1; i < to; ++i) {
@@ -226,7 +241,13 @@
}
}
}
return computeCommonPrefixLengthAndBuildHistogramPart2(
from, to, k, histogram, commonPrefixLength, i);
}
// This method, and its namesakes, have been manually split to work around a JVM crash.
private int computeCommonPrefixLengthAndBuildHistogramPart2(
int from, int to, int k, int[] histogram, int commonPrefixLength, int i) {
if (i < to) {
// the loop got broken because there is no common prefix
assert commonPrefixLength == 0;

View File

@@ -23,14 +23,14 @@ import java.io.InputStream;
public interface ResourceLoader {
/** Opens a named resource */
public InputStream openResource(String resource) throws IOException;
InputStream openResource(String resource) throws IOException;
/** Finds class of the name and expected type */
public <T> Class<? extends T> findClass(String cname, Class<T> expectedType);
<T> Class<? extends T> findClass(String cname, Class<T> expectedType);
/** Creates an instance of the name and expected type */
// TODO: fix exception handling
public default <T> T newInstance(String cname, Class<T> expectedType) {
default <T> T newInstance(String cname, Class<T> expectedType) {
Class<? extends T> clazz = findClass(cname, expectedType);
try {
return clazz.getConstructor().newInstance();

View File

@@ -24,8 +24,8 @@ package org.apache.lucene.util;
public abstract class RollingBuffer<T extends RollingBuffer.Resettable> {
/** Implement to reset an instance */
public static interface Resettable {
public void reset();
public interface Resettable {
void reset();
}
@SuppressWarnings("unchecked")

View File

@@ -29,7 +29,7 @@ public interface Unwrappable<T> {
/** Unwraps all {@code Unwrappable}s around the given object. */
@SuppressWarnings("unchecked")
public static <T> T unwrapAll(T o) {
static <T> T unwrapAll(T o) {
while (o instanceof Unwrappable) {
o = ((Unwrappable<T>) o).unwrap();
}

View File

@@ -45,5 +45,5 @@ public interface AutomatonProvider {
* @return automaton
* @throws IOException if errors occur
*/
public Automaton getAutomaton(String name) throws IOException;
Automaton getAutomaton(String name) throws IOException;
}

View File

@@ -68,7 +68,7 @@ final class BKDUtil {
/** Predicate for a fixed number of bytes. */
@FunctionalInterface
public static interface ByteArrayPredicate {
public interface ByteArrayPredicate {
/** Test bytes starting from the given offsets. */
boolean test(byte[] a, int aOffset, byte[] b, int bOffset);

View File

@@ -109,10 +109,23 @@ public final class FST<T> implements Accountable {
// Increment version to change it
private static final String FILE_FORMAT_NAME = "FST";
private static final int VERSION_START = 6;
/** First supported version, this is the version that was used when releasing Lucene 7.0. */
public static final int VERSION_START = 6;
// Version 7 introduced direct addressing for arcs, but it's not recorded here because it doesn't
// need version checks on the read side, it uses new flag values on arcs instead.
private static final int VERSION_LITTLE_ENDIAN = 8;
private static final int VERSION_CONTINUOUS_ARCS = 9;
static final int VERSION_CURRENT = VERSION_CONTINUOUS_ARCS;
/** Version that started storing continuous arcs. */
public static final int VERSION_CONTINUOUS_ARCS = 9;
/** Current version. */
public static final int VERSION_CURRENT = VERSION_CONTINUOUS_ARCS;
/** Version that was used when releasing Lucene 9.0. */
public static final int VERSION_90 = VERSION_LITTLE_ENDIAN;
// Never serialized; just used to represent the virtual
// final node w/ no arcs:

View File

@@ -29,7 +29,6 @@ import static org.apache.lucene.util.fst.FST.BIT_STOP_NODE;
import static org.apache.lucene.util.fst.FST.BIT_TARGET_NEXT;
import static org.apache.lucene.util.fst.FST.FINAL_END_NODE;
import static org.apache.lucene.util.fst.FST.NON_FINAL_END_NODE;
import static org.apache.lucene.util.fst.FST.VERSION_CURRENT;
import static org.apache.lucene.util.fst.FST.getNumPresenceBytes;
import java.io.IOException;
@@ -135,6 +134,7 @@ public class FSTCompiler<T> {
final boolean allowFixedLengthArcs;
final float directAddressingMaxOversizingFactor;
final int version;
long directAddressingExpansionCredit;
// the DataOutput to stream the FST bytes to
@@ -163,10 +163,12 @@ public class FSTCompiler<T> {
Outputs<T> outputs,
boolean allowFixedLengthArcs,
DataOutput dataOutput,
float directAddressingMaxOversizingFactor)
float directAddressingMaxOversizingFactor,
int version)
throws IOException {
this.allowFixedLengthArcs = allowFixedLengthArcs;
this.directAddressingMaxOversizingFactor = directAddressingMaxOversizingFactor;
this.version = version;
// pad: ensure no node gets address 0 which is reserved to mean
// the stop state w/ no arcs
dataOutput.writeByte((byte) 0);
@@ -174,7 +176,7 @@ public class FSTCompiler<T> {
this.dataOutput = dataOutput;
fst =
new FST<>(
new FST.FSTMetadata<>(inputType, outputs, null, -1, VERSION_CURRENT, 0),
new FST.FSTMetadata<>(inputType, outputs, null, -1, version, 0),
toFSTReader(dataOutput));
if (suffixRAMLimitMB < 0) {
throw new IllegalArgumentException("ramLimitMB must be >= 0; got: " + suffixRAMLimitMB);
@@ -241,6 +243,7 @@ public class FSTCompiler<T> {
private boolean allowFixedLengthArcs = true;
private DataOutput dataOutput;
private float directAddressingMaxOversizingFactor = DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR;
private int version = FST.VERSION_CURRENT;
/**
* @param inputType The input type (transition labels). Can be anything from {@link INPUT_TYPE}
@@ -325,6 +328,21 @@ public class FSTCompiler<T> {
return this;
}
/** Expert: Set the codec version. */
public Builder<T> setVersion(int version) {
if (version < FST.VERSION_90 || version > FST.VERSION_CURRENT) {
throw new IllegalArgumentException(
"Expected version in range ["
+ FST.VERSION_90
+ ", "
+ FST.VERSION_CURRENT
+ "], got "
+ version);
}
this.version = version;
return this;
}
/** Creates a new {@link FSTCompiler}. */
public FSTCompiler<T> build() throws IOException {
// create a default DataOutput if not specified
@@ -337,7 +355,8 @@ public class FSTCompiler<T> {
outputs,
allowFixedLengthArcs,
dataOutput,
directAddressingMaxOversizingFactor);
directAddressingMaxOversizingFactor,
version);
}
}
@@ -517,7 +536,7 @@ public class FSTCompiler<T> {
int labelRange = nodeIn.arcs[nodeIn.numArcs - 1].label - nodeIn.arcs[0].label + 1;
assert labelRange > 0;
boolean continuousLabel = labelRange == nodeIn.numArcs;
if (continuousLabel) {
if (continuousLabel && version >= FST.VERSION_CONTINUOUS_ARCS) {
writeNodeForDirectAddressingOrContinuous(
nodeIn, maxBytesPerArcWithoutLabel, labelRange, true);
continuousNodeCount++;
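A hedged usage sketch of the new version knob, assuming the Builder API visible in this diff: selecting FST.VERSION_90 writes the pre-9.9 format, so the continuous-arcs branch above is never taken. Values outside [VERSION_90, VERSION_CURRENT] are rejected by the setter.

// Sketch only; the constructor and outputs type are assumptions based on the
// 9.x API shown in this diff, not verified against other branches.
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.PositiveIntOutputs;

class OldFormatFst {
  static FSTCompiler<Long> newCompilerForLucene90Format() throws java.io.IOException {
    return new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, PositiveIntOutputs.getSingleton())
        .setVersion(FST.VERSION_90) // older on-disk format: continuous arcs are not written
        .build();
  }
}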

View File

@@ -120,7 +120,7 @@ public class PackedInts {
throw new IllegalArgumentException("Unknown format id: " + id);
}
private Format(int id) {
Format(int id) {
this.id = id;
}
@@ -231,7 +231,7 @@
}
/** A decoder for packed integers. */
public static interface Decoder {
public interface Decoder {
/**
* The minimum number of long blocks to encode in a single iteration, when using long encoding.
@@ -299,7 +299,7 @@
}
/** An encoder for packed integers. */
public static interface Encoder {
public interface Encoder {
/**
* The minimum number of long blocks to encode in a single iteration, when using long encoding.
@@ -400,7 +400,7 @@
}
/** Run-once iterator interface, to decode previously saved PackedInts. */
public static interface ReaderIterator {
public interface ReaderIterator {
/** Returns next value */
long next() throws IOException;

View File

@@ -0,0 +1,516 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.IntStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
abstract class BaseVectorSimilarityQueryTestCase<
V, F extends Field, Q extends AbstractVectorSimilarityQuery>
extends LuceneTestCase {
String vectorField, idField;
VectorSimilarityFunction function;
int numDocs, dim;
abstract V getRandomVector(int dim);
abstract float compare(V vector1, V vector2);
abstract boolean checkEquals(V vector1, V vector2);
abstract F getVectorField(String name, V vector, VectorSimilarityFunction function);
abstract Q getVectorQuery(
String field, V vector, float traversalSimilarity, float resultSimilarity, Query filter);
abstract Q getThrowingVectorQuery(
String field, V vector, float traversalSimilarity, float resultSimilarity, Query filter);
public void testEquals() {
String field1 = "f1", field2 = "f2";
V vector1 = getRandomVector(dim);
V vector2;
do {
vector2 = getRandomVector(dim);
} while (checkEquals(vector1, vector2));
float traversalSimilarity1 = 0.3f, traversalSimilarity2 = 0.4f;
float resultSimilarity1 = 0.4f, resultSimilarity2 = 0.5f;
Query filter1 = new TermQuery(new Term("t1", "v1"));
Query filter2 = new TermQuery(new Term("t2", "v2"));
Query query = getVectorQuery(field1, vector1, traversalSimilarity1, resultSimilarity1, filter1);
// Everything is equal
assertEquals(
query, getVectorQuery(field1, vector1, traversalSimilarity1, resultSimilarity1, filter1));
// Null check
assertNotEquals(query, null);
// Different field
assertNotEquals(
query, getVectorQuery(field2, vector1, traversalSimilarity1, resultSimilarity1, filter1));
// Different vector
assertNotEquals(
query, getVectorQuery(field1, vector2, traversalSimilarity1, resultSimilarity1, filter1));
// Different traversalSimilarity
assertNotEquals(
query, getVectorQuery(field1, vector1, traversalSimilarity2, resultSimilarity1, filter1));
// Different resultSimilarity
assertNotEquals(
query, getVectorQuery(field1, vector1, traversalSimilarity1, resultSimilarity2, filter1));
// Different filter
assertNotEquals(
query, getVectorQuery(field1, vector1, traversalSimilarity1, resultSimilarity1, filter2));
}
public void testEmptyIndex() throws IOException {
// Do not index any vectors
numDocs = 0;
try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
getVectorQuery(
vectorField,
getRandomVector(dim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
null);
// Check that no vectors are found
assertEquals(0, searcher.count(query));
}
}
public void testExtremes() throws IOException {
try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
// All vectors are above -Infinity
Query query1 =
getVectorQuery(
vectorField,
getRandomVector(dim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
null);
// Check that all vectors are found
assertEquals(numDocs, searcher.count(query1));
// No vectors are above +Infinity
Query query2 =
getVectorQuery(
vectorField,
getRandomVector(dim),
Float.POSITIVE_INFINITY,
Float.POSITIVE_INFINITY,
null);
// Check that no vectors are found
assertEquals(0, searcher.count(query2));
}
}
public void testRandomFilter() throws IOException {
// Filter a sub-range from 0 to numDocs
int startIndex = random().nextInt(numDocs);
int endIndex = random().nextInt(startIndex, numDocs);
Query filter = IntField.newRangeQuery(idField, startIndex, endIndex);
try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
getVectorQuery(
vectorField,
getRandomVector(dim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
filter);
ScoreDoc[] scoreDocs = searcher.search(query, numDocs).scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
int id = getId(searcher, scoreDoc.doc);
// Check that returned document is in selected range
assertTrue(id >= startIndex && id <= endIndex);
}
// Check that all filtered vectors are found
assertEquals(endIndex - startIndex + 1, scoreDocs.length);
}
}
public void testFilterWithNoMatches() throws IOException {
try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
// Non-existent field
Query filter1 = new TermQuery(new Term("random_field", "random_value"));
Query query1 =
getVectorQuery(
vectorField,
getRandomVector(dim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
filter1);
// Check that no vectors are found
assertEquals(0, searcher.count(query1));
// Field exists, but value of -1 is not indexed
Query filter2 = IntField.newExactQuery(idField, -1);
Query query2 =
getVectorQuery(
vectorField,
getRandomVector(dim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
filter2);
// Check that no vectors are found
assertEquals(0, searcher.count(query2));
}
}
public void testDimensionMismatch() throws IOException {
// Different dimension
int newDim = atLeast(dim + 1);
try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
getVectorQuery(
vectorField,
getRandomVector(newDim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
null);
// Check that an exception for differing dimensions is thrown
IllegalArgumentException e =
expectThrows(IllegalArgumentException.class, () -> searcher.count(query));
assertEquals(
String.format(
Locale.ROOT,
"vector query dimension: %d differs from field dimension: %d",
newDim,
dim),
e.getMessage());
}
}
public void testNonVectorsField() throws IOException {
try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
// Non-existent field
Query query1 =
getVectorQuery(
"random_field",
getRandomVector(dim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
null);
assertEquals(0, searcher.count(query1));
// Indexed as int field
Query query2 =
getVectorQuery(
idField,
getRandomVector(dim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
null);
assertEquals(0, searcher.count(query2));
}
}
public void testSomeDeletes() throws IOException {
// Delete a sub-range from 0 to numDocs
int startIndex = random().nextInt(numDocs);
int endIndex = random().nextInt(startIndex, numDocs);
Query delete = IntField.newRangeQuery(idField, startIndex, endIndex);
try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
IndexWriter w = new IndexWriter(indexStore, newIndexWriterConfig())) {
w.deleteDocuments(delete);
w.commit();
try (IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
getVectorQuery(
vectorField,
getRandomVector(dim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
null);
ScoreDoc[] scoreDocs = searcher.search(query, numDocs).scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
int id = getId(searcher, scoreDoc.doc);
// Check that returned document is not deleted
assertFalse(id >= startIndex && id <= endIndex);
}
// Check that all live docs are returned
assertEquals(numDocs - endIndex + startIndex - 1, scoreDocs.length);
}
}
}
public void testAllDeletes() throws IOException {
try (Directory dir = getIndexStore(getRandomVectors(numDocs, dim));
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
// Delete all documents
w.deleteDocuments(new MatchAllDocsQuery());
w.commit();
try (IndexReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
getVectorQuery(
vectorField,
getRandomVector(dim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
null);
// Check that no vectors are found
assertEquals(0, searcher.count(query));
}
}
}
public void testBoostQuery() throws IOException {
// Define the boost and allowed delta
float boost = random().nextFloat(5, 10);
float delta = 1e-3f;
try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
Query query1 =
getVectorQuery(
vectorField,
getRandomVector(dim),
Float.NEGATIVE_INFINITY,
Float.NEGATIVE_INFINITY,
null);
ScoreDoc[] scoreDocs1 = searcher.search(query1, numDocs).scoreDocs;
Query query2 = new BoostQuery(query1, boost);
ScoreDoc[] scoreDocs2 = searcher.search(query2, numDocs).scoreDocs;
// Check that all docs are identical, with boosted scores
assertEquals(scoreDocs1.length, scoreDocs2.length);
for (int i = 0; i < scoreDocs1.length; i++) {
assertEquals(scoreDocs1[i].doc, scoreDocs2[i].doc);
assertEquals(boost * scoreDocs1[i].score, scoreDocs2[i].score, delta);
}
}
}
public void testVectorsAboveSimilarity() throws IOException {
// Pick number of docs to accept
int numAccepted = random().nextInt(numDocs / 3, numDocs / 2);
float delta = 1e-3f;
V[] vectors = getRandomVectors(numDocs, dim);
V queryVector = getRandomVector(dim);
// Find score above which we get (at least) numAccepted vectors
float resultSimilarity = getSimilarity(vectors, queryVector, numAccepted);
// Cache scores of vectors
Map<Integer, Float> scores = new HashMap<>();
for (int i = 0; i < numDocs; i++) {
float score = compare(queryVector, vectors[i]);
if (score >= resultSimilarity) {
scores.put(i, score);
}
}
try (Directory indexStore = getIndexStore(vectors);
IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
getVectorQuery(vectorField, queryVector, Float.NEGATIVE_INFINITY, resultSimilarity, null);
ScoreDoc[] scoreDocs = searcher.search(query, numDocs).scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
int id = getId(searcher, scoreDoc.doc);
// Check that the collected result is above accepted similarity
assertTrue(scores.containsKey(id));
// Check that the score is correct
assertEquals(scores.get(id), scoreDoc.score, delta);
}
// Check that all results are collected
assertEquals(scores.size(), scoreDocs.length);
}
}
public void testFallbackToExact() throws IOException {
// Restrictive filter, along with similarity to visit a large number of nodes
int numFiltered = random().nextInt(numDocs / 10, numDocs / 5);
int targetVisited = random().nextInt(numFiltered * 2, numDocs);
V[] vectors = getRandomVectors(numDocs, dim);
V queryVector = getRandomVector(dim);
float resultSimilarity = getSimilarity(vectors, queryVector, targetVisited);
Query filter = IntField.newSetQuery(idField, getFiltered(numFiltered));
try (Directory indexStore = getIndexStore(vectors);
IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
getThrowingVectorQuery(
vectorField, queryVector, resultSimilarity, resultSimilarity, filter);
// Falls back to exact search
expectThrows(UnsupportedOperationException.class, () -> searcher.count(query));
}
}
public void testApproximate() throws IOException {
// Non-restrictive filter, along with similarity to visit a small number of nodes
int numFiltered = random().nextInt((numDocs * 4) / 5, numDocs);
int targetVisited = random().nextInt(numFiltered / 10, numFiltered / 8);
V[] vectors = getRandomVectors(numDocs, dim);
V queryVector = getRandomVector(dim);
float resultSimilarity = getSimilarity(vectors, queryVector, targetVisited);
Query filter = IntField.newSetQuery(idField, getFiltered(numFiltered));
try (Directory indexStore = getIndexStore(vectors);
IndexWriter w = new IndexWriter(indexStore, newIndexWriterConfig())) {
// Force merge because smaller segments have few filtered docs and often fall back to exact
// search, making this test flaky
w.forceMerge(1);
w.commit();
try (IndexReader reader = DirectoryReader.open(indexStore)) {
IndexSearcher searcher = newSearcher(reader);
Query query =
getThrowingVectorQuery(
vectorField, queryVector, resultSimilarity, resultSimilarity, filter);
// Does not fall back to exact search
assertTrue(searcher.count(query) <= numFiltered);
}
}
}
private float getSimilarity(V[] vectors, V queryVector, int targetVisited) {
assertTrue(targetVisited >= 0 && targetVisited <= numDocs);
if (targetVisited == 0) {
return Float.POSITIVE_INFINITY;
}
float[] scores = new float[numDocs];
for (int i = 0; i < numDocs; i++) {
scores[i] = compare(queryVector, vectors[i]);
}
Arrays.sort(scores);
return scores[numDocs - targetVisited];
}
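// Worked example for the selection above (hypothetical numbers): with
// numDocs = 10 and targetVisited = 3, the ascending sort places the
// 3rd-highest score at index 10 - 3 = 7, so (ignoring ties) exactly 3
// vectors compare >= the returned threshold.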
private int[] getFiltered(int numFiltered) {
Set<Integer> accepted = new HashSet<>();
for (int i = 0; i < numFiltered; ) {
int index = random().nextInt(numDocs);
if (!accepted.contains(index)) {
accepted.add(index);
i++;
}
}
return accepted.stream().mapToInt(Integer::intValue).toArray();
}
private int getId(IndexSearcher searcher, int doc) throws IOException {
return Objects.requireNonNull(searcher.storedFields().document(doc).getField(idField))
.numericValue()
.intValue();
}
@SuppressWarnings("unchecked")
V[] getRandomVectors(int numDocs, int dim) {
return (V[]) IntStream.range(0, numDocs).mapToObj(i -> getRandomVector(dim)).toArray();
}
@SafeVarargs
final Directory getIndexStore(V... vectors) throws IOException {
Directory dir = newDirectory();
try (RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
for (int i = 0; i < vectors.length; ++i) {
Document doc = new Document();
doc.add(getVectorField(vectorField, vectors[i], function));
doc.add(new IntField(idField, i, Field.Store.YES));
writer.addDocument(doc);
}
}
return dir;
}
}

View File

@@ -221,7 +221,7 @@ public class TestBoolean2 extends LuceneTestCase {
bigSearcher = null;
}
private static String[] docFields = {
private static final String[] docFields = {
"w1 w2 w3 w4 w5", "w1 w3 w2 w3", "w1 xx w2 yy w3", "w1 w3 xx w2 yy mm"
};
@@ -423,8 +423,8 @@
// used to set properties or change every BooleanQuery
// generated from randBoolQuery.
public static interface Callback {
public void postCreate(BooleanQuery.Builder q);
public interface Callback {
void postCreate(BooleanQuery.Builder q);
}
// Random rnd is passed in so that the exact same random query may be created

View File

@@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.util.Arrays;
import org.apache.lucene.document.KnnByteVectorField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.TestVectorUtil;
import org.junit.Before;
public class TestByteVectorSimilarityQuery
extends BaseVectorSimilarityQueryTestCase<
byte[], KnnByteVectorField, ByteVectorSimilarityQuery> {
@Before
public void setup() {
vectorField = getClass().getSimpleName() + ":VectorField";
idField = getClass().getSimpleName() + ":IdField";
function = VectorSimilarityFunction.EUCLIDEAN;
numDocs = atLeast(100);
dim = atLeast(50);
}
@Override
byte[] getRandomVector(int dim) {
return TestVectorUtil.randomVectorBytes(dim);
}
@Override
float compare(byte[] vector1, byte[] vector2) {
return function.compare(vector1, vector2);
}
@Override
boolean checkEquals(byte[] vector1, byte[] vector2) {
return Arrays.equals(vector1, vector2);
}
@Override
KnnByteVectorField getVectorField(String name, byte[] vector, VectorSimilarityFunction function) {
return new KnnByteVectorField(name, vector, function);
}
@Override
ByteVectorSimilarityQuery getVectorQuery(
String field,
byte[] vector,
float traversalSimilarity,
float resultSimilarity,
Query filter) {
return new ByteVectorSimilarityQuery(
field, vector, traversalSimilarity, resultSimilarity, filter);
}
@Override
ByteVectorSimilarityQuery getThrowingVectorQuery(
String field,
byte[] vector,
float traversalSimilarity,
float resultSimilarity,
Query filter) {
return new ByteVectorSimilarityQuery(
field, vector, traversalSimilarity, resultSimilarity, filter) {
@Override
VectorScorer createVectorScorer(LeafReaderContext context) {
throw new UnsupportedOperationException();
}
};
}
}

View File

@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.util.Arrays;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.TestVectorUtil;
import org.junit.Before;
public class TestFloatVectorSimilarityQuery
extends BaseVectorSimilarityQueryTestCase<
float[], KnnFloatVectorField, FloatVectorSimilarityQuery> {
@Before
public void setup() {
vectorField = getClass().getSimpleName() + ":VectorField";
idField = getClass().getSimpleName() + ":IdField";
function = VectorSimilarityFunction.EUCLIDEAN;
numDocs = atLeast(100);
dim = atLeast(50);
}
@Override
float[] getRandomVector(int dim) {
return TestVectorUtil.randomVector(dim);
}
@Override
float compare(float[] vector1, float[] vector2) {
return function.compare(vector1, vector2);
}
@Override
boolean checkEquals(float[] vector1, float[] vector2) {
return Arrays.equals(vector1, vector2);
}
@Override
KnnFloatVectorField getVectorField(
String name, float[] vector, VectorSimilarityFunction function) {
return new KnnFloatVectorField(name, vector, function);
}
@Override
FloatVectorSimilarityQuery getVectorQuery(
String field,
float[] vector,
float traversalSimilarity,
float resultSimilarity,
Query filter) {
return new FloatVectorSimilarityQuery(
field, vector, traversalSimilarity, resultSimilarity, filter);
}
@Override
FloatVectorSimilarityQuery getThrowingVectorQuery(
String field,
float[] vector,
float traversalSimilarity,
float resultSimilarity,
Query filter) {
return new FloatVectorSimilarityQuery(
field, vector, traversalSimilarity, resultSimilarity, filter) {
@Override
VectorScorer createVectorScorer(LeafReaderContext context) {
throw new UnsupportedOperationException();
}
};
}
}
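For reference, a hedged caller-side sketch of the query these tests exercise, using the public five-argument constructor mirrored above (a null filter is assumed to match all documents, as in the tests):

// Sketch: matches every document whose "vector" field scores at least
// resultSimilarity against queryVector; traversalSimilarity is the lower
// bound used while walking the HNSW graph.
import java.io.IOException;
import org.apache.lucene.search.FloatVectorSimilarityQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

class VectorSimilaritySearchExample {
  static TopDocs searchAboveThreshold(IndexSearcher searcher, float[] queryVector)
      throws IOException {
    Query query =
        new FloatVectorSimilarityQuery(
            "vector", queryVector, /* traversalSimilarity= */ 0.3f,
            /* resultSimilarity= */ 0.5f, /* filter= */ null);
    return searcher.search(query, 10);
  }
}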

View File

@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.tests.util.LuceneTestCase;
public class TestVectorSimilarityCollector extends LuceneTestCase {
public void testResultCollection() {
float traversalSimilarity = 0.3f, resultSimilarity = 0.5f;
VectorSimilarityCollector collector =
new VectorSimilarityCollector(traversalSimilarity, resultSimilarity, Integer.MAX_VALUE);
int[] nodes = {1, 5, 10, 4, 8, 3, 2, 6, 7, 9};
float[] scores = {0.1f, 0.2f, 0.3f, 0.5f, 0.2f, 0.6f, 0.9f, 0.3f, 0.7f, 0.8f};
float[] minCompetitiveSimilarities = new float[nodes.length];
for (int i = 0; i < nodes.length; i++) {
collector.collect(nodes[i], scores[i]);
minCompetitiveSimilarities[i] = collector.minCompetitiveSimilarity();
}
ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
int[] resultNodes = new int[scoreDocs.length];
float[] resultScores = new float[scoreDocs.length];
for (int i = 0; i < scoreDocs.length; i++) {
resultNodes[i] = scoreDocs[i].doc;
resultScores[i] = scoreDocs[i].score;
}
// All nodes above resultSimilarity appear in order of collection
assertArrayEquals(new int[] {4, 3, 2, 7, 9}, resultNodes);
assertArrayEquals(new float[] {0.5f, 0.6f, 0.9f, 0.7f, 0.8f}, resultScores, 1e-3f);
// Min competitive similarity is the minimum of traversalSimilarity and the best score encountered
assertArrayEquals(
new float[] {0.1f, 0.2f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f, 0.3f},
minCompetitiveSimilarities,
1e-3f);
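// Trace of the expected floor: after the 4th collect the best score seen is
// 0.5, but min(traversalSimilarity = 0.3, 0.5) = 0.3, so once the best score
// reaches traversalSimilarity the reported minimum stays pinned at 0.3.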
}
}

View File

@@ -17,8 +17,14 @@
package org.apache.lucene.util.bkd;
import java.io.IOException;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.store.Directory;
@@ -28,6 +34,7 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.SuppressForbidden;
public class TestDocIdsWriter extends LuceneTestCase {
@@ -150,4 +157,28 @@ public class TestDocIdsWriter extends LuceneTestCase {
}
dir.deleteFile("tmp");
}
// This simple test tickles a JVM C2 JIT crash on JDKs less than 21.0.1.
// It crashes only when run with C2, which the build enables when the `CI` environment variable is set.
// Regardless of whether C2 is enabled, the test should never fail.
public void testCrash() throws IOException {
assumeTrue("Requires C2, which is only enabled when CI env is set", getCIEnv() != null);
int itrs = atLeast(100);
for (int i = 0; i < itrs; i++) {
try (Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null))) {
for (int d = 0; d < 20_000; d++) {
iw.addDocument(
List.of(new IntPoint("foo", 0), new SortedNumericDocValuesField("bar", 0)));
}
}
}
}
@SuppressForbidden(reason = "needed to check if C2 is enabled")
@SuppressWarnings("removal")
private static String getCIEnv() {
PrivilegedAction<String> pa = () -> System.getenv("CI");
return AccessController.doPrivileged(pa);
}
}
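To exercise the crash path locally, the CI environment variable has to be present, e.g. CI=true gradlew -p lucene/core test --tests "TestDocIdsWriter" (command shape assumed from the gradle test help, not stated in this commit); without it, the assumeTrue above skips the test, since C2 is only enabled when CI is detected.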

View File

@@ -326,10 +326,6 @@ public class TestDrillSideways extends FacetTestCase {
// termination occurs (i.e., #finish is properly called in that scenario):
assertEquals(1, baseFC.getMatchingDocs().size());
assertEquals(1, dimFC.getMatchingDocs().size());
FacetsCollector.MatchingDocs baseMD = baseFC.getMatchingDocs().get(0);
FacetsCollector.MatchingDocs dimMD = dimFC.getMatchingDocs().get(0);
assertEquals(1, baseMD.totalHits);
assertEquals(1, dimMD.totalHits);
}
}
}

View File

@@ -27,7 +27,7 @@ import org.apache.lucene.search.SimpleCollector;
abstract class DocValuesTermsCollector<DV> extends SimpleCollector {
@FunctionalInterface
static interface Function<R> {
interface Function<R> {
R apply(LeafReader t) throws IOException;
}

View File

@@ -200,6 +200,7 @@ abstract class ParentBlockJoinKnnVectorQueryTestCase extends LuceneTestCase {
}
toAdd.add(makeParent(new int[] {6, 7, 8, 9, 10}));
w.addDocuments(toAdd);
w.forceMerge(1);
}
try (IndexReader reader = DirectoryReader.open(d)) {
assertEquals(1, reader.leaves().size());

View File

@@ -41,7 +41,7 @@ public class TestQueryTreeBuilder extends LuceneTestCase {
assertEquals("OK", result);
}
private static interface DummyQueryNodeInterface extends QueryNode {}
private interface DummyQueryNodeInterface extends QueryNode {}
private abstract static class AbstractDummyQueryNode extends QueryNodeImpl
implements DummyQueryNodeInterface {}

View File

@@ -80,17 +80,17 @@ public abstract class NumberRangePrefixTree extends SpatialPrefixTree {
*
* @lucene.experimental
*/
public static interface NRShape extends Shape, Cloneable {
public interface NRShape extends Shape, Cloneable {
/** The result should be parseable by {@link #parseShape(String)}. */
@Override
abstract String toString();
String toString();
/**
* Returns this shape rounded to the target level. If we are already more coarse than the level
* then the shape is simply returned. The result may refer to internal state of the argument so
* you may want to clone it.
*/
public NRShape roundToLevel(int targetLevel);
NRShape roundToLevel(int targetLevel);
}
//
@@ -234,7 +234,7 @@ public abstract class NumberRangePrefixTree extends SpatialPrefixTree {
*
* @lucene.experimental
*/
public static interface UnitNRShape extends NRShape, Comparable<UnitNRShape> {
public interface UnitNRShape extends NRShape, Comparable<UnitNRShape> {
// note: formerly known as LevelledValue; thus some variables still use 'lv'
/** Get the prefix tree level, the higher the more precise. 0 means the world (universe). */

View File

@@ -1120,12 +1120,12 @@ class GeoComplexPolygon extends GeoBasePolygon {
* into the traversal method of a tree, and each edge that matches will cause this object to be
* called.
*/
private static interface EdgeIterator {
private interface EdgeIterator {
/**
* @param edge is the edge that matched.
* @return true if the iteration should continue, false otherwise.
*/
public boolean matches(final Edge edge);
boolean matches(final Edge edge);
}
/**
@@ -1133,16 +1133,16 @@ class GeoComplexPolygon extends GeoBasePolygon {
* implementing this interface into the traversal method of a tree, and each edge that matches
* will cause this object to be called.
*/
private static interface CountingEdgeIterator extends EdgeIterator {
private interface CountingEdgeIterator extends EdgeIterator {
/**
* @return the number of edges that were crossed.
*/
public int getCrossingCount();
int getCrossingCount();
/**
* @return true if the endpoint was on an edge.
*/
public boolean isOnEdge();
boolean isOnEdge();
}
/**

View File

@@ -100,8 +100,8 @@ public final class SuggestRebuildTestUtil {
* Simple marker interface to allow {@link #testLookupsDuringReBuild} callbacks to throw
* Exceptions
*/
public static interface ExceptionalCallback {
public void check(final Lookup suggester) throws Exception;
public interface ExceptionalCallback {
void check(final Lookup suggester) throws Exception;
}
/**

View File

@@ -1200,7 +1200,7 @@ public class TestFuzzySuggester extends LuceneTestCase {
// and tweaked to return the edit distance not the float
// lucene measure
/* Finds unicode (code point) Levenstein (edit) distance
/* Finds unicode (code point) Levenshtein (edit) distance
* between two strings, including transpositions. */
public int getDistance(String target, String other, boolean allowTransposition) {
IntsRef targetPoints;

View File

@@ -90,7 +90,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
* Attribute that records if it was cleared or not. This is used for testing that
* clearAttributes() was called correctly.
*/
public static interface CheckClearAttributesAttribute extends Attribute {
public interface CheckClearAttributesAttribute extends Attribute {
boolean getAndResetClearCalled();
}
@@ -129,7 +129,7 @@
@Override
public void copyTo(AttributeImpl target) {
((CheckClearAttributesAttributeImpl) target).clear();
target.clear();
}
@Override

View File

@@ -38,6 +38,7 @@ import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
@@ -47,6 +48,7 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
@@ -1610,4 +1612,29 @@ public abstract class BasePostingsFormatTestCase extends BaseIndexFileFormatTest
}
}
}
/** Test realistic data, which is often better at uncovering real bugs. */
@Nightly // this test takes a few seconds
public void testLineFileDocs() throws IOException {
// Use a FS dir and a non-randomized IWC to not slow down indexing
try (Directory dir = newFSDirectory(createTempDir())) {
try (LineFileDocs docs = new LineFileDocs(random());
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
final int numDocs = atLeast(10_000);
for (int i = 0; i < numDocs; ++i) {
// Only keep the body field, and don't index term vectors on it, we only care about
// postings
Document doc = docs.nextDoc();
IndexableField body = doc.getField("body");
assertNotNull(body);
assertNotNull(body.stringValue());
assertNotEquals(IndexOptions.NONE, body.fieldType().indexOptions());
body = new TextField("body", body.stringValue(), Store.NO);
w.addDocument(Collections.singletonList(body));
}
w.forceMerge(1);
}
TestUtil.checkIndex(dir);
}
}
}

View File

@@ -30,7 +30,7 @@ import org.junit.runners.model.Statement;
*/
public final class TestRuleIgnoreTestSuites implements TestRule {
/** Marker interface for nested suites that should be ignored if executed in stand-alone mode. */
public static interface NestedTestSuite {}
public interface NestedTestSuite {}
/** A boolean system property indicating nested suites should be executed normally. */
public static final String PROPERTY_RUN_NESTED = "tests.runnested";

View File

@@ -140,7 +140,7 @@ public class TestRuleLimitSysouts extends TestRuleAdapter {
/** Test failures from any tests or rules before. */
private final TestRuleMarkFailure failureMarker;
static interface LimitPredicate {
interface LimitPredicate {
void check(long before, long after) throws IOException;
}