mirror of https://github.com/apache/lucene.git

commit 9a97fbe5ca
Merge branch 'main' into optimize_prefix_query
@@ -10,7 +10,7 @@ on:
push:
branches:
- 'main'
- 'branch_9x'
- 'branch_10x'

env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}

@@ -6,7 +6,7 @@ on:
pull_request:
branches:
- 'main'
- 'branch_9x'
- 'branch_10x'
paths:
- '.github/workflows/run-checks-gradle-upgrade.yml'
- 'gradle/wrapper/**'

@@ -14,7 +14,7 @@ on:
push:
branches:
- 'main'
- 'branch_9x'
- 'branch_10x'
paths:
- '.github/workflows/run-checks-gradle-upgrade.yml'
- 'gradle/wrapper/**'

@@ -6,7 +6,7 @@ on:
pull_request:
branches:
- 'main'
- 'branch_9x'
- 'branch_10x'
paths:
- '.github/workflows/run-checks-mod-analysis-common.yml'
- 'lucene/analysis/common/**'

@@ -14,7 +14,7 @@ on:
push:
branches:
- 'main'
- 'branch_9x'
- 'branch_10x'
paths:
- '.github/workflows/run-checks-mod-analysis-common.yml'
- 'lucene/analysis/common/**'

@@ -6,12 +6,12 @@ on:
pull_request:
branches:
- 'main'
- 'branch_9x'
- 'branch_10x'

push:
branches:
- 'main'
- 'branch_9x'
- 'branch_10x'

env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
@@ -1,5 +1,5 @@
Apache Lucene
Copyright 2001-2022 The Apache Software Foundation
Copyright 2001-2024 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
@@ -41,7 +41,7 @@ apply from: file('gradle/globals.gradle')
// Calculate project version:
version = {
// Release manager: update base version here after release:
String baseVersion = '10.0.0'
String baseVersion = '11.0.0'

// On a release explicitly set release version in one go:
// -Dversion.release=x.y.z
@@ -51,7 +51,7 @@ cd lucene
git clone git@github.com:apache/lucene.git main
cd main
# For each branch that you want a separate directory created for, add a worktree
git worktree add ../9x branch_9x
git worktree add ../10x branch_10x
----

=== Using the Worktrees
@@ -67,13 +67,27 @@
</maintainer>

<!-- NOTE: please insert releases in numeric order, NOT chronologically. -->
<release>
<Version>
<name>lucene-10.0.0</name>
<created>2024-10-14</created>
<revision>10.0.0</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-9.12.0</name>
<created>2024-09-28</created>
<revision>9.12.0</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-9.11.1</name>
<created>2024-06-27</created>
<revision>9.11.1</revision>
</Version>
</release>.
</release>
<release>
<Version>
<name>lucene-9.11.0</name>

@@ -186,6 +200,13 @@
<revision>9.0.0</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-8.11.4</name>
<created>2024-09-24</created>
<revision>8.11.4</revision>
</Version>
</release>
<release>
<Version>
<name>lucene-8.11.3</name>
@@ -40,7 +40,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
'cfs': 'index',
'nocfs': 'index',
'sorted': 'sorted',
'int8_hnsw': 'int8_hnsw',
'int7_hnsw': 'int7_hnsw',
'moreterms': 'moreterms',
'dvupdates': 'dvupdates',
'emptyIndex': 'empty'

@@ -61,7 +61,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp
'cfs': 'testCreateCFS',
'nocfs': 'testCreateNoCFS',
'sorted': 'testCreateSortedIndex',
'int8_hnsw': 'testCreateInt8HNSWIndices',
'int7_hnsw': 'testCreateInt7HNSWIndices',
'moreterms': 'testCreateMoreTermsIndex',
'dvupdates': 'testCreateIndexWithDocValuesUpdates',
'emptyIndex': 'testCreateEmptyIndex'

@@ -206,7 +206,7 @@ def main():
current_version = scriptutil.Version.parse(scriptutil.find_current_version())
create_and_add_index(source, 'cfs', c.version, current_version, c.temp_dir)
create_and_add_index(source, 'nocfs', c.version, current_version, c.temp_dir)
create_and_add_index(source, 'int8_hnsw', c.version, current_version, c.temp_dir)
create_and_add_index(source, 'int7_hnsw', c.version, current_version, c.temp_dir)
should_make_sorted = current_version.is_back_compat_with(c.version) \
and (c.version.major > 6 or (c.version.major == 6 and c.version.minor >= 2))
if should_make_sorted:
@@ -112,8 +112,10 @@ def prepare(root, version, pause_before_sign, gpg_key_id, gpg_password, gpg_home
checkDOAPfiles(version)

if not dev_mode:
print(' ./gradlew --stacktrace --no-daemon clean check')
run('./gradlew --stacktrace --no-daemon clean check')
print(' ./gradlew --stacktrace --no-daemon clean')
run('./gradlew --stacktrace --no-daemon clean')
print(' ./gradlew --stacktrace --no-daemon check')
run('./gradlew --stacktrace --no-daemon check')
else:
print(' skipping precommit check due to dev-mode')
@@ -239,7 +239,7 @@ def maybe_remove_rc_from_svn():
logfile="svn_rm.log",
tee=True,
vars={
'dist_folder': """lucene-{{ release_version }}-RC{{ rc_number }}-rev{{ build_rc.git_rev | default("<git_rev>", True) }}""",
'dist_folder': """lucene-{{ release_version }}-RC{{ rc_number }}-rev-{{ build_rc.git_rev | default("<git_rev>", True) }}""",
'dist_url': "{{ dist_url_base }}/{{ dist_folder }}"
}
)],
@@ -19,6 +19,7 @@
allprojects {
tasks.withType(AbstractArchiveTask).configureEach { task ->
duplicatesStrategy = DuplicatesStrategy.FAIL
preserveFileTimestamps = false
reproducibleFileOrder = true
dirPermissions {
it.unix(0755)
@@ -3,6 +3,78 @@ Lucene Change Log
For more information on past and future Lucene versions, please see:
http://s.apache.org/luceneversions

======================= Lucene 11.0.0 =======================

API Changes
---------------------
(No changes)

New Features
---------------------
(No changes)

Improvements
---------------------
(No changes)

Optimizations
---------------------
(No changes)

Bug Fixes
---------------------
(No changes)

Other
---------------------
(No changes)

======================= Lucene 10.1.0 =======================

API Changes
---------------------

* GITHUB#13859: Allow open-ended ranges in Intervals range queries. (Mayya Sharipova)

New Features
---------------------
(No changes)

Improvements
---------------------
(No changes)

Optimizations
---------------------

* GITHUB#13828: Reduce long[] array allocation for bitset in readBitSetIterator. (Zhang Chao)

* GITHUB#13800: MaxScoreBulkScorer now recomputes scorer partitions when the
minimum competitive allows for a more favorable partitioning. (Adrien Grand)

Bug Fixes
---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
when they were not sorted by startOffset. (Seunghan Jung)
* GITHUB#13884: Remove broken .toArray from Long/CharObjectHashMap entirely. (Pan Guixin)
* GITHUB#12686: Added support for highlighting IndexOrDocValuesQuery. (Prudhvi Godithi)

Build
---------------------

* Upgrade forbiddenapis to version 3.8. (Uwe Schindler)

Other
---------------------
(No changes)

======================== Lucene 10.0.1 =======================

Bug Fixes
---------------------

======================= Lucene 10.0.0 =======================

API Changes
@@ -48,9 +120,9 @@ API Changes

* GITHUB#12296: Make IndexReader and IndexReaderContext classes explicitly sealed.
They have already been runtime-checked to only be implemented by the specific classes
so this is effectively a non-breaking change.
so this is effectively a non-breaking change. (Petr Portnov)

* GITHUB#12276: Rename DaciukMihovAutomatonBuilder to StringsToAutomaton
* GITHUB#12276: Rename DaciukMihovAutomatonBuilder to StringsToAutomaton. (Michael McCandless)

* GITHUB#12321: Reduced visibility of StringsToAutomaton. Please use Automata#makeStringUnion instead. (Greg Miller)

@@ -120,8 +192,17 @@ API Changes

* GITHUB#13328: Convert many basic Lucene classes to record classes, including CollectionStatistics, TermStatistics and LeafMetadata. (Shubham Chaudhary)

* GITHUB#13780: Remove `IndexSearcher#search(List<LeafReaderContext>, Weight, Collector)` in favour of the newly
introduced `IndexSearcher#search(LeafReaderContextPartition[], Weight, Collector)`
* GITHUB#13780: Remove IndexSearcher#search(List<LeafReaderContext>, Weight, Collector) in favour of the newly
introduced IndexSearcher#search(LeafReaderContextPartition[], Weight, Collector). (Luca Cavanna)

* GITHUB#13779: First-class random access API for KnnVectorValues
unifies Byte/FloatVectorValues incorporating RandomAccess* API and introduces
DocIndexIterator for iterative access in place of direct inheritance from DISI. (Michael Sokolov)

* GITHUB#13845: Add missing with-discountOverlaps Similarity constructor variants. (Pierre Salagnac, Christine Poerschke, Robert Muir)

* GITHUB#13820, GITHUB#13825, GITHUB#13830: Corrects DataInput.readGroupVInts to be public and not-final, removes the protected
DataInput.readGroupVInt method. (Zhang Chao, Robert Muir, Uwe Schindler, Dawid Weiss)

New Features
---------------------
@@ -209,7 +290,7 @@ Bug Fixes
* LUCENE-10599: LogMergePolicy is more likely to keep merging segments until
they reach the maximum merge size. (Adrien Grand)

* GITHUB#12220: Hunspell: disallow hidden title-case entries from compound middle/end
* GITHUB#12220: Hunspell: disallow hidden title-case entries from compound middle/end. (Peter Gromov)

* GITHUB#12878: Fix the declared Exceptions of Expression#evaluate() to match those
of DoubleValues#doubleValue(). (Uwe Schindler)

@@ -292,9 +373,17 @@ Build

======================== Lucene 9.12.0 =======================

Security Fixes
---------------------

* Deserialization of Untrusted Data vulnerability in Apache Lucene Replicator - CVE-2024-45772
(Summ3r from Vidar-Team, Robert Muir, Paul Irwin)

API Changes
---------------------

* GITHUB#13806: Add TermInSetQuery#getBytesRefIterator to be able to iterate over query terms. (Christoph Büscher)

* GITHUB#13469: Expose FlatVectorsFormat as a first-class format; can be configured using a custom Codec. (Michael Sokolov)

* GITHUB#13612: Hunspell: add Suggester#proceedPastRep to avoid losing relevant suggestions. (Peter Gromov)
@@ -311,6 +400,9 @@ API Changes
* GITHUB#13568, GITHUB#13750: Add DrillSideways#search method that supports any CollectorManagers for drill-sideways dimensions
or drill-down. (Egor Potemkin)

* GITHUB#13757: For similarities, provide default computeNorm implementation and remove remaining discountOverlaps setters.
(Christine Poerschke, Adrien Grand, Robert Muir)

New Features
---------------------

@@ -418,8 +510,6 @@ Optimizations

* GITHUB#13742: Reorder checks in LRUQueryCache#count (Shubham Chaudhary)

* GITHUB#13686: Replace Map<String,Object> with IntObjectHashMap for DV producer (Pan Guixin)

* GITHUB#13697: Add a bulk scorer to ToParentBlockJoinQuery, which delegates to the bulk scorer of the child query.
This should speed up query evaluation when the child query has a specialized bulk scorer, such as disjunctive queries.
(Mike Pellegrini)
@@ -470,6 +560,8 @@ Bug Fixes
`IndexWriter.forceMerge` or
`IndexWriter.addIndexes(CodecReader...)`, or reindexing entirely.

* GITHUB#13799: Disable intra-merge parallelism for all structures but kNN vectors. (Ben Trent)

Build
---------------------

@@ -482,6 +574,8 @@ Other
* GITHUB#13720: Add float comparison based on unit of least precision and use it to stop test failures caused by float
summation not being associative in IEEE 754. (Alex Herbert, Stefan Vodita)

* Remove code triggering forbidden-apis regarding Java serialization. (Uwe Schindler, Robert Muir)

======================== Lucene 9.11.1 =======================

Bug Fixes
@@ -19,6 +19,12 @@

## Migration from Lucene 9.x to Lucene 10.0

### Changes to DataInput.readGroupVInt and readGroupVInts methods

As part of GITHUB#13820, GITHUB#13825, GITHUB#13830, this issue corrects DataInput.readGroupVInts
to be public and not-final, allowing subclasses to override it. This change also removes the protected
DataInput.readGroupVInt method: subclasses should delegate or reimplement it entirely.
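As an illustration of what the now-public, non-final method enables, here is a minimal, hypothetical delegating `DataInput` that intercepts group-vint reads. The wrapper class and the `long[]`-based signature shown below are assumptions made for this sketch, not part of the change itself:

```java
import java.io.IOException;
import org.apache.lucene.store.DataInput;

// Hypothetical sketch: a delegating DataInput that counts group-vint values read.
// Assumes readGroupVInts(long[] dst, int limit) is the public signature in 10.0.
class CountingDataInput extends DataInput {
  private final DataInput in;
  long groupVIntValuesRead;

  CountingDataInput(DataInput in) {
    this.in = in;
  }

  @Override
  public byte readByte() throws IOException {
    return in.readByte();
  }

  @Override
  public void readBytes(byte[] b, int offset, int len) throws IOException {
    in.readBytes(b, offset, len);
  }

  @Override
  public void skipBytes(long numBytes) throws IOException {
    in.skipBytes(numBytes);
  }

  @Override
  public void readGroupVInts(long[] dst, int limit) throws IOException {
    groupVIntValuesRead += limit;
    in.readGroupVInts(dst, limit); // delegate to the wrapped input
  }
}
```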
### OpenNLP dependency upgrade

[Apache OpenNLP](https://opennlp.apache.org) 2.x opens the door to accessing various models via the ONNX runtime. To migrate you will need to update any deprecated OpenNLP methods that you may be using.
@@ -888,3 +894,7 @@ additional vectors into the same field with either 4 or 7 bit
quantization (or no quantization), and ensure all older (9.x written)
segments are rewritten either via `IndexWriter.forceMerge` or
`IndexWriter.addIndexes(CodecReader...)`, or reindexing entirely.

### Vector values APIs switched to primarily random-access

`{Byte/Float}VectorValues` no longer inherit from `DocIdSetIterator`. Rather they extend a common class, `KnnVectorValues`, that provides a random access API (previously provided by `RandomAccessVectorValues`, now removed), and an `iterator()` method for retrieving `DocIndexIterator`: an iterator which is a DISI that also provides an `index()` method. Therefore, any iteration over vector values must now be performed using the values' `iterator()`. Random access works as before, but does not require casting to `RandomAccessVectorValues`.
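A minimal sketch of the new iteration pattern described above; the helper class and field name are hypothetical, while the `iterator()`, `index()` and `vectorValue(ord)` calls follow the API referenced in this change:

```java
import java.io.IOException;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.DocIdSetIterator;

class VectorIterationExample {
  // Iterate all float vectors of a field using the 10.0 random-access API.
  static void consumeVectors(LeafReader reader, String field) throws IOException {
    FloatVectorValues values = reader.getFloatVectorValues(field);
    if (values == null) {
      return; // the field has no indexed vectors in this segment
    }
    KnnVectorValues.DocIndexIterator it = values.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      // index() is the vector ordinal of the current document; vectorValue(ord) is random access
      float[] vector = values.vectorValue(it.index());
      // ... use doc and vector ...
    }
  }
}
```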
@@ -18,10 +18,10 @@
package org.apache.lucene.analysis.synonym.word2vec;

import java.io.IOException;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.TermAndVector;
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;

/**
* Word2VecModel is a class representing the parsed Word2Vec model containing the vectors for each

@@ -29,7 +29,7 @@ import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
*
* @lucene.experimental
*/
public class Word2VecModel implements RandomAccessVectorValues.Floats {
public class Word2VecModel extends FloatVectorValues {

private final int dictionarySize;
private final int vectorDimension;
@@ -49,7 +49,7 @@ import org.apache.lucene.util.Version;
public class TestCustomAnalyzer extends BaseTokenStreamTestCase {

@SuppressWarnings("deprecation")
private static final Version LUCENE_9_0_0 = Version.LUCENE_9_0_0;
private static final Version LUCENE_10_0_0 = Version.LUCENE_10_0_0;

// Test some examples (TODO: we only check behavior, we may need something like
// TestRandomChains...)

@@ -111,7 +111,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
public void testVersionAwareFilter() throws Exception {
CustomAnalyzer a =
CustomAnalyzer.builder()
.withDefaultMatchVersion(LUCENE_9_0_0)
.withDefaultMatchVersion(LUCENE_10_0_0)
.withTokenizer(StandardTokenizerFactory.class)
.addTokenFilter(DummyVersionAwareTokenFilterFactory.class)
.build();

@@ -128,7 +128,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
public void testFactoryHtmlStripClassicFolding() throws Exception {
CustomAnalyzer a =
CustomAnalyzer.builder()
.withDefaultMatchVersion(LUCENE_9_0_0)
.withDefaultMatchVersion(LUCENE_10_0_0)
.addCharFilter(HTMLStripCharFilterFactory.class)
.withTokenizer(ClassicTokenizerFactory.class)
.addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "true")

@@ -164,7 +164,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
public void testHtmlStripClassicFolding() throws Exception {
CustomAnalyzer a =
CustomAnalyzer.builder()
.withDefaultMatchVersion(LUCENE_9_0_0)
.withDefaultMatchVersion(LUCENE_10_0_0)
.addCharFilter("htmlstrip")
.withTokenizer("classic")
.addTokenFilter("asciifolding", "preserveOriginal", "true")

@@ -513,7 +513,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {

@Override
public TokenStream create(TokenStream input) {
if (luceneMatchVersion.equals(LUCENE_9_0_0)) {
if (luceneMatchVersion.equals(LUCENE_10_0_0)) {
return input;
}
return new LowerCaseFilter(input);
@@ -36,6 +36,7 @@ module org.apache.lucene.backward_codecs {
exports org.apache.lucene.backward_codecs.lucene94;
exports org.apache.lucene.backward_codecs.lucene95;
exports org.apache.lucene.backward_codecs.lucene99;
exports org.apache.lucene.backward_codecs.lucene912;
exports org.apache.lucene.backward_codecs.packed;
exports org.apache.lucene.backward_codecs.store;

@@ -62,5 +63,6 @@ module org.apache.lucene.backward_codecs {
org.apache.lucene.backward_codecs.lucene92.Lucene92Codec,
org.apache.lucene.backward_codecs.lucene94.Lucene94Codec,
org.apache.lucene.backward_codecs.lucene95.Lucene95Codec,
org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
org.apache.lucene.backward_codecs.lucene99.Lucene99Codec,
org.apache.lucene.backward_codecs.lucene912.Lucene912Codec;
}
@@ -77,9 +77,8 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
public Lucene50CompoundFormat() {}

@Override
public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context)
throws IOException {
return new Lucene50CompoundReader(dir, si, context);
public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si) throws IOException {
return new Lucene50CompoundReader(dir, si);
}

@Override
@@ -31,6 +31,7 @@ import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.IOUtils;

@@ -57,8 +58,7 @@ final class Lucene50CompoundReader extends CompoundDirectory {
/** Create a new CompoundFileDirectory. */
// TODO: we should just pre-strip "entries" and append segment name up-front like simpletext?
// this need not be a "general purpose" directory anymore (it only writes index files)
public Lucene50CompoundReader(Directory directory, SegmentInfo si, IOContext context)
throws IOException {
public Lucene50CompoundReader(Directory directory, SegmentInfo si) throws IOException {
this.directory = directory;
this.segmentName = si.name;
String dataFileName =

@@ -74,7 +74,7 @@ final class Lucene50CompoundReader extends CompoundDirectory {
}
expectedLength += CodecUtil.footerLength();

handle = directory.openInput(dataFileName, context);
handle = directory.openInput(dataFileName, IOContext.DEFAULT.withReadAdvice(ReadAdvice.NORMAL));
// DirectoryUtil.openInput(directory, dataFileName, context);
try {
CodecUtil.checkIndexHeader(

@@ -170,7 +170,7 @@ final class Lucene50CompoundReader extends CompoundDirectory {
+ entries.keySet()
+ ")");
}
return handle.slice(name, entry.offset, entry.length);
return handle.slice(name, entry.offset, entry.length, context.readAdvice());
}

/** Returns an array of strings, one for each file in the directory. */
@@ -17,6 +17,8 @@
package org.apache.lucene.backward_codecs.lucene80;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.backward_codecs.packed.LegacyDirectMonotonicReader;
import org.apache.lucene.backward_codecs.packed.LegacyDirectReader;
import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;

@@ -39,7 +41,6 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;

@@ -52,11 +53,11 @@ import org.apache.lucene.util.compress.LZ4;

/** reader for {@link Lucene80DocValuesFormat} */
final class Lucene80DocValuesProducer extends DocValuesProducer {
private final IntObjectHashMap<NumericEntry> numerics = new IntObjectHashMap<>();
private final IntObjectHashMap<BinaryEntry> binaries = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedEntry> sorted = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedSetEntry> sortedSets = new IntObjectHashMap<>();
private final IntObjectHashMap<SortedNumericEntry> sortedNumerics = new IntObjectHashMap<>();
private final Map<String, NumericEntry> numerics = new HashMap<>();
private final Map<String, BinaryEntry> binaries = new HashMap<>();
private final Map<String, SortedEntry> sorted = new HashMap<>();
private final Map<String, SortedSetEntry> sortedSets = new HashMap<>();
private final Map<String, SortedNumericEntry> sortedNumerics = new HashMap<>();
private final IndexInput data;
private final int maxDoc;
private int version = -1;

@@ -138,7 +139,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
}
byte type = meta.readByte();
if (type == Lucene80DocValuesFormat.NUMERIC) {
numerics.put(info.number, readNumeric(meta));
numerics.put(info.name, readNumeric(meta));
} else if (type == Lucene80DocValuesFormat.BINARY) {
final boolean compressed;
if (version >= Lucene80DocValuesFormat.VERSION_CONFIGURABLE_COMPRESSION) {

@@ -157,13 +158,13 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
} else {
compressed = version >= Lucene80DocValuesFormat.VERSION_BIN_COMPRESSED;
}
binaries.put(info.number, readBinary(meta, compressed));
binaries.put(info.name, readBinary(meta, compressed));
} else if (type == Lucene80DocValuesFormat.SORTED) {
sorted.put(info.number, readSorted(meta));
sorted.put(info.name, readSorted(meta));
} else if (type == Lucene80DocValuesFormat.SORTED_SET) {
sortedSets.put(info.number, readSortedSet(meta));
sortedSets.put(info.name, readSortedSet(meta));
} else if (type == Lucene80DocValuesFormat.SORTED_NUMERIC) {
sortedNumerics.put(info.number, readSortedNumeric(meta));
sortedNumerics.put(info.name, readSortedNumeric(meta));
} else {
throw new CorruptIndexException("invalid type: " + type, meta);
}

@@ -425,7 +426,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {

@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.number);
NumericEntry entry = numerics.get(field.name);
return getNumeric(entry);
}

@@ -914,7 +915,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {

@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
BinaryEntry entry = binaries.get(field.number);
BinaryEntry entry = binaries.get(field.name);
if (entry.compressed) {
return getCompressedBinary(entry);
} else {

@@ -972,7 +973,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {

@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
SortedEntry entry = sorted.get(field.number);
SortedEntry entry = sorted.get(field.name);
return getSorted(entry);
}

@@ -1406,7 +1407,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {

@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
SortedNumericEntry entry = sortedNumerics.get(field.number);
SortedNumericEntry entry = sortedNumerics.get(field.name);
if (entry.numValues == entry.numDocsWithField) {
return DocValues.singleton(getNumeric(entry));
}

@@ -1542,7 +1543,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {

@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
SortedSetEntry entry = sortedSets.get(field.number);
SortedSetEntry entry = sortedSets.get(field.name);
if (entry.singleValueEntry != null) {
return DocValues.singleton(getSorted(entry.singleValueEntry));
}
@@ -22,10 +22,10 @@ import java.util.Locale;
import java.util.Objects;
import java.util.SplittableRandom;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.hnsw.NeighborQueue;
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;

/**
* Builder for HNSW graph. See {@link Lucene90OnHeapHnswGraph} for a gloss on the algorithm and the

@@ -49,7 +49,7 @@ public final class Lucene90HnswGraphBuilder {
private final Lucene90NeighborArray scratch;

private final VectorSimilarityFunction similarityFunction;
private final RandomAccessVectorValues.Floats vectorValues;
private final FloatVectorValues vectorValues;
private final SplittableRandom random;
private final Lucene90BoundsChecker bound;
final Lucene90OnHeapHnswGraph hnsw;

@@ -58,7 +58,7 @@ public final class Lucene90HnswGraphBuilder {

// we need two sources of vectors in order to perform diversity check comparisons without
// colliding
private final RandomAccessVectorValues.Floats buildVectors;
private final FloatVectorValues buildVectors;

/**
* Reads all the vectors from vector values, builds a graph connecting them by their dense

@@ -73,7 +73,7 @@ public final class Lucene90HnswGraphBuilder {
* to ensure repeatable construction.
*/
public Lucene90HnswGraphBuilder(
RandomAccessVectorValues.Floats vectors,
FloatVectorValues vectors,
VectorSimilarityFunction similarityFunction,
int maxConn,
int beamWidth,

@@ -97,14 +97,14 @@ public final class Lucene90HnswGraphBuilder {
}

/**
* Reads all the vectors from two copies of a {@link RandomAccessVectorValues}. Providing two
* copies enables efficient retrieval without extra data copying, while avoiding collision of the
* Reads all the vectors from two copies of a {@link FloatVectorValues}. Providing two copies
* enables efficient retrieval without extra data copying, while avoiding collision of the
* returned values.
*
* @param vectors the vectors for which to build a nearest neighbors graph. Must be an independet
* accessor for the vectors
*/
public Lucene90OnHeapHnswGraph build(RandomAccessVectorValues.Floats vectors) throws IOException {
public Lucene90OnHeapHnswGraph build(FloatVectorValues vectors) throws IOException {
if (vectors == vectorValues) {
throw new IllegalArgumentException(
"Vectors to build must be independent of the source of vectors provided to HnswGraphBuilder()");

@@ -230,7 +230,7 @@ public final class Lucene90HnswGraphBuilder {
float[] candidate,
float score,
Lucene90NeighborArray neighbors,
RandomAccessVectorValues.Floats vectorValues)
FloatVectorValues vectorValues)
throws IOException {
bound.set(score);
for (int i = 0; i < neighbors.size(); i++) {
@@ -20,7 +20,6 @@ package org.apache.lucene.backward_codecs.lucene90;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.SplittableRandom;

@@ -34,7 +33,6 @@ import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.search.VectorScorer;
import org.apache.lucene.store.ChecksumIndexInput;

@@ -44,7 +42,6 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.NeighborQueue;
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;

/**
* Reads vectors from the index segments along with index data structures supporting KNN search.

@@ -263,7 +260,7 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
int node = results.topNode();
float minSimilarity = results.topScore();
results.pop();
knnCollector.collect(node, minSimilarity);
knnCollector.collect(vectorValues.ordToDoc(node), minSimilarity);
}
}

@@ -355,8 +352,7 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
}

/** Read the vector values from the index input. This supports both iterated and random access. */
static class OffHeapFloatVectorValues extends FloatVectorValues
implements RandomAccessVectorValues.Floats {
static class OffHeapFloatVectorValues extends FloatVectorValues {

final int dimension;
final int[] ordToDoc;

@@ -367,9 +363,6 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
final float[] value;
final VectorSimilarityFunction similarityFunction;

int ord = -1;
int doc = -1;

OffHeapFloatVectorValues(
int dimension,
int[] ordToDoc,

@@ -394,42 +387,6 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
return ordToDoc.length;
}

@Override
public float[] vectorValue() throws IOException {
return vectorValue(ord);
}

@Override
public int docID() {
return doc;
}

@Override
public int nextDoc() {
if (++ord >= size()) {
doc = NO_MORE_DOCS;
} else {
doc = ordToDoc[ord];
}
return doc;
}

@Override
public int advance(int target) {
assert docID() < target;
ord = Arrays.binarySearch(ordToDoc, ord + 1, ordToDoc.length, target);
if (ord < 0) {
ord = -(ord + 1);
}
assert ord <= ordToDoc.length;
if (ord == ordToDoc.length) {
doc = NO_MORE_DOCS;
} else {
doc = ordToDoc[ord];
}
return doc;
}

@Override
public OffHeapFloatVectorValues copy() {
return new OffHeapFloatVectorValues(dimension, ordToDoc, similarityFunction, dataIn.clone());

@@ -446,21 +403,32 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
return value;
}

@Override
public int ordToDoc(int ord) {
return ordToDoc[ord];
}

@Override
public DocIndexIterator iterator() {
return createSparseIterator();
}

@Override
public VectorScorer scorer(float[] target) {
if (size() == 0) {
return null;
}
OffHeapFloatVectorValues values = this.copy();
DocIndexIterator iterator = values.iterator();
return new VectorScorer() {
@Override
public float score() throws IOException {
return values.similarityFunction.compare(values.vectorValue(), target);
return values.similarityFunction.compare(values.vectorValue(iterator.index()), target);
}

@Override
public DocIdSetIterator iterator() {
return values;
public DocIndexIterator iterator() {
return iterator;
}
};
}
@@ -23,12 +23,12 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.SplittableRandom;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.SparseFixedBitSet;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.NeighborQueue;
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;

/**
* An {@link HnswGraph} where all nodes and connections are held in memory. This class is used to

@@ -74,7 +74,7 @@ public final class Lucene90OnHeapHnswGraph extends HnswGraph {
float[] query,
int topK,
int numSeed,
RandomAccessVectorValues.Floats vectors,
FloatVectorValues vectors,
VectorSimilarityFunction similarityFunction,
HnswGraph graphValues,
Bits acceptOrds,
@@ -46,7 +46,6 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.HnswGraphSearcher;
import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector;
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;

/**

@@ -398,8 +397,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
}

/** Read the vector values from the index input. This supports both iterated and random access. */
static class OffHeapFloatVectorValues extends FloatVectorValues
implements RandomAccessVectorValues.Floats {
static class OffHeapFloatVectorValues extends FloatVectorValues {

private final int dimension;
private final int size;

@@ -410,9 +408,6 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
private final float[] value;
private final VectorSimilarityFunction similarityFunction;

private int ord = -1;
private int doc = -1;

OffHeapFloatVectorValues(
int dimension,
int size,

@@ -439,49 +434,6 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
return size;
}

@Override
public float[] vectorValue() throws IOException {
dataIn.seek((long) ord * byteSize);
dataIn.readFloats(value, 0, value.length);
return value;
}

@Override
public int docID() {
return doc;
}

@Override
public int nextDoc() {
if (++ord >= size) {
doc = NO_MORE_DOCS;
} else {
doc = ordToDocOperator.applyAsInt(ord);
}
return doc;
}

@Override
public int advance(int target) {
assert docID() < target;

if (ordToDoc == null) {
ord = target;
} else {
ord = Arrays.binarySearch(ordToDoc, ord + 1, ordToDoc.length, target);
if (ord < 0) {
ord = -(ord + 1);
}
}

if (ord < size) {
doc = ordToDocOperator.applyAsInt(ord);
} else {
doc = NO_MORE_DOCS;
}
return doc;
}

@Override
public OffHeapFloatVectorValues copy() {
return new OffHeapFloatVectorValues(

@@ -495,21 +447,32 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
return value;
}

@Override
public int ordToDoc(int ord) {
return ordToDocOperator.applyAsInt(ord);
}

@Override
public DocIndexIterator iterator() {
return createSparseIterator();
}

@Override
public VectorScorer scorer(float[] target) {
if (size == 0) {
return null;
}
OffHeapFloatVectorValues values = this.copy();
DocIndexIterator iterator = values.iterator();
return new VectorScorer() {
@Override
public float score() throws IOException {
return values.similarityFunction.compare(values.vectorValue(), target);
return values.similarityFunction.compare(values.vectorValue(iterator.index()), target);
}

@Override
public DocIdSetIterator iterator() {
return values;
return iterator;
}
};
}
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene912;
package org.apache.lucene.backward_codecs.lucene912;

import java.util.Objects;
import org.apache.lucene.codecs.Codec;

@@ -37,6 +37,7 @@ import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat;
@ -0,0 +1,433 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Lucene 9.12 file format.
|
||||
*
|
||||
* <h2>Apache Lucene - Index File Formats</h2>
|
||||
*
|
||||
* <div>
|
||||
*
|
||||
* <ul>
|
||||
* <li><a href="#Introduction">Introduction</a>
|
||||
* <li><a href="#Definitions">Definitions</a>
|
||||
* <ul>
|
||||
* <li><a href="#Inverted_Indexing">Inverted Indexing</a>
|
||||
* <li><a href="#Types_of_Fields">Types of Fields</a>
|
||||
* <li><a href="#Segments">Segments</a>
|
||||
* <li><a href="#Document_Numbers">Document Numbers</a>
|
||||
* </ul>
|
||||
* <li><a href="#Overview">Index Structure Overview</a>
|
||||
* <li><a href="#File_Naming">File Naming</a>
|
||||
* <li><a href="#file-names">Summary of File Extensions</a>
|
||||
* <ul>
|
||||
* <li><a href="#Lock_File">Lock File</a>
|
||||
* <li><a href="#History">History</a>
|
||||
* <li><a href="#Limitations">Limitations</a>
|
||||
* </ul>
|
||||
* </ul>
|
||||
*
|
||||
* </div> <a id="Introduction"></a>
|
||||
*
|
||||
* <h3>Introduction</h3>
|
||||
*
|
||||
* <div>
|
||||
*
|
||||
* <p>This document defines the index file formats used in this version of Lucene. If you are using
|
||||
* a different version of Lucene, please consult the copy of <code>docs/</code> that was distributed
|
||||
* with the version you are using.
|
||||
*
|
||||
* <p>This document attempts to provide a high-level definition of the Apache Lucene file formats.
|
||||
* </div> <a id="Definitions"></a>
|
||||
*
|
||||
* <h3>Definitions</h3>
|
||||
*
|
||||
* <div>
|
||||
*
|
||||
* <p>The fundamental concepts in Lucene are index, document, field and term.
|
||||
*
|
||||
* <p>An index contains a sequence of documents.
|
||||
*
|
||||
* <ul>
|
||||
* <li>A document is a sequence of fields.
|
||||
* <li>A field is a named sequence of terms.
|
||||
* <li>A term is a sequence of bytes.
|
||||
* </ul>
|
||||
*
|
||||
* <p>The same sequence of bytes in two different fields is considered a different term. Thus terms
|
||||
* are represented as a pair: the string naming the field, and the bytes within the field. <a
|
||||
* id="Inverted_Indexing"></a>
|
||||
*
|
||||
* <h4>Inverted Indexing</h4>
|
||||
*
|
||||
* <p>Lucene's index stores terms and statistics about those terms in order to make term-based
|
||||
* search more efficient. Lucene's terms index falls into the family of indexes known as an
|
||||
* <i>inverted index.</i> This is because it can list, for a term, the documents that contain it.
|
||||
* This is the inverse of the natural relationship, in which documents list terms. <a
|
||||
* id="Types_of_Fields"></a>
|
||||
*
|
||||
* <h4>Types of Fields</h4>
|
||||
*
|
||||
* <p>In Lucene, fields may be <i>stored</i>, in which case their text is stored in the index
|
||||
* literally, in a non-inverted manner. Fields that are inverted are called <i>indexed</i>. A field
|
||||
* may be both stored and indexed.
|
||||
*
|
||||
* <p>The text of a field may be <i>tokenized</i> into terms to be indexed, or the text of a field
|
||||
* may be used literally as a term to be indexed. Most fields are tokenized, but sometimes it is
|
||||
* useful for certain identifier fields to be indexed literally.
|
||||
*
|
||||
* <p>See the {@link org.apache.lucene.document.Field Field} java docs for more information on
|
||||
* Fields. <a id="Segments"></a>
|
||||
*
|
||||
* <h4>Segments</h4>
|
||||
*
|
||||
* <p>Lucene indexes may be composed of multiple sub-indexes, or <i>segments</i>. Each segment is a
|
||||
* fully independent index, which could be searched separately. Indexes evolve by:
|
||||
*
|
||||
* <ol>
|
||||
* <li>Creating new segments for newly added documents.
|
||||
* <li>Merging existing segments.
|
||||
* </ol>
|
||||
*
|
||||
* <p>Searches may involve multiple segments and/or multiple indexes, each index potentially
|
||||
* composed of a set of segments. <a id="Document_Numbers"></a>
|
||||
*
|
||||
* <h4>Document Numbers</h4>
|
||||
*
|
||||
* <p>Internally, Lucene refers to documents by an integer <i>document number</i>. The first
|
||||
* document added to an index is numbered zero, and each subsequent document added gets a number one
|
||||
* greater than the previous.
|
||||
*
|
||||
* <p>Note that a document's number may change, so caution should be taken when storing these
|
||||
* numbers outside of Lucene. In particular, numbers may change in the following situations:
|
||||
*
|
||||
* <ul>
|
||||
* <li>
|
||||
* <p>The numbers stored in each segment are unique only within the segment, and must be
|
||||
* converted before they can be used in a larger context. The standard technique is to
|
||||
* allocate each segment a range of values, based on the range of numbers used in that
|
||||
* segment. To convert a document number from a segment to an external value, the segment's
|
||||
* <i>base</i> document number is added. To convert an external value back to a
|
||||
* segment-specific value, the segment is identified by the range that the external value is
|
||||
* in, and the segment's base value is subtracted. For example two five document segments
|
||||
* might be combined, so that the first segment has a base value of zero, and the second of
|
||||
* five. Document three from the second segment would have an external value of eight.
|
||||
* <li>
|
||||
* <p>When documents are deleted, gaps are created in the numbering. These are eventually
|
||||
* removed as the index evolves through merging. Deleted documents are dropped when segments
|
||||
* are merged. A freshly-merged segment thus has no gaps in its numbering.
|
||||
* </ul>
|
||||
*
|
||||
* </div> <a id="Overview"></a>
|
||||
*
|
||||
* <h3>Index Structure Overview</h3>
|
||||
*
|
||||
* <div>
|
||||
*
|
||||
* <p>Each segment index maintains the following:
|
||||
*
|
||||
* <ul>
|
||||
* <li>{@link org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat Segment info}. This
|
||||
* contains metadata about a segment, such as the number of documents, what files it uses, and
|
||||
* information about how the segment is sorted
|
||||
* <li>{@link org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat Field names}. This
|
||||
* contains metadata about the set of named fields used in the index.
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat Stored Field values}.
|
||||
* This contains, for each document, a list of attribute-value pairs, where the attributes are
|
||||
* field names. These are used to store auxiliary information about the document, such as its
|
||||
* title, url, or an identifier to access a database. The set of stored fields are what is
|
||||
* returned for each hit when searching. This is keyed by document number.
|
||||
* <li>{@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term dictionary}. A
|
||||
* dictionary containing all of the terms used in all of the indexed fields of all of the
|
||||
* documents. The dictionary also contains the number of documents which contain the term, and
|
||||
* pointers to the term's frequency and proximity data.
|
||||
* <li>{@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Frequency data}. For
|
||||
* each term in the dictionary, the numbers of all the documents that contain that term, and
|
||||
* the frequency of the term in that document, unless frequencies are omitted ({@link
|
||||
* org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
|
||||
* <li>{@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Proximity data}. For
|
||||
* each term in the dictionary, the positions that the term occurs in each document. Note that
|
||||
* this will not exist if all fields in all documents omit position data.
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90NormsFormat Normalization factors}. For
|
||||
* each field in each document, a value is stored that is multiplied into the score for hits
|
||||
* on that field.
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat Term Vectors}. For each
|
||||
* field in each document, the term vector (sometimes called document vector) may be stored. A
|
||||
* term vector consists of term text and term frequency. To add Term Vectors to your index see
|
||||
* the {@link org.apache.lucene.document.Field Field} constructors
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat Per-document values}. Like
|
||||
* stored values, these are also keyed by document number, but are generally intended to be
|
||||
* loaded into main memory for fast access. Whereas stored values are generally intended for
|
||||
* summary results from searches, per-document values are useful for things like scoring
|
||||
* factors.
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live documents}. An
|
||||
* optional file indicating which documents are live.
|
||||
* <li>{@link org.apache.lucene.codecs.lucene90.Lucene90PointsFormat Point values}. Optional pair
|
||||
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
|
||||
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
|
||||
* intersection (2D, 3D).
|
||||
* <li>{@link org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat Vector values}. The
|
||||
* vector format stores numeric vectors in a format optimized for random access and
|
||||
* computation, supporting high-dimensional nearest-neighbor search.
|
||||
* </ul>
|
||||
*
|
||||
* <p>Details on each of these are provided in their linked pages. </div> <a id="File_Naming"></a>
|
||||
*
|
||||
* <h3>File Naming</h3>
|
||||
*
|
||||
* <div>
|
||||
*
|
||||
* <p>All files belonging to a segment have the same name with varying extensions. The extensions
|
||||
* correspond to the different file formats described below. When using the Compound File format
|
||||
* (default for small segments) these files (except for the Segment info file, the Lock file, and
|
||||
* Deleted documents file) are collapsed into a single .cfs file (see below for details)
|
||||
*
|
||||
* <p>Typically, all segments in an index are stored in a single directory, although this is not
|
||||
* required.
|
||||
*
|
||||
* <p>File names are never re-used. That is, when any file is saved to the Directory it is given a
|
||||
* never before used filename. This is achieved using a simple generations approach. For example,
|
||||
* the first segments file is segments_1, then segments_2, etc. The generation is a sequential long
|
||||
* integer represented in alpha-numeric (base 36) form. </div> <a id="file-names"></a>
|
||||
*
|
||||
* <h3>Summary of File Extensions</h3>
|
||||
*
|
||||
* <div>
|
||||
*
|
||||
* <p>The following table summarizes the names and extensions of the files in Lucene:
|
||||
*
|
||||
* <table class="padding4" style="border-spacing: 1px; border-collapse: separate">
|
||||
* <caption>lucene filenames by extension</caption>
|
||||
* <tr>
|
||||
* <th>Name</th>
|
||||
* <th>Extension</th>
|
||||
* <th>Brief Description</th>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.index.SegmentInfos Segments File}</td>
|
||||
* <td>segments_N</td>
|
||||
* <td>Stores information about a commit point</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td><a href="#Lock_File">Lock File</a></td>
|
||||
* <td>write.lock</td>
|
||||
* <td>The Write lock prevents multiple IndexWriters from writing to the same
|
||||
* file.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat Segment Info}</td>
|
||||
* <td>.si</td>
|
||||
* <td>Stores metadata about a segment</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90CompoundFormat Compound File}</td>
|
||||
* <td>.cfs, .cfe</td>
|
||||
* <td>An optional "virtual" file consisting of all the other index files for
|
||||
* systems that frequently run out of file handles.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat Fields}</td>
|
||||
* <td>.fnm</td>
|
||||
* <td>Stores information about the fields</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat Field Index}</td>
|
||||
* <td>.fdx</td>
|
||||
* <td>Contains pointers to field data</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat Field Data}</td>
|
||||
* <td>.fdt</td>
|
||||
* <td>The stored fields for documents</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Dictionary}</td>
|
||||
* <td>.tim</td>
|
||||
* <td>The term dictionary, stores term info</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Index}</td>
|
||||
* <td>.tip</td>
|
||||
* <td>The index into the Term Dictionary</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Frequencies}</td>
|
||||
* <td>.doc</td>
|
||||
* <td>Contains the list of docs which contain each term along with frequency</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Positions}</td>
|
||||
* <td>.pos</td>
|
||||
* <td>Stores position information about where a term occurs in the index</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Payloads}</td>
|
||||
* <td>.pay</td>
|
||||
* <td>Stores additional per-position metadata information such as character offsets and user payloads</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90NormsFormat Norms}</td>
|
||||
* <td>.nvd, .nvm</td>
|
||||
* <td>Encodes length and boost factors for docs and fields</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat Per-Document Values}</td>
|
||||
* <td>.dvd, .dvm</td>
|
||||
* <td>Encodes additional scoring factors or other per-document information.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat Term Vector Index}</td>
|
||||
* <td>.tvx</td>
|
||||
* <td>Stores offset into the document data file</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat Term Vector Data}</td>
|
||||
* <td>.tvd</td>
|
||||
* <td>Contains term vector data.</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live Documents}</td>
|
||||
* <td>.liv</td>
|
||||
* <td>Info about what documents are live</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene90.Lucene90PointsFormat Point values}</td>
|
||||
* <td>.kdd, .kdi, .kdm</td>
|
||||
* <td>Holds indexed points</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <td>{@link org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat Vector values}</td>
|
||||
* <td>.vec, .vem, .veq, vex</td>
|
||||
* <td>Holds indexed vectors; <code>.vec</code> files contain the raw vector data,
|
||||
* <code>.vem</code> the vector metadata, <code>.veq</code> the quantized vector data, and <code>.vex</code> the
|
||||
* hnsw graph data.</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
*
|
||||
* </div> <a id="Lock_File"></a>
|
||||
*
|
||||
* <h3>Lock File</h3>
|
||||
*
|
||||
* The write lock, which is stored in the index directory by default, is named "write.lock". If the
|
||||
* lock directory is different from the index directory then the write lock will be named
|
||||
* "XXXX-write.lock" where XXXX is a unique prefix derived from the full path to the index
|
||||
* directory. When this file is present, a writer is currently modifying the index (adding or
|
||||
* removing documents). This lock file ensures that only one writer is modifying the index at a
|
||||
* time. <a id="History"></a>
|
||||
*
|
||||
* <h3>History</h3>
|
||||
*
|
||||
* <p>Compatibility notes are provided in this document, describing how file formats have changed
|
||||
* from prior versions:
|
||||
*
|
||||
* <ul>
|
||||
* <li>In version 2.1, the file format was changed to allow lock-less commits (ie, no more commit
|
||||
* lock). The change is fully backwards compatible: you can open a pre-2.1 index for searching
|
||||
* or adding/deleting of docs. When the new segments file is saved (committed), it will be
|
||||
* written in the new file format (meaning no specific "upgrade" process is needed). But note
|
||||
* that once a commit has occurred, pre-2.1 Lucene will not be able to read the index.
|
||||
* <li>In version 2.3, the file format was changed to allow segments to share a single set of doc
|
||||
* store (vectors & stored fields) files. This allows for faster indexing in certain
|
||||
* cases. The change is fully backwards compatible (in the same way as the lock-less commits
|
||||
* change in 2.1).
|
||||
* <li>In version 2.4, Strings are now written as true UTF-8 byte sequence, not Java's modified
|
||||
* UTF-8. See <a href="http://issues.apache.org/jira/browse/LUCENE-510">LUCENE-510</a> for
|
||||
* details.
|
||||
* <li>In version 2.9, an optional opaque Map<String,String> CommitUserData may be passed to
|
||||
* IndexWriter's commit methods (and later retrieved), which is recorded in the segments_N
|
||||
* file. See <a href="http://issues.apache.org/jira/browse/LUCENE-1382">LUCENE-1382</a> for
|
||||
* details. Also, diagnostics were added to each segment written recording details about why
|
||||
* it was written (due to flush, merge; which OS/JRE was used; etc.). See issue <a
|
||||
* href="http://issues.apache.org/jira/browse/LUCENE-1654">LUCENE-1654</a> for details.
|
||||
* <li>In version 3.0, compressed fields are no longer written to the index (they can still be
|
||||
* read, but on merge the new segment will write them, uncompressed). See issue <a
|
||||
* href="http://issues.apache.org/jira/browse/LUCENE-1960">LUCENE-1960</a> for details.
|
||||
* <li>In version 3.1, segments records the code version that created them. See <a
|
||||
* href="http://issues.apache.org/jira/browse/LUCENE-2720">LUCENE-2720</a> for details.
|
||||
* Additionally segments track explicitly whether or not they have term vectors. See <a
|
||||
* href="http://issues.apache.org/jira/browse/LUCENE-2811">LUCENE-2811</a> for details.
|
||||
* <li>In version 3.2, numeric fields are written natively to the stored fields file; previously
|
||||
* they were stored in text format only.
|
||||
* <li>In version 3.4, fields can omit position data while still indexing term frequencies.
|
||||
* <li>In version 4.0, the format of the inverted index became extensible via the {@link
|
||||
* org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage ({@code DocValues})
|
||||
* was introduced. Normalization factors need no longer be a single byte, they can be any
|
||||
* {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. Terms need not be
|
||||
* unicode strings, they can be any byte sequence. Term offsets can optionally be indexed into
|
||||
* the postings lists. Payloads can be stored in the term vectors.
|
||||
* <li>In version 4.1, the format of the postings list changed to use either of FOR compression or
|
||||
* variable-byte encoding, depending upon the frequency of the term. Terms appearing only once
|
||||
* were changed to inline directly into the term dictionary. Stored fields are compressed by
|
||||
* default.
|
||||
* <li>In version 4.2, term vectors are compressed by default. DocValues has a new multi-valued
|
||||
* type (SortedSet), that can be used for faceting/grouping/joining on multi-valued fields.
|
||||
* <li>In version 4.5, DocValues were extended to explicitly represent missing values.
|
||||
* <li>In version 4.6, FieldInfos were extended to support per-field DocValues generation, to
|
||||
* allow updating NumericDocValues fields.
|
||||
* <li>In version 4.8, checksum footers were added to the end of each index file for improved data
|
||||
* integrity. Specifically, the last 8 bytes of every index file contain the zlib-crc32
|
||||
* checksum of the file.
|
||||
* <li>In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric) that is
|
||||
* suitable for faceting/sorting/analytics.
|
||||
* <li>In version 5.4, DocValues have been improved to store more information on disk: addresses
|
||||
* for binary fields and ord indexes for multi-valued fields.
|
||||
* <li>In version 6.0, Points were added, for multi-dimensional range/distance search.
|
||||
* <li>In version 6.2, new Segment info format that reads/writes the index sort, to support index
|
||||
* sorting.
|
||||
* <li>In version 7.0, DocValues have been improved to better support sparse doc values thanks to
|
||||
* an iterator API.
|
||||
* <li>In version 8.0, postings have been enhanced to record, for each block of doc ids, the (term
|
||||
* freq, normalization factor) pairs that may trigger the maximum score of the block. This
|
||||
* information is recorded alongside skip data in order to be able to skip blocks of doc ids
|
||||
* if they may not produce high enough scores. Additionally, doc values and norms have been
|
||||
* extended with jump-tables to make access O(1) instead of O(n), where n is the number of
|
||||
* elements to skip when advancing in the data.
|
||||
* <li>In version 8.4, postings, positions, offsets and payload lengths have moved to a more
|
||||
* performant encoding that is vectorized.
|
||||
* <li>In version 8.6, index sort serialization is delegated to the sorts themselves, to allow
|
||||
* user-defined sorts to be used.
|
||||
* <li>In version 8.6, points fields split the index tree and leaf data into separate files, to
|
||||
* allow for different access patterns to the different data structures
|
||||
* <li>In version 8.7, stored fields compression became adaptive to better handle documents with
|
||||
* smaller stored fields.
|
||||
* <li>In version 9.0, vector-valued fields were added.
|
||||
* <li>In version 9.1, vector-valued fields were modified to add a graph hierarchy.
|
||||
* <li>In version 9.2, docs of vector-valued fields were moved from .vem to .vec and encoded by
|
||||
* IndexedDISI. ordToDoc mappings were added to .vem.
|
||||
* <li>In version 9.5, HNSW graph connections were changed to be delta-encoded with vints.
|
||||
* Additionally, metadata file size improvements were made by delta-encoding nodes by graph
|
||||
* layer and not writing the node ids for the zeroth layer.
|
||||
* <li>In version 9.9, vector scalar quantization support was added, allowing the HNSW vector
|
||||
* format to utilize int8 quantized vectors for float32 vector search.
|
||||
* <li>In version 9.12, skip data was refactored to have only two levels: every 128 docs and every
|
||||
* 4,096 docs, and to be inlined in postings lists. This resulted in a speedup for queries that
|
||||
* need skipping, especially conjunctions.
|
||||
* </ul>
|
||||
*
|
||||
* <a id="Limitations"></a>
|
||||
*
|
||||
* <h3>Limitations</h3>
|
||||
*
|
||||
* <div>
|
||||
*
|
||||
* <p>Lucene uses a Java <code>int</code> to refer to document numbers, and the index file format
|
||||
* uses an <code>Int32</code> on-disk to store document numbers. This is a limitation of both the
|
||||
* index file format and the current implementation. Eventually these should be replaced with either
|
||||
* <code>UInt64</code> values, or better yet, {@link org.apache.lucene.store.DataOutput#writeVInt
|
||||
* VInt} values which have no limit. </div>
|
||||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene912;
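
As a quick illustration of the write-lock behavior described in the javadoc above: opening an IndexWriter acquires "write.lock" in the index directory, and a second writer on the same directory fails. This is a minimal sketch, not part of the commit; the index path and default IndexWriterConfig are illustrative assumptions.

import java.nio.file.Path;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;

public class WriteLockSketch {
  public static void main(String[] args) throws Exception {
    Path indexPath = Path.of("/tmp/lucene-demo"); // assumed path, purely illustrative
    try (FSDirectory dir = FSDirectory.open(indexPath);
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      // While 'writer' is open, the file "write.lock" exists in indexPath.
      try (FSDirectory dir2 = FSDirectory.open(indexPath)) {
        new IndexWriter(dir2, new IndexWriterConfig()); // expected to fail
      } catch (LockObtainFailedException expected) {
        // Only one IndexWriter may modify the index at a time.
      }
    }
  }
}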
|
|
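
The diffs that follow replace the old docID-positioned access on the off-heap vector values classes (nextDoc()/vectorValue() on the values object itself) with ordinal-based access driven by a KnnVectorValues.DocIndexIterator. A minimal consumer-side sketch of the new pattern; the LeafReader setup and the field name "knn_field" are illustrative assumptions, not part of the commit.

import java.io.IOException;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.DocIdSetIterator;

final class VectorIterationSketch {
  static void readAll(LeafReader reader) throws IOException {
    // "knn_field" is an assumed field name for illustration only.
    FloatVectorValues values = reader.getFloatVectorValues("knn_field");
    if (values == null) {
      return; // no vectors indexed for this field in this segment
    }
    KnnVectorValues.DocIndexIterator it = values.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      float[] vector = values.vectorValue(it.index()); // access by ordinal, not by docID
      // ... use 'doc' and 'vector'
    }
  }
}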
@ -26,12 +26,10 @@ import org.apache.lucene.search.VectorScorer;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.RandomAccessInput;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
||||
import org.apache.lucene.util.packed.DirectMonotonicReader;
|
||||
|
||||
/** Read the vector values from the index input. This supports both iterated and random access. */
|
||||
abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
||||
implements RandomAccessVectorValues.Floats {
|
||||
abstract class OffHeapFloatVectorValues extends FloatVectorValues {
|
||||
|
||||
protected final int dimension;
|
||||
protected final int size;
|
||||
|
@ -95,8 +93,6 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
|
||||
static class DenseOffHeapVectorValues extends OffHeapFloatVectorValues {
|
||||
|
||||
private int doc = -1;
|
||||
|
||||
public DenseOffHeapVectorValues(
|
||||
int dimension,
|
||||
int size,
|
||||
|
@ -105,35 +101,16 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
super(dimension, size, vectorSimilarityFunction, slice);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] vectorValue() throws IOException {
|
||||
return vectorValue(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert docID() < target;
|
||||
if (target >= size) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
return doc = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DenseOffHeapVectorValues copy() throws IOException {
|
||||
return new DenseOffHeapVectorValues(dimension, size, vectorSimilarityFunction, slice.clone());
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIndexIterator iterator() {
|
||||
return createDenseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getAcceptOrds(Bits acceptDocs) {
|
||||
return acceptDocs;
|
||||
|
@ -142,15 +119,17 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
@Override
|
||||
public VectorScorer scorer(float[] query) throws IOException {
|
||||
DenseOffHeapVectorValues values = this.copy();
|
||||
DocIndexIterator iterator = values.iterator();
|
||||
return new VectorScorer() {
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return values.vectorSimilarityFunction.compare(values.vectorValue(), query);
|
||||
return values.vectorSimilarityFunction.compare(
|
||||
values.vectorValue(iterator.index()), query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return values;
|
||||
return iterator;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -186,33 +165,17 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
fieldEntry.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] vectorValue() throws IOException {
|
||||
return vectorValue(disi.index());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return disi.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return disi.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert docID() < target;
|
||||
return disi.advance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SparseOffHeapVectorValues copy() throws IOException {
|
||||
return new SparseOffHeapVectorValues(
|
||||
fieldEntry, dataIn, vectorSimilarityFunction, slice.clone());
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIndexIterator iterator() {
|
||||
return IndexedDISI.asDocIndexIterator(disi);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int ordToDoc(int ord) {
|
||||
return (int) ordToDoc.get(ord);
|
||||
|
@ -239,15 +202,17 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
@Override
|
||||
public VectorScorer scorer(float[] query) throws IOException {
|
||||
SparseOffHeapVectorValues values = this.copy();
|
||||
DocIndexIterator iterator = values.iterator();
|
||||
return new VectorScorer() {
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return values.vectorSimilarityFunction.compare(values.vectorValue(), query);
|
||||
return values.vectorSimilarityFunction.compare(
|
||||
values.vectorValue(iterator.index()), query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return values;
|
||||
return iterator;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -259,8 +224,6 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
super(dimension, 0, VectorSimilarityFunction.COSINE, null);
|
||||
}
|
||||
|
||||
private int doc = -1;
|
||||
|
||||
@Override
|
||||
public int dimension() {
|
||||
return super.dimension();
|
||||
|
@ -271,26 +234,6 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] vectorValue() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public OffHeapFloatVectorValues copy() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
|
@ -306,6 +249,11 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIndexIterator iterator() {
|
||||
return createDenseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getAcceptOrds(Bits acceptDocs) {
|
||||
return null;
|
||||
|
|
|
@ -28,12 +28,10 @@ import org.apache.lucene.search.VectorScorer;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.RandomAccessInput;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
||||
import org.apache.lucene.util.packed.DirectMonotonicReader;
|
||||
|
||||
/** Read the vector values from the index input. This supports both iterated and random access. */
|
||||
abstract class OffHeapByteVectorValues extends ByteVectorValues
|
||||
implements RandomAccessVectorValues.Bytes {
|
||||
abstract class OffHeapByteVectorValues extends ByteVectorValues {
|
||||
|
||||
protected final int dimension;
|
||||
protected final int size;
|
||||
|
@ -108,8 +106,6 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
|
||||
static class DenseOffHeapVectorValues extends OffHeapByteVectorValues {
|
||||
|
||||
private int doc = -1;
|
||||
|
||||
public DenseOffHeapVectorValues(
|
||||
int dimension,
|
||||
int size,
|
||||
|
@ -119,36 +115,17 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
super(dimension, size, slice, vectorSimilarityFunction, byteSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] vectorValue() throws IOException {
|
||||
return vectorValue(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert docID() < target;
|
||||
if (target >= size) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
return doc = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DenseOffHeapVectorValues copy() throws IOException {
|
||||
return new DenseOffHeapVectorValues(
|
||||
dimension, size, slice.clone(), vectorSimilarityFunction, byteSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIndexIterator iterator() {
|
||||
return createDenseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getAcceptOrds(Bits acceptDocs) {
|
||||
return acceptDocs;
|
||||
|
@ -157,15 +134,16 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
@Override
|
||||
public VectorScorer scorer(byte[] query) throws IOException {
|
||||
DenseOffHeapVectorValues copy = this.copy();
|
||||
DocIndexIterator iterator = copy.iterator();
|
||||
return new VectorScorer() {
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return vectorSimilarityFunction.compare(copy.vectorValue(), query);
|
||||
return vectorSimilarityFunction.compare(copy.vectorValue(iterator.index()), query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return copy;
|
||||
return iterator;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -202,27 +180,6 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
fieldEntry.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] vectorValue() throws IOException {
|
||||
return vectorValue(disi.index());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return disi.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return disi.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert docID() < target;
|
||||
return disi.advance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SparseOffHeapVectorValues copy() throws IOException {
|
||||
return new SparseOffHeapVectorValues(
|
||||
|
@ -234,6 +191,11 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
return (int) ordToDoc.get(ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIndexIterator iterator() {
|
||||
return fromDISI(disi);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getAcceptOrds(Bits acceptDocs) {
|
||||
if (acceptDocs == null) {
|
||||
|
@ -255,15 +217,16 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
@Override
|
||||
public VectorScorer scorer(byte[] query) throws IOException {
|
||||
SparseOffHeapVectorValues copy = this.copy();
|
||||
IndexedDISI disi = copy.disi;
|
||||
return new VectorScorer() {
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return vectorSimilarityFunction.compare(copy.vectorValue(), query);
|
||||
return vectorSimilarityFunction.compare(copy.vectorValue(disi.index()), query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return copy;
|
||||
return disi;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -275,8 +238,6 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
super(dimension, 0, null, VectorSimilarityFunction.COSINE, 0);
|
||||
}
|
||||
|
||||
private int doc = -1;
|
||||
|
||||
@Override
|
||||
public int dimension() {
|
||||
return super.dimension();
|
||||
|
@ -287,26 +248,6 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] vectorValue() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public OffHeapByteVectorValues copy() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
|
@ -322,6 +263,11 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIndexIterator iterator() {
|
||||
return createDenseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getAcceptOrds(Bits acceptDocs) {
|
||||
return null;
|
||||
|
|
|
@ -26,12 +26,10 @@ import org.apache.lucene.search.VectorScorer;
|
|||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.RandomAccessInput;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
||||
import org.apache.lucene.util.packed.DirectMonotonicReader;
|
||||
|
||||
/** Read the vector values from the index input. This supports both iterated and random access. */
|
||||
abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
||||
implements RandomAccessVectorValues.Floats {
|
||||
abstract class OffHeapFloatVectorValues extends FloatVectorValues {
|
||||
|
||||
protected final int dimension;
|
||||
protected final int size;
|
||||
|
@ -104,8 +102,6 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
|
||||
static class DenseOffHeapVectorValues extends OffHeapFloatVectorValues {
|
||||
|
||||
private int doc = -1;
|
||||
|
||||
public DenseOffHeapVectorValues(
|
||||
int dimension,
|
||||
int size,
|
||||
|
@ -115,36 +111,17 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
super(dimension, size, slice, vectorSimilarityFunction, byteSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] vectorValue() throws IOException {
|
||||
return vectorValue(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert docID() < target;
|
||||
if (target >= size) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
return doc = target;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DenseOffHeapVectorValues copy() throws IOException {
|
||||
return new DenseOffHeapVectorValues(
|
||||
dimension, size, slice.clone(), vectorSimilarityFunction, byteSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIndexIterator iterator() {
|
||||
return createDenseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getAcceptOrds(Bits acceptDocs) {
|
||||
return acceptDocs;
|
||||
|
@ -153,15 +130,18 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
@Override
|
||||
public VectorScorer scorer(float[] query) throws IOException {
|
||||
DenseOffHeapVectorValues values = this.copy();
|
||||
DocIndexIterator iterator = values.iterator();
|
||||
|
||||
return new VectorScorer() {
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return values.vectorSimilarityFunction.compare(values.vectorValue(), query);
|
||||
return values.vectorSimilarityFunction.compare(
|
||||
values.vectorValue(iterator.index()), query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return values;
|
||||
return iterator;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -198,33 +178,17 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
fieldEntry.size());
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] vectorValue() throws IOException {
|
||||
return vectorValue(disi.index());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return disi.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return disi.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
assert docID() < target;
|
||||
return disi.advance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SparseOffHeapVectorValues copy() throws IOException {
|
||||
return new SparseOffHeapVectorValues(
|
||||
fieldEntry, dataIn, slice.clone(), vectorSimilarityFunction, byteSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIndexIterator iterator() {
|
||||
return IndexedDISI.asDocIndexIterator(disi);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int ordToDoc(int ord) {
|
||||
return (int) ordToDoc.get(ord);
|
||||
|
@ -251,15 +215,17 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
@Override
|
||||
public VectorScorer scorer(float[] query) throws IOException {
|
||||
SparseOffHeapVectorValues values = this.copy();
|
||||
DocIndexIterator iterator = values.iterator();
|
||||
return new VectorScorer() {
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return values.vectorSimilarityFunction.compare(values.vectorValue(), query);
|
||||
return values.vectorSimilarityFunction.compare(
|
||||
values.vectorValue(iterator.index()), query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return values;
|
||||
return iterator;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -271,8 +237,6 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
super(dimension, 0, null, VectorSimilarityFunction.COSINE, 0);
|
||||
}
|
||||
|
||||
private int doc = -1;
|
||||
|
||||
@Override
|
||||
public int dimension() {
|
||||
return super.dimension();
|
||||
|
@ -283,26 +247,6 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float[] vectorValue() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public OffHeapFloatVectorValues copy() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
|
@ -318,6 +262,11 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIndexIterator iterator() {
|
||||
return createDenseIterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Bits getAcceptOrds(Bits acceptDocs) {
|
||||
return null;
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.backward_codecs.lucene99;
|
|||
import java.io.IOException;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.GroupVIntUtil;
|
||||
|
||||
/** Utility class to encode/decode postings block. */
|
||||
final class PostingsUtil {
|
||||
|
@ -35,7 +36,7 @@ final class PostingsUtil {
|
|||
boolean indexHasFreq,
|
||||
boolean decodeFreq)
|
||||
throws IOException {
|
||||
docIn.readGroupVInts(docBuffer, num);
|
||||
GroupVIntUtil.readGroupVInts(docIn, docBuffer, num);
|
||||
if (indexHasFreq && decodeFreq) {
|
||||
for (int i = 0; i < num; ++i) {
|
||||
freqBuffer[i] = docBuffer[i] & 0x01;
|
||||
|
|
|
@ -23,3 +23,4 @@ org.apache.lucene.backward_codecs.lucene92.Lucene92Codec
|
|||
org.apache.lucene.backward_codecs.lucene94.Lucene94Codec
|
||||
org.apache.lucene.backward_codecs.lucene95.Lucene95Codec
|
||||
org.apache.lucene.backward_codecs.lucene99.Lucene99Codec
|
||||
org.apache.lucene.backward_codecs.lucene912.Lucene912Codec
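
The added line above registers the codec that moved into backward_codecs with the SPI services file, so name-based lookup keeps resolving it. A small sketch of such a lookup, assuming the codec's SPI name is "Lucene912" per the usual naming convention:

import org.apache.lucene.codecs.Codec;

final class CodecLookupSketch {
  static Codec resolve() {
    // Resolves through the META-INF/services registration shown above.
    return Codec.forName("Lucene912");
  }
}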
|
||||
|
|
|
@ -81,9 +81,8 @@ public final class Lucene50RWCompoundFormat extends CompoundFormat {
|
|||
public Lucene50RWCompoundFormat() {}
|
||||
|
||||
@Override
|
||||
public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context)
|
||||
throws IOException {
|
||||
return new Lucene50CompoundReader(dir, si, context);
|
||||
public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si) throws IOException {
|
||||
return new Lucene50CompoundReader(dir, si);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -218,7 +218,7 @@ public class Lucene60PointsWriter extends PointsWriter {
|
|||
FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
|
||||
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
|
||||
if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
|
||||
PointValues bkdReader = reader60.readers.get(readerFieldInfo.number);
|
||||
PointValues bkdReader = reader60.getValues(readerFieldInfo.name);
|
||||
if (bkdReader != null) {
|
||||
bkdReaders.add(bkdReader);
|
||||
docMaps.add(mergeState.docMaps[i]);
|
||||
|
|
|
@ -249,7 +249,7 @@ public class Lucene86PointsWriter extends PointsWriter {
|
|||
|
||||
// we confirmed this up above
|
||||
assert reader instanceof Lucene86PointsReader;
|
||||
Lucene86PointsReader reader60 = (Lucene86PointsReader) reader;
|
||||
Lucene86PointsReader reader86 = (Lucene86PointsReader) reader;
|
||||
|
||||
// NOTE: we cannot just use the merged fieldInfo.number (instead of resolving to
|
||||
// this
|
||||
|
@ -259,7 +259,7 @@ public class Lucene86PointsWriter extends PointsWriter {
|
|||
FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
|
||||
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
|
||||
if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
|
||||
PointValues aPointValues = reader60.readers.get(readerFieldInfo.number);
|
||||
PointValues aPointValues = reader86.getValues(readerFieldInfo.name);
|
||||
if (aPointValues != null) {
|
||||
pointValues.add(aPointValues);
|
||||
docMaps.add(mergeState.docMaps[i]);
|
||||
|
|
|
@ -29,13 +29,13 @@ import org.apache.lucene.index.ByteVectorValues;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FloatVectorValues;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.KnnVectorValues;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
||||
|
||||
/**
|
||||
* Writes vector values and knn graphs to index segments.
|
||||
|
@ -188,12 +188,13 @@ public final class Lucene90HnswVectorsWriter extends BufferingKnnVectorsWriter {
|
|||
int count = 0;
|
||||
ByteBuffer binaryVector =
|
||||
ByteBuffer.allocate(vectors.dimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
|
||||
for (int docV = vectors.nextDoc(); docV != NO_MORE_DOCS; docV = vectors.nextDoc(), count++) {
|
||||
KnnVectorValues.DocIndexIterator iter = vectors.iterator();
|
||||
for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) {
|
||||
// write vector
|
||||
float[] vectorValue = vectors.vectorValue();
|
||||
float[] vectorValue = vectors.vectorValue(iter.index());
|
||||
binaryVector.asFloatBuffer().put(vectorValue);
|
||||
output.writeBytes(binaryVector.array(), binaryVector.limit());
|
||||
docIds[count] = docV;
|
||||
docIds[count++] = docV;
|
||||
}
|
||||
|
||||
if (docIds.length > count) {
|
||||
|
@ -234,7 +235,7 @@ public final class Lucene90HnswVectorsWriter extends BufferingKnnVectorsWriter {
|
|||
|
||||
private void writeGraph(
|
||||
IndexOutput graphData,
|
||||
RandomAccessVectorValues.Floats vectorValues,
|
||||
FloatVectorValues vectorValues,
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
long graphDataOffset,
|
||||
long[] offsets,
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* limIndexedDISIitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene90;
|
||||
|
||||
|
@ -83,4 +83,9 @@ public class TestLucene90HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
|
|||
public void testMergingWithDifferentByteKnnFields() {
|
||||
// unimplemented
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testMismatchedFields() throws Exception {
|
||||
// requires byte support
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ public class TestLucene90SegmentInfoFormat extends BaseSegmentInfoFormatTestCase
|
|||
|
||||
@Override
|
||||
protected Version[] getVersions() {
|
||||
return new Version[] {Version.LUCENE_9_0_0};
|
||||
return new Version[] {Version.fromBits(9, 0, 0)};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Objects;
|
|||
import java.util.SplittableRandom;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
|
||||
import org.apache.lucene.index.FloatVectorValues;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
|
@ -32,7 +33,6 @@ import org.apache.lucene.util.hnsw.HnswGraph;
|
|||
import org.apache.lucene.util.hnsw.HnswGraphBuilder;
|
||||
import org.apache.lucene.util.hnsw.HnswGraphSearcher;
|
||||
import org.apache.lucene.util.hnsw.NeighborQueue;
|
||||
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorer;
|
||||
|
||||
/**
|
||||
|
@ -57,7 +57,7 @@ public final class Lucene91HnswGraphBuilder {
|
|||
|
||||
private final DefaultFlatVectorScorer defaultFlatVectorScorer = new DefaultFlatVectorScorer();
|
||||
private final VectorSimilarityFunction similarityFunction;
|
||||
private final RandomAccessVectorValues.Floats vectorValues;
|
||||
private final FloatVectorValues vectorValues;
|
||||
private final SplittableRandom random;
|
||||
private final Lucene91BoundsChecker bound;
|
||||
private final HnswGraphSearcher graphSearcher;
|
||||
|
@ -68,7 +68,7 @@ public final class Lucene91HnswGraphBuilder {
|
|||
|
||||
// we need two sources of vectors in order to perform diversity check comparisons without
|
||||
// colliding
|
||||
private RandomAccessVectorValues.Floats buildVectors;
|
||||
private FloatVectorValues buildVectors;
|
||||
|
||||
/**
|
||||
* Reads all the vectors from vector values, builds a graph connecting them by their dense
|
||||
|
@ -83,7 +83,7 @@ public final class Lucene91HnswGraphBuilder {
|
|||
* to ensure repeatable construction.
|
||||
*/
|
||||
public Lucene91HnswGraphBuilder(
|
||||
RandomAccessVectorValues.Floats vectors,
|
||||
FloatVectorValues vectors,
|
||||
VectorSimilarityFunction similarityFunction,
|
||||
int maxConn,
|
||||
int beamWidth,
|
||||
|
@ -113,14 +113,14 @@ public final class Lucene91HnswGraphBuilder {
|
|||
}
|
||||
|
||||
/**
|
||||
* Reads all the vectors from two copies of a {@link RandomAccessVectorValues}. Providing two
|
||||
* copies enables efficient retrieval without extra data copying, while avoiding collision of the
|
||||
* Reads all the vectors from two copies of a {@link FloatVectorValues}. Providing two copies
|
||||
* enables efficient retrieval without extra data copying, while avoiding collision of the
|
||||
* returned values.
|
||||
*
|
||||
* @param vectors the vectors for which to build a nearest neighbors graph. Must be an independet
|
||||
* @param vectors the vectors for which to build a nearest neighbors graph. Must be an independent
|
||||
* accessor for the vectors
|
||||
*/
|
||||
public Lucene91OnHeapHnswGraph build(RandomAccessVectorValues.Floats vectors) throws IOException {
|
||||
public Lucene91OnHeapHnswGraph build(FloatVectorValues vectors) throws IOException {
|
||||
if (vectors == vectorValues) {
|
||||
throw new IllegalArgumentException(
|
||||
"Vectors to build must be independent of the source of vectors provided to HnswGraphBuilder()");
|
||||
|
@ -236,7 +236,7 @@ public final class Lucene91HnswGraphBuilder {
|
|||
// extract all the Neighbors from the queue into an array; these will now be
|
||||
// sorted from worst to best
|
||||
for (int i = 0; i < candidateCount; i++) {
|
||||
float similarity = candidates.minCompetitiveSimilarity();
|
||||
float similarity = candidates.minimumScore();
|
||||
scratch.add(candidates.popNode(), similarity);
|
||||
}
|
||||
}
|
||||
|
@ -254,7 +254,7 @@ public final class Lucene91HnswGraphBuilder {
|
|||
float[] candidate,
|
||||
float score,
|
||||
Lucene91NeighborArray neighbors,
|
||||
RandomAccessVectorValues.Floats vectorValues)
|
||||
FloatVectorValues vectorValues)
|
||||
throws IOException {
|
||||
bound.set(score);
|
||||
for (int i = 0; i < neighbors.size(); i++) {
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
|
||||
package org.apache.lucene.backward_codecs.lucene91;
|
||||
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
|
@ -30,6 +28,7 @@ import org.apache.lucene.index.DocsWithFieldSet;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FloatVectorValues;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.KnnVectorValues;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
@ -37,7 +36,6 @@ import org.apache.lucene.store.IndexInput;
|
|||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.hnsw.HnswGraph;
|
||||
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
||||
|
||||
/**
|
||||
* Writes vector values and knn graphs to index segments.
|
||||
|
@ -183,9 +181,10 @@ public final class Lucene91HnswVectorsWriter extends BufferingKnnVectorsWriter {
|
|||
DocsWithFieldSet docsWithField = new DocsWithFieldSet();
|
||||
ByteBuffer binaryVector =
|
||||
ByteBuffer.allocate(vectors.dimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
|
||||
for (int docV = vectors.nextDoc(); docV != NO_MORE_DOCS; docV = vectors.nextDoc()) {
|
||||
KnnVectorValues.DocIndexIterator iter = vectors.iterator();
|
||||
for (int docV = iter.nextDoc(); docV != DocIdSetIterator.NO_MORE_DOCS; docV = iter.nextDoc()) {
|
||||
// write vector
|
||||
float[] vectorValue = vectors.vectorValue();
|
||||
float[] vectorValue = vectors.vectorValue(iter.index());
|
||||
binaryVector.asFloatBuffer().put(vectorValue);
|
||||
output.writeBytes(binaryVector.array(), binaryVector.limit());
|
||||
docsWithField.add(docV);
|
||||
|
@ -243,7 +242,7 @@ public final class Lucene91HnswVectorsWriter extends BufferingKnnVectorsWriter {
|
|||
}
|
||||
|
||||
private Lucene91OnHeapHnswGraph writeGraph(
|
||||
RandomAccessVectorValues.Floats vectorValues, VectorSimilarityFunction similarityFunction)
|
||||
FloatVectorValues vectorValues, VectorSimilarityFunction similarityFunction)
|
||||
throws IOException {
|
||||
|
||||
// build graph
|
||||
|
|
|
@ -82,4 +82,9 @@ public class TestLucene91HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
|
|||
public void testMergingWithDifferentByteKnnFields() {
|
||||
// unimplemented
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testMismatchedFields() throws Exception {
|
||||
// requires byte support
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
package org.apache.lucene.backward_codecs.lucene92;
|
||||
|
||||
import static org.apache.lucene.backward_codecs.lucene92.Lucene92RWHnswVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
@ -33,6 +32,7 @@ import org.apache.lucene.index.DocsWithFieldSet;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FloatVectorValues;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.KnnVectorValues;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.VectorSimilarityFunction;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
@ -43,7 +43,6 @@ import org.apache.lucene.util.hnsw.HnswGraph;
|
|||
import org.apache.lucene.util.hnsw.HnswGraphBuilder;
|
||||
import org.apache.lucene.util.hnsw.NeighborArray;
|
||||
import org.apache.lucene.util.hnsw.OnHeapHnswGraph;
|
||||
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
|
||||
import org.apache.lucene.util.packed.DirectMonotonicWriter;
|
||||
|
||||
|
@ -190,9 +189,12 @@ public final class Lucene92HnswVectorsWriter extends BufferingKnnVectorsWriter {
|
|||
DocsWithFieldSet docsWithField = new DocsWithFieldSet();
|
||||
ByteBuffer binaryVector =
|
||||
ByteBuffer.allocate(vectors.dimension() * Float.BYTES).order(ByteOrder.LITTLE_ENDIAN);
|
||||
for (int docV = vectors.nextDoc(); docV != NO_MORE_DOCS; docV = vectors.nextDoc()) {
|
||||
KnnVectorValues.DocIndexIterator iterator = vectors.iterator();
|
||||
for (int docV = iterator.nextDoc();
|
||||
docV != DocIdSetIterator.NO_MORE_DOCS;
|
||||
docV = iterator.nextDoc()) {
|
||||
// write vector
|
||||
float[] vectorValue = vectors.vectorValue();
|
||||
float[] vectorValue = vectors.vectorValue(iterator.index());
|
||||
binaryVector.asFloatBuffer().put(vectorValue);
|
||||
output.writeBytes(binaryVector.array(), binaryVector.limit());
|
||||
docsWithField.add(docV);
|
||||
|
@ -277,7 +279,7 @@ public final class Lucene92HnswVectorsWriter extends BufferingKnnVectorsWriter {
|
|||
}
|
||||
|
||||
private OnHeapHnswGraph writeGraph(
|
||||
RandomAccessVectorValues.Floats vectorValues, VectorSimilarityFunction similarityFunction)
|
||||
FloatVectorValues vectorValues, VectorSimilarityFunction similarityFunction)
|
||||
throws IOException {
|
||||
DefaultFlatVectorScorer defaultFlatVectorScorer = new DefaultFlatVectorScorer();
|
||||
// build graph
|
||||
|
|
|
@ -72,4 +72,9 @@ public class TestLucene92HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
|
|||
public void testMergingWithDifferentByteKnnFields() {
|
||||
// unimplemented
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testMismatchedFields() throws Exception {
|
||||
// requires byte support
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,6 +36,7 @@ import org.apache.lucene.index.DocsWithFieldSet;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FloatVectorValues;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.KnnVectorValues;
|
||||
import org.apache.lucene.index.MergeState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.Sorter;
|
||||
|
@ -52,7 +53,6 @@ import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
|
|||
import org.apache.lucene.util.hnsw.HnswGraphBuilder;
|
||||
import org.apache.lucene.util.hnsw.NeighborArray;
|
||||
import org.apache.lucene.util.hnsw.OnHeapHnswGraph;
|
||||
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
|
||||
import org.apache.lucene.util.packed.DirectMonotonicWriter;
|
||||
|
||||
|
@ -216,9 +216,7 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
|
|||
final int[] docIdOffsets = new int[sortMap.size()];
|
||||
int offset = 1; // 0 means no vector for this (field, document)
|
||||
DocIdSetIterator iterator = fieldData.docsWithField.iterator();
|
||||
for (int docID = iterator.nextDoc();
|
||||
docID != DocIdSetIterator.NO_MORE_DOCS;
|
||||
docID = iterator.nextDoc()) {
|
||||
for (int docID = iterator.nextDoc(); docID != NO_MORE_DOCS; docID = iterator.nextDoc()) {
|
||||
int newDocID = sortMap.oldToNew(docID);
|
||||
docIdOffsets[newDocID] = offset++;
|
||||
}
|
||||
|
@ -556,9 +554,7 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
|
|||
final DirectMonotonicWriter ordToDocWriter =
|
||||
DirectMonotonicWriter.getInstance(meta, vectorData, count, DIRECT_MONOTONIC_BLOCK_SHIFT);
|
||||
DocIdSetIterator iterator = docsWithField.iterator();
|
||||
for (int doc = iterator.nextDoc();
|
||||
doc != DocIdSetIterator.NO_MORE_DOCS;
|
||||
doc = iterator.nextDoc()) {
|
||||
for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) {
|
||||
ordToDocWriter.add(doc);
|
||||
}
|
||||
ordToDocWriter.finish();
|
||||
|
@ -590,11 +586,10 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
|
|||
private static DocsWithFieldSet writeByteVectorData(
|
||||
IndexOutput output, ByteVectorValues byteVectorValues) throws IOException {
|
||||
DocsWithFieldSet docsWithField = new DocsWithFieldSet();
|
||||
for (int docV = byteVectorValues.nextDoc();
|
||||
docV != NO_MORE_DOCS;
|
||||
docV = byteVectorValues.nextDoc()) {
|
||||
KnnVectorValues.DocIndexIterator iter = byteVectorValues.iterator();
|
||||
for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) {
|
||||
// write vector
|
||||
byte[] binaryValue = byteVectorValues.vectorValue();
|
||||
byte[] binaryValue = byteVectorValues.vectorValue(iter.index());
|
||||
assert binaryValue.length == byteVectorValues.dimension() * VectorEncoding.BYTE.byteSize;
|
||||
output.writeBytes(binaryValue, binaryValue.length);
|
||||
docsWithField.add(docV);
|
||||
|
@ -608,14 +603,13 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
|
|||
private static DocsWithFieldSet writeVectorData(
|
||||
IndexOutput output, FloatVectorValues floatVectorValues) throws IOException {
|
||||
DocsWithFieldSet docsWithField = new DocsWithFieldSet();
|
||||
KnnVectorValues.DocIndexIterator iter = floatVectorValues.iterator();
|
||||
ByteBuffer binaryVector =
|
||||
ByteBuffer.allocate(floatVectorValues.dimension() * VectorEncoding.FLOAT32.byteSize)
|
||||
.order(ByteOrder.LITTLE_ENDIAN);
|
||||
for (int docV = floatVectorValues.nextDoc();
|
||||
docV != NO_MORE_DOCS;
|
||||
docV = floatVectorValues.nextDoc()) {
|
||||
for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) {
|
||||
// write vector
|
||||
float[] vectorValue = floatVectorValues.vectorValue();
|
||||
float[] vectorValue = floatVectorValues.vectorValue(iter.index());
|
||||
binaryVector.asFloatBuffer().put(vectorValue);
|
||||
output.writeBytes(binaryVector.array(), binaryVector.limit());
|
||||
docsWithField.add(docV);
|
||||
|
@ -672,11 +666,11 @@ public final class Lucene94HnswVectorsWriter extends KnnVectorsWriter {
|
|||
case BYTE ->
|
||||
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
fieldInfo.getVectorSimilarityFunction(),
|
||||
RandomAccessVectorValues.fromBytes((List<byte[]>) vectors, dim));
|
||||
ByteVectorValues.fromBytes((List<byte[]>) vectors, dim));
|
||||
case FLOAT32 ->
|
||||
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
fieldInfo.getVectorSimilarityFunction(),
|
||||
RandomAccessVectorValues.fromFloats((List<float[]>) vectors, dim));
|
||||
FloatVectorValues.fromFloats((List<float[]>) vectors, dim));
|
||||
};
|
||||
hnswGraphBuilder =
|
||||
HnswGraphBuilder.create(scorerSupplier, M, beamWidth, HnswGraphBuilder.randSeed);
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.lucene.index.DocsWithFieldSet;
|
|||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FloatVectorValues;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.KnnVectorValues;
|
||||
import org.apache.lucene.index.MergeState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.Sorter;
|
||||
|
@ -56,7 +57,6 @@ import org.apache.lucene.util.hnsw.HnswGraphBuilder;
|
|||
import org.apache.lucene.util.hnsw.IncrementalHnswGraphMerger;
|
||||
import org.apache.lucene.util.hnsw.NeighborArray;
|
||||
import org.apache.lucene.util.hnsw.OnHeapHnswGraph;
|
||||
import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
|
||||
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
|
||||
import org.apache.lucene.util.packed.DirectMonotonicWriter;
|
||||
|
||||
|
@ -221,9 +221,7 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
|
|||
final int[] docIdOffsets = new int[sortMap.size()];
|
||||
int offset = 1; // 0 means no vector for this (field, document)
|
||||
DocIdSetIterator iterator = fieldData.docsWithField.iterator();
|
||||
for (int docID = iterator.nextDoc();
|
||||
docID != DocIdSetIterator.NO_MORE_DOCS;
|
||||
docID = iterator.nextDoc()) {
|
||||
for (int docID = iterator.nextDoc(); docID != NO_MORE_DOCS; docID = iterator.nextDoc()) {
|
||||
int newDocID = sortMap.oldToNew(docID);
|
||||
docIdOffsets[newDocID] = offset++;
|
||||
}
|
||||
|
@ -482,18 +480,18 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
|
|||
mergeState.knnVectorsReaders[i], mergeState.docMaps[i], mergeState.liveDocs[i]);
|
||||
}
|
||||
}
|
||||
DocIdSetIterator mergedVectorIterator = null;
|
||||
KnnVectorValues mergedVectorValues = null;
|
||||
switch (fieldInfo.getVectorEncoding()) {
|
||||
case BYTE ->
|
||||
mergedVectorIterator =
|
||||
mergedVectorValues =
|
||||
KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState);
|
||||
case FLOAT32 ->
|
||||
mergedVectorIterator =
|
||||
mergedVectorValues =
|
||||
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
|
||||
}
|
||||
graph =
|
||||
merger.merge(
|
||||
mergedVectorIterator, segmentWriteState.infoStream, docsWithField.cardinality());
|
||||
mergedVectorValues, segmentWriteState.infoStream, docsWithField.cardinality());
|
||||
vectorIndexNodeOffsets = writeGraph(graph);
|
||||
}
|
||||
long vectorIndexLength = vectorIndex.getFilePointer() - vectorIndexOffset;
|
||||
|
@ -636,14 +634,13 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
|
|||
private static DocsWithFieldSet writeByteVectorData(
|
||||
IndexOutput output, ByteVectorValues byteVectorValues) throws IOException {
|
||||
DocsWithFieldSet docsWithField = new DocsWithFieldSet();
|
||||
for (int docV = byteVectorValues.nextDoc();
|
||||
docV != NO_MORE_DOCS;
|
||||
docV = byteVectorValues.nextDoc()) {
|
||||
KnnVectorValues.DocIndexIterator iter = byteVectorValues.iterator();
|
||||
for (int docId = iter.nextDoc(); docId != NO_MORE_DOCS; docId = iter.nextDoc()) {
|
||||
// write vector
|
||||
byte[] binaryValue = byteVectorValues.vectorValue();
|
||||
byte[] binaryValue = byteVectorValues.vectorValue(iter.index());
|
||||
assert binaryValue.length == byteVectorValues.dimension() * VectorEncoding.BYTE.byteSize;
|
||||
output.writeBytes(binaryValue, binaryValue.length);
|
||||
docsWithField.add(docV);
|
||||
docsWithField.add(docId);
|
||||
}
|
||||
return docsWithField;
|
||||
}
|
||||
|
@ -657,11 +654,10 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
|
|||
ByteBuffer buffer =
|
||||
ByteBuffer.allocate(floatVectorValues.dimension() * VectorEncoding.FLOAT32.byteSize)
|
||||
.order(ByteOrder.LITTLE_ENDIAN);
|
||||
for (int docV = floatVectorValues.nextDoc();
|
||||
docV != NO_MORE_DOCS;
|
||||
docV = floatVectorValues.nextDoc()) {
|
||||
KnnVectorValues.DocIndexIterator iter = floatVectorValues.iterator();
|
||||
for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) {
|
||||
// write vector
|
||||
float[] value = floatVectorValues.vectorValue();
|
||||
float[] value = floatVectorValues.vectorValue(iter.index());
|
||||
buffer.asFloatBuffer().put(value);
|
||||
output.writeBytes(buffer.array(), buffer.limit());
|
||||
docsWithField.add(docV);
|
||||
|
@ -718,11 +714,11 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
|
|||
case BYTE ->
|
||||
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
fieldInfo.getVectorSimilarityFunction(),
|
||||
RandomAccessVectorValues.fromBytes((List<byte[]>) vectors, dim));
|
||||
ByteVectorValues.fromBytes((List<byte[]>) vectors, dim));
|
||||
case FLOAT32 ->
|
||||
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
|
||||
fieldInfo.getVectorSimilarityFunction(),
|
||||
RandomAccessVectorValues.fromFloats((List<float[]>) vectors, dim));
|
||||
FloatVectorValues.fromFloats((List<float[]>) vectors, dim));
|
||||
};
|
||||
hnswGraphBuilder =
|
||||
HnswGraphBuilder.create(scorerSupplier, M, beamWidth, HnswGraphBuilder.randSeed);
|
||||
|
|
|
@ -106,8 +106,8 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
|
|||
* This is a base constructor for parameterized BWC tests. The constructor arguments are provided
|
||||
* by {@link com.carrotsearch.randomizedtesting.RandomizedRunner} during test execution. A {@link
|
||||
* com.carrotsearch.randomizedtesting.annotations.ParametersFactory} specified in a subclass
|
||||
* provides a list lists of arguments for the tests and RandomizedRunner will execute the test for
|
||||
* each of the argument list.
|
||||
* provides a list of arguments for the tests and RandomizedRunner will execute the test for each
|
||||
* of the argument list.
|
||||
*
|
||||
* @param version the version this test should run for
|
||||
* @param indexPattern an index pattern in order to open an index of see {@link
|
||||
|
|
|
@ -198,7 +198,7 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
|
|||
checker.setInfoStream(new PrintStream(bos, false, UTF_8));
|
||||
checker.setLevel(CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS);
|
||||
CheckIndex.Status indexStatus = checker.checkIndex();
|
||||
if (version.startsWith("8.")) {
|
||||
if (version.startsWith("8.") || version.startsWith("9.")) {
|
||||
assertTrue(indexStatus.clean);
|
||||
} else {
|
||||
assertFalse(indexStatus.clean);
|
||||
|
@ -219,10 +219,11 @@ public class TestAncientIndicesCompatibility extends LuceneTestCase {
|
|||
// #12895: test on a carefully crafted 9.8.0 index (from a small contiguous subset
|
||||
// of wikibigall unique terms) that shows the read-time exception of
|
||||
// IntersectTermsEnum (used by WildcardQuery)
|
||||
@AwaitsFix(bugUrl = "https://github.com/apache/lucene/issues/13847")
|
||||
public void testWildcardQueryExceptions990() throws IOException {
|
||||
Path path = createTempDir("12895");
|
||||
|
||||
String name = "index.12895.9.8.0.zip";
|
||||
String name = "unsupported.12895.9.8.0.zip";
|
||||
InputStream resource = TestAncientIndicesCompatibility.class.getResourceAsStream(name);
|
||||
assertNotNull("missing zip file to reproduce #12895", resource);
|
||||
TestUtil.unzip(resource, path);
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
package org.apache.lucene.backward_index;
|
||||
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
import static org.apache.lucene.util.Version.LUCENE_9_0_0;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import java.io.IOException;
|
||||
|
@ -52,6 +51,7 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.KnnVectorValues;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.LogByteSizeMergePolicy;
|
||||
import org.apache.lucene.index.MultiBits;
|
||||
|
@ -95,7 +95,7 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestB
|
|||
private static final int DOCS_COUNT = 35;
|
||||
private static final int DELETED_ID = 7;
|
||||
|
||||
private static final int KNN_VECTOR_MIN_SUPPORTED_VERSION = LUCENE_9_0_0.major;
|
||||
private static final int KNN_VECTOR_MIN_SUPPORTED_VERSION = Version.fromBits(9, 0, 0).major;
|
||||
private static final String KNN_VECTOR_FIELD = "knn_field";
|
||||
private static final FieldType KNN_VECTOR_FIELD_TYPE =
|
||||
KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);
|
||||
|
@@ -477,10 +477,14 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestB
       FloatVectorValues values = ctx.reader().getFloatVectorValues(KNN_VECTOR_FIELD);
       if (values != null) {
         assertEquals(KNN_VECTOR_FIELD_TYPE.vectorDimension(), values.dimension());
-        for (int doc = values.nextDoc(); doc != NO_MORE_DOCS; doc = values.nextDoc()) {
+        KnnVectorValues.DocIndexIterator it = values.iterator();
+        for (int doc = it.nextDoc(); doc != NO_MORE_DOCS; doc = it.nextDoc()) {
           float[] expectedVector = {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * cnt};
           assertArrayEquals(
-              "vectors do not match for doc=" + cnt, expectedVector, values.vectorValue(), 0);
+              "vectors do not match for doc=" + cnt,
+              expectedVector,
+              values.vectorValue(it.index()),
+              0);
           cnt++;
         }
       }
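This hunk moves from iterating FloatVectorValues directly to the iterator-plus-ordinal access pattern: the DocIndexIterator walks doc ids, and it.index() supplies the vector ordinal passed to vectorValue. A standalone sketch of that read path, assuming a float vector field named "knn_field", is:

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

import java.io.IOException;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;

class VectorReadExample {
  // Sketch: print every float vector stored for the field; the field name is an assumption.
  static void readVectors(LeafReader reader) throws IOException {
    FloatVectorValues values = reader.getFloatVectorValues("knn_field");
    if (values == null) {
      return; // no vectors indexed for this field in this segment
    }
    KnnVectorValues.DocIndexIterator it = values.iterator();
    for (int doc = it.nextDoc(); doc != NO_MORE_DOCS; doc = it.nextDoc()) {
      // it.index() maps the current doc to its vector ordinal.
      float[] vector = values.vectorValue(it.index());
      System.out.println("doc=" + doc + " dims=" + vector.length);
    }
  }
}

Fetching by ordinal rather than by iteration position is what the new assertion relies on: the iterator tracks docs while vectorValue(ord) addresses the stored vector.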
@@ -828,7 +832,7 @@ public class TestBasicBackwardsCompatibility extends BackwardsCompatibilityTestB
         expectThrows(IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w, reader));
     assertEquals(
         e.getMessage(),
-        "Cannot merge a segment that has been created with major version 9 into this index which has been created by major version 10");
+        "Cannot merge a segment that has been created with major version 10 into this index which has been created by major version 11");
     w.close();
     targetDir2.close();

@@ -58,7 +58,7 @@ public class TestDVUpdateBackwardsCompatibility extends BackwardsCompatibilityTe
   public static Iterable<Object[]> testVersionsFactory() {
     List<Object[]> params = new ArrayList<>();
     // TODO - WHY ONLY on the first major version?
-    params.add(new Object[] {Version.LUCENE_9_0_0, createPattern(INDEX_NAME, SUFFIX)});
+    params.add(new Object[] {Version.LUCENE_10_0_0, createPattern(INDEX_NAME, SUFFIX)});
     return params;
   }

@@ -53,14 +53,14 @@ public class TestEmptyIndexBackwardsCompatibility extends BackwardsCompatibility
   public static Iterable<Object[]> testVersionsFactory() {
     List<Object[]> params = new ArrayList<>();
     // TODO - WHY ONLY on the first major version?
-    params.add(new Object[] {Version.LUCENE_9_0_0, createPattern(INDEX_NAME, SUFFIX)});
+    params.add(new Object[] {Version.LUCENE_10_0_0, createPattern(INDEX_NAME, SUFFIX)});
     return params;
   }

   public void testUpgradeEmptyOldIndex() throws Exception {
     try (Directory dir = newDirectory(directory)) {
       TestIndexUpgradeBackwardsCompatibility.newIndexUpgrader(dir).upgrade();
-      TestIndexUpgradeBackwardsCompatibility.checkAllSegmentsUpgraded(dir, 9);
+      TestIndexUpgradeBackwardsCompatibility.checkAllSegmentsUpgraded(dir, 10);
     }
   }
 }
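testUpgradeEmptyOldIndex drives the upgrade through the test helper, but the same operation is available through Lucene's public IndexUpgrader. A rough sketch with a placeholder path is shown below; note that IndexUpgrader can only rewrite segments the current version is still able to read, which in practice means indexes from the previous major version.

import java.nio.file.Paths;
import org.apache.lucene.index.IndexUpgrader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public final class UpgradeExample {
  public static void main(String[] args) throws Exception {
    // Sketch: rewrites every segment with the current default codec so a
    // previous-major-version index keeps working after the next upgrade.
    try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"))) { // placeholder path
      new IndexUpgrader(dir).upgrade();
    }
  }
}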
@@ -39,7 +39,7 @@ public class TestGenerateBwcIndices extends LuceneTestCase {
   // To generate backcompat indexes with the current default codec, run the following gradle
   // command:
   //  gradlew test -Ptests.bwcdir=/path/to/store/indexes -Ptests.codec=default
-  //     -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices
+  //     -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices --max-workers=1
   //
   // Also add testmethod with one of the index creation methods below, for example:
   //  -Ptestmethod=testCreateCFS
@@ -82,14 +82,14 @@ public class TestGenerateBwcIndices extends LuceneTestCase {
     sortedTest.createBWCIndex();
   }

-  public void testCreateInt8HNSWIndices() throws IOException {
-    TestInt8HnswBackwardsCompatibility int8HnswBackwardsCompatibility =
-        new TestInt8HnswBackwardsCompatibility(
+  public void testCreateInt7HNSWIndices() throws IOException {
+    TestInt7HnswBackwardsCompatibility int7HnswBackwardsCompatibility =
+        new TestInt7HnswBackwardsCompatibility(
             Version.LATEST,
             createPattern(
-                TestInt8HnswBackwardsCompatibility.INDEX_NAME,
-                TestInt8HnswBackwardsCompatibility.SUFFIX));
-    int8HnswBackwardsCompatibility.createBWCIndex();
+                TestInt7HnswBackwardsCompatibility.INDEX_NAME,
+                TestInt7HnswBackwardsCompatibility.SUFFIX));
+    int7HnswBackwardsCompatibility.createBWCIndex();
   }

   private boolean isInitialMajorVersionRelease() {
@@ -55,7 +55,7 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT

   static final String INDEX_NAME = "sorted";
   static final String SUFFIX = "";
-  private static final Version FIRST_PARENT_DOC_VERSION = Version.LUCENE_9_11_0;
+  private static final Version FIRST_PARENT_DOC_VERSION = Version.fromBits(9, 11, 0);
   private static final String PARENT_FIELD_NAME = "___parent";

   public TestIndexSortBackwardsCompatibility(Version version, String pattern) {
@@ -23,17 +23,22 @@ import java.io.IOException;
 import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.KnnVectorsReader;
 import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;
 import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.KnnFloatVectorField;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.CodecReader;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.VectorSimilarityFunction;
 import org.apache.lucene.search.IndexSearcher;
@@ -41,23 +46,23 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.analysis.MockAnalyzer;
 import org.apache.lucene.tests.util.TestUtil;
 import org.apache.lucene.util.Version;
+import org.apache.lucene.util.quantization.QuantizedByteVectorValues;

-public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTestBase {
+public class TestInt7HnswBackwardsCompatibility extends BackwardsCompatibilityTestBase {

-  static final String INDEX_NAME = "int8_hnsw";
+  static final String INDEX_NAME = "int7_hnsw";
   static final String SUFFIX = "";
-  private static final Version FIRST_INT8_HNSW_VERSION = Version.LUCENE_9_10_0;
+  private static final Version FIRST_INT7_HNSW_VERSION = Version.fromBits(9, 10, 0);
   private static final String KNN_VECTOR_FIELD = "knn_field";
   private static final int DOC_COUNT = 30;
   private static final FieldType KNN_VECTOR_FIELD_TYPE =
       KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);
   private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f};

-  public TestInt8HnswBackwardsCompatibility(Version version, String pattern) {
+  public TestInt7HnswBackwardsCompatibility(Version version, String pattern) {
     super(version, pattern);
   }

   /** Provides all sorted versions to the test-framework */
   @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s")
   public static Iterable<Object[]> testVersionsFactory() throws IllegalAccessException {
     return allVersion(INDEX_NAME, SUFFIX);
@@ -76,7 +81,7 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe

   @Override
   protected boolean supportsVersion(Version version) {
-    return version.onOrAfter(FIRST_INT8_HNSW_VERSION);
+    return version.onOrAfter(FIRST_INT7_HNSW_VERSION);
   }

   @Override
@@ -84,7 +89,7 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe
     // We don't use the default codec
   }

-  public void testInt8HnswIndexAndSearch() throws Exception {
+  public void testInt7HnswIndexAndSearch() throws Exception {
     IndexWriterConfig indexWriterConfig =
         newIndexWriterConfig(new MockAnalyzer(random()))
             .setOpenMode(IndexWriterConfig.OpenMode.APPEND)
@@ -108,7 +113,6 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe
         assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
       }
     }
-    // This will confirm the docs are really sorted
     TestUtil.checkIndex(directory);
   }

@@ -117,7 +121,7 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe
     IndexWriterConfig conf =
         new IndexWriterConfig(new MockAnalyzer(random()))
             .setMaxBufferedDocs(10)
-            .setCodec(TestUtil.getDefaultCodec())
+            .setCodec(getCodec())
             .setMergePolicy(NoMergePolicy.INSTANCE);
     try (IndexWriter writer = new IndexWriter(dir, conf)) {
       for (int i = 0; i < DOC_COUNT; i++) {
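Switching from TestUtil.getDefaultCodec() to getCodec() presumably keeps the re-opened writer on the same scalar-quantized HNSW format the BWC index was created with, instead of whatever the current default codec would pick. A codec override of that shape, sketched here with the format's default parameters (the real test may pass explicit maxConn, beamWidth or quantization settings), could look like:

import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;

class QuantizedCodecExample {
  // Sketch: force the scalar-quantized HNSW vectors format for every vector field.
  static Codec quantizedHnswCodec() {
    return new Lucene99Codec() {
      @Override
      public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
        return new Lucene99HnswScalarQuantizedVectorsFormat();
      }
    };
  }
}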
@@ -147,4 +151,29 @@ public class TestInt8HnswBackwardsCompatibility extends BackwardsCompatibilityTe
       assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
     }
   }
+
+  // #13880: make sure the BWC index really contains quantized HNSW not float32
+  public void testIndexIsReallyQuantized() throws Exception {
+    try (DirectoryReader reader = DirectoryReader.open(directory)) {
+      for (LeafReaderContext leafContext : reader.leaves()) {
+        KnnVectorsReader knnVectorsReader = ((CodecReader) leafContext.reader()).getVectorReader();
+        assertTrue(
+            "expected PerFieldKnnVectorsFormat.FieldsReader but got: " + knnVectorsReader,
+            knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader);
+
+        KnnVectorsReader forField =
+            ((PerFieldKnnVectorsFormat.FieldsReader) knnVectorsReader)
+                .getFieldReader(KNN_VECTOR_FIELD);
+
+        assertTrue(forField instanceof Lucene99HnswVectorsReader);
+
+        QuantizedByteVectorValues quantized =
+            ((Lucene99HnswVectorsReader) forField).getQuantizedVectorValues(KNN_VECTOR_FIELD);
+
+        assertNotNull(
+            "KnnVectorsReader should have quantized interface for field " + KNN_VECTOR_FIELD,
+            quantized);
+      }
+    }
+  }
 }
@@ -31,13 +31,15 @@ import org.apache.lucene.index.LogByteSizeMergePolicy;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.analysis.MockAnalyzer;
 import org.apache.lucene.tests.util.LineFileDocs;
+import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.tests.util.TestUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Version;

+@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/apache/lucene/issues/13847")
 public class TestMoreTermsBackwardsCompatibility extends BackwardsCompatibilityTestBase {

-  static final String INDEX_NAME = "moreterms";
+  static final String INDEX_NAME = "unsupported.moreterms";

   static final String SUFFIX = "";

@@ -48,7 +50,7 @@ public class TestMoreTermsBackwardsCompatibility extends BackwardsCompatibilityT
   @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s")
   public static Iterable<Object[]> testVersionsFactory() {
     List<Object[]> params = new ArrayList<>();
-    params.add(new Object[] {Version.LUCENE_9_0_0, createPattern(INDEX_NAME, SUFFIX)});
+    params.add(new Object[] {Version.fromBits(9, 0, 0), createPattern(INDEX_NAME, SUFFIX)});
     return params;
   }

(Fourteen binary files in this commit are not rendered by the diff viewer, and additional files were omitted because too many files changed in this diff.)