[Upgrade] Lucene 9.0.0 release (#1109)

This commit upgrades the core codebase from Lucene 8.10.1 to
Lucene 9.0.0. It includes all refactoring of features and API changes
required when upgrading to a new major Lucene release.

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
Co-authored-by: Andriy Redko <drreta@gmail.com>
Nick Knize 2022-03-15 15:48:13 -05:00 committed by GitHub
parent 757abdb9a0
commit 006c832c5f
274 changed files with 3052 additions and 980 deletions

View File

@ -230,7 +230,10 @@ tasks.register("branchConsistency") {
allprojects {
// configure compiler options
tasks.withType(JavaCompile).configureEach { JavaCompile compile ->
compile.options.compilerArgs << '-Werror'
// See please https://bugs.openjdk.java.net/browse/JDK-8209058
if (BuildParams.runtimeJavaVersion > JavaVersion.VERSION_11) {
compile.options.compilerArgs << '-Werror'
}
compile.options.compilerArgs << '-Xlint:auxiliaryclass'
compile.options.compilerArgs << '-Xlint:cast'
compile.options.compilerArgs << '-Xlint:classfile'

View File

@ -1,5 +1,5 @@
opensearch = 2.0.0
lucene = 8.10.1
lucene = 9.0.0
bundled_jdk_vendor = adoptium
bundled_jdk = 17.0.2+8
@ -11,7 +11,7 @@ spatial4j = 0.7
jts = 1.15.0
jackson = 2.12.6
snakeyaml = 1.26
icu4j = 62.1
icu4j = 68.2
supercsv = 2.4.0
log4j = 2.17.1
slf4j = 1.6.2

View File

@ -32,7 +32,9 @@
package org.opensearch.common.settings;
import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NIOFSDirectory;
@ -328,13 +330,14 @@ public class KeyStoreWrapperTests extends OpenSearchTestCase {
byte[] encryptedBytes,
int truncEncryptedDataLength
) throws Exception {
indexOutput.writeInt(4 + salt.length + 4 + iv.length + 4 + encryptedBytes.length);
indexOutput.writeInt(salt.length);
indexOutput.writeBytes(salt, salt.length);
indexOutput.writeInt(iv.length);
indexOutput.writeBytes(iv, iv.length);
indexOutput.writeInt(encryptedBytes.length - truncEncryptedDataLength);
indexOutput.writeBytes(encryptedBytes, encryptedBytes.length);
DataOutput io = EndiannessReverserUtil.wrapDataOutput(indexOutput);
io.writeInt(4 + salt.length + 4 + iv.length + 4 + encryptedBytes.length);
io.writeInt(salt.length);
io.writeBytes(salt, salt.length);
io.writeInt(iv.length);
io.writeBytes(iv, iv.length);
io.writeInt(encryptedBytes.length - truncEncryptedDataLength);
io.writeBytes(encryptedBytes, encryptedBytes.length);
}
public void testUpgradeAddsSeed() throws Exception {
@ -363,7 +366,7 @@ public class KeyStoreWrapperTests extends OpenSearchTestCase {
assumeFalse("Can't run in a FIPS JVM as PBE is not available", inFipsJvm());
Path configDir = env.configFile();
NIOFSDirectory directory = new NIOFSDirectory(configDir);
try (IndexOutput output = directory.createOutput("opensearch.keystore", IOContext.DEFAULT)) {
try (IndexOutput output = EndiannessReverserUtil.createOutput(directory, "opensearch.keystore", IOContext.DEFAULT)) {
CodecUtil.writeHeader(output, "opensearch.keystore", 1);
output.writeByte((byte) 0); // hasPassword = false
output.writeString("PKCS12");
@ -396,7 +399,7 @@ public class KeyStoreWrapperTests extends OpenSearchTestCase {
NIOFSDirectory directory = new NIOFSDirectory(configDir);
byte[] fileBytes = new byte[20];
random().nextBytes(fileBytes);
try (IndexOutput output = directory.createOutput("opensearch.keystore", IOContext.DEFAULT)) {
try (IndexOutput output = EndiannessReverserUtil.createOutput(directory, "opensearch.keystore", IOContext.DEFAULT)) {
CodecUtil.writeHeader(output, "opensearch.keystore", 2);
output.writeByte((byte) 0); // hasPassword = false
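The hunks above follow one pattern: Lucene 9 writes index files little-endian, so the tests keep producing the legacy big-endian keystore layout by going through EndiannessReverserUtil from lucene-backward-codecs. A minimal illustrative sketch of that pattern (not part of the change; file name, header name and payload are made up):

import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.NIOFSDirectory;
import java.nio.file.Path;

public class LegacyFormatWriterSketch {
    // Writes a codec header plus one int in the pre-9.0 (big-endian) byte order.
    public static void writeLegacyFile(Path dir) throws Exception {
        try (NIOFSDirectory directory = new NIOFSDirectory(dir);
             IndexOutput output = EndiannessReverserUtil.createOutput(directory, "legacy.bin", IOContext.DEFAULT)) {
            CodecUtil.writeHeader(output, "legacy.bin", 1);
            output.writeInt(42); // stored big-endian, as a Lucene 8 reader expects
        }
    }
}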

View File

@ -32,7 +32,7 @@
package org.opensearch.analysis.common;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicFilter;
import org.apache.lucene.analysis.classic.ClassicFilter;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;

View File

@ -33,7 +33,7 @@
package org.opensearch.analysis.common;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.classic.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;

View File

@ -51,6 +51,8 @@ import org.apache.lucene.analysis.cjk.CJKBigramFilter;
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
import org.apache.lucene.analysis.classic.ClassicFilter;
import org.apache.lucene.analysis.classic.ClassicTokenizer;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.core.KeywordTokenizer;
@ -64,6 +66,7 @@ import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.de.GermanNormalizationFilter;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.email.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
@ -113,10 +116,7 @@ import org.apache.lucene.analysis.ro.RomanianAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.ClassicFilter;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.th.ThaiTokenizer;

View File

@ -62,10 +62,18 @@ public class MinHashTokenFilterFactory extends AbstractTokenFilterFactory {
private Map<String, String> convertSettings(Settings settings) {
Map<String, String> settingMap = new HashMap<>();
settingMap.put("hashCount", settings.get("hash_count"));
settingMap.put("bucketCount", settings.get("bucket_count"));
settingMap.put("hashSetSize", settings.get("hash_set_size"));
settingMap.put("withRotation", settings.get("with_rotation"));
if (settings.hasValue("hash_count")) {
settingMap.put("hashCount", settings.get("hash_count"));
}
if (settings.hasValue("bucketCount")) {
settingMap.put("bucketCount", settings.get("bucket_count"));
}
if (settings.hasValue("hashSetSize")) {
settingMap.put("hashSetSize", settings.get("hash_set_size"));
}
if (settings.hasValue("with_rotation")) {
settingMap.put("withRotation", settings.get("with_rotation"));
}
return settingMap;
}
}
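Guarding each put appears to be needed because the Lucene 9 analysis factories take an immutable copy of their argument map, which rejects null values; only settings that were explicitly provided are forwarded now. A minimal illustrative sketch of handing such a map to Lucene's MinHashFilterFactory (not part of the change; the parameter values are made up):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.minhash.MinHashFilterFactory;
import java.util.HashMap;
import java.util.Map;

class MinHashSketch {
    static TokenStream minHash(TokenStream in) {
        Map<String, String> args = new HashMap<>();
        args.put("hashCount", "1");     // only keys that actually have values;
        args.put("bucketCount", "512"); // unset options fall back to Lucene's defaults
        return new MinHashFilterFactory(args).create(in);
    }
}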

View File

@ -34,7 +34,7 @@ package org.opensearch.analysis.common;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.email.UAX29URLEmailTokenizer;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;

View File

@ -110,6 +110,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
filters.put("latvianstem", StemmerTokenFilterFactory.class);
filters.put("norwegianlightstem", StemmerTokenFilterFactory.class);
filters.put("norwegianminimalstem", StemmerTokenFilterFactory.class);
filters.put("norwegiannormalization", Void.class);
filters.put("portuguesestem", StemmerTokenFilterFactory.class);
filters.put("portugueselightstem", StemmerTokenFilterFactory.class);
filters.put("portugueseminimalstem", StemmerTokenFilterFactory.class);
@ -117,6 +118,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
filters.put("soranistem", StemmerTokenFilterFactory.class);
filters.put("spanishlightstem", StemmerTokenFilterFactory.class);
filters.put("swedishlightstem", StemmerTokenFilterFactory.class);
filters.put("swedishminimalstem", Void.class);
filters.put("stemmeroverride", StemmerOverrideTokenFilterFactory.class);
filters.put("kstem", KStemTokenFilterFactory.class);
filters.put("synonym", SynonymTokenFilterFactory.class);
@ -242,7 +244,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
tokenizers.put("keyword", null);
tokenizers.put("lowercase", Void.class);
tokenizers.put("classic", null);
tokenizers.put("uax_url_email", org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory.class);
tokenizers.put("uax_url_email", org.apache.lucene.analysis.email.UAX29URLEmailTokenizerFactory.class);
tokenizers.put("path_hierarchy", null);
tokenizers.put("letter", null);
tokenizers.put("whitespace", null);

View File

@ -107,11 +107,15 @@ public class DisableGraphQueryTests extends OpenSearchSingleNodeTestCase {
// parsed queries for "text_shingle_unigram:(foo bar baz)" with query parsers
// that ignores position length attribute
expectedQueryWithUnigram = new BooleanQuery.Builder().add(
new SynonymQuery(new Term("text_shingle_unigram", "foo"), new Term("text_shingle_unigram", "foo bar")),
new SynonymQuery.Builder("text_shingle_unigram").addTerm(new Term("text_shingle_unigram", "foo"))
.addTerm(new Term("text_shingle_unigram", "foo bar"))
.build(),
BooleanClause.Occur.SHOULD
)
.add(
new SynonymQuery(new Term("text_shingle_unigram", "bar"), new Term("text_shingle_unigram", "bar baz")),
new SynonymQuery.Builder("text_shingle_unigram").addTerm(new Term("text_shingle_unigram", "bar"))
.addTerm(new Term("text_shingle_unigram", "bar baz"))
.build(),
BooleanClause.Occur.SHOULD
)
.add(new TermQuery(new Term("text_shingle_unigram", "baz")), BooleanClause.Occur.SHOULD)
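Lucene 9 removed the varargs SynonymQuery constructor used on the old side of this hunk; synonym queries are now assembled through SynonymQuery.Builder, keyed by field. A minimal illustrative sketch (field and terms are made up):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.SynonymQuery;

class SynonymQuerySketch {
    static SynonymQuery fastOrQuick() {
        // Every term added to the builder must belong to the field the builder was created for.
        return new SynonymQuery.Builder("body")
            .addTerm(new Term("body", "fast"))
            .addTerm(new Term("body", "quick"))
            .build();
    }
}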

View File

@ -1 +0,0 @@
24932a4be7064a99126d80776718845b356abae0

View File

@ -0,0 +1 @@
0a3d818d6f6fb113831ed34553b24763fbda1e84

View File

@ -37,7 +37,6 @@ import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.expressions.js.VariableContext;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.SortField;
import org.opensearch.SpecialPermission;
import org.opensearch.common.Nullable;
import org.opensearch.index.fielddata.IndexFieldData;
@ -263,7 +262,7 @@ public class ExpressionScriptEngine implements ScriptEngine {
for (String variable : expr.variables) {
try {
if (variable.equals("_score")) {
bindings.add(new SortField("_score", SortField.Type.SCORE));
bindings.add("_score", DoubleValuesSource.SCORES);
needsScores = true;
} else if (vars != null && vars.containsKey(variable)) {
bindFromParams(vars, bindings, variable);
@ -320,7 +319,7 @@ public class ExpressionScriptEngine implements ScriptEngine {
for (String variable : expr.variables) {
try {
if (variable.equals("_score")) {
bindings.add(new SortField("_score", SortField.Type.SCORE));
bindings.add("_score", DoubleValuesSource.SCORES);
needsScores = true;
} else if (variable.equals("_value")) {
specialValue = new ReplaceableConstDoubleValueSource();
@ -393,7 +392,7 @@ public class ExpressionScriptEngine implements ScriptEngine {
for (String variable : expr.variables) {
try {
if (variable.equals("_score")) {
bindings.add(new SortField("_score", SortField.Type.SCORE));
bindings.add("_score", DoubleValuesSource.SCORES);
needsScores = true;
} else if (variable.equals("_value")) {
specialValue = new ReplaceableConstDoubleValueSource();
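With the Lucene 9 expressions API, SimpleBindings no longer accepts a SortField; variables are bound to DoubleValuesSource instances, with DoubleValuesSource.SCORES standing in for the query score. A minimal illustrative sketch (the expression text is made up):

import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.search.DoubleValuesSource;
import java.text.ParseException;

class ScoreExpressionSketch {
    static DoubleValuesSource doubledScore() throws ParseException {
        Expression expr = JavascriptCompiler.compile("_score * 2");
        SimpleBindings bindings = new SimpleBindings();
        bindings.add("_score", DoubleValuesSource.SCORES); // replaces new SortField("_score", SortField.Type.SCORE)
        return expr.getDoubleValuesSource(bindings);
    }
}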

View File

@ -42,4 +42,5 @@ grant {
permission org.opensearch.script.ClassPermission "java.lang.Math";
permission org.opensearch.script.ClassPermission "org.apache.lucene.util.MathUtil";
permission org.opensearch.script.ClassPermission "org.apache.lucene.util.SloppyMath";
permission org.opensearch.script.ClassPermission "org.apache.lucene.expressions.js.ExpressionMath";
};

View File

@ -44,6 +44,10 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.FieldMaskingSpanQuery;
import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@ -52,10 +56,6 @@ import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;

View File

@ -38,6 +38,9 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.FieldMaskingSpanQuery;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
@ -47,9 +50,6 @@ import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.opensearch.common.Strings;
import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.opensearch.common.xcontent.XContentBuilder;

View File

@ -37,6 +37,7 @@ import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.join.JoinUtil;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.similarities.Similarity;
@ -409,6 +410,11 @@ public class HasChildQueryBuilder extends AbstractQueryBuilder<HasChildQueryBuil
this.similarity = similarity;
}
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten = super.rewrite(reader);
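Custom Query subclasses now have to provide Query#visit(QueryVisitor), which replaced Weight#extractTerms as the way Lucene discovers the terms and sub-queries inside a query. For an opaque wrapper query like the one above, reporting itself as a leaf is the minimal implementation, roughly:

import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;

abstract class OpaqueWrapperQuerySketch extends Query {
    @Override
    public void visit(QueryVisitor visitor) {
        // No child queries or terms are exposed; just report this query as a leaf.
        visitor.visitLeaf(this);
    }
}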

View File

@ -34,11 +34,11 @@ package org.opensearch.percolator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
@ -56,7 +56,6 @@ import org.opensearch.common.lucene.Lucene;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.Set;
final class PercolateQuery extends Query implements Accountable {
@ -112,8 +111,6 @@ final class PercolateQuery extends Query implements Accountable {
final Weight verifiedMatchesWeight = verifiedMatchesQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
final Weight candidateMatchesWeight = candidateMatchesQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
return new Weight(this) {
@Override
public void extractTerms(Set<Term> set) {}
@Override
public Explanation explain(LeafReaderContext leafReaderContext, int docId) throws IOException {
@ -245,6 +242,11 @@ final class PercolateQuery extends Query implements Accountable {
return verifiedMatchesQuery;
}
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
// Comparing identity here to avoid being cached
// Note that in theory if the same instance gets used multiple times it could still get cached,
// however since we create a new query instance each time we use this query this shouldn't happen and thus
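Weight#extractTerms is gone in Lucene 9; callers that need the terms of a query collect them through the visitor API instead. A minimal illustrative sketch of the replacement pattern:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import java.util.HashSet;
import java.util.Set;

class TermExtractionSketch {
    static Set<Term> termsOf(Query query) {
        Set<Term> terms = new HashSet<>();
        query.visit(QueryVisitor.termCollector(terms)); // walks the query tree, collecting leaf terms
        return terms;
    }
}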

View File

@ -43,9 +43,9 @@ import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.sandbox.search.CoveringQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CoveringQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchNoDocsQuery;
@ -279,7 +279,7 @@ public class PercolatorFieldMapper extends ParametrizedFieldMapper {
}
Query filter = null;
if (excludeNestedDocuments) {
filter = Queries.newNonNestedFilter(indexVersion);
filter = Queries.newNonNestedFilter();
}
return new PercolateQuery(name, queryStore, documents, candidateQuery, searcher, filter, verifiedMatchesQuery);
}

View File

@ -44,7 +44,6 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.opensearch.Version;
import org.opensearch.common.document.DocumentField;
import org.opensearch.common.lucene.search.Queries;
import org.opensearch.search.fetch.FetchContext;
@ -127,7 +126,7 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
this.percolateQuery = pq;
this.singlePercolateQuery = singlePercolateQuery;
IndexSearcher percolatorIndexSearcher = percolateQuery.getPercolatorIndexSearcher();
Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter(Version.CURRENT));
Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter());
Weight weight = percolatorIndexSearcher.createWeight(nonNestedFilter, ScoreMode.COMPLETE_NO_SCORES, 1f);
Scorer s = weight.scorer(percolatorIndexSearcher.getIndexReader().leaves().get(0));
int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc();
@ -148,7 +147,7 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
if (rootDocsBySlot != null) {
// Ensures that we filter out nested documents
return new BooleanQuery.Builder().add(in, BooleanClause.Occur.MUST)
.add(Queries.newNonNestedFilter(Version.CURRENT), BooleanClause.Occur.FILTER)
.add(Queries.newNonNestedFilter(), BooleanClause.Occur.FILTER)
.build();
}
return in;

View File

@ -35,6 +35,8 @@ import org.apache.lucene.document.BinaryRange;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.spans.SpanOrQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
@ -48,8 +50,6 @@ import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.automaton.ByteRunAutomaton;

View File

@ -37,7 +37,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.HalfFloatPoint;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
@ -60,10 +59,15 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanNotQuery;
import org.apache.lucene.queries.spans.SpanOrQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.sandbox.document.HalfFloatPoint;
import org.apache.lucene.sandbox.search.CoveringQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.CoveringQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
@ -74,6 +78,7 @@ import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
@ -83,10 +88,6 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@ -123,7 +124,6 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
@ -1279,6 +1279,11 @@ public class CandidateQueryTests extends OpenSearchSingleNodeTestCase {
return new TermQuery(term);
}
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public String toString(String field) {
return "custom{" + field + "}";
@ -1310,9 +1315,6 @@ public class CandidateQueryTests extends OpenSearchSingleNodeTestCase {
final IndexSearcher percolatorIndexSearcher = memoryIndex.createSearcher();
return new Weight(this) {
@Override
public void extractTerms(Set<Term> terms) {}
@Override
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
Scorer scorer = scorer(context);
@ -1386,6 +1388,11 @@ public class CandidateQueryTests extends OpenSearchSingleNodeTestCase {
};
}
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public String toString(String field) {
return "control{" + field + "}";

View File

@ -42,6 +42,8 @@ import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
@ -53,8 +55,6 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.opensearch.common.bytes.BytesArray;
import org.opensearch.test.OpenSearchTestCase;

View File

@ -35,7 +35,6 @@ package org.opensearch.percolator;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.HalfFloatPoint;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
@ -43,9 +42,10 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.sandbox.document.HalfFloatPoint;
import org.apache.lucene.sandbox.search.CoveringQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CoveringQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;

View File

@ -33,7 +33,6 @@ package org.opensearch.percolator;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.HalfFloatPoint;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LatLonPoint;
@ -45,6 +44,12 @@ import org.apache.lucene.queries.CommonTermsQuery;
import org.apache.lucene.queries.intervals.IntervalQuery;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.queries.spans.SpanFirstQuery;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanNotQuery;
import org.apache.lucene.queries.spans.SpanOrQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.sandbox.document.HalfFloatPoint;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
@ -63,11 +68,6 @@ import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.BytesRef;
import org.opensearch.Version;
import org.opensearch.common.lucene.search.function.CombineFunction;
@ -824,13 +824,13 @@ public class QueryAnalyzerTests extends OpenSearchTestCase {
}
public void testSynonymQuery() {
SynonymQuery query = new SynonymQuery();
SynonymQuery query = new SynonymQuery.Builder("field").build();
Result result = analyze(query, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.minimumShouldMatch, equalTo(0));
assertThat(result.extractions.isEmpty(), is(true));
query = new SynonymQuery(new Term("_field", "_value1"), new Term("_field", "_value2"));
query = new SynonymQuery.Builder("_field").addTerm(new Term("_field", "_value1")).addTerm(new Term("_field", "_value2")).build();
result = analyze(query, Version.CURRENT);
assertThat(result.verified, is(true));
assertThat(result.minimumShouldMatch, equalTo(1));

View File

@ -28,8 +28,6 @@
* under the License.
*/
import de.thetaphi.forbiddenapis.gradle.CheckForbiddenApis
apply plugin: 'opensearch.yaml-rest-test'
apply plugin: 'opensearch.internal-cluster-test'
@ -46,7 +44,7 @@ forbiddenApisMain {
}
dependencies {
api "org.apache.lucene:lucene-analyzers-icu:${versions.lucene}"
api "org.apache.lucene:lucene-analysis-icu:${versions.lucene}"
api "com.ibm.icu:icu4j:${versions.icu4j}"
}

View File

@ -1 +0,0 @@
7a4d00d5ec5febd252a6182e8b6e87a0a9821f81

View File

@ -0,0 +1 @@
76893e6000401ace133a65262254be0ebe556d46

View File

@ -0,0 +1 @@
a23a2c1c9baad61b6fb5380f072e41534c275875

View File

@ -1 +0,0 @@
a1eec256a25340ba5d432d2800f759db83eb5145

View File

@ -35,7 +35,7 @@ import com.ibm.icu.text.RawCollationKey;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.collation.ICUCollationDocValuesField;
import org.apache.lucene.analysis.icu.ICUCollationDocValuesField;
import java.io.IOException;

View File

@ -35,7 +35,7 @@ opensearchplugin {
}
dependencies {
api "org.apache.lucene:lucene-analyzers-kuromoji:${versions.lucene}"
api "org.apache.lucene:lucene-analysis-kuromoji:${versions.lucene}"
}
restResources {

View File

@ -0,0 +1 @@
55f00abe01e51181d687c6bbceca8544f319b97d

View File

@ -1 +0,0 @@
d9ff6329a9755bbdb7343452bf246e61ae9279d8

View File

@ -35,7 +35,7 @@ opensearchplugin {
}
dependencies {
api "org.apache.lucene:lucene-analyzers-nori:${versions.lucene}"
api "org.apache.lucene:lucene-analysis-nori:${versions.lucene}"
}
restResources {

View File

@ -0,0 +1 @@
c5258e674ad9c189338b026710869c2955d8e11d

View File

@ -1 +0,0 @@
6e78aef6d1b709ed3e27dbc949255e078da08d41

View File

@ -35,7 +35,7 @@ opensearchplugin {
}
dependencies {
api "org.apache.lucene:lucene-analyzers-phonetic:${versions.lucene}"
api "org.apache.lucene:lucene-analysis-phonetic:${versions.lucene}"
api "commons-codec:commons-codec:${versions.commonscodec}"
}

View File

@ -0,0 +1 @@
437960fac10a9f8327fbd87be4e408eb140988b3

View File

@ -1 +0,0 @@
c186bf6dd0c2fa6612ba9b0d785ff2d388d32a23

View File

@ -35,7 +35,7 @@ opensearchplugin {
}
dependencies {
api "org.apache.lucene:lucene-analyzers-smartcn:${versions.lucene}"
api "org.apache.lucene:lucene-analysis-smartcn:${versions.lucene}"
}
restResources {

View File

@ -0,0 +1 @@
fe96c0b4609be5f7450773c2d7f099c51f4b1f7a

View File

@ -1 +0,0 @@
ebda1884c24bb14ee451b98e7565c86966f8863d

View File

@ -35,7 +35,7 @@ opensearchplugin {
}
dependencies {
api "org.apache.lucene:lucene-analyzers-stempel:${versions.lucene}"
api "org.apache.lucene:lucene-analysis-stempel:${versions.lucene}"
}
restResources {

View File

@ -0,0 +1 @@
b92e86dd451d225e68ee4abac5b00bf883b6ea00

View File

@ -1 +0,0 @@
2a4bd86c96374cdc5acaf7c0efd5127f2fd3a519

View File

@ -35,7 +35,7 @@ opensearchplugin {
}
dependencies {
api "org.apache.lucene:lucene-analyzers-morfologik:${versions.lucene}"
api "org.apache.lucene:lucene-analysis-morfologik:${versions.lucene}"
api "org.carrot2:morfologik-stemming:2.1.8"
api "org.carrot2:morfologik-fsa:2.1.8"
api "ua.net.nlp:morfologik-ukrainian-search:4.9.1"

View File

@ -0,0 +1 @@
048fddf601c6de7dd296f6da3f394544618f7cea

View File

@ -1 +0,0 @@
09de2e3fa72355228b2723f958dcb0ec1bc3f31a

View File

@ -136,7 +136,6 @@ public class AnnotatedTextHighlighterTests extends OpenSearchTestCase {
noMatchSize,
expectedPassages.length,
name -> "text".equals(name),
Integer.MAX_VALUE,
Integer.MAX_VALUE
);
highlighter.setFieldMatcher((name) -> "text".equals(name));

View File

@ -6,13 +6,34 @@
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.index.store;
import org.opensearch.common.settings.Settings;
/**
* Index Settings Tests for NIO FileSystem as index store type.
*/
public class SmbNIOFsTests extends AbstractAzureFsTestCase {
@Override
public Settings indexSettings() {

View File

@ -97,7 +97,7 @@ dependencies {
// lucene
api "org.apache.lucene:lucene-core:${versions.lucene}"
api "org.apache.lucene:lucene-analyzers-common:${versions.lucene}"
api "org.apache.lucene:lucene-analysis-common:${versions.lucene}"
api "org.apache.lucene:lucene-backward-codecs:${versions.lucene}"
api "org.apache.lucene:lucene-grouping:${versions.lucene}"
api "org.apache.lucene:lucene-highlighter:${versions.lucene}"

View File

@ -0,0 +1 @@
f78890829c3d6f15de48fdbc2c77ef4c0e3f005c

View File

@ -1 +0,0 @@
23bb36a98d01100953674c56c20861b29b5a5175

View File

@ -1 +0,0 @@
7399c32bc4ba7e37e14a9660ffd7962acf68a802

View File

@ -0,0 +1 @@
9fb48d0244799e18299449ee62459caab0728490

View File

@ -1 +0,0 @@
deb78f6b21d29f964ab267ad59fafb58ef740101

View File

@ -0,0 +1 @@
be679fd274f264e4e8b02bc032d2788cd4076ab4

View File

@ -1 +0,0 @@
7b91bb886d30c67a8f980d3bdfd6b7826a62d5e7

View File

@ -0,0 +1 @@
27ebe235d427b4e392fabab9b6bfa09524ca7f8b

View File

@ -1 +0,0 @@
ec4a2103cb300aab7e6142f1c7778dd505ecb8e2

View File

@ -0,0 +1 @@
a3cb395c2e8c672e6eec951b2b02371a4a883f73

View File

@ -1 +0,0 @@
aa368e9d11660dcfcfaab1a39dd871f05fa2b031

View File

@ -0,0 +1 @@
94a855b5d09a6601289aeaeba0f11d5539552590

View File

@ -1 +0,0 @@
9de18bf605879647e964fd57ddf3fa6f85ca743e

View File

@ -0,0 +1 @@
2371c95031422bc1f501d43ffcc7311baed4b35b

View File

@ -1 +0,0 @@
e9cca86ebbe010d375388c5a17216e2d2b2e76bb

View File

@ -0,0 +1 @@
25c6170f4fa2f707908dfb92fbafc76727f901e0

View File

@ -1 +0,0 @@
21b70a0996e3408291514d99e3b03800d0bcd657

View File

@ -0,0 +1 @@
87b4c7833d30895baf7091f9cb0db878e970b604

View File

@ -1 +0,0 @@
087f52ee3f72f387b802c49a96e4a14b3b05dd21

View File

@ -0,0 +1 @@
bf13395ad2033bca3182fcbc83204e8ae1951945

View File

@ -1 +0,0 @@
82b15ef61297e6d7b0c1f6c37c502d6b77a82f1e

View File

@ -0,0 +1 @@
3c153a1dc1da3f98083cc932c9476df4b77b0ca5

View File

@ -1 +0,0 @@
7a3b6eac3e66bb1c6fb05c0cd980e5592adaf96b

View File

@ -0,0 +1 @@
91535ef6512c45c7e2b113b04cab7738ee774893

View File

@ -1 +0,0 @@
823a5e9d2fd3b5b668d305e0781d0e074e9f2ebb

View File

@ -0,0 +1 @@
6b4ee47f218ed3d123c1b07671677a2e4f3c133b

View File

@ -1 +0,0 @@
92d7e5a178d0df58e0b4d400755ac46bae3eea11

View File

@ -0,0 +1 @@
a7d0e7279737114c039f5214082da948732096a6

View File

@ -56,6 +56,7 @@ public class IndexPrimaryRelocationIT extends OpenSearchIntegTestCase {
private static final int RELOCATION_COUNT = 15;
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/2063")
public void testPrimaryRelocationWhileIndexing() throws Exception {
internalCluster().ensureAtLeastNumDataNodes(randomIntBetween(2, 3));
client().admin()

View File

@ -32,9 +32,7 @@
package org.opensearch.recovery;
import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.carrotsearch.hppc.procedures.IntProcedure;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.util.English;
import org.opensearch.action.ActionFuture;
@ -61,6 +59,7 @@ import org.opensearch.common.xcontent.XContentType;
import org.opensearch.env.NodeEnvironment;
import org.opensearch.index.IndexService;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.seqno.ReplicationTracker;
import org.opensearch.index.seqno.RetentionLease;
import org.opensearch.index.shard.IndexEventListener;
@ -192,6 +191,7 @@ public class RelocationIT extends OpenSearchIntegTestCase {
assertThat(client().prepareSearch("test").setSize(0).execute().actionGet().getHits().getTotalHits().value, equalTo(20L));
}
@AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/2063")
public void testRelocationWhileIndexingRandom() throws Exception {
int numberOfRelocations = scaledRandomIntBetween(1, rarely() ? 10 : 4);
int numberOfReplicas = randomBoolean() ? 0 : 1;
@ -228,7 +228,7 @@ public class RelocationIT extends OpenSearchIntegTestCase {
}
int numDocs = scaledRandomIntBetween(200, 2500);
try (BackgroundIndexer indexer = new BackgroundIndexer("test", "type1", client(), numDocs)) {
try (BackgroundIndexer indexer = new BackgroundIndexer("test", MapperService.SINGLE_MAPPING_NAME, client(), numDocs)) {
logger.info("--> waiting for {} docs to be indexed ...", numDocs);
waitForDocs(numDocs, indexer);
logger.info("--> {} docs indexed", numDocs);
@ -285,20 +285,20 @@ public class RelocationIT extends OpenSearchIntegTestCase {
for (int hit = 0; hit < indexer.totalIndexedDocs(); hit++) {
hitIds[hit] = hit + 1;
}
IntHashSet set = IntHashSet.from(hitIds);
Set<Integer> set = Arrays.stream(hitIds).boxed().collect(Collectors.toSet());
for (SearchHit hit : hits.getHits()) {
int id = Integer.parseInt(hit.getId());
if (!set.remove(id)) {
if (set.remove(id) == false) {
logger.error("Extra id [{}]", id);
}
}
set.forEach((IntProcedure) value -> { logger.error("Missing id [{}]", value); });
set.forEach(value -> logger.error("Missing id [{}]", value));
}
assertThat(hits.getTotalHits().value, equalTo(indexer.totalIndexedDocs()));
logger.info("--> DONE search test round {}", i + 1);
}
if (!ranOnce) {
if (ranOnce == false) {
fail();
}
}

View File

@ -3288,6 +3288,36 @@ public class HighlighterSearchIT extends OpenSearchIntegTestCase {
);
}
public void testCopyToFields() throws Exception {
XContentBuilder b = jsonBuilder().startObject().startObject("properties");
b.startObject("foo");
{
b.field("type", "text");
b.field("copy_to", "foo_copy");
}
b.endObject();
// If field is not stored, it is looked up in source (but source has only 'foo')
b.startObject("foo_copy").field("type", "text").field("store", true).endObject();
b.endObject().endObject();
prepareCreate("test").addMapping("type", b).get();
client().prepareIndex("test")
.setId("1")
.setSource(jsonBuilder().startObject().field("foo", "how now brown cow").endObject())
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
.get();
SearchResponse response = client().prepareSearch()
.setQuery(matchQuery("foo_copy", "brown"))
.highlighter(new HighlightBuilder().field(new Field("foo_copy")))
.get();
assertHitCount(response, 1);
HighlightField field = response.getHits().getAt(0).getHighlightFields().get("foo_copy");
assertThat(field.getFragments().length, equalTo(1));
assertThat(field.getFragments()[0].string(), equalTo("how now <em>brown</em> cow"));
}
public void testACopyFieldWithNestedQuery() throws Exception {
String mapping = Strings.toString(
jsonBuilder().startObject()

View File

@ -331,8 +331,6 @@ public class QueryStringIT extends OpenSearchIntegTestCase {
doAssertOneHitForQueryString("field_A0:foo");
// expanding to the limit should work
doAssertOneHitForQueryString("field_A\\*:foo");
// expanding two blocks to the limit still works
doAssertOneHitForQueryString("field_A\\*:foo field_B\\*:bar");
// adding a non-existing field on top shouldn't overshoot the limit
doAssertOneHitForQueryString("field_A\\*:foo unmapped:something");

View File

@ -238,11 +238,10 @@ public class SimpleValidateQueryIT extends OpenSearchIntegTestCase {
assertThat(response.getQueryExplanation().size(), equalTo(1));
assertThat(
response.getQueryExplanation().get(0).getExplanation(),
equalTo(
"(MatchNoDocsQuery(\"failed [bar] query, caused by number_format_exception:[For input string: \"foo\"]\") "
+ "| foo:foo | baz:foo)"
)
containsString("MatchNoDocsQuery(\"failed [bar] query, caused by number_format_exception:[For input string: \"foo\"]\")")
);
assertThat(response.getQueryExplanation().get(0).getExplanation(), containsString("foo:foo"));
assertThat(response.getQueryExplanation().get(0).getExplanation(), containsString("baz:foo"));
assertThat(response.getQueryExplanation().get(0).getError(), nullValue());
}
}

View File

@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.misc.search.similarity;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
/**
* Similarity that behaves like {@link BM25Similarity} while also applying the k1+1 factor to the
* numerator of the scoring formula
*
* @see BM25Similarity
* @deprecated {@link BM25Similarity} should be used instead
*/
@Deprecated
public final class LegacyBM25Similarity extends Similarity {
private final BM25Similarity bm25Similarity;
/**
* BM25 with these default values:
*
* <ul>
* <li>{@code k1 = 1.2}
* <li>{@code b = 0.75}
* <li>{@code discountOverlaps = true}
* </ul>
*/
public LegacyBM25Similarity() {
this.bm25Similarity = new BM25Similarity();
}
/**
* BM25 with the supplied parameter values.
*
* @param k1 Controls non-linear term frequency normalization (saturation).
* @param b Controls to what degree document length normalizes tf values.
* @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is not
* within the range {@code [0..1]}
*/
public LegacyBM25Similarity(float k1, float b) {
this.bm25Similarity = new BM25Similarity(k1, b);
}
/**
* BM25 with the supplied parameter values.
*
* @param k1 Controls non-linear term frequency normalization (saturation).
* @param b Controls to what degree document length normalizes tf values.
* @param discountOverlaps True if overlap tokens (tokens with a position of increment of zero)
* are discounted from the document's length.
* @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is not
* within the range {@code [0..1]}
*/
public LegacyBM25Similarity(float k1, float b, boolean discountOverlaps) {
this.bm25Similarity = new BM25Similarity(k1, b, discountOverlaps);
}
@Override
public long computeNorm(FieldInvertState state) {
return bm25Similarity.computeNorm(state);
}
@Override
public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
return bm25Similarity.scorer(boost * (1 + bm25Similarity.getK1()), collectionStats, termStats);
}
/**
* Returns the <code>k1</code> parameter
*
* @see #LegacyBM25Similarity(float, float)
*/
public final float getK1() {
return bm25Similarity.getK1();
}
/**
* Returns the <code>b</code> parameter
*
* @see #LegacyBM25Similarity(float, float)
*/
public final float getB() {
return bm25Similarity.getB();
}
/**
* Returns true if overlap tokens are discounted from the document's length.
*
* @see #LegacyBM25Similarity(float, float, boolean)
*/
public boolean getDiscountOverlaps() {
return bm25Similarity.getDiscountOverlaps();
}
@Override
public String toString() {
return bm25Similarity.toString();
}
}
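A short usage sketch for the forked class above, assuming an existing IndexSearcher: it plugs in wherever a Similarity does and keeps the k1+1 numerator factor described in its Javadoc (the parameter values below are simply the classic BM25 defaults):

import org.apache.lucene.misc.search.similarity.LegacyBM25Similarity;
import org.apache.lucene.search.IndexSearcher;

class LegacySimilaritySketch {
    static void useLegacyScoring(IndexSearcher searcher) {
        searcher.setSimilarity(new LegacyBM25Similarity(1.2f, 0.75f)); // k1 = 1.2, b = 0.75
    }
}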

View File

@ -39,6 +39,7 @@ import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
@ -138,6 +139,13 @@ public final class BinaryDocValuesRangeQuery extends Query {
};
}
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(fieldName)) {
visitor.visitLeaf(this);
}
}
@Override
public String toString(String field) {
return "BinaryDocValuesRangeQuery(fieldName=" + field + ",from=" + originalFrom + ",to=" + originalTo + ")";

View File

@ -34,16 +34,16 @@ package org.apache.lucene.queries;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.queries.spans.SpanWeight;
import org.apache.lucene.queries.spans.Spans;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
/**
* A {@link SpanQuery} that matches no documents.
@ -57,6 +57,11 @@ public class SpanMatchNoDocsQuery extends SpanQuery {
this.reason = reason;
}
@Override
public void visit(QueryVisitor visitor) {
visitor.visitLeaf(this);
}
@Override
public String getField() {
return field;
@ -88,9 +93,6 @@ public class SpanMatchNoDocsQuery extends SpanQuery {
return null;
}
@Override
public void extractTerms(Set<Term> terms) {}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return true;

View File

@ -35,14 +35,15 @@ package org.apache.lucene.search.uhighlight;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanOrQuery;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag;
import org.apache.lucene.util.BytesRef;
import org.opensearch.common.CheckedSupplier;
import org.opensearch.common.Nullable;
@ -77,7 +78,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private final FieldHighlighter fieldHighlighter;
private final int keywordIgnoreAbove;
private final int maxAnalyzedOffset;
/**
@ -97,7 +97,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
* @param noMatchSize The size of the text that should be returned when no highlighting can be performed.
* @param maxPassages the maximum number of passes to highlight
* @param fieldMatcher decides which terms should be highlighted
* @param keywordIgnoreAbove if the field's value is longer than this we'll skip it
* @param maxAnalyzedOffset if the field is more than this long we'll refuse to use the ANALYZED
* offset source for it because it'd be super slow
*/
@ -114,7 +113,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
int noMatchSize,
int maxPassages,
Predicate<String> fieldMatcher,
int keywordIgnoreAbove,
int maxAnalyzedOffset
) throws IOException {
super(searcher, analyzer);
@ -126,7 +124,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
this.field = field;
this.noMatchSize = noMatchSize;
this.setFieldMatcher(fieldMatcher);
this.keywordIgnoreAbove = keywordIgnoreAbove;
this.maxAnalyzedOffset = maxAnalyzedOffset;
fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
}
@ -144,9 +141,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
return null;
}
int fieldValueLength = fieldValue.length();
if (fieldValueLength > keywordIgnoreAbove) {
return null; // skip highlighting keyword terms that were ignored during indexing
}
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length of ["
@ -266,4 +260,12 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
return offsetSource;
}
/** Customize the highlighting flags to use by field. */
@Override
protected Set<HighlightFlag> getFlags(String field) {
final Set<HighlightFlag> flags = super.getFlags(field);
// Change the defaults introduced by https://issues.apache.org/jira/browse/LUCENE-9431
flags.remove(HighlightFlag.WEIGHT_MATCHES);
return flags;
}
}

View File

@ -35,6 +35,7 @@ package org.apache.lucene.search.vectorhighlight;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
@ -42,7 +43,6 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.opensearch.common.lucene.search.function.FunctionScoreQuery;
import org.opensearch.index.search.OpenSearchToParentBlockJoinQuery;

View File

@ -127,4 +127,9 @@ public final class CombinedBitSet extends BitSet implements Bits {
public void clear(int startIndex, int endIndex) {
throw new UnsupportedOperationException("not implemented");
}
@Override
public boolean getAndSet(int i) {
throw new UnsupportedOperationException("not implemented");
}
}

View File

@ -0,0 +1,186 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/* @notice
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.apache.lucene.util;
import java.io.IOException;
import java.io.InputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.ServiceConfigurationError;
/**
* Helper class for loading SPI classes from classpath (META-INF files).
* This is a light impl of {@link java.util.ServiceLoader} but is guaranteed to
* be bug-free regarding classpath order and does not instantiate or initialize
* the classes found.
*/
@SuppressForbidden(reason = "Taken from Lucene")
public final class SPIClassIterator<S> implements Iterator<Class<? extends S>> {
private static final String META_INF_SERVICES = "META-INF/services/";
private final Class<S> clazz;
private final ClassLoader loader;
private final Enumeration<URL> profilesEnum;
private Iterator<String> linesIterator;
/** Creates a new SPI iterator to lookup services of type {@code clazz} using
* the same {@link ClassLoader} as the argument. */
public static <S> SPIClassIterator<S> get(Class<S> clazz) {
return new SPIClassIterator<>(clazz, Objects.requireNonNull(clazz.getClassLoader(), () -> clazz + " has no classloader."));
}
/** Creates a new SPI iterator to lookup services of type {@code clazz} using the given classloader. */
public static <S> SPIClassIterator<S> get(Class<S> clazz, ClassLoader loader) {
return new SPIClassIterator<>(clazz, loader);
}
/**
* Utility method to check if some class loader is a (grand-)parent of or the same as another one.
* This means the child will be able to load all classes from the parent, too.
* <p>
* If caller's codesource doesn't have enough permissions to do the check, {@code false} is returned
* (this is fine, because if we get a {@code SecurityException} it is for sure no parent).
*/
public static boolean isParentClassLoader(final ClassLoader parent, final ClassLoader child) {
try {
ClassLoader cl = child;
while (cl != null) {
if (cl == parent) {
return true;
}
cl = cl.getParent();
}
return false;
} catch (SecurityException se) {
return false;
}
}
private SPIClassIterator(Class<S> clazz, ClassLoader loader) {
this.clazz = Objects.requireNonNull(clazz, "clazz");
this.loader = Objects.requireNonNull(loader, "loader");
try {
final String fullName = META_INF_SERVICES + clazz.getName();
this.profilesEnum = loader.getResources(fullName);
} catch (IOException ioe) {
throw new ServiceConfigurationError("Error loading SPI profiles for type " + clazz.getName() + " from classpath", ioe);
}
this.linesIterator = Collections.<String>emptySet().iterator();
}
private boolean loadNextProfile() {
ArrayList<String> lines = null;
while (profilesEnum.hasMoreElements()) {
if (lines != null) {
lines.clear();
} else {
lines = new ArrayList<>();
}
final URL url = profilesEnum.nextElement();
try {
final InputStream in = url.openStream();
boolean success = false;
try {
final BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
String line;
while ((line = reader.readLine()) != null) {
final int pos = line.indexOf('#');
if (pos >= 0) {
line = line.substring(0, pos);
}
line = line.trim();
if (line.length() > 0) {
lines.add(line);
}
}
success = true;
} finally {
if (success) {
IOUtils.close(in);
} else {
IOUtils.closeWhileHandlingException(in);
}
}
} catch (IOException ioe) {
throw new ServiceConfigurationError("Error loading SPI class list from URL: " + url, ioe);
}
if (lines.isEmpty() == false) {
this.linesIterator = lines.iterator();
return true;
}
}
return false;
}
@Override
public boolean hasNext() {
return linesIterator.hasNext() || loadNextProfile();
}
@Override
public Class<? extends S> next() {
// hasNext() implicitly loads the next profile, so it is essential to call this here!
if (hasNext() == false) {
throw new NoSuchElementException();
}
assert linesIterator.hasNext();
final String c = linesIterator.next();
try {
// don't initialize the class (pass false as 2nd parameter):
return Class.forName(c, false, loader).asSubclass(clazz);
} catch (ClassNotFoundException cnfe) {
throw new ServiceConfigurationError(
String.format(
Locale.ROOT,
"An SPI class of type %s with classname %s does not exist, " + "please fix the file '%s%1$s' in your classpath.",
clazz.getName(),
c,
META_INF_SERVICES
)
);
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}
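A hypothetical usage sketch for the forked iterator above: it enumerates the classes registered under META-INF/services for a given SPI type without instantiating them (Codec is used here purely as an example type):

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.util.SPIClassIterator;

class SpiListingSketch {
    static void listCodecClasses() {
        SPIClassIterator<Codec> it = SPIClassIterator.get(Codec.class);
        while (it.hasNext()) {
            Class<? extends Codec> impl = it.next(); // loaded but not initialized
            System.out.println(impl.getName());
        }
    }
}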

View File

@ -0,0 +1,317 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Forked from Lucene 8.x; removed in Lucene 9.0
*
* TODO: further investigate a better alternative
*
* Space optimized random access capable array of values with a fixed number of bits/value. Values
* are packed contiguously.
*
* <p>The implementation strives to perform as fast as possible under the constraint of contiguous
* bits, by avoiding expensive operations. This comes at the cost of code clarity.
*
* <p>Technical details: This implementation is a refinement of a non-branching version. The
* non-branching get and set methods meant that 2 or 4 atomics in the underlying array were always
* accessed, even for the cases where only 1 or 2 were needed. Even with caching, this had a
* detrimental effect on performance. Related to this issue, the old implementation used lookup
* tables for shifts and masks, which also proved to be a bit slower than calculating the shifts and
* masks on the fly. See https://issues.apache.org/jira/browse/LUCENE-4062 for details.
*/
class XPacked64 extends XPackedInts.MutableImpl {
static final int BLOCK_SIZE = 64; // 32 = int, 64 = long
static final int BLOCK_BITS = 6; // The #bits representing BLOCK_SIZE
static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE
/** Values are stored contiguously in the blocks array. */
private final long[] blocks;
/** A right-aligned mask of width BitsPerValue used by {@link #get(int)}. */
private final long maskRight;
/** Optimization: Saves one lookup in {@link #get(int)}. */
private final int bpvMinusBlockSize;
/**
* Creates an array with the internal structures adjusted for the given limits and initialized to
* 0.
*
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
*/
public XPacked64(int valueCount, int bitsPerValue) {
super(valueCount, bitsPerValue);
final PackedInts.Format format = PackedInts.Format.PACKED;
final int longCount = format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue);
this.blocks = new long[longCount];
maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
}
/**
* Creates an array with content retrieved from the given DataInput.
*
* @param in a DataInput, positioned at the start of Packed64-content.
* @param valueCount the number of elements.
* @param bitsPerValue the number of bits available for any given value.
* @throws java.io.IOException if the values for the backing array could not be retrieved.
*/
public XPacked64(int packedIntsVersion, DataInput in, int valueCount, int bitsPerValue) throws IOException {
super(valueCount, bitsPerValue);
final PackedInts.Format format = PackedInts.Format.PACKED;
final long byteCount = format.byteCount(packedIntsVersion, valueCount, bitsPerValue); // to know how much to read
final int longCount = format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue); // to size the array
blocks = new long[longCount];
// read as many longs as we can
for (int i = 0; i < byteCount / 8; ++i) {
blocks[i] = in.readLong();
}
final int remaining = (int) (byteCount % 8);
if (remaining != 0) {
// read the last bytes
long lastLong = 0;
for (int i = 0; i < remaining; ++i) {
lastLong |= (in.readByte() & 0xFFL) << (56 - i * 8);
}
blocks[blocks.length - 1] = lastLong;
}
maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
}
/**
* @param index the position of the value.
* @return the value at the given index.
*/
@Override
public long get(final int index) {
// The abstract index in a bit stream
final long majorBitPos = (long) index * bitsPerValue;
// The index in the backing long-array
final int elementPos = (int) (majorBitPos >>> BLOCK_BITS);
// The number of value-bits in the second long
final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
if (endBits <= 0) { // Single block
return (blocks[elementPos] >>> -endBits) & maskRight;
}
// Two blocks
return ((blocks[elementPos] << endBits) | (blocks[elementPos + 1] >>> (BLOCK_SIZE - endBits))) & maskRight;
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
len = Math.min(len, valueCount - index);
assert off + len <= arr.length;
final int originalIndex = index;
final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
// go to the next block where the value does not span across two blocks
final int offsetInBlocks = index % decoder.longValueCount();
if (offsetInBlocks != 0) {
for (int i = offsetInBlocks; i < decoder.longValueCount() && len > 0; ++i) {
arr[off++] = get(index++);
--len;
}
if (len == 0) {
return index - originalIndex;
}
}
// bulk get
assert index % decoder.longValueCount() == 0;
int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
assert (((long) index * bitsPerValue) & MOD_MASK) == 0;
final int iterations = len / decoder.longValueCount();
decoder.decode(blocks, blockIndex, arr, off, iterations);
final int gotValues = iterations * decoder.longValueCount();
index += gotValues;
len -= gotValues;
assert len >= 0;
if (index > originalIndex) {
// stay at the block boundary
return index - originalIndex;
} else {
// no progress so far => already at a block boundary but no full block to get
assert index == originalIndex;
return super.get(index, arr, off, len);
}
}
@Override
public void set(final int index, final long value) {
// The abstract index in a contiguous bit stream
final long majorBitPos = (long) index * bitsPerValue;
// The index in the backing long-array
final int elementPos = (int) (majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
// The number of value-bits in the second long
final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
if (endBits <= 0) { // Single block
blocks[elementPos] = blocks[elementPos] & ~(maskRight << -endBits) | (value << -endBits);
return;
}
// Two blocks
blocks[elementPos] = blocks[elementPos] & ~(maskRight >>> endBits) | (value >>> endBits);
blocks[elementPos + 1] = blocks[elementPos + 1] & (~0L >>> endBits) | (value << (BLOCK_SIZE - endBits));
}
@Override
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
len = Math.min(len, valueCount - index);
assert off + len <= arr.length;
final int originalIndex = index;
final PackedInts.Encoder encoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
// go to the next block where the value does not span across two blocks
final int offsetInBlocks = index % encoder.longValueCount();
if (offsetInBlocks != 0) {
for (int i = offsetInBlocks; i < encoder.longValueCount() && len > 0; ++i) {
set(index++, arr[off++]);
--len;
}
if (len == 0) {
return index - originalIndex;
}
}
// bulk set
assert index % encoder.longValueCount() == 0;
int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
assert (((long) index * bitsPerValue) & MOD_MASK) == 0;
final int iterations = len / encoder.longValueCount();
encoder.encode(arr, off, blocks, blockIndex, iterations);
final int setValues = iterations * encoder.longValueCount();
index += setValues;
len -= setValues;
assert len >= 0;
if (index > originalIndex) {
// stay at the block boundary
return index - originalIndex;
} else {
// no progress so far => already at a block boundary but no full block to get
assert index == originalIndex;
return super.set(index, arr, off, len);
}
}
@Override
public String toString() {
return "Packed64(bitsPerValue=" + bitsPerValue + ",size=" + size() + ",blocks=" + blocks.length + ")";
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3 * Integer.BYTES // bpvMinusBlockSize,valueCount,bitsPerValue
+ Long.BYTES // maskRight
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF
) // blocks ref
+ RamUsageEstimator.sizeOf(blocks);
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
assert PackedInts.unsignedBitsRequired(val) <= getBitsPerValue();
assert fromIndex <= toIndex;
// minimum number of values that use an exact number of full blocks
final int nAlignedValues = 64 / gcd(64, bitsPerValue);
final int span = toIndex - fromIndex;
if (span <= 3 * nAlignedValues) {
// there needs to be at least 2 * nAlignedValues aligned values for the
// block approach to be worth trying
super.fill(fromIndex, toIndex, val);
return;
}
// fill the first values naively until the next block start
final int fromIndexModNAlignedValues = fromIndex % nAlignedValues;
if (fromIndexModNAlignedValues != 0) {
for (int i = fromIndexModNAlignedValues; i < nAlignedValues; ++i) {
set(fromIndex++, val);
}
}
assert fromIndex % nAlignedValues == 0;
// compute the long[] blocks for nAlignedValues consecutive values and
// use them to set as many values as possible without applying any mask
// or shift
final int nAlignedBlocks = (nAlignedValues * bitsPerValue) >> 6;
final long[] nAlignedValuesBlocks;
{
XPacked64 values = new XPacked64(nAlignedValues, bitsPerValue);
for (int i = 0; i < nAlignedValues; ++i) {
values.set(i, val);
}
nAlignedValuesBlocks = values.blocks;
assert nAlignedBlocks <= nAlignedValuesBlocks.length;
}
final int startBlock = (int) (((long) fromIndex * bitsPerValue) >>> 6);
final int endBlock = (int) (((long) toIndex * bitsPerValue) >>> 6);
for (int block = startBlock; block < endBlock; ++block) {
final long blockValue = nAlignedValuesBlocks[block % nAlignedBlocks];
blocks[block] = blockValue;
}
// fill the gap
for (int i = (int) (((long) endBlock << 6) / bitsPerValue); i < toIndex; ++i) {
set(i, val);
}
}
private static int gcd(int a, int b) {
if (a < b) {
return gcd(b, a);
} else if (b == 0) {
return a;
} else {
return gcd(b, a % b);
}
}
@Override
public void clear() {
Arrays.fill(blocks, 0L);
}
}
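Because values are packed back to back, a value can straddle two backing longs; get() and set() above separate the single-block and two-block cases with the endBits test. The standalone sketch below replays that exact index arithmetic for 13-bit values so the shift/mask logic can be checked in isolation; it is illustrative only and does not touch the forked class:

// Sketch: pack and unpack 13-bit values into a long[] using the same
// majorBitPos / elementPos / endBits arithmetic as XPacked64.
public final class PackedBitsDemo {
    private static final int BLOCK_SIZE = 64;
    private static final int BLOCK_BITS = 6;
    private static final int MOD_MASK = BLOCK_SIZE - 1;

    public static void main(String[] args) {
        final int bitsPerValue = 13;
        final int valueCount = 10;
        final long maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
        final int bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
        final long[] blocks = new long[(valueCount * bitsPerValue + 63) / 64];

        // set(index, value): single-block vs. two-block write
        for (int index = 0; index < valueCount; index++) {
            final long value = (index * 997L) & maskRight;
            final long majorBitPos = (long) index * bitsPerValue;
            final int elementPos = (int) (majorBitPos >>> BLOCK_BITS);
            final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
            if (endBits <= 0) {
                blocks[elementPos] = blocks[elementPos] & ~(maskRight << -endBits) | (value << -endBits);
            } else {
                blocks[elementPos] = blocks[elementPos] & ~(maskRight >>> endBits) | (value >>> endBits);
                blocks[elementPos + 1] = blocks[elementPos + 1] & (~0L >>> endBits) | (value << (BLOCK_SIZE - endBits));
            }
        }

        // get(index): read everything back and verify
        for (int index = 0; index < valueCount; index++) {
            final long expected = (index * 997L) & maskRight;
            final long majorBitPos = (long) index * bitsPerValue;
            final int elementPos = (int) (majorBitPos >>> BLOCK_BITS);
            final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;
            final long got = endBits <= 0
                ? (blocks[elementPos] >>> -endBits) & maskRight
                : ((blocks[elementPos] << endBits) | (blocks[elementPos + 1] >>> (BLOCK_SIZE - endBits))) & maskRight;
            if (got != expected) {
                throw new AssertionError("index " + index + ": got " + got + ", expected " + expected);
            }
        }
        System.out.println("round-tripped " + valueCount + " values at " + bitsPerValue + " bits each");
    }
}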

View File

@ -0,0 +1,574 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.lucene.util.packed;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Forked from Lucene 8.x; removed in Lucene 9.0
*
* TODO: further investigate a better alternative
*
* This class is similar to {@link Packed64} except that it trades space for speed by ensuring that
* a single block needs to be read/written in order to read/write a value.
*/
abstract class XPacked64SingleBlock extends XPackedInts.MutableImpl {
public static final int MAX_SUPPORTED_BITS_PER_VALUE = 32;
private static final int[] SUPPORTED_BITS_PER_VALUE = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32 };
public static boolean isSupported(int bitsPerValue) {
return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
}
private static int requiredCapacity(int valueCount, int valuesPerBlock) {
return valueCount / valuesPerBlock + (valueCount % valuesPerBlock == 0 ? 0 : 1);
}
final long[] blocks;
XPacked64SingleBlock(int valueCount, int bitsPerValue) {
super(valueCount, bitsPerValue);
assert isSupported(bitsPerValue);
final int valuesPerBlock = 64 / bitsPerValue;
blocks = new long[requiredCapacity(valueCount, valuesPerBlock)];
}
@Override
public void clear() {
Arrays.fill(blocks, 0L);
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 2 * Integer.BYTES // valueCount,bitsPerValue
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF
) // blocks ref
+ RamUsageEstimator.sizeOf(blocks);
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
len = Math.min(len, valueCount - index);
assert off + len <= arr.length;
final int originalIndex = index;
// go to the next block boundary
final int valuesPerBlock = 64 / bitsPerValue;
final int offsetInBlock = index % valuesPerBlock;
if (offsetInBlock != 0) {
for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
arr[off++] = get(index++);
--len;
}
if (len == 0) {
return index - originalIndex;
}
}
// bulk get
assert index % valuesPerBlock == 0;
@SuppressWarnings("deprecation")
final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
assert decoder.longBlockCount() == 1;
assert decoder.longValueCount() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
decoder.decode(blocks, blockIndex, arr, off, nblocks);
final int diff = nblocks * valuesPerBlock;
index += diff;
len -= diff;
if (index > originalIndex) {
// stay at the block boundary
return index - originalIndex;
} else {
// no progress so far => already at a block boundary but no full block to
// get
assert index == originalIndex;
return super.get(index, arr, off, len);
}
}
@Override
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
len = Math.min(len, valueCount - index);
assert off + len <= arr.length;
final int originalIndex = index;
// go to the next block boundary
final int valuesPerBlock = 64 / bitsPerValue;
final int offsetInBlock = index % valuesPerBlock;
if (offsetInBlock != 0) {
for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
set(index++, arr[off++]);
--len;
}
if (len == 0) {
return index - originalIndex;
}
}
// bulk set
assert index % valuesPerBlock == 0;
@SuppressWarnings("deprecation")
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
assert op.longBlockCount() == 1;
assert op.longValueCount() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.encode(arr, off, blocks, blockIndex, nblocks);
final int diff = nblocks * valuesPerBlock;
index += diff;
len -= diff;
if (index > originalIndex) {
// stay at the block boundary
return index - originalIndex;
} else {
// no progress so far => already at a block boundary but no full block to
// set
assert index == originalIndex;
return super.set(index, arr, off, len);
}
}
@Override
public void fill(int fromIndex, int toIndex, long val) {
assert fromIndex >= 0;
assert fromIndex <= toIndex;
assert PackedInts.unsignedBitsRequired(val) <= bitsPerValue;
final int valuesPerBlock = 64 / bitsPerValue;
if (toIndex - fromIndex <= valuesPerBlock << 1) {
// there needs to be at least one full block to set for the block
// approach to be worth trying
super.fill(fromIndex, toIndex, val);
return;
}
// set values naively until the next block start
int fromOffsetInBlock = fromIndex % valuesPerBlock;
if (fromOffsetInBlock != 0) {
for (int i = fromOffsetInBlock; i < valuesPerBlock; ++i) {
set(fromIndex++, val);
}
assert fromIndex % valuesPerBlock == 0;
}
// bulk set of the inner blocks
final int fromBlock = fromIndex / valuesPerBlock;
final int toBlock = toIndex / valuesPerBlock;
assert fromBlock * valuesPerBlock == fromIndex;
long blockValue = 0L;
for (int i = 0; i < valuesPerBlock; ++i) {
blockValue = blockValue | (val << (i * bitsPerValue));
}
Arrays.fill(blocks, fromBlock, toBlock, blockValue);
// fill the gap
for (int i = valuesPerBlock * toBlock; i < toIndex; ++i) {
set(i, val);
}
}
@SuppressWarnings("deprecation")
protected PackedInts.Format getFormat() {
return PackedInts.Format.PACKED_SINGLE_BLOCK;
}
@Override
public String toString() {
return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue + ",size=" + size() + ",blocks=" + blocks.length + ")";
}
public static XPacked64SingleBlock create(DataInput in, int valueCount, int bitsPerValue) throws IOException {
XPacked64SingleBlock reader = create(valueCount, bitsPerValue);
for (int i = 0; i < reader.blocks.length; ++i) {
reader.blocks[i] = in.readLong();
}
return reader;
}
public static XPacked64SingleBlock create(int valueCount, int bitsPerValue) {
switch (bitsPerValue) {
case 1:
return new XPacked64SingleBlock1(valueCount);
case 2:
return new XPacked64SingleBlock2(valueCount);
case 3:
return new XPacked64SingleBlock3(valueCount);
case 4:
return new XPacked64SingleBlock4(valueCount);
case 5:
return new XPacked64SingleBlock5(valueCount);
case 6:
return new XPacked64SingleBlock6(valueCount);
case 7:
return new XPacked64SingleBlock7(valueCount);
case 8:
return new XPacked64SingleBlock8(valueCount);
case 9:
return new XPacked64SingleBlock9(valueCount);
case 10:
return new XPacked64SingleBlock10(valueCount);
case 12:
return new XPacked64SingleBlock12(valueCount);
case 16:
return new XPacked64SingleBlock16(valueCount);
case 21:
return new XPacked64SingleBlock21(valueCount);
case 32:
return new XPacked64SingleBlock32(valueCount);
default:
throw new IllegalArgumentException("Unsupported number of bits per value: " + 32);
}
}
static class XPacked64SingleBlock1 extends XPacked64SingleBlock {
XPacked64SingleBlock1(int valueCount) {
super(valueCount, 1);
}
@Override
public long get(int index) {
final int o = index >>> 6;
final int b = index & 63;
final int shift = b << 0;
return (blocks[o] >>> shift) & 1L;
}
@Override
public void set(int index, long value) {
final int o = index >>> 6;
final int b = index & 63;
final int shift = b << 0;
blocks[o] = (blocks[o] & ~(1L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock2 extends XPacked64SingleBlock {
XPacked64SingleBlock2(int valueCount) {
super(valueCount, 2);
}
@Override
public long get(int index) {
final int o = index >>> 5;
final int b = index & 31;
final int shift = b << 1;
return (blocks[o] >>> shift) & 3L;
}
@Override
public void set(int index, long value) {
final int o = index >>> 5;
final int b = index & 31;
final int shift = b << 1;
blocks[o] = (blocks[o] & ~(3L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock3 extends XPacked64SingleBlock {
XPacked64SingleBlock3(int valueCount) {
super(valueCount, 3);
}
@Override
public long get(int index) {
final int o = index / 21;
final int b = index % 21;
final int shift = b * 3;
return (blocks[o] >>> shift) & 7L;
}
@Override
public void set(int index, long value) {
final int o = index / 21;
final int b = index % 21;
final int shift = b * 3;
blocks[o] = (blocks[o] & ~(7L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock4 extends XPacked64SingleBlock {
XPacked64SingleBlock4(int valueCount) {
super(valueCount, 4);
}
@Override
public long get(int index) {
final int o = index >>> 4;
final int b = index & 15;
final int shift = b << 2;
return (blocks[o] >>> shift) & 15L;
}
@Override
public void set(int index, long value) {
final int o = index >>> 4;
final int b = index & 15;
final int shift = b << 2;
blocks[o] = (blocks[o] & ~(15L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock5 extends XPacked64SingleBlock {
XPacked64SingleBlock5(int valueCount) {
super(valueCount, 5);
}
@Override
public long get(int index) {
final int o = index / 12;
final int b = index % 12;
final int shift = b * 5;
return (blocks[o] >>> shift) & 31L;
}
@Override
public void set(int index, long value) {
final int o = index / 12;
final int b = index % 12;
final int shift = b * 5;
blocks[o] = (blocks[o] & ~(31L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock6 extends XPacked64SingleBlock {
XPacked64SingleBlock6(int valueCount) {
super(valueCount, 6);
}
@Override
public long get(int index) {
final int o = index / 10;
final int b = index % 10;
final int shift = b * 6;
return (blocks[o] >>> shift) & 63L;
}
@Override
public void set(int index, long value) {
final int o = index / 10;
final int b = index % 10;
final int shift = b * 6;
blocks[o] = (blocks[o] & ~(63L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock7 extends XPacked64SingleBlock {
XPacked64SingleBlock7(int valueCount) {
super(valueCount, 7);
}
@Override
public long get(int index) {
final int o = index / 9;
final int b = index % 9;
final int shift = b * 7;
return (blocks[o] >>> shift) & 127L;
}
@Override
public void set(int index, long value) {
final int o = index / 9;
final int b = index % 9;
final int shift = b * 7;
blocks[o] = (blocks[o] & ~(127L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock8 extends XPacked64SingleBlock {
XPacked64SingleBlock8(int valueCount) {
super(valueCount, 8);
}
@Override
public long get(int index) {
final int o = index >>> 3;
final int b = index & 7;
final int shift = b << 3;
return (blocks[o] >>> shift) & 255L;
}
@Override
public void set(int index, long value) {
final int o = index >>> 3;
final int b = index & 7;
final int shift = b << 3;
blocks[o] = (blocks[o] & ~(255L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock9 extends XPacked64SingleBlock {
XPacked64SingleBlock9(int valueCount) {
super(valueCount, 9);
}
@Override
public long get(int index) {
final int o = index / 7;
final int b = index % 7;
final int shift = b * 9;
return (blocks[o] >>> shift) & 511L;
}
@Override
public void set(int index, long value) {
final int o = index / 7;
final int b = index % 7;
final int shift = b * 9;
blocks[o] = (blocks[o] & ~(511L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock10 extends XPacked64SingleBlock {
XPacked64SingleBlock10(int valueCount) {
super(valueCount, 10);
}
@Override
public long get(int index) {
final int o = index / 6;
final int b = index % 6;
final int shift = b * 10;
return (blocks[o] >>> shift) & 1023L;
}
@Override
public void set(int index, long value) {
final int o = index / 6;
final int b = index % 6;
final int shift = b * 10;
blocks[o] = (blocks[o] & ~(1023L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock12 extends XPacked64SingleBlock {
XPacked64SingleBlock12(int valueCount) {
super(valueCount, 12);
}
@Override
public long get(int index) {
final int o = index / 5;
final int b = index % 5;
final int shift = b * 12;
return (blocks[o] >>> shift) & 4095L;
}
@Override
public void set(int index, long value) {
final int o = index / 5;
final int b = index % 5;
final int shift = b * 12;
blocks[o] = (blocks[o] & ~(4095L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock16 extends XPacked64SingleBlock {
XPacked64SingleBlock16(int valueCount) {
super(valueCount, 16);
}
@Override
public long get(int index) {
final int o = index >>> 2;
final int b = index & 3;
final int shift = b << 4;
return (blocks[o] >>> shift) & 65535L;
}
@Override
public void set(int index, long value) {
final int o = index >>> 2;
final int b = index & 3;
final int shift = b << 4;
blocks[o] = (blocks[o] & ~(65535L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock21 extends XPacked64SingleBlock {
XPacked64SingleBlock21(int valueCount) {
super(valueCount, 21);
}
@Override
public long get(int index) {
final int o = index / 3;
final int b = index % 3;
final int shift = b * 21;
return (blocks[o] >>> shift) & 2097151L;
}
@Override
public void set(int index, long value) {
final int o = index / 3;
final int b = index % 3;
final int shift = b * 21;
blocks[o] = (blocks[o] & ~(2097151L << shift)) | (value << shift);
}
}
static class XPacked64SingleBlock32 extends XPacked64SingleBlock {
XPacked64SingleBlock32(int valueCount) {
super(valueCount, 32);
}
@Override
public long get(int index) {
final int o = index >>> 1;
final int b = index & 1;
final int shift = b << 5;
return (blocks[o] >>> shift) & 4294967295L;
}
@Override
public void set(int index, long value) {
final int o = index >>> 1;
final int b = index & 1;
final int shift = b << 5;
blocks[o] = (blocks[o] & ~(4294967295L << shift)) | (value << shift);
}
}
}
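The single-block variants above trade density for speed: with b bits per value a block holds 64 / b values and any remaining high bits of the long are simply never used, so every get/set touches exactly one array element. A small standalone sketch of that overhead, using the same supported bits-per-value table (the class name is made up for illustration):

// Sketch: how many values fit per long, and how many bits are wasted,
// for each bits-per-value supported by XPacked64SingleBlock.
public final class SingleBlockOverheadDemo {
    public static void main(String[] args) {
        final int[] supported = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32 };
        for (int bpv : supported) {
            final int valuesPerBlock = 64 / bpv;       // values stored in one long
            final int usedBits = valuesPerBlock * bpv; // bits that carry data
            final int wastedBits = 64 - usedBits;      // unused high bits per long
            System.out.printf("bpv=%2d valuesPerBlock=%2d wasted bits per long=%d%n", bpv, valuesPerBlock, wastedBits);
        }
        // e.g. 21 bits/value -> 3 values per long with 1 wasted bit,
        // while 7 bits/value -> 9 values per long with 1 wasted bit.
    }
}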

View File

@ -0,0 +1,740 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.packed;
import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts.Decoder;
import org.apache.lucene.util.packed.PackedInts.Encoder;
import org.apache.lucene.util.packed.PackedInts.Format;
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
import org.apache.lucene.util.packed.PackedInts.Reader;
import org.apache.lucene.util.packed.PackedInts.ReaderIterator;
import org.apache.lucene.util.packed.PackedInts.Writer;
/**
* Forked from Lucene 8.x; removed in Lucene 8.9
*
* Todo: further investigate a better alternative
*
* Simplistic compression for an array of unsigned long values. Each value is {@code >= 0} and {@code
* <=} a specified maximum value. The values are stored as packed ints, with each value consuming a
* fixed number of bits.
*/
public class XPackedInts {
/** At most 700% memory overhead, always select a direct implementation. */
public static final float FASTEST = 7f;
/** At most 50% memory overhead, always select a reasonably fast implementation. */
public static final float FAST = 0.5f;
/** At most 25% memory overhead. */
public static final float DEFAULT = 0.25f;
/** No memory overhead at all, but the returned implementation may be slow. */
public static final float COMPACT = 0f;
/** Default amount of memory to use for bulk operations. */
public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K
public static final String CODEC_NAME = "PackedInts";
public static final int VERSION_MONOTONIC_WITHOUT_ZIGZAG = 2;
public static final int VERSION_START = VERSION_MONOTONIC_WITHOUT_ZIGZAG;
public static final int VERSION_CURRENT = VERSION_MONOTONIC_WITHOUT_ZIGZAG;
/** Check the validity of a version number. */
public static void checkVersion(int version) {
if (version < VERSION_START) {
throw new IllegalArgumentException("Version is too old, should be at least " + VERSION_START + " (got " + version + ")");
} else if (version > VERSION_CURRENT) {
throw new IllegalArgumentException("Version is too new, should be at most " + VERSION_CURRENT + " (got " + version + ")");
}
}
/**
* Try to find the {@link Format} and number of bits per value that would restore from disk the
* fastest reader whose overhead is less than <code>acceptableOverheadRatio</code>.
*
* <p>The <code>acceptableOverheadRatio</code> parameter makes sense for random-access {@link
* Reader}s. In case you only plan to perform sequential access on this stream later on, you
* should probably use {@link PackedInts#COMPACT}.
*
* <p>If you don't know how many values you are going to write, use <code>valueCount = -1</code>.
*/
public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue, float acceptableOverheadRatio) {
if (valueCount == -1) {
valueCount = Integer.MAX_VALUE;
}
acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits
int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;
int actualBitsPerValue = -1;
// rounded number of bits per value are usually the fastest
if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
actualBitsPerValue = 8;
} else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
actualBitsPerValue = 16;
} else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
actualBitsPerValue = 32;
} else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
actualBitsPerValue = 64;
} else {
actualBitsPerValue = bitsPerValue;
}
return new FormatAndBits(Format.PACKED, actualBitsPerValue);
}
final static class XPackedWriter extends XWriter {
boolean finished;
final PackedInts.Format format;
final BulkOperation encoder;
final byte[] nextBlocks;
final long[] nextValues;
final int iterations;
int off;
int written;
XPackedWriter(PackedInts.Format format, DataOutput out, int valueCount, int bitsPerValue, int mem) {
super(out, valueCount, bitsPerValue);
this.format = format;
encoder = BulkOperation.of(format, bitsPerValue);
iterations = encoder.computeIterations(valueCount, mem);
nextBlocks = new byte[iterations * encoder.byteBlockCount()];
nextValues = new long[iterations * encoder.byteValueCount()];
off = 0;
written = 0;
finished = false;
}
@Override
protected PackedInts.Format getFormat() {
return format;
}
@Override
public void add(long v) throws IOException {
assert PackedInts.unsignedBitsRequired(v) <= bitsPerValue;
assert !finished;
if (valueCount != -1 && written >= valueCount) {
throw new EOFException("Writing past end of stream");
}
nextValues[off++] = v;
if (off == nextValues.length) {
flush();
}
++written;
}
@Override
public void finish() throws IOException {
assert !finished;
if (valueCount != -1) {
while (written < valueCount) {
add(0L);
}
}
flush();
finished = true;
}
private void flush() throws IOException {
encoder.encode(nextValues, 0, nextBlocks, 0, iterations);
final int blockCount = (int) format.byteCount(PackedInts.VERSION_CURRENT, off, bitsPerValue);
out.writeBytes(nextBlocks, blockCount);
Arrays.fill(nextValues, 0L);
off = 0;
}
@Override
public int ord() {
return written - 1;
}
}
/**
* A packed integer array that can be modified.
*
*/
public abstract static class Mutable extends Reader {
/**
* @return the number of bits used to store any given value. Note: This does not imply that
* memory usage is {@code bitsPerValue * #values} as implementations are free to use
* non-space-optimal packing of bits.
*/
public abstract int getBitsPerValue();
/**
* Set the value at the given index in the array.
*
* @param index where the value should be positioned.
* @param value a value conforming to the constraints set by the array.
*/
public abstract void set(int index, long value);
/**
* Bulk set: set at least one and at most <code>len</code> longs starting at <code>off</code> in
* <code>arr</code> into this mutable, starting at <code>index</code>. Returns the actual number
* of values that have been set.
*/
public int set(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < size();
len = Math.min(len, size() - index);
assert off + len <= arr.length;
for (int i = index, o = off, end = index + len; i < end; ++i, ++o) {
set(i, arr[o]);
}
return len;
}
/**
* Fill the mutable from <code>fromIndex</code> (inclusive) to <code>toIndex</code> (exclusive)
* with <code>val</code>.
*/
public void fill(int fromIndex, int toIndex, long val) {
assert val <= maxValue(getBitsPerValue());
assert fromIndex <= toIndex;
for (int i = fromIndex; i < toIndex; ++i) {
set(i, val);
}
}
/** Sets all values to 0. */
public void clear() {
fill(0, size(), 0);
}
/**
* Save this mutable into <code>out</code>. Instantiating a reader from the generated data will
* return a reader with the same number of bits per value.
*/
public void save(DataOutput out) throws IOException {
XWriter writer = getWriterNoHeader(out, getFormat(), size(), getBitsPerValue(), DEFAULT_BUFFER_SIZE);
writer.writeHeader();
for (int i = 0; i < size(); ++i) {
writer.add(get(i));
}
writer.finish();
}
/** The underlying format. */
Format getFormat() {
return Format.PACKED;
}
}
/**
* A simple base for Readers that keeps track of valueCount and bitsPerValue.
*
*/
abstract static class ReaderImpl extends Reader {
protected final int valueCount;
protected ReaderImpl(int valueCount) {
this.valueCount = valueCount;
}
@Override
public abstract long get(int index);
@Override
public final int size() {
return valueCount;
}
}
abstract static class MutableImpl extends Mutable {
protected final int valueCount;
protected final int bitsPerValue;
protected MutableImpl(int valueCount, int bitsPerValue) {
this.valueCount = valueCount;
assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
this.bitsPerValue = bitsPerValue;
}
@Override
public final int getBitsPerValue() {
return bitsPerValue;
}
@Override
public final int size() {
return valueCount;
}
@Override
public String toString() {
return getClass().getSimpleName() + "(valueCount=" + valueCount + ",bitsPerValue=" + bitsPerValue + ")";
}
}
/** A {@link Reader} which has all its values equal to 0 (bitsPerValue = 0). */
public static final class NullReader extends Reader {
private final int valueCount;
/** Sole constructor. */
public NullReader(int valueCount) {
this.valueCount = valueCount;
}
@Override
public long get(int index) {
return 0;
}
@Override
public int get(int index, long[] arr, int off, int len) {
assert len > 0 : "len must be > 0 (got " + len + ")";
assert index >= 0 && index < valueCount;
len = Math.min(len, valueCount - index);
Arrays.fill(arr, off, off + len, 0);
return len;
}
@Override
public int size() {
return valueCount;
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + Integer.BYTES);
}
}
/**
* A write-once Writer.
*
*/
public abstract static class XWriter extends Writer {
protected XWriter(DataOutput out, int valueCount, int bitsPerValue) {
super(out, valueCount, bitsPerValue);
}
void writeHeader() throws IOException {
assert valueCount != -1;
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
out.writeVInt(bitsPerValue);
out.writeVInt(valueCount);
out.writeVInt(getFormat().getId());
}
}
/**
* Get a {@link Decoder}.
*
* @param format the format used to store packed ints
* @param version the compatibility version
* @param bitsPerValue the number of bits per value
* @return a decoder
*/
public static Decoder getDecoder(Format format, int version, int bitsPerValue) {
checkVersion(version);
return BulkOperation.of(format, bitsPerValue);
}
/**
* Get an {@link Encoder}.
*
* @param format the format used to store packed ints
* @param version the compatibility version
* @param bitsPerValue the number of bits per value
* @return an encoder
*/
public static Encoder getEncoder(Format format, int version, int bitsPerValue) {
checkVersion(version);
return BulkOperation.of(format, bitsPerValue);
}
/**
* Expert: Restore a {@link Reader} from a stream without reading metadata at the beginning of the
* stream. This method is useful to restore data from streams which have been created using {@link
* XPackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
*
* @param in the stream to read data from, positioned at the beginning of the packed values
* @param format the format used to serialize
* @param version the version used to serialize the data
* @param valueCount how many values the stream holds
* @param bitsPerValue the number of bits per value
* @return a Reader
* @throws IOException If there is a low-level I/O error
* @see XPackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
*/
public static Reader getReaderNoHeader(DataInput in, Format format, int version, int valueCount, int bitsPerValue) throws IOException {
checkVersion(version);
switch (format) {
case PACKED_SINGLE_BLOCK:
return XPacked64SingleBlock.create(in, valueCount, bitsPerValue);
case PACKED:
return new XPacked64(version, in, valueCount, bitsPerValue);
default:
throw new AssertionError("Unknown Writer format: " + format);
}
}
/**
* Restore a {@link Reader} from a stream.
*
* @param in the stream to read data from
* @return a Reader
* @throws IOException If there is a low-level I/O error
*/
public static Reader getReader(DataInput in) throws IOException {
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final Format format = Format.byId(in.readVInt());
return getReaderNoHeader(in, format, version, valueCount, bitsPerValue);
}
/**
* Expert: Restore a {@link ReaderIterator} from a stream without reading metadata at the
* beginning of the stream. This method is useful to restore data from streams which have been
* created using {@link XPackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
*
* @param in the stream to read data from, positioned at the beginning of the packed values
* @param format the format used to serialize
* @param version the version used to serialize the data
* @param valueCount how many values the stream holds
* @param bitsPerValue the number of bits per value
* @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up
* iteration)
* @return a ReaderIterator
* @see XPackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
*/
public static ReaderIterator getReaderIteratorNoHeader(
DataInput in,
Format format,
int version,
int valueCount,
int bitsPerValue,
int mem
) {
checkVersion(version);
return new PackedReaderIterator(format, version, valueCount, bitsPerValue, in, mem);
}
/**
* Retrieve PackedInts as a {@link ReaderIterator}
*
* @param in positioned at the beginning of a stored packed int structure.
* @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up
* iteration)
* @return an iterator to access the values
* @throws IOException if the structure could not be retrieved.
*/
public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException {
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final Format format = Format.byId(in.readVInt());
return getReaderIteratorNoHeader(in, format, version, valueCount, bitsPerValue, mem);
}
/**
* Expert: Construct a direct {@link Reader} from a stream without reading metadata at the
* beginning of the stream. This method is useful to restore data from streams which have been
* created using {@link XPackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
*
* <p>The returned reader will have very little memory overhead, but every call to {@link
* Reader#get(int)} is likely to perform a disk seek.
*
* @param in the stream to read data from
* @param format the format used to serialize
* @param version the version used to serialize the data
* @param valueCount how many values the stream holds
* @param bitsPerValue the number of bits per value
* @return a direct Reader
*/
public static Reader getDirectReaderNoHeader(final IndexInput in, Format format, int version, int valueCount, int bitsPerValue) {
checkVersion(version);
switch (format) {
case PACKED:
return new DirectPackedReader(bitsPerValue, valueCount, in);
case PACKED_SINGLE_BLOCK:
return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
default:
throw new AssertionError("Unknown format: " + format);
}
}
/**
* Construct a direct {@link Reader} from an {@link IndexInput}. This method is useful to restore
* data from streams which have been created using {@link XPackedInts#getWriter(DataOutput, int,
* int, float)}.
*
* <p>The returned reader will have very little memory overhead, but every call to {@link
* Reader#get(int)} is likely to perform a disk seek.
*
* @param in the stream to read data from
* @return a direct Reader
* @throws IOException If there is a low-level I/O error
*/
public static Reader getDirectReader(IndexInput in) throws IOException {
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
final int bitsPerValue = in.readVInt();
assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
final int valueCount = in.readVInt();
final Format format = Format.byId(in.readVInt());
return getDirectReaderNoHeader(in, format, version, valueCount, bitsPerValue);
}
/**
* Create a packed integer array with the given number of values initialized to 0. The valueCount
* and the bitsPerValue cannot be changed after creation. All Mutables known by this factory are
* kept fully in RAM.
*
* <p>Positive values of <code>acceptableOverheadRatio</code> will trade space for speed by
* selecting a faster but potentially less memory-efficient implementation. An <code>
* acceptableOverheadRatio</code> of {@link PackedInts#COMPACT} will make sure that the most
* memory-efficient implementation is selected whereas {@link PackedInts#FASTEST} will make sure
* that the fastest implementation is selected.
*
* @param valueCount the number of elements
* @param bitsPerValue the number of bits available for any given value
* @param acceptableOverheadRatio an acceptable overhead ratio per value
* @return a mutable packed integer array
*/
public static Mutable getMutable(int valueCount, int bitsPerValue, float acceptableOverheadRatio) {
final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
return getMutable(valueCount, formatAndBits.bitsPerValue, formatAndBits.format);
}
/**
* Same as {@link #getMutable(int, int, float)} with a pre-computed number of bits per value and
* format.
*
*/
public static Mutable getMutable(int valueCount, int bitsPerValue, PackedInts.Format format) {
assert valueCount >= 0;
switch (format) {
case PACKED_SINGLE_BLOCK:
return XPacked64SingleBlock.create(valueCount, bitsPerValue);
case PACKED:
return new XPacked64(valueCount, bitsPerValue);
default:
throw new AssertionError();
}
}
/**
* Expert: Create a packed integer array writer for the given output, format, value count, and
* number of bits per value.
*
* <p>The resulting stream will be long-aligned. This means that depending on the format which is
* used, up to 63 bits will be wasted. An easy way to make sure that no space is lost is to always
* use a <code>valueCount</code> that is a multiple of 64.
*
* <p>This method does not write any metadata to the stream, meaning that it is your
* responsibility to store it somewhere else in order to be able to recover data from the stream
* later on:
*
* <ul>
* <li><code>format</code> (using {@link Format#getId()}),
* <li><code>valueCount</code>,
* <li><code>bitsPerValue</code>,
* <li>{@link #VERSION_CURRENT}.
* </ul>
*
* <p>It is possible to start writing values without knowing how many of them you are actually
* going to write. To do this, just pass <code>-1</code> as <code>valueCount</code>. On the other
* hand, for any positive value of <code>valueCount</code>, the returned writer will make sure
* that you don't write more values than expected and pad the end of stream with zeros in case you
* have written less than <code>valueCount</code> when calling {@link Writer#finish()}.
*
* <p>The <code>mem</code> parameter lets you control how much memory can be used to buffer
* changes in memory before flushing to disk. High values of <code>mem</code> are likely to
* improve throughput. On the other hand, if speed is not that important to you, a value of <code>
* 0</code> will use as little memory as possible and should already offer reasonable throughput.
*
* @param out the data output
* @param format the format to use to serialize the values
* @param valueCount the number of values
* @param bitsPerValue the number of bits per value
* @param mem how much memory (in bytes) can be used to speed up serialization
* @return a Writer
* @see XPackedInts#getReaderIteratorNoHeader(DataInput, Format, int, int, int, int)
* @see XPackedInts#getReaderNoHeader(DataInput, Format, int, int, int)
*/
public static XWriter getWriterNoHeader(DataOutput out, Format format, int valueCount, int bitsPerValue, int mem) {
return new XPackedWriter(format, out, valueCount, bitsPerValue, mem);
}
/**
* Create a packed integer array writer for the given output, format, value count, and number of
* bits per value.
*
* <p>The resulting stream will be long-aligned. This means that depending on the format which is
* used under the hood, up to 63 bits will be wasted. An easy way to make sure that no space is
* lost is to always use a <code>valueCount</code> that is a multiple of 64.
*
* <p>This method writes metadata to the stream, so that the resulting stream is sufficient to
* restore a {@link Reader} from it. You don't need to track <code>valueCount</code> or <code>
* bitsPerValue</code> by yourself. In case this is a problem, you should probably look at {@link
* #getWriterNoHeader(DataOutput, Format, int, int, int)}.
*
* <p>The <code>acceptableOverheadRatio</code> parameter controls how readers that will be
* restored from this stream trade space for speed by selecting a faster but potentially less
* memory-efficient implementation. An <code>acceptableOverheadRatio</code> of {@link
* PackedInts#COMPACT} will make sure that the most memory-efficient implementation is selected
* whereas {@link PackedInts#FASTEST} will make sure that the fastest implementation is selected.
* In case you are only interested in reading this stream sequentially later on, you should
* probably use {@link PackedInts#COMPACT}.
*
* @param out the data output
* @param valueCount the number of values
* @param bitsPerValue the number of bits per value
* @param acceptableOverheadRatio an acceptable overhead ratio per value
* @return a Writer
* @throws IOException If there is a low-level I/O error
*/
public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue, float acceptableOverheadRatio) throws IOException {
assert valueCount >= 0;
final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
final XWriter writer = getWriterNoHeader(out, formatAndBits.format, valueCount, formatAndBits.bitsPerValue, DEFAULT_BUFFER_SIZE);
writer.writeHeader();
return writer;
}
/**
* Returns how many bits are required to hold values up to and including maxValue. NOTE: This
* method returns at least 1.
*
* @param maxValue the maximum value that should be representable.
* @return the amount of bits needed to represent values from 0 to maxValue.
*/
public static int bitsRequired(long maxValue) {
if (maxValue < 0) {
throw new IllegalArgumentException("maxValue must be non-negative (got: " + maxValue + ")");
}
return unsignedBitsRequired(maxValue);
}
/**
* Returns how many bits are required to store <code>bits</code>, interpreted as an unsigned
* value. NOTE: This method returns at least 1.
*
*/
public static int unsignedBitsRequired(long bits) {
return Math.max(1, 64 - Long.numberOfLeadingZeros(bits));
}
/**
* Calculates the maximum unsigned long that can be expressed with the given number of bits.
*
* @param bitsPerValue the number of bits available for any given value.
* @return the maximum value for the given bits.
*/
public static long maxValue(int bitsPerValue) {
return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue);
}
/**
* Copy <code>src[srcPos:srcPos+len]</code> into <code>dest[destPos:destPos+len]</code> using at
* most <code>mem</code> bytes.
*/
public static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, int mem) {
assert srcPos + len <= src.size();
assert destPos + len <= dest.size();
final int capacity = mem >>> 3;
if (capacity == 0) {
for (int i = 0; i < len; ++i) {
dest.set(destPos++, src.get(srcPos++));
}
} else if (len > 0) {
// use bulk operations
final long[] buf = new long[Math.min(capacity, len)];
copy(src, srcPos, dest, destPos, len, buf);
}
}
/**
* Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer.
*/
static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) {
assert buf.length > 0;
int remaining = 0;
while (len > 0) {
final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining));
assert read > 0;
srcPos += read;
len -= read;
remaining += read;
final int written = dest.set(destPos, buf, 0, remaining);
assert written > 0;
destPos += written;
if (written < remaining) {
System.arraycopy(buf, written, buf, 0, remaining - written);
}
remaining -= written;
}
while (remaining > 0) {
final int written = dest.set(destPos, buf, 0, remaining);
destPos += written;
remaining -= written;
System.arraycopy(buf, written, buf, 0, remaining);
}
}
/**
* Check that the block size is a power of 2, in the right bounds, and return its log in base 2.
*/
static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) {
if (blockSize < minBlockSize || blockSize > maxBlockSize) {
throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize);
}
if ((blockSize & (blockSize - 1)) != 0) {
throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
}
return Integer.numberOfTrailingZeros(blockSize);
}
/**
* Return the number of blocks required to store <code>size</code> values on <code>blockSize
* </code>.
*/
static int numBlocks(long size, int blockSize) {
final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1);
if ((long) numBlocks * blockSize < size) {
throw new IllegalArgumentException("size is too large for this block size");
}
return numBlocks;
}
}
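A round-trip sketch of the forked API: XPackedInts.getWriter writes the PackedInts header plus the packed, long-aligned values, and XPackedInts.getReader restores a random-access reader from the same bytes. ByteBuffersDataOutput is the stock Lucene in-memory DataOutput, used here purely for illustration; the values and class name are made up:

import java.io.IOException;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.XPackedInts;

public final class XPackedIntsRoundTrip {
    public static void main(String[] args) throws IOException {
        final long[] values = { 3, 1, 4, 1, 5, 9, 2, 6 };
        final int bitsPerValue = XPackedInts.bitsRequired(9); // 4 bits cover values up to 9

        // Write: header (codec name, version, bitsPerValue, valueCount, format) + packed values.
        ByteBuffersDataOutput out = new ByteBuffersDataOutput();
        PackedInts.Writer writer = XPackedInts.getWriter(out, values.length, bitsPerValue, PackedInts.COMPACT);
        for (long v : values) {
            writer.add(v);
        }
        writer.finish();

        // Read: check the header and restore a random-access Reader.
        DataInput in = out.toDataInput();
        PackedInts.Reader reader = XPackedInts.getReader(in);
        for (int i = 0; i < values.length; i++) {
            if (reader.get(i) != values[i]) {
                throw new AssertionError("mismatch at " + i);
            }
        }
        System.out.println("restored " + reader.size() + " values at " + bitsPerValue + " bits each");
    }
}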

View File

@ -80,7 +80,7 @@ public class Version implements Comparable<Version>, ToXContentFragment {
public static final Version V_1_2_5 = new Version(1020599, org.apache.lucene.util.Version.LUCENE_8_10_1);
public static final Version V_1_3_0 = new Version(1030099, org.apache.lucene.util.Version.LUCENE_8_10_1);
public static final Version V_1_4_0 = new Version(1040099, org.apache.lucene.util.Version.LUCENE_8_10_1);
public static final Version V_2_0_0 = new Version(2000099, org.apache.lucene.util.Version.LUCENE_8_10_1);
public static final Version V_2_0_0 = new Version(2000099, org.apache.lucene.util.Version.LUCENE_9_0_0);
public static final Version CURRENT = V_2_0_0;
public static Version readVersion(StreamInput in) throws IOException {

View File

@ -154,13 +154,6 @@ public class IndicesSegmentResponse extends BroadcastResponse {
if (segment.getSegmentSort() != null) {
toXContent(builder, segment.getSegmentSort());
}
if (segment.ramTree != null) {
builder.startArray(Fields.RAM_TREE);
for (Accountable child : segment.ramTree.getChildResources()) {
toXContent(builder, child);
}
builder.endArray();
}
if (segment.attributes != null && segment.attributes.isEmpty() == false) {
builder.field("attributes", segment.attributes);
}

View File

@ -224,7 +224,6 @@ public final class SearchPhaseController {
if (results.isEmpty()) {
return null;
}
final boolean setShardIndex = false;
final TopDocs topDocs = results.stream().findFirst().get();
final TopDocs mergedTopDocs;
final int numShards = results.size();
@ -234,15 +233,15 @@ public final class SearchPhaseController {
CollapseTopFieldDocs firstTopDocs = (CollapseTopFieldDocs) topDocs;
final Sort sort = new Sort(firstTopDocs.fields);
final CollapseTopFieldDocs[] shardTopDocs = results.toArray(new CollapseTopFieldDocs[numShards]);
mergedTopDocs = CollapseTopFieldDocs.merge(sort, from, topN, shardTopDocs, setShardIndex);
mergedTopDocs = CollapseTopFieldDocs.merge(sort, from, topN, shardTopDocs, false);
} else if (topDocs instanceof TopFieldDocs) {
TopFieldDocs firstTopDocs = (TopFieldDocs) topDocs;
final Sort sort = new Sort(firstTopDocs.fields);
final TopFieldDocs[] shardTopDocs = results.toArray(new TopFieldDocs[numShards]);
mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs, setShardIndex);
mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs);
} else {
final TopDocs[] shardTopDocs = results.toArray(new TopDocs[numShards]);
mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs, setShardIndex);
mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs);
}
return mergedTopDocs;
}
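For context on the hunk above: Lucene 9 drops the TopDocs.merge overloads that took a setShardIndex flag (this code always passed false), so the plain signatures are used now, while CollapseTopFieldDocs.merge still accepts the flag explicitly. A standalone sketch of the Lucene 9 behaviour with made-up shard results; if shard indices matter, they have to be assigned on the ScoreDocs before merging:

import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;

public final class TopDocsMergeExample {
    public static void main(String[] args) {
        // Two made-up shard results; doc ids are shard-local.
        TopDocs shard0 = new TopDocs(
            new TotalHits(2, TotalHits.Relation.EQUAL_TO),
            new ScoreDoc[] { new ScoreDoc(10, 2.0f), new ScoreDoc(11, 1.0f) }
        );
        TopDocs shard1 = new TopDocs(
            new TotalHits(1, TotalHits.Relation.EQUAL_TO),
            new ScoreDoc[] { new ScoreDoc(7, 1.5f) }
        );

        // Lucene 9 never sets shardIndex for you; either leave it at -1 everywhere
        // or assign it consistently up front, as done here.
        shard0.scoreDocs[0].shardIndex = 0;
        shard0.scoreDocs[1].shardIndex = 0;
        shard1.scoreDocs[0].shardIndex = 1;

        TopDocs merged = TopDocs.merge(0, 3, new TopDocs[] { shard0, shard1 });
        for (ScoreDoc sd : merged.scoreDocs) {
            System.out.println("doc=" + sd.doc + " score=" + sd.score + " shard=" + sd.shardIndex);
        }
    }
}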

View File

@ -32,10 +32,11 @@
package org.opensearch.action.search;
import org.apache.lucene.store.RAMOutputStream;
import org.opensearch.LegacyESVersion;
import org.opensearch.Version;
import org.opensearch.common.bytes.BytesReference;
import org.opensearch.common.io.stream.BytesStreamInput;
import org.opensearch.common.io.stream.BytesStreamOutput;
import org.opensearch.common.util.concurrent.AtomicArray;
import org.opensearch.search.SearchPhaseResult;
import org.opensearch.search.SearchShardTarget;
@ -57,7 +58,8 @@ final class TransportSearchHelper {
static String buildScrollId(AtomicArray<? extends SearchPhaseResult> searchPhaseResults, Version version) {
boolean includeContextUUID = version.onOrAfter(LegacyESVersion.V_7_7_0);
try (RAMOutputStream out = new RAMOutputStream()) {
try {
BytesStreamOutput out = new BytesStreamOutput();
if (includeContextUUID) {
out.writeString(INCLUDE_CONTEXT_UUID);
}
@ -77,8 +79,7 @@ final class TransportSearchHelper {
out.writeString(searchShardTarget.getNodeId());
}
}
byte[] bytes = new byte[(int) out.getFilePointer()];
out.writeTo(bytes, 0);
byte[] bytes = BytesReference.toBytes(out.bytes());
return Base64.getUrlEncoder().encodeToString(bytes);
} catch (IOException e) {
throw new UncheckedIOException(e);

Some files were not shown because too many files have changed in this diff.