[Upgrade] Lucene 9.0.0 release (#1109)
This commit upgrades the core codebase from Lucene 8.10.1 to Lucene 9.0.0. It includes all refactoring of features and API changes required when upgrading to a new major Lucene release.

Signed-off-by: Nicholas Walter Knize <nknize@apache.org>
Co-authored-by: Andriy Redko <drreta@gmail.com>
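Most of the hunks below repeat a handful of mechanical migration patterns. For orientation, here is a minimal sketch (not code from the commit; field and term names are invented) of one of them: Lucene 9 removed SynonymQuery's varargs constructor in favor of a builder that ties every term to a single field declared up front.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.SynonymQuery;

public class SynonymQueryMigration {
    // Lucene 8.x allowed: new SynonymQuery(new Term("f", "a"), new Term("f", "b"))
    // Lucene 9.x requires the builder, with the field named once.
    public static SynonymQuery lucene9Style() {
        return new SynonymQuery.Builder("f")
            .addTerm(new Term("f", "a"))
            .addTerm(new Term("f", "b"))
            .build();
    }
}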
parent 757abdb9a0
commit 006c832c5f
@@ -230,7 +230,10 @@ tasks.register("branchConsistency") {
 allprojects {
   // configure compiler options
   tasks.withType(JavaCompile).configureEach { JavaCompile compile ->
-    compile.options.compilerArgs << '-Werror'
+    // See please https://bugs.openjdk.java.net/browse/JDK-8209058
+    if (BuildParams.runtimeJavaVersion > JavaVersion.VERSION_11) {
+      compile.options.compilerArgs << '-Werror'
+    }
     compile.options.compilerArgs << '-Xlint:auxiliaryclass'
     compile.options.compilerArgs << '-Xlint:cast'
     compile.options.compilerArgs << '-Xlint:classfile'
@@ -1,5 +1,5 @@
 opensearch = 2.0.0
-lucene = 8.10.1
+lucene = 9.0.0

 bundled_jdk_vendor = adoptium
 bundled_jdk = 17.0.2+8

@@ -11,7 +11,7 @@ spatial4j = 0.7
 jts = 1.15.0
 jackson = 2.12.6
 snakeyaml = 1.26
-icu4j = 62.1
+icu4j = 68.2
 supercsv = 2.4.0
 log4j = 2.17.1
 slf4j = 1.6.2
@@ -32,7 +32,9 @@
 package org.opensearch.common.settings;

+import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
 import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.NIOFSDirectory;

@@ -328,13 +330,14 @@ public class KeyStoreWrapperTests extends OpenSearchTestCase {
         byte[] encryptedBytes,
         int truncEncryptedDataLength
     ) throws Exception {
-        indexOutput.writeInt(4 + salt.length + 4 + iv.length + 4 + encryptedBytes.length);
-        indexOutput.writeInt(salt.length);
-        indexOutput.writeBytes(salt, salt.length);
-        indexOutput.writeInt(iv.length);
-        indexOutput.writeBytes(iv, iv.length);
-        indexOutput.writeInt(encryptedBytes.length - truncEncryptedDataLength);
-        indexOutput.writeBytes(encryptedBytes, encryptedBytes.length);
+        DataOutput io = EndiannessReverserUtil.wrapDataOutput(indexOutput);
+        io.writeInt(4 + salt.length + 4 + iv.length + 4 + encryptedBytes.length);
+        io.writeInt(salt.length);
+        io.writeBytes(salt, salt.length);
+        io.writeInt(iv.length);
+        io.writeBytes(iv, iv.length);
+        io.writeInt(encryptedBytes.length - truncEncryptedDataLength);
+        io.writeBytes(encryptedBytes, encryptedBytes.length);
     }

     public void testUpgradeAddsSeed() throws Exception {

@@ -363,7 +366,7 @@ public class KeyStoreWrapperTests extends OpenSearchTestCase {
         assumeFalse("Can't run in a FIPS JVM as PBE is not available", inFipsJvm());
         Path configDir = env.configFile();
         NIOFSDirectory directory = new NIOFSDirectory(configDir);
-        try (IndexOutput output = directory.createOutput("opensearch.keystore", IOContext.DEFAULT)) {
+        try (IndexOutput output = EndiannessReverserUtil.createOutput(directory, "opensearch.keystore", IOContext.DEFAULT)) {
             CodecUtil.writeHeader(output, "opensearch.keystore", 1);
             output.writeByte((byte) 0); // hasPassword = false
             output.writeString("PKCS12");

@@ -396,7 +399,7 @@ public class KeyStoreWrapperTests extends OpenSearchTestCase {
         NIOFSDirectory directory = new NIOFSDirectory(configDir);
         byte[] fileBytes = new byte[20];
         random().nextBytes(fileBytes);
-        try (IndexOutput output = directory.createOutput("opensearch.keystore", IOContext.DEFAULT)) {
+        try (IndexOutput output = EndiannessReverserUtil.createOutput(directory, "opensearch.keystore", IOContext.DEFAULT)) {

             CodecUtil.writeHeader(output, "opensearch.keystore", 2);
             output.writeByte((byte) 0); // hasPassword = false
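Context for the keystore test changes above: Lucene 9 switched its on-disk byte order from big-endian to little-endian, so tests that reproduce pre-9.0 keystore bytes must route writes through the backward_codecs wrapper. A minimal sketch under those assumptions (directory and file name are illustrative, not from the commit):

import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;

public class LegacyEndianWrite {
    static void writeLegacyInt(Directory dir) throws java.io.IOException {
        // EndiannessReverserUtil flips the byte order so the bytes on disk
        // match what Lucene 8.x would have produced.
        try (IndexOutput out = EndiannessReverserUtil.createOutput(dir, "legacy.bin", IOContext.DEFAULT)) {
            DataOutput legacy = EndiannessReverserUtil.wrapDataOutput(out);
            legacy.writeInt(42); // written big-endian, the pre-9.0 layout
        }
    }
}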
@@ -32,7 +32,7 @@
 package org.opensearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.ClassicFilter;
+import org.apache.lucene.analysis.classic.ClassicFilter;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -33,7 +33,7 @@
 package org.opensearch.analysis.common;

 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.standard.ClassicTokenizer;
+import org.apache.lucene.analysis.classic.ClassicTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
@@ -51,6 +51,8 @@ import org.apache.lucene.analysis.cjk.CJKBigramFilter;
 import org.apache.lucene.analysis.cjk.CJKWidthFilter;
 import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
 import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
+import org.apache.lucene.analysis.classic.ClassicFilter;
+import org.apache.lucene.analysis.classic.ClassicTokenizer;
 import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
 import org.apache.lucene.analysis.core.KeywordTokenizer;

@@ -64,6 +66,7 @@ import org.apache.lucene.analysis.de.GermanAnalyzer;
 import org.apache.lucene.analysis.de.GermanNormalizationFilter;
 import org.apache.lucene.analysis.de.GermanStemFilter;
 import org.apache.lucene.analysis.el.GreekAnalyzer;
+import org.apache.lucene.analysis.email.UAX29URLEmailTokenizer;
 import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.en.KStemFilter;
 import org.apache.lucene.analysis.en.PorterStemFilter;

@@ -113,10 +116,7 @@ import org.apache.lucene.analysis.ro.RomanianAnalyzer;
 import org.apache.lucene.analysis.ru.RussianAnalyzer;
 import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
-import org.apache.lucene.analysis.standard.ClassicFilter;
-import org.apache.lucene.analysis.standard.ClassicTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
 import org.apache.lucene.analysis.sv.SwedishAnalyzer;
 import org.apache.lucene.analysis.th.ThaiAnalyzer;
 import org.apache.lucene.analysis.th.ThaiTokenizer;
@@ -62,10 +62,18 @@ public class MinHashTokenFilterFactory extends AbstractTokenFilterFactory {

     private Map<String, String> convertSettings(Settings settings) {
         Map<String, String> settingMap = new HashMap<>();
-        settingMap.put("hashCount", settings.get("hash_count"));
-        settingMap.put("bucketCount", settings.get("bucket_count"));
-        settingMap.put("hashSetSize", settings.get("hash_set_size"));
-        settingMap.put("withRotation", settings.get("with_rotation"));
+        if (settings.hasValue("hash_count")) {
+            settingMap.put("hashCount", settings.get("hash_count"));
+        }
+        if (settings.hasValue("bucketCount")) {
+            settingMap.put("bucketCount", settings.get("bucket_count"));
+        }
+        if (settings.hasValue("hashSetSize")) {
+            settingMap.put("hashSetSize", settings.get("hash_set_size"));
+        }
+        if (settings.hasValue("with_rotation")) {
+            settingMap.put("withRotation", settings.get("with_rotation"));
+        }
         return settingMap;
     }
 }
@@ -34,7 +34,7 @@ package org.opensearch.analysis.common;

 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
+import org.apache.lucene.analysis.email.UAX29URLEmailTokenizer;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.Environment;
 import org.opensearch.index.IndexSettings;
@@ -110,6 +110,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
         filters.put("latvianstem", StemmerTokenFilterFactory.class);
         filters.put("norwegianlightstem", StemmerTokenFilterFactory.class);
         filters.put("norwegianminimalstem", StemmerTokenFilterFactory.class);
+        filters.put("norwegiannormalization", Void.class);
         filters.put("portuguesestem", StemmerTokenFilterFactory.class);
         filters.put("portugueselightstem", StemmerTokenFilterFactory.class);
         filters.put("portugueseminimalstem", StemmerTokenFilterFactory.class);

@@ -117,6 +118,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
         filters.put("soranistem", StemmerTokenFilterFactory.class);
         filters.put("spanishlightstem", StemmerTokenFilterFactory.class);
         filters.put("swedishlightstem", StemmerTokenFilterFactory.class);
+        filters.put("swedishminimalstem", Void.class);
         filters.put("stemmeroverride", StemmerOverrideTokenFilterFactory.class);
         filters.put("kstem", KStemTokenFilterFactory.class);
         filters.put("synonym", SynonymTokenFilterFactory.class);

@@ -242,7 +244,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
         tokenizers.put("keyword", null);
         tokenizers.put("lowercase", Void.class);
         tokenizers.put("classic", null);
-        tokenizers.put("uax_url_email", org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory.class);
+        tokenizers.put("uax_url_email", org.apache.lucene.analysis.email.UAX29URLEmailTokenizerFactory.class);
         tokenizers.put("path_hierarchy", null);
         tokenizers.put("letter", null);
         tokenizers.put("whitespace", null);
@@ -107,11 +107,15 @@ public class DisableGraphQueryTests extends OpenSearchSingleNodeTestCase {
         // parsed queries for "text_shingle_unigram:(foo bar baz)" with query parsers
         // that ignores position length attribute
         expectedQueryWithUnigram = new BooleanQuery.Builder().add(
-            new SynonymQuery(new Term("text_shingle_unigram", "foo"), new Term("text_shingle_unigram", "foo bar")),
+            new SynonymQuery.Builder("text_shingle_unigram").addTerm(new Term("text_shingle_unigram", "foo"))
+                .addTerm(new Term("text_shingle_unigram", "foo bar"))
+                .build(),
             BooleanClause.Occur.SHOULD
         )
             .add(
-                new SynonymQuery(new Term("text_shingle_unigram", "bar"), new Term("text_shingle_unigram", "bar baz")),
+                new SynonymQuery.Builder("text_shingle_unigram").addTerm(new Term("text_shingle_unigram", "bar"))
+                    .addTerm(new Term("text_shingle_unigram", "bar baz"))
+                    .build(),
                 BooleanClause.Occur.SHOULD
             )
             .add(new TermQuery(new Term("text_shingle_unigram", "baz")), BooleanClause.Occur.SHOULD)
@@ -1 +0,0 @@
-24932a4be7064a99126d80776718845b356abae0

@@ -0,0 +1 @@
+0a3d818d6f6fb113831ed34553b24763fbda1e84
@@ -37,7 +37,6 @@ import org.apache.lucene.expressions.SimpleBindings;
 import org.apache.lucene.expressions.js.JavascriptCompiler;
 import org.apache.lucene.expressions.js.VariableContext;
 import org.apache.lucene.search.DoubleValuesSource;
-import org.apache.lucene.search.SortField;
 import org.opensearch.SpecialPermission;
 import org.opensearch.common.Nullable;
 import org.opensearch.index.fielddata.IndexFieldData;

@@ -263,7 +262,7 @@ public class ExpressionScriptEngine implements ScriptEngine {
         for (String variable : expr.variables) {
             try {
                 if (variable.equals("_score")) {
-                    bindings.add(new SortField("_score", SortField.Type.SCORE));
+                    bindings.add("_score", DoubleValuesSource.SCORES);
                     needsScores = true;
                 } else if (vars != null && vars.containsKey(variable)) {
                     bindFromParams(vars, bindings, variable);

@@ -320,7 +319,7 @@ public class ExpressionScriptEngine implements ScriptEngine {
         for (String variable : expr.variables) {
             try {
                 if (variable.equals("_score")) {
-                    bindings.add(new SortField("_score", SortField.Type.SCORE));
+                    bindings.add("_score", DoubleValuesSource.SCORES);
                     needsScores = true;
                 } else if (variable.equals("_value")) {
                     specialValue = new ReplaceableConstDoubleValueSource();

@@ -393,7 +392,7 @@ public class ExpressionScriptEngine implements ScriptEngine {
         for (String variable : expr.variables) {
             try {
                 if (variable.equals("_score")) {
-                    bindings.add(new SortField("_score", SortField.Type.SCORE));
+                    bindings.add("_score", DoubleValuesSource.SCORES);
                     needsScores = true;
                 } else if (variable.equals("_value")) {
                     specialValue = new ReplaceableConstDoubleValueSource();
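The three ExpressionScriptEngine hunks above apply one pattern: Lucene 9's SimpleBindings no longer accepts a SortField, so the score is bound as a DoubleValuesSource instead. A standalone sketch of the new binding style (expression and names are illustrative, not commit code):

import java.text.ParseException;

import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.SimpleBindings;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.search.DoubleValuesSource;

public class ScoreBindingSketch {
    static DoubleValuesSource boostedScore() throws ParseException {
        Expression expr = JavascriptCompiler.compile("_score * 2");
        SimpleBindings bindings = new SimpleBindings();
        // Lucene 8.x: bindings.add(new SortField("_score", SortField.Type.SCORE));
        bindings.add("_score", DoubleValuesSource.SCORES);
        return expr.getDoubleValuesSource(bindings);
    }
}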
@@ -42,4 +42,5 @@ grant {
     permission org.opensearch.script.ClassPermission "java.lang.Math";
     permission org.opensearch.script.ClassPermission "org.apache.lucene.util.MathUtil";
     permission org.opensearch.script.ClassPermission "org.apache.lucene.util.SloppyMath";
+    permission org.opensearch.script.ClassPermission "org.apache.lucene.expressions.js.ExpressionMath";
 };
@@ -44,6 +44,10 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.spans.FieldMaskingSpanQuery;
+import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.queries.spans.SpanQuery;
+import org.apache.lucene.queries.spans.SpanTermQuery;
 import org.apache.lucene.search.AutomatonQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;

@@ -52,10 +56,6 @@ import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
-import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;
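The span-query import churn above, and in several files below, is mechanical: Lucene 9 moved the span queries out of lucene-core's org.apache.lucene.search.spans into org.apache.lucene.queries.spans in the lucene-queries module. Usage is unchanged apart from the import; a sketch with invented field and terms:

import org.apache.lucene.index.Term;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;

public class SpanImportSketch {
    static SpanNearQuery quickFox() {
        // Identical API to 8.x; only the package changed.
        return new SpanNearQuery.Builder("body", true)
            .addClause(new SpanTermQuery(new Term("body", "quick")))
            .addClause(new SpanTermQuery(new Term("body", "fox")))
            .setSlop(1)
            .build();
    }
}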
@@ -38,6 +38,9 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.spans.FieldMaskingSpanQuery;
+import org.apache.lucene.queries.spans.SpanNearQuery;
+import org.apache.lucene.queries.spans.SpanTermQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;

@@ -47,9 +50,6 @@ import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
 import org.opensearch.common.Strings;
 import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
 import org.opensearch.common.xcontent.XContentBuilder;
@@ -37,6 +37,7 @@ import org.apache.lucene.index.OrdinalMap;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.join.JoinUtil;
 import org.apache.lucene.search.join.ScoreMode;
 import org.apache.lucene.search.similarities.Similarity;

@@ -409,6 +410,11 @@ public class HasChildQueryBuilder extends AbstractQueryBuilder<HasChildQueryBuil
             this.similarity = similarity;
         }

+        @Override
+        public void visit(QueryVisitor visitor) {
+            visitor.visitLeaf(this);
+        }
+
         @Override
         public Query rewrite(IndexReader reader) throws IOException {
             Query rewritten = super.rewrite(reader);
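The visit(QueryVisitor) overrides added here and in the percolator classes below pair with the removal of Weight#extractTerms, which is gone in Lucene 9; term collection now goes through the visitor API. A sketch of the consuming side (query stands for any Query instance):

import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;

public class TermCollectionSketch {
    static Set<Term> collectTerms(Query query) {
        Set<Term> terms = new HashSet<>();
        // QueryVisitor.termCollector replaces the old extractTerms(Set<Term>) hook.
        query.visit(QueryVisitor.termCollector(terms));
        return terms;
    }
}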
@@ -34,11 +34,11 @@ package org.opensearch.percolator;

 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.ScorerSupplier;

@@ -56,7 +56,6 @@ import org.opensearch.common.lucene.Lucene;
 import java.io.IOException;
 import java.util.List;
 import java.util.Objects;
-import java.util.Set;

 final class PercolateQuery extends Query implements Accountable {

@@ -112,8 +111,6 @@ final class PercolateQuery extends Query implements Accountable {
         final Weight verifiedMatchesWeight = verifiedMatchesQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
         final Weight candidateMatchesWeight = candidateMatchesQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
         return new Weight(this) {
-            @Override
-            public void extractTerms(Set<Term> set) {}

             @Override
             public Explanation explain(LeafReaderContext leafReaderContext, int docId) throws IOException {

@@ -245,6 +242,11 @@ final class PercolateQuery extends Query implements Accountable {
         return verifiedMatchesQuery;
     }

+    @Override
+    public void visit(QueryVisitor visitor) {
+        visitor.visitLeaf(this);
+    }
+
     // Comparing identity here to avoid being cached
     // Note that in theory if the same instance gets used multiple times it could still get cached,
     // however since we create a new query instance each time we this query this shouldn't happen and thus
@@ -43,9 +43,9 @@ import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.sandbox.search.CoveringQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.CoveringQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LongValuesSource;
 import org.apache.lucene.search.MatchNoDocsQuery;

@@ -279,7 +279,7 @@ public class PercolatorFieldMapper extends ParametrizedFieldMapper {
         }
         Query filter = null;
         if (excludeNestedDocuments) {
-            filter = Queries.newNonNestedFilter(indexVersion);
+            filter = Queries.newNonNestedFilter();
         }
         return new PercolateQuery(name, queryStore, documents, candidateQuery, searcher, filter, verifiedMatchesQuery);
     }
@@ -44,7 +44,6 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.BitSetIterator;
-import org.opensearch.Version;
 import org.opensearch.common.document.DocumentField;
 import org.opensearch.common.lucene.search.Queries;
 import org.opensearch.search.fetch.FetchContext;

@@ -127,7 +126,7 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
         this.percolateQuery = pq;
         this.singlePercolateQuery = singlePercolateQuery;
         IndexSearcher percolatorIndexSearcher = percolateQuery.getPercolatorIndexSearcher();
-        Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter(Version.CURRENT));
+        Query nonNestedFilter = percolatorIndexSearcher.rewrite(Queries.newNonNestedFilter());
         Weight weight = percolatorIndexSearcher.createWeight(nonNestedFilter, ScoreMode.COMPLETE_NO_SCORES, 1f);
         Scorer s = weight.scorer(percolatorIndexSearcher.getIndexReader().leaves().get(0));
         int memoryIndexMaxDoc = percolatorIndexSearcher.getIndexReader().maxDoc();

@@ -148,7 +147,7 @@ final class PercolatorMatchedSlotSubFetchPhase implements FetchSubPhase {
         if (rootDocsBySlot != null) {
             // Ensures that we filter out nested documents
             return new BooleanQuery.Builder().add(in, BooleanClause.Occur.MUST)
-                .add(Queries.newNonNestedFilter(Version.CURRENT), BooleanClause.Occur.FILTER)
+                .add(Queries.newNonNestedFilter(), BooleanClause.Occur.FILTER)
                 .build();
         }
         return in;
@@ -35,6 +35,8 @@ import org.apache.lucene.document.BinaryRange;
 import org.apache.lucene.index.PrefixCodedTerms;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.BlendedTermQuery;
+import org.apache.lucene.queries.spans.SpanOrQuery;
+import org.apache.lucene.queries.spans.SpanTermQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;

@@ -48,8 +50,6 @@ import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.TermInSetQuery;
 import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.automaton.ByteRunAutomaton;
@@ -37,7 +37,6 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatPoint;
-import org.apache.lucene.document.HalfFloatPoint;
 import org.apache.lucene.document.InetAddressPoint;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.LongPoint;

@@ -60,10 +59,15 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.queries.BlendedTermQuery;
 import org.apache.lucene.queries.CommonTermsQuery;
+import org.apache.lucene.queries.spans.SpanNearQuery;
+import org.apache.lucene.queries.spans.SpanNotQuery;
+import org.apache.lucene.queries.spans.SpanOrQuery;
+import org.apache.lucene.queries.spans.SpanTermQuery;
+import org.apache.lucene.sandbox.document.HalfFloatPoint;
+import org.apache.lucene.sandbox.search.CoveringQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
-import org.apache.lucene.search.CoveringQuery;
 import org.apache.lucene.search.DisjunctionMaxQuery;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;

@@ -74,6 +78,7 @@ import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Sort;

@@ -83,10 +88,6 @@ import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.WildcardQuery;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanNotQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.ByteBuffersDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;

@@ -123,7 +124,6 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.function.Function;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;

@@ -1279,6 +1279,11 @@ public class CandidateQueryTests extends OpenSearchSingleNodeTestCase {
             return new TermQuery(term);
         }

+        @Override
+        public void visit(QueryVisitor visitor) {
+            visitor.visitLeaf(this);
+        }
+
         @Override
         public String toString(String field) {
             return "custom{" + field + "}";

@@ -1310,9 +1315,6 @@ public class CandidateQueryTests extends OpenSearchSingleNodeTestCase {
             final IndexSearcher percolatorIndexSearcher = memoryIndex.createSearcher();
             return new Weight(this) {

-                @Override
-                public void extractTerms(Set<Term> terms) {}
-
                 @Override
                 public Explanation explain(LeafReaderContext context, int doc) throws IOException {
                     Scorer scorer = scorer(context);

@@ -1386,6 +1388,11 @@ public class CandidateQueryTests extends OpenSearchSingleNodeTestCase {
             };
         }

+        @Override
+        public void visit(QueryVisitor visitor) {
+            visitor.visitLeaf(this);
+        }
+
         @Override
         public String toString(String field) {
             return "control{" + field + "}";
@@ -42,6 +42,8 @@ import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.NoMergePolicy;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.queries.spans.SpanNearQuery;
+import org.apache.lucene.queries.spans.SpanTermQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;

@@ -53,8 +55,6 @@ import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
 import org.opensearch.common.bytes.BytesArray;
 import org.opensearch.test.OpenSearchTestCase;
@@ -35,7 +35,6 @@ package org.opensearch.percolator;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.FloatPoint;
-import org.apache.lucene.document.HalfFloatPoint;
 import org.apache.lucene.document.InetAddressPoint;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.LongPoint;

@@ -43,9 +42,10 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.memory.MemoryIndex;
+import org.apache.lucene.sandbox.document.HalfFloatPoint;
+import org.apache.lucene.sandbox.search.CoveringQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.CoveringQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermInSetQuery;
@@ -33,7 +33,6 @@ package org.opensearch.percolator;

 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.FloatPoint;
-import org.apache.lucene.document.HalfFloatPoint;
 import org.apache.lucene.document.InetAddressPoint;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.LatLonPoint;

@@ -45,6 +44,12 @@ import org.apache.lucene.queries.CommonTermsQuery;
 import org.apache.lucene.queries.intervals.IntervalQuery;
 import org.apache.lucene.queries.intervals.Intervals;
 import org.apache.lucene.queries.intervals.IntervalsSource;
+import org.apache.lucene.queries.spans.SpanFirstQuery;
+import org.apache.lucene.queries.spans.SpanNearQuery;
+import org.apache.lucene.queries.spans.SpanNotQuery;
+import org.apache.lucene.queries.spans.SpanOrQuery;
+import org.apache.lucene.queries.spans.SpanTermQuery;
+import org.apache.lucene.sandbox.document.HalfFloatPoint;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;

@@ -63,11 +68,6 @@ import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.join.QueryBitSetProducer;
 import org.apache.lucene.search.join.ScoreMode;
-import org.apache.lucene.search.spans.SpanFirstQuery;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanNotQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.util.BytesRef;
 import org.opensearch.Version;
 import org.opensearch.common.lucene.search.function.CombineFunction;

@@ -824,13 +824,13 @@ public class QueryAnalyzerTests extends OpenSearchTestCase {
     }

     public void testSynonymQuery() {
-        SynonymQuery query = new SynonymQuery();
+        SynonymQuery query = new SynonymQuery.Builder("field").build();
         Result result = analyze(query, Version.CURRENT);
         assertThat(result.verified, is(true));
         assertThat(result.minimumShouldMatch, equalTo(0));
         assertThat(result.extractions.isEmpty(), is(true));

-        query = new SynonymQuery(new Term("_field", "_value1"), new Term("_field", "_value2"));
+        query = new SynonymQuery.Builder("_field").addTerm(new Term("_field", "_value1")).addTerm(new Term("_field", "_value2")).build();
         result = analyze(query, Version.CURRENT);
         assertThat(result.verified, is(true));
         assertThat(result.minimumShouldMatch, equalTo(1));
@@ -28,8 +28,6 @@
  * under the License.
  */

-import de.thetaphi.forbiddenapis.gradle.CheckForbiddenApis
-
 apply plugin: 'opensearch.yaml-rest-test'
 apply plugin: 'opensearch.internal-cluster-test'

@@ -46,7 +44,7 @@ forbiddenApisMain {
 }

 dependencies {
-  api "org.apache.lucene:lucene-analyzers-icu:${versions.lucene}"
+  api "org.apache.lucene:lucene-analysis-icu:${versions.lucene}"
   api "com.ibm.icu:icu4j:${versions.icu4j}"
 }
@@ -1 +0,0 @@
-7a4d00d5ec5febd252a6182e8b6e87a0a9821f81

@@ -0,0 +1 @@
+76893e6000401ace133a65262254be0ebe556d46

@@ -0,0 +1 @@
+a23a2c1c9baad61b6fb5380f072e41534c275875

@@ -1 +0,0 @@
-a1eec256a25340ba5d432d2800f759db83eb5145
@@ -35,7 +35,7 @@ import com.ibm.icu.text.RawCollationKey;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.collation.ICUCollationDocValuesField;
+import org.apache.lucene.analysis.icu.ICUCollationDocValuesField;

 import java.io.IOException;
@@ -35,7 +35,7 @@ opensearchplugin {
 }

 dependencies {
-  api "org.apache.lucene:lucene-analyzers-kuromoji:${versions.lucene}"
+  api "org.apache.lucene:lucene-analysis-kuromoji:${versions.lucene}"
 }

 restResources {
@@ -0,0 +1 @@
+55f00abe01e51181d687c6bbceca8544f319b97d

@@ -1 +0,0 @@
-d9ff6329a9755bbdb7343452bf246e61ae9279d8
@@ -35,7 +35,7 @@ opensearchplugin {
 }

 dependencies {
-  api "org.apache.lucene:lucene-analyzers-nori:${versions.lucene}"
+  api "org.apache.lucene:lucene-analysis-nori:${versions.lucene}"
 }

 restResources {
@@ -0,0 +1 @@
+c5258e674ad9c189338b026710869c2955d8e11d

@@ -1 +0,0 @@
-6e78aef6d1b709ed3e27dbc949255e078da08d41
@@ -35,7 +35,7 @@ opensearchplugin {
 }

 dependencies {
-  api "org.apache.lucene:lucene-analyzers-phonetic:${versions.lucene}"
+  api "org.apache.lucene:lucene-analysis-phonetic:${versions.lucene}"
   api "commons-codec:commons-codec:${versions.commonscodec}"
 }
@@ -0,0 +1 @@
+437960fac10a9f8327fbd87be4e408eb140988b3

@@ -1 +0,0 @@
-c186bf6dd0c2fa6612ba9b0d785ff2d388d32a23
@@ -35,7 +35,7 @@ opensearchplugin {
 }

 dependencies {
-  api "org.apache.lucene:lucene-analyzers-smartcn:${versions.lucene}"
+  api "org.apache.lucene:lucene-analysis-smartcn:${versions.lucene}"
 }

 restResources {
@@ -0,0 +1 @@
+fe96c0b4609be5f7450773c2d7f099c51f4b1f7a

@@ -1 +0,0 @@
-ebda1884c24bb14ee451b98e7565c86966f8863d
@@ -35,7 +35,7 @@ opensearchplugin {
 }

 dependencies {
-  api "org.apache.lucene:lucene-analyzers-stempel:${versions.lucene}"
+  api "org.apache.lucene:lucene-analysis-stempel:${versions.lucene}"
 }

 restResources {
@@ -0,0 +1 @@
+b92e86dd451d225e68ee4abac5b00bf883b6ea00

@@ -1 +0,0 @@
-2a4bd86c96374cdc5acaf7c0efd5127f2fd3a519
@@ -35,7 +35,7 @@ opensearchplugin {
 }

 dependencies {
-  api "org.apache.lucene:lucene-analyzers-morfologik:${versions.lucene}"
+  api "org.apache.lucene:lucene-analysis-morfologik:${versions.lucene}"
   api "org.carrot2:morfologik-stemming:2.1.8"
   api "org.carrot2:morfologik-fsa:2.1.8"
   api "ua.net.nlp:morfologik-ukrainian-search:4.9.1"
@@ -0,0 +1 @@
+048fddf601c6de7dd296f6da3f394544618f7cea

@@ -1 +0,0 @@
-09de2e3fa72355228b2723f958dcb0ec1bc3f31a
@@ -136,7 +136,6 @@ public class AnnotatedTextHighlighterTests extends OpenSearchTestCase {
             noMatchSize,
             expectedPassages.length,
             name -> "text".equals(name),
-            Integer.MAX_VALUE,
             Integer.MAX_VALUE
         );
         highlighter.setFieldMatcher((name) -> "text".equals(name));
@@ -6,13 +6,34 @@
  * compatible open source license.
  */

+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
 package org.opensearch.index.store;

 import org.opensearch.common.settings.Settings;

 /**
  * Index Settings Tests for NIO FileSystem as index store type.
  */
 public class SmbNIOFsTests extends AbstractAzureFsTestCase {
     @Override
     public Settings indexSettings() {
@@ -97,7 +97,7 @@ dependencies {

   // lucene
   api "org.apache.lucene:lucene-core:${versions.lucene}"
-  api "org.apache.lucene:lucene-analyzers-common:${versions.lucene}"
+  api "org.apache.lucene:lucene-analysis-common:${versions.lucene}"
   api "org.apache.lucene:lucene-backward-codecs:${versions.lucene}"
   api "org.apache.lucene:lucene-grouping:${versions.lucene}"
   api "org.apache.lucene:lucene-highlighter:${versions.lucene}"
@@ -0,0 +1 @@
+f78890829c3d6f15de48fdbc2c77ef4c0e3f005c

@@ -1 +0,0 @@
-23bb36a98d01100953674c56c20861b29b5a5175

@@ -1 +0,0 @@
-7399c32bc4ba7e37e14a9660ffd7962acf68a802

@@ -0,0 +1 @@
+9fb48d0244799e18299449ee62459caab0728490

@@ -1 +0,0 @@
-deb78f6b21d29f964ab267ad59fafb58ef740101

@@ -0,0 +1 @@
+be679fd274f264e4e8b02bc032d2788cd4076ab4

@@ -1 +0,0 @@
-7b91bb886d30c67a8f980d3bdfd6b7826a62d5e7

@@ -0,0 +1 @@
+27ebe235d427b4e392fabab9b6bfa09524ca7f8b

@@ -1 +0,0 @@
-ec4a2103cb300aab7e6142f1c7778dd505ecb8e2

@@ -0,0 +1 @@
+a3cb395c2e8c672e6eec951b2b02371a4a883f73

@@ -1 +0,0 @@
-aa368e9d11660dcfcfaab1a39dd871f05fa2b031

@@ -0,0 +1 @@
+94a855b5d09a6601289aeaeba0f11d5539552590

@@ -1 +0,0 @@
-9de18bf605879647e964fd57ddf3fa6f85ca743e

@@ -0,0 +1 @@
+2371c95031422bc1f501d43ffcc7311baed4b35b

@@ -1 +0,0 @@
-e9cca86ebbe010d375388c5a17216e2d2b2e76bb

@@ -0,0 +1 @@
+25c6170f4fa2f707908dfb92fbafc76727f901e0

@@ -1 +0,0 @@
-21b70a0996e3408291514d99e3b03800d0bcd657

@@ -0,0 +1 @@
+87b4c7833d30895baf7091f9cb0db878e970b604

@@ -1 +0,0 @@
-087f52ee3f72f387b802c49a96e4a14b3b05dd21

@@ -0,0 +1 @@
+bf13395ad2033bca3182fcbc83204e8ae1951945

@@ -1 +0,0 @@
-82b15ef61297e6d7b0c1f6c37c502d6b77a82f1e

@@ -0,0 +1 @@
+3c153a1dc1da3f98083cc932c9476df4b77b0ca5

@@ -1 +0,0 @@
-7a3b6eac3e66bb1c6fb05c0cd980e5592adaf96b

@@ -0,0 +1 @@
+91535ef6512c45c7e2b113b04cab7738ee774893

@@ -1 +0,0 @@
-823a5e9d2fd3b5b668d305e0781d0e074e9f2ebb

@@ -0,0 +1 @@
+6b4ee47f218ed3d123c1b07671677a2e4f3c133b

@@ -1 +0,0 @@
-92d7e5a178d0df58e0b4d400755ac46bae3eea11

@@ -0,0 +1 @@
+a7d0e7279737114c039f5214082da948732096a6
@@ -56,6 +56,7 @@ public class IndexPrimaryRelocationIT extends OpenSearchIntegTestCase {

     private static final int RELOCATION_COUNT = 15;

+    @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/2063")
     public void testPrimaryRelocationWhileIndexing() throws Exception {
         internalCluster().ensureAtLeastNumDataNodes(randomIntBetween(2, 3));
         client().admin()
@@ -32,9 +32,7 @@

 package org.opensearch.recovery;

-import com.carrotsearch.hppc.IntHashSet;
 import com.carrotsearch.hppc.cursors.ObjectCursor;
-import com.carrotsearch.hppc.procedures.IntProcedure;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.util.English;
 import org.opensearch.action.ActionFuture;

@@ -61,6 +59,7 @@ import org.opensearch.common.xcontent.XContentType;
 import org.opensearch.env.NodeEnvironment;
 import org.opensearch.index.IndexService;
 import org.opensearch.index.IndexSettings;
+import org.opensearch.index.mapper.MapperService;
 import org.opensearch.index.seqno.ReplicationTracker;
 import org.opensearch.index.seqno.RetentionLease;
 import org.opensearch.index.shard.IndexEventListener;

@@ -192,6 +191,7 @@ public class RelocationIT extends OpenSearchIntegTestCase {
         assertThat(client().prepareSearch("test").setSize(0).execute().actionGet().getHits().getTotalHits().value, equalTo(20L));
     }

+    @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/2063")
     public void testRelocationWhileIndexingRandom() throws Exception {
         int numberOfRelocations = scaledRandomIntBetween(1, rarely() ? 10 : 4);
         int numberOfReplicas = randomBoolean() ? 0 : 1;

@@ -228,7 +228,7 @@ public class RelocationIT extends OpenSearchIntegTestCase {
         }

         int numDocs = scaledRandomIntBetween(200, 2500);
-        try (BackgroundIndexer indexer = new BackgroundIndexer("test", "type1", client(), numDocs)) {
+        try (BackgroundIndexer indexer = new BackgroundIndexer("test", MapperService.SINGLE_MAPPING_NAME, client(), numDocs)) {
             logger.info("--> waiting for {} docs to be indexed ...", numDocs);
             waitForDocs(numDocs, indexer);
             logger.info("--> {} docs indexed", numDocs);

@@ -285,20 +285,20 @@ public class RelocationIT extends OpenSearchIntegTestCase {
                 for (int hit = 0; hit < indexer.totalIndexedDocs(); hit++) {
                     hitIds[hit] = hit + 1;
                 }
-                IntHashSet set = IntHashSet.from(hitIds);
+                Set<Integer> set = Arrays.stream(hitIds).boxed().collect(Collectors.toSet());
                 for (SearchHit hit : hits.getHits()) {
                     int id = Integer.parseInt(hit.getId());
-                    if (!set.remove(id)) {
+                    if (set.remove(id) == false) {
                         logger.error("Extra id [{}]", id);
                     }
                 }
-                set.forEach((IntProcedure) value -> { logger.error("Missing id [{}]", value); });
+                set.forEach(value -> logger.error("Missing id [{}]", value));
             }
             assertThat(hits.getTotalHits().value, equalTo(indexer.totalIndexedDocs()));
             logger.info("--> DONE search test round {}", i + 1);

         }
-        if (!ranOnce) {
+        if (ranOnce == false) {
             fail();
         }
     }
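The RelocationIT hunks above also swap the hppc primitive collections for plain JDK ones. An equivalent standalone sketch of the replacement pattern (method and names invented for illustration):

import java.util.Arrays;
import java.util.Set;
import java.util.stream.Collectors;

public class HppcToJdkSketch {
    static Set<Integer> toSet(int[] ids) {
        // Was: IntHashSet.from(ids) from com.carrotsearch.hppc; boxing via
        // streams gives the same membership semantics with only JDK types.
        return Arrays.stream(ids).boxed().collect(Collectors.toSet());
    }
}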
@@ -3288,6 +3288,36 @@ public class HighlighterSearchIT extends OpenSearchIntegTestCase {
         );
     }

+    public void testCopyToFields() throws Exception {
+        XContentBuilder b = jsonBuilder().startObject().startObject("properties");
+        b.startObject("foo");
+        {
+            b.field("type", "text");
+            b.field("copy_to", "foo_copy");
+        }
+        b.endObject();
+        // If field is not stored, it is looked up in source (but source has only 'foo')
+        b.startObject("foo_copy").field("type", "text").field("store", true).endObject();
+        b.endObject().endObject();
+        prepareCreate("test").addMapping("type", b).get();
+
+        client().prepareIndex("test")
+            .setId("1")
+            .setSource(jsonBuilder().startObject().field("foo", "how now brown cow").endObject())
+            .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
+            .get();
+
+        SearchResponse response = client().prepareSearch()
+            .setQuery(matchQuery("foo_copy", "brown"))
+            .highlighter(new HighlightBuilder().field(new Field("foo_copy")))
+            .get();
+
+        assertHitCount(response, 1);
+        HighlightField field = response.getHits().getAt(0).getHighlightFields().get("foo_copy");
+        assertThat(field.getFragments().length, equalTo(1));
+        assertThat(field.getFragments()[0].string(), equalTo("how now <em>brown</em> cow"));
+    }
+
     public void testACopyFieldWithNestedQuery() throws Exception {
         String mapping = Strings.toString(
             jsonBuilder().startObject()
@@ -331,8 +331,6 @@ public class QueryStringIT extends OpenSearchIntegTestCase {
         doAssertOneHitForQueryString("field_A0:foo");
         // expanding to the limit should work
         doAssertOneHitForQueryString("field_A\\*:foo");
-        // expanding two blocks to the limit still works
-        doAssertOneHitForQueryString("field_A\\*:foo field_B\\*:bar");

         // adding a non-existing field on top shouldn't overshoot the limit
         doAssertOneHitForQueryString("field_A\\*:foo unmapped:something");
@@ -238,11 +238,10 @@ public class SimpleValidateQueryIT extends OpenSearchIntegTestCase {
         assertThat(response.getQueryExplanation().size(), equalTo(1));
         assertThat(
             response.getQueryExplanation().get(0).getExplanation(),
-            equalTo(
-                "(MatchNoDocsQuery(\"failed [bar] query, caused by number_format_exception:[For input string: \"foo\"]\") "
-                    + "| foo:foo | baz:foo)"
-            )
+            containsString("MatchNoDocsQuery(\"failed [bar] query, caused by number_format_exception:[For input string: \"foo\"]\")")
         );
+        assertThat(response.getQueryExplanation().get(0).getExplanation(), containsString("foo:foo"));
+        assertThat(response.getQueryExplanation().get(0).getExplanation(), containsString("baz:foo"));
         assertThat(response.getQueryExplanation().get(0).getError(), nullValue());
     }
 }
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.misc.search.similarity;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.Similarity;
+
+/**
+ * Similarity that behaves like {@link BM25Similarity} while also applying the k1+1 factor to the
+ * numerator of the scoring formula
+ *
+ * @see BM25Similarity
+ * @deprecated {@link BM25Similarity} should be used instead
+ */
+@Deprecated
+public final class LegacyBM25Similarity extends Similarity {
+
+    private final BM25Similarity bm25Similarity;
+
+    /**
+     * BM25 with these default values:
+     *
+     * <ul>
+     *   <li>{@code k1 = 1.2}
+     *   <li>{@code b = 0.75}
+     *   <li>{@code discountOverlaps = true}
+     * </ul>
+     */
+    public LegacyBM25Similarity() {
+        this.bm25Similarity = new BM25Similarity();
+    }
+
+    /**
+     * BM25 with the supplied parameter values.
+     *
+     * @param k1 Controls non-linear term frequency normalization (saturation).
+     * @param b Controls to what degree document length normalizes tf values.
+     * @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is not
+     *     within the range {@code [0..1]}
+     */
+    public LegacyBM25Similarity(float k1, float b) {
+        this.bm25Similarity = new BM25Similarity(k1, b);
+    }
+
+    /**
+     * BM25 with the supplied parameter values.
+     *
+     * @param k1 Controls non-linear term frequency normalization (saturation).
+     * @param b Controls to what degree document length normalizes tf values.
+     * @param discountOverlaps True if overlap tokens (tokens with a position of increment of zero)
+     *     are discounted from the document's length.
+     * @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is not
+     *     within the range {@code [0..1]}
+     */
+    public LegacyBM25Similarity(float k1, float b, boolean discountOverlaps) {
+        this.bm25Similarity = new BM25Similarity(k1, b, discountOverlaps);
+    }
+
+    @Override
+    public long computeNorm(FieldInvertState state) {
+        return bm25Similarity.computeNorm(state);
+    }
+
+    @Override
+    public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+        return bm25Similarity.scorer(boost * (1 + bm25Similarity.getK1()), collectionStats, termStats);
+    }
+
+    /**
+     * Returns the <code>k1</code> parameter
+     *
+     * @see #LegacyBM25Similarity(float, float)
+     */
+    public final float getK1() {
+        return bm25Similarity.getK1();
+    }
+
+    /**
+     * Returns the <code>b</code> parameter
+     *
+     * @see #LegacyBM25Similarity(float, float)
+     */
+    public final float getB() {
+        return bm25Similarity.getB();
+    }
+
+    /**
+     * Returns true if overlap tokens are discounted from the document's length.
+     *
+     * @see #LegacyBM25Similarity(float, float, boolean)
+     */
+    public boolean getDiscountOverlaps() {
+        return bm25Similarity.getDiscountOverlaps();
+    }
+
+    @Override
+    public String toString() {
+        return bm25Similarity.toString();
+    }
+}
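The class added above restores the (k1 + 1) numerator factor that BM25Similarity dropped in Lucene 8, so existing indices keep their 8.x score scale. A hedged usage sketch (searcher stands for any IndexSearcher; not code from the commit):

import org.apache.lucene.misc.search.similarity.LegacyBM25Similarity;
import org.apache.lucene.search.IndexSearcher;

public class LegacySimilaritySketch {
    static void useLegacyScoring(IndexSearcher searcher) {
        // Same k1/b defaults as BM25Similarity, but scores keep the 8.x scale.
        searcher.setSimilarity(new LegacyBM25Similarity(1.2f, 0.75f));
    }
}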
@@ -39,6 +39,7 @@ import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TwoPhaseIterator;

@@ -138,6 +139,13 @@ public final class BinaryDocValuesRangeQuery extends Query {
         };
     }

+    @Override
+    public void visit(QueryVisitor visitor) {
+        if (visitor.acceptField(fieldName)) {
+            visitor.visitLeaf(this);
+        }
+    }
+
     @Override
     public String toString(String field) {
         return "BinaryDocValuesRangeQuery(fieldName=" + field + ",from=" + originalFrom + ",to=" + originalTo + ")";
@@ -34,16 +34,16 @@ package org.apache.lucene.queries;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermStates;
+import org.apache.lucene.queries.spans.SpanQuery;
+import org.apache.lucene.queries.spans.SpanWeight;
+import org.apache.lucene.queries.spans.Spans;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.ScoreMode;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanWeight;
-import org.apache.lucene.search.spans.Spans;

 import java.io.IOException;
 import java.util.Collections;
 import java.util.Map;
 import java.util.Set;

 /**
  * A {@link SpanQuery} that matches no documents.

@@ -57,6 +57,11 @@ public class SpanMatchNoDocsQuery extends SpanQuery {
         this.reason = reason;
     }

+    @Override
+    public void visit(QueryVisitor visitor) {
+        visitor.visitLeaf(this);
+    }
+
     @Override
     public String getField() {
         return field;

@@ -88,9 +93,6 @@ public class SpanMatchNoDocsQuery extends SpanQuery {
             return null;
         }

-        @Override
-        public void extractTerms(Set<Term> terms) {}
-
         @Override
         public boolean isCacheable(LeafReaderContext ctx) {
             return true;
@@ -35,14 +35,15 @@ package org.apache.lucene.search.uhighlight;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.queries.spans.SpanNearQuery;
+import org.apache.lucene.queries.spans.SpanOrQuery;
+import org.apache.lucene.queries.spans.SpanQuery;
+import org.apache.lucene.queries.spans.SpanTermQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
-import org.apache.lucene.search.spans.SpanNearQuery;
-import org.apache.lucene.search.spans.SpanOrQuery;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag;
 import org.apache.lucene.util.BytesRef;
 import org.opensearch.common.CheckedSupplier;
 import org.opensearch.common.Nullable;

@@ -77,7 +78,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
     private final Locale breakIteratorLocale;
     private final int noMatchSize;
     private final FieldHighlighter fieldHighlighter;
-    private final int keywordIgnoreAbove;
     private final int maxAnalyzedOffset;

     /**

@@ -97,7 +97,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
      * @param noMatchSize The size of the text that should be returned when no highlighting can be performed.
      * @param maxPassages the maximum number of passes to highlight
      * @param fieldMatcher decides which terms should be highlighted
-     * @param keywordIgnoreAbove if the field's value is longer than this we'll skip it
      * @param maxAnalyzedOffset if the field is more than this long we'll refuse to use the ANALYZED
      *     offset source for it because it'd be super slow
      */

@@ -114,7 +113,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
         int noMatchSize,
         int maxPassages,
         Predicate<String> fieldMatcher,
-        int keywordIgnoreAbove,
         int maxAnalyzedOffset
     ) throws IOException {
         super(searcher, analyzer);

@@ -126,7 +124,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
         this.field = field;
         this.noMatchSize = noMatchSize;
         this.setFieldMatcher(fieldMatcher);
-        this.keywordIgnoreAbove = keywordIgnoreAbove;
         this.maxAnalyzedOffset = maxAnalyzedOffset;
         fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
     }

@@ -144,9 +141,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
             return null;
         }
         int fieldValueLength = fieldValue.length();
-        if (fieldValueLength > keywordIgnoreAbove) {
-            return null; // skip highlighting keyword terms that were ignored during indexing
-        }
         if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
             throw new IllegalArgumentException(
                 "The length of ["

@@ -266,4 +260,12 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
         return offsetSource;
     }

+    /** Customize the highlighting flags to use by field. */
+    @Override
+    protected Set<HighlightFlag> getFlags(String field) {
+        final Set<HighlightFlag> flags = super.getFlags(field);
+        // Change the defaults introduced by https://issues.apache.org/jira/browse/LUCENE-9431
+        flags.remove(HighlightFlag.WEIGHT_MATCHES);
+        return flags;
+    }
 }
@@ -35,6 +35,7 @@ package org.apache.lucene.search.vectorhighlight;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.BlendedTermQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
@@ -42,7 +43,6 @@ import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.opensearch.common.lucene.search.function.FunctionScoreQuery;
import org.opensearch.index.search.OpenSearchToParentBlockJoinQuery;

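The import swap above reflects a structural change in Lucene 9.0: the span query classes moved out of core into the queries module, so org.apache.lucene.search.spans.* becomes org.apache.lucene.queries.spans.*. Every remaining caller is migrated the same way, for example:

    // Lucene 8.x
    import org.apache.lucene.search.spans.SpanTermQuery;

    // Lucene 9.0
    import org.apache.lucene.queries.spans.SpanTermQuery;
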
@@ -127,4 +127,9 @@ public final class CombinedBitSet extends BitSet implements Bits {
    public void clear(int startIndex, int endIndex) {
        throw new UnsupportedOperationException("not implemented");
    }

    @Override
    public boolean getAndSet(int i) {
        throw new UnsupportedOperationException("not implemented");
    }
}

@@ -0,0 +1,186 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/* @notice
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

package org.apache.lucene.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.ServiceConfigurationError;

/**
 * Helper class for loading SPI classes from classpath (META-INF files).
 * This is a light impl of {@link java.util.ServiceLoader} but is guaranteed to
 * be bug-free regarding classpath order and does not instantiate or initialize
 * the classes found.
 */
@SuppressForbidden(reason = "Taken from Lucene")
public final class SPIClassIterator<S> implements Iterator<Class<? extends S>> {
    private static final String META_INF_SERVICES = "META-INF/services/";

    private final Class<S> clazz;
    private final ClassLoader loader;
    private final Enumeration<URL> profilesEnum;
    private Iterator<String> linesIterator;

    /** Creates a new SPI iterator to lookup services of type {@code clazz} using
     * the same {@link ClassLoader} as the argument. */
    public static <S> SPIClassIterator<S> get(Class<S> clazz) {
        return new SPIClassIterator<>(clazz, Objects.requireNonNull(clazz.getClassLoader(), () -> clazz + " has no classloader."));
    }

    /** Creates a new SPI iterator to lookup services of type {@code clazz} using the given classloader. */
    public static <S> SPIClassIterator<S> get(Class<S> clazz, ClassLoader loader) {
        return new SPIClassIterator<>(clazz, loader);
    }

    /**
     * Utility method to check if some class loader is a (grand-)parent of or the same as another one.
     * This means the child will be able to load all classes from the parent, too.
     * <p>
     * If the caller's codesource doesn't have enough permissions to do the check, {@code false} is returned
     * (this is fine, because if we get a {@code SecurityException} it is for sure no parent).
     */
    public static boolean isParentClassLoader(final ClassLoader parent, final ClassLoader child) {
        try {
            ClassLoader cl = child;
            while (cl != null) {
                if (cl == parent) {
                    return true;
                }
                cl = cl.getParent();
            }
            return false;
        } catch (SecurityException se) {
            return false;
        }
    }

    private SPIClassIterator(Class<S> clazz, ClassLoader loader) {
        this.clazz = Objects.requireNonNull(clazz, "clazz");
        this.loader = Objects.requireNonNull(loader, "loader");
        try {
            final String fullName = META_INF_SERVICES + clazz.getName();
            this.profilesEnum = loader.getResources(fullName);
        } catch (IOException ioe) {
            throw new ServiceConfigurationError("Error loading SPI profiles for type " + clazz.getName() + " from classpath", ioe);
        }
        this.linesIterator = Collections.<String>emptySet().iterator();
    }

    private boolean loadNextProfile() {
        ArrayList<String> lines = null;
        while (profilesEnum.hasMoreElements()) {
            if (lines != null) {
                lines.clear();
            } else {
                lines = new ArrayList<>();
            }
            final URL url = profilesEnum.nextElement();
            try {
                final InputStream in = url.openStream();
                boolean success = false;
                try {
                    final BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
                    String line;
                    while ((line = reader.readLine()) != null) {
                        final int pos = line.indexOf('#');
                        if (pos >= 0) {
                            line = line.substring(0, pos);
                        }
                        line = line.trim();
                        if (line.length() > 0) {
                            lines.add(line);
                        }
                    }
                    success = true;
                } finally {
                    if (success) {
                        IOUtils.close(in);
                    } else {
                        IOUtils.closeWhileHandlingException(in);
                    }
                }
            } catch (IOException ioe) {
                throw new ServiceConfigurationError("Error loading SPI class list from URL: " + url, ioe);
            }
            if (lines.isEmpty() == false) {
                this.linesIterator = lines.iterator();
                return true;
            }
        }
        return false;
    }

    @Override
    public boolean hasNext() {
        return linesIterator.hasNext() || loadNextProfile();
    }

    @Override
    public Class<? extends S> next() {
        // hasNext() implicitly loads the next profile, so it is essential to call this here!
        if (hasNext() == false) {
            throw new NoSuchElementException();
        }
        assert linesIterator.hasNext();
        final String c = linesIterator.next();
        try {
            // don't initialize the class (pass false as 2nd parameter):
            return Class.forName(c, false, loader).asSubclass(clazz);
        } catch (ClassNotFoundException cnfe) {
            throw new ServiceConfigurationError(
                String.format(
                    Locale.ROOT,
                    "An SPI class of type %s with classname %s does not exist, " + "please fix the file '%s%1$s' in your classpath.",
                    clazz.getName(),
                    c,
                    META_INF_SERVICES
                )
            );
        }
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }

}

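A brief usage sketch for the fork above (the Codec service type is only an illustration; any SPI interface registered under META-INF/services works the same way):

    import org.apache.lucene.codecs.Codec;
    import org.apache.lucene.util.SPIClassIterator;

    // Enumerate every Codec implementation registered on the classpath under
    // META-INF/services/org.apache.lucene.codecs.Codec. Unlike ServiceLoader,
    // the classes are loaded but never instantiated or initialized.
    SPIClassIterator<Codec> it = SPIClassIterator.get(Codec.class);
    while (it.hasNext()) {
        Class<? extends Codec> impl = it.next();
        System.out.println(impl.getName());
    }
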
@@ -0,0 +1,317 @@
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */
package org.apache.lucene.util.packed;

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * Forked from Lucene 8.x; removed in Lucene 9.0
 *
 * @todo further investigate a better alternative
 *
 * Space optimized random access capable array of values with a fixed number of bits/value. Values
 * are packed contiguously.
 *
 * <p>The implementation strives to perform as fast as possible under the constraint of contiguous
 * bits, by avoiding expensive operations. This comes at the cost of code clarity.
 *
 * <p>Technical details: This implementation is a refinement of a non-branching version. The
 * non-branching get and set methods meant that 2 or 4 atomics in the underlying array were always
 * accessed, even for the cases where only 1 or 2 were needed. Even with caching, this had a
 * detrimental effect on performance. Related to this issue, the old implementation used lookup
 * tables for shifts and masks, which also proved to be a bit slower than calculating the shifts and
 * masks on the fly. See https://issues.apache.org/jira/browse/LUCENE-4062 for details.
 */
class XPacked64 extends XPackedInts.MutableImpl {
    static final int BLOCK_SIZE = 64; // 32 = int, 64 = long
    static final int BLOCK_BITS = 6; // The #bits representing BLOCK_SIZE
    static final int MOD_MASK = BLOCK_SIZE - 1; // x % BLOCK_SIZE

    /** Values are stored contiguously in the blocks array. */
    private final long[] blocks;
    /** A right-aligned mask of width BitsPerValue used by {@link #get(int)}. */
    private final long maskRight;
    /** Optimization: Saves one lookup in {@link #get(int)}. */
    private final int bpvMinusBlockSize;

    /**
     * Creates an array with the internal structures adjusted for the given limits and initialized to
     * 0.
     *
     * @param valueCount the number of elements.
     * @param bitsPerValue the number of bits available for any given value.
     */
    public XPacked64(int valueCount, int bitsPerValue) {
        super(valueCount, bitsPerValue);
        final PackedInts.Format format = PackedInts.Format.PACKED;
        final int longCount = format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue);
        this.blocks = new long[longCount];
        maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
        bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
    }

    /**
     * Creates an array with content retrieved from the given DataInput.
     *
     * @param in a DataInput, positioned at the start of Packed64-content.
     * @param valueCount the number of elements.
     * @param bitsPerValue the number of bits available for any given value.
     * @throws java.io.IOException if the values for the backing array could not be retrieved.
     */
    public XPacked64(int packedIntsVersion, DataInput in, int valueCount, int bitsPerValue) throws IOException {
        super(valueCount, bitsPerValue);
        final PackedInts.Format format = PackedInts.Format.PACKED;
        final long byteCount = format.byteCount(packedIntsVersion, valueCount, bitsPerValue); // to know how much to read
        final int longCount = format.longCount(PackedInts.VERSION_CURRENT, valueCount, bitsPerValue); // to size the array
        blocks = new long[longCount];
        // read as many longs as we can
        for (int i = 0; i < byteCount / 8; ++i) {
            blocks[i] = in.readLong();
        }
        final int remaining = (int) (byteCount % 8);
        if (remaining != 0) {
            // read the last bytes
            long lastLong = 0;
            for (int i = 0; i < remaining; ++i) {
                lastLong |= (in.readByte() & 0xFFL) << (56 - i * 8);
            }
            blocks[blocks.length - 1] = lastLong;
        }
        maskRight = ~0L << (BLOCK_SIZE - bitsPerValue) >>> (BLOCK_SIZE - bitsPerValue);
        bpvMinusBlockSize = bitsPerValue - BLOCK_SIZE;
    }

    /**
     * @param index the position of the value.
     * @return the value at the given index.
     */
    @Override
    public long get(final int index) {
        // The abstract index in a bit stream
        final long majorBitPos = (long) index * bitsPerValue;
        // The index in the backing long-array
        final int elementPos = (int) (majorBitPos >>> BLOCK_BITS);
        // The number of value-bits in the second long
        final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;

        if (endBits <= 0) { // Single block
            return (blocks[elementPos] >>> -endBits) & maskRight;
        }
        // Two blocks
        return ((blocks[elementPos] << endBits) | (blocks[elementPos + 1] >>> (BLOCK_SIZE - endBits))) & maskRight;
    }

    @Override
    public int get(int index, long[] arr, int off, int len) {
        assert len > 0 : "len must be > 0 (got " + len + ")";
        assert index >= 0 && index < valueCount;
        len = Math.min(len, valueCount - index);
        assert off + len <= arr.length;

        final int originalIndex = index;
        final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);

        // go to the next block where the value does not span across two blocks
        final int offsetInBlocks = index % decoder.longValueCount();
        if (offsetInBlocks != 0) {
            for (int i = offsetInBlocks; i < decoder.longValueCount() && len > 0; ++i) {
                arr[off++] = get(index++);
                --len;
            }
            if (len == 0) {
                return index - originalIndex;
            }
        }

        // bulk get
        assert index % decoder.longValueCount() == 0;
        int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
        assert (((long) index * bitsPerValue) & MOD_MASK) == 0;
        final int iterations = len / decoder.longValueCount();
        decoder.decode(blocks, blockIndex, arr, off, iterations);
        final int gotValues = iterations * decoder.longValueCount();
        index += gotValues;
        len -= gotValues;
        assert len >= 0;

        if (index > originalIndex) {
            // stay at the block boundary
            return index - originalIndex;
        } else {
            // no progress so far => already at a block boundary but no full block to get
            assert index == originalIndex;
            return super.get(index, arr, off, len);
        }
    }

    @Override
    public void set(final int index, final long value) {
        // The abstract index in a contiguous bit stream
        final long majorBitPos = (long) index * bitsPerValue;
        // The index in the backing long-array
        final int elementPos = (int) (majorBitPos >>> BLOCK_BITS); // / BLOCK_SIZE
        // The number of value-bits in the second long
        final long endBits = (majorBitPos & MOD_MASK) + bpvMinusBlockSize;

        if (endBits <= 0) { // Single block
            blocks[elementPos] = blocks[elementPos] & ~(maskRight << -endBits) | (value << -endBits);
            return;
        }
        // Two blocks
        blocks[elementPos] = blocks[elementPos] & ~(maskRight >>> endBits) | (value >>> endBits);
        blocks[elementPos + 1] = blocks[elementPos + 1] & (~0L >>> endBits) | (value << (BLOCK_SIZE - endBits));
    }

    @Override
    public int set(int index, long[] arr, int off, int len) {
        assert len > 0 : "len must be > 0 (got " + len + ")";
        assert index >= 0 && index < valueCount;
        len = Math.min(len, valueCount - index);
        assert off + len <= arr.length;

        final int originalIndex = index;
        final PackedInts.Encoder encoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);

        // go to the next block where the value does not span across two blocks
        final int offsetInBlocks = index % encoder.longValueCount();
        if (offsetInBlocks != 0) {
            for (int i = offsetInBlocks; i < encoder.longValueCount() && len > 0; ++i) {
                set(index++, arr[off++]);
                --len;
            }
            if (len == 0) {
                return index - originalIndex;
            }
        }

        // bulk set
        assert index % encoder.longValueCount() == 0;
        int blockIndex = (int) (((long) index * bitsPerValue) >>> BLOCK_BITS);
        assert (((long) index * bitsPerValue) & MOD_MASK) == 0;
        final int iterations = len / encoder.longValueCount();
        encoder.encode(arr, off, blocks, blockIndex, iterations);
        final int setValues = iterations * encoder.longValueCount();
        index += setValues;
        len -= setValues;
        assert len >= 0;

        if (index > originalIndex) {
            // stay at the block boundary
            return index - originalIndex;
        } else {
            // no progress so far => already at a block boundary but no full block to set
            assert index == originalIndex;
            return super.set(index, arr, off, len);
        }
    }

    @Override
    public String toString() {
        return "Packed64(bitsPerValue=" + bitsPerValue + ",size=" + size() + ",blocks=" + blocks.length + ")";
    }

    @Override
    public long ramBytesUsed() {
        return RamUsageEstimator.alignObjectSize(
            RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3 * Integer.BYTES // bpvMinusBlockSize,valueCount,bitsPerValue
                + Long.BYTES // maskRight
                + RamUsageEstimator.NUM_BYTES_OBJECT_REF
        ) // blocks ref
            + RamUsageEstimator.sizeOf(blocks);
    }

    @Override
    public void fill(int fromIndex, int toIndex, long val) {
        assert PackedInts.unsignedBitsRequired(val) <= getBitsPerValue();
        assert fromIndex <= toIndex;

        // minimum number of values that use an exact number of full blocks
        final int nAlignedValues = 64 / gcd(64, bitsPerValue);
        final int span = toIndex - fromIndex;
        if (span <= 3 * nAlignedValues) {
            // there needs to be at least 2 * nAlignedValues aligned values for the
            // block approach to be worth trying
            super.fill(fromIndex, toIndex, val);
            return;
        }

        // fill the first values naively until the next block start
        final int fromIndexModNAlignedValues = fromIndex % nAlignedValues;
        if (fromIndexModNAlignedValues != 0) {
            for (int i = fromIndexModNAlignedValues; i < nAlignedValues; ++i) {
                set(fromIndex++, val);
            }
        }
        assert fromIndex % nAlignedValues == 0;

        // compute the long[] blocks for nAlignedValues consecutive values and
        // use them to set as many values as possible without applying any mask
        // or shift
        final int nAlignedBlocks = (nAlignedValues * bitsPerValue) >> 6;
        final long[] nAlignedValuesBlocks;
        {
            XPacked64 values = new XPacked64(nAlignedValues, bitsPerValue);
            for (int i = 0; i < nAlignedValues; ++i) {
                values.set(i, val);
            }
            nAlignedValuesBlocks = values.blocks;
            assert nAlignedBlocks <= nAlignedValuesBlocks.length;
        }
        final int startBlock = (int) (((long) fromIndex * bitsPerValue) >>> 6);
        final int endBlock = (int) (((long) toIndex * bitsPerValue) >>> 6);
        for (int block = startBlock; block < endBlock; ++block) {
            final long blockValue = nAlignedValuesBlocks[block % nAlignedBlocks];
            blocks[block] = blockValue;
        }

        // fill the gap
        for (int i = (int) (((long) endBlock << 6) / bitsPerValue); i < toIndex; ++i) {
            set(i, val);
        }
    }

    private static int gcd(int a, int b) {
        if (a < b) {
            return gcd(b, a);
        } else if (b == 0) {
            return a;
        } else {
            return gcd(b, a % b);
        }
    }

    @Override
    public void clear() {
        Arrays.fill(blocks, 0L);
    }
}

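A minimal sketch of what the forked class above provides (illustrative only; XPacked64 is package-private, so this assumes code living in org.apache.lucene.util.packed):

    // 1000 values at 5 bits each pack into ceil(1000 * 5 / 64) = 79 longs instead
    // of 1000, traded against the shift/mask arithmetic in get() and set().
    XPacked64 packed = new XPacked64(1000, 5);
    packed.set(0, 31);          // any value in [0, 2^5 - 1]
    packed.fill(1, 1000, 7);    // bulk fill takes the aligned-block fast path
    long first = packed.get(0); // 31
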
@@ -0,0 +1,574 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.lucene.util.packed;

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * Forked from Lucene 8.x; removed in Lucene 9.0
 *
 * @todo further investigate a better alternative
 *
 * This class is similar to {@link Packed64} except that it trades space for speed by ensuring that
 * a single block needs to be read/written in order to read/write a value.
 */
abstract class XPacked64SingleBlock extends XPackedInts.MutableImpl {

    public static final int MAX_SUPPORTED_BITS_PER_VALUE = 32;
    private static final int[] SUPPORTED_BITS_PER_VALUE = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 32 };

    public static boolean isSupported(int bitsPerValue) {
        return Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) >= 0;
    }

    private static int requiredCapacity(int valueCount, int valuesPerBlock) {
        return valueCount / valuesPerBlock + (valueCount % valuesPerBlock == 0 ? 0 : 1);
    }

    final long[] blocks;

    XPacked64SingleBlock(int valueCount, int bitsPerValue) {
        super(valueCount, bitsPerValue);
        assert isSupported(bitsPerValue);
        final int valuesPerBlock = 64 / bitsPerValue;
        blocks = new long[requiredCapacity(valueCount, valuesPerBlock)];
    }

    @Override
    public void clear() {
        Arrays.fill(blocks, 0L);
    }

    @Override
    public long ramBytesUsed() {
        return RamUsageEstimator.alignObjectSize(
            RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 2 * Integer.BYTES // valueCount,bitsPerValue
                + RamUsageEstimator.NUM_BYTES_OBJECT_REF
        ) // blocks ref
            + RamUsageEstimator.sizeOf(blocks);
    }

    @Override
    public int get(int index, long[] arr, int off, int len) {
        assert len > 0 : "len must be > 0 (got " + len + ")";
        assert index >= 0 && index < valueCount;
        len = Math.min(len, valueCount - index);
        assert off + len <= arr.length;

        final int originalIndex = index;

        // go to the next block boundary
        final int valuesPerBlock = 64 / bitsPerValue;
        final int offsetInBlock = index % valuesPerBlock;
        if (offsetInBlock != 0) {
            for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
                arr[off++] = get(index++);
                --len;
            }
            if (len == 0) {
                return index - originalIndex;
            }
        }

        // bulk get
        assert index % valuesPerBlock == 0;
        @SuppressWarnings("deprecation")
        final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
        assert decoder.longBlockCount() == 1;
        assert decoder.longValueCount() == valuesPerBlock;
        final int blockIndex = index / valuesPerBlock;
        final int nblocks = (index + len) / valuesPerBlock - blockIndex;
        decoder.decode(blocks, blockIndex, arr, off, nblocks);
        final int diff = nblocks * valuesPerBlock;
        index += diff;
        len -= diff;

        if (index > originalIndex) {
            // stay at the block boundary
            return index - originalIndex;
        } else {
            // no progress so far => already at a block boundary but no full block to get
            assert index == originalIndex;
            return super.get(index, arr, off, len);
        }
    }

    @Override
    public int set(int index, long[] arr, int off, int len) {
        assert len > 0 : "len must be > 0 (got " + len + ")";
        assert index >= 0 && index < valueCount;
        len = Math.min(len, valueCount - index);
        assert off + len <= arr.length;

        final int originalIndex = index;

        // go to the next block boundary
        final int valuesPerBlock = 64 / bitsPerValue;
        final int offsetInBlock = index % valuesPerBlock;
        if (offsetInBlock != 0) {
            for (int i = offsetInBlock; i < valuesPerBlock && len > 0; ++i) {
                set(index++, arr[off++]);
                --len;
            }
            if (len == 0) {
                return index - originalIndex;
            }
        }

        // bulk set
        assert index % valuesPerBlock == 0;
        @SuppressWarnings("deprecation")
        final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
        assert op.longBlockCount() == 1;
        assert op.longValueCount() == valuesPerBlock;
        final int blockIndex = index / valuesPerBlock;
        final int nblocks = (index + len) / valuesPerBlock - blockIndex;
        op.encode(arr, off, blocks, blockIndex, nblocks);
        final int diff = nblocks * valuesPerBlock;
        index += diff;
        len -= diff;

        if (index > originalIndex) {
            // stay at the block boundary
            return index - originalIndex;
        } else {
            // no progress so far => already at a block boundary but no full block to set
            assert index == originalIndex;
            return super.set(index, arr, off, len);
        }
    }

    @Override
    public void fill(int fromIndex, int toIndex, long val) {
        assert fromIndex >= 0;
        assert fromIndex <= toIndex;
        assert PackedInts.unsignedBitsRequired(val) <= bitsPerValue;

        final int valuesPerBlock = 64 / bitsPerValue;
        if (toIndex - fromIndex <= valuesPerBlock << 1) {
            // there needs to be at least one full block to set for the block
            // approach to be worth trying
            super.fill(fromIndex, toIndex, val);
            return;
        }

        // set values naively until the next block start
        int fromOffsetInBlock = fromIndex % valuesPerBlock;
        if (fromOffsetInBlock != 0) {
            for (int i = fromOffsetInBlock; i < valuesPerBlock; ++i) {
                set(fromIndex++, val);
            }
            assert fromIndex % valuesPerBlock == 0;
        }

        // bulk set of the inner blocks
        final int fromBlock = fromIndex / valuesPerBlock;
        final int toBlock = toIndex / valuesPerBlock;
        assert fromBlock * valuesPerBlock == fromIndex;

        long blockValue = 0L;
        for (int i = 0; i < valuesPerBlock; ++i) {
            blockValue = blockValue | (val << (i * bitsPerValue));
        }
        Arrays.fill(blocks, fromBlock, toBlock, blockValue);

        // fill the gap
        for (int i = valuesPerBlock * toBlock; i < toIndex; ++i) {
            set(i, val);
        }
    }

    @SuppressWarnings("deprecation")
    protected PackedInts.Format getFormat() {
        return PackedInts.Format.PACKED_SINGLE_BLOCK;
    }

    @Override
    public String toString() {
        return getClass().getSimpleName() + "(bitsPerValue=" + bitsPerValue + ",size=" + size() + ",blocks=" + blocks.length + ")";
    }

    public static XPacked64SingleBlock create(DataInput in, int valueCount, int bitsPerValue) throws IOException {
        XPacked64SingleBlock reader = create(valueCount, bitsPerValue);
        for (int i = 0; i < reader.blocks.length; ++i) {
            reader.blocks[i] = in.readLong();
        }
        return reader;
    }

    public static XPacked64SingleBlock create(int valueCount, int bitsPerValue) {
        switch (bitsPerValue) {
            case 1:
                return new XPacked64SingleBlock1(valueCount);
            case 2:
                return new XPacked64SingleBlock2(valueCount);
            case 3:
                return new XPacked64SingleBlock3(valueCount);
            case 4:
                return new XPacked64SingleBlock4(valueCount);
            case 5:
                return new XPacked64SingleBlock5(valueCount);
            case 6:
                return new XPacked64SingleBlock6(valueCount);
            case 7:
                return new XPacked64SingleBlock7(valueCount);
            case 8:
                return new XPacked64SingleBlock8(valueCount);
            case 9:
                return new XPacked64SingleBlock9(valueCount);
            case 10:
                return new XPacked64SingleBlock10(valueCount);
            case 12:
                return new XPacked64SingleBlock12(valueCount);
            case 16:
                return new XPacked64SingleBlock16(valueCount);
            case 21:
                return new XPacked64SingleBlock21(valueCount);
            case 32:
                return new XPacked64SingleBlock32(valueCount);
            default:
                throw new IllegalArgumentException("Unsupported number of bits per value: " + bitsPerValue);
        }
    }

    static class XPacked64SingleBlock1 extends XPacked64SingleBlock {

        XPacked64SingleBlock1(int valueCount) {
            super(valueCount, 1);
        }

        @Override
        public long get(int index) {
            final int o = index >>> 6;
            final int b = index & 63;
            final int shift = b << 0;
            return (blocks[o] >>> shift) & 1L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index >>> 6;
            final int b = index & 63;
            final int shift = b << 0;
            blocks[o] = (blocks[o] & ~(1L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock2 extends XPacked64SingleBlock {

        XPacked64SingleBlock2(int valueCount) {
            super(valueCount, 2);
        }

        @Override
        public long get(int index) {
            final int o = index >>> 5;
            final int b = index & 31;
            final int shift = b << 1;
            return (blocks[o] >>> shift) & 3L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index >>> 5;
            final int b = index & 31;
            final int shift = b << 1;
            blocks[o] = (blocks[o] & ~(3L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock3 extends XPacked64SingleBlock {

        XPacked64SingleBlock3(int valueCount) {
            super(valueCount, 3);
        }

        @Override
        public long get(int index) {
            final int o = index / 21;
            final int b = index % 21;
            final int shift = b * 3;
            return (blocks[o] >>> shift) & 7L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index / 21;
            final int b = index % 21;
            final int shift = b * 3;
            blocks[o] = (blocks[o] & ~(7L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock4 extends XPacked64SingleBlock {

        XPacked64SingleBlock4(int valueCount) {
            super(valueCount, 4);
        }

        @Override
        public long get(int index) {
            final int o = index >>> 4;
            final int b = index & 15;
            final int shift = b << 2;
            return (blocks[o] >>> shift) & 15L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index >>> 4;
            final int b = index & 15;
            final int shift = b << 2;
            blocks[o] = (blocks[o] & ~(15L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock5 extends XPacked64SingleBlock {

        XPacked64SingleBlock5(int valueCount) {
            super(valueCount, 5);
        }

        @Override
        public long get(int index) {
            final int o = index / 12;
            final int b = index % 12;
            final int shift = b * 5;
            return (blocks[o] >>> shift) & 31L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index / 12;
            final int b = index % 12;
            final int shift = b * 5;
            blocks[o] = (blocks[o] & ~(31L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock6 extends XPacked64SingleBlock {

        XPacked64SingleBlock6(int valueCount) {
            super(valueCount, 6);
        }

        @Override
        public long get(int index) {
            final int o = index / 10;
            final int b = index % 10;
            final int shift = b * 6;
            return (blocks[o] >>> shift) & 63L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index / 10;
            final int b = index % 10;
            final int shift = b * 6;
            blocks[o] = (blocks[o] & ~(63L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock7 extends XPacked64SingleBlock {

        XPacked64SingleBlock7(int valueCount) {
            super(valueCount, 7);
        }

        @Override
        public long get(int index) {
            final int o = index / 9;
            final int b = index % 9;
            final int shift = b * 7;
            return (blocks[o] >>> shift) & 127L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index / 9;
            final int b = index % 9;
            final int shift = b * 7;
            blocks[o] = (blocks[o] & ~(127L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock8 extends XPacked64SingleBlock {

        XPacked64SingleBlock8(int valueCount) {
            super(valueCount, 8);
        }

        @Override
        public long get(int index) {
            final int o = index >>> 3;
            final int b = index & 7;
            final int shift = b << 3;
            return (blocks[o] >>> shift) & 255L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index >>> 3;
            final int b = index & 7;
            final int shift = b << 3;
            blocks[o] = (blocks[o] & ~(255L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock9 extends XPacked64SingleBlock {

        XPacked64SingleBlock9(int valueCount) {
            super(valueCount, 9);
        }

        @Override
        public long get(int index) {
            final int o = index / 7;
            final int b = index % 7;
            final int shift = b * 9;
            return (blocks[o] >>> shift) & 511L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index / 7;
            final int b = index % 7;
            final int shift = b * 9;
            blocks[o] = (blocks[o] & ~(511L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock10 extends XPacked64SingleBlock {

        XPacked64SingleBlock10(int valueCount) {
            super(valueCount, 10);
        }

        @Override
        public long get(int index) {
            final int o = index / 6;
            final int b = index % 6;
            final int shift = b * 10;
            return (blocks[o] >>> shift) & 1023L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index / 6;
            final int b = index % 6;
            final int shift = b * 10;
            blocks[o] = (blocks[o] & ~(1023L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock12 extends XPacked64SingleBlock {

        XPacked64SingleBlock12(int valueCount) {
            super(valueCount, 12);
        }

        @Override
        public long get(int index) {
            final int o = index / 5;
            final int b = index % 5;
            final int shift = b * 12;
            return (blocks[o] >>> shift) & 4095L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index / 5;
            final int b = index % 5;
            final int shift = b * 12;
            blocks[o] = (blocks[o] & ~(4095L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock16 extends XPacked64SingleBlock {

        XPacked64SingleBlock16(int valueCount) {
            super(valueCount, 16);
        }

        @Override
        public long get(int index) {
            final int o = index >>> 2;
            final int b = index & 3;
            final int shift = b << 4;
            return (blocks[o] >>> shift) & 65535L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index >>> 2;
            final int b = index & 3;
            final int shift = b << 4;
            blocks[o] = (blocks[o] & ~(65535L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock21 extends XPacked64SingleBlock {

        XPacked64SingleBlock21(int valueCount) {
            super(valueCount, 21);
        }

        @Override
        public long get(int index) {
            final int o = index / 3;
            final int b = index % 3;
            final int shift = b * 21;
            return (blocks[o] >>> shift) & 2097151L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index / 3;
            final int b = index % 3;
            final int shift = b * 21;
            blocks[o] = (blocks[o] & ~(2097151L << shift)) | (value << shift);
        }
    }

    static class XPacked64SingleBlock32 extends XPacked64SingleBlock {

        XPacked64SingleBlock32(int valueCount) {
            super(valueCount, 32);
        }

        @Override
        public long get(int index) {
            final int o = index >>> 1;
            final int b = index & 1;
            final int shift = b << 5;
            return (blocks[o] >>> shift) & 4294967295L;
        }

        @Override
        public void set(int index, long value) {
            final int o = index >>> 1;
            final int b = index & 1;
            final int shift = b << 5;
            blocks[o] = (blocks[o] & ~(4294967295L << shift)) | (value << shift);
        }
    }
}

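A matching sketch for the single-block variant above, under the same package-private assumption:

    // At 21 bits/value each 64-bit block holds exactly 3 values (1 bit per block
    // is wasted), so every get/set touches exactly one block, unlike the
    // contiguous XPacked64 where a value can straddle two blocks.
    assert XPacked64SingleBlock.isSupported(21); // 11, for example, is not supported
    XPacked64SingleBlock ints = XPacked64SingleBlock.create(1000, 21);
    ints.set(0, (1L << 21) - 1);
    long first = ints.get(0);
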
@@ -0,0 +1,740 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util.packed;

import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts.Decoder;
import org.apache.lucene.util.packed.PackedInts.Encoder;
import org.apache.lucene.util.packed.PackedInts.Format;
import org.apache.lucene.util.packed.PackedInts.FormatAndBits;
import org.apache.lucene.util.packed.PackedInts.Reader;
import org.apache.lucene.util.packed.PackedInts.ReaderIterator;
import org.apache.lucene.util.packed.PackedInts.Writer;

/**
 * Forked from Lucene 8.x; removed in Lucene 8.9
 *
 * @todo further investigate a better alternative
 *
 * Simplistic compression for array of unsigned long values. Each value is {@code >= 0} and {@code
 * <=} a specified maximum value. The values are stored as packed ints, with each value consuming a
 * fixed number of bits.
 */
public class XPackedInts {

    /** At most 700% memory overhead, always select a direct implementation. */
    public static final float FASTEST = 7f;

    /** At most 50% memory overhead, always select a reasonably fast implementation. */
    public static final float FAST = 0.5f;

    /** At most 25% memory overhead. */
    public static final float DEFAULT = 0.25f;

    /** No memory overhead at all, but the returned implementation may be slow. */
    public static final float COMPACT = 0f;

    /** Default amount of memory to use for bulk operations. */
    public static final int DEFAULT_BUFFER_SIZE = 1024; // 1K

    public static final String CODEC_NAME = "PackedInts";
    public static final int VERSION_MONOTONIC_WITHOUT_ZIGZAG = 2;
    public static final int VERSION_START = VERSION_MONOTONIC_WITHOUT_ZIGZAG;
    public static final int VERSION_CURRENT = VERSION_MONOTONIC_WITHOUT_ZIGZAG;

    /** Check the validity of a version number. */
    public static void checkVersion(int version) {
        if (version < VERSION_START) {
            throw new IllegalArgumentException("Version is too old, should be at least " + VERSION_START + " (got " + version + ")");
        } else if (version > VERSION_CURRENT) {
            throw new IllegalArgumentException("Version is too new, should be at most " + VERSION_CURRENT + " (got " + version + ")");
        }
    }

    /**
     * Try to find the {@link Format} and number of bits per value that would restore from disk the
     * fastest reader whose overhead is less than <code>acceptableOverheadRatio</code>.
     *
     * <p>The <code>acceptableOverheadRatio</code> parameter makes sense for random-access {@link
     * Reader}s. In case you only plan to perform sequential access on this stream later on, you
     * should probably use {@link PackedInts#COMPACT}.
     *
     * <p>If you don't know how many values you are going to write, use <code>valueCount = -1</code>.
     */
    public static FormatAndBits fastestFormatAndBits(int valueCount, int bitsPerValue, float acceptableOverheadRatio) {
        if (valueCount == -1) {
            valueCount = Integer.MAX_VALUE;
        }

        acceptableOverheadRatio = Math.max(COMPACT, acceptableOverheadRatio);
        acceptableOverheadRatio = Math.min(FASTEST, acceptableOverheadRatio);
        float acceptableOverheadPerValue = acceptableOverheadRatio * bitsPerValue; // in bits

        int maxBitsPerValue = bitsPerValue + (int) acceptableOverheadPerValue;

        int actualBitsPerValue = -1;

        // rounded number of bits per value are usually the fastest
        if (bitsPerValue <= 8 && maxBitsPerValue >= 8) {
            actualBitsPerValue = 8;
        } else if (bitsPerValue <= 16 && maxBitsPerValue >= 16) {
            actualBitsPerValue = 16;
        } else if (bitsPerValue <= 32 && maxBitsPerValue >= 32) {
            actualBitsPerValue = 32;
        } else if (bitsPerValue <= 64 && maxBitsPerValue >= 64) {
            actualBitsPerValue = 64;
        } else {
            actualBitsPerValue = bitsPerValue;
        }

        return new FormatAndBits(Format.PACKED, actualBitsPerValue);
    }
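    // Worked example (illustrative, not part of the forked code): for 5 bits/value,
    // DEFAULT (25%) allows at most 5 + (int) (0.25f * 5) = 6 bits, which reaches no
    // byte-aligned width, so the result stays at 5 bits/value; FASTEST (700%) allows
    // up to 40 bits and therefore rounds up to the byte-aligned 8 bits/value:
    //
    //   fastestFormatAndBits(1_000_000, 5, DEFAULT); // Format.PACKED, 5 bits/value
    //   fastestFormatAndBits(1_000_000, 5, FASTEST); // Format.PACKED, 8 bits/value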

    final static class XPackedWriter extends XWriter {

        boolean finished;
        final PackedInts.Format format;
        final BulkOperation encoder;
        final byte[] nextBlocks;
        final long[] nextValues;
        final int iterations;
        int off;
        int written;

        XPackedWriter(PackedInts.Format format, DataOutput out, int valueCount, int bitsPerValue, int mem) {
            super(out, valueCount, bitsPerValue);
            this.format = format;
            encoder = BulkOperation.of(format, bitsPerValue);
            iterations = encoder.computeIterations(valueCount, mem);
            nextBlocks = new byte[iterations * encoder.byteBlockCount()];
            nextValues = new long[iterations * encoder.byteValueCount()];
            off = 0;
            written = 0;
            finished = false;
        }

        @Override
        protected PackedInts.Format getFormat() {
            return format;
        }

        @Override
        public void add(long v) throws IOException {
            assert PackedInts.unsignedBitsRequired(v) <= bitsPerValue;
            assert !finished;
            if (valueCount != -1 && written >= valueCount) {
                throw new EOFException("Writing past end of stream");
            }
            nextValues[off++] = v;
            if (off == nextValues.length) {
                flush();
            }
            ++written;
        }

        @Override
        public void finish() throws IOException {
            assert !finished;
            if (valueCount != -1) {
                while (written < valueCount) {
                    add(0L);
                }
            }
            flush();
            finished = true;
        }

        private void flush() throws IOException {
            encoder.encode(nextValues, 0, nextBlocks, 0, iterations);
            final int blockCount = (int) format.byteCount(PackedInts.VERSION_CURRENT, off, bitsPerValue);
            out.writeBytes(nextBlocks, blockCount);
            Arrays.fill(nextValues, 0L);
            off = 0;
        }

        @Override
        public int ord() {
            return written - 1;
        }
    }

    /** A packed integer array that can be modified. */
    public abstract static class Mutable extends Reader {

        /**
         * @return the number of bits used to store any given value. Note: This does not imply that
         *     memory usage is {@code bitsPerValue * #values} as implementations are free to use
         *     non-space-optimal packing of bits.
         */
        public abstract int getBitsPerValue();

        /**
         * Set the value at the given index in the array.
         *
         * @param index where the value should be positioned.
         * @param value a value conforming to the constraints set by the array.
         */
        public abstract void set(int index, long value);

        /**
         * Bulk set: set at least one and at most <code>len</code> longs starting at <code>off</code> in
         * <code>arr</code> into this mutable, starting at <code>index</code>. Returns the actual number
         * of values that have been set.
         */
        public int set(int index, long[] arr, int off, int len) {
            assert len > 0 : "len must be > 0 (got " + len + ")";
            assert index >= 0 && index < size();
            len = Math.min(len, size() - index);
            assert off + len <= arr.length;

            for (int i = index, o = off, end = index + len; i < end; ++i, ++o) {
                set(i, arr[o]);
            }
            return len;
        }

        /**
         * Fill the mutable from <code>fromIndex</code> (inclusive) to <code>toIndex</code> (exclusive)
         * with <code>val</code>.
         */
        public void fill(int fromIndex, int toIndex, long val) {
            assert val <= maxValue(getBitsPerValue());
            assert fromIndex <= toIndex;
            for (int i = fromIndex; i < toIndex; ++i) {
                set(i, val);
            }
        }

        /** Sets all values to 0. */
        public void clear() {
            fill(0, size(), 0);
        }

        /**
         * Save this mutable into <code>out</code>. Instantiating a reader from the generated data will
         * return a reader with the same number of bits per value.
         */
        public void save(DataOutput out) throws IOException {
            XWriter writer = getWriterNoHeader(out, getFormat(), size(), getBitsPerValue(), DEFAULT_BUFFER_SIZE);
            writer.writeHeader();
            for (int i = 0; i < size(); ++i) {
                writer.add(get(i));
            }
            writer.finish();
        }

        /** The underlying format. */
        Format getFormat() {
            return Format.PACKED;
        }
    }
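    // Round-trip sketch (illustrative): save(DataOutput) above writes the standard
    // PackedInts codec header followed by the packed values, so getReader(DataInput)
    // can restore them with the same bitsPerValue:
    //
    //   XPackedInts.Mutable m = new XPacked64(100, 7);
    //   m.set(42, 99L);
    //   m.save(out);                                     // any DataOutput
    //   PackedInts.Reader r = XPackedInts.getReader(in); // DataInput over those bytes
    //   assert r.get(42) == 99L;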
|
||||
/**
|
||||
* A simple base for Readers that keeps track of valueCount and bitsPerValue.
|
||||
*
|
||||
*/
|
||||
abstract static class ReaderImpl extends Reader {
|
||||
protected final int valueCount;
|
||||
|
||||
protected ReaderImpl(int valueCount) {
|
||||
this.valueCount = valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract long get(int index);
|
||||
|
||||
@Override
|
||||
public final int size() {
|
||||
return valueCount;
|
||||
}
|
||||
}
|
||||
|
||||
abstract static class MutableImpl extends Mutable {
|
||||
|
||||
protected final int valueCount;
|
||||
protected final int bitsPerValue;
|
||||
|
||||
protected MutableImpl(int valueCount, int bitsPerValue) {
|
||||
this.valueCount = valueCount;
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
|
||||
this.bitsPerValue = bitsPerValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int getBitsPerValue() {
|
||||
return bitsPerValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int size() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getSimpleName() + "(valueCount=" + valueCount + ",bitsPerValue=" + bitsPerValue + ")";
|
||||
}
|
||||
}
|
||||
|
||||
/** A {@link Reader} which has all its values equal to 0 (bitsPerValue = 0). */
|
||||
public static final class NullReader extends Reader {
|
||||
|
||||
private final int valueCount;
|
||||
|
||||
/** Sole constructor. */
|
||||
public NullReader(int valueCount) {
|
||||
this.valueCount = valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long get(int index) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(int index, long[] arr, int off, int len) {
|
||||
assert len > 0 : "len must be > 0 (got " + len + ")";
|
||||
assert index >= 0 && index < valueCount;
|
||||
len = Math.min(len, valueCount - index);
|
||||
Arrays.fill(arr, off, off + len, 0);
|
||||
return len;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return valueCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + Integer.BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A write-once Writer.
|
||||
*
|
||||
*/
|
||||
public abstract static class XWriter extends Writer {
|
||||
protected XWriter(DataOutput out, int valueCount, int bitsPerValue) {
|
||||
super(out, valueCount, bitsPerValue);
|
||||
}
|
||||
|
||||
void writeHeader() throws IOException {
|
||||
assert valueCount != -1;
|
||||
CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
|
||||
out.writeVInt(bitsPerValue);
|
||||
out.writeVInt(valueCount);
|
||||
out.writeVInt(getFormat().getId());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a {@link Decoder}.
|
||||
*
|
||||
* @param format the format used to store packed ints
|
||||
* @param version the compatibility version
|
||||
* @param bitsPerValue the number of bits per value
|
||||
* @return a decoder
|
||||
*/
|
||||
public static Decoder getDecoder(Format format, int version, int bitsPerValue) {
|
||||
checkVersion(version);
|
||||
return BulkOperation.of(format, bitsPerValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an {@link Encoder}.
|
||||
*
|
||||
* @param format the format used to store packed ints
|
||||
* @param version the compatibility version
|
||||
* @param bitsPerValue the number of bits per value
|
||||
* @return an encoder
|
||||
*/
|
||||
public static Encoder getEncoder(Format format, int version, int bitsPerValue) {
|
||||
checkVersion(version);
|
||||
return BulkOperation.of(format, bitsPerValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Restore a {@link Reader} from a stream without reading metadata at the beginning of the
|
||||
* stream. This method is useful to restore data from streams which have been created using {@link
|
||||
* XPackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
|
||||
*
|
||||
* @param in the stream to read data from, positioned at the beginning of the packed values
|
||||
* @param format the format used to serialize
|
||||
* @param version the version used to serialize the data
|
||||
* @param valueCount how many values the stream holds
|
||||
* @param bitsPerValue the number of bits per value
|
||||
* @return a Reader
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
* @see XPackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
|
||||
*/
|
||||
public static Reader getReaderNoHeader(DataInput in, Format format, int version, int valueCount, int bitsPerValue) throws IOException {
|
||||
checkVersion(version);
|
||||
switch (format) {
|
||||
case PACKED_SINGLE_BLOCK:
|
||||
return XPacked64SingleBlock.create(in, valueCount, bitsPerValue);
|
||||
case PACKED:
|
||||
return new XPacked64(version, in, valueCount, bitsPerValue);
|
||||
default:
|
||||
throw new AssertionError("Unknown Writer format: " + format);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Restore a {@link Reader} from a stream.
|
||||
*
|
||||
* @param in the stream to read data from
|
||||
* @return a Reader
|
||||
* @throws IOException If there is a low-level I/O error
|
||||
*/
|
||||
public static Reader getReader(DataInput in) throws IOException {
|
||||
final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
|
||||
final int bitsPerValue = in.readVInt();
|
||||
assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
|
||||
final int valueCount = in.readVInt();
|
||||
final Format format = Format.byId(in.readVInt());
|
||||
|
||||
return getReaderNoHeader(in, format, version, valueCount, bitsPerValue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Restore a {@link ReaderIterator} from a stream without reading metadata at the
|
||||
* beginning of the stream. This method is useful to restore data from streams which have been
|
||||
* created using {@link XPackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
|
||||
*
|
||||
* @param in the stream to read data from, positioned at the beginning of the packed values
|
||||
* @param format the format used to serialize
|
||||
* @param version the version used to serialize the data
|
||||
* @param valueCount how many values the stream holds
|
||||
* @param bitsPerValue the number of bits per value
|
||||
* @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up
|
||||
* iteration)
|
||||
* @return a ReaderIterator
|
||||
* @see XPackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)
|
||||
*/
|
||||
public static ReaderIterator getReaderIteratorNoHeader(
|
||||
DataInput in,
|
||||
Format format,
|
||||
int version,
|
||||
int valueCount,
|
||||
int bitsPerValue,
|
||||
int mem
|
||||
) {
|
||||
checkVersion(version);
|
||||
return new PackedReaderIterator(format, version, valueCount, bitsPerValue, in, mem);
|
||||
}

    /**
     * Retrieve PackedInts as a {@link ReaderIterator}.
     *
     * @param in positioned at the beginning of a stored packed int structure.
     * @param mem how much memory the iterator is allowed to use to read-ahead (likely to speed up
     *     iteration)
     * @return an iterator to access the values
     * @throws IOException if the structure could not be retrieved.
     */
    public static ReaderIterator getReaderIterator(DataInput in, int mem) throws IOException {
        final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
        final int bitsPerValue = in.readVInt();
        assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
        final int valueCount = in.readVInt();
        final Format format = Format.byId(in.readVInt());
        return getReaderIteratorNoHeader(in, format, version, valueCount, bitsPerValue, mem);
    }
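
    // Illustrative sketch, not part of the upstream class: a ReaderIterator
    // scans the values in order without materializing the whole array; mem
    // bounds the read-ahead buffer.
    private static long exampleSequentialSum(DataInput in) throws IOException {
        ReaderIterator it = getReaderIterator(in, 1024); // up to 1 KiB of read-ahead
        long sum = 0;
        for (int i = 0; i < it.size(); ++i) {
            sum += it.next();
        }
        return sum;
    }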

    /**
     * Expert: Construct a direct {@link Reader} from a stream without reading metadata at the
     * beginning of the stream. This method is useful to restore data from streams which have been
     * created using {@link XPackedInts#getWriterNoHeader(DataOutput, Format, int, int, int)}.
     *
     * <p>The returned reader will have very little memory overhead, but every call to {@link
     * Reader#get(int)} is likely to perform a disk seek.
     *
     * @param in the stream to read data from
     * @param format the format used to serialize
     * @param version the version used to serialize the data
     * @param valueCount how many values the stream holds
     * @param bitsPerValue the number of bits per value
     * @return a direct Reader
     */
    public static Reader getDirectReaderNoHeader(final IndexInput in, Format format, int version, int valueCount, int bitsPerValue) {
        checkVersion(version);
        switch (format) {
            case PACKED:
                return new DirectPackedReader(bitsPerValue, valueCount, in);
            case PACKED_SINGLE_BLOCK:
                return new DirectPacked64SingleBlockReader(bitsPerValue, valueCount, in);
            default:
                throw new AssertionError("Unknown format: " + format);
        }
    }

    /**
     * Construct a direct {@link Reader} from an {@link IndexInput}. This method is useful to restore
     * data from streams which have been created using {@link XPackedInts#getWriter(DataOutput, int,
     * int, float)}.
     *
     * <p>The returned reader will have very little memory overhead, but every call to {@link
     * Reader#get(int)} is likely to perform a disk seek.
     *
     * @param in the stream to read data from
     * @return a direct Reader
     * @throws IOException If there is a low-level I/O error
     */
    public static Reader getDirectReader(IndexInput in) throws IOException {
        final int version = CodecUtil.checkHeader(in, CODEC_NAME, VERSION_START, VERSION_CURRENT);
        final int bitsPerValue = in.readVInt();
        assert bitsPerValue > 0 && bitsPerValue <= 64 : "bitsPerValue=" + bitsPerValue;
        final int valueCount = in.readVInt();
        final Format format = Format.byId(in.readVInt());
        return getDirectReaderNoHeader(in, format, version, valueCount, bitsPerValue);
    }
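
    // Illustrative sketch, not part of the upstream class: a direct reader
    // keeps almost nothing in RAM, at the price of a likely disk seek on each
    // get(), so it suits rare random lookups over large arrays.
    private static long exampleDirectLookup(IndexInput in, int index) throws IOException {
        Reader reader = getDirectReader(in); // eagerly reads only the header
        return reader.get(index); // likely one seek per call
    }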

    /**
     * Create a packed integer array with the given amount of values initialized to 0. The valueCount
     * and the bitsPerValue cannot be changed after creation. All Mutables known by this factory are
     * kept fully in RAM.
     *
     * <p>Positive values of <code>acceptableOverheadRatio</code> will trade space for speed by
     * selecting a faster but potentially less memory-efficient implementation. An <code>
     * acceptableOverheadRatio</code> of {@link PackedInts#COMPACT} will make sure that the most
     * memory-efficient implementation is selected whereas {@link PackedInts#FASTEST} will make sure
     * that the fastest implementation is selected.
     *
     * @param valueCount the number of elements
     * @param bitsPerValue the number of bits available for any given value
     * @param acceptableOverheadRatio an acceptable overhead ratio per value
     * @return a mutable packed integer array
     */
    public static Mutable getMutable(int valueCount, int bitsPerValue, float acceptableOverheadRatio) {
        final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
        return getMutable(valueCount, formatAndBits.bitsPerValue, formatAndBits.format);
    }
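
    // Illustrative sketch, not part of the upstream class: an in-RAM packed
    // array. With COMPACT the tightest layout is chosen, here about 7 bits per
    // slot instead of a long's 64.
    private static Mutable exampleMutable() {
        Mutable values = getMutable(1_000_000, bitsRequired(127), PackedInts.COMPACT);
        values.set(0, 42);
        assert values.get(0) == 42;
        return values;
    }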

    /**
     * Same as {@link #getMutable(int, int, float)} with a pre-computed number of bits per value and
     * format.
     */
    public static Mutable getMutable(int valueCount, int bitsPerValue, PackedInts.Format format) {
        assert valueCount >= 0;
        switch (format) {
            case PACKED_SINGLE_BLOCK:
                return XPacked64SingleBlock.create(valueCount, bitsPerValue);
            case PACKED:
                return new XPacked64(valueCount, bitsPerValue);
            default:
                throw new AssertionError();
        }
    }

    /**
     * Expert: Create a packed integer array writer for the given output, format, value count, and
     * number of bits per value.
     *
     * <p>The resulting stream will be long-aligned. This means that depending on the format which is
     * used, up to 63 bits will be wasted. An easy way to make sure that no space is lost is to always
     * use a <code>valueCount</code> that is a multiple of 64.
     *
     * <p>This method does not write any metadata to the stream, meaning that it is your
     * responsibility to store it somewhere else in order to be able to recover data from the stream
     * later on:
     *
     * <ul>
     *   <li><code>format</code> (using {@link Format#getId()}),
     *   <li><code>valueCount</code>,
     *   <li><code>bitsPerValue</code>,
     *   <li>{@link #VERSION_CURRENT}.
     * </ul>
     *
     * <p>It is possible to start writing values without knowing how many of them you are actually
     * going to write. To do this, just pass <code>-1</code> as <code>valueCount</code>. On the other
     * hand, for any positive value of <code>valueCount</code>, the returned writer will make sure
     * that you don't write more values than expected and pad the end of stream with zeros in case you
     * have written less than <code>valueCount</code> when calling {@link Writer#finish()}.
     *
     * <p>The <code>mem</code> parameter lets you control how much memory can be used to buffer
     * changes in memory before flushing to disk. High values of <code>mem</code> are likely to
     * improve throughput. On the other hand, if speed is not that important to you, a value of <code>
     * 0</code> will use as little memory as possible and should already offer reasonable throughput.
     *
     * @param out the data output
     * @param format the format to use to serialize the values
     * @param valueCount the number of values
     * @param bitsPerValue the number of bits per value
     * @param mem how much memory (in bytes) can be used to speed up serialization
     * @return a Writer
     * @see XPackedInts#getReaderIteratorNoHeader(DataInput, Format, int, int, int, int)
     * @see XPackedInts#getReaderNoHeader(DataInput, Format, int, int, int)
     */
    public static XWriter getWriterNoHeader(DataOutput out, Format format, int valueCount, int bitsPerValue, int mem) {
        return new XPackedWriter(format, out, valueCount, bitsPerValue, mem);
    }
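
    // Illustrative sketch, not part of the upstream class: with the no-header
    // variants the caller persists the metadata listed above and feeds it back
    // verbatim when restoring; here both sides simply share the constants, and
    // the DataOutput/DataInput pair is assumed to point at the same bytes.
    private static void exampleNoHeaderRoundTrip(DataOutput out, DataInput in) throws IOException {
        final int valueCount = 64; // a multiple of 64, so no padding is wasted
        final int bitsPerValue = 5;
        XWriter writer = getWriterNoHeader(out, Format.PACKED, valueCount, bitsPerValue, DEFAULT_BUFFER_SIZE);
        for (int i = 0; i < valueCount; ++i) {
            writer.add(i % 32); // every value must fit in 5 bits
        }
        writer.finish();
        Reader reader = getReaderNoHeader(in, Format.PACKED, VERSION_CURRENT, valueCount, bitsPerValue);
        assert reader.get(3) == 3;
    }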

    /**
     * Create a packed integer array writer for the given output, format, value count, and number of
     * bits per value.
     *
     * <p>The resulting stream will be long-aligned. This means that depending on the format which is
     * used under the hood, up to 63 bits will be wasted. An easy way to make sure that no space is
     * lost is to always use a <code>valueCount</code> that is a multiple of 64.
     *
     * <p>This method writes metadata to the stream, so that the resulting stream is sufficient to
     * restore a {@link Reader} from it. You don't need to track <code>valueCount</code> or <code>
     * bitsPerValue</code> by yourself. In case this is a problem, you should probably look at {@link
     * #getWriterNoHeader(DataOutput, Format, int, int, int)}.
     *
     * <p>The <code>acceptableOverheadRatio</code> parameter controls how readers that will be
     * restored from this stream trade space for speed by selecting a faster but potentially less
     * memory-efficient implementation. An <code>acceptableOverheadRatio</code> of {@link
     * PackedInts#COMPACT} will make sure that the most memory-efficient implementation is selected
     * whereas {@link PackedInts#FASTEST} will make sure that the fastest implementation is selected.
     * In case you are only interested in reading this stream sequentially later on, you should
     * probably use {@link PackedInts#COMPACT}.
     *
     * @param out the data output
     * @param valueCount the number of values
     * @param bitsPerValue the number of bits per value
     * @param acceptableOverheadRatio an acceptable overhead ratio per value
     * @return a Writer
     * @throws IOException If there is a low-level I/O error
     */
    public static Writer getWriter(DataOutput out, int valueCount, int bitsPerValue, float acceptableOverheadRatio) throws IOException {
        assert valueCount >= 0;

        final FormatAndBits formatAndBits = fastestFormatAndBits(valueCount, bitsPerValue, acceptableOverheadRatio);
        final XWriter writer = getWriterNoHeader(out, formatAndBits.format, valueCount, formatAndBits.bitsPerValue, DEFAULT_BUFFER_SIZE);
        writer.writeHeader();
        return writer;
    }

    /**
     * Returns how many bits are required to hold values up to and including maxValue. NOTE: This
     * method returns at least 1.
     *
     * @param maxValue the maximum value that should be representable.
     * @return the amount of bits needed to represent values from 0 to maxValue.
     */
    public static int bitsRequired(long maxValue) {
        if (maxValue < 0) {
            throw new IllegalArgumentException("maxValue must be non-negative (got: " + maxValue + ")");
        }
        return unsignedBitsRequired(maxValue);
    }
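
    // Worked examples (illustrative): bitsRequired(0) == 1, bitsRequired(1) == 1,
    // bitsRequired(255) == 8, bitsRequired(256) == 9, and
    // bitsRequired(Long.MAX_VALUE) == 63; negative input throws.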

    /**
     * Returns how many bits are required to store <code>bits</code>, interpreted as an unsigned
     * value. NOTE: This method returns at least 1.
     */
    public static int unsignedBitsRequired(long bits) {
        return Math.max(1, 64 - Long.numberOfLeadingZeros(bits));
    }

    /**
     * Calculates the maximum unsigned long that can be expressed with the given number of bits.
     *
     * @param bitsPerValue the number of bits available for any given value.
     * @return the maximum value for the given bits.
     */
    public static long maxValue(int bitsPerValue) {
        return bitsPerValue == 64 ? Long.MAX_VALUE : ~(~0L << bitsPerValue);
    }
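
    // Worked examples (illustrative): maxValue(1) == 1, maxValue(8) == 255 and
    // maxValue(64) == Long.MAX_VALUE, the special case above, since values are
    // read back as signed longs. Note maxValue and bitsRequired are inverses:
    // bitsRequired(maxValue(n)) == n for n < 64.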

    /**
     * Copy <code>src[srcPos:srcPos+len]</code> into <code>dest[destPos:destPos+len]</code> using at
     * most <code>mem</code> bytes.
     */
    public static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, int mem) {
        assert srcPos + len <= src.size();
        assert destPos + len <= dest.size();
        final int capacity = mem >>> 3;
        if (capacity == 0) {
            for (int i = 0; i < len; ++i) {
                dest.set(destPos++, src.get(srcPos++));
            }
        } else if (len > 0) {
            // use bulk operations
            final long[] buf = new long[Math.min(capacity, len)];
            copy(src, srcPos, dest, destPos, len, buf);
        }
    }

    /** Same as {@link #copy(Reader, int, Mutable, int, int, int)} but using a pre-allocated buffer. */
    static void copy(Reader src, int srcPos, Mutable dest, int destPos, int len, long[] buf) {
        assert buf.length > 0;
        int remaining = 0;
        while (len > 0) {
            final int read = src.get(srcPos, buf, remaining, Math.min(len, buf.length - remaining));
            assert read > 0;
            srcPos += read;
            len -= read;
            remaining += read;
            final int written = dest.set(destPos, buf, 0, remaining);
            assert written > 0;
            destPos += written;
            if (written < remaining) {
                System.arraycopy(buf, written, buf, 0, remaining - written);
            }
            remaining -= written;
        }
        while (remaining > 0) {
            final int written = dest.set(destPos, buf, 0, remaining);
            destPos += written;
            remaining -= written;
            System.arraycopy(buf, written, buf, 0, remaining);
        }
    }

    /**
     * Check that the block size is a power of 2, in the right bounds, and return its log in base 2.
     */
    static int checkBlockSize(int blockSize, int minBlockSize, int maxBlockSize) {
        if (blockSize < minBlockSize || blockSize > maxBlockSize) {
            throw new IllegalArgumentException("blockSize must be >= " + minBlockSize + " and <= " + maxBlockSize + ", got " + blockSize);
        }
        if ((blockSize & (blockSize - 1)) != 0) {
            throw new IllegalArgumentException("blockSize must be a power of two, got " + blockSize);
        }
        return Integer.numberOfTrailingZeros(blockSize);
    }

    /**
     * Return the number of blocks required to store <code>size</code> values on <code>blockSize
     * </code>.
     */
    static int numBlocks(long size, int blockSize) {
        final int numBlocks = (int) (size / blockSize) + (size % blockSize == 0 ? 0 : 1);
        if ((long) numBlocks * blockSize < size) {
            throw new IllegalArgumentException("size is too large for this block size");
        }
        return numBlocks;
    }
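
    // Worked example (illustrative): numBlocks(100, 64) == 2, one full block of
    // 64 values plus one partial block; a size whose block count overflows an
    // int triggers the IllegalArgumentException above.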
}

@@ -80,7 +80,7 @@ public class Version implements Comparable<Version>, ToXContentFragment {
     public static final Version V_1_2_5 = new Version(1020599, org.apache.lucene.util.Version.LUCENE_8_10_1);
     public static final Version V_1_3_0 = new Version(1030099, org.apache.lucene.util.Version.LUCENE_8_10_1);
     public static final Version V_1_4_0 = new Version(1040099, org.apache.lucene.util.Version.LUCENE_8_10_1);
-    public static final Version V_2_0_0 = new Version(2000099, org.apache.lucene.util.Version.LUCENE_8_10_1);
+    public static final Version V_2_0_0 = new Version(2000099, org.apache.lucene.util.Version.LUCENE_9_0_0);
     public static final Version CURRENT = V_2_0_0;

     public static Version readVersion(StreamInput in) throws IOException {

@@ -154,13 +154,6 @@ public class IndicesSegmentResponse extends BroadcastResponse {
             if (segment.getSegmentSort() != null) {
                 toXContent(builder, segment.getSegmentSort());
             }
-            if (segment.ramTree != null) {
-                builder.startArray(Fields.RAM_TREE);
-                for (Accountable child : segment.ramTree.getChildResources()) {
-                    toXContent(builder, child);
-                }
-                builder.endArray();
-            }
             if (segment.attributes != null && segment.attributes.isEmpty() == false) {
                 builder.field("attributes", segment.attributes);
             }

@@ -224,7 +224,6 @@ public final class SearchPhaseController {
         if (results.isEmpty()) {
             return null;
         }
-        final boolean setShardIndex = false;
         final TopDocs topDocs = results.stream().findFirst().get();
         final TopDocs mergedTopDocs;
         final int numShards = results.size();

@@ -234,15 +233,15 @@
             CollapseTopFieldDocs firstTopDocs = (CollapseTopFieldDocs) topDocs;
             final Sort sort = new Sort(firstTopDocs.fields);
             final CollapseTopFieldDocs[] shardTopDocs = results.toArray(new CollapseTopFieldDocs[numShards]);
-            mergedTopDocs = CollapseTopFieldDocs.merge(sort, from, topN, shardTopDocs, setShardIndex);
+            mergedTopDocs = CollapseTopFieldDocs.merge(sort, from, topN, shardTopDocs, false);
         } else if (topDocs instanceof TopFieldDocs) {
             TopFieldDocs firstTopDocs = (TopFieldDocs) topDocs;
             final Sort sort = new Sort(firstTopDocs.fields);
             final TopFieldDocs[] shardTopDocs = results.toArray(new TopFieldDocs[numShards]);
-            mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs, setShardIndex);
+            mergedTopDocs = TopDocs.merge(sort, from, topN, shardTopDocs);
         } else {
             final TopDocs[] shardTopDocs = results.toArray(new TopDocs[numShards]);
-            mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs, setShardIndex);
+            mergedTopDocs = TopDocs.merge(from, topN, shardTopDocs);
         }
         return mergedTopDocs;
     }
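
For context on the hunks above: Lucene 9 removes the setShardIndex boolean from TopDocs.merge, so merged hits keep shardIndex == -1 unless the caller stamps it beforehand. A minimal sketch, assuming only the Lucene 9 signatures visible in the diff (mergeWithShardIndices is a hypothetical helper, not part of this commit):

    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;

    static TopDocs mergeWithShardIndices(int from, int topN, TopDocs[] shardHits) {
        // Lucene 9's TopDocs.merge(from, topN, shardHits) no longer assigns
        // shard ordinals, so set ScoreDoc.shardIndex explicitly before merging.
        for (int shard = 0; shard < shardHits.length; shard++) {
            for (ScoreDoc doc : shardHits[shard].scoreDocs) {
                doc.shardIndex = shard;
            }
        }
        return TopDocs.merge(from, topN, shardHits);
    }
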
@@ -32,10 +32,11 @@

 package org.opensearch.action.search;

-import org.apache.lucene.store.RAMOutputStream;
 import org.opensearch.LegacyESVersion;
 import org.opensearch.Version;
+import org.opensearch.common.bytes.BytesReference;
 import org.opensearch.common.io.stream.BytesStreamInput;
+import org.opensearch.common.io.stream.BytesStreamOutput;
 import org.opensearch.common.util.concurrent.AtomicArray;
 import org.opensearch.search.SearchPhaseResult;
 import org.opensearch.search.SearchShardTarget;

@@ -57,7 +58,8 @@ final class TransportSearchHelper {

     static String buildScrollId(AtomicArray<? extends SearchPhaseResult> searchPhaseResults, Version version) {
         boolean includeContextUUID = version.onOrAfter(LegacyESVersion.V_7_7_0);
-        try (RAMOutputStream out = new RAMOutputStream()) {
+        try {
+            BytesStreamOutput out = new BytesStreamOutput();
             if (includeContextUUID) {
                 out.writeString(INCLUDE_CONTEXT_UUID);
             }

@@ -77,8 +79,7 @@ final class TransportSearchHelper {
                     out.writeString(searchShardTarget.getNodeId());
                 }
             }
-            byte[] bytes = new byte[(int) out.getFilePointer()];
-            out.writeTo(bytes, 0);
+            byte[] bytes = BytesReference.toBytes(out.bytes());
             return Base64.getUrlEncoder().encodeToString(bytes);
         } catch (IOException e) {
             throw new UncheckedIOException(e);

@@ -33,11 +33,11 @@

 package org.opensearch.common.bytes;

 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FutureArrays;
 import org.opensearch.common.io.stream.StreamInput;

 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.Arrays;

 public final class BytesArray extends AbstractBytesReference {

@@ -96,7 +96,7 @@ public final class BytesArray extends AbstractBytesReference {
         }
         if (other instanceof BytesArray) {
             final BytesArray that = (BytesArray) other;
-            return FutureArrays.equals(bytes, offset, offset + length, that.bytes, that.offset, that.offset + that.length);
+            return Arrays.equals(bytes, offset, offset + length, that.bytes, that.offset, that.offset + that.length);
         }
         return super.equals(other);
     }
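
The BytesArray change above is mechanical: Lucene 9 dropped FutureArrays because it requires Java 11, where the JDK's range-based java.util.Arrays.equals (added in Java 9) covers the same use one-for-one. A minimal illustration of that JDK method:

    import java.util.Arrays;

    byte[] a = { 1, 2, 3, 4 };
    byte[] b = { 9, 2, 3, 9 };
    // Compares a[1..3) with b[1..3): both ranges are {2, 3}, so this is true.
    boolean equal = Arrays.equals(a, 1, 3, b, 1, 3);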