Fix up the rebase onto main by bringing over the index sort optimization

This commit is contained in:
Greg Miller 2024-08-27 08:11:26 -07:00 committed by Greg Miller
parent 67fd647bab
commit 0fb0a63ebc
3 changed files with 127 additions and 152 deletions

View File

@ -16,19 +16,11 @@
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import java.util.function.LongPredicate;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DocValuesRangeIterator;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -36,12 +28,13 @@ import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortedSetDocValuesRangeScorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Objects;
final class SortedSetDocValuesRangeQuery extends Query {
private final String field;
@ -155,76 +148,7 @@ final class SortedSetDocValuesRangeQuery extends Query {
}
}
return new SortedSetDocValuesRangeScorer(values, minOrd, maxOrd, scoreMode, score(), skipper);
/**
// no terms matched in this segment
if (minOrd > maxOrd
|| (skipper != null
&& (minOrd > skipper.maxValue() || maxOrd < skipper.minValue()))) {
return new ConstantScoreScorer(score(), scoreMode, DocIdSetIterator.empty());
}
// all terms matched in this segment
if (skipper != null
&& skipper.docCount() == context.reader().maxDoc()
&& skipper.minValue() >= minOrd
&& skipper.maxValue() <= maxOrd) {
return new ConstantScoreScorer(
score(), scoreMode, DocIdSetIterator.all(skipper.docCount()));
}
final SortedDocValues singleton = DocValues.unwrapSingleton(values);
TwoPhaseIterator iterator;
if (singleton != null) {
if (skipper != null) {
final DocIdSetIterator psIterator =
getDocIdSetIteratorOrNullForPrimarySort(
context.reader(), singleton, skipper, minOrd, maxOrd);
if (psIterator != null) {
return new ConstantScoreScorer(score(), scoreMode, psIterator);
}
}
iterator =
new TwoPhaseIterator(singleton) {
@Override
public boolean matches() throws IOException {
final long ord = singleton.ordValue();
return ord >= minOrd && ord <= maxOrd;
}
@Override
public float matchCost() {
return 2; // 2 comparisons
}
};
} else {
iterator =
new TwoPhaseIterator(values) {
@Override
public boolean matches() throws IOException {
for (int i = 0; i < values.docValueCount(); i++) {
long ord = values.nextOrd();
if (ord < minOrd) {
continue;
}
// Values are sorted, so the first ord that is >= minOrd is our best
// candidate
return ord <= maxOrd;
}
return false; // all ords were < minOrd
}
@Override
public float matchCost() {
return 2; // 2 comparisons
}
};
}
if (skipper != null) {
iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd, false);
}
return new ConstantScoreScorer(score(), scoreMode, iterator);
return new SortedSetDocValuesRangeScorer(field, values, minOrd, maxOrd, scoreMode, score(), skipper, context);
}
@Override
@ -232,7 +156,6 @@ final class SortedSetDocValuesRangeQuery extends Query {
return values.cost();
}
};
**/
}
@Override
@ -241,69 +164,4 @@ final class SortedSetDocValuesRangeQuery extends Query {
}
};
}
/**
 * Tries to answer the ordinal range {@code [minOrd, maxOrd]} with a single contiguous doc ID
 * range, which is only attempted when the segment's index sort starts with this query's field.
 *
 * <p>Returns {@code null} when the shortcut is not applicable: either {@code skipper.docCount()}
 * differs from {@code reader.maxDoc()}, or the segment has no index sort, an empty sort, or a
 * first sort field other than {@code field}.
 *
 * <p>NOTE(review): the order of the {@code skipper.advance} / {@code nextDoc} calls below looks
 * significant because both objects are advanced as a side effect; keep the lower bound lookup
 * before the upper bound lookup unless the skipper contract is confirmed to allow otherwise.
 *
 * @param reader leaf reader supplying {@code maxDoc()} and the index sort metadata
 * @param sortedDocValues single-valued doc values scanned by {@code nextDoc}
 * @param skipper skipper consulted for global min/max values and for a starting doc ID
 * @param minOrd lowest ordinal accepted (inclusive, per the predicates below)
 * @param maxOrd highest ordinal accepted (inclusive, per the predicates below)
 * @return {@code DocIdSetIterator.empty()} when both bounds resolve to the same doc ID, {@code
 *     DocIdSetIterator.range(minDocID, maxDocID)} otherwise, or {@code null} when not applicable
 * @throws IOException propagated from the skipper or the doc values
 */
private DocIdSetIterator getDocIdSetIteratorOrNullForPrimarySort(
LeafReader reader,
SortedDocValues sortedDocValues,
DocValuesSkipper skipper,
long minOrd,
long maxOrd)
throws IOException {
// Presumably this means "some documents have no value for the field" — TODO confirm against
// the DocValuesSkipper documentation.
if (skipper.docCount() != reader.maxDoc()) {
return null;
}
final Sort indexSort = reader.getMetaData().sort();
// Only the first (primary) sort field is examined.
if (indexSort == null
|| indexSort.getSort().length == 0
|| indexSort.getSort()[0].getField().equals(field) == false) {
return null;
}
final int minDocID;
final int maxDocID;
if (indexSort.getSort()[0].getReverse()) {
// Reversed primary sort: the lower doc bound is tied to maxOrd and the upper doc bound to
// minOrd.
if (skipper.maxValue() <= maxOrd) {
// No value exceeds maxOrd, so the range starts at the first document.
minDocID = 0;
} else {
skipper.advance(Long.MIN_VALUE, maxOrd);
// First document at or after the skipper's position whose ordinal is <= maxOrd.
minDocID = nextDoc(skipper.minDocID(0), sortedDocValues, l -> l <= maxOrd);
}
if (skipper.minValue() >= minOrd) {
// No value is below minOrd, so the range runs to docCount (used as the range's end).
maxDocID = skipper.docCount();
} else {
skipper.advance(Long.MIN_VALUE, minOrd);
// First document whose ordinal is < minOrd; it becomes the end passed to range().
maxDocID = nextDoc(skipper.minDocID(0), sortedDocValues, l -> l < minOrd);
}
} else {
// Non-reversed primary sort: the lower doc bound is tied to minOrd and the upper doc bound
// to maxOrd.
if (skipper.minValue() >= minOrd) {
minDocID = 0;
} else {
skipper.advance(minOrd, Long.MAX_VALUE);
// First document at or after the skipper's position whose ordinal is >= minOrd.
minDocID = nextDoc(skipper.minDocID(0), sortedDocValues, l -> l >= minOrd);
}
if (skipper.maxValue() <= maxOrd) {
maxDocID = skipper.docCount();
} else {
skipper.advance(maxOrd, Long.MAX_VALUE);
// First document whose ordinal is > maxOrd; it becomes the end passed to range().
maxDocID = nextDoc(skipper.minDocID(0), sortedDocValues, l -> l > maxOrd);
}
}
// Equal bounds are mapped to the empty iterator instead of calling range().
return minDocID == maxDocID
? DocIdSetIterator.empty()
: DocIdSetIterator.range(minDocID, maxDocID);
}
/**
 * Scans {@code docValues} forward for the first document whose ordinal satisfies {@code
 * predicate}.
 *
 * <p>The scan begins at the iterator's current document, after first advancing it to {@code
 * startDoc} when the iterator is positioned before that document.
 *
 * @param startDoc document to advance to when the iterator is currently behind it
 * @param docValues doc values iterator; it is moved as a side effect
 * @param predicate test applied to {@code docValues.ordValue()} of each visited document
 * @return the first visited document accepted by {@code predicate}, or the iterator's document
 *     value once it is no longer below {@code DocIdSetIterator.NO_MORE_DOCS}
 * @throws IOException propagated from the doc values iterator
 */
private static int nextDoc(int startDoc, SortedDocValues docValues, LongPredicate predicate)
    throws IOException {
  int current = docValues.docID();
  if (current < startDoc) {
    current = docValues.advance(startDoc);
  }
  // Keep stepping while there is a document and its ordinal is rejected.
  while (current < DocIdSetIterator.NO_MORE_DOCS
      && predicate.test(docValues.ordValue()) == false) {
    current = docValues.nextDoc();
  }
  return current;
}
}

View File

@ -195,7 +195,7 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod {
}
if (hasGaps == false) {
return new SortedSetDocValuesRangeScorer(values, minOrd, maxOrd, scoreMode, score(), skipper);
return new SortedSetDocValuesRangeScorer(query.field, values, minOrd, maxOrd, scoreMode, score(), skipper, context);
}
final SortedDocValues singleton = DocValues.unwrapSingleton(values);

View File

@ -1,22 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.document.DocValuesRangeIterator;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import java.io.IOException;
import java.util.Collection;
import java.util.function.LongPredicate;
public class SortedSetDocValuesRangeScorer extends Scorer {
final Scorer delegate;
public SortedSetDocValuesRangeScorer(SortedSetDocValues values, long minOrd, long maxOrd, ScoreMode scoreMode, float score, DocValuesSkipper skipper) throws IOException {
delegate = setupScorer(values, minOrd, maxOrd, scoreMode, score, skipper);
public SortedSetDocValuesRangeScorer(
String field,
SortedSetDocValues values,
long minOrd,
long maxOrd,
ScoreMode scoreMode,
float score,
DocValuesSkipper skipper,
LeafReaderContext context) throws IOException {
delegate = setupScorer(field, values, minOrd, maxOrd, scoreMode, score, skipper, context);
}
static Scorer setupScorer(SortedSetDocValues values, long minOrd, long maxOrd, ScoreMode scoreMode, float score, DocValuesSkipper skipper) throws IOException {
static Scorer setupScorer(
String field,
SortedSetDocValues values,
long minOrd,
long maxOrd,
ScoreMode scoreMode,
float score,
DocValuesSkipper skipper,
LeafReaderContext context) throws IOException {
// no terms matched in this segment
if (minOrd > maxOrd
|| (skipper != null
@ -24,9 +58,26 @@ public class SortedSetDocValuesRangeScorer extends Scorer {
return new ConstantScoreScorer(score, scoreMode, DocIdSetIterator.empty());
}
// all terms matched in this segment
if (skipper != null
&& skipper.docCount() == context.reader().maxDoc()
&& skipper.minValue() >= minOrd
&& skipper.maxValue() <= maxOrd) {
return new ConstantScoreScorer(
score, scoreMode, DocIdSetIterator.all(skipper.docCount()));
}
final SortedDocValues singleton = DocValues.unwrapSingleton(values);
TwoPhaseIterator iterator;
if (singleton != null) {
if (skipper != null) {
final DocIdSetIterator psIterator =
getDocIdSetIteratorOrNullForPrimarySort(
context.reader(), field, singleton, skipper, minOrd, maxOrd);
if (psIterator != null) {
return new ConstantScoreScorer(score, scoreMode, psIterator);
}
}
iterator =
new TwoPhaseIterator(singleton) {
@Override
@ -64,7 +115,7 @@ public class SortedSetDocValuesRangeScorer extends Scorer {
};
}
if (skipper != null) {
iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd);
iterator = new DocValuesRangeIterator(iterator, skipper, minOrd, maxOrd, false);
}
return new ConstantScoreScorer(score, scoreMode, iterator);
}
@ -113,4 +164,70 @@ public class SortedSetDocValuesRangeScorer extends Scorer {
public Collection<ChildScorable> getChildren() throws IOException {
return delegate.getChildren();
}
/**
 * Tries to answer the ordinal range {@code [minOrd, maxOrd]} with a single contiguous doc ID
 * range, which is only attempted when the segment's index sort starts with {@code field}.
 *
 * <p>Returns {@code null} when the shortcut is not applicable: either {@code skipper.docCount()}
 * differs from {@code reader.maxDoc()}, or the segment has no index sort, an empty sort, or a
 * first sort field other than {@code field}.
 *
 * <p>NOTE(review): the order of the {@code skipper.advance} / {@code nextDoc} calls below looks
 * significant because both objects are advanced as a side effect; keep the lower bound lookup
 * before the upper bound lookup unless the skipper contract is confirmed to allow otherwise.
 *
 * @param reader leaf reader supplying {@code maxDoc()} and the index sort metadata
 * @param field name compared against the first index sort field
 * @param sortedDocValues single-valued doc values scanned by {@code nextDoc}
 * @param skipper skipper consulted for global min/max values and for a starting doc ID
 * @param minOrd lowest ordinal accepted (inclusive, per the predicates below)
 * @param maxOrd highest ordinal accepted (inclusive, per the predicates below)
 * @return {@code DocIdSetIterator.empty()} when both bounds resolve to the same doc ID, {@code
 *     DocIdSetIterator.range(minDocID, maxDocID)} otherwise, or {@code null} when not applicable
 * @throws IOException propagated from the skipper or the doc values
 */
private static DocIdSetIterator getDocIdSetIteratorOrNullForPrimarySort(
LeafReader reader,
String field,
SortedDocValues sortedDocValues,
DocValuesSkipper skipper,
long minOrd,
long maxOrd)
throws IOException {
// Presumably this means "some documents have no value for the field" — TODO confirm against
// the DocValuesSkipper documentation.
if (skipper.docCount() != reader.maxDoc()) {
return null;
}
// NOTE(review): the copy of this method that previously lived in SortedSetDocValuesRangeQuery
// called reader.getMetaData().sort() — confirm which accessor LeafMetaData exposes on this
// branch.
final Sort indexSort = reader.getMetaData().getSort();
// Only the first (primary) sort field is examined.
if (indexSort == null
|| indexSort.getSort().length == 0
|| indexSort.getSort()[0].getField().equals(field) == false) {
return null;
}
final int minDocID;
final int maxDocID;
if (indexSort.getSort()[0].getReverse()) {
// Reversed primary sort: the lower doc bound is tied to maxOrd and the upper doc bound to
// minOrd.
if (skipper.maxValue() <= maxOrd) {
// No value exceeds maxOrd, so the range starts at the first document.
minDocID = 0;
} else {
skipper.advance(Long.MIN_VALUE, maxOrd);
// First document at or after the skipper's position whose ordinal is <= maxOrd.
minDocID = nextDoc(skipper.minDocID(0), sortedDocValues, l -> l <= maxOrd);
}
if (skipper.minValue() >= minOrd) {
// No value is below minOrd, so the range runs to docCount (used as the range's end).
maxDocID = skipper.docCount();
} else {
skipper.advance(Long.MIN_VALUE, minOrd);
// First document whose ordinal is < minOrd; it becomes the end passed to range().
maxDocID = nextDoc(skipper.minDocID(0), sortedDocValues, l -> l < minOrd);
}
} else {
// Non-reversed primary sort: the lower doc bound is tied to minOrd and the upper doc bound
// to maxOrd.
if (skipper.minValue() >= minOrd) {
minDocID = 0;
} else {
skipper.advance(minOrd, Long.MAX_VALUE);
// First document at or after the skipper's position whose ordinal is >= minOrd.
minDocID = nextDoc(skipper.minDocID(0), sortedDocValues, l -> l >= minOrd);
}
if (skipper.maxValue() <= maxOrd) {
maxDocID = skipper.docCount();
} else {
skipper.advance(maxOrd, Long.MAX_VALUE);
// First document whose ordinal is > maxOrd; it becomes the end passed to range().
maxDocID = nextDoc(skipper.minDocID(0), sortedDocValues, l -> l > maxOrd);
}
}
// Equal bounds are mapped to the empty iterator instead of calling range().
return minDocID == maxDocID
? DocIdSetIterator.empty()
: DocIdSetIterator.range(minDocID, maxDocID);
}
/**
 * Walks {@code docValues} forward until a document whose ordinal passes {@code predicate} is
 * found.
 *
 * <p>If the iterator currently sits before {@code startDoc} it is first advanced to {@code
 * startDoc}; otherwise the walk starts from wherever the iterator already is.
 *
 * @param startDoc document to advance to when the iterator is currently behind it
 * @param docValues doc values iterator; it is moved as a side effect
 * @param predicate test applied to {@code docValues.ordValue()} of each visited document
 * @return the first visited document accepted by {@code predicate}, or the iterator's document
 *     value once it is no longer below {@code DocIdSetIterator.NO_MORE_DOCS}
 * @throws IOException propagated from the doc values iterator
 */
private static int nextDoc(int startDoc, SortedDocValues docValues, LongPredicate predicate)
    throws IOException {
  int candidate = docValues.docID();
  if (candidate < startDoc) {
    candidate = docValues.advance(startDoc);
  }
  while (candidate < DocIdSetIterator.NO_MORE_DOCS) {
    if (predicate.test(docValues.ordValue())) {
      // Found the first accepted document.
      return candidate;
    }
    candidate = docValues.nextDoc();
  }
  return candidate;
}
}