LUCENE-6472: Added min and max document options to global ordinal join

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1678989 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Martijn van Groningen 2015-05-12 15:57:08 +00:00
parent 29490437c5
commit 3111aeab0e
7 changed files with 278 additions and 64 deletions

View File

@ -54,9 +54,9 @@ New Features
queries, and supports two-phased iterators to avoid loading queries, and supports two-phased iterators to avoid loading
positions when possible. (Paul Elschot via Robert Muir) positions when possible. (Paul Elschot via Robert Muir)
* LUCENE-6352: Added a new query time join to the join module that uses * LUCENE-6352, LUCENE-6472: Added a new query time join to the join module
global ordinals, which is faster for subsequent joins between reopens. that uses global ordinals, which is faster for subsequent joins between
(Martijn van Groningen, Adrien Grand) reopens. (Martijn van Groningen, Adrien Grand)
* LUCENE-5879: Added experimental auto-prefix terms to BlockTree terms * LUCENE-5879: Added experimental auto-prefix terms to BlockTree terms
dictionary, exposed as AutoPrefixPostingsFormat (Adrien Grand, dictionary, exposed as AutoPrefixPostingsFormat (Adrien Grand,

View File

@ -28,15 +28,13 @@ import java.io.IOException;
abstract class BaseGlobalOrdinalScorer extends Scorer { abstract class BaseGlobalOrdinalScorer extends Scorer {
final LongBitSet foundOrds;
final SortedDocValues values; final SortedDocValues values;
final Scorer approximationScorer; final Scorer approximationScorer;
float score; float score;
public BaseGlobalOrdinalScorer(Weight weight, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) { public BaseGlobalOrdinalScorer(Weight weight, SortedDocValues values, Scorer approximationScorer) {
super(weight); super(weight);
this.foundOrds = foundOrds;
this.values = values; this.values = values;
this.approximationScorer = approximationScorer; this.approximationScorer = approximationScorer;
} }

View File

@ -160,11 +160,13 @@ final class GlobalOrdinalsQuery extends Query {
final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer { final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer {
final LongBitSet foundOrds;
final LongValues segmentOrdToGlobalOrdLookup; final LongValues segmentOrdToGlobalOrdLookup;
public OrdinalMapScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) { public OrdinalMapScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
super(weight, foundOrds, values, approximationScorer); super(weight, values, approximationScorer);
this.score = score; this.score = score;
this.foundOrds = foundOrds;
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup; this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
} }
@ -203,9 +205,12 @@ final class GlobalOrdinalsQuery extends Query {
final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer { final static class SegmentOrdinalScorer extends BaseGlobalOrdinalScorer {
final LongBitSet foundOrds;
public SegmentOrdinalScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) { public SegmentOrdinalScorer(Weight weight, float score, LongBitSet foundOrds, SortedDocValues values, Scorer approximationScorer) {
super(weight, foundOrds, values, approximationScorer); super(weight, values, approximationScorer);
this.score = score; this.score = score;
this.foundOrds = foundOrds;
} }
@Override @Override

View File

@ -33,23 +33,48 @@ import java.util.Arrays;
abstract class GlobalOrdinalsWithScoreCollector implements Collector { abstract class GlobalOrdinalsWithScoreCollector implements Collector {
final String field; final String field;
final boolean doMinMax;
final int min;
final int max;
final MultiDocValues.OrdinalMap ordinalMap; final MultiDocValues.OrdinalMap ordinalMap;
final LongBitSet collectedOrds; final LongBitSet collectedOrds;
protected final Scores scores;
GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) { protected final Scores scores;
protected final Occurrences occurrences;
GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, ScoreMode scoreMode, int min, int max) {
if (valueCount > Integer.MAX_VALUE) { if (valueCount > Integer.MAX_VALUE) {
// We simply don't support more than // We simply don't support more than
throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids"); throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids");
} }
this.field = field; this.field = field;
this.doMinMax = !(min <= 0 && max == Integer.MAX_VALUE);
this.min = min;
this.max = max;;
this.ordinalMap = ordinalMap; this.ordinalMap = ordinalMap;
this.collectedOrds = new LongBitSet(valueCount); this.collectedOrds = new LongBitSet(valueCount);
this.scores = new Scores(valueCount, unset()); if (scoreMode != ScoreMode.None) {
this.scores = new Scores(valueCount, unset());
} else {
this.scores = null;
}
if (scoreMode == ScoreMode.Avg || doMinMax) {
this.occurrences = new Occurrences(valueCount);
} else {
this.occurrences = null;
}
} }
public LongBitSet getCollectorOrdinals() { public boolean match(int globalOrd) {
return collectedOrds; if (collectedOrds.get(globalOrd)) {
if (doMinMax) {
final int occurrence = occurrences.getOccurrence(globalOrd);
return occurrence >= min && occurrence <= max;
} else {
return true;
}
}
return false;
} }
public float score(int globalOrdinal) { public float score(int globalOrdinal) {
@ -96,6 +121,9 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
float existingScore = scores.getScore(globalOrd); float existingScore = scores.getScore(globalOrd);
float newScore = scorer.score(); float newScore = scorer.score();
doScore(globalOrd, existingScore, newScore); doScore(globalOrd, existingScore, newScore);
if (occurrences != null) {
occurrences.increment(globalOrd);
}
} }
} }
@ -122,6 +150,9 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
float existingScore = scores.getScore(segmentOrd); float existingScore = scores.getScore(segmentOrd);
float newScore = scorer.score(); float newScore = scorer.score();
doScore(segmentOrd, existingScore, newScore); doScore(segmentOrd, existingScore, newScore);
if (occurrences != null) {
occurrences.increment(segmentOrd);
}
} }
} }
@ -133,8 +164,8 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Min extends GlobalOrdinalsWithScoreCollector { static final class Min extends GlobalOrdinalsWithScoreCollector {
public Min(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) { public Min(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount); super(field, ordinalMap, valueCount, ScoreMode.Min, min, max);
} }
@Override @Override
@ -150,8 +181,8 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Max extends GlobalOrdinalsWithScoreCollector { static final class Max extends GlobalOrdinalsWithScoreCollector {
public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) { public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount); super(field, ordinalMap, valueCount, ScoreMode.Max, min, max);
} }
@Override @Override
@ -167,8 +198,8 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Sum extends GlobalOrdinalsWithScoreCollector { static final class Sum extends GlobalOrdinalsWithScoreCollector {
public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) { public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount); super(field, ordinalMap, valueCount, ScoreMode.Total, min, max);
} }
@Override @Override
@ -184,16 +215,12 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Avg extends GlobalOrdinalsWithScoreCollector { static final class Avg extends GlobalOrdinalsWithScoreCollector {
private final Occurrences occurrences; public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.Avg, min, max);
public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
super(field, ordinalMap, valueCount);
this.occurrences = new Occurrences(valueCount);
} }
@Override @Override
protected void doScore(int globalOrd, float existingScore, float newScore) { protected void doScore(int globalOrd, float existingScore, float newScore) {
occurrences.increment(globalOrd);
scores.setScore(globalOrd, existingScore + newScore); scores.setScore(globalOrd, existingScore + newScore);
} }
@ -208,6 +235,71 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
} }
} }
/**
 * Collector used for {@code ScoreMode.None}: it records which join ordinals were hit by the
 * "from" query and, when occurrence tracking is enabled, how often each ordinal was hit.
 * Scores are never read from the scorer; {@link #score(int)} always yields {@code 1f}.
 */
static final class NoScore extends GlobalOrdinalsWithScoreCollector {

  /**
   * @param field      the sorted doc values join field
   * @param ordinalMap maps segment ordinals to global ordinals; when {@code null} the segment
   *                   ordinals are used directly
   * @param valueCount number of distinct join values, used to size the ordinal bit set
   * @param min        minimum number of matching "from" documents (inclusive)
   * @param max        maximum number of matching "from" documents (inclusive)
   */
  public NoScore(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
    super(field, ordinalMap, valueCount, ScoreMode.None, min, max);
  }

  @Override
  public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
    final SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
    if (ordinalMap != null) {
      final LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
      return new LeafCollector() {

        @Override
        public void setScorer(Scorer scorer) throws IOException {
          // Scores are not needed in this mode.
        }

        @Override
        public void collect(int doc) throws IOException {
          final long segmentOrd = docTermOrds.getOrd(doc);
          if (segmentOrd != -1) {
            final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd);
            collectedOrds.set(globalOrd);
            // The superclass only allocates occurrences when min/max filtering (or Avg) is
            // active, so guard against null like the scoring leaf collectors do.
            if (occurrences != null) {
              occurrences.increment(globalOrd);
            }
          }
        }
      };
    } else {
      return new LeafCollector() {

        @Override
        public void setScorer(Scorer scorer) throws IOException {
          // Scores are not needed in this mode.
        }

        @Override
        public void collect(int doc) throws IOException {
          final int segmentOrd = docTermOrds.getOrd(doc);
          if (segmentOrd != -1) {
            collectedOrds.set(segmentOrd);
            // See the ordinal map branch: occurrences may be null when filtering is disabled.
            if (occurrences != null) {
              occurrences.increment(segmentOrd);
            }
          }
        }
      };
    }
  }

  /** No-op: this collector does not aggregate scores. */
  @Override
  protected void doScore(int globalOrd, float existingScore, float newScore) {
  }

  /** Every matching ordinal gets the same constant score. */
  @Override
  public float score(int globalOrdinal) {
    return 1f;
  }

  @Override
  protected float unset() {
    return 0f;
  }

  @Override
  public boolean needsScores() {
    return false;
  }
}
// Because the global ordinal is directly used as a key to a score we should be somewhat smart about allocation // Because the global ordinal is directly used as a key to a score we should be somewhat smart about allocation
// the scores array. Most of the times not all docs match so splitting the scores array up in blocks can prevent creation of huge arrays. // the scores array. Most of the times not all docs match so splitting the scores array up in blocks can prevent creation of huge arrays.
// Also working with smaller arrays is supposed to be more gc friendly // Also working with smaller arrays is supposed to be more gc friendly

View File

@ -17,9 +17,6 @@ package org.apache.lucene.search.join;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
@ -37,6 +34,9 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues; import org.apache.lucene.util.LongValues;
import java.io.IOException;
import java.util.Set;
final class GlobalOrdinalsWithScoreQuery extends Query { final class GlobalOrdinalsWithScoreQuery extends Query {
private final GlobalOrdinalsWithScoreCollector collector; private final GlobalOrdinalsWithScoreCollector collector;
@ -47,14 +47,18 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
// just for hashcode and equals: // just for hashcode and equals:
private final Query fromQuery; private final Query fromQuery;
private final int min;
private final int max;
private final IndexReader indexReader; private final IndexReader indexReader;
GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, IndexReader indexReader) { GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, int min, int max, IndexReader indexReader) {
this.collector = collector; this.collector = collector;
this.joinField = joinField; this.joinField = joinField;
this.globalOrds = globalOrds; this.globalOrds = globalOrds;
this.toQuery = toQuery; this.toQuery = toQuery;
this.fromQuery = fromQuery; this.fromQuery = fromQuery;
this.min = min;
this.max = max;
this.indexReader = indexReader; this.indexReader = indexReader;
} }
@ -71,8 +75,10 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
GlobalOrdinalsWithScoreQuery that = (GlobalOrdinalsWithScoreQuery) o; GlobalOrdinalsWithScoreQuery that = (GlobalOrdinalsWithScoreQuery) o;
if (!fromQuery.equals(that.fromQuery)) return false; if (min != that.min) return false;
if (max != that.max) return false;
if (!joinField.equals(that.joinField)) return false; if (!joinField.equals(that.joinField)) return false;
if (!fromQuery.equals(that.fromQuery)) return false;
if (!toQuery.equals(that.toQuery)) return false; if (!toQuery.equals(that.toQuery)) return false;
if (!indexReader.equals(that.indexReader)) return false; if (!indexReader.equals(that.indexReader)) return false;
@ -85,6 +91,8 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
result = 31 * result + joinField.hashCode(); result = 31 * result + joinField.hashCode();
result = 31 * result + toQuery.hashCode(); result = 31 * result + toQuery.hashCode();
result = 31 * result + fromQuery.hashCode(); result = 31 * result + fromQuery.hashCode();
result = 31 * result + min;
result = 31 * result + max;
result = 31 * result + indexReader.hashCode(); result = 31 * result + indexReader.hashCode();
return result; return result;
} }
@ -92,7 +100,10 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
@Override @Override
public String toString(String field) { public String toString(String field) {
return "GlobalOrdinalsQuery{" + return "GlobalOrdinalsQuery{" +
"joinField=" + joinField + "joinField=" + joinField +
"min=" + min +
"max=" + max +
"fromQuery=" + fromQuery +
'}'; '}';
} }
@ -168,7 +179,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
final GlobalOrdinalsWithScoreCollector collector; final GlobalOrdinalsWithScoreCollector collector;
public OrdinalMapScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) { public OrdinalMapScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer, LongValues segmentOrdToGlobalOrdLookup) {
super(weight, collector.getCollectorOrdinals(), values, approximationScorer); super(weight, values, approximationScorer);
this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup; this.segmentOrdToGlobalOrdLookup = segmentOrdToGlobalOrdLookup;
this.collector = collector; this.collector = collector;
} }
@ -178,9 +189,9 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) { for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
final long segmentOrd = values.getOrd(docID); final long segmentOrd = values.getOrd(docID);
if (segmentOrd != -1) { if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd); final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd);
if (foundOrds.get(globalOrd)) { if (collector.match(globalOrd)) {
score = collector.score((int) globalOrd); score = collector.score(globalOrd);
return docID; return docID;
} }
} }
@ -196,9 +207,9 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
public boolean matches() throws IOException { public boolean matches() throws IOException {
final long segmentOrd = values.getOrd(approximationScorer.docID()); final long segmentOrd = values.getOrd(approximationScorer.docID());
if (segmentOrd != -1) { if (segmentOrd != -1) {
final long globalOrd = segmentOrdToGlobalOrdLookup.get(segmentOrd); final int globalOrd = (int) segmentOrdToGlobalOrdLookup.get(segmentOrd);
if (foundOrds.get(globalOrd)) { if (collector.match(globalOrd)) {
score = collector.score((int) globalOrd); score = collector.score(globalOrd);
return true; return true;
} }
} }
@ -214,7 +225,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
final GlobalOrdinalsWithScoreCollector collector; final GlobalOrdinalsWithScoreCollector collector;
public SegmentOrdinalScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer) { public SegmentOrdinalScorer(Weight weight, GlobalOrdinalsWithScoreCollector collector, SortedDocValues values, Scorer approximationScorer) {
super(weight, collector.getCollectorOrdinals(), values, approximationScorer); super(weight, values, approximationScorer);
this.collector = collector; this.collector = collector;
} }
@ -223,7 +234,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) { for (int docID = approximationScorer.advance(target); docID < NO_MORE_DOCS; docID = approximationScorer.nextDoc()) {
final int segmentOrd = values.getOrd(docID); final int segmentOrd = values.getOrd(docID);
if (segmentOrd != -1) { if (segmentOrd != -1) {
if (foundOrds.get(segmentOrd)) { if (collector.match(segmentOrd)) {
score = collector.score(segmentOrd); score = collector.score(segmentOrd);
return docID; return docID;
} }
@ -240,7 +251,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
public boolean matches() throws IOException { public boolean matches() throws IOException {
final int segmentOrd = values.getOrd(approximationScorer.docID()); final int segmentOrd = values.getOrd(approximationScorer.docID());
if (segmentOrd != -1) { if (segmentOrd != -1) {
if (foundOrds.get(segmentOrd)) { if (collector.match(segmentOrd)) {
score = collector.score(segmentOrd); score = collector.score(segmentOrd);
return true; return true;
} }

View File

@ -29,7 +29,7 @@ import java.io.IOException;
import java.util.Locale; import java.util.Locale;
/** /**
* Utility for query time joining using TermsQuery and TermsCollector. * Utility for query time joining.
* *
* @lucene.experimental * @lucene.experimental
*/ */
@ -97,17 +97,10 @@ public final class JoinUtil {
} }
/** /**
* A query time join using global ordinals over a dedicated join field. * Delegates to {@link #createJoinQuery(String, Query, Query, IndexSearcher, ScoreMode, MultiDocValues.OrdinalMap, int, int)},
* but disables the min and max filtering.
* *
* This join has certain restrictions and requirements: * @param joinField The {@link SortedDocValues} field containing the join values
* 1) A document can only refer to one other document. (but can be referred by one or more documents)
* 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field
* that identifies the "from" and "to" side and then the fromQuery and toQuery must take the this into account.
* 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field
* should store the join values as UTF-8 strings.
* 4) An ordinal map must be provided that is created on top of the join field.
*
* @param joinField The {@link org.apache.lucene.index.SortedDocValues} field containing the join values
* @param fromQuery The query containing the actual user query. Also the fromQuery can only match "from" documents. * @param fromQuery The query containing the actual user query. Also the fromQuery can only match "from" documents.
* @param toQuery The query identifying all documents on the "to" side. * @param toQuery The query identifying all documents on the "to" side.
* @param searcher The index searcher used to execute the from query * @param searcher The index searcher used to execute the from query
@ -123,6 +116,47 @@ public final class JoinUtil {
IndexSearcher searcher, IndexSearcher searcher,
ScoreMode scoreMode, ScoreMode scoreMode,
MultiDocValues.OrdinalMap ordinalMap) throws IOException { MultiDocValues.OrdinalMap ordinalMap) throws IOException {
return createJoinQuery(joinField, fromQuery, toQuery, searcher, scoreMode, ordinalMap, 0, Integer.MAX_VALUE);
}
/**
* A query time join using global ordinals over a dedicated join field.
*
* This join has certain restrictions and requirements:
* 1) A document can only refer to one other document. (but can be referred by one or more documents)
* 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field
* that identifies the "from" and "to" side and then the fromQuery and toQuery must take this into account.
* 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field
* should store the join values as UTF-8 strings.
* 4) An ordinal map must be provided that is created on top of the join field.
*
* Note: min and max filtering and the avg score mode will require this join to keep track of the number of times
* a document matches per join value. This will increase the per join cost in terms of execution time and memory.
*
* @param joinField The {@link SortedDocValues} field containing the join values
* @param fromQuery The query containing the actual user query. Also the fromQuery can only match "from" documents.
* @param toQuery The query identifying all documents on the "to" side.
* @param searcher The index searcher used to execute the from query
* @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query
* @param ordinalMap The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map
* needs to be provided.
* @param min Optionally the minimum number of "from" documents that are required to match for a "to" document
* to be a match. The min is inclusive. Setting min to 0 and max to <code>Integer.MAX_VALUE</code>
* disables the min and max "from" documents filtering
* @param max Optionally the maximum number of "from" documents that are allowed to match for a "to" document
* to be a match. The max is inclusive. Setting min to 0 and max to <code>Integer.MAX_VALUE</code>
* disables the min and max "from" documents filtering
* @return a {@link Query} instance that can be used to join documents based on the join field
* @throws IOException If I/O related errors occur
*/
public static Query createJoinQuery(String joinField,
Query fromQuery,
Query toQuery,
IndexSearcher searcher,
ScoreMode scoreMode,
MultiDocValues.OrdinalMap ordinalMap,
int min,
int max) throws IOException {
IndexReader indexReader = searcher.getIndexReader(); IndexReader indexReader = searcher.getIndexReader();
int numSegments = indexReader.leaves().size(); int numSegments = indexReader.leaves().size();
final long valueCount; final long valueCount;
@ -146,31 +180,34 @@ public final class JoinUtil {
} }
final Query rewrittenFromQuery = searcher.rewrite(fromQuery); final Query rewrittenFromQuery = searcher.rewrite(fromQuery);
if (scoreMode == ScoreMode.None) {
GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
searcher.search(rewrittenFromQuery, globalOrdinalsCollector);
return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
}
GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector; GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector;
switch (scoreMode) { switch (scoreMode) {
case Total: case Total:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount); globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount, min, max);
break; break;
case Min: case Min:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Min(joinField, ordinalMap, valueCount); globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Min(joinField, ordinalMap, valueCount, min, max);
break; break;
case Max: case Max:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount); globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount, min, max);
break; break;
case Avg: case Avg:
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount); globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount, min, max);
break; break;
case None:
if (min <= 0 && max == Integer.MAX_VALUE) {
GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
searcher.search(rewrittenFromQuery, globalOrdinalsCollector);
return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader);
} else {
globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.NoScore(joinField, ordinalMap, valueCount, min, max);
break;
}
default: default:
throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode)); throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
} }
searcher.search(rewrittenFromQuery, globalOrdinalsWithScoreCollector); searcher.search(rewrittenFromQuery, globalOrdinalsWithScoreCollector);
return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, joinField, ordinalMap, toQuery, rewrittenFromQuery, indexReader); return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, joinField, ordinalMap, toQuery, rewrittenFromQuery, min, max, indexReader);
} }
} }

View File

@ -61,6 +61,7 @@ import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSet; import org.apache.lucene.util.BitSet;
@ -412,7 +413,7 @@ public class TestJoinUtil extends LuceneTestCase {
String childId = Integer.toString(p + c); String childId = Integer.toString(p + c);
Document childDoc = new Document(); Document childDoc = new Document();
childDoc.add(new StringField("id", childId, Field.Store.YES)); childDoc.add(new StringField("id", childId, Field.Store.YES));
parentDoc.add(new StringField("type", "from", Field.Store.NO)); childDoc.add(new StringField("type", "from", Field.Store.NO));
childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId))); childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
int price = random().nextInt(1000); int price = random().nextInt(1000);
childDoc.add(new NumericDocValuesField(priceField, price)); childDoc.add(new NumericDocValuesField(priceField, price));
@ -459,6 +460,76 @@ public class TestJoinUtil extends LuceneTestCase {
dir.close(); dir.close();
} }
// Indexes a random number of "to" parents, each with a random number of "from" children that
// share the parent's join value, then checks for random score modes and [min, max] bounds that
// the join query hits exactly the parents whose child count lies within the bounds.
public void testMinMaxDocs() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(
      random(),
      dir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
  );

  final int minChildDocsPerParent = 2;
  final int maxChildDocsPerParent = 16;
  final int parentCount = RandomInts.randomIntBetween(random(), 16, 64);
  final int[] childDocsPerParent = new int[parentCount];
  for (int parent = 0; parent < parentCount; parent++) {
    final String parentId = Integer.toString(parent);
    Document parentDoc = new Document();
    parentDoc.add(new StringField("id", parentId, Field.Store.YES));
    parentDoc.add(new StringField("type", "to", Field.Store.NO));
    parentDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
    writer.addDocument(parentDoc);

    // Remember how many children this parent received; it drives the expected hit count below.
    final int childCount = RandomInts.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent);
    childDocsPerParent[parent] = childCount;
    for (int child = 0; child < childCount; child++) {
      Document childDoc = new Document();
      childDoc.add(new StringField("id", Integer.toString(parent + child), Field.Store.YES));
      childDoc.add(new StringField("type", "from", Field.Store.NO));
      childDoc.add(new SortedDocValuesField("join_field", new BytesRef(parentId)));
      writer.addDocument(childDoc);
    }
  }
  writer.close();

  DirectoryReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = new IndexSearcher(reader);

  // Build the ordinal map over the per-segment join field doc values.
  SortedDocValues[] perSegmentValues = new SortedDocValues[reader.leaves().size()];
  for (LeafReaderContext leafContext : reader.leaves()) {
    perSegmentValues[leafContext.ord] = DocValues.getSorted(leafContext.reader(), "join_field");
  }
  MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
      reader.getCoreCacheKey(), perSegmentValues, PackedInts.DEFAULT
  );

  Query fromQuery = new TermQuery(new Term("type", "from"));
  Query toQuery = new TermQuery(new Term("type", "to"));

  final int iters = RandomInts.randomIntBetween(random(), 3, 9);
  for (int i = 1; i <= iters; i++) {
    final ScoreMode[] scoreModes = ScoreMode.values();
    final ScoreMode scoreMode = scoreModes[random().nextInt(scoreModes.length)];
    final int min = RandomInts.randomIntBetween(random(), minChildDocsPerParent, maxChildDocsPerParent - 1);
    final int max = RandomInts.randomIntBetween(random(), min, maxChildDocsPerParent);
    if (VERBOSE) {
      System.out.println("iter=" + i);
      System.out.println("scoreMode=" + scoreMode);
      System.out.println("min=" + min);
      System.out.println("max=" + max);
    }

    Query joinQuery = JoinUtil.createJoinQuery("join_field", fromQuery, toQuery, searcher, scoreMode, ordinalMap, min, max);
    TotalHitCountCollector hitCounter = new TotalHitCountCollector();
    searcher.search(joinQuery, hitCounter);

    // A parent is expected to match when its child count lies within the inclusive bounds.
    int expectedCount = 0;
    for (int childCount : childDocsPerParent) {
      if (min <= childCount && childCount <= max) {
        expectedCount++;
      }
    }
    assertEquals(expectedCount, hitCounter.getTotalHits());
  }

  reader.close();
  dir.close();
}
// TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE // TermsWithScoreCollector.MV.Avg forgets to grow beyond TermsWithScoreCollector.INITIAL_ARRAY_SIZE
public void testOverflowTermsWithScoreCollector() throws Exception { public void testOverflowTermsWithScoreCollector() throws Exception {
test300spartans(true, ScoreMode.Avg); test300spartans(true, ScoreMode.Avg);