mirror of https://github.com/apache/lucene.git
LUCENE-4043: Added scoring support via score mode for query time joining.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1343966 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5d3dba2c56
commit
36acada762
|
@ -880,6 +880,9 @@ New features
|
||||||
returning results after a specified FieldDoc for deep
|
returning results after a specified FieldDoc for deep
|
||||||
paging. (Mike McCandless)
|
paging. (Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-4043: Added scoring support via score mode for query time joining.
|
||||||
|
(Martijn van Groningen, Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
|
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
|
||||||
|
|
|
@ -220,6 +220,13 @@ public class DocTermOrds {
|
||||||
return numTermsInField;
|
return numTermsInField;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Whether this <code>DocTermOrds</code> instance is empty.
|
||||||
|
*/
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return index == null;
|
||||||
|
}
|
||||||
|
|
||||||
/** Subclass can override this */
|
/** Subclass can override this */
|
||||||
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
|
protected void visitTerm(TermsEnum te, int termNum) throws IOException {
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,12 +38,24 @@ public final class JoinUtil {
|
||||||
* <p/>
|
* <p/>
|
||||||
* Execute the returned query with a {@link IndexSearcher} to retrieve all documents that have the same terms in the
|
* Execute the returned query with a {@link IndexSearcher} to retrieve all documents that have the same terms in the
|
||||||
* to field that match with documents matching the specified fromQuery and have the same terms in the from field.
|
* to field that match with documents matching the specified fromQuery and have the same terms in the from field.
|
||||||
|
* <p/>
|
||||||
|
* In the case a single document relates to more than one document the <code>multipleValuesPerDocument</code> option
|
||||||
|
* should be set to true. When the <code>multipleValuesPerDocument</code> is set to <code>true</code> only the
|
||||||
|
* score from the first encountered join value originating from the 'from' side is mapped into the 'to' side.
|
||||||
|
* Even in the case when a second join value related to a specific document yields a higher score. Obviously this
|
||||||
|
* doesn't apply in the case that {@link ScoreMode#None} is used, since no scores are computed at all.
|
||||||
|
* <p/>
|
||||||
|
* Memory considerations: During joining all unique join values are kept in memory. On top of that when the scoreMode
|
||||||
|
* isn't set to {@link ScoreMode#None} a float value per unique join value is kept in memory for computing scores.
|
||||||
|
* When scoreMode is set to {@link ScoreMode#Avg} also an additional integer value is kept in memory per unique
|
||||||
|
* join value.
|
||||||
*
|
*
|
||||||
* @param fromField The from field to join from
|
* @param fromField The from field to join from
|
||||||
* @param multipleValuesPerDocument Whether the from field has multiple terms per document
|
* @param multipleValuesPerDocument Whether the from field has multiple terms per document
|
||||||
* @param toField The to field to join to
|
* @param toField The to field to join to
|
||||||
* @param fromQuery The query to match documents on the from side
|
* @param fromQuery The query to match documents on the from side
|
||||||
* @param fromSearcher The searcher that executed the specified fromQuery
|
* @param fromSearcher The searcher that executed the specified fromQuery
|
||||||
|
* @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query
|
||||||
* @return a {@link Query} instance that can be used to join documents based on the
|
* @return a {@link Query} instance that can be used to join documents based on the
|
||||||
* terms in the from and to field
|
* terms in the from and to field
|
||||||
* @throws IOException If I/O related errors occur
|
* @throws IOException If I/O related errors occur
|
||||||
|
@ -52,10 +64,29 @@ public final class JoinUtil {
|
||||||
boolean multipleValuesPerDocument,
|
boolean multipleValuesPerDocument,
|
||||||
String toField,
|
String toField,
|
||||||
Query fromQuery,
|
Query fromQuery,
|
||||||
IndexSearcher fromSearcher) throws IOException {
|
IndexSearcher fromSearcher,
|
||||||
TermsCollector termsCollector = TermsCollector.create(fromField, multipleValuesPerDocument);
|
ScoreMode scoreMode) throws IOException {
|
||||||
fromSearcher.search(fromQuery, termsCollector);
|
switch (scoreMode) {
|
||||||
return new TermsQuery(toField, termsCollector.getCollectorTerms());
|
case None:
|
||||||
|
TermsCollector termsCollector = TermsCollector.create(fromField, multipleValuesPerDocument);
|
||||||
|
fromSearcher.search(fromQuery, termsCollector);
|
||||||
|
return new TermsQuery(toField, termsCollector.getCollectorTerms());
|
||||||
|
case Total:
|
||||||
|
case Max:
|
||||||
|
case Avg:
|
||||||
|
TermsWithScoreCollector termsWithScoreCollector =
|
||||||
|
TermsWithScoreCollector.create(fromField, multipleValuesPerDocument, scoreMode);
|
||||||
|
fromSearcher.search(fromQuery, termsWithScoreCollector);
|
||||||
|
return new TermsIncludingScoreQuery(
|
||||||
|
toField,
|
||||||
|
multipleValuesPerDocument,
|
||||||
|
termsWithScoreCollector.getCollectedTerms(),
|
||||||
|
termsWithScoreCollector.getScoresPerTerm(),
|
||||||
|
fromQuery
|
||||||
|
);
|
||||||
|
default:
|
||||||
|
throw new IllegalArgumentException(String.format("Score mode %s isn't supported.", scoreMode));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,45 @@
|
||||||
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * How to aggregate multiple child hit scores into a single parent score.
 * <p/>
 * The declaration order of the constants is part of the contract of any code that relies on
 * {@link #ordinal()} or {@link #values()}; do not reorder them.
 */
public enum ScoreMode {

  /**
   * Do no scoring.
   */
  None,

  /**
   * Parent hit's score is the average of all child scores.
   */
  Avg,

  /**
   * Parent hit's score is the max of all child scores.
   */
  Max,

  /**
   * Parent hit's score is the sum of all child scores.
   */
  Total

}
|
|
@ -0,0 +1,271 @@
|
||||||
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.ComplexExplanation;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.BytesRefHash;
|
||||||
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
class TermsIncludingScoreQuery extends Query {
|
||||||
|
|
||||||
|
final String field;
|
||||||
|
final boolean multipleValuesPerDocument;
|
||||||
|
final BytesRefHash terms;
|
||||||
|
final float[] scores;
|
||||||
|
final int[] ords;
|
||||||
|
final Query originalQuery;
|
||||||
|
final Query unwrittenOriginalQuery;
|
||||||
|
|
||||||
|
TermsIncludingScoreQuery(String field, boolean multipleValuesPerDocument, BytesRefHash terms, float[] scores, Query originalQuery) {
|
||||||
|
this.field = field;
|
||||||
|
this.multipleValuesPerDocument = multipleValuesPerDocument;
|
||||||
|
this.terms = terms;
|
||||||
|
this.scores = scores;
|
||||||
|
this.originalQuery = originalQuery;
|
||||||
|
this.ords = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||||
|
this.unwrittenOriginalQuery = originalQuery;
|
||||||
|
}
|
||||||
|
|
||||||
|
private TermsIncludingScoreQuery(String field, boolean multipleValuesPerDocument, BytesRefHash terms, float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery) {
|
||||||
|
this.field = field;
|
||||||
|
this.multipleValuesPerDocument = multipleValuesPerDocument;
|
||||||
|
this.terms = terms;
|
||||||
|
this.scores = scores;
|
||||||
|
this.originalQuery = originalQuery;
|
||||||
|
this.ords = ords;
|
||||||
|
this.unwrittenOriginalQuery = unwrittenOriginalQuery;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(String string) {
|
||||||
|
return String.format("TermsIncludingScoreQuery{field=%s;originalQuery=%s}", field, unwrittenOriginalQuery);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void extractTerms(Set<Term> terms) {
|
||||||
|
originalQuery.extractTerms(terms);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
final Query originalQueryRewrite = originalQuery.rewrite(reader);
|
||||||
|
if (originalQueryRewrite != originalQuery) {
|
||||||
|
Query rewritten = new TermsIncludingScoreQuery(field, multipleValuesPerDocument, terms, scores,
|
||||||
|
ords, originalQueryRewrite, originalQuery);
|
||||||
|
rewritten.setBoost(getBoost());
|
||||||
|
return rewritten;
|
||||||
|
} else {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Weight createWeight(IndexSearcher searcher) throws IOException {
|
||||||
|
final Weight originalWeight = originalQuery.createWeight(searcher);
|
||||||
|
return new Weight() {
|
||||||
|
|
||||||
|
private TermsEnum segmentTermsEnum;
|
||||||
|
|
||||||
|
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
|
||||||
|
SVInnerScorer scorer = (SVInnerScorer) scorer(context, true, false, context.reader().getLiveDocs());
|
||||||
|
if (scorer != null) {
|
||||||
|
if (scorer.advance(doc) == doc) {
|
||||||
|
return scorer.explain();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new ComplexExplanation(false, 0.0f, "Not a match");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query getQuery() {
|
||||||
|
return TermsIncludingScoreQuery.this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float getValueForNormalization() throws IOException {
|
||||||
|
return originalWeight.getValueForNormalization() * TermsIncludingScoreQuery.this.getBoost() * TermsIncludingScoreQuery.this.getBoost();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void normalize(float norm, float topLevelBoost) {
|
||||||
|
originalWeight.normalize(norm, topLevelBoost * TermsIncludingScoreQuery.this.getBoost());
|
||||||
|
}
|
||||||
|
|
||||||
|
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
||||||
|
Terms terms = context.reader().terms(field);
|
||||||
|
if (terms == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
segmentTermsEnum = terms.iterator(segmentTermsEnum);
|
||||||
|
if (multipleValuesPerDocument) {
|
||||||
|
return new MVInnerScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc());
|
||||||
|
} else {
|
||||||
|
return new SVInnerScorer(this, acceptDocs, segmentTermsEnum);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// This impl assumes that the 'join' values are used uniquely per doc per field. Used for one to many relations.
|
||||||
|
class SVInnerScorer extends Scorer {
|
||||||
|
|
||||||
|
final BytesRef spare = new BytesRef();
|
||||||
|
final Bits acceptDocs;
|
||||||
|
final TermsEnum termsEnum;
|
||||||
|
|
||||||
|
int upto;
|
||||||
|
DocsEnum docsEnum;
|
||||||
|
DocsEnum reuse;
|
||||||
|
int scoreUpto;
|
||||||
|
|
||||||
|
SVInnerScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum) {
|
||||||
|
super(weight);
|
||||||
|
this.acceptDocs = acceptDocs;
|
||||||
|
this.termsEnum = termsEnum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float score() throws IOException {
|
||||||
|
return scores[ords[scoreUpto]];
|
||||||
|
}
|
||||||
|
|
||||||
|
public Explanation explain() throws IOException {
|
||||||
|
return new ComplexExplanation(true, score(), "Score based on join value " + termsEnum.term().utf8ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
public int docID() {
|
||||||
|
return docsEnum != null ? docsEnum.docID() : DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
if (docsEnum != null) {
|
||||||
|
int docId = docsEnum.nextDoc();
|
||||||
|
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
docsEnum = null;
|
||||||
|
} else {
|
||||||
|
return docId;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
if (upto == terms.size()) {
|
||||||
|
return DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
scoreUpto = upto;
|
||||||
|
TermsEnum.SeekStatus status = termsEnum.seekCeil(terms.get(ords[upto++], spare), true);
|
||||||
|
if (status == TermsEnum.SeekStatus.FOUND) {
|
||||||
|
docsEnum = reuse = termsEnum.docs(acceptDocs, reuse, false);
|
||||||
|
}
|
||||||
|
} while (docsEnum == null);
|
||||||
|
|
||||||
|
return docsEnum.nextDoc();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int advance(int target) throws IOException {
|
||||||
|
int docId;
|
||||||
|
do {
|
||||||
|
docId = nextDoc();
|
||||||
|
if (docId < target) {
|
||||||
|
int tempDocId = docsEnum.advance(target);
|
||||||
|
if (tempDocId == target) {
|
||||||
|
docId = tempDocId;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else if (docId == target) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
docsEnum = null; // goto the next ord.
|
||||||
|
} while (docId != DocIdSetIterator.NO_MORE_DOCS);
|
||||||
|
return docId;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// This impl that tracks whether a docid has already been emitted. This check makes sure that docs aren't emitted
|
||||||
|
// twice for different join values. This means that the first encountered join value determines the score of a document
|
||||||
|
// even if other join values yield a higher score.
|
||||||
|
class MVInnerScorer extends SVInnerScorer {
|
||||||
|
|
||||||
|
final FixedBitSet alreadyEmittedDocs;
|
||||||
|
|
||||||
|
MVInnerScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, int maxDoc) {
|
||||||
|
super(weight, acceptDocs, termsEnum);
|
||||||
|
alreadyEmittedDocs = new FixedBitSet(maxDoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int nextDoc() throws IOException {
|
||||||
|
if (docsEnum != null) {
|
||||||
|
int docId;
|
||||||
|
do {
|
||||||
|
docId = docsEnum.nextDoc();
|
||||||
|
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while (alreadyEmittedDocs.get(docId));
|
||||||
|
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
docsEnum = null;
|
||||||
|
} else {
|
||||||
|
alreadyEmittedDocs.set(docId);
|
||||||
|
return docId;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
do {
|
||||||
|
if (upto == terms.size()) {
|
||||||
|
return DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
}
|
||||||
|
|
||||||
|
scoreUpto = upto;
|
||||||
|
TermsEnum.SeekStatus status = termsEnum.seekCeil(terms.get(ords[upto++], spare), true);
|
||||||
|
if (status == TermsEnum.SeekStatus.FOUND) {
|
||||||
|
docsEnum = reuse = termsEnum.docs(acceptDocs, reuse, false);
|
||||||
|
}
|
||||||
|
} while (docsEnum == null);
|
||||||
|
|
||||||
|
int docId;
|
||||||
|
do {
|
||||||
|
docId = docsEnum.nextDoc();
|
||||||
|
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while (alreadyEmittedDocs.get(docId));
|
||||||
|
if (docId == DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
|
docsEnum = null;
|
||||||
|
} else {
|
||||||
|
alreadyEmittedDocs.set(docId);
|
||||||
|
return docId;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,292 @@
|
||||||
|
package org.apache.lucene.search.join;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.index.DocTermOrds;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.Collector;
|
||||||
|
import org.apache.lucene.search.FieldCache;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.BytesRefHash;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
abstract class TermsWithScoreCollector extends Collector {
|
||||||
|
|
||||||
|
private final static int INITIAL_ARRAY_SIZE = 256;
|
||||||
|
|
||||||
|
final String field;
|
||||||
|
final BytesRefHash collectedTerms = new BytesRefHash();
|
||||||
|
final ScoreMode scoreMode;
|
||||||
|
|
||||||
|
Scorer scorer;
|
||||||
|
float[] scoreSums = new float[INITIAL_ARRAY_SIZE];
|
||||||
|
|
||||||
|
TermsWithScoreCollector(String field, ScoreMode scoreMode) {
|
||||||
|
this.field = field;
|
||||||
|
this.scoreMode = scoreMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BytesRefHash getCollectedTerms() {
|
||||||
|
return collectedTerms;
|
||||||
|
}
|
||||||
|
|
||||||
|
public float[] getScoresPerTerm() {
|
||||||
|
return scoreSums;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean acceptsDocsOutOfOrder() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Chooses the right {@link TermsWithScoreCollector} implementation.
|
||||||
|
*
|
||||||
|
* @param field The field to collect terms for
|
||||||
|
* @param multipleValuesPerDocument Whether the field to collect terms for has multiple values per document.
|
||||||
|
* @return a {@link TermsWithScoreCollector} instance
|
||||||
|
*/
|
||||||
|
static TermsWithScoreCollector create(String field, boolean multipleValuesPerDocument, ScoreMode scoreMode) {
|
||||||
|
if (multipleValuesPerDocument) {
|
||||||
|
switch (scoreMode) {
|
||||||
|
case Avg:
|
||||||
|
return new MV.Avg(field);
|
||||||
|
default:
|
||||||
|
return new MV(field, scoreMode);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
switch (scoreMode) {
|
||||||
|
case Avg:
|
||||||
|
return new SV.Avg(field);
|
||||||
|
default:
|
||||||
|
return new SV(field, scoreMode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// impl that works with single value per document
|
||||||
|
static class SV extends TermsWithScoreCollector {
|
||||||
|
|
||||||
|
final BytesRef spare = new BytesRef();
|
||||||
|
FieldCache.DocTerms fromDocTerms;
|
||||||
|
|
||||||
|
SV(String field, ScoreMode scoreMode) {
|
||||||
|
super(field, scoreMode);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
int ord = collectedTerms.add(fromDocTerms.getTerm(doc, spare));
|
||||||
|
if (ord < 0) {
|
||||||
|
ord = -ord - 1;
|
||||||
|
} else {
|
||||||
|
if (ord >= scoreSums.length) {
|
||||||
|
scoreSums = ArrayUtil.grow(scoreSums);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float current = scorer.score();
|
||||||
|
float existing = scoreSums[ord];
|
||||||
|
if (Float.compare(existing, 0.0f) == 0) {
|
||||||
|
scoreSums[ord] = current;
|
||||||
|
} else {
|
||||||
|
switch (scoreMode) {
|
||||||
|
case Total:
|
||||||
|
scoreSums[ord] = scoreSums[ord] + current;
|
||||||
|
break;
|
||||||
|
case Max:
|
||||||
|
if (current > existing) {
|
||||||
|
scoreSums[ord] = current;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||||
|
fromDocTerms = FieldCache.DEFAULT.getTerms(context.reader(), field);
|
||||||
|
}
|
||||||
|
|
||||||
|
static class Avg extends SV {
|
||||||
|
|
||||||
|
int[] scoreCounts = new int[INITIAL_ARRAY_SIZE];
|
||||||
|
|
||||||
|
Avg(String field) {
|
||||||
|
super(field, ScoreMode.Avg);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
int ord = collectedTerms.add(fromDocTerms.getTerm(doc, spare));
|
||||||
|
if (ord < 0) {
|
||||||
|
ord = -ord - 1;
|
||||||
|
} else {
|
||||||
|
if (ord >= scoreSums.length) {
|
||||||
|
scoreSums = ArrayUtil.grow(scoreSums);
|
||||||
|
scoreCounts = ArrayUtil.grow(scoreCounts);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float current = scorer.score();
|
||||||
|
float existing = scoreSums[ord];
|
||||||
|
if (Float.compare(existing, 0.0f) == 0) {
|
||||||
|
scoreSums[ord] = current;
|
||||||
|
scoreCounts[ord] = 1;
|
||||||
|
} else {
|
||||||
|
scoreSums[ord] = scoreSums[ord] + current;
|
||||||
|
scoreCounts[ord]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float[] getScoresPerTerm() {
|
||||||
|
if (scoreCounts != null) {
|
||||||
|
for (int i = 0; i < scoreCounts.length; i++) {
|
||||||
|
scoreSums[i] = scoreSums[i] / scoreCounts[i];
|
||||||
|
}
|
||||||
|
scoreCounts = null;
|
||||||
|
}
|
||||||
|
return scoreSums;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// impl that works with multiple values per document
|
||||||
|
static class MV extends TermsWithScoreCollector {
|
||||||
|
|
||||||
|
DocTermOrds fromDocTermOrds;
|
||||||
|
TermsEnum docTermsEnum;
|
||||||
|
DocTermOrds.TermOrdsIterator reuse;
|
||||||
|
|
||||||
|
MV(String field, ScoreMode scoreMode) {
|
||||||
|
super(field, scoreMode);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
reuse = fromDocTermOrds.lookup(doc, reuse);
|
||||||
|
int[] buffer = new int[5];
|
||||||
|
|
||||||
|
int chunk;
|
||||||
|
do {
|
||||||
|
chunk = reuse.read(buffer);
|
||||||
|
if (chunk == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int idx = 0; idx < chunk; idx++) {
|
||||||
|
int key = buffer[idx];
|
||||||
|
docTermsEnum.seekExact((long) key);
|
||||||
|
int ord = collectedTerms.add(docTermsEnum.term());
|
||||||
|
if (ord < 0) {
|
||||||
|
ord = -ord - 1;
|
||||||
|
} else {
|
||||||
|
if (ord >= scoreSums.length) {
|
||||||
|
scoreSums = ArrayUtil.grow(scoreSums);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final float current = scorer.score();
|
||||||
|
final float existing = scoreSums[ord];
|
||||||
|
if (Float.compare(existing, 0.0f) == 0) {
|
||||||
|
scoreSums[ord] = current;
|
||||||
|
} else {
|
||||||
|
switch (scoreMode) {
|
||||||
|
case Total:
|
||||||
|
scoreSums[ord] = existing + current;
|
||||||
|
break;
|
||||||
|
case Max:
|
||||||
|
if (current > existing) {
|
||||||
|
scoreSums[ord] = current;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (chunk >= buffer.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||||
|
fromDocTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), field);
|
||||||
|
docTermsEnum = fromDocTermOrds.getOrdTermsEnum(context.reader());
|
||||||
|
reuse = null; // LUCENE-3377 needs to be fixed first then this statement can be removed...
|
||||||
|
}
|
||||||
|
|
||||||
|
static class Avg extends MV {
|
||||||
|
|
||||||
|
int[] scoreCounts = new int[INITIAL_ARRAY_SIZE];
|
||||||
|
|
||||||
|
Avg(String field) {
|
||||||
|
super(field, ScoreMode.Avg);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
reuse = fromDocTermOrds.lookup(doc, reuse);
|
||||||
|
int[] buffer = new int[5];
|
||||||
|
|
||||||
|
int chunk;
|
||||||
|
do {
|
||||||
|
chunk = reuse.read(buffer);
|
||||||
|
if (chunk == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int idx = 0; idx < chunk; idx++) {
|
||||||
|
int key = buffer[idx];
|
||||||
|
docTermsEnum.seekExact((long) key);
|
||||||
|
int ord = collectedTerms.add(docTermsEnum.term());
|
||||||
|
if (ord < 0) {
|
||||||
|
ord = -ord - 1;
|
||||||
|
} else {
|
||||||
|
if (ord >= scoreSums.length) {
|
||||||
|
scoreSums = ArrayUtil.grow(scoreSums);
|
||||||
|
scoreCounts = ArrayUtil.grow(scoreCounts);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float current = scorer.score();
|
||||||
|
float existing = scoreSums[ord];
|
||||||
|
if (Float.compare(existing, 0.0f) == 0) {
|
||||||
|
scoreSums[ord] = current;
|
||||||
|
scoreCounts[ord] = 1;
|
||||||
|
} else {
|
||||||
|
scoreSums[ord] = scoreSums[ord] + current;
|
||||||
|
scoreCounts[ord]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (chunk >= buffer.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public float[] getScoresPerTerm() {
|
||||||
|
if (scoreCounts != null) {
|
||||||
|
for (int i = 0; i < scoreCounts.length; i++) {
|
||||||
|
scoreSums[i] = scoreSums[i] / scoreCounts[i];
|
||||||
|
}
|
||||||
|
scoreCounts = null;
|
||||||
|
}
|
||||||
|
return scoreSums;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -33,7 +33,6 @@ import org.apache.lucene.search.Filter;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.Scorer;
|
import org.apache.lucene.search.Scorer;
|
||||||
import org.apache.lucene.search.Scorer.ChildScorer;
|
|
||||||
import org.apache.lucene.search.Weight;
|
import org.apache.lucene.search.Weight;
|
||||||
import org.apache.lucene.search.grouping.TopGroups;
|
import org.apache.lucene.search.grouping.TopGroups;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
@ -82,24 +81,8 @@ import org.apache.lucene.util.FixedBitSet;
|
||||||
*
|
*
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class ToParentBlockJoinQuery extends Query {
|
public class ToParentBlockJoinQuery extends Query {
|
||||||
|
|
||||||
/** How to aggregate multiple child hit scores into a
|
|
||||||
* single parent score. */
|
|
||||||
public static enum ScoreMode {
|
|
||||||
/** Do no scoring. */
|
|
||||||
None,
|
|
||||||
/** Parent hit's score is the average of all child
|
|
||||||
scores. */
|
|
||||||
Avg,
|
|
||||||
/** Parent hit's score is the max of all child
|
|
||||||
scores. */
|
|
||||||
Max,
|
|
||||||
/** Parent hit's score is the sum of all child
|
|
||||||
scores. */
|
|
||||||
Total};
|
|
||||||
|
|
||||||
private final Filter parentsFilter;
|
private final Filter parentsFilter;
|
||||||
private final Query childQuery;
|
private final Query childQuery;
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,7 @@
|
||||||
any query matching parent documents, creating the joined query
|
any query matching parent documents, creating the joined query
|
||||||
matching only child documents.
|
matching only child documents.
|
||||||
|
|
||||||
<h2>Search-time joins</h2>
|
<h2>Query-time joins</h2>
|
||||||
|
|
||||||
<p>
|
<p>
|
||||||
The query time joining is index term based and implemented as two pass search. The first pass collects all the terms from a fromField
|
The query time joining is index term based and implemented as two pass search. The first pass collects all the terms from a fromField
|
||||||
|
@ -68,22 +68,26 @@
|
||||||
<li><code>fromField</code>: The from field to join from.
|
<li><code>fromField</code>: The from field to join from.
|
||||||
<li><code>fromQuery</code>: The query executed to collect the from terms. This is usually the user specified query.
|
<li><code>fromQuery</code>: The query executed to collect the from terms. This is usually the user specified query.
|
||||||
<li><code>multipleValuesPerDocument</code>: Whether the fromField contains more than one value per document
|
<li><code>multipleValuesPerDocument</code>: Whether the fromField contains more than one value per document
|
||||||
|
<li><code>scoreMode</code>: Defines how scores are translated to the other join side. If you don't care about scoring
|
||||||
|
use {@link org.apache.lucene.search.join.ScoreMode#None} mode. This will disable scoring and is therefore more
|
||||||
|
efficient (requires less memory and is faster).
|
||||||
<li><code>toField</code>: The to field to join to
|
<li><code>toField</code>: The to field to join to
|
||||||
</ul>
|
</ul>
|
||||||
<p>
|
<p>
|
||||||
Basically the query-time joining is accessible from one static method. The user of this method supplies the method
|
Basically the query-time joining is accessible from one static method. The user of this method supplies the method
|
||||||
with the described input and an <code>IndexSearcher</code> where the from terms need to be collected from. The returned
|
with the described input and an <code>IndexSearcher</code> where the from terms need to be collected from. The returned
|
||||||
query can be executed with the same <code>IndexSearcher</code>, but also with another <code>IndexSearcher</code>.
|
query can be executed with the same <code>IndexSearcher</code>, but also with another <code>IndexSearcher</code>.
|
||||||
Example usage of the {@link org.apache.lucene.search.join.JoinUtil#createJoinQuery(String, boolean, String, org.apache.lucene.search.Query, org.apache.lucene.search.IndexSearcher)
|
Example usage of the {@link org.apache.lucene.search.join.JoinUtil#createJoinQuery(String, boolean, String, org.apache.lucene.search.Query, org.apache.lucene.search.IndexSearcher, org.apache.lucene.search.join.ScoreMode)
|
||||||
JoinUtil.createJoinQuery()} :
|
JoinUtil.createJoinQuery()} :
|
||||||
</p>
|
</p>
|
||||||
<pre class="prettyprint">
|
<pre class="prettyprint">
|
||||||
String fromField = "from"; // Name of the from field
|
String fromField = "from"; // Name of the from field
|
||||||
boolean multipleValuesPerDocument = false; // Set to true only when your fromField has multiple values per document in your index
|
boolean multipleValuesPerDocument = false; // Set to true only when your fromField has multiple values per document in your index
|
||||||
String toField = "to"; // Name of the to field
|
String toField = "to"; // Name of the to field
|
||||||
|
ScoreMode scoreMode = ScoreMode.Max; // Defines how the scores are translated into the other side of the join.
|
||||||
Query fromQuery = new TermQuery(new Term("content", searchTerm)); // Query executed to collect from values to join to the to values
|
Query fromQuery = new TermQuery(new Term("content", searchTerm)); // Query executed to collect from values to join to the to values
|
||||||
|
|
||||||
Query joinQuery = JoinUtil.createJoinQuery(fromField, multipleValuesPerDocument, toField, fromQuery, fromSearcher);
|
Query joinQuery = JoinUtil.createJoinQuery(fromField, multipleValuesPerDocument, toField, fromQuery, fromSearcher, scoreMode);
|
||||||
TopDocs topDocs = toSearcher.search(joinQuery, 10); // Note: toSearcher can be the same as the fromSearcher
|
TopDocs topDocs = toSearcher.search(joinQuery, 10); // Note: toSearcher can be the same as the fromSearcher
|
||||||
// Render topDocs...
|
// Render topDocs...
|
||||||
</pre>
|
</pre>
|
||||||
|
|
|
@ -96,7 +96,7 @@ public class TestBlockJoin extends LuceneTestCase {
|
||||||
|
|
||||||
// Wrap the child document query to 'join' any matches
|
// Wrap the child document query to 'join' any matches
|
||||||
// up to corresponding parent:
|
// up to corresponding parent:
|
||||||
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ToParentBlockJoinQuery.ScoreMode.Avg);
|
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
|
||||||
|
|
||||||
// Combine the parent and nested child queries into a single query for a candidate
|
// Combine the parent and nested child queries into a single query for a candidate
|
||||||
BooleanQuery fullQuery = new BooleanQuery();
|
BooleanQuery fullQuery = new BooleanQuery();
|
||||||
|
@ -198,7 +198,7 @@ public class TestBlockJoin extends LuceneTestCase {
|
||||||
|
|
||||||
// Wrap the child document query to 'join' any matches
|
// Wrap the child document query to 'join' any matches
|
||||||
// up to corresponding parent:
|
// up to corresponding parent:
|
||||||
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ToParentBlockJoinQuery.ScoreMode.Avg);
|
ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
|
||||||
|
|
||||||
assertEquals("no filter - both passed", 2, s.search(childJoinQuery, 10).totalHits);
|
assertEquals("no filter - both passed", 2, s.search(childJoinQuery, 10).totalHits);
|
||||||
|
|
||||||
|
@ -259,7 +259,7 @@ public class TestBlockJoin extends LuceneTestCase {
|
||||||
w.close();
|
w.close();
|
||||||
IndexSearcher s = newSearcher(r);
|
IndexSearcher s = newSearcher(r);
|
||||||
|
|
||||||
ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(new MatchAllDocsQuery(), new QueryWrapperFilter(new MatchAllDocsQuery()), ToParentBlockJoinQuery.ScoreMode.Avg);
|
ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(new MatchAllDocsQuery(), new QueryWrapperFilter(new MatchAllDocsQuery()), ScoreMode.Avg);
|
||||||
s.search(q, 10);
|
s.search(q, 10);
|
||||||
BooleanQuery bq = new BooleanQuery();
|
BooleanQuery bq = new BooleanQuery();
|
||||||
bq.setBoost(2f); // we boost the BQ
|
bq.setBoost(2f); // we boost the BQ
|
||||||
|
@ -493,15 +493,15 @@ public class TestBlockJoin extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
final int x = random().nextInt(4);
|
final int x = random().nextInt(4);
|
||||||
final ToParentBlockJoinQuery.ScoreMode agg;
|
final ScoreMode agg;
|
||||||
if (x == 0) {
|
if (x == 0) {
|
||||||
agg = ToParentBlockJoinQuery.ScoreMode.None;
|
agg = ScoreMode.None;
|
||||||
} else if (x == 1) {
|
} else if (x == 1) {
|
||||||
agg = ToParentBlockJoinQuery.ScoreMode.Max;
|
agg = ScoreMode.Max;
|
||||||
} else if (x == 2) {
|
} else if (x == 2) {
|
||||||
agg = ToParentBlockJoinQuery.ScoreMode.Total;
|
agg = ScoreMode.Total;
|
||||||
} else {
|
} else {
|
||||||
agg = ToParentBlockJoinQuery.ScoreMode.Avg;
|
agg = ScoreMode.Avg;
|
||||||
}
|
}
|
||||||
|
|
||||||
final ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);
|
final ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);
|
||||||
|
@ -584,7 +584,7 @@ public class TestBlockJoin extends LuceneTestCase {
|
||||||
|
|
||||||
final boolean trackScores;
|
final boolean trackScores;
|
||||||
final boolean trackMaxScore;
|
final boolean trackMaxScore;
|
||||||
if (agg == ToParentBlockJoinQuery.ScoreMode.None) {
|
if (agg == ScoreMode.None) {
|
||||||
trackScores = false;
|
trackScores = false;
|
||||||
trackMaxScore = false;
|
trackMaxScore = false;
|
||||||
} else {
|
} else {
|
||||||
|
@ -881,8 +881,8 @@ public class TestBlockJoin extends LuceneTestCase {
|
||||||
|
|
||||||
// Wrap the child document query to 'join' any matches
|
// Wrap the child document query to 'join' any matches
|
||||||
// up to corresponding parent:
|
// up to corresponding parent:
|
||||||
ToParentBlockJoinQuery childJobJoinQuery = new ToParentBlockJoinQuery(childJobQuery, parentsFilter, ToParentBlockJoinQuery.ScoreMode.Avg);
|
ToParentBlockJoinQuery childJobJoinQuery = new ToParentBlockJoinQuery(childJobQuery, parentsFilter, ScoreMode.Avg);
|
||||||
ToParentBlockJoinQuery childQualificationJoinQuery = new ToParentBlockJoinQuery(childQualificationQuery, parentsFilter, ToParentBlockJoinQuery.ScoreMode.Avg);
|
ToParentBlockJoinQuery childQualificationJoinQuery = new ToParentBlockJoinQuery(childQualificationQuery, parentsFilter, ScoreMode.Avg);
|
||||||
|
|
||||||
// Combine the parent and nested child queries into a single query for a candidate
|
// Combine the parent and nested child queries into a single query for a candidate
|
||||||
BooleanQuery fullQuery = new BooleanQuery();
|
BooleanQuery fullQuery = new BooleanQuery();
|
||||||
|
@ -952,7 +952,7 @@ public class TestBlockJoin extends LuceneTestCase {
|
||||||
new QueryWrapperFilter(
|
new QueryWrapperFilter(
|
||||||
new TermQuery(new Term("parent", "1"))));
|
new TermQuery(new Term("parent", "1"))));
|
||||||
|
|
||||||
ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ToParentBlockJoinQuery.ScoreMode.Avg);
|
ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg);
|
||||||
Weight weight = s.createNormalizedWeight(q);
|
Weight weight = s.createNormalizedWeight(q);
|
||||||
DocIdSetIterator disi = weight.scorer(s.getIndexReader().getTopReaderContext().leaves()[0], true, true, null);
|
DocIdSetIterator disi = weight.scorer(s.getIndexReader().getTopReaderContext().leaves()[0], true, true, null);
|
||||||
assertEquals(1, disi.advance(1));
|
assertEquals(1, disi.advance(1));
|
||||||
|
@ -986,7 +986,7 @@ public class TestBlockJoin extends LuceneTestCase {
|
||||||
new QueryWrapperFilter(
|
new QueryWrapperFilter(
|
||||||
new TermQuery(new Term("isparent", "yes"))));
|
new TermQuery(new Term("isparent", "yes"))));
|
||||||
|
|
||||||
ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ToParentBlockJoinQuery.ScoreMode.Avg);
|
ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg);
|
||||||
Weight weight = s.createNormalizedWeight(q);
|
Weight weight = s.createNormalizedWeight(q);
|
||||||
DocIdSetIterator disi = weight.scorer(s.getIndexReader().getTopReaderContext().leaves()[0], true, true, null);
|
DocIdSetIterator disi = weight.scorer(s.getIndexReader().getTopReaderContext().leaves()[0], true, true, null);
|
||||||
assertEquals(2, disi.advance(0));
|
assertEquals(2, disi.advance(0));
|
||||||
|
|
|
@ -22,8 +22,26 @@ import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.*;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.index.DocTermOrds;
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.MultiFields;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.Collector;
|
||||||
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
import org.apache.lucene.search.Explanation;
|
||||||
|
import org.apache.lucene.search.FieldCache;
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.ScoreDoc;
|
||||||
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.search.TopScoreDocCollector;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
import org.apache.lucene.util.FixedBitSet;
|
||||||
|
@ -49,45 +67,45 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
|
|
||||||
// 0
|
// 0
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
doc.add(new Field("description", "random text", TextField.TYPE_STORED));
|
doc.add(new Field("description", "random text", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field("name", "name1", TextField.TYPE_STORED));
|
doc.add(new Field("name", "name1", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field(idField, "1", TextField.TYPE_STORED));
|
doc.add(new Field(idField, "1", TextField.TYPE_UNSTORED));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
// 1
|
// 1
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
doc.add(new Field("price", "10.0", TextField.TYPE_STORED));
|
doc.add(new Field("price", "10.0", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field(idField, "2", TextField.TYPE_STORED));
|
doc.add(new Field(idField, "2", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field(toField, "1", TextField.TYPE_STORED));
|
doc.add(new Field(toField, "1", TextField.TYPE_UNSTORED));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
// 2
|
// 2
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
doc.add(new Field("price", "20.0", TextField.TYPE_STORED));
|
doc.add(new Field("price", "20.0", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field(idField, "3", TextField.TYPE_STORED));
|
doc.add(new Field(idField, "3", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field(toField, "1", TextField.TYPE_STORED));
|
doc.add(new Field(toField, "1", TextField.TYPE_UNSTORED));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
// 3
|
// 3
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
doc.add(new Field("description", "more random text", TextField.TYPE_STORED));
|
doc.add(new Field("description", "more random text", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field("name", "name2", TextField.TYPE_STORED));
|
doc.add(new Field("name", "name2", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field(idField, "4", TextField.TYPE_STORED));
|
doc.add(new Field(idField, "4", TextField.TYPE_UNSTORED));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
w.commit();
|
w.commit();
|
||||||
|
|
||||||
// 4
|
// 4
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
doc.add(new Field("price", "10.0", TextField.TYPE_STORED));
|
doc.add(new Field("price", "10.0", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field(idField, "5", TextField.TYPE_STORED));
|
doc.add(new Field(idField, "5", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field(toField, "4", TextField.TYPE_STORED));
|
doc.add(new Field(toField, "4", TextField.TYPE_UNSTORED));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
// 5
|
// 5
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
doc.add(new Field("price", "20.0", TextField.TYPE_STORED));
|
doc.add(new Field("price", "20.0", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field(idField, "6", TextField.TYPE_STORED));
|
doc.add(new Field(idField, "6", TextField.TYPE_UNSTORED));
|
||||||
doc.add(new Field(toField, "4", TextField.TYPE_STORED));
|
doc.add(new Field(toField, "4", TextField.TYPE_UNSTORED));
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
|
||||||
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
|
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
|
||||||
|
@ -95,21 +113,21 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
|
|
||||||
// Search for product
|
// Search for product
|
||||||
Query joinQuery =
|
Query joinQuery =
|
||||||
JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")), indexSearcher);
|
JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name2")), indexSearcher, ScoreMode.None);
|
||||||
|
|
||||||
TopDocs result = indexSearcher.search(joinQuery, 10);
|
TopDocs result = indexSearcher.search(joinQuery, 10);
|
||||||
assertEquals(2, result.totalHits);
|
assertEquals(2, result.totalHits);
|
||||||
assertEquals(4, result.scoreDocs[0].doc);
|
assertEquals(4, result.scoreDocs[0].doc);
|
||||||
assertEquals(5, result.scoreDocs[1].doc);
|
assertEquals(5, result.scoreDocs[1].doc);
|
||||||
|
|
||||||
joinQuery = JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")), indexSearcher);
|
joinQuery = JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("name", "name1")), indexSearcher, ScoreMode.None);
|
||||||
result = indexSearcher.search(joinQuery, 10);
|
result = indexSearcher.search(joinQuery, 10);
|
||||||
assertEquals(2, result.totalHits);
|
assertEquals(2, result.totalHits);
|
||||||
assertEquals(1, result.scoreDocs[0].doc);
|
assertEquals(1, result.scoreDocs[0].doc);
|
||||||
assertEquals(2, result.scoreDocs[1].doc);
|
assertEquals(2, result.scoreDocs[1].doc);
|
||||||
|
|
||||||
// Search for offer
|
// Search for offer
|
||||||
joinQuery = JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")), indexSearcher);
|
joinQuery = JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("id", "5")), indexSearcher, ScoreMode.None);
|
||||||
result = indexSearcher.search(joinQuery, 10);
|
result = indexSearcher.search(joinQuery, 10);
|
||||||
assertEquals(1, result.totalHits);
|
assertEquals(1, result.totalHits);
|
||||||
assertEquals(3, result.scoreDocs[0].doc);
|
assertEquals(3, result.scoreDocs[0].doc);
|
||||||
|
@ -118,6 +136,96 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSimpleWithScoring() throws Exception {
|
||||||
|
final String idField = "id";
|
||||||
|
final String toField = "movieId";
|
||||||
|
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(
|
||||||
|
random(),
|
||||||
|
dir,
|
||||||
|
newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||||
|
new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
||||||
|
|
||||||
|
// 0
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new Field("description", "A random movie", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field("name", "Movie 1", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field(idField, "1", TextField.TYPE_UNSTORED));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
// 1
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new Field("subtitle", "The first subtitle of this movie", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field(idField, "2", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field(toField, "1", TextField.TYPE_UNSTORED));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
// 2
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new Field("subtitle", "random subtitle; random event movie", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field(idField, "3", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field(toField, "1", TextField.TYPE_UNSTORED));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
// 3
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new Field("description", "A second random movie", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field("name", "Movie 2", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field(idField, "4", TextField.TYPE_UNSTORED));
|
||||||
|
w.addDocument(doc);
|
||||||
|
w.commit();
|
||||||
|
|
||||||
|
// 4
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new Field("subtitle", "a very random event happened during christmas night", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field(idField, "5", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field(toField, "4", TextField.TYPE_UNSTORED));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
// 5
|
||||||
|
doc = new Document();
|
||||||
|
doc.add(new Field("subtitle", "movie end movie test 123 test 123 random", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field(idField, "6", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(new Field(toField, "4", TextField.TYPE_UNSTORED));
|
||||||
|
w.addDocument(doc);
|
||||||
|
|
||||||
|
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
|
||||||
|
w.close();
|
||||||
|
|
||||||
|
// Search for movie via subtitle
|
||||||
|
Query joinQuery =
|
||||||
|
JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "random")), indexSearcher, ScoreMode.Max);
|
||||||
|
TopDocs result = indexSearcher.search(joinQuery, 10);
|
||||||
|
assertEquals(2, result.totalHits);
|
||||||
|
assertEquals(0, result.scoreDocs[0].doc);
|
||||||
|
assertEquals(3, result.scoreDocs[1].doc);
|
||||||
|
|
||||||
|
// Score mode max.
|
||||||
|
joinQuery = JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Max);
|
||||||
|
result = indexSearcher.search(joinQuery, 10);
|
||||||
|
assertEquals(2, result.totalHits);
|
||||||
|
assertEquals(3, result.scoreDocs[0].doc);
|
||||||
|
assertEquals(0, result.scoreDocs[1].doc);
|
||||||
|
|
||||||
|
// Score mode total
|
||||||
|
joinQuery = JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Total);
|
||||||
|
result = indexSearcher.search(joinQuery, 10);
|
||||||
|
assertEquals(2, result.totalHits);
|
||||||
|
assertEquals(0, result.scoreDocs[0].doc);
|
||||||
|
assertEquals(3, result.scoreDocs[1].doc);
|
||||||
|
|
||||||
|
//Score mode avg
|
||||||
|
joinQuery = JoinUtil.createJoinQuery(toField, false, idField, new TermQuery(new Term("subtitle", "movie")), indexSearcher, ScoreMode.Avg);
|
||||||
|
result = indexSearcher.search(joinQuery, 10);
|
||||||
|
assertEquals(2, result.totalHits);
|
||||||
|
assertEquals(3, result.scoreDocs[0].doc);
|
||||||
|
assertEquals(0, result.scoreDocs[1].doc);
|
||||||
|
|
||||||
|
indexSearcher.getIndexReader().close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSingleValueRandomJoin() throws Exception {
|
public void testSingleValueRandomJoin() throws Exception {
|
||||||
int maxIndexIter = _TestUtil.nextInt(random(), 6, 12);
|
int maxIndexIter = _TestUtil.nextInt(random(), 6, 12);
|
||||||
|
@ -160,15 +268,20 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
String randomValue = context.randomUniqueValues[r];
|
String randomValue = context.randomUniqueValues[r];
|
||||||
FixedBitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
|
FixedBitSet expectedResult = createExpectedResult(randomValue, from, indexSearcher.getIndexReader(), context);
|
||||||
|
|
||||||
Query actualQuery = new TermQuery(new Term("value", randomValue));
|
final Query actualQuery = new TermQuery(new Term("value", randomValue));
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("actualQuery=" + actualQuery);
|
System.out.println("actualQuery=" + actualQuery);
|
||||||
}
|
}
|
||||||
Query joinQuery;
|
final ScoreMode scoreMode = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("scoreMode=" + scoreMode);
|
||||||
|
}
|
||||||
|
|
||||||
|
final Query joinQuery;
|
||||||
if (from) {
|
if (from) {
|
||||||
joinQuery = JoinUtil.createJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher);
|
joinQuery = JoinUtil.createJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode);
|
||||||
} else {
|
} else {
|
||||||
joinQuery = JoinUtil.createJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher);
|
joinQuery = JoinUtil.createJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode);
|
||||||
}
|
}
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("joinQuery=" + joinQuery);
|
System.out.println("joinQuery=" + joinQuery);
|
||||||
|
@ -176,26 +289,30 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
|
|
||||||
// Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
|
// Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
|
||||||
final FixedBitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
|
final FixedBitSet actualResult = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());
|
||||||
|
final TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, false);
|
||||||
indexSearcher.search(joinQuery, new Collector() {
|
indexSearcher.search(joinQuery, new Collector() {
|
||||||
|
|
||||||
int docBase;
|
int docBase;
|
||||||
|
|
||||||
public void collect(int doc) throws IOException {
|
public void collect(int doc) throws IOException {
|
||||||
actualResult.set(doc + docBase);
|
actualResult.set(doc + docBase);
|
||||||
|
topScoreDocCollector.collect(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||||
docBase = context.docBase;
|
docBase = context.docBase;
|
||||||
|
topScoreDocCollector.setNextReader(context);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setScorer(Scorer scorer) throws IOException {
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
topScoreDocCollector.setScorer(scorer);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean acceptsDocsOutOfOrder() {
|
public boolean acceptsDocsOutOfOrder() {
|
||||||
return true;
|
return topScoreDocCollector.acceptsDocsOutOfOrder();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
// Asserting bit set...
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("expected cardinality:" + expectedResult.cardinality());
|
System.out.println("expected cardinality:" + expectedResult.cardinality());
|
||||||
DocIdSetIterator iterator = expectedResult.iterator();
|
DocIdSetIterator iterator = expectedResult.iterator();
|
||||||
|
@ -208,8 +325,28 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
System.out.println(String.format("Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
|
System.out.println(String.format("Actual doc[%d] with id value %s", doc, indexSearcher.doc(doc).get("id")));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assertEquals(expectedResult, actualResult);
|
assertEquals(expectedResult, actualResult);
|
||||||
|
|
||||||
|
// Asserting TopDocs...
|
||||||
|
TopDocs expectedTopDocs = createExpectedTopDocs(randomValue, from, scoreMode, context);
|
||||||
|
TopDocs actualTopDocs = topScoreDocCollector.topDocs();
|
||||||
|
assertEquals(expectedTopDocs.totalHits, actualTopDocs.totalHits);
|
||||||
|
assertEquals(expectedTopDocs.scoreDocs.length, actualTopDocs.scoreDocs.length);
|
||||||
|
if (scoreMode == ScoreMode.None) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(expectedTopDocs.getMaxScore(), actualTopDocs.getMaxScore(), 0.0f);
|
||||||
|
for (int i = 0; i < expectedTopDocs.scoreDocs.length; i++) {
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.printf("Expected doc: %d | Actual doc: %d\n", expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
|
||||||
|
System.out.printf("Expected score: %f | Actual score: %f\n", expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score);
|
||||||
|
}
|
||||||
|
assertEquals(expectedTopDocs.scoreDocs[i].doc, actualTopDocs.scoreDocs[i].doc);
|
||||||
|
assertEquals(expectedTopDocs.scoreDocs[i].score, actualTopDocs.scoreDocs[i].score, 0.0f);
|
||||||
|
Explanation explanation = indexSearcher.explain(joinQuery, expectedTopDocs.scoreDocs[i].doc);
|
||||||
|
assertEquals(expectedTopDocs.scoreDocs[i].score, explanation.getValue(), 0.0f);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
topLevelReader.close();
|
topLevelReader.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
|
@ -238,20 +375,21 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
context.randomUniqueValues[i] = uniqueRandomValue;
|
context.randomUniqueValues[i] = uniqueRandomValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RandomDoc[] docs = new RandomDoc[nDocs];
|
||||||
for (int i = 0; i < nDocs; i++) {
|
for (int i = 0; i < nDocs; i++) {
|
||||||
String id = Integer.toString(i);
|
String id = Integer.toString(i);
|
||||||
int randomI = random().nextInt(context.randomUniqueValues.length);
|
int randomI = random().nextInt(context.randomUniqueValues.length);
|
||||||
String value = context.randomUniqueValues[randomI];
|
String value = context.randomUniqueValues[randomI];
|
||||||
Document document = new Document();
|
Document document = new Document();
|
||||||
document.add(newField(random(), "id", id, TextField.TYPE_STORED));
|
document.add(newField(random(), "id", id, TextField.TYPE_UNSTORED));
|
||||||
document.add(newField(random(), "value", value, TextField.TYPE_STORED));
|
document.add(newField(random(), "value", value, TextField.TYPE_UNSTORED));
|
||||||
|
|
||||||
boolean from = context.randomFrom[randomI];
|
boolean from = context.randomFrom[randomI];
|
||||||
int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
|
int numberOfLinkValues = multipleValuesPerDocument ? 2 + random().nextInt(10) : 1;
|
||||||
RandomDoc doc = new RandomDoc(id, numberOfLinkValues, value);
|
docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
|
||||||
for (int j = 0; j < numberOfLinkValues; j++) {
|
for (int j = 0; j < numberOfLinkValues; j++) {
|
||||||
String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
|
String linkValue = context.randomUniqueValues[random().nextInt(context.randomUniqueValues.length)];
|
||||||
doc.linkValues.add(linkValue);
|
docs[i].linkValues.add(linkValue);
|
||||||
if (from) {
|
if (from) {
|
||||||
if (!context.fromDocuments.containsKey(linkValue)) {
|
if (!context.fromDocuments.containsKey(linkValue)) {
|
||||||
context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
|
context.fromDocuments.put(linkValue, new ArrayList<RandomDoc>());
|
||||||
|
@ -260,9 +398,9 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
|
context.randomValueFromDocs.put(value, new ArrayList<RandomDoc>());
|
||||||
}
|
}
|
||||||
|
|
||||||
context.fromDocuments.get(linkValue).add(doc);
|
context.fromDocuments.get(linkValue).add(docs[i]);
|
||||||
context.randomValueFromDocs.get(value).add(doc);
|
context.randomValueFromDocs.get(value).add(docs[i]);
|
||||||
document.add(newField(random(), "from", linkValue, TextField.TYPE_STORED));
|
document.add(newField(random(), "from", linkValue, TextField.TYPE_UNSTORED));
|
||||||
} else {
|
} else {
|
||||||
if (!context.toDocuments.containsKey(linkValue)) {
|
if (!context.toDocuments.containsKey(linkValue)) {
|
||||||
context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
|
context.toDocuments.put(linkValue, new ArrayList<RandomDoc>());
|
||||||
|
@ -271,9 +409,9 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
|
context.randomValueToDocs.put(value, new ArrayList<RandomDoc>());
|
||||||
}
|
}
|
||||||
|
|
||||||
context.toDocuments.get(linkValue).add(doc);
|
context.toDocuments.get(linkValue).add(docs[i]);
|
||||||
context.randomValueToDocs.get(value).add(doc);
|
context.randomValueToDocs.get(value).add(docs[i]);
|
||||||
document.add(newField(random(), "to", linkValue, TextField.TYPE_STORED));
|
document.add(newField(random(), "to", linkValue, TextField.TYPE_UNSTORED));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -289,12 +427,235 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
w.commit();
|
w.commit();
|
||||||
}
|
}
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("Added document[" + i + "]: " + document);
|
System.out.println("Added document[" + docs[i].id + "]: " + document);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
|
||||||
|
// any ScoreMode.
|
||||||
|
IndexSearcher fromSearcher = newSearcher(fromWriter.getReader());
|
||||||
|
IndexSearcher toSearcher = newSearcher(toWriter.getReader());
|
||||||
|
for (int i = 0; i < context.randomUniqueValues.length; i++) {
|
||||||
|
String uniqueRandomValue = context.randomUniqueValues[i];
|
||||||
|
final String fromField;
|
||||||
|
final String toField;
|
||||||
|
final Map<String, Map<Integer, JoinScore>> queryVals;
|
||||||
|
if (context.randomFrom[i]) {
|
||||||
|
fromField = "from";
|
||||||
|
toField = "to";
|
||||||
|
queryVals = context.fromHitsToJoinScore;
|
||||||
|
} else {
|
||||||
|
fromField = "to";
|
||||||
|
toField = "from";
|
||||||
|
queryVals = context.toHitsToJoinScore;
|
||||||
|
}
|
||||||
|
final Map<BytesRef, JoinScore> joinValueToJoinScores = new HashMap<BytesRef, JoinScore>();
|
||||||
|
if (multipleValuesPerDocument) {
|
||||||
|
fromSearcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new Collector() {
|
||||||
|
|
||||||
|
private Scorer scorer;
|
||||||
|
private DocTermOrds docTermOrds;
|
||||||
|
private TermsEnum docTermsEnum;
|
||||||
|
private DocTermOrds.TermOrdsIterator reuse;
|
||||||
|
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
if (docTermOrds.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
reuse = docTermOrds.lookup(doc, reuse);
|
||||||
|
int[] buffer = new int[5];
|
||||||
|
|
||||||
|
int chunk;
|
||||||
|
do {
|
||||||
|
chunk = reuse.read(buffer);
|
||||||
|
if (chunk == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int idx = 0; idx < chunk; idx++) {
|
||||||
|
int key = buffer[idx];
|
||||||
|
docTermsEnum.seekExact((long) key);
|
||||||
|
BytesRef joinValue = docTermsEnum.term();
|
||||||
|
if (joinValue == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
JoinScore joinScore = joinValueToJoinScores.get(joinValue);
|
||||||
|
if (joinScore == null) {
|
||||||
|
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
|
||||||
|
}
|
||||||
|
joinScore.addScore(scorer.score());
|
||||||
|
}
|
||||||
|
} while (chunk >= buffer.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||||
|
docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), fromField);
|
||||||
|
docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
|
||||||
|
reuse = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean acceptsDocsOutOfOrder() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
fromSearcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new Collector() {
|
||||||
|
|
||||||
|
private Scorer scorer;
|
||||||
|
private FieldCache.DocTerms terms;
|
||||||
|
private final BytesRef spare = new BytesRef();
|
||||||
|
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
BytesRef joinValue = terms.getTerm(doc, spare);
|
||||||
|
if (joinValue == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
JoinScore joinScore = joinValueToJoinScores.get(joinValue);
|
||||||
|
if (joinScore == null) {
|
||||||
|
joinValueToJoinScores.put(BytesRef.deepCopyOf(joinValue), joinScore = new JoinScore());
|
||||||
|
}
|
||||||
|
joinScore.addScore(scorer.score());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||||
|
terms = FieldCache.DEFAULT.getTerms(context.reader(), fromField);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {
|
||||||
|
this.scorer = scorer;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean acceptsDocsOutOfOrder() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
final Map<Integer, JoinScore> docToJoinScore = new HashMap<Integer, JoinScore>();
|
||||||
|
if (multipleValuesPerDocument) {
|
||||||
|
toSearcher.search(new MatchAllDocsQuery(), new Collector() {
|
||||||
|
|
||||||
|
private DocTermOrds docTermOrds;
|
||||||
|
private TermsEnum docTermsEnum;
|
||||||
|
private DocTermOrds.TermOrdsIterator reuse;
|
||||||
|
private int docBase;
|
||||||
|
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
if (docTermOrds.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
reuse = docTermOrds.lookup(doc, reuse);
|
||||||
|
int[] buffer = new int[5];
|
||||||
|
|
||||||
|
int chunk;
|
||||||
|
do {
|
||||||
|
chunk = reuse.read(buffer);
|
||||||
|
if (chunk == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int idx = 0; idx < chunk; idx++) {
|
||||||
|
int key = buffer[idx];
|
||||||
|
docTermsEnum.seekExact((long) key);
|
||||||
|
JoinScore joinScore = joinValueToJoinScores.get(docTermsEnum.term());
|
||||||
|
if (joinScore == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Integer basedDoc = docBase + doc;
|
||||||
|
// First encountered join value determines the score.
|
||||||
|
// Something to keep in mind for many-to-many relations.
|
||||||
|
if (!docToJoinScore.containsKey(basedDoc)) {
|
||||||
|
docToJoinScore.put(basedDoc, joinScore);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (chunk >= buffer.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||||
|
docBase = context.docBase;
|
||||||
|
docTermOrds = FieldCache.DEFAULT.getDocTermOrds(context.reader(), toField);
|
||||||
|
docTermsEnum = docTermOrds.getOrdTermsEnum(context.reader());
|
||||||
|
reuse = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean acceptsDocsOutOfOrder() {return false;}
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
toSearcher.search(new MatchAllDocsQuery(), new Collector() {
|
||||||
|
|
||||||
|
private FieldCache.DocTerms terms;
|
||||||
|
private int docBase;
|
||||||
|
private final BytesRef spare = new BytesRef();
|
||||||
|
|
||||||
|
public void collect(int doc) throws IOException {
|
||||||
|
JoinScore joinScore = joinValueToJoinScores.get(terms.getTerm(doc, spare));
|
||||||
|
if (joinScore == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
docToJoinScore.put(docBase + doc, joinScore);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||||
|
terms = FieldCache.DEFAULT.getTerms(context.reader(), toField);
|
||||||
|
docBase = context.docBase;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean acceptsDocsOutOfOrder() {return false;}
|
||||||
|
public void setScorer(Scorer scorer) throws IOException {}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
queryVals.put(uniqueRandomValue, docToJoinScore);
|
||||||
|
}
|
||||||
|
|
||||||
|
fromSearcher.getIndexReader().close();
|
||||||
|
toSearcher.getIndexReader().close();
|
||||||
|
|
||||||
return context;
|
return context;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private TopDocs createExpectedTopDocs(String queryValue,
|
||||||
|
final boolean from,
|
||||||
|
final ScoreMode scoreMode,
|
||||||
|
IndexIterationContext context) throws IOException {
|
||||||
|
|
||||||
|
Map<Integer, JoinScore> hitsToJoinScores;
|
||||||
|
if (from) {
|
||||||
|
hitsToJoinScores = context.fromHitsToJoinScore.get(queryValue);
|
||||||
|
} else {
|
||||||
|
hitsToJoinScores = context.toHitsToJoinScore.get(queryValue);
|
||||||
|
}
|
||||||
|
List<Map.Entry<Integer,JoinScore>> hits = new ArrayList<Map.Entry<Integer, JoinScore>>(hitsToJoinScores.entrySet());
|
||||||
|
Collections.sort(hits, new Comparator<Map.Entry<Integer, JoinScore>>() {
|
||||||
|
|
||||||
|
public int compare(Map.Entry<Integer, JoinScore> hit1, Map.Entry<Integer, JoinScore> hit2) {
|
||||||
|
float score1 = hit1.getValue().score(scoreMode);
|
||||||
|
float score2 = hit2.getValue().score(scoreMode);
|
||||||
|
|
||||||
|
int cmp = Float.compare(score2, score1);
|
||||||
|
if (cmp != 0) {
|
||||||
|
return cmp;
|
||||||
|
}
|
||||||
|
return hit1.getKey() - hit2.getKey();
|
||||||
|
}
|
||||||
|
|
||||||
|
});
|
||||||
|
ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(10, hits.size())];
|
||||||
|
for (int i = 0; i < scoreDocs.length; i++) {
|
||||||
|
Map.Entry<Integer,JoinScore> hit = hits.get(i);
|
||||||
|
scoreDocs[i] = new ScoreDoc(hit.getKey(), hit.getValue().score(scoreMode));
|
||||||
|
}
|
||||||
|
return new TopDocs(hits.size(), scoreDocs, hits.isEmpty() ? Float.NaN : hits.get(0).getValue().score(scoreMode));
|
||||||
|
}
|
||||||
|
|
||||||
private FixedBitSet createExpectedResult(String queryValue, boolean from, IndexReader topLevelReader, IndexIterationContext context) throws IOException {
|
private FixedBitSet createExpectedResult(String queryValue, boolean from, IndexReader topLevelReader, IndexIterationContext context) throws IOException {
|
||||||
final Map<String, List<RandomDoc>> randomValueDocs;
|
final Map<String, List<RandomDoc>> randomValueDocs;
|
||||||
final Map<String, List<RandomDoc>> linkValueDocuments;
|
final Map<String, List<RandomDoc>> linkValueDocuments;
|
||||||
|
@ -339,6 +700,9 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
Map<String, List<RandomDoc>> randomValueFromDocs = new HashMap<String, List<RandomDoc>>();
|
Map<String, List<RandomDoc>> randomValueFromDocs = new HashMap<String, List<RandomDoc>>();
|
||||||
Map<String, List<RandomDoc>> randomValueToDocs = new HashMap<String, List<RandomDoc>>();
|
Map<String, List<RandomDoc>> randomValueToDocs = new HashMap<String, List<RandomDoc>>();
|
||||||
|
|
||||||
|
Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<String, Map<Integer, JoinScore>>();
|
||||||
|
Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<String, Map<Integer, JoinScore>>();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class RandomDoc {
|
private static class RandomDoc {
|
||||||
|
@ -346,12 +710,44 @@ public class TestJoinUtil extends LuceneTestCase {
|
||||||
final String id;
|
final String id;
|
||||||
final List<String> linkValues;
|
final List<String> linkValues;
|
||||||
final String value;
|
final String value;
|
||||||
|
final boolean from;
|
||||||
|
|
||||||
private RandomDoc(String id, int numberOfLinkValues, String value) {
|
private RandomDoc(String id, int numberOfLinkValues, String value, boolean from) {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
|
this.from = from;
|
||||||
linkValues = new ArrayList<String>(numberOfLinkValues);
|
linkValues = new ArrayList<String>(numberOfLinkValues);
|
||||||
this.value = value;
|
this.value = value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class JoinScore {
|
||||||
|
|
||||||
|
float maxScore;
|
||||||
|
float total;
|
||||||
|
int count;
|
||||||
|
|
||||||
|
void addScore(float score) {
|
||||||
|
total += score;
|
||||||
|
if (score > maxScore) {
|
||||||
|
maxScore = score;
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
float score(ScoreMode mode) {
|
||||||
|
switch (mode) {
|
||||||
|
case None:
|
||||||
|
return 1.0f;
|
||||||
|
case Total:
|
||||||
|
return total;
|
||||||
|
case Avg:
|
||||||
|
return total / count;
|
||||||
|
case Max:
|
||||||
|
return maxScore;
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("Unsupported ScoreMode: " + mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue