mirror of https://github.com/apache/lucene.git
LUCENE-5489: add Rescorer/QueryRescorer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1579911 13f79535-47bb-0310-9956-ffa450edef68
parent 54b06fdd42
commit d3cff1ae8d
org/apache/lucene/search/QueryRescorer.java (new file)
@@ -0,0 +1,238 @@
package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.Bits;

// TODO: we could also have an ExpressionRescorer

/** A {@link Rescorer} that uses a provided Query to assign
 *  scores to the first-pass hits.
 *
 *  @lucene.experimental */
public abstract class QueryRescorer extends Rescorer {

  private final Query query;

  /** Sole constructor, passing the 2nd pass query to
   *  assign scores to the 1st pass hits. */
  public QueryRescorer(Query query) {
    this.query = query;
  }

  /**
   * Implement this in a subclass to combine the first pass and
   * second pass scores.  If secondPassMatches is false then
   * the second pass query failed to match a hit from the
   * first pass query, and you should ignore the
   * secondPassScore.
   */
  protected abstract float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore);

  @Override
  public TopDocs rescore(IndexSearcher searcher, TopDocs topDocs, int topN) throws IOException {
    int[] docIDs = new int[topDocs.scoreDocs.length];
    for(int i=0;i<docIDs.length;i++) {
      docIDs[i] = topDocs.scoreDocs[i].doc;
    }

    TopDocs topDocs2 = searcher.search(query, new OnlyDocIDsFilter(docIDs), topDocs.scoreDocs.length);

    // TODO: we could save small young GC cost here if we
    // cloned the incoming ScoreDoc[], sorted that by doc,
    // passed that to OnlyDocIDsFilter, sorted 2nd pass
    // TopDocs by doc, did a merge sort to combine the
    // scores, and finally re-sorted by the combined score,
    // but that is sizable added code complexity for minor
    // GC savings:
    Map<Integer,Float> newScores = new HashMap<Integer,Float>();
    for(ScoreDoc sd : topDocs2.scoreDocs) {
      newScores.put(sd.doc, sd.score);
    }

    ScoreDoc[] newHits = new ScoreDoc[topDocs.scoreDocs.length];
    for(int i=0;i<topDocs.scoreDocs.length;i++) {
      ScoreDoc sd = topDocs.scoreDocs[i];
      Float newScore = newScores.get(sd.doc);
      float combinedScore;
      if (newScore == null) {
        combinedScore = combine(sd.score, false, 0.0f);
      } else {
        combinedScore = combine(sd.score, true, newScore.floatValue());
      }
      newHits[i] = new ScoreDoc(sd.doc, combinedScore);
    }

    // TODO: we should do a partial sort (of only topN)
    // instead, but typically the number of hits is
    // smallish:
    Arrays.sort(newHits,
                new Comparator<ScoreDoc>() {
                  @Override
                  public int compare(ScoreDoc a, ScoreDoc b) {
                    // Sort by score descending, then docID ascending:
                    if (a.score > b.score) {
                      return -1;
                    } else if (a.score < b.score) {
                      return 1;
                    } else {
                      // This subtraction can't overflow int
                      // because docIDs are >= 0:
                      return a.doc - b.doc;
                    }
                  }
                });

    if (topN < newHits.length) {
      ScoreDoc[] subset = new ScoreDoc[topN];
      System.arraycopy(newHits, 0, subset, 0, topN);
      newHits = subset;
    }

    return new TopDocs(topDocs.totalHits, newHits, newHits[0].score);
  }

  @Override
  public Explanation explain(IndexSearcher searcher, Explanation firstPassExplanation, int docID) throws IOException {
    Explanation secondPassExplanation = searcher.explain(query, docID);

    Float secondPassScore = secondPassExplanation.isMatch() ? secondPassExplanation.getValue() : null;

    float score;
    if (secondPassScore == null) {
      score = combine(firstPassExplanation.getValue(), false, 0.0f);
    } else {
      score = combine(firstPassExplanation.getValue(), true, secondPassScore.floatValue());
    }

    Explanation result = new Explanation(score, "combined first and second pass score using " + getClass());

    Explanation first = new Explanation(firstPassExplanation.getValue(), "first pass score");
    first.addDetail(firstPassExplanation);
    result.addDetail(first);

    Explanation second;
    if (secondPassScore == null) {
      second = new Explanation(0.0f, "no second pass score");
    } else {
      second = new Explanation(secondPassScore, "second pass score");
    }
    second.addDetail(secondPassExplanation);
    result.addDetail(second);

    return result;
  }

  /** Sugar API, calling {@link #rescore} using a simple linear
   *  combination of firstPassScore + weight * secondPassScore */
  public static TopDocs rescore(IndexSearcher searcher, TopDocs topDocs, Query query, final double weight, int topN) throws IOException {
    return new QueryRescorer(query) {
      @Override
      protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) {
        float score = firstPassScore;
        if (secondPassMatches) {
          score += weight * secondPassScore;
        }
        return score;
      }
    }.rescore(searcher, topDocs, topN);
  }

  /** Filter accepting only the specified docIDs */
  private static class OnlyDocIDsFilter extends Filter {

    private final int[] docIDs;

    /** Sole constructor. */
    public OnlyDocIDsFilter(int[] docIDs) {
      this.docIDs = docIDs;
      Arrays.sort(docIDs);
    }

    @Override
    public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
      int loc = Arrays.binarySearch(docIDs, context.docBase);
      if (loc < 0) {
        loc = -loc-1;
      }

      final int startLoc = loc;
      final int endDoc = context.docBase + context.reader().maxDoc();

      return new DocIdSet() {

        int pos = startLoc;

        @Override
        public DocIdSetIterator iterator() throws IOException {
          return new DocIdSetIterator() {

            int docID;

            @Override
            public int docID() {
              return docID;
            }

            @Override
            public int nextDoc() {
              if (pos == docIDs.length) {
                return NO_MORE_DOCS;
              }
              int docID = docIDs[pos];
              if (docID >= endDoc) {
                return NO_MORE_DOCS;
              }
              pos++;
              assert acceptDocs == null || acceptDocs.get(docID-context.docBase);
              return docID-context.docBase;
            }

            @Override
            public long cost() {
              // NOTE: not quite right, since this is cost
              // across all segments, and we are supposed to
              // return cost for just this segment:
              return docIDs.length;
            }

            @Override
            public int advance(int target) {
              // TODO: this is a full binary search; we
              // could optimize (a bit) by setting lower
              // bound to current pos instead:
              int loc = Arrays.binarySearch(docIDs, target + context.docBase);
              if (loc < 0) {
                loc = -loc-1;
              }
              pos = loc;
              return nextDoc();
            }
          };
        }
      };
    }
  }
}
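Usage sketch (not part of this commit): combine() above is the extension point; a subclass decides how the first-pass and second-pass scores are mixed, and should ignore secondPassScore when secondPassMatches is false. The class, field, and query values below are illustrative only.

// Rescore the top first-pass hits, boosting (rather than replacing) the
// first-pass score whenever the second-pass phrase query matches:
import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;

public class PhraseBoostRescore {
  public static TopDocs boostByPhrase(IndexSearcher searcher, TopDocs firstPass) throws IOException {
    PhraseQuery pq = new PhraseQuery();
    pq.setSlop(2);
    pq.add(new Term("body", "wizard"));
    pq.add(new Term("body", "oz"));

    Rescorer rescorer = new QueryRescorer(pq) {
      @Override
      protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) {
        // Keep the first-pass score; only add the phrase score when it matched this hit:
        return secondPassMatches ? firstPassScore + 5.0f * secondPassScore : firstPassScore;
      }
    };
    return rescorer.rescore(searcher, firstPass, 10);
  }
}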
org/apache/lucene/search/Rescorer.java (new file)
@@ -0,0 +1,58 @@
package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

/**
 * Re-scores the topN results ({@link TopDocs}) from an original
 * query.  See {@link QueryRescorer} for an actual
 * implementation.  Typically, you run a low-cost
 * first-pass query across the entire index, collecting the
 * top few hundred hits perhaps, and then use this class to
 * mix in a more costly second pass scoring.
 *
 * <p>See {@link
 * QueryRescorer#rescore(IndexSearcher,TopDocs,Query,double,int)}
 * for a simple static method to call to rescore using a 2nd
 * pass {@link Query}.
 *
 * @lucene.experimental
 */

public abstract class Rescorer {

  /**
   * Rescore an initial first-pass {@link TopDocs}.
   *
   * @param searcher {@link IndexSearcher} used to produce the
   *   first pass topDocs
   * @param firstPassTopDocs Hits from the first pass
   *   search.  It's very important that these hits were
   *   produced by the provided searcher; otherwise the doc
   *   IDs will not match!
   * @param topN How many re-scored hits to return
   */
  public abstract TopDocs rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int topN) throws IOException;

  /**
   * Explains how the score for the specified document was
   * computed.
   */
  public abstract Explanation explain(IndexSearcher searcher, Explanation firstPassExplanation, int docID) throws IOException;
}
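End-to-end sketch (illustrative, not part of this commit; assumes an already-open IndexReader and a "body" text field): the pattern the javadoc above describes is a cheap first-pass query across the entire index followed by a costlier second pass over only the collected hits, here via the static QueryRescorer.rescore sugar method added in this commit.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;

public class TwoPassSearch {
  public static TopDocs search(IndexReader reader) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);

    // First pass: cheap disjunction across the entire index, keep a few hundred hits:
    BooleanQuery firstPass = new BooleanQuery();
    firstPass.add(new TermQuery(new Term("body", "wizard")), Occur.SHOULD);
    firstPass.add(new TermQuery(new Term("body", "oz")), Occur.SHOULD);
    TopDocs hits = searcher.search(firstPass, 200);

    // Second pass: mix in a sloppy phrase score (weight 2.0) and return the top 10:
    PhraseQuery pq = new PhraseQuery();
    pq.setSlop(5);
    pq.add(new Term("body", "wizard"));
    pq.add(new Term("body", "oz"));
    return QueryRescorer.rescore(searcher, hits, pq, 2.0, 10);
  }
}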
org/apache/lucene/search/TestQueryRescorer.java (new file)
@@ -0,0 +1,276 @@
package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestQueryRescorer extends LuceneTestCase {

  public void testBasic() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);

    Document doc = new Document();
    doc.add(newStringField("id", "0", Field.Store.YES));
    doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    // Do ordinary BooleanQuery:
    BooleanQuery bq = new BooleanQuery();
    bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
    bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
    IndexSearcher searcher = newSearcher(r);

    TopDocs hits = searcher.search(bq, 10);
    assertEquals(2, hits.totalHits);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));

    // Now, resort using PhraseQuery:
    PhraseQuery pq = new PhraseQuery();
    pq.setSlop(5);
    pq.add(new Term("field", "wizard"));
    pq.add(new Term("field", "oz"));

    TopDocs hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10);

    // Resorting changed the order:
    assertEquals(2, hits2.totalHits);
    assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id"));
    assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id"));

    // Resort using SpanNearQuery:
    SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard"));
    SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz"));
    SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] {t1, t2}, 0, true);

    TopDocs hits3 = QueryRescorer.rescore(searcher, hits, snq, 2.0, 10);

    // Resorting changed the order:
    assertEquals(2, hits3.totalHits);
    assertEquals("1", searcher.doc(hits3.scoreDocs[0].doc).get("id"));
    assertEquals("0", searcher.doc(hits3.scoreDocs[1].doc).get("id"));

    r.close();
    dir.close();
  }

  public void testCustomCombine() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);

    Document doc = new Document();
    doc.add(newStringField("id", "0", Field.Store.YES));
    doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    // Do ordinary BooleanQuery:
    BooleanQuery bq = new BooleanQuery();
    bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
    bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
    IndexSearcher searcher = newSearcher(r);

    TopDocs hits = searcher.search(bq, 10);
    assertEquals(2, hits.totalHits);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));

    // Now, resort using PhraseQuery, but with an
    // opposite-world combine:
    PhraseQuery pq = new PhraseQuery();
    pq.setSlop(5);
    pq.add(new Term("field", "wizard"));
    pq.add(new Term("field", "oz"));

    TopDocs hits2 = new QueryRescorer(pq) {
        @Override
        protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) {
          float score = firstPassScore;
          if (secondPassMatches) {
            score -= 2.0 * secondPassScore;
          }
          return score;
        }
      }.rescore(searcher, hits, 10);

    // Resorting didn't change the order:
    assertEquals(2, hits2.totalHits);
    assertEquals("0", searcher.doc(hits2.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits2.scoreDocs[1].doc).get("id"));

    r.close();
    dir.close();
  }

  public void testExplain() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);

    Document doc = new Document();
    doc.add(newStringField("id", "0", Field.Store.YES));
    doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    // Do ordinary BooleanQuery:
    BooleanQuery bq = new BooleanQuery();
    bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
    bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
    IndexSearcher searcher = newSearcher(r);

    TopDocs hits = searcher.search(bq, 10);
    assertEquals(2, hits.totalHits);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));

    // Now, resort using PhraseQuery:
    PhraseQuery pq = new PhraseQuery();
    pq.add(new Term("field", "wizard"));
    pq.add(new Term("field", "oz"));

    Rescorer rescorer = new QueryRescorer(pq) {
        @Override
        protected float combine(float firstPassScore, boolean secondPassMatches, float secondPassScore) {
          float score = firstPassScore;
          if (secondPassMatches) {
            score += 2.0 * secondPassScore;
          }
          return score;
        }
      };

    TopDocs hits2 = rescorer.rescore(searcher, hits, 10);

    // Resorting changed the order:
    assertEquals(2, hits2.totalHits);
    assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id"));
    assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id"));

    int docID = hits2.scoreDocs[0].doc;
    Explanation explain = rescorer.explain(searcher,
                                           searcher.explain(bq, docID),
                                           docID);
    String s = explain.toString();
    assertTrue(s.contains("TestQueryRescorer$"));
    assertTrue(s.contains("combined first and second pass score"));
    assertTrue(s.contains("first pass score"));
    assertTrue(s.contains("= second pass score"));
    assertEquals(hits2.scoreDocs[0].score, explain.getValue(), 0.0f);

    docID = hits2.scoreDocs[1].doc;
    explain = rescorer.explain(searcher,
                               searcher.explain(bq, docID),
                               docID);
    s = explain.toString();
    assertTrue(s.contains("TestQueryRescorer$"));
    assertTrue(s.contains("combined first and second pass score"));
    assertTrue(s.contains("first pass score"));
    assertTrue(s.contains("no second pass score"));
    assertFalse(s.contains("= second pass score"));
    assertTrue(s.contains("NON-MATCH"));
    assertEquals(hits2.scoreDocs[1].score, explain.getValue(), 0.0f);

    r.close();
    dir.close();
  }

  public void testMissingSecondPassScore() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);

    Document doc = new Document();
    doc.add(newStringField("id", "0", Field.Store.YES));
    doc.add(newTextField("field", "wizard the the the the the oz", Field.Store.NO));
    w.addDocument(doc);
    doc = new Document();
    doc.add(newStringField("id", "1", Field.Store.YES));
    // 1 extra token, but wizard and oz are close;
    doc.add(newTextField("field", "wizard oz the the the the the the", Field.Store.NO));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();

    // Do ordinary BooleanQuery:
    BooleanQuery bq = new BooleanQuery();
    bq.add(new TermQuery(new Term("field", "wizard")), Occur.SHOULD);
    bq.add(new TermQuery(new Term("field", "oz")), Occur.SHOULD);
    IndexSearcher searcher = newSearcher(r);

    TopDocs hits = searcher.search(bq, 10);
    assertEquals(2, hits.totalHits);
    assertEquals("0", searcher.doc(hits.scoreDocs[0].doc).get("id"));
    assertEquals("1", searcher.doc(hits.scoreDocs[1].doc).get("id"));

    // Now, resort using PhraseQuery, no slop:
    PhraseQuery pq = new PhraseQuery();
    pq.add(new Term("field", "wizard"));
    pq.add(new Term("field", "oz"));

    TopDocs hits2 = QueryRescorer.rescore(searcher, hits, pq, 2.0, 10);

    // Resorting changed the order:
    assertEquals(2, hits2.totalHits);
    assertEquals("1", searcher.doc(hits2.scoreDocs[0].doc).get("id"));
    assertEquals("0", searcher.doc(hits2.scoreDocs[1].doc).get("id"));

    // Resort using SpanNearQuery:
    SpanTermQuery t1 = new SpanTermQuery(new Term("field", "wizard"));
    SpanTermQuery t2 = new SpanTermQuery(new Term("field", "oz"));
    SpanNearQuery snq = new SpanNearQuery(new SpanQuery[] {t1, t2}, 0, true);

    TopDocs hits3 = QueryRescorer.rescore(searcher, hits, snq, 2.0, 10);

    // Resorting changed the order:
    assertEquals(2, hits3.totalHits);
    assertEquals("1", searcher.doc(hits3.scoreDocs[0].doc).get("id"));
    assertEquals("0", searcher.doc(hits3.scoreDocs[1].doc).get("id"));

    r.close();
    dir.close();
  }
}