mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-08 03:49:38 +00:00
Improve explanation in rescore (#30629)
Currently, in a rescore request, if window_size is smaller than the number of top documents returned (N = size), the explanation of scores could be incorrect for documents that were part of the top N but were not rescored. This PR corrects this by saving in RescoreContext the doc IDs of the documents to which rescoring was applied, and adding the rescoring explanation only for those doc IDs. Closes #28725
This commit is contained in:
parent
b6340658f4
commit
3dfa93ef7c
@ -0,0 +1,39 @@
|
||||
---
|
||||
"Score should match explanation in rescore":
|
||||
- skip:
|
||||
version: " - 6.99.99"
|
||||
reason: Explanation for rescoring was corrected after these versions
|
||||
- do:
|
||||
bulk:
|
||||
refresh: true
|
||||
body:
|
||||
- '{"index": {"_index": "test_index", "_type": "_doc", "_id": "1"}}'
|
||||
- '{"f1": "1"}'
|
||||
- '{"index": {"_index": "test_index", "_type": "_doc", "_id": "2"}}'
|
||||
- '{"f1": "2"}'
|
||||
- '{"index": {"_index": "test_index", "_type": "_doc", "_id": "3"}}'
|
||||
- '{"f1": "3"}'
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test_index
|
||||
body:
|
||||
explain: true
|
||||
query:
|
||||
match_all: {}
|
||||
rescore:
|
||||
window_size: 2
|
||||
query:
|
||||
rescore_query:
|
||||
match_all: {}
|
||||
query_weight: 5
|
||||
rescore_query_weight: 10
|
||||
|
||||
- match: { hits.hits.0._score: 15 }
|
||||
- match: { hits.hits.0._explanation.value: 15 }
|
||||
|
||||
- match: { hits.hits.1._score: 15 }
|
||||
- match: { hits.hits.1._explanation.value: 15 }
|
||||
|
||||
- match: { hits.hits.2._score: 5 }
|
||||
- match: { hits.hits.2._explanation.value: 5 }
|
@ -30,6 +30,8 @@ import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Set;
|
||||
import java.util.Collections;
|
||||
import static java.util.stream.Collectors.toSet;
|
||||
|
||||
public final class QueryRescorer implements Rescorer {
|
||||
|
||||
@ -61,6 +63,11 @@ public final class QueryRescorer implements Rescorer {
|
||||
// First take top slice of incoming docs, to be rescored:
|
||||
TopDocs topNFirstPass = topN(topDocs, rescoreContext.getWindowSize());
|
||||
|
||||
// Save doc IDs for which rescoring was applied to be used in score explanation
|
||||
Set<Integer> topNDocIDs = Collections.unmodifiableSet(
|
||||
Arrays.stream(topNFirstPass.scoreDocs).map(scoreDoc -> scoreDoc.doc).collect(toSet()));
|
||||
rescoreContext.setRescoredDocs(topNDocIDs);
|
||||
|
||||
// Rescore them:
|
||||
TopDocs rescored = rescorer.rescore(searcher, topNFirstPass, rescoreContext.getWindowSize());
|
||||
|
||||
@ -71,16 +78,12 @@ public final class QueryRescorer implements Rescorer {
|
||||
@Override
|
||||
public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreContext rescoreContext,
|
||||
Explanation sourceExplanation) throws IOException {
|
||||
QueryRescoreContext rescore = (QueryRescoreContext) rescoreContext;
|
||||
if (sourceExplanation == null) {
|
||||
// this should not happen but just in case
|
||||
return Explanation.noMatch("nothing matched");
|
||||
}
|
||||
// TODO: this isn't right? I.e., we are incorrectly pretending all first pass hits were rescored? If the requested docID was
|
||||
// beyond the top rescoreContext.window() in the first pass hits, we don't rescore it now?
|
||||
Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId);
|
||||
QueryRescoreContext rescore = (QueryRescoreContext) rescoreContext;
|
||||
float primaryWeight = rescore.queryWeight();
|
||||
|
||||
Explanation prim;
|
||||
if (sourceExplanation.isMatch()) {
|
||||
prim = Explanation.match(
|
||||
@ -89,23 +92,24 @@ public final class QueryRescorer implements Rescorer {
|
||||
} else {
|
||||
prim = Explanation.noMatch("First pass did not match", sourceExplanation);
|
||||
}
|
||||
|
||||
// NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used. Maybe
|
||||
// we should add QueryRescorer.explainCombine to Lucene?
|
||||
if (rescoreExplain != null && rescoreExplain.isMatch()) {
|
||||
float secondaryWeight = rescore.rescoreQueryWeight();
|
||||
Explanation sec = Explanation.match(
|
||||
if (rescoreContext.isRescored(topLevelDocId)){
|
||||
Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId);
|
||||
// NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used.
|
||||
// Maybe we should add QueryRescorer.explainCombine to Lucene?
|
||||
if (rescoreExplain != null && rescoreExplain.isMatch()) {
|
||||
float secondaryWeight = rescore.rescoreQueryWeight();
|
||||
Explanation sec = Explanation.match(
|
||||
rescoreExplain.getValue() * secondaryWeight,
|
||||
"product of:",
|
||||
rescoreExplain, Explanation.match(secondaryWeight, "secondaryWeight"));
|
||||
QueryRescoreMode scoreMode = rescore.scoreMode();
|
||||
return Explanation.match(
|
||||
QueryRescoreMode scoreMode = rescore.scoreMode();
|
||||
return Explanation.match(
|
||||
scoreMode.combine(prim.getValue(), sec.getValue()),
|
||||
scoreMode + " of:",
|
||||
prim, sec);
|
||||
} else {
|
||||
return prim;
|
||||
}
|
||||
}
|
||||
return prim;
|
||||
}
|
||||
|
||||
private static final Comparator<ScoreDoc> SCORE_DOC_COMPARATOR = new Comparator<ScoreDoc>() {
|
||||
|
@ -19,6 +19,8 @@
|
||||
|
||||
package org.elasticsearch.search.rescore;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Context available to the rescore while it is running. Rescore
|
||||
* implementations should extend this with any additional resources that
|
||||
@ -27,6 +29,7 @@ package org.elasticsearch.search.rescore;
|
||||
public class RescoreContext {
|
||||
private final int windowSize;
|
||||
private final Rescorer rescorer;
|
||||
private Set<Integer> recroredDocs; //doc Ids for which rescoring was applied
|
||||
|
||||
/**
|
||||
* Build the context.
|
||||
@ -50,4 +53,12 @@ public class RescoreContext {
|
||||
public int getWindowSize() {
|
||||
return windowSize;
|
||||
}
|
||||
|
||||
public void setRescoredDocs(Set<Integer> docIds) {
|
||||
recroredDocs = docIds;
|
||||
}
|
||||
|
||||
public boolean isRescored(int docId) {
|
||||
return recroredDocs.contains(docId);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user