Improve query handling (rewrite) when searching: try to minimize the number of rewrites. Also, better highlighting when the query is wrapped in a filtered query.

This commit is contained in:
kimchy 2010-04-27 09:40:27 +03:00
parent 5f98942911
commit cb8faaa13f
6 changed files with 63 additions and 22 deletions

View File

@ -23,6 +23,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.util.lucene.search.CustomBoostFactorQuery;
import org.elasticsearch.util.lucene.search.TermFilter;
import java.io.IOException;
@ -73,10 +74,9 @@ public class CustomFieldQuery extends FieldQuery {
flatQueries.add(termQuery);
}
} else if (sourceQuery instanceof ConstantScoreQuery) {
Boolean highlight = highlightFilters.get();
if (highlight != null && highlight.equals(Boolean.TRUE)) {
flatten(((ConstantScoreQuery) sourceQuery).getFilter(), flatQueries);
}
flatten(((ConstantScoreQuery) sourceQuery).getFilter(), flatQueries);
} else if (sourceQuery instanceof CustomBoostFactorQuery) {
flatten(((CustomBoostFactorQuery) sourceQuery).getSubQuery(), flatQueries);
} else if (sourceQuery instanceof MultiTermQuery) {
MultiTermQuery multiTermQuery = (MultiTermQuery) sourceQuery;
MultiTermQuery.RewriteMethod rewriteMethod = multiTermQuery.getRewriteMethod();
@ -87,16 +87,25 @@ public class CustomFieldQuery extends FieldQuery {
flatten(multiTermQuery.rewrite(reader.get()), flatQueries);
} catch (IOException e) {
// ignore
} catch (BooleanQuery.TooManyClauses e) {
// ignore
} finally {
multiTermQuery.setRewriteMethod(rewriteMethod);
}
}
} else if (sourceQuery instanceof FilteredQuery) {
flatten(((FilteredQuery) sourceQuery).getQuery(), flatQueries);
flatten(((FilteredQuery) sourceQuery).getFilter(), flatQueries);
} else {
super.flatten(sourceQuery, flatQueries);
}
}
void flatten(Filter sourceFilter, Collection<Query> flatQueries) {
Boolean highlight = highlightFilters.get();
if (highlight == null || highlight.equals(Boolean.FALSE)) {
return;
}
if (sourceFilter instanceof TermFilter) {
flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), flatQueries);
} else if (sourceFilter instanceof PublicTermsFilter) {

View File

@ -37,7 +37,7 @@ public class SearchContextException extends SearchException {
private static String buildMessage(SearchContext context, String msg) {
StringBuilder sb = new StringBuilder();
sb.append('[').append(context.shardTarget().index()).append("][").append(context.shardTarget().shardId()).append("]: ");
sb.append("query[").append(context.query()).append("],from[").append(context.from()).append("],size[").append(context.size()).append("]");
sb.append("query[").append(context.originalQuery()).append("],from[").append(context.from()).append("],size[").append(context.size()).append("]");
if (context.sort() != null) {
sb.append(",sort[").append(context.sort()).append("]");
}

View File

@ -42,7 +42,9 @@ public class DfsPhase implements SearchPhase {
public void execute(SearchContext context) {
try {
context.rewriteQuery();
if (!context.queryRewritten()) {
context.updateRewriteQuery(context.searcher().rewrite(context.query()));
}
THashSet<Term> termsSet = new THashSet<Term>();
context.query().extractTerms(termsSet);

View File

@ -106,9 +106,9 @@ public class HighlighterParseElement implements SearchParseElement {
if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
highlightFilter = jp.getIntValue() != 0;
}
} else if (token == JsonToken.VALUE_FALSE) {
} else if (token == JsonToken.VALUE_FALSE || token == JsonToken.VALUE_TRUE) {
if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
highlightFilter = false;
highlightFilter = token == JsonToken.VALUE_TRUE;
}
} else if (token == JsonToken.START_OBJECT) {
if ("fields".equals(topLevelFieldName)) {

View File

@ -28,7 +28,7 @@ import org.elasticsearch.util.lucene.docidset.DocIdSetCollector;
import java.io.IOException;
/**
* @author kimchy (Shay Banon)
* @author kimchy (shay.banon)
*/
public class ContextIndexSearcher extends IndexSearcher {
@ -57,6 +57,20 @@ public class ContextIndexSearcher extends IndexSearcher {
return docIdSet;
}
/**
 * Rewrites the given query, reusing the search context's stored rewrite
 * when possible.
 *
 * <p>When {@code original} is the very same instance (reference equality)
 * as the context's current or original query, the rewritten form is taken
 * from the context if it reports the query as already rewritten; otherwise
 * the query is rewritten through the superclass and the result is stored
 * back on the context. Any other query is delegated to the superclass
 * without touching the context.
 *
 * @param original the query to rewrite
 * @return the rewritten query
 * @throws IOException if the superclass rewrite fails
 */
@Override public Query rewrite(Query original) throws IOException {
    boolean topLevelQuery = original == searchContext.query() || original == searchContext.originalQuery();
    if (!topLevelQuery) {
        // not the main search query, nothing to cache
        return super.rewrite(original);
    }
    if (searchContext.queryRewritten()) {
        // the top level query was already rewritten, hand back the stored result
        return searchContext.query();
    }
    Query rewritten = super.rewrite(original);
    searchContext.updateRewriteQuery(rewritten);
    return rewritten;
}
@Override protected Weight createWeight(Query query) throws IOException {
if (dfSource == null) {
return super.createWeight(query);

View File

@ -86,6 +86,8 @@ public class SearchContext implements Releasable {
private String queryParserName;
private Query originalQuery;
private Query query;
private int[] docIdsToLoad;
@ -238,19 +240,42 @@ public class SearchContext implements Releasable {
}
/**
 * Sets the query to execute.
 *
 * <p>A non-null query is also recorded as the original query and clears
 * the "already rewritten" flag. A {@code null} query only replaces the
 * current query; the original query and the rewritten flag are left
 * untouched.
 *
 * @param query the query to execute, may be {@code null}
 * @return this context, for chaining
 */
public SearchContext query(Query query) {
    if (query != null) {
        // a fresh query invalidates any previous rewrite
        this.queryRewritten = false;
        this.originalQuery = query;
    }
    this.query = query;
    return this;
}
/**
 * Returns the query as it was last set through {@code query(Query)} with a
 * non-null argument, i.e. before any rewritten form replaced it.
 */
public Query originalQuery() {
    return originalQuery;
}
/**
 * Returns the query to execute. This may be the rewritten form if one has
 * been stored on this context.
 */
public Query query() {
    return query;
}
/**
 * Returns {@code true} if a rewritten query has been stored on this
 * context since the query was last set.
 */
public boolean queryRewritten() {
    return this.queryRewritten;
}
/**
 * Stores an already rewritten query as the query to execute and marks this
 * context as rewritten. The rewrite itself is done by the caller; this
 * method only records its result.
 *
 * @param rewriteQuery the rewritten form of the query
 * @return this context, for chaining
 */
public SearchContext updateRewriteQuery(Query rewriteQuery) {
    this.queryRewritten = true;
    this.query = rewriteQuery;
    return this;
}
/**
 * Returns the current value of the {@code from} field.
 */
public int from() {
    return this.from;
}
@ -286,15 +311,6 @@ public class SearchContext implements Releasable {
this.explain = explain;
}
/**
 * Rewrites the current query against the searcher's index reader and
 * stores the result as the query to execute. Does nothing if the query is
 * already flagged as rewritten.
 *
 * @return this context, for chaining
 * @throws IOException if the query rewrite fails
 */
public SearchContext rewriteQuery() throws IOException {
    if (!queryRewritten) {
        // single rewrite pass; the flag prevents repeating it on later calls
        query = query.rewrite(searcher.getIndexReader());
        queryRewritten = true;
    }
    return this;
}
/**
 * Returns the {@code docIdsToLoad} array exactly as stored (no copy is made).
 */
public int[] docIdsToLoad() {
    return this.docIdsToLoad;
}