Highlighting is broken when the query is on the `_all` field or uses prefixes. Also add a flag to the highlight element that controls whether filters are highlighted (called `highlight_filter`, also accepted as `highlightFilter`), which defaults to true. Closes #148.

This commit is contained in:
kimchy 2010-04-25 18:32:46 +03:00
parent 453ede8f57
commit bf6cead984
12 changed files with 292 additions and 14 deletions

View File

@ -11,7 +11,7 @@
<option name="METHOD_NAME" value="" /> <option name="METHOD_NAME" value="" />
<option name="GROUP_NAME" value="" /> <option name="GROUP_NAME" value="" />
<option name="TEST_OBJECT" value="PACKAGE" /> <option name="TEST_OBJECT" value="PACKAGE" />
<option name="VM_PARAMETERS" value="-Djava.net.preferIPv4Stack=true" /> <option name="VM_PARAMETERS" value="-Djava.net.preferIPv4Stack=true -Xmx512m" />
<option name="PARAMETERS" value="" /> <option name="PARAMETERS" value="" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" /> <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" />
<option name="OUTPUT_DIRECTORY" value="" /> <option name="OUTPUT_DIRECTORY" value="" />

View File

@ -11,7 +11,7 @@
<option name="METHOD_NAME" value="" /> <option name="METHOD_NAME" value="" />
<option name="GROUP_NAME" value="" /> <option name="GROUP_NAME" value="" />
<option name="TEST_OBJECT" value="PACKAGE" /> <option name="TEST_OBJECT" value="PACKAGE" />
<option name="VM_PARAMETERS" value="-Des.node.local=true" /> <option name="VM_PARAMETERS" value="-Des.node.local=true -Xmx512m" />
<option name="PARAMETERS" value="" /> <option name="PARAMETERS" value="" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" /> <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" />
<option name="OUTPUT_DIRECTORY" value="" /> <option name="OUTPUT_DIRECTORY" value="" />

View File

@ -0,0 +1,40 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.search;
import java.util.List;
/**
 * A {@link BooleanFilter} that exposes the parent's per-occurrence filter lists through
 * public accessors, so code outside this package can walk the clauses.
 *
 * <p>Each accessor hands back the parent's own list reference, not a copy.
 * NOTE(review): the lists are presumably allocated lazily by the parent and may therefore
 * be {@code null} when no clause of that kind was added - confirm against the Lucene
 * version in use before iterating without a null check.
 *
 * @author kimchy (shay.banon)
 */
public class PublicBooleanFilter extends BooleanFilter {

    /**
     * @return the parent's list of filters added as "must" clauses
     */
    public List<Filter> getMustFilters() {
        return mustFilters;
    }

    /**
     * @return the parent's list of filters added as "should" clauses
     */
    public List<Filter> getShouldFilters() {
        return shouldFilters;
    }

    /**
     * @return the parent's list of filters added as "must not" clauses
     */
    public List<Filter> getNotFilters() {
        return notFilters;
    }
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.index.Term;
import java.util.Set;
/**
 * A {@link TermsFilter} that exposes the parent's term set through a public accessor, so
 * code outside this package can enumerate the terms being filtered on.
 *
 * @author kimchy (shay.banon)
 */
public class PublicTermsFilter extends TermsFilter {

    /**
     * @return the parent's own set of terms (the same reference, not a copy)
     */
    public Set<Term> getTerms() {
        return this.terms;
    }
}

View File

@ -0,0 +1,125 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.search.vectorhighlight;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.util.lucene.search.TermFilter;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.Collection;
/**
* @author kimchy (shay.banon)
*/
// LUCENE MONITOR
public class CustomFieldQuery extends FieldQuery {
private static Field multiTermQueryWrapperFilterQueryField;
static {
try {
multiTermQueryWrapperFilterQueryField = MultiTermQueryWrapperFilter.class.getDeclaredField("query");
multiTermQueryWrapperFilterQueryField.setAccessible(true);
} catch (NoSuchFieldException e) {
// ignore
}
}
// hack since flatten is called from the parent constructor, so we can't pass it
public static ThreadLocal<IndexReader> reader = new ThreadLocal<IndexReader>();
public static ThreadLocal<Boolean> highlightFilters = new ThreadLocal<Boolean>();
public CustomFieldQuery(Query query, FastVectorHighlighter highlighter) {
this(query, highlighter.isPhraseHighlight(), highlighter.isFieldMatch());
}
public CustomFieldQuery(Query query, boolean phraseHighlight, boolean fieldMatch) {
super(query, phraseHighlight, fieldMatch);
reader.remove();
highlightFilters.remove();
}
@Override void flatten(Query sourceQuery, Collection<Query> flatQueries) {
if (sourceQuery instanceof DisjunctionMaxQuery) {
DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
for (Query query : dmq) {
flatten(query, flatQueries);
}
} else if (sourceQuery instanceof SpanTermQuery) {
TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm());
if (!flatQueries.contains(termQuery)) {
flatQueries.add(termQuery);
}
} else if (sourceQuery instanceof ConstantScoreQuery) {
Boolean highlight = highlightFilters.get();
if (highlight != null && highlight.equals(Boolean.TRUE)) {
flatten(((ConstantScoreQuery) sourceQuery).getFilter(), flatQueries);
}
} else if (sourceQuery instanceof MultiTermQuery) {
MultiTermQuery multiTermQuery = (MultiTermQuery) sourceQuery;
MultiTermQuery.RewriteMethod rewriteMethod = multiTermQuery.getRewriteMethod();
if (rewriteMethod != MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE && rewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
// we need to rewrite
multiTermQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
try {
flatten(multiTermQuery.rewrite(reader.get()), flatQueries);
} catch (IOException e) {
// ignore
} finally {
multiTermQuery.setRewriteMethod(rewriteMethod);
}
}
} else {
super.flatten(sourceQuery, flatQueries);
}
}
void flatten(Filter sourceFilter, Collection<Query> flatQueries) {
if (sourceFilter instanceof TermFilter) {
flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), flatQueries);
} else if (sourceFilter instanceof PublicTermsFilter) {
PublicTermsFilter termsFilter = (PublicTermsFilter) sourceFilter;
for (Term term : termsFilter.getTerms()) {
flatten(new TermQuery(term), flatQueries);
}
} else if (sourceFilter instanceof MultiTermQueryWrapperFilter) {
if (multiTermQueryWrapperFilterQueryField != null) {
try {
flatten((Query) multiTermQueryWrapperFilterQueryField.get(sourceFilter), flatQueries);
} catch (IllegalAccessException e) {
// ignore
}
}
} else if (sourceFilter instanceof PublicBooleanFilter) {
PublicBooleanFilter booleanFilter = (PublicBooleanFilter) sourceFilter;
for (Filter filter : booleanFilter.getMustFilters()) {
flatten(filter, flatQueries);
}
for (Filter filter : booleanFilter.getNotFilters()) {
flatten(filter, flatQueries);
}
}
}
}

View File

@ -20,10 +20,7 @@
package org.elasticsearch.index.query.json; package org.elasticsearch.index.query.json;
import com.google.inject.Inject; import com.google.inject.Inject;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterClause;
import org.codehaus.jackson.JsonParser; import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken; import org.codehaus.jackson.JsonToken;
import org.elasticsearch.index.AbstractIndexComponent; import org.elasticsearch.index.AbstractIndexComponent;
@ -85,7 +82,7 @@ public class BoolJsonFilterParser extends AbstractIndexComponent implements Json
} }
} }
BooleanFilter booleanFilter = new BooleanFilter(); BooleanFilter booleanFilter = new PublicBooleanFilter();
for (FilterClause filterClause : clauses) { for (FilterClause filterClause : clauses) {
booleanFilter.add(filterClause); booleanFilter.add(filterClause);
} }

View File

@ -22,6 +22,7 @@ package org.elasticsearch.index.query.json;
import com.google.inject.Inject; import com.google.inject.Inject;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter; import org.apache.lucene.search.Filter;
import org.apache.lucene.search.PublicTermsFilter;
import org.apache.lucene.search.TermsFilter; import org.apache.lucene.search.TermsFilter;
import org.codehaus.jackson.JsonParser; import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken; import org.codehaus.jackson.JsonToken;
@ -73,7 +74,7 @@ public class TermsJsonFilterParser extends AbstractIndexComponent implements Jso
throw new QueryParsingException(index, "Terms filter must define the terms to filter on as an array"); throw new QueryParsingException(index, "Terms filter must define the terms to filter on as an array");
} }
TermsFilter termsFilter = new TermsFilter(); TermsFilter termsFilter = new PublicTermsFilter();
while ((token = jp.nextToken()) != JsonToken.END_ARRAY) { while ((token = jp.nextToken()) != JsonToken.END_ARRAY) {
String value = jp.getText(); String value = jp.getText();
if (value == null) { if (value == null) {

View File

@ -61,7 +61,10 @@ public class HighlightPhase implements SearchPhase {
} }
FastVectorHighlighter highlighter = new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder); FastVectorHighlighter highlighter = new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder);
FieldQuery fieldQuery = highlighter.getFieldQuery(context.query()); CustomFieldQuery.reader.set(context.searcher().getIndexReader());
CustomFieldQuery.highlightFilters.set(context.highlight().highlightFilter());
FieldQuery fieldQuery = new CustomFieldQuery(context.query(), highlighter);
for (SearchHit hit : context.fetchResult().hits().hits()) { for (SearchHit hit : context.fetchResult().hits().hits()) {
InternalSearchHit internalHit = (InternalSearchHit) hit; InternalSearchHit internalHit = (InternalSearchHit) hit;

View File

@ -25,6 +25,7 @@ import org.codehaus.jackson.JsonToken;
import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.SearchParseException; import org.elasticsearch.search.SearchParseException;
import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.util.Booleans;
import java.util.List; import java.util.List;
@ -67,6 +68,7 @@ public class HighlighterParseElement implements SearchParseElement {
String[] preTags = DEFAULT_PRE_TAGS; String[] preTags = DEFAULT_PRE_TAGS;
String[] postTags = DEFAULT_POST_TAGS; String[] postTags = DEFAULT_POST_TAGS;
boolean scoreOrdered = false; boolean scoreOrdered = false;
boolean highlightFilter = true;
while ((token = jp.nextToken()) != JsonToken.END_OBJECT) { while ((token = jp.nextToken()) != JsonToken.END_OBJECT) {
if (token == JsonToken.FIELD_NAME) { if (token == JsonToken.FIELD_NAME) {
topLevelFieldName = jp.getCurrentName(); topLevelFieldName = jp.getCurrentName();
@ -97,6 +99,16 @@ public class HighlighterParseElement implements SearchParseElement {
preTags = STYLED_PRE_TAG; preTags = STYLED_PRE_TAG;
postTags = STYLED_POST_TAGS; postTags = STYLED_POST_TAGS;
} }
} else if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
highlightFilter = Booleans.parseBoolean(jp.getText(), true);
}
} else if (token == JsonToken.VALUE_NUMBER_INT) {
if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
highlightFilter = jp.getIntValue() != 0;
}
} else if (token == JsonToken.VALUE_FALSE) {
if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
highlightFilter = false;
} }
} else if (token == JsonToken.START_OBJECT) { } else if (token == JsonToken.START_OBJECT) {
if ("fields".equals(topLevelFieldName)) { if ("fields".equals(topLevelFieldName)) {
@ -134,6 +146,6 @@ public class HighlighterParseElement implements SearchParseElement {
if (preTags != null && postTags == null) { if (preTags != null && postTags == null) {
throw new SearchParseException(context, "Highlighter preTags are set, but postTags are not set"); throw new SearchParseException(context, "Highlighter preTags are set, but postTags are not set");
} }
context.highlight(new SearchContextHighlight(fields, preTags, postTags, scoreOrdered)); context.highlight(new SearchContextHighlight(fields, preTags, postTags, scoreOrdered, highlightFilter));
} }
} }

View File

@ -34,11 +34,19 @@ public class SearchContextHighlight {
private boolean scoreOrdered = false; private boolean scoreOrdered = false;
public SearchContextHighlight(List<ParsedHighlightField> fields, String[] preTags, String[] postTags, boolean scoreOrdered) { private boolean highlightFilter;
public SearchContextHighlight(List<ParsedHighlightField> fields, String[] preTags, String[] postTags,
boolean scoreOrdered, boolean highlightFilter) {
this.fields = fields; this.fields = fields;
this.preTags = preTags; this.preTags = preTags;
this.postTags = postTags; this.postTags = postTags;
this.scoreOrdered = scoreOrdered; this.scoreOrdered = scoreOrdered;
this.highlightFilter = highlightFilter;
}
public boolean highlightFilter() {
return highlightFilter;
} }
public List<ParsedHighlightField> fields() { public List<ParsedHighlightField> fields() {

View File

@ -23,9 +23,8 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.*;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.vectorhighlight.CustomFieldQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter; import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
@ -61,6 +60,45 @@ public class VectorHighlighterTests {
System.out.println(fragment); System.out.println(fragment);
} }
/**
 * Checks prefix-query highlighting in three steps against a single indexed document:
 * the stock field query on an un-rewritten prefix query yields no fragment, the stock
 * field query on the query rewritten with the scoring boolean rewrite yields one, and
 * {@link CustomFieldQuery} yields one directly from the un-rewritten prefix query.
 */
@Test public void testVectorHighlighterPrefixQuery() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
// "content" is indexed with positions and offsets term vectors
indexWriter.addDocument(doc().add(field("_id", "1")).add(field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)).build());
IndexReader reader = indexWriter.getReader();
IndexSearcher searcher = new IndexSearcher(reader);
// look the document up by id; topDocs supplies the doc number passed to the highlighter below
TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
assertThat(topDocs.totalHits, equalTo(1));
FastVectorHighlighter highlighter = new FastVectorHighlighter();
// step 1: prefix query left on its default (constant score auto) rewrite -> no fragment
PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba"));
assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(prefixQuery),
reader, topDocs.scoreDocs[0].doc, "content", 30);
assertThat(fragment, nullValue());
// step 2: explicitly rewrite with the scoring boolean rewrite -> the stock field query finds a fragment
prefixQuery.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_QUERY_REWRITE);
Query rewriteQuery = prefixQuery.rewrite(reader);
fragment = highlighter.getBestFragment(highlighter.getFieldQuery(rewriteQuery),
reader, topDocs.scoreDocs[0].doc, "content", 30);
assertThat(fragment, notNullValue());
System.out.println(fragment);
// now check with the custom field query
// step 3: a fresh prefix query, again on the default rewrite, handed straight to CustomFieldQuery
prefixQuery = new PrefixQuery(new Term("content", "ba"));
assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
// the reader thread local has to be set before the CustomFieldQuery is constructed;
// CustomFieldQuery.highlightFilters is not set by this test
CustomFieldQuery.reader.set(reader);
fragment = highlighter.getBestFragment(new CustomFieldQuery(prefixQuery, highlighter),
reader, topDocs.scoreDocs[0].doc, "content", 30);
assertThat(fragment, notNullValue());
System.out.println(fragment);
}
@Test public void testVectorHighlighterNoStore() throws Exception { @Test public void testVectorHighlighterNoStore() throws Exception {
Directory dir = new RAMDirectory(); Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED); IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);

View File

@ -103,6 +103,26 @@ public class HighlightSearchTests extends AbstractNodesTests {
} }
} }
/**
 * Runs a prefix query on the "multi" field while requesting highlighting of the "_all"
 * field, and asserts that every returned hit carries exactly one highlighted field
 * ("_all") with at least one fragment.
 */
@Test public void testPrefixHighlightingOnSpecificField() throws Exception {
// prefix query on "multi"; highlight "_all", score ordered, with custom <xxx> tags
SearchSourceBuilder source = searchSource()
.query(prefixQuery("multi", "te"))
.from(0).size(60).explain(true)
.highlight(highlight().field("_all").order("score").preTags("<xxx>").postTags("</xxx>"));
SearchResponse searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
// no shard may have failed; 100 documents are expected to match, 60 of which are returned
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(100l));
assertThat(searchResponse.hits().hits().length, equalTo(60));
for (int i = 0; i < 60; i++) {
SearchHit hit = searchResponse.hits().hits()[i];
// System.out.println(hit.target() + ": " + hit.explanation());
// assertThat("id[" + hit.id() + "]", hit.id(), equalTo(Integer.toString(100 - i - 1)));
// System.out.println(hit.shard() + ": " + hit.highlightFields());
// each hit must have exactly one highlighted field, "_all", with at least one fragment
assertThat(hit.highlightFields().size(), equalTo(1));
assertThat(hit.highlightFields().get("_all").fragments().length, greaterThan(0));
}
}
private void index(Client client, String id, String nameValue, int age) throws IOException { private void index(Client client, String id, String nameValue, int age) throws IOException {
client.index(Requests.indexRequest("test").type("type1").id(id).source(source(id, nameValue, age))).actionGet(); client.index(Requests.indexRequest("test").type("type1").id(id).source(source(id, nameValue, age))).actionGet();
} }