Fix highlighting when the query runs against the `_all` field or uses prefix queries. Also add a flag on highlight that controls whether filters should be highlighted (called `highlight_filter`), defaulting to true. Closes #148.
parent 453ede8f57
commit bf6cead984
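For context, the new flag is read by HighlighterParseElement (see the hunks below) as `highlight_filter` or `highlightFilter` at the top level of the `highlight` object. A request body of roughly the following shape would turn it off; the index, query and field names here are illustrative, not taken from this commit:

{
    "query" : { "prefix" : { "multi" : "te" } },
    "highlight" : {
        "fields" : { "_all" : {} },
        "highlight_filter" : false
    }
}

When the flag is left at its default of true, filters wrapped in constant-score queries are flattened and highlighted as well; the parser also accepts a 0/1 integer or a string boolean for the flag.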
@@ -11,7 +11,7 @@
     <option name="METHOD_NAME" value="" />
     <option name="GROUP_NAME" value="" />
     <option name="TEST_OBJECT" value="PACKAGE" />
-    <option name="VM_PARAMETERS" value="-Djava.net.preferIPv4Stack=true" />
+    <option name="VM_PARAMETERS" value="-Djava.net.preferIPv4Stack=true -Xmx512m" />
     <option name="PARAMETERS" value="" />
     <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" />
     <option name="OUTPUT_DIRECTORY" value="" />
@@ -11,7 +11,7 @@
     <option name="METHOD_NAME" value="" />
     <option name="GROUP_NAME" value="" />
     <option name="TEST_OBJECT" value="PACKAGE" />
-    <option name="VM_PARAMETERS" value="-Des.node.local=true" />
+    <option name="VM_PARAMETERS" value="-Des.node.local=true -Xmx512m" />
     <option name="PARAMETERS" value="" />
     <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" />
     <option name="OUTPUT_DIRECTORY" value="" />
@@ -0,0 +1,40 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.util.List;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class PublicBooleanFilter extends BooleanFilter {
+
+    public List<Filter> getShouldFilters() {
+        return this.shouldFilters;
+    }
+
+    public List<Filter> getMustFilters() {
+        return this.mustFilters;
+    }
+
+    public List<Filter> getNotFilters() {
+        return this.notFilters;
+    }
+}
@@ -0,0 +1,34 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search;
+
+import org.apache.lucene.index.Term;
+
+import java.util.Set;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class PublicTermsFilter extends TermsFilter {
+
+    public Set<Term> getTerms() {
+        return terms;
+    }
+}
@@ -0,0 +1,125 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.vectorhighlight;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.elasticsearch.util.lucene.search.TermFilter;
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.util.Collection;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+// LUCENE MONITOR
+public class CustomFieldQuery extends FieldQuery {
+
+    private static Field multiTermQueryWrapperFilterQueryField;
+
+    static {
+        try {
+            multiTermQueryWrapperFilterQueryField = MultiTermQueryWrapperFilter.class.getDeclaredField("query");
+            multiTermQueryWrapperFilterQueryField.setAccessible(true);
+        } catch (NoSuchFieldException e) {
+            // ignore
+        }
+    }
+
+    // hack since flatten is called from the parent constructor, so we can't pass it
+    public static ThreadLocal<IndexReader> reader = new ThreadLocal<IndexReader>();
+
+    public static ThreadLocal<Boolean> highlightFilters = new ThreadLocal<Boolean>();
+
+    public CustomFieldQuery(Query query, FastVectorHighlighter highlighter) {
+        this(query, highlighter.isPhraseHighlight(), highlighter.isFieldMatch());
+    }
+
+    public CustomFieldQuery(Query query, boolean phraseHighlight, boolean fieldMatch) {
+        super(query, phraseHighlight, fieldMatch);
+        reader.remove();
+        highlightFilters.remove();
+    }
+
+    @Override void flatten(Query sourceQuery, Collection<Query> flatQueries) {
+        if (sourceQuery instanceof DisjunctionMaxQuery) {
+            DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
+            for (Query query : dmq) {
+                flatten(query, flatQueries);
+            }
+        } else if (sourceQuery instanceof SpanTermQuery) {
+            TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm());
+            if (!flatQueries.contains(termQuery)) {
+                flatQueries.add(termQuery);
+            }
+        } else if (sourceQuery instanceof ConstantScoreQuery) {
+            Boolean highlight = highlightFilters.get();
+            if (highlight != null && highlight.equals(Boolean.TRUE)) {
+                flatten(((ConstantScoreQuery) sourceQuery).getFilter(), flatQueries);
+            }
+        } else if (sourceQuery instanceof MultiTermQuery) {
+            MultiTermQuery multiTermQuery = (MultiTermQuery) sourceQuery;
+            MultiTermQuery.RewriteMethod rewriteMethod = multiTermQuery.getRewriteMethod();
+            if (rewriteMethod != MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE && rewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
+                // we need to rewrite
+                multiTermQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+                try {
+                    flatten(multiTermQuery.rewrite(reader.get()), flatQueries);
+                } catch (IOException e) {
+                    // ignore
+                } finally {
+                    multiTermQuery.setRewriteMethod(rewriteMethod);
+                }
+            }
+        } else {
+            super.flatten(sourceQuery, flatQueries);
+        }
+    }
+
+    void flatten(Filter sourceFilter, Collection<Query> flatQueries) {
+        if (sourceFilter instanceof TermFilter) {
+            flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), flatQueries);
+        } else if (sourceFilter instanceof PublicTermsFilter) {
+            PublicTermsFilter termsFilter = (PublicTermsFilter) sourceFilter;
+            for (Term term : termsFilter.getTerms()) {
+                flatten(new TermQuery(term), flatQueries);
+            }
+        } else if (sourceFilter instanceof MultiTermQueryWrapperFilter) {
+            if (multiTermQueryWrapperFilterQueryField != null) {
+                try {
+                    flatten((Query) multiTermQueryWrapperFilterQueryField.get(sourceFilter), flatQueries);
+                } catch (IllegalAccessException e) {
+                    // ignore
+                }
+            }
+        } else if (sourceFilter instanceof PublicBooleanFilter) {
+            PublicBooleanFilter booleanFilter = (PublicBooleanFilter) sourceFilter;
+            for (Filter filter : booleanFilter.getMustFilters()) {
+                flatten(filter, flatQueries);
+            }
+            for (Filter filter : booleanFilter.getNotFilters()) {
+                flatten(filter, flatQueries);
+            }
+        }
+    }
+}
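CustomFieldQuery flattens filter-backed queries (constant-score filters, terms filters, boolean filters, and multi-term queries such as prefix queries, via a rewrite against the reader) into plain term queries that the FastVectorHighlighter can pick up. A minimal sketch of the intended call pattern, mirroring the HighlightPhase change and the test further below; `reader` and `docId` are stand-ins for an open IndexReader over a field indexed with term vectors (positions and offsets) and the doc id of a hit:

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.vectorhighlight.CustomFieldQuery;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;

FastVectorHighlighter highlighter = new FastVectorHighlighter();
PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba"));

// flatten() runs inside the FieldQuery super-constructor, so the reader and the
// highlight-filters flag are handed over through the ThreadLocals beforehand;
// the CustomFieldQuery constructor clears them again once flattening is done.
CustomFieldQuery.reader.set(reader);
CustomFieldQuery.highlightFilters.set(Boolean.TRUE);

String fragment = highlighter.getBestFragment(new CustomFieldQuery(prefixQuery, highlighter),
        reader, docId, "content", 30);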
@@ -20,10 +20,7 @@
 package org.elasticsearch.index.query.json;
 
 import com.google.inject.Inject;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanFilter;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.FilterClause;
+import org.apache.lucene.search.*;
 import org.codehaus.jackson.JsonParser;
 import org.codehaus.jackson.JsonToken;
 import org.elasticsearch.index.AbstractIndexComponent;
@@ -85,7 +82,7 @@ public class BoolJsonFilterParser extends AbstractIndexComponent implements Json
             }
         }
 
-        BooleanFilter booleanFilter = new BooleanFilter();
+        BooleanFilter booleanFilter = new PublicBooleanFilter();
         for (FilterClause filterClause : clauses) {
             booleanFilter.add(filterClause);
         }
@@ -22,6 +22,7 @@ package org.elasticsearch.index.query.json;
 import com.google.inject.Inject;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.PublicTermsFilter;
 import org.apache.lucene.search.TermsFilter;
 import org.codehaus.jackson.JsonParser;
 import org.codehaus.jackson.JsonToken;
@@ -73,7 +74,7 @@ public class TermsJsonFilterParser extends AbstractIndexComponent implements Jso
             throw new QueryParsingException(index, "Terms filter must define the terms to filter on as an array");
         }
 
-        TermsFilter termsFilter = new TermsFilter();
+        TermsFilter termsFilter = new PublicTermsFilter();
         while ((token = jp.nextToken()) != JsonToken.END_ARRAY) {
             String value = jp.getText();
             if (value == null) {
@@ -61,7 +61,10 @@ public class HighlightPhase implements SearchPhase {
             }
             FastVectorHighlighter highlighter = new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder);
 
-            FieldQuery fieldQuery = highlighter.getFieldQuery(context.query());
+            CustomFieldQuery.reader.set(context.searcher().getIndexReader());
+            CustomFieldQuery.highlightFilters.set(context.highlight().highlightFilter());
+
+            FieldQuery fieldQuery = new CustomFieldQuery(context.query(), highlighter);
             for (SearchHit hit : context.fetchResult().hits().hits()) {
                 InternalSearchHit internalHit = (InternalSearchHit) hit;
 
@@ -25,6 +25,7 @@ import org.codehaus.jackson.JsonToken;
 import org.elasticsearch.search.SearchParseElement;
 import org.elasticsearch.search.SearchParseException;
 import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.util.Booleans;
 
 import java.util.List;
 
@@ -67,6 +68,7 @@ public class HighlighterParseElement implements SearchParseElement {
         String[] preTags = DEFAULT_PRE_TAGS;
         String[] postTags = DEFAULT_POST_TAGS;
         boolean scoreOrdered = false;
+        boolean highlightFilter = true;
         while ((token = jp.nextToken()) != JsonToken.END_OBJECT) {
             if (token == JsonToken.FIELD_NAME) {
                 topLevelFieldName = jp.getCurrentName();
@@ -97,6 +99,16 @@ public class HighlighterParseElement implements SearchParseElement {
                         preTags = STYLED_PRE_TAG;
                         postTags = STYLED_POST_TAGS;
                     }
-                }
+                } else if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
+                    highlightFilter = Booleans.parseBoolean(jp.getText(), true);
+                }
+            } else if (token == JsonToken.VALUE_NUMBER_INT) {
+                if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
+                    highlightFilter = jp.getIntValue() != 0;
+                }
+            } else if (token == JsonToken.VALUE_FALSE) {
+                if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
+                    highlightFilter = false;
+                }
             } else if (token == JsonToken.START_OBJECT) {
                 if ("fields".equals(topLevelFieldName)) {
@@ -134,6 +146,6 @@ public class HighlighterParseElement implements SearchParseElement {
         if (preTags != null && postTags == null) {
             throw new SearchParseException(context, "Highlighter preTags are set, but postTags are not set");
         }
-        context.highlight(new SearchContextHighlight(fields, preTags, postTags, scoreOrdered));
+        context.highlight(new SearchContextHighlight(fields, preTags, postTags, scoreOrdered, highlightFilter));
     }
 }
@@ -34,11 +34,19 @@ public class SearchContextHighlight {
 
     private boolean scoreOrdered = false;
 
-    public SearchContextHighlight(List<ParsedHighlightField> fields, String[] preTags, String[] postTags, boolean scoreOrdered) {
+    private boolean highlightFilter;
+
+    public SearchContextHighlight(List<ParsedHighlightField> fields, String[] preTags, String[] postTags,
+                                  boolean scoreOrdered, boolean highlightFilter) {
         this.fields = fields;
         this.preTags = preTags;
         this.postTags = postTags;
         this.scoreOrdered = scoreOrdered;
+        this.highlightFilter = highlightFilter;
     }
 
+    public boolean highlightFilter() {
+        return highlightFilter;
+    }
+
     public List<ParsedHighlightField> fields() {
@@ -23,9 +23,8 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.vectorhighlight.CustomFieldQuery;
 import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
@@ -61,6 +60,45 @@ public class VectorHighlighterTests {
         System.out.println(fragment);
     }
 
+    @Test public void testVectorHighlighterPrefixQuery() throws Exception {
+        Directory dir = new RAMDirectory();
+        IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
+
+        indexWriter.addDocument(doc().add(field("_id", "1")).add(field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)).build());
+
+        IndexReader reader = indexWriter.getReader();
+        IndexSearcher searcher = new IndexSearcher(reader);
+        TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
+
+        assertThat(topDocs.totalHits, equalTo(1));
+
+        FastVectorHighlighter highlighter = new FastVectorHighlighter();
+
+        PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba"));
+        assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
+        String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(prefixQuery),
+                reader, topDocs.scoreDocs[0].doc, "content", 30);
+        assertThat(fragment, nullValue());
+
+        prefixQuery.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+        Query rewriteQuery = prefixQuery.rewrite(reader);
+        fragment = highlighter.getBestFragment(highlighter.getFieldQuery(rewriteQuery),
+                reader, topDocs.scoreDocs[0].doc, "content", 30);
+        assertThat(fragment, notNullValue());
+
+        System.out.println(fragment);
+
+        // now check with the custom field query
+        prefixQuery = new PrefixQuery(new Term("content", "ba"));
+        assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
+        CustomFieldQuery.reader.set(reader);
+        fragment = highlighter.getBestFragment(new CustomFieldQuery(prefixQuery, highlighter),
+                reader, topDocs.scoreDocs[0].doc, "content", 30);
+        assertThat(fragment, notNullValue());
+
+        System.out.println(fragment);
+    }
+
     @Test public void testVectorHighlighterNoStore() throws Exception {
         Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
 
@@ -103,6 +103,26 @@ public class HighlightSearchTests extends AbstractNodesTests {
         }
     }
 
+    @Test public void testPrefixHighlightingOnSpecificField() throws Exception {
+        SearchSourceBuilder source = searchSource()
+                .query(prefixQuery("multi", "te"))
+                .from(0).size(60).explain(true)
+                .highlight(highlight().field("_all").order("score").preTags("<xxx>").postTags("</xxx>"));
+
+        SearchResponse searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
+        assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
+        assertThat(searchResponse.hits().totalHits(), equalTo(100l));
+        assertThat(searchResponse.hits().hits().length, equalTo(60));
+        for (int i = 0; i < 60; i++) {
+            SearchHit hit = searchResponse.hits().hits()[i];
+            // System.out.println(hit.target() + ": " + hit.explanation());
+            // assertThat("id[" + hit.id() + "]", hit.id(), equalTo(Integer.toString(100 - i - 1)));
+            // System.out.println(hit.shard() + ": " + hit.highlightFields());
+            assertThat(hit.highlightFields().size(), equalTo(1));
+            assertThat(hit.highlightFields().get("_all").fragments().length, greaterThan(0));
+        }
+    }
+
     private void index(Client client, String id, String nameValue, int age) throws IOException {
         client.index(Requests.indexRequest("test").type("type1").id(id).source(source(id, nameValue, age))).actionGet();
     }