Highlighting is broken when the query is on the `_all` field or uses prefixes. Also add a flag to the highlight element that controls whether filters are highlighted (called `highlight_filter`, also accepted as `highlightFilter`), which defaults to true. Closes #148.

2010-04-25 18:32:46 +03:00 · 2010-04-25 18:32:46 +03:00 · bf6cead984
parent 453ede8f57
commit bf6cead984
12 changed files with 292 additions and 14 deletions
--- a/.idea/runConfigurations/Elastic_Search_Tests.xml
+++ b/.idea/runConfigurations/Elastic_Search_Tests.xml
@ -11,7 +11,7 @@
    <option name="METHOD_NAME" value="" />
    <option name="GROUP_NAME" value="" />
    <option name="TEST_OBJECT" value="PACKAGE" />
-    <option name="VM_PARAMETERS" value="-Djava.net.preferIPv4Stack=true" />
+    <option name="VM_PARAMETERS" value="-Djava.net.preferIPv4Stack=true -Xmx512m" />
    <option name="PARAMETERS" value="" />
    <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" />
    <option name="OUTPUT_DIRECTORY" value="" />
--- a/.idea/runConfigurations/Elastic_Search_Tests__Local_.xml
+++ b/.idea/runConfigurations/Elastic_Search_Tests__Local_.xml
@ -11,7 +11,7 @@
    <option name="METHOD_NAME" value="" />
    <option name="GROUP_NAME" value="" />
    <option name="TEST_OBJECT" value="PACKAGE" />
-    <option name="VM_PARAMETERS" value="-Des.node.local=true" />
+    <option name="VM_PARAMETERS" value="-Des.node.local=true -Xmx512m" />
    <option name="PARAMETERS" value="" />
    <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" />
    <option name="OUTPUT_DIRECTORY" value="" />
--- a/modules/elasticsearch/src/main/java/org/apache/lucene/search/PublicBooleanFilter.java
+++ b/modules/elasticsearch/src/main/java/org/apache/lucene/search/PublicBooleanFilter.java
@ -0,0 +1,40 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search;
+
+import java.util.List;
+
+/**
+ * A {@link BooleanFilter} subclass that exposes the parent's clause lists through public
+ * getters. It is declared in the {@code org.apache.lucene.search} package, presumably so that
+ * it can reach parent fields that are not public.
+ *
+ * <p>Every getter hands back the parent's own field without copying it.
+ * NOTE(review): a list is presumably {@code null} until a clause of that kind has been added;
+ * confirm against the Lucene version in use and null-check in callers.
+ *
+ * @author kimchy (shay.banon)
+ */
+public class PublicBooleanFilter extends BooleanFilter {
+
+    /**
+     * @return the {@code mustFilters} field of the parent filter
+     */
+    public List<Filter> getMustFilters() {
+        return mustFilters;
+    }
+
+    /**
+     * @return the {@code shouldFilters} field of the parent filter
+     */
+    public List<Filter> getShouldFilters() {
+        return shouldFilters;
+    }
+
+    /**
+     * @return the {@code notFilters} field of the parent filter
+     */
+    public List<Filter> getNotFilters() {
+        return notFilters;
+    }
+}
--- a/modules/elasticsearch/src/main/java/org/apache/lucene/search/PublicTermsFilter.java
+++ b/modules/elasticsearch/src/main/java/org/apache/lucene/search/PublicTermsFilter.java
@ -0,0 +1,34 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search;
+
+import org.apache.lucene.index.Term;
+
+import java.util.Set;
+
+/**
+ * A {@link TermsFilter} subclass that exposes the parent's {@code terms} field through a
+ * public getter. It is declared in the {@code org.apache.lucene.search} package, presumably
+ * so that it can reach a parent field that is not public.
+ *
+ * @author kimchy (shay.banon)
+ */
+public class PublicTermsFilter extends TermsFilter {
+
+    /**
+     * @return the {@code terms} field of the parent filter, handed back without copying
+     */
+    public Set<Term> getTerms() {
+        return this.terms;
+    }
+}
--- a/modules/elasticsearch/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java
+++ b/modules/elasticsearch/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java
@ -0,0 +1,125 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.vectorhighlight;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.elasticsearch.util.lucene.search.TermFilter;
+
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.util.Collection;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+// LUCENE MONITOR
+public class CustomFieldQuery extends FieldQuery {
+
+    private static Field multiTermQueryWrapperFilterQueryField;
+
+    static {
+        try {
+            multiTermQueryWrapperFilterQueryField = MultiTermQueryWrapperFilter.class.getDeclaredField("query");
+            multiTermQueryWrapperFilterQueryField.setAccessible(true);
+        } catch (NoSuchFieldException e) {
+            // ignore
+        }
+    }
+
+    // hack since flatten is called from the parent constructor, so we can't pass it
+    public static ThreadLocal<IndexReader> reader = new ThreadLocal<IndexReader>();
+
+    public static ThreadLocal<Boolean> highlightFilters = new ThreadLocal<Boolean>();
+
+    public CustomFieldQuery(Query query, FastVectorHighlighter highlighter) {
+        this(query, highlighter.isPhraseHighlight(), highlighter.isFieldMatch());
+    }
+
+    public CustomFieldQuery(Query query, boolean phraseHighlight, boolean fieldMatch) {
+        super(query, phraseHighlight, fieldMatch);
+        reader.remove();
+        highlightFilters.remove();
+    }
+
+    @Override void flatten(Query sourceQuery, Collection<Query> flatQueries) {
+        if (sourceQuery instanceof DisjunctionMaxQuery) {
+            DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) sourceQuery;
+            for (Query query : dmq) {
+                flatten(query, flatQueries);
+            }
+        } else if (sourceQuery instanceof SpanTermQuery) {
+            TermQuery termQuery = new TermQuery(((SpanTermQuery) sourceQuery).getTerm());
+            if (!flatQueries.contains(termQuery)) {
+                flatQueries.add(termQuery);
+            }
+        } else if (sourceQuery instanceof ConstantScoreQuery) {
+            Boolean highlight = highlightFilters.get();
+            if (highlight != null && highlight.equals(Boolean.TRUE)) {
+                flatten(((ConstantScoreQuery) sourceQuery).getFilter(), flatQueries);
+            }
+        } else if (sourceQuery instanceof MultiTermQuery) {
+            MultiTermQuery multiTermQuery = (MultiTermQuery) sourceQuery;
+            MultiTermQuery.RewriteMethod rewriteMethod = multiTermQuery.getRewriteMethod();
+            if (rewriteMethod != MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE && rewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) {
+                // we need to rewrite
+                multiTermQuery.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+                try {
+                    flatten(multiTermQuery.rewrite(reader.get()), flatQueries);
+                } catch (IOException e) {
+                    // ignore
+                } finally {
+                    multiTermQuery.setRewriteMethod(rewriteMethod);
+                }
+            }
+        } else {
+            super.flatten(sourceQuery, flatQueries);
+        }
+    }
+
+    void flatten(Filter sourceFilter, Collection<Query> flatQueries) {
+        if (sourceFilter instanceof TermFilter) {
+            flatten(new TermQuery(((TermFilter) sourceFilter).getTerm()), flatQueries);
+        } else if (sourceFilter instanceof PublicTermsFilter) {
+            PublicTermsFilter termsFilter = (PublicTermsFilter) sourceFilter;
+            for (Term term : termsFilter.getTerms()) {
+                flatten(new TermQuery(term), flatQueries);
+            }
+        } else if (sourceFilter instanceof MultiTermQueryWrapperFilter) {
+            if (multiTermQueryWrapperFilterQueryField != null) {
+                try {
+                    flatten((Query) multiTermQueryWrapperFilterQueryField.get(sourceFilter), flatQueries);
+                } catch (IllegalAccessException e) {
+                    // ignore
+                }
+            }
+        } else if (sourceFilter instanceof PublicBooleanFilter) {
+            PublicBooleanFilter booleanFilter = (PublicBooleanFilter) sourceFilter;
+            for (Filter filter : booleanFilter.getMustFilters()) {
+                flatten(filter, flatQueries);
+            }
+            for (Filter filter : booleanFilter.getNotFilters()) {
+                flatten(filter, flatQueries);
+            }
+        }
+    }
+}
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/BoolJsonFilterParser.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/BoolJsonFilterParser.java
@ -20,10 +20,7 @@
 package org.elasticsearch.index.query.json;

 import com.google.inject.Inject;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanFilter;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.search.FilterClause;
+import org.apache.lucene.search.*;
 import org.codehaus.jackson.JsonParser;
 import org.codehaus.jackson.JsonToken;
 import org.elasticsearch.index.AbstractIndexComponent;
@ -85,7 +82,7 @@ public class BoolJsonFilterParser extends AbstractIndexComponent implements Json
            }
        }

-        BooleanFilter booleanFilter = new BooleanFilter();
+        BooleanFilter booleanFilter = new PublicBooleanFilter();
        for (FilterClause filterClause : clauses) {
            booleanFilter.add(filterClause);
        }
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/TermsJsonFilterParser.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/TermsJsonFilterParser.java
@ -22,6 +22,7 @@ package org.elasticsearch.index.query.json;
 import com.google.inject.Inject;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.PublicTermsFilter;
 import org.apache.lucene.search.TermsFilter;
 import org.codehaus.jackson.JsonParser;
 import org.codehaus.jackson.JsonToken;
@ -73,7 +74,7 @@ public class TermsJsonFilterParser extends AbstractIndexComponent implements Jso
            throw new QueryParsingException(index, "Terms filter must define the terms to filter on as an array");
        }

-        TermsFilter termsFilter = new TermsFilter();
+        TermsFilter termsFilter = new PublicTermsFilter();
        while ((token = jp.nextToken()) != JsonToken.END_ARRAY) {
            String value = jp.getText();
            if (value == null) {
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java
@ -61,7 +61,10 @@ public class HighlightPhase implements SearchPhase {
        }
        FastVectorHighlighter highlighter = new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder);

-        FieldQuery fieldQuery = highlighter.getFieldQuery(context.query());
+        CustomFieldQuery.reader.set(context.searcher().getIndexReader());
+        CustomFieldQuery.highlightFilters.set(context.highlight().highlightFilter());
+
+        FieldQuery fieldQuery = new CustomFieldQuery(context.query(), highlighter);
        for (SearchHit hit : context.fetchResult().hits().hits()) {
            InternalSearchHit internalHit = (InternalSearchHit) hit;

--- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java
@ -25,6 +25,7 @@ import org.codehaus.jackson.JsonToken;
 import org.elasticsearch.search.SearchParseElement;
 import org.elasticsearch.search.SearchParseException;
 import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.util.Booleans;

 import java.util.List;

@ -67,6 +68,7 @@ public class HighlighterParseElement implements SearchParseElement {
        String[] preTags = DEFAULT_PRE_TAGS;
        String[] postTags = DEFAULT_POST_TAGS;
        boolean scoreOrdered = false;
+        boolean highlightFilter = true;
        while ((token = jp.nextToken()) != JsonToken.END_OBJECT) {
            if (token == JsonToken.FIELD_NAME) {
                topLevelFieldName = jp.getCurrentName();
@ -97,6 +99,16 @@ public class HighlighterParseElement implements SearchParseElement {
                        preTags = STYLED_PRE_TAG;
                        postTags = STYLED_POST_TAGS;
                    }
+                } else if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
+                    highlightFilter = Booleans.parseBoolean(jp.getText(), true);
+                }
+            } else if (token == JsonToken.VALUE_NUMBER_INT) {
+                if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
+                    highlightFilter = jp.getIntValue() != 0;
+                }
+            } else if (token == JsonToken.VALUE_FALSE) {
+                if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
+                    highlightFilter = false;
                }
            } else if (token == JsonToken.START_OBJECT) {
                if ("fields".equals(topLevelFieldName)) {
@ -134,6 +146,6 @@ public class HighlighterParseElement implements SearchParseElement {
        if (preTags != null && postTags == null) {
            throw new SearchParseException(context, "Highlighter preTags are set, but postTags are not set");
        }
-        context.highlight(new SearchContextHighlight(fields, preTags, postTags, scoreOrdered));
+        context.highlight(new SearchContextHighlight(fields, preTags, postTags, scoreOrdered, highlightFilter));
    }
 }
--- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java
+++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java
@ -34,11 +34,19 @@ public class SearchContextHighlight {

    private boolean scoreOrdered = false;

-    public SearchContextHighlight(List<ParsedHighlightField> fields, String[] preTags, String[] postTags, boolean scoreOrdered) {
+    private boolean highlightFilter;
+
+    public SearchContextHighlight(List<ParsedHighlightField> fields, String[] preTags, String[] postTags,
+                                  boolean scoreOrdered, boolean highlightFilter) {
        this.fields = fields;
        this.preTags = preTags;
        this.postTags = postTags;
        this.scoreOrdered = scoreOrdered;
+        this.highlightFilter = highlightFilter;
+    }
+
+    public boolean highlightFilter() {
+        return highlightFilter;
    }

    public List<ParsedHighlightField> fields() {
--- a/modules/elasticsearch/src/test/java/org/elasticsearch/deps/lucene/VectorHighlighterTests.java
+++ b/modules/elasticsearch/src/test/java/org/elasticsearch/deps/lucene/VectorHighlighterTests.java
@ -23,9 +23,8 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.vectorhighlight.CustomFieldQuery;
 import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
@ -61,6 +60,45 @@ public class VectorHighlighterTests {
        System.out.println(fragment);
    }

+    /**
+     * Highlights a prefix query three ways against a single indexed document: as is through the
+     * plain highlighter (expects no fragment), after an explicit scoring boolean rewrite (expects
+     * a fragment), and as is through {@link CustomFieldQuery} (expects a fragment).
+     */
+    @Test public void testVectorHighlighterPrefixQuery() throws Exception {
+        Directory directory = new RAMDirectory();
+        IndexWriter writer = new IndexWriter(directory, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
+        writer.addDocument(doc()
+                .add(field("_id", "1"))
+                .add(field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS))
+                .build());
+
+        IndexReader reader = writer.getReader();
+        TopDocs hits = new IndexSearcher(reader).search(new TermQuery(new Term("_id", "1")), 1);
+        assertThat(hits.totalHits, equalTo(1));
+
+        FastVectorHighlighter highlighter = new FastVectorHighlighter();
+
+        // 1. the untouched prefix query uses the default (auto) rewrite and yields no fragment
+        PrefixQuery plainPrefix = new PrefixQuery(new Term("content", "ba"));
+        assertThat(plainPrefix.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
+        String plainFragment = highlighter.getBestFragment(highlighter.getFieldQuery(plainPrefix),
+                reader, hits.scoreDocs[0].doc, "content", 30);
+        assertThat(plainFragment, nullValue());
+
+        // 2. rewritten by hand into a scoring boolean query, the same prefix is highlighted
+        plainPrefix.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+        Query rewritten = plainPrefix.rewrite(reader);
+        String rewrittenFragment = highlighter.getBestFragment(highlighter.getFieldQuery(rewritten),
+                reader, hits.scoreDocs[0].doc, "content", 30);
+        assertThat(rewrittenFragment, notNullValue());
+        System.out.println(rewrittenFragment);
+
+        // 3. a fresh, unrewritten prefix query is highlighted through the custom field query
+        PrefixQuery customPrefix = new PrefixQuery(new Term("content", "ba"));
+        assertThat(customPrefix.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT.getClass().getName()));
+        CustomFieldQuery.reader.set(reader);
+        String customFragment = highlighter.getBestFragment(new CustomFieldQuery(customPrefix, highlighter),
+                reader, hits.scoreDocs[0].doc, "content", 30);
+        assertThat(customFragment, notNullValue());
+        System.out.println(customFragment);
+    }
+
    @Test public void testVectorHighlighterNoStore() throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriter indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, IndexWriter.MaxFieldLength.UNLIMITED);
--- a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlightSearchTests.java
+++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlightSearchTests.java
@ -103,6 +103,26 @@ public class HighlightSearchTests extends AbstractNodesTests {
        }
    }

+    @Test public void testPrefixHighlightingOnSpecificField() throws Exception {
+        SearchSourceBuilder source = searchSource()
+                .query(prefixQuery("multi", "te"))
+                .from(0).size(60).explain(true)
+                .highlight(highlight().field("_all").order("score").preTags("<xxx>").postTags("</xxx>"));
+
+        SearchResponse searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
+        assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
+        assertThat(searchResponse.hits().totalHits(), equalTo(100l));
+        assertThat(searchResponse.hits().hits().length, equalTo(60));
+        for (int i = 0; i < 60; i++) {
+            SearchHit hit = searchResponse.hits().hits()[i];
+//            System.out.println(hit.target() + ": " +  hit.explanation());
+//            assertThat("id[" + hit.id() + "]", hit.id(), equalTo(Integer.toString(100 - i - 1)));
+//            System.out.println(hit.shard() + ": " + hit.highlightFields());
+            assertThat(hit.highlightFields().size(), equalTo(1));
+            assertThat(hit.highlightFields().get("_all").fragments().length, greaterThan(0));
+        }
+    }
+
    private void index(Client client, String id, String nameValue, int age) throws IOException {
        client.index(Requests.indexRequest("test").type("type1").id(id).source(source(id, nameValue, age))).actionGet();
    }