diff --git a/core/src/main/java/org/apache/lucene/search/postingshighlight/Snippet.java b/core/src/main/java/org/apache/lucene/search/highlight/Snippet.java similarity index 96% rename from core/src/main/java/org/apache/lucene/search/postingshighlight/Snippet.java rename to core/src/main/java/org/apache/lucene/search/highlight/Snippet.java index f3bfa1b9c65..81a3d406ea3 100644 --- a/core/src/main/java/org/apache/lucene/search/postingshighlight/Snippet.java +++ b/core/src/main/java/org/apache/lucene/search/highlight/Snippet.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.lucene.search.postingshighlight; +package org.apache.lucene.search.highlight; /** * Represents a scored highlighted snippet. diff --git a/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatter.java b/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatter.java index 889e7f741ed..a33bf16dee4 100644 --- a/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatter.java +++ b/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatter.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.postingshighlight; +import org.apache.lucene.search.highlight.Snippet; import org.apache.lucene.search.highlight.Encoder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils; @@ -46,10 +47,10 @@ public class CustomPassageFormatter extends PassageFormatter { for (int j = 0; j < passages.length; j++) { Passage passage = passages[j]; StringBuilder sb = new StringBuilder(); - pos = passage.startOffset; - for (int i = 0; i < passage.numMatches; i++) { - int start = passage.matchStarts[i]; - int end = passage.matchEnds[i]; + pos = passage.getStartOffset(); + for (int i = 0; i < passage.getNumMatches(); i++) { + int start = passage.getMatchStarts()[i]; + int end = passage.getMatchEnds()[i]; // its possible to have overlapping terms if (start > pos) { append(sb, content, pos, 
start); @@ -62,7 +63,7 @@ public class CustomPassageFormatter extends PassageFormatter { } } // its possible a "term" from the analyzer could span a sentence boundary. - append(sb, content, pos, Math.max(pos, passage.endOffset)); + append(sb, content, pos, Math.max(pos, passage.getEndOffset())); //we remove the paragraph separator if present at the end of the snippet (we used it as separator between values) if (sb.charAt(sb.length() - 1) == HighlightUtils.PARAGRAPH_SEPARATOR) { sb.deleteCharAt(sb.length() - 1); @@ -70,7 +71,7 @@ public class CustomPassageFormatter extends PassageFormatter { sb.deleteCharAt(sb.length() - 1); } //and we trim the snippets too - snippets[j] = new Snippet(sb.toString().trim(), passage.score, passage.numMatches > 0); + snippets[j] = new Snippet(sb.toString().trim(), passage.getScore(), passage.getNumMatches() > 0); } return snippets; } diff --git a/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighter.java b/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighter.java index 30f57b2626c..ac90a3e57ae 100644 --- a/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighter.java +++ b/core/src/main/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighter.java @@ -22,6 +22,7 @@ package org.apache.lucene.search.postingshighlight; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; +import org.apache.lucene.search.highlight.Snippet; import java.io.IOException; import java.text.BreakIterator; diff --git a/core/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java b/core/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java new file mode 100644 index 00000000000..7a34a805db6 --- /dev/null +++ b/core/src/main/java/org/apache/lucene/search/uhighlight/CustomPassageFormatter.java @@ -0,0 +1,82 @@ +/* + * Licensed to 
Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.uhighlight;
+
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.highlight.Snippet;
+import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils;
+
+/**
+ * Custom passage formatter that allows us to:
+ * 1) extract different snippets (instead of a single big string) together with their scores ({@link Snippet})
+ * 2) use the {@link Encoder} implementations that are already used with the other highlighters
+ */
+public class CustomPassageFormatter extends PassageFormatter {
+
+    private final String preTag;
+    private final String postTag;
+    private final Encoder encoder;
+
+    public CustomPassageFormatter(String preTag, String postTag, Encoder encoder) {
+        this.preTag = preTag;
+        this.postTag = postTag;
+        this.encoder = encoder;
+    }
+
+    /** Builds one {@link Snippet} per passage: matches get wrapped in the pre/post tags, all copied text is encoded. */
+    @Override
+    public Snippet[] format(Passage[] passages, String content) {
+        Snippet[] snippets = new Snippet[passages.length];
+        for (int j = 0; j < passages.length; j++) {
+            Passage passage = passages[j];
+            StringBuilder sb = new StringBuilder();
+            int pos = passage.getStartOffset();
+            for (int i = 0; i < passage.getNumMatches(); i++) {
+                int start = passage.getMatchStarts()[i];
+                int end = passage.getMatchEnds()[i];
+                // it's possible to have overlapping terms
+                if (start > pos) {
+                    append(sb, content, pos, start);
+                }
+                if (end > pos) {
+                    sb.append(preTag);
+                    append(sb, content, Math.max(pos, start), end);
+                    sb.append(postTag);
+                    pos = end;
+                }
+            }
+            // it's possible a "term" from the analyzer could span a sentence boundary.
+            append(sb, content, pos, Math.max(pos, passage.getEndOffset()));
+            //we remove the value separator if present at the end of the snippet; an empty passage leaves nothing to inspect
+            int last = sb.length() - 1;
+            if (last >= 0
+                    && (sb.charAt(last) == HighlightUtils.PARAGRAPH_SEPARATOR || sb.charAt(last) == HighlightUtils.NULL_SEPARATOR)) {
+                sb.deleteCharAt(last);
+            }
+            //and we trim the snippets too
+            snippets[j] = new Snippet(sb.toString().trim(), passage.getScore(), passage.getNumMatches() > 0);
+        }
+        return snippets;
+    }
+
+    private void append(StringBuilder dest, String content, int start, int end) {
+        dest.append(encoder.encodeText(content.substring(start, end)));
+    }
+}
diff --git a/core/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/core/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java
new file mode 100644
index 00000000000..4f1ec5fdb83
--- /dev/null
+++ b/core/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java
@@ -0,0 +1,204 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.search.uhighlight; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.CommonTermsQuery; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.highlight.Snippet; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.lucene.all.AllTermQuery; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery; +import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; + +import java.io.IOException; +import java.text.BreakIterator; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +/** + * Subclass of the {@link UnifiedHighlighter} that works for a single field in a single document. + * Uses a custom {@link PassageFormatter}. 
Accepts field content as a constructor
+ * argument, given that loading the field value can be done by reading from the _source field.
+ * Supports using different {@link BreakIterator} to break the text into fragments. Considers every distinct field
+ * value as a discrete passage for highlighting (unless the whole content needs to be highlighted).
+ * Supports both returning empty snippets and non highlighted snippets when no highlighting can be performed.
+ */
+public class CustomUnifiedHighlighter extends UnifiedHighlighter {
+    private static final Snippet[] EMPTY_SNIPPET = new Snippet[0];
+
+    private final String fieldValue;
+    private final PassageFormatter passageFormatter;
+    private final BreakIterator breakIterator;
+    private final boolean returnNonHighlightedSnippets;
+
+    /**
+     * Creates a new instance of {@link CustomUnifiedHighlighter}
+     *
+     * @param searcher the searcher handed over to the {@link UnifiedHighlighter} this class extends
+     * @param analyzer the analyzer used for the field at index time, used for multi term queries internally
+     * @param passageFormatter our own {@link CustomPassageFormatter} which generates snippets in forms of {@link Snippet} objects
+     * @param breakIterator the {@link BreakIterator} to use for dividing text into passages.
+     *                    If null {@link BreakIterator#getSentenceInstance(Locale)} is used.
+     * @param fieldValue the original field values as constructor argument, loaded from the _source field or
+     *                   the relevant stored field.
+     * @param returnNonHighlightedSnippets whether non highlighted snippets should be
+     *                    returned rather than empty snippets when no highlighting can be performed
+     */
+    public CustomUnifiedHighlighter(IndexSearcher searcher,
+                                    Analyzer analyzer,
+                                    PassageFormatter passageFormatter,
+                                    @Nullable BreakIterator breakIterator,
+                                    String fieldValue,
+                                    boolean returnNonHighlightedSnippets) {
+        super(searcher, analyzer);
+        this.breakIterator = breakIterator;
+        this.passageFormatter = passageFormatter;
+        this.fieldValue = fieldValue;
+        this.returnNonHighlightedSnippets = returnNonHighlightedSnippets;
+    }
+
+    /**
+     * Highlights terms extracted from the provided query within the content of the provided field name
+     * @return the snippets of the single highlighted document, or an empty array if there are none
+     */
+    public Snippet[] highlightField(String field, Query query, int docId, int maxPassages) throws IOException {
+        Map<String, Object[]> fieldsAsObjects = super.highlightFieldsAsObjects(new String[]{field}, query,
+            new int[]{docId}, new int[]{maxPassages});
+        Object[] snippetObjects = fieldsAsObjects.get(field);
+        if (snippetObjects != null) {
+            //one single document at a time
+            assert snippetObjects.length == 1;
+            Object snippetObject = snippetObjects[0];
+            if (snippetObject instanceof Snippet[]) {
+                return (Snippet[]) snippetObject;
+            }
+        }
+        return EMPTY_SNIPPET;
+    }
+
+    @Override
+    protected List<CharSequence[]> loadFieldValues(String[] fields, DocIdSetIterator docIter,
+                                                   int cacheCharsThreshold) throws IOException {
+        //we only highlight one field, one document at a time
+        return Collections.singletonList(new String[]{fieldValue});
+    }
+
+    @Override
+    protected BreakIterator getBreakIterator(String field) {
+        //the break iterator given to the constructor wins, the parent class provides the fallback
+        if (breakIterator != null) {
+            return breakIterator;
+        }
+        return super.getBreakIterator(field);
+    }
+
+    @Override
+    protected PassageFormatter getFormatter(String field) {
+        return passageFormatter;
+    }
+
+    @Override
+    protected int getMaxNoHighlightPassages(String field) {
+        //at most one non highlighted passage, and only when the caller asked for it
+        return returnNonHighlightedSnippets ? 1 : 0;
+    }
+
+    @Override
+    protected Collection<Query> preMultiTermQueryRewrite(Query query) {
+        return rewriteCustomQuery(query);
+    }
+
+    @Override
+    protected Collection<Query> preSpanQueryRewrite(Query query) {
+        return rewriteCustomQuery(query);
+    }
+
+    /**
+     * Translates custom queries into queries that are supported by the unified highlighter.
+     * @return the replacement queries, or null when the query is not one of the custom queries handled here
+     */
+    private Collection<Query> rewriteCustomQuery(Query query) {
+        if (query instanceof MultiPhrasePrefixQuery) {
+            MultiPhrasePrefixQuery mpq = (MultiPhrasePrefixQuery) query;
+            Term[][] terms = mpq.getTerms();
+            int[] positions = mpq.getPositions();
+            SpanQuery[] positionSpanQueries = new SpanQuery[positions.length];
+            int sizeMinus1 = terms.length - 1;
+            for (int i = 0; i < positions.length; i++) {
+                SpanQuery[] innerQueries = new SpanQuery[terms[i].length];
+                for (int j = 0; j < terms[i].length; j++) {
+                    if (i == sizeMinus1) {
+                        innerQueries[j] = new SpanMultiTermQueryWrapper<>(new PrefixQuery(terms[i][j]));
+                    } else {
+                        innerQueries[j] = new SpanTermQuery(terms[i][j]);
+                    }
+                }
+                if (innerQueries.length > 1) {
+                    positionSpanQueries[i] = new SpanOrQuery(innerQueries);
+                } else {
+                    positionSpanQueries[i] = innerQueries[0];
+                }
+            }
+            // sum position increments beyond 1
+            int positionGaps = 0;
+            if (positions.length >= 2) {
+                // positions are in increasing order. max(0,...) is just a safeguard.
+                positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
+            }
+
+            //if original slop is 0 then require inOrder
+            boolean inorder = (mpq.getSlop() == 0);
+            return Collections.singletonList(new SpanNearQuery(positionSpanQueries,
+                mpq.getSlop() + positionGaps, inorder));
+        } else if (query instanceof CommonTermsQuery) {
+            CommonTermsQuery ctq = (CommonTermsQuery) query;
+            List<Query> tqs = new ArrayList<>();
+            for (Term term : ctq.getTerms()) {
+                tqs.add(new TermQuery(term));
+            }
+            return tqs;
+        } else if (query instanceof AllTermQuery) {
+            AllTermQuery atq = (AllTermQuery) query;
+            return Collections.singletonList(new TermQuery(atq.getTerm()));
+        } else if (query instanceof FunctionScoreQuery) {
+            return Collections.singletonList(((FunctionScoreQuery) query).getSubQuery());
+        } else if (query instanceof FiltersFunctionScoreQuery) {
+            return Collections.singletonList(((FiltersFunctionScoreQuery) query).getSubQuery());
+        } else {
+            return null;
+        }
+    }
+}
diff --git a/core/src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java b/core/src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java
index 269c1c55eec..5307a417e10 100644
--- a/core/src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java
+++ b/core/src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java
@@ -32,7 +32,6 @@ import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TermQuery;
@@ -87,21 +86,18 @@ public final class AllTermQuery extends Query {
         if (rewritten != this) {
             return rewritten;
         }
-        boolean fieldExists = false;
         boolean hasPayloads = false;
         for (LeafReaderContext context : reader.leaves()) {
             final Terms terms =
context.reader().terms(term.field()); if (terms != null) { - fieldExists = true; if (terms.hasPayloads()) { hasPayloads = true; break; } } } - if (fieldExists == false) { - return new MatchNoDocsQuery(); - } + // if the terms does not exist we could return a MatchNoDocsQuery but this would break the unified highlighter + // which rewrites query with an empty reader. if (hasPayloads == false) { return new TermQuery(term); } diff --git a/core/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java b/core/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java index 87bfdacb1c7..a76428e829a 100644 --- a/core/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java +++ b/core/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java @@ -25,6 +25,8 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.Query; @@ -115,6 +117,20 @@ public class MultiPhrasePrefixQuery extends Query { positions.add(position); } + /** + * Returns the terms for each position in this phrase + */ + public Term[][] getTerms() { + Term[][] terms = new Term[termArrays.size()][]; + for (int i = 0; i < termArrays.size(); i++) { + terms[i] = new Term[termArrays.get(i).length]; + for (int j = 0; j < termArrays.get(i).length; j++) { + terms[i][j] = termArrays.get(i)[j]; + } + } + return terms; + } + /** * Returns the relative positions of terms in this phrase. 
*/ @@ -150,7 +166,12 @@ public class MultiPhrasePrefixQuery extends Query { } } if (terms.isEmpty()) { - return Queries.newMatchNoDocsQuery("No terms supplied for " + MultiPhrasePrefixQuery.class.getName()); + // if the terms does not exist we could return a MatchNoDocsQuery but this would break the unified highlighter + // which rewrites query with an empty reader. + return new BooleanQuery.Builder() + .add(query.build(), BooleanClause.Occur.MUST) + .add(Queries.newMatchNoDocsQuery("No terms supplied for " + MultiPhrasePrefixQuery.class.getName()), + BooleanClause.Occur.MUST).build(); } query.add(terms.toArray(Term.class), position); return query.build(); diff --git a/core/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java b/core/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java index fd7c8f6c49d..79f4c534688 100644 --- a/core/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java +++ b/core/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java @@ -37,7 +37,6 @@ import org.elasticsearch.common.lucene.Lucene; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Locale; diff --git a/core/src/main/java/org/elasticsearch/search/SearchModule.java b/core/src/main/java/org/elasticsearch/search/SearchModule.java index feae20a941b..01c4e3488e7 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/core/src/main/java/org/elasticsearch/search/SearchModule.java @@ -238,6 +238,7 @@ import org.elasticsearch.search.fetch.subphase.highlight.HighlightPhase; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter; import 
org.elasticsearch.search.fetch.subphase.highlight.PostingsHighlighter; +import org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter; import org.elasticsearch.search.rescore.QueryRescorerBuilder; import org.elasticsearch.search.rescore.RescoreBuilder; import org.elasticsearch.search.sort.FieldSortBuilder; @@ -599,7 +600,7 @@ public class SearchModule { highlighters.register("fvh", new FastVectorHighlighter(settings)); highlighters.register("plain", new PlainHighlighter()); highlighters.register("postings", new PostingsHighlighter()); - + highlighters.register("unified", new UnifiedHighlighter()); highlighters.extractAndRegister(plugins, SearchPlugin::getHighlighters); return unmodifiableMap(highlighters.getRegistry()); diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java index 7ed50c7a1dd..330fb908cc5 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PostingsHighlighter.java @@ -25,7 +25,7 @@ import org.apache.lucene.search.highlight.Encoder; import org.apache.lucene.search.postingshighlight.CustomPassageFormatter; import org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter; import org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator; -import org.apache.lucene.search.postingshighlight.Snippet; +import org.apache.lucene.search.highlight.Snippet; import org.apache.lucene.util.CollectionUtil; import org.elasticsearch.common.Strings; import org.elasticsearch.common.text.Text; @@ -139,14 +139,14 @@ public class PostingsHighlighter implements Highlighter { return fieldMapper.fieldType().indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; } - private static String mergeFieldValues(List fieldValues, char valuesSeparator) { + 
static String mergeFieldValues(List fieldValues, char valuesSeparator) { //postings highlighter accepts all values in a single string, as offsets etc. need to match with content //loaded from stored fields, we merge all values using a proper separator String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(valuesSeparator)); return rawValue.substring(0, Math.min(rawValue.length(), Integer.MAX_VALUE - 1)); } - private static List filterSnippets(List snippets, int numberOfFragments) { + static List filterSnippets(List snippets, int numberOfFragments) { //We need to filter the snippets as due to no_match_size we could have //either highlighted snippets or non highlighted ones and we don't want to mix those up @@ -181,11 +181,11 @@ public class PostingsHighlighter implements Highlighter { return filteredSnippets; } - private static class HighlighterEntry { + static class HighlighterEntry { Map mappers = new HashMap<>(); } - private static class MapperHighlighterEntry { + static class MapperHighlighterEntry { final CustomPassageFormatter passageFormatter; private MapperHighlighterEntry(CustomPassageFormatter passageFormatter) { diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java new file mode 100644 index 00000000000..4afce9700e5 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java @@ -0,0 +1,158 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.search.fetch.subphase.highlight;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.highlight.Snippet;
+import org.apache.lucene.search.uhighlight.CustomPassageFormatter;
+import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CollectionUtil;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.text.Text;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
+import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+import static org.elasticsearch.search.fetch.subphase.highlight.PostingsHighlighter.filterSnippets;
+import static org.elasticsearch.search.fetch.subphase.highlight.PostingsHighlighter.mergeFieldValues;
+
+/** {@link Highlighter} that highlights one field of one hit at a time through {@link CustomUnifiedHighlighter}. */
+public class UnifiedHighlighter implements Highlighter {
+    private static final String CACHE_KEY = "highlight-unified";
+
+    @Override
+    public boolean canHighlight(FieldMapper fieldMapper) {
+        return true;
+    }
+
+    @Override
+    public HighlightField highlight(HighlighterContext highlighterContext) {
+        FieldMapper fieldMapper = highlighterContext.mapper;
+        SearchContextHighlight.Field field = highlighterContext.field;
+        SearchContext context = highlighterContext.context;
+        FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
+
+        if (!hitContext.cache().containsKey(CACHE_KEY)) {
+            hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
+        }
+
+        HighlighterEntry highlighterEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
+        MapperHighlighterEntry mapperHighlighterEntry = highlighterEntry.mappers.get(fieldMapper);
+
+        if (mapperHighlighterEntry == null) {
+            Encoder encoder = field.fieldOptions().encoder().equals("html") ?
+                HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
+            CustomPassageFormatter passageFormatter = new CustomPassageFormatter(
+                field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0], encoder);
+            mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter);
+            //store the entry, otherwise the lookup above can never find it and the formatter is rebuilt every time
+            highlighterEntry.mappers.put(fieldMapper, mapperHighlighterEntry);
+        }
+
+        List<Snippet> snippets = new ArrayList<>();
+        int numberOfFragments;
+        try {
+            Analyzer analyzer =
+                context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
+            List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext);
+            fieldValues = fieldValues.stream().map(obj -> {
+                if (obj instanceof BytesRef) {
+                    return fieldMapper.fieldType().valueForDisplay(obj).toString();
+                } else {
+                    return obj;
+                }
+            }).collect(Collectors.toList());
+            IndexSearcher searcher = new IndexSearcher(hitContext.reader());
+            CustomUnifiedHighlighter highlighter;
+            if (field.fieldOptions().numberOfFragments() == 0) {
+                // we use a control char to separate values, which is the only char that the custom break iterator
+                // breaks the text on, so we don't lose the distinction between the different values of a field and we
+                // get back a snippet per value
+                String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.NULL_SEPARATOR);
+                org.apache.lucene.search.postingshighlight.CustomSeparatorBreakIterator breakIterator =
+                    new org.apache.lucene.search.postingshighlight
+                        .CustomSeparatorBreakIterator(HighlightUtils.NULL_SEPARATOR);
+                highlighter =
+                    new CustomUnifiedHighlighter(searcher, analyzer, mapperHighlighterEntry.passageFormatter,
+                        breakIterator, fieldValue, field.fieldOptions().noMatchSize() > 0);
+                numberOfFragments = fieldValues.size(); // we are highlighting the whole content, one snippet per value
+            } else {
+                //using paragraph separator we make sure that each field value holds a discrete passage for highlighting
+                String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.PARAGRAPH_SEPARATOR);
+                highlighter = new CustomUnifiedHighlighter(searcher, analyzer,
+                    mapperHighlighterEntry.passageFormatter, null, fieldValue, field.fieldOptions().noMatchSize() > 0);
+                numberOfFragments = field.fieldOptions().numberOfFragments();
+            }
+            if (field.fieldOptions().requireFieldMatch()) {
+                final String fieldName = highlighterContext.fieldName;
+                highlighter.setFieldMatcher((name) -> fieldName.equals(name));
+            } else {
+                highlighter.setFieldMatcher((name) -> true);
+            }
+            Snippet[] fieldSnippets = highlighter.highlightField(highlighterContext.fieldName,
+                highlighterContext.query, hitContext.docId(), numberOfFragments);
+            for (Snippet fieldSnippet : fieldSnippets) {
+                if (Strings.hasText(fieldSnippet.getText())) {
+                    snippets.add(fieldSnippet);
+                }
+            }
+        } catch (IOException e) {
+            throw new FetchPhaseExecutionException(context,
+                "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
+        }
+
+        snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments());
+
+        if (field.fieldOptions().scoreOrdered()) {
+            //let's sort the snippets by score if needed
+            CollectionUtil.introSort(snippets, (o1, o2) -> Double.compare(o2.getScore(), o1.getScore()));
+        }
+
+        String[] fragments = snippets.stream().map(Snippet::getText).toArray(String[]::new);
+
+        //no fragment left means there is nothing to report for this field
+        if (fragments.length > 0) {
+            return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
+        }
+        return null;
+    }
+
+    static class HighlighterEntry {
+        Map<FieldMapper, MapperHighlighterEntry> mappers = new HashMap<>();
+    }
+
+    static class MapperHighlighterEntry {
+        final CustomPassageFormatter passageFormatter;
+
+        private MapperHighlighterEntry(CustomPassageFormatter passageFormatter) {
+            this.passageFormatter = passageFormatter;
+        }
+    }
+}
diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
index fcddc58f77a..2d43a1ca64e 100644
--- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
+++ b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPassageFormatterTests.java
@@ -19,6 +19,7 @@
 
 package org.apache.lucene.search.postingshighlight;
 
+import org.apache.lucene.search.highlight.Snippet;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
 import org.apache.lucene.util.BytesRef;
diff --git a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
index c95819a0e53..315e38d12fe 100644
--- a/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
+++ b/core/src/test/java/org/apache/lucene/search/postingshighlight/CustomPostingsHighlighterTests.java
@@ -31,6 +31,7 @@ import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.Snippet;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
diff --git
a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java
new file mode 100644
index 00000000000..4e664c3e241
--- /dev/null
+++ b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomPassageFormatterTests.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.lucene.search.uhighlight;
+
+import org.apache.lucene.search.highlight.Snippet;
+import org.apache.lucene.search.highlight.DefaultEncoder;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.notNullValue;
+
+
+public class CustomPassageFormatterTests extends ESTestCase {
+    public void testSimpleFormat() {
+        String content = "This is a really cool highlighter. Unified highlighter gives nice snippets back. No matches here.";
+
+        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
+
+        Passage[] passages = new Passage[3];
+        String match = "highlighter";
+        BytesRef matchBytesRef = new BytesRef(match);
+
+        Passage passage1 = new Passage();
+        int start = content.indexOf(match);
+        int end = start + match.length();
+        passage1.setStartOffset(0);
+        passage1.setEndOffset(end + 2); //let's include the whitespace at the end to make sure we trim it
+        passage1.addMatch(start, end, matchBytesRef);
+        passages[0] = passage1;
+
+        Passage passage2 = new Passage();
+        start = content.lastIndexOf(match);
+        end = start + match.length();
+        passage2.setStartOffset(passage1.getEndOffset());
+        passage2.setEndOffset(end + 26);
+        passage2.addMatch(start, end, matchBytesRef);
+        passages[1] = passage2;
+
+        Passage passage3 = new Passage();
+        passage3.setStartOffset(passage2.getEndOffset());
+        passage3.setEndOffset(content.length());
+        passages[2] = passage3;
+
+        Snippet[] fragments = passageFormatter.format(passages, content);
+        assertThat(fragments, notNullValue());
+        assertThat(fragments.length, equalTo(3));
+        assertThat(fragments[0].getText(), equalTo("This is a really cool <em>highlighter</em>."));
+        assertThat(fragments[0].isHighlighted(), equalTo(true));
+        assertThat(fragments[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
+        assertThat(fragments[1].isHighlighted(), equalTo(true));
+        assertThat(fragments[2].getText(), equalTo("No matches here."));
+        assertThat(fragments[2].isHighlighted(), equalTo(false));
+    }
+
+    public void testHtmlEncodeFormat() {
+        String content = "<b>This is a really cool highlighter.</b> Unified highlighter gives nice snippets back.";
+        // markup in the content has to come back entity-encoded, only the highlight tags stay literal
+        CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<em>", "</em>", new SimpleHTMLEncoder());
+
+        Passage[] passages = new Passage[2];
+        String match = "highlighter";
+        BytesRef matchBytesRef = new BytesRef(match);
+
+        Passage passage1 = new Passage();
+        int start = content.indexOf(match);
+        int end = start + match.length();
+        passage1.setStartOffset(0);
+        passage1.setEndOffset(end + 6); //let's include the whitespace at the end to make sure we trim it
+        passage1.addMatch(start, end, matchBytesRef);
+        passages[0] = passage1;
+
+        Passage passage2 = new Passage();
+        start = content.lastIndexOf(match);
+        end = start + match.length();
+        passage2.setStartOffset(passage1.getEndOffset());
+        passage2.setEndOffset(content.length());
+        passage2.addMatch(start, end, matchBytesRef);
+        passages[1] = passage2;
+
+        Snippet[] fragments = passageFormatter.format(passages, content);
+        assertThat(fragments, notNullValue());
+        assertThat(fragments.length, equalTo(2));
+        assertThat(fragments[0].getText(), equalTo("&lt;b&gt;This is a really cool <em>highlighter</em>.&lt;&#x2F;b&gt;"));
+        assertThat(fragments[1].getText(), equalTo("Unified <em>highlighter</em> gives nice snippets back."));
+    }
+}
diff --git a/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
new file mode 100644
index 00000000000..83b42750f92
--- /dev/null
+++ b/core/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java
@@ -0,0 +1,259 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.search.uhighlight; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.CommonTermsQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.highlight.DefaultEncoder; +import org.apache.lucene.search.highlight.Snippet; +import org.apache.lucene.store.Directory; +import org.elasticsearch.common.lucene.all.AllTermQuery; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; + +import static org.hamcrest.CoreMatchers.equalTo; + +public class CustomUnifiedHighlighterTests extends ESTestCase { + public void testCustomUnifiedHighlighter() throws Exception { + Directory dir = 
newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random())); + iwc.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); + + FieldType offsetsType = new FieldType(TextField.TYPE_STORED); + offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + offsetsType.setStoreTermVectorOffsets(true); + offsetsType.setStoreTermVectorPositions(true); + offsetsType.setStoreTermVectors(true); + + //good position but only one match + final String firstValue = "This is a test. Just a test1 highlighting from unified highlighter."; + Field body = new Field("body", "", offsetsType); + Document doc = new Document(); + doc.add(body); + body.setStringValue(firstValue); + + //two matches, not the best snippet due to its length though + final String secondValue = "This is the second highlighting value to perform highlighting on a longer text " + + "that gets scored lower."; + Field body2 = new Field("body", "", offsetsType); + doc.add(body2); + body2.setStringValue(secondValue); + + //two matches and short, will be scored highest + final String thirdValue = "This is highlighting the third short highlighting value."; + Field body3 = new Field("body", "", offsetsType); + doc.add(body3); + body3.setStringValue(thirdValue); + + //one match, same as first but at the end, will be scored lower due to its position + final String fourthValue = "Just a test4 highlighting from unified highlighter."; + Field body4 = new Field("body", "", offsetsType); + doc.add(body4); + body4.setStringValue(fourthValue); + + iw.addDocument(doc); + + IndexReader ir = iw.getReader(); + iw.close(); + + String firstHlValue = "Just a test1 highlighting from unified highlighter."; + String secondHlValue = "This is the second highlighting value to perform highlighting on a" + + " longer text that gets scored lower."; + String thirdHlValue = "This is highlighting the third short highlighting value."; + String 
fourthHlValue = "Just a test4 highlighting from unified highlighter."; + + IndexSearcher searcher = newSearcher(ir); + Query query = new TermQuery(new Term("body", "highlighting")); + + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertThat(topDocs.totalHits, equalTo(1)); + + int docId = topDocs.scoreDocs[0].doc; + + String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue + + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue; + + CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, iwc.getAnalyzer(), + new CustomPassageFormatter("", "", new DefaultEncoder()), null, fieldValue, true); + Snippet[] snippets = highlighter.highlightField("body", query, docId, 5); + + assertThat(snippets.length, equalTo(4)); + + assertThat(snippets[0].getText(), equalTo(firstHlValue)); + assertThat(snippets[1].getText(), equalTo(secondHlValue)); + assertThat(snippets[2].getText(), equalTo(thirdHlValue)); + assertThat(snippets[3].getText(), equalTo(fourthHlValue)); + ir.close(); + dir.close(); + } + + public void testNoMatchSize() throws Exception { + Directory dir = newDirectory(); + Analyzer analyzer = new StandardAnalyzer(); + IndexWriterConfig iwc = newIndexWriterConfig(analyzer); + iwc.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); + + FieldType offsetsType = new FieldType(TextField.TYPE_STORED); + offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + offsetsType.setStoreTermVectorOffsets(true); + offsetsType.setStoreTermVectorPositions(true); + offsetsType.setStoreTermVectors(true); + Field body = new Field("body", "", offsetsType); + Field none = new Field("none", "", offsetsType); + Document doc = new Document(); + doc.add(body); + doc.add(none); + + String firstValue = "This is a test. Just a test highlighting from unified. 
Feel free to ignore."; + body.setStringValue(firstValue); + none.setStringValue(firstValue); + iw.addDocument(doc); + + IndexReader ir = iw.getReader(); + iw.close(); + + Query query = new TermQuery(new Term("none", "highlighting")); + + IndexSearcher searcher = newSearcher(ir); + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertThat(topDocs.totalHits, equalTo(1)); + int docId = topDocs.scoreDocs[0].doc; + + CustomPassageFormatter passageFormatter = new CustomPassageFormatter("", "", new DefaultEncoder()); + CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, passageFormatter, + null, firstValue, false); + Snippet[] snippets = highlighter.highlightField("body", query, docId, 5); + assertThat(snippets.length, equalTo(0)); + + highlighter = new CustomUnifiedHighlighter(searcher, analyzer, passageFormatter, null, firstValue, true); + snippets = highlighter.highlightField("body", query, docId, 5); + assertThat(snippets.length, equalTo(1)); + assertThat(snippets[0].getText(), equalTo("This is a test.")); + ir.close(); + dir.close(); + } + + + private IndexReader indexOneDoc(Directory dir, String field, String value, Analyzer analyzer) throws IOException { + IndexWriterConfig iwc = newIndexWriterConfig(analyzer); + iwc.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); + + FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + Field textField = new Field(field, "", ft); + Document doc = new Document(); + doc.add(textField); + + textField.setStringValue(value); + iw.addDocument(doc); + IndexReader ir = iw.getReader(); + iw.close(); + return ir; + } + + public void testMultiPhrasePrefixQuery() throws Exception { + Analyzer analyzer = new StandardAnalyzer(); + Directory dir = newDirectory(); + String value = "The quick brown fox."; + IndexReader ir = indexOneDoc(dir, "text", value, 
analyzer); + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery(); + query.add(new Term("text", "quick")); + query.add(new Term("text", "brown")); + query.add(new Term("text", "fo")); + IndexSearcher searcher = newSearcher(ir); + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertThat(topDocs.totalHits, equalTo(1)); + int docId = topDocs.scoreDocs[0].doc; + CustomPassageFormatter passageFormatter = new CustomPassageFormatter("", "", new DefaultEncoder()); + CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, + passageFormatter, null, value, false); + Snippet[] snippets = highlighter.highlightField("text", query, docId, 5); + assertThat(snippets.length, equalTo(1)); + assertThat(snippets[0].getText(), equalTo("The quick brown fox.")); + ir.close(); + dir.close(); + } + + public void testAllTermQuery() throws IOException { + Directory dir = newDirectory(); + String value = "The quick brown fox."; + Analyzer analyzer = new StandardAnalyzer(); + IndexReader ir = indexOneDoc(dir, "all", value, analyzer); + AllTermQuery query = new AllTermQuery(new Term("all", "fox")); + IndexSearcher searcher = newSearcher(ir); + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertThat(topDocs.totalHits, equalTo(1)); + int docId = topDocs.scoreDocs[0].doc; + CustomPassageFormatter passageFormatter = new CustomPassageFormatter("", "", new DefaultEncoder()); + CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, + passageFormatter, null, value, false); + Snippet[] snippets = highlighter.highlightField("all", query, docId, 5); + assertThat(snippets.length, equalTo(1)); + assertThat(snippets[0].getText(), equalTo("The quick brown fox.")); + ir.close(); + dir.close(); + } + + public void testCommonTermsQuery() throws IOException { + Directory dir = newDirectory(); + String value = "The quick brown fox."; + Analyzer analyzer = new StandardAnalyzer(); + IndexReader ir = 
indexOneDoc(dir, "text", value, analyzer); + CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128); + query.add(new Term("text", "quick")); + query.add(new Term("text", "brown")); + query.add(new Term("text", "fox")); + IndexSearcher searcher = newSearcher(ir); + TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); + assertThat(topDocs.totalHits, equalTo(1)); + int docId = topDocs.scoreDocs[0].doc; + CustomPassageFormatter passageFormatter = new CustomPassageFormatter("", "", new DefaultEncoder()); + CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, + passageFormatter, null, value, false); + Snippet[] snippets = highlighter.highlightField("text", query, docId, 5); + assertThat(snippets.length, equalTo(1)); + assertThat(snippets[0].getText(), equalTo("The quick brown fox.")); + ir.close(); + dir.close(); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index d7a12de7498..ca7548257ef 100644 --- a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -20,12 +20,10 @@ package org.elasticsearch.search.fetch.subphase.highlight; import com.carrotsearch.randomizedtesting.generators.RandomPicks; import org.apache.lucene.search.join.ScoreMode; -import org.elasticsearch.Version; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.support.WriteRequest; -import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.geo.GeoPoint; import org.elasticsearch.common.settings.Settings; import 
org.elasticsearch.common.settings.Settings.Builder; @@ -99,6 +97,8 @@ import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.startsWith; public class HighlighterSearchIT extends ESIntegTestCase { + private static final String[] ALL_TYPES = new String[] {"plain", "postings", "fvh", "unified"}; + private static final String[] UNIFIED_AND_NULL = new String[] {null, "unified"}; @Override protected Collection> nodePlugins() { @@ -123,9 +123,11 @@ public class HighlighterSearchIT extends ESIntegTestCase { .setSource(jsonBuilder().startObject().field("text", "foo").endObject()) .get(); refresh(); - SearchResponse search = client().prepareSearch().setQuery(matchQuery("text", "foo")) - .highlighter(new HighlightBuilder().field(new Field("text"))).get(); - assertHighlight(search, 0, "text", 0, equalTo("foo")); + for (String type : UNIFIED_AND_NULL) { + SearchResponse search = client().prepareSearch().setQuery(matchQuery("text", "foo")) + .highlighter(new HighlightBuilder().field(new Field("text")).highlighterType(type)).get(); + assertHighlight(search, 0, "text", 0, equalTo("foo")); + } } public void testHighlightingWithWildcardName() throws IOException { @@ -149,10 +151,11 @@ public class HighlighterSearchIT extends ESIntegTestCase { .setSource(jsonBuilder().startObject().field("text", "text").endObject()) .get(); refresh(); - String highlighter = randomFrom("plain", "postings", "fvh"); - SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))) - .highlighter(new HighlightBuilder().field(new Field("*").highlighterType(highlighter))).get(); - assertHighlight(search, 0, "text", 0, equalTo("text")); + for (String type : ALL_TYPES) { + SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))) + .highlighter(new HighlightBuilder().field(new Field("*").highlighterType(type))).get(); + assertHighlight(search, 0, "text", 0, equalTo("text")); + } } public void 
testHighlightingWhenFieldsAreNotStoredThereIsNoSource() throws IOException { @@ -184,14 +187,15 @@ public class HighlighterSearchIT extends ESIntegTestCase { .setSource(jsonBuilder().startObject().field("unstored_text", "text").field("text", "text").endObject()) .get(); refresh(); - String highlighter = randomFrom("plain", "postings", "fvh"); - SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))) - .highlighter(new HighlightBuilder().field(new Field("*").highlighterType(highlighter))).get(); - assertHighlight(search, 0, "text", 0, equalTo("text")); - search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))) + for (String type : ALL_TYPES) { + SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))) + .highlighter(new HighlightBuilder().field(new Field("*").highlighterType(type))).get(); + assertHighlight(search, 0, "text", 0, equalTo("text")); + search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("text", "text"))) .highlighter(new HighlightBuilder().field(new Field("unstored_text"))).get(); - assertNoFailures(search); - assertThat(search.getHits().getAt(0).getHighlightFields().size(), equalTo(0)); + assertNoFailures(search); + assertThat(search.getHits().getAt(0).getHighlightFields().size(), equalTo(0)); + } } // see #3486 @@ -368,7 +372,6 @@ public class HighlighterSearchIT extends ESIntegTestCase { + "and should be highlighted") .get(); refresh(); - SearchResponse search = client().prepareSearch() .setQuery(matchQuery("long_term", "thisisaverylongwordandmakessurethisfails foo highlighed")) .highlighter(new HighlightBuilder().field("long_term", 18, 1)) @@ -418,23 +421,25 @@ public class HighlighterSearchIT extends ESIntegTestCase { } indexRandom(true, indexRequestBuilders); - SearchResponse search = client().prepareSearch() + for (String type : UNIFIED_AND_NULL) { + SearchResponse search = 
client().prepareSearch() .setQuery(matchQuery("title", "bug")) - .highlighter(new HighlightBuilder().field("title", -1, 0)) + .highlighter(new HighlightBuilder().field("title", -1, 0).highlighterType(type)) .get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "title", 0, equalTo("This is a test on the highlighting bug present in elasticsearch")); - } + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "title", 0, equalTo("This is a test on the highlighting bug present in elasticsearch")); + } - search = client().prepareSearch() + search = client().prepareSearch() .setQuery(matchQuery("attachments.body", "attachment")) - .highlighter(new HighlightBuilder().field("attachments.body", -1, 0)) + .highlighter(new HighlightBuilder().field("attachments.body", -1, 0).highlighterType(type)) .get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "attachments.body", 0, equalTo("attachment 1")); - assertHighlight(search, i, "attachments.body", 1, equalTo("attachment 2")); + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "attachments.body", 0, equalTo("attachment 1")); + assertHighlight(search, i, "attachments.body", 1, equalTo("attachment 2")); + } } } @@ -470,23 +475,25 @@ public class HighlighterSearchIT extends ESIntegTestCase { } indexRandom(true, indexRequestBuilders); - SearchResponse search = client().prepareSearch() + for (String type : UNIFIED_AND_NULL) { + SearchResponse search = client().prepareSearch() .setQuery(matchQuery("title", "bug")) - .highlighter(new HighlightBuilder().field("title", -1, 0)) + .highlighter(new HighlightBuilder().field("title", -1, 0).highlighterType(type)) .get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "title", 0, equalTo("This is a test on the highlighting bug present in elasticsearch")); - } + for (int i = 0; i < 
indexRequestBuilders.length; i++) { + assertHighlight(search, i, "title", 0, equalTo("This is a test on the highlighting bug present in elasticsearch")); + } - search = client().prepareSearch() + search = client().prepareSearch() .setQuery(matchQuery("attachments.body", "attachment")) - .highlighter(new HighlightBuilder().field("attachments.body", -1, 2)) + .highlighter(new HighlightBuilder().field("attachments.body", -1, 2).highlighterType(type)) .execute().get(); - for (int i = 0; i < 5; i++) { - assertHighlight(search, i, "attachments.body", 0, equalTo("attachment 1")); - assertHighlight(search, i, "attachments.body", 1, equalTo("attachment 2")); + for (int i = 0; i < 5; i++) { + assertHighlight(search, i, "attachments.body", 0, equalTo("attachment 1")); + assertHighlight(search, i, "attachments.body", 1, equalTo("attachment 2")); + } } } @@ -534,24 +541,28 @@ public class HighlighterSearchIT extends ESIntegTestCase { assertHighlight(search, i, "title", 1, 2, equalTo("This is the second bug to perform highlighting on.")); } - search = client().prepareSearch() + for (String type : UNIFIED_AND_NULL) { + search = client().prepareSearch() .setQuery(matchQuery("title", "bug")) //sentences will be generated out of each value - .highlighter(new HighlightBuilder().field("title")).get(); + .highlighter(new HighlightBuilder().field("title").highlighterType(type)).get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "title", 0, equalTo("This is a test on the highlighting bug present in elasticsearch.")); - assertHighlight(search, i, "title", 1, 2, equalTo("This is the second bug to perform highlighting on.")); - } + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "title", 0, + equalTo("This is a test on the highlighting bug present in elasticsearch.")); + assertHighlight(search, i, "title", 1, 2, + equalTo("This is the second bug to perform highlighting on.")); + } - search = 
client().prepareSearch() + search = client().prepareSearch() .setQuery(matchQuery("attachments.body", "attachment")) - .highlighter(new HighlightBuilder().field("attachments.body", -1, 2)) + .highlighter(new HighlightBuilder().field("attachments.body", -1, 2).highlighterType(type)) .get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "attachments.body", 0, equalTo("attachment for this test")); - assertHighlight(search, i, "attachments.body", 1, 2, equalTo("attachment 2")); + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "attachments.body", 0, equalTo("attachment for this test")); + assertHighlight(search, i, "attachments.body", 1, 2, equalTo("attachment 2")); + } } } @@ -654,48 +665,34 @@ public class HighlighterSearchIT extends ESIntegTestCase { .setSource("field1", "The quick brown fox jumps over the lazy dog", "field2", "second field content").get(); refresh(); - //works using stored field - SearchResponse searchResponse = client().prepareSearch("test") + for (String type : UNIFIED_AND_NULL) { + //works using stored field + SearchResponse searchResponse = client().prepareSearch("test") .setQuery(termQuery("field1", "quick")) - .highlighter(new HighlightBuilder().field(new Field("field1").preTags("").postTags(""))) + .highlighter(new HighlightBuilder().field(new Field("field1").preTags("").postTags("").highlighterType(type))) .get(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); - assertFailures(client().prepareSearch("test") - .setQuery(termQuery("field1", "quick")) - .highlighter( - new HighlightBuilder().field(new Field("field1").preTags("").postTags("") - .highlighterType("plain").forceSource(true))), + assertFailures(client().prepareSearch("test") + .setQuery(termQuery("field1", "quick")) + .highlighter( + 
new HighlightBuilder().field(new Field("field1").preTags("").postTags("") + .highlighterType(type).forceSource(true))), RestStatus.BAD_REQUEST, containsString("source is forced for fields [field1] but type [type1] has disabled _source")); - assertFailures(client().prepareSearch("test") - .setQuery(termQuery("field1", "quick")) - .highlighter( - new HighlightBuilder().field(new Field("field1").preTags("").postTags("").highlighterType("fvh") - .forceSource(true))), + SearchSourceBuilder searchSource = SearchSourceBuilder.searchSource().query(termQuery("field1", "quick")) + .highlighter(highlight().forceSource(true).field("field1").highlighterType(type)); + assertFailures(client().prepareSearch("test").setSource(searchSource), RestStatus.BAD_REQUEST, containsString("source is forced for fields [field1] but type [type1] has disabled _source")); - assertFailures(client().prepareSearch("test") - .setQuery(termQuery("field1", "quick")) - .highlighter( - new HighlightBuilder().field(new Field("field1").preTags("").postTags("") - .highlighterType("postings").forceSource(true))), - RestStatus.BAD_REQUEST, - containsString("source is forced for fields [field1] but type [type1] has disabled _source")); - - SearchSourceBuilder searchSource = SearchSourceBuilder.searchSource().query(termQuery("field1", "quick")) - .highlighter(highlight().forceSource(true).field("field1")); - assertFailures(client().prepareSearch("test").setSource(searchSource), - RestStatus.BAD_REQUEST, - containsString("source is forced for fields [field1] but type [type1] has disabled _source")); - - searchSource = SearchSourceBuilder.searchSource().query(termQuery("field1", "quick")) - .highlighter(highlight().forceSource(true).field("field*")); - assertFailures(client().prepareSearch("test").setSource(searchSource), + searchSource = SearchSourceBuilder.searchSource().query(termQuery("field1", "quick")) + .highlighter(highlight().forceSource(true).field("field*").highlighterType(type)); + 
assertFailures(client().prepareSearch("test").setSource(searchSource), RestStatus.BAD_REQUEST, matches("source is forced for fields \\[field\\d, field\\d\\] but type \\[type1\\] has disabled _source")); + } } public void testPlainHighlighter() throws Exception { @@ -999,13 +996,16 @@ public class HighlighterSearchIT extends ESIntegTestCase { } indexRandom(true, indexRequestBuilders); - SearchResponse search = client().prepareSearch() + for (String type : UNIFIED_AND_NULL) { + SearchResponse search = client().prepareSearch() .setQuery(matchQuery("title", "bug")) - .highlighter(new HighlightBuilder().field("title", -1, 0)) + .highlighter(new HighlightBuilder().field("title", -1, 0).highlighterType(type)) .get(); - for (int i = 0; i < 5; i++) { - assertHighlight(search, i, "title", 0, 1, equalTo("This is a test on the highlighting bug present in elasticsearch")); + for (int i = 0; i < 5; i++) { + assertHighlight(search, i, "title", 0, 1, equalTo("This is a test on the highlighting bug " + + "present in elasticsearch")); + } } } @@ -1042,14 +1042,16 @@ public class HighlighterSearchIT extends ESIntegTestCase { } indexRandom(true, indexRequestBuilders); - SearchResponse search = client().prepareSearch() + for (String type : UNIFIED_AND_NULL) { + SearchResponse search = client().prepareSearch() .setQuery(matchQuery("title", "test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1, 10)) + .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1, 10).highlighterType(type)) .get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(search, i, "title", 0, 1, - equalTo("This is a html escaping highlighting test for *&? 
elasticsearch")); + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(search, i, "title", 0, 1, + startsWith("This is a html escaping highlighting test for *&?")); + } } } @@ -1094,21 +1096,24 @@ public class HighlighterSearchIT extends ESIntegTestCase { ensureGreen(); client().prepareIndex("test", "type1", "1").setSource("title", "this is a test").get(); refresh(); - // simple search on body with standard analyzer with a simple field query - SearchResponse search = client().prepareSearch() + + for (String type : UNIFIED_AND_NULL) { + // simple search on body with standard analyzer with a simple field query + SearchResponse search = client().prepareSearch() .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1)) + .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1).highlighterType(type)) .get(); - assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); - // search on title.key and highlight on title - search = client().prepareSearch() + // search on title.key and highlight on title + search = client().prepareSearch() .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1)) + .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1).highlighterType(type)) .get(); - assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); + } } public void testMultiMapperVectorFromSource() throws Exception { @@ -1133,21 +1138,23 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1", "1").setSource("title", "this is a test").get(); refresh(); - // simple search on body with standard analyzer with a simple field query - SearchResponse search = 
client().prepareSearch() + for (String type : UNIFIED_AND_NULL) { + // simple search on body with standard analyzer with a simple field query + SearchResponse search = client().prepareSearch() .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1)) + .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1).highlighterType(type)) .get(); - assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); - // search on title.key and highlight on title.key - search = client().prepareSearch() + // search on title.key and highlight on title.key + search = client().prepareSearch() .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1)) + .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1).highlighterType(type)) .get(); - assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); + } } public void testMultiMapperNoVectorWithStore() throws Exception { @@ -1173,21 +1180,24 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1", "1").setSource("title", "this is a test").get(); refresh(); - // simple search on body with standard analyzer with a simple field query - SearchResponse search = client().prepareSearch() + + for (String type : UNIFIED_AND_NULL) { + // simple search on body with standard analyzer with a simple field query + SearchResponse search = client().prepareSearch() .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1)) + .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1).highlighterType(type)) .get(); - assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); 
+ assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); - // search on title.key and highlight on title - search = client().prepareSearch() + // search on title.key and highlight on title + search = client().prepareSearch() .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1)) + .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1).highlighterType(type)) .get(); - assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); + } } public void testMultiMapperNoVectorFromSource() throws Exception { @@ -1212,21 +1222,23 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1", "1").setSource("title", "this is a test").get(); refresh(); - // simple search on body with standard analyzer with a simple field query - SearchResponse search = client().prepareSearch() + for (String type : UNIFIED_AND_NULL) { + // simple search on body with standard analyzer with a simple field query + SearchResponse search = client().prepareSearch() .setQuery(matchQuery("title", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1)) + .highlighter(new HighlightBuilder().encoder("html").field("title", 50, 1).highlighterType(type)) .get(); - assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title", 0, 1, equalTo("this is a test")); - // search on title.key and highlight on title.key - search = client().prepareSearch() + // search on title.key and highlight on title.key + search = client().prepareSearch() .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1)) + .highlighter(new HighlightBuilder().encoder("html").field("title.key", 50, 1).highlighterType(type)) .get(); - 
assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); + assertHighlight(search, 0, "title.key", 0, 1, equalTo("this is a test")); + } } public void testFastVectorHighlighterShouldFailIfNoTermVectors() throws Exception { @@ -1333,14 +1345,16 @@ public class HighlighterSearchIT extends ESIntegTestCase { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog").get(); refresh(); - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() .query(boostingQuery(termQuery("field2", "brown"), termQuery("field2", "foobar")).negativeBoost(0.5f)) - .highlighter(highlight().field("field2").order("score").preTags("").postTags("")); + .highlighter(highlight().field("field2").order("score").preTags("").postTags("").highlighterType(type)); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + } } public void testBoostingQueryTermVector() throws IOException { @@ -1371,12 +1385,14 @@ public class HighlighterSearchIT extends ESIntegTestCase { refresh(); logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() + for (String type : UNIFIED_AND_NULL) { + SearchSourceBuilder source = searchSource() .query(commonTermsQuery("field2", "quick brown").cutoffFrequency(100)) - .highlighter(highlight().field("field2").order("score").preTags("").postTags("")); + .highlighter(highlight().field("field2").order("score").preTags("").postTags("").highlighterType(type)); 
- SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + } } public void testCommonTermsTermVector() throws IOException { @@ -1416,63 +1432,76 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1", "1").setSource("field1", "The quick browse button is a fancy thing, right bro?").get(); refresh(); logger.info("--> highlighting and searching on field0"); - SearchSourceBuilder source = searchSource() + + for (String type : UNIFIED_AND_NULL) { + SearchSourceBuilder source = searchSource() .query(matchPhrasePrefixQuery("field0", "quick bro")) - .highlighter(highlight().field("field0").order("score").preTags("").postTags("")); + .highlighter(highlight().field("field0").order("score").preTags("").postTags("").highlighterType(type)); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); - logger.info("--> highlighting and searching on field1"); - source = searchSource() + logger.info("--> highlighting and searching on field1"); + source = searchSource() .query(matchPhrasePrefixQuery("field1", "quick bro")) - .highlighter(highlight().field("field1").order("score").preTags("").postTags("")); + .highlighter(highlight().field("field1").order("score").preTags("").postTags("").highlighterType(type)); - searchResponse = 
client().search(searchRequest("test").source(source)).actionGet(); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field1", 0, 1, anyOf( + assertHighlight(searchResponse, 0, "field1", 0, 1, anyOf( equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - assertHighlight(searchResponse, 1, "field1", 0, 1, anyOf( + equalTo("The quick brown fox jumps over the lazy dog"), + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + assertHighlight(searchResponse, 1, "field1", 0, 1, anyOf( equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); + equalTo("The quick brown fox jumps over the lazy dog"), + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); - // with synonyms - client().prepareIndex("test", "type2", "0").setSource( + // with synonyms + client().prepareIndex("test", "type2", "0").setSource( "field4", "The quick brown fox jumps over the lazy dog", "field3", "The quick brown fox jumps over the lazy dog").get(); - client().prepareIndex("test", "type2", "1").setSource( + client().prepareIndex("test", "type2", "1").setSource( "field4", "The quick browse button is a fancy thing, right bro?").get(); - client().prepareIndex("test", "type2", "2").setSource( + client().prepareIndex("test", "type2", "2").setSource( "field4", "a quick fast blue car").get(); - refresh(); + refresh(); - source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field3", "fast bro")) - .highlighter(highlight().field("field3").order("score").preTags("").postTags("")); + source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field3", "fast bro")) + 
.highlighter(highlight().field("field3").order("score").preTags("").postTags("").highlighterType(type)); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); - logger.info("--> highlighting and searching on field4"); - source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")) - .highlighter(highlight().field("field4").order("score").preTags("").postTags("")); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + logger.info("--> highlighting and searching on field4"); + source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")) + .highlighter(highlight().field("field4").order("score").preTags("").postTags("").highlighterType(type)); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field4", 0, 1, anyOf( + assertHighlight(searchResponse, 0, "field4", 0, 1, anyOf( equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - assertHighlight(searchResponse, 1, "field4", 0, 1, anyOf( + equalTo("The quick brown fox jumps over the lazy dog"), + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + assertHighlight(searchResponse, 1, "field4", 0, 1, anyOf( equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); + equalTo("The quick brown fox jumps over the lazy dog"), + equalTo("The quick browse button is a fancy thing, right 
bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); - logger.info("--> highlighting and searching on field4"); - source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")) - .highlighter(highlight().field("field4").order("score").preTags("").postTags("")); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + logger.info("--> highlighting and searching on field4"); + source = searchSource().postFilter(typeQuery("type2")).query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")) + .highlighter(highlight().field("field4").order("score").preTags("").postTags("").highlighterType(type)); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field4", 0, 1, equalTo("a quick fast blue car")); + assertHighlight(searchResponse, 0, "field4", 0, 1, + anyOf(equalTo("a quick fast blue car"), + equalTo("a quick fast blue car"))); + } } public void testPlainHighlightDifferentFragmenter() throws Exception { @@ -1563,13 +1592,15 @@ public class HighlighterSearchIT extends ESIntegTestCase { .endObject()).get(); refresh(); - // This query used to fail when the field to highlight was absent - SearchResponse response = client().prepareSearch("test") + for (String type : UNIFIED_AND_NULL) { + // This query used to fail when the field to highlight was absent + SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("field", "highlight").type(MatchQuery.Type.BOOLEAN)) .highlighter( - new HighlightBuilder().field(new HighlightBuilder.Field("highlight_field").fragmentSize(-1).numOfFragments(1) - .fragmenter("simple"))).get(); - assertThat(response.getHits().hits()[0].highlightFields().isEmpty(), equalTo(true)); + new HighlightBuilder().field(new HighlightBuilder.Field("highlight_field").fragmentSize(-1).numOfFragments(1) + 
.fragmenter("simple")).highlighterType(type)).get(); + assertThat(response.getHits().hits()[0].highlightFields().isEmpty(), equalTo(true)); + } } // Issue #3211 @@ -1614,11 +1645,13 @@ public class HighlighterSearchIT extends ESIntegTestCase { .setSource("text", "elasticsearch test").get(); refresh(); - SearchResponse response = client().prepareSearch("test") + for (String type : UNIFIED_AND_NULL) { + SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("text", "test").type(MatchQuery.Type.BOOLEAN)) - .highlighter(new HighlightBuilder().field("text")).execute().actionGet(); - // PatternAnalyzer will throw an exception if it is resetted twice - assertHitCount(response, 1L); + .highlighter(new HighlightBuilder().field("text").highlighterType(type)).execute().actionGet(); + // PatternAnalyzer will throw an exception if it is resetted twice + assertHitCount(response, 1L); + } } public void testHighlightUsesHighlightQuery() throws IOException { @@ -1629,55 +1662,30 @@ public class HighlighterSearchIT extends ESIntegTestCase { index("test", "type1", "1", "text", "Testing the highlight query feature"); refresh(); - HighlightBuilder.Field field = new HighlightBuilder.Field("text"); - - HighlightBuilder highlightBuilder = new HighlightBuilder().field(field); - SearchRequestBuilder search = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("text", "testing")) + for (String type : ALL_TYPES) { + HighlightBuilder.Field field = new HighlightBuilder.Field("text"); + HighlightBuilder highlightBuilder = new HighlightBuilder().field(field).highlighterType(type); + SearchRequestBuilder search = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("text", "testing")) .highlighter(highlightBuilder); - Matcher searchQueryMatcher = equalTo("Testing the highlight query feature"); + Matcher searchQueryMatcher = equalTo("Testing the highlight query feature"); - field.highlighterType("plain"); - SearchResponse response = 
search.get(); - assertHighlight(response, 0, "text", 0, searchQueryMatcher); - field.highlighterType("fvh"); - response = search.get(); - assertHighlight(response, 0, "text", 0, searchQueryMatcher); - field.highlighterType("postings"); - response = search.get(); - assertHighlight(response, 0, "text", 0, searchQueryMatcher); + SearchResponse response = search.get(); + assertHighlight(response, 0, "text", 0, searchQueryMatcher); + field = new HighlightBuilder.Field("text"); - field = new HighlightBuilder.Field("text"); + Matcher hlQueryMatcher = equalTo("Testing the highlight query feature"); + field.highlightQuery(matchQuery("text", "query")); + highlightBuilder = new HighlightBuilder().field(field); + search = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("text", "testing")).highlighter(highlightBuilder); + response = search.get(); + assertHighlight(response, 0, "text", 0, hlQueryMatcher); - Matcher hlQueryMatcher = equalTo("Testing the highlight query feature"); - field.highlightQuery(matchQuery("text", "query")); - highlightBuilder = new HighlightBuilder().field(field); - search = client().prepareSearch("test").setQuery(QueryBuilders.matchQuery("text", "testing")).highlighter(highlightBuilder); - - field.highlighterType("fvh"); - response = search.get(); - assertHighlight(response, 0, "text", 0, hlQueryMatcher); - - field.highlighterType("plain"); - response = search.get(); - assertHighlight(response, 0, "text", 0, hlQueryMatcher); - - field.highlighterType("postings"); - response = search.get(); - assertHighlight(response, 0, "text", 0, hlQueryMatcher); - - // Make sure the highlightQuery is taken into account when it is set on the highlight context instead of the field - highlightBuilder.highlightQuery(matchQuery("text", "query")); - field.highlighterType("fvh").highlightQuery(null); - response = search.get(); - assertHighlight(response, 0, "text", 0, hlQueryMatcher); - - field.highlighterType("plain"); - response = search.get(); - 
assertHighlight(response, 0, "text", 0, hlQueryMatcher); - - field.highlighterType("postings"); - response = search.get(); - assertHighlight(response, 0, "text", 0, hlQueryMatcher); + // Make sure the highlightQuery is taken into account when it is set on the highlight context instead of the field + highlightBuilder.highlightQuery(matchQuery("text", "query")); + field.highlighterType(type).highlightQuery(null); + response = search.get(); + assertHighlight(response, 0, "text", 0, hlQueryMatcher); + } } private static String randomStoreField() { @@ -1712,6 +1720,10 @@ public class HighlighterSearchIT extends ESIntegTestCase { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); + field.highlighterType("unified"); + response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); + assertNotHighlighted(response, 0, "text"); + // When noMatchSize is set to 0 you also shouldn't get any field.highlighterType("plain").noMatchSize(0); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); @@ -1725,6 +1737,10 @@ public class HighlighterSearchIT extends ESIntegTestCase { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); + field.highlighterType("unified"); + response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); + assertNotHighlighted(response, 0, "text"); + // When noMatchSize is between 0 and the size of the string field.highlighterType("plain").noMatchSize(21); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); @@ -1740,6 +1756,11 @@ public class HighlighterSearchIT extends ESIntegTestCase { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 1, 
equalTo("I am pretty long so some of me should get cut off.")); + // Unified hl also works but the fragment is the whole first sentence (size ignored) + field.highlighterType("unified"); + response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); + assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); + // We can also ask for a fragment longer than the input string and get the whole string field.highlighterType("plain").noMatchSize(text.length() * 2); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); @@ -1754,6 +1775,11 @@ public class HighlighterSearchIT extends ESIntegTestCase { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); + //no difference using unified hl as the noMatchSize is ignored (just needs to be greater than 0) + field.highlighterType("unified"); + response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); + assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); + // We can also ask for a fragment exactly the size of the input field and get the whole field field.highlighterType("plain").noMatchSize(text.length()); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); @@ -1768,6 +1794,11 @@ public class HighlighterSearchIT extends ESIntegTestCase { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); + //no difference using unified hl as the noMatchSize is ignored (just needs to be greater than 0) + field.highlighterType("unified"); + response = 
client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); + assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); + // You can set noMatchSize globally in the highlighter as well field.highlighterType("plain").noMatchSize(null); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field).noMatchSize(21)).get(); @@ -1781,6 +1812,10 @@ public class HighlighterSearchIT extends ESIntegTestCase { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field).noMatchSize(21)).get(); assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); + field.highlighterType("unified"); + response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field).noMatchSize(21)).get(); + assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); + // We don't break if noMatchSize is less than zero though field.highlighterType("plain").noMatchSize(randomIntBetween(Integer.MIN_VALUE, -1)); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); @@ -1793,6 +1828,10 @@ public class HighlighterSearchIT extends ESIntegTestCase { field.highlighterType("postings"); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); + + field.highlighterType("unified"); + response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); + assertNotHighlighted(response, 0, "text"); } public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException { @@ -1823,6 +1862,11 @@ public class HighlighterSearchIT extends ESIntegTestCase { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty 
long so some of me should get cut off.")); + // Unified hl also works but the fragment is the whole first sentence (size ignored) + field.highlighterType("unified"); + response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); + assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some of me should get cut off.")); + // And noMatchSize returns nothing when the first entry is empty string! index("test", "type1", "2", "text", new String[] {"", text2}); refresh(); @@ -1846,6 +1890,12 @@ public class HighlighterSearchIT extends ESIntegTestCase { .highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); + field.highlighterType("unified"); + response = client().prepareSearch("test") + .setQuery(idsQueryBuilder) + .highlighter(new HighlightBuilder().field(field)).get(); + assertNotHighlighted(response, 0, "text"); + // But if the field was actually empty then you should get no highlighting field index("test", "type1", "3", "text", new String[] {}); refresh(); @@ -1868,6 +1918,12 @@ public class HighlighterSearchIT extends ESIntegTestCase { .highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); + field.highlighterType("unified"); + response = client().prepareSearch("test") + .setQuery(idsQueryBuilder) + .highlighter(new HighlightBuilder().field(field)).get(); + assertNotHighlighted(response, 0, "text"); + // Same for if the field doesn't even exist on the document index("test", "type1", "4"); refresh(); @@ -1905,6 +1961,10 @@ public class HighlighterSearchIT extends ESIntegTestCase { field.highlighterType("postings"); response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertNotHighlighted(response, 0, "text"); + + field.highlighterType("unified"); + response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); + assertNotHighlighted(response, 0, 
"text"); } public void testHighlightNoMatchSizeNumberOfFragments() throws IOException { @@ -1936,6 +1996,11 @@ public class HighlighterSearchIT extends ESIntegTestCase { response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 1, equalTo("This is the first sentence.")); + // Unified hl also works but the fragment is the whole first sentence (size ignored) + field.highlighterType("unified"); + response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get(); + assertHighlight(response, 0, "text", 0, 1, equalTo("This is the first sentence.")); + //if there's a match we only return the values with matches (whole value as number_of_fragments == 0) MatchQueryBuilder queryBuilder = QueryBuilders.matchQuery("text", "third fifth"); field.highlighterType("plain"); @@ -1952,6 +2017,11 @@ public class HighlighterSearchIT extends ESIntegTestCase { response = client().prepareSearch("test").setQuery(queryBuilder).highlighter(new HighlightBuilder().field(field)).get(); assertHighlight(response, 0, "text", 0, 2, equalTo("This is the third sentence. This is the fourth sentence.")); assertHighlight(response, 0, "text", 1, 2, equalTo("This is the fifth sentence")); + + field.highlighterType("unified"); + response = client().prepareSearch("test").setQuery(queryBuilder).highlighter(new HighlightBuilder().field(field)).get(); + assertHighlight(response, 0, "text", 0, 2, equalTo("This is the third sentence. 
This is the fourth sentence.")); + assertHighlight(response, 0, "text", 1, 2, equalTo("This is the fifth sentence")); } public void testPostingsHighlighter() throws Exception { @@ -1962,55 +2032,62 @@ public class HighlighterSearchIT extends ESIntegTestCase { .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy quick dog").get(); refresh(); - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() .query(termQuery("field1", "test")) - .highlighter(highlight().field("field1").preTags("").postTags("")); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + .highlighter(highlight().field("field1").preTags("").postTags("").highlighterType(type)); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("this is a test")); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("this is a test")); - logger.info("--> searching on field1, highlighting on field1"); - source = searchSource() + logger.info("--> searching on field1, highlighting on field1"); + source = searchSource() .query(termQuery("field1", "test")) - .highlighter(highlight().field("field1").preTags("").postTags("")); + .highlighter(highlight().field("field1").preTags("").postTags("").highlighterType(type)); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("this is a test")); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("this is a test")); - logger.info("--> searching on field2, highlighting on field2"); - source = 
searchSource() + logger.info("--> searching on field2, highlighting on field2"); + source = searchSource() .query(termQuery("field2", "quick")) - .highlighter(highlight().field("field2").order("score").preTags("").postTags("")); + .highlighter(highlight().field("field2").order("score").preTags("").postTags("").highlighterType(type)); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field2", 0, 1, + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy quick dog")); - logger.info("--> searching on field2, highlighting on field2"); - source = searchSource() + logger.info("--> searching on field2, highlighting on field2"); + source = searchSource() .query(matchPhraseQuery("field2", "quick brown")) - .highlighter(highlight().field("field2").preTags("").postTags("")); + .highlighter(highlight().field("field2").preTags("").postTags("").highlighterType(type)); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - //phrase query results in highlighting all different terms regardless of their positions - assertHighlight(searchResponse, 0, "field2", 0, 1, - equalTo("The quick brown fox jumps over the lazy quick dog")); + if (type == null) { + //phrase query results in highlighting all different terms regardless of their positions + assertHighlight(searchResponse, 0, "field2", 0, 1, + equalTo("The quick brown fox jumps over the lazy quick dog")); + } else { + assertHighlight(searchResponse, 0, "field2", 0, 1, + equalTo("The quick brown fox jumps over the lazy quick dog")); + } - //lets fall back to the standard highlighter then, what people would do to highlight query matches - logger.info("--> searching on field2, highlighting on field2, falling back to 
the plain highlighter"); - source = searchSource() - .query(matchPhraseQuery("field2", "quick brown")) - .highlighter(highlight() + //lets fall back to the standard highlighter then, what people would do to highlight query matches + logger.info("--> searching on field2, highlighting on field2, falling back to the plain highlighter"); + source = searchSource() + .query(matchPhraseQuery("field2", "quick brown")) + .highlighter(highlight() .field("field2").preTags("").postTags("").highlighterType("plain").requireFieldMatch(false)); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field2", 0, 1, + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy quick dog")); + } } public void testPostingsHighlighterMultipleFields() throws Exception { @@ -2022,13 +2099,15 @@ public class HighlighterSearchIT extends ESIntegTestCase { "field2", "The slow brown fox. 
Second sentence."); refresh(); - SearchResponse response = client().prepareSearch("test") + for (String type : UNIFIED_AND_NULL) { + SearchResponse response = client().prepareSearch("test") .setQuery(QueryBuilders.matchQuery("field1", "fox")) .highlighter( - new HighlightBuilder().field(new HighlightBuilder.Field("field1").preTags("<1>").postTags("") - .requireFieldMatch(true))) + new HighlightBuilder().field(new Field("field1").preTags("<1>").postTags("") + .requireFieldMatch(true).highlighterType(type))) .get(); - assertHighlight(response, 0, "field1", 0, 1, equalTo("The quick brown <1>fox.")); + assertHighlight(response, 0, "field1", 0, 1, equalTo("The quick brown <1>fox.")); + } } public void testPostingsHighlighterNumberOfFragments() throws Exception { @@ -2042,51 +2121,54 @@ public class HighlighterSearchIT extends ESIntegTestCase { + "The quick brown dog jumps over the lazy fox.").get(); refresh(); - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() + + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() .query(termQuery("field1", "fox")) .highlighter(highlight() - .field(new HighlightBuilder.Field("field1").numOfFragments(5).preTags("").postTags(""))); + .field(new Field("field1").numOfFragments(5).preTags("").postTags("").highlighterType(type))); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field1", 0, equalTo("The quick brown fox jumps over the lazy dog.")); - assertHighlight(searchResponse, 0, "field1", 1, equalTo("The lazy red fox jumps over the quick dog.")); - assertHighlight(searchResponse, 0, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy fox.")); + assertHighlight(searchResponse, 0, "field1", 0, 
equalTo("The quick brown fox jumps over the lazy dog.")); + assertHighlight(searchResponse, 0, "field1", 1, equalTo("The lazy red fox jumps over the quick dog.")); + assertHighlight(searchResponse, 0, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy fox.")); - client().prepareIndex("test", "type1", "2") - .setSource("field1", new String[] { - "The quick brown fox jumps over the lazy dog. Second sentence not finished", - "The lazy red fox jumps over the quick dog.", - "The quick brown dog jumps over the lazy fox."}).get(); - refresh(); + client().prepareIndex("test", "type1", "2") + .setSource("field1", new String[]{ + "The quick brown fox jumps over the lazy dog. Second sentence not finished", + "The lazy red fox jumps over the quick dog.", + "The quick brown dog jumps over the lazy fox."}).get(); + refresh(); - source = searchSource() + source = searchSource() .query(termQuery("field1", "fox")) .highlighter(highlight() - .field(new HighlightBuilder.Field("field1").numOfFragments(0).preTags("").postTags(""))); + .field(new Field("field1").numOfFragments(0).preTags("").postTags("").highlighterType(type))); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHitCount(searchResponse, 2L); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + assertHitCount(searchResponse, 2L); - for (SearchHit searchHit : searchResponse.getHits()) { - if ("1".equals(searchHit.id())) { - assertHighlight(searchHit, "field1", 0, 1, equalTo("The quick brown fox jumps over the lazy dog. " + for (SearchHit searchHit : searchResponse.getHits()) { + if ("1".equals(searchHit.id())) { + assertHighlight(searchHit, "field1", 0, 1, equalTo("The quick brown fox jumps over the lazy dog. " + "The lazy red fox jumps over the quick dog. 
" + "The quick brown dog jumps over the lazy fox.")); - } else if ("2".equals(searchHit.id())) { - assertHighlight(searchHit, "field1", 0, 3, + } else if ("2".equals(searchHit.id())) { + assertHighlight(searchHit, "field1", 0, 3, equalTo("The quick brown fox jumps over the lazy dog. Second sentence not finished")); - assertHighlight(searchHit, "field1", 1, 3, equalTo("The lazy red fox jumps over the quick dog.")); - assertHighlight(searchHit, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy fox.")); - } else { - fail("Only hits with id 1 and 2 are returned"); + assertHighlight(searchHit, "field1", 1, 3, equalTo("The lazy red fox jumps over the quick dog.")); + assertHighlight(searchHit, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy fox.")); + } else { + fail("Only hits with id 1 and 2 are returned"); + } } } } public void testMultiMatchQueryHighlight() throws IOException { - String[] highlighterTypes = new String[] {"fvh", "plain", "postings"}; + String[] highlighterTypes = new String[] {"fvh", "plain", "postings", "unified"}; XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties") .startObject("field1") @@ -2154,24 +2236,26 @@ public class HighlighterSearchIT extends ESIntegTestCase { + "This one contains no matches."}).get(); refresh(); - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() .query(termQuery("field1", "sentence")) - .highlighter(highlight().field("field1").order("score")); + .highlighter(highlight().field("field1").order("score").highlighterType(type)); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - Map 
highlightFieldMap = searchResponse.getHits().getAt(0).highlightFields(); - assertThat(highlightFieldMap.size(), equalTo(1)); - HighlightField field1 = highlightFieldMap.get("field1"); - assertThat(field1.fragments().length, equalTo(5)); - assertThat(field1.fragments()[0].string(), + Map highlightFieldMap = searchResponse.getHits().getAt(0).highlightFields(); + assertThat(highlightFieldMap.size(), equalTo(1)); + HighlightField field1 = highlightFieldMap.get("field1"); + assertThat(field1.fragments().length, equalTo(5)); + assertThat(field1.fragments()[0].string(), equalTo("This sentence contains three sentence occurrences (sentence).")); - assertThat(field1.fragments()[1].string(), equalTo("This sentence contains two sentence matches.")); - assertThat(field1.fragments()[2].string(), equalTo("This is the second value's first sentence.")); - assertThat(field1.fragments()[3].string(), equalTo("This sentence contains one match, not that short.")); - assertThat(field1.fragments()[4].string(), + assertThat(field1.fragments()[1].string(), equalTo("This sentence contains two sentence matches.")); + assertThat(field1.fragments()[2].string(), equalTo("This is the second value's first sentence.")); + assertThat(field1.fragments()[3].string(), equalTo("This sentence contains one match, not that short.")); + assertThat(field1.fragments()[4].string(), equalTo("One sentence match here and scored lower since the text is quite long, not that appealing.")); + } } public void testPostingsHighlighterEscapeHtml() throws Exception { @@ -2185,12 +2269,15 @@ public class HighlighterSearchIT extends ESIntegTestCase { } indexRandom(true, indexRequestBuilders); - SearchResponse searchResponse = client().prepareSearch() + for (String type : UNIFIED_AND_NULL) { + SearchResponse searchResponse = client().prepareSearch() .setQuery(matchQuery("title", "test")) - .highlighter(new HighlightBuilder().field("title").encoder("html")).get(); + .highlighter(new 
HighlightBuilder().field("title").encoder("html").highlighterType(type)).get(); - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight(searchResponse, i, "title", 0, 1, equalTo("This is a html escaping highlighting test for *&?")); + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight(searchResponse, i, "title", 0, 1, + equalTo("This is a html escaping highlighting test for *&?")); + } } } @@ -2214,25 +2301,29 @@ public class HighlighterSearchIT extends ESIntegTestCase { ensureGreen(); client().prepareIndex("test", "type1", "1").setSource("title", "this is a test . Second sentence.").get(); refresh(); - // simple search on body with standard analyzer with a simple field query - SearchResponse searchResponse = client().prepareSearch() + + for (String type : UNIFIED_AND_NULL) { + // simple search on body with standard analyzer with a simple field query + SearchResponse searchResponse = client().prepareSearch() //lets make sure we analyze the query and we highlight the resulting terms .setQuery(matchQuery("title", "This is a Test")) -.highlighter(new HighlightBuilder().field("title")).get(); + .highlighter(new HighlightBuilder().field("title").highlighterType(type)).get(); - assertHitCount(searchResponse, 1L); - SearchHit hit = searchResponse.getHits().getAt(0); - //stopwords are not highlighted since not indexed - assertHighlight(hit, "title", 0, 1, equalTo("this is a test .")); + assertHitCount(searchResponse, 1L); + SearchHit hit = searchResponse.getHits().getAt(0); + //stopwords are not highlighted since not indexed + assertHighlight(hit, "title", 0, 1, equalTo("this is a test .")); - // search on title.key and highlight on title - searchResponse = client().prepareSearch() + // search on title.key and highlight on title + searchResponse = client().prepareSearch() .setQuery(matchQuery("title.key", "this is a test")) .highlighter(new HighlightBuilder().field("title.key")).get(); - assertHitCount(searchResponse, 1L); + 
assertHitCount(searchResponse, 1L); - //stopwords are now highlighted since we used only whitespace analyzer here - assertHighlight(searchResponse, 0, "title.key", 0, 1, equalTo("this is a test .")); + //stopwords are now highlighted since we used only whitespace analyzer here + assertHighlight(searchResponse, 0, "title.key", 0, 1, + equalTo("this is a test .")); + } } public void testPostingsHighlighterMultiMapperFromSource() throws Exception { @@ -2257,20 +2348,22 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1", "1").setSource("title", "this is a test").get(); refresh(); - // simple search on body with standard analyzer with a simple field query - SearchResponse searchResponse = client().prepareSearch() + for (String type : UNIFIED_AND_NULL) { + // simple search on body with standard analyzer with a simple field query + SearchResponse searchResponse = client().prepareSearch() .setQuery(matchQuery("title", "this is a test")) .highlighter(new HighlightBuilder().field("title")) .get(); - assertHighlight(searchResponse, 0, "title", 0, 1, equalTo("this is a test")); + assertHighlight(searchResponse, 0, "title", 0, 1, equalTo("this is a test")); - // search on title.key and highlight on title.key - searchResponse = client().prepareSearch() + // search on title.key and highlight on title.key + searchResponse = client().prepareSearch() .setQuery(matchQuery("title.key", "this is a test")) - .highlighter(new HighlightBuilder().field("title.key")).get(); + .highlighter(new HighlightBuilder().field("title.key").highlighterType(type)).get(); - assertHighlight(searchResponse, 0, "title.key", 0, 1, equalTo("this is a test")); + assertHighlight(searchResponse, 0, "title.key", 0, 1, equalTo("this is a test")); + } } public void testPostingsHighlighterShouldFailIfNoOffsets() throws Exception { @@ -2321,13 +2414,15 @@ public class HighlighterSearchIT extends ESIntegTestCase { .setSource("field1", "this is a test", "field2", "The 
quick brown fox jumps over the lazy dog! Second sentence.").get(); refresh(); - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() .query(boostingQuery(termQuery("field2", "brown"), termQuery("field2", "foobar")).negativeBoost(0.5f)) - .highlighter(highlight().field("field2").preTags("").postTags("")); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + .highlighter(highlight().field("field2").preTags("").postTags("").highlighterType(type)); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + } } public void testPostingsHighlighterCommonTermsQuery() throws IOException { @@ -2337,13 +2432,16 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1") .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! 
Second sentence.").get(); refresh(); - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource().query(commonTermsQuery("field2", "quick brown").cutoffFrequency(100)) - .highlighter(highlight().field("field2").preTags("").postTags("")); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHitCount(searchResponse, 1L); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource().query(commonTermsQuery("field2", "quick brown").cutoffFrequency(100)) + .highlighter(highlight().field("field2").preTags("").postTags("").highlighterType(type)); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + assertHitCount(searchResponse, 1L); + + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + } } private static XContentBuilder type1PostingsffsetsMapping() throws IOException { @@ -2364,11 +2462,12 @@ public class HighlighterSearchIT extends ESIntegTestCase { refresh(); logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui")) - .highlighter(highlight().field("field2")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); - + for (String type : UNIFIED_AND_NULL) { + SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui")) + .highlighter(highlight().field("field2").highlighterType(type)); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field2", 0, 1, 
equalTo("The quick brown fox jumps over the lazy dog!")); + } } public void testPostingsHighlighterFuzzyQuery() throws Exception { @@ -2378,12 +2477,15 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1") .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get(); refresh(); - logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = searchSource().query(fuzzyQuery("field2", "quck")) - .highlighter(highlight().field("field2")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field2"); + SearchSourceBuilder source = searchSource().query(fuzzyQuery("field2", "quck")) + .highlighter(highlight().field("field2").highlighterType(type)); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + } } public void testPostingsHighlighterRegexpQuery() throws Exception { @@ -2393,12 +2495,15 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1") .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! 
Second sentence.").get(); refresh(); - logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = searchSource().query(regexpQuery("field2", "qu[a-l]+k")) - .highlighter(highlight().field("field2")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field2"); + SearchSourceBuilder source = searchSource().query(regexpQuery("field2", "qu[a-l]+k")) + .highlighter(highlight().field("field2").highlighterType(type)); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + } } public void testPostingsHighlighterWildcardQuery() throws Exception { @@ -2408,19 +2513,22 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1") .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! 
Second sentence.").get(); refresh(); - logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = searchSource().query(wildcardQuery("field2", "qui*")) - .highlighter(highlight().field("field2")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field2"); + SearchSourceBuilder source = searchSource().query(wildcardQuery("field2", "qui*")) + .highlighter(highlight().field("field2").highlighterType(type)); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - source = searchSource().query(wildcardQuery("field2", "qu*k")) - .highlighter(highlight().field("field2")); - searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHitCount(searchResponse, 1L); + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + source = searchSource().query(wildcardQuery("field2", "qu*k")) + .highlighter(highlight().field("field2").highlighterType(type)); + searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHitCount(searchResponse, 1L); + + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + } } public void testPostingsHighlighterTermRangeQuery() throws Exception { @@ -2429,12 +2537,15 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "aaab").get(); refresh(); - logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = 
searchSource().query(rangeQuery("field2").gte("aaaa").lt("zzzz")) - .highlighter(highlight().field("field2")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("aaab")); + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field2"); + SearchSourceBuilder source = searchSource().query(rangeQuery("field2").gte("aaaa").lt("zzzz")) + .highlighter(highlight().field("field2").highlighterType(type)); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("aaab")); + } } public void testPostingsHighlighterQueryString() throws Exception { @@ -2444,11 +2555,14 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1") .setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get(); refresh(); - logger.info("--> highlighting and searching on field2"); - SearchSourceBuilder source = searchSource().query(queryStringQuery("qui*").defaultField("field2")) - .highlighter(highlight().field("field2")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field2"); + SearchSourceBuilder source = searchSource().query(queryStringQuery("qui*").defaultField("field2")) + .highlighter(highlight().field("field2").highlighterType(type)); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog!")); + } } public void testPostingsHighlighterRegexpQueryWithinConstantScoreQuery() throws Exception 
{ @@ -2458,11 +2572,13 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); refresh(); - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource().query(constantScoreQuery(regexpQuery("field1", "pho[a-z]+"))) - .highlighter(highlight().field("field1")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource().query(constantScoreQuery(regexpQuery("field1", "pho[a-z]+"))) + .highlighter(highlight().field("field1").highlighterType(type)); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + } } public void testPostingsHighlighterMultiTermQueryMultipleLevels() throws Exception { @@ -2472,14 +2588,16 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); refresh(); - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource().query(boolQuery() + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource().query(boolQuery() .should(boolQuery().mustNot(QueryBuilders.existsQuery("field1"))) .should(matchQuery("field1", "test")) .should(constantScoreQuery(queryStringQuery("field1:photo*")))) - .highlighter(highlight().field("field1")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - 
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + .highlighter(highlight().field("field1").highlighterType(type)); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + } } public void testPostingsHighlighterPrefixQueryWithinBooleanQuery() throws Exception { @@ -2489,12 +2607,14 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); refresh(); - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource() + for (String type : UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource() .query(boolQuery().must(prefixQuery("field1", "photo")).should(matchQuery("field1", "test").minimumShouldMatch("0"))) - .highlighter(highlight().field("field1")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + .highlighter(highlight().field("field1").highlighterType(type)); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + } } public void testPostingsHighlighterQueryStringWithinFilteredQuery() throws Exception { @@ -2504,13 +2624,15 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1").setSource("field1", "The photography word will get highlighted").get(); refresh(); - logger.info("--> highlighting and searching on field1"); - SearchSourceBuilder source = searchSource().query(boolQuery() + for (String type : 
UNIFIED_AND_NULL) { + logger.info("--> highlighting and searching on field1"); + SearchSourceBuilder source = searchSource().query(boolQuery() .must(queryStringQuery("field1:photo*")) .mustNot(existsQuery("field_null"))) - .highlighter(highlight().field("field1")); - SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); - assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + .highlighter(highlight().field("field1").highlighterType(type)); + SearchResponse searchResponse = client().prepareSearch("test").setSource(source).get(); + assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The photography word will get highlighted")); + } } public void testPostingsHighlighterManyDocs() throws Exception { @@ -2532,18 +2654,20 @@ public class HighlighterSearchIT extends ESIntegTestCase { logger.info("--> indexing docs"); indexRandom(true, indexRequestBuilders); - logger.info("--> searching explicitly on field1 and highlighting on it"); - SearchRequestBuilder searchRequestBuilder = client().prepareSearch() + for (String type : UNIFIED_AND_NULL) { + logger.info("--> searching explicitly on field1 and highlighting on it"); + SearchRequestBuilder searchRequestBuilder = client().prepareSearch() .setSize(COUNT) .setQuery(termQuery("field1", "test")) - .highlighter(new HighlightBuilder().field("field1")); - SearchResponse searchResponse = + .highlighter(new HighlightBuilder().field("field1").highlighterType(type)); + SearchResponse searchResponse = searchRequestBuilder.get(); - assertHitCount(searchResponse, COUNT); - assertThat(searchResponse.getHits().hits().length, equalTo(COUNT)); - for (SearchHit hit : searchResponse.getHits()) { - String prefix = prefixes.get(hit.id()); - assertHighlight(hit, "field1", 0, 1, equalTo("Sentence " + prefix + " test.")); + assertHitCount(searchResponse, COUNT); + assertThat(searchResponse.getHits().hits().length, equalTo(COUNT)); + for (SearchHit hit : 
searchResponse.getHits()) { + String prefix = prefixes.get(hit.id()); + assertHighlight(hit, "field1", 0, 1, equalTo("Sentence " + prefix + " test.")); + } } } @@ -2558,7 +2682,7 @@ public class HighlighterSearchIT extends ESIntegTestCase { indexRandom(true, client().prepareIndex("test", "typename").setSource("foo", "test typename")); - for (String highlighter: new String[] {"plain", "fvh", "postings"}) { + for (String highlighter : ALL_TYPES) { SearchResponse response = client().prepareSearch("test").setTypes("typename").setQuery(matchQuery("foo", "test")) .highlighter(new HighlightBuilder().field("foo").highlighterType(highlighter).requireFieldMatch(false)).get(); assertHighlight(response, 0, "foo", 0, 1, equalTo("test typename")); @@ -2577,7 +2701,7 @@ public class HighlighterSearchIT extends ESIntegTestCase { indexRandom(true, client().prepareIndex("test", "typename").setSource("foo", "test japanese")); - for (String highlighter: new String[] {"plain", "fvh", "postings"}) { + for (String highlighter : ALL_TYPES) { SearchResponse response = client().prepareSearch("filtered_alias").setTypes("typename").setQuery(matchQuery("foo", "test")) .highlighter(new HighlightBuilder().field("foo").highlighterType(highlighter).requireFieldMatch(false)).get(); assertHighlight(response, 0, "foo", 0, 1, equalTo("test japanese")); @@ -2685,7 +2809,7 @@ public class HighlighterSearchIT extends ESIntegTestCase { .setSource(jsonBuilder().startObject().field("text", "Arbitrary text field which will should not cause a failure").endObject()) .get(); refresh(); - String highlighterType = randomFrom("plain", "fvh", "postings"); + String highlighterType = randomFrom("plain", "fvh", "postings", "unified"); QueryBuilder query = QueryBuilders.boolQuery().should(QueryBuilders.geoBoundingBoxQuery("geo_point") .setCorners(61.10078883158897, -170.15625, -64.92354174306496, 118.47656249999999)) .should(QueryBuilders.termQuery("text", "failure")); @@ -2806,15 +2930,17 @@ public class 
HighlighterSearchIT extends ESIntegTestCase { .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) .get(); - SearchResponse searchResponse = client().prepareSearch() - .setQuery(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"))) - .highlighter(new HighlightBuilder() - .field(new Field("text"))) - .get(); - assertHitCount(searchResponse, 1); - HighlightField field = searchResponse.getHits().getAt(0).highlightFields().get("text"); - assertThat(field.getFragments().length, equalTo(1)); - assertThat(field.getFragments()[0].string(), equalTo("brown")); + for (String type : UNIFIED_AND_NULL) { + SearchResponse searchResponse = client().prepareSearch() + .setQuery(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"))) + .highlighter(new HighlightBuilder() + .field(new Field("text")).highlighterType(type)) + .get(); + assertHitCount(searchResponse, 1); + HighlightField field = searchResponse.getHits().getAt(0).highlightFields().get("text"); + assertThat(field.getFragments().length, equalTo(1)); + assertThat(field.getFragments()[0].string(), equalTo("brown")); + } } public void testFiltersFunctionScoreQueryHighlight() throws Exception { @@ -2825,16 +2951,19 @@ public class HighlighterSearchIT extends ESIntegTestCase { FunctionScoreQueryBuilder.FilterFunctionBuilder filterBuilder = new FunctionScoreQueryBuilder.FilterFunctionBuilder(QueryBuilders.termQuery("enable", "yes"), new RandomScoreFunctionBuilder()); - SearchResponse searchResponse = client().prepareSearch() - .setQuery(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"), - new FunctionScoreQueryBuilder.FilterFunctionBuilder[] {filterBuilder})) - .highlighter(new HighlightBuilder() - .field(new Field("text"))) - .get(); - assertHitCount(searchResponse, 1); - HighlightField field = searchResponse.getHits().getAt(0).highlightFields().get("text"); - assertThat(field.getFragments().length, equalTo(1)); - assertThat(field.getFragments()[0].string(), 
equalTo("brown")); + + for (String type : UNIFIED_AND_NULL) { + SearchResponse searchResponse = client().prepareSearch() + .setQuery(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"), + new FunctionScoreQueryBuilder.FilterFunctionBuilder[]{filterBuilder})) + .highlighter(new HighlightBuilder() + .field(new Field("text")).highlighterType(type)) + .get(); + assertHitCount(searchResponse, 1); + HighlightField field = searchResponse.getHits().getAt(0).highlightFields().get("text"); + assertThat(field.getFragments().length, equalTo(1)); + assertThat(field.getFragments()[0].string(), equalTo("brown")); + } } public void testSynonyms() throws IOException { @@ -2854,7 +2983,7 @@ public class HighlighterSearchIT extends ESIntegTestCase { client().prepareIndex("test", "type1", "0").setSource( "field1", "The quick brown fox jumps over the lazy dog").get(); refresh(); - for (String highlighterType : new String[] {"plain", "postings", "fvh"}) { + for (String highlighterType : ALL_TYPES) { logger.info("--> highlighting (type=" + highlighterType + ") and searching on field1"); SearchSourceBuilder source = searchSource() .query(matchQuery("field1", "quick brown fox").operator(Operator.AND)) diff --git a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/request/highlighting.asciidoc index dc2673cebb4..30c0e20d5bf 100644 --- a/docs/reference/search/request/highlighting.asciidoc +++ b/docs/reference/search/request/highlighting.asciidoc @@ -126,6 +126,22 @@ the index to be bigger): } -------------------------------------------------- +==== Unified Highlighter + +experimental[] + +The `unified` highlighter can extract offsets from either postings, term vectors, or via re-analyzing text. +Under the hood it uses Lucene UnifiedHighlighter which picks its strategy depending on the field and the query to highlight. 
+Independently of the strategy, this highlighter breaks the text into sentences and scores individual sentences as +if they were documents in this corpus, using the BM25 algorithm. +It supports accurate phrase and multi-term (fuzzy, prefix, regex) highlighting and can be used with the following options: + +* `force_source` +* `encoder` +* `highlight_query` +* `pre_tags` and `post_tags` +* `require_field_match` + ==== Force highlighter type The `type` field allows to force a specific highlighter type. This is useful diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/10_unified.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/10_unified.yaml new file mode 100644 index 00000000000..72f782e68d1 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.highlight/10_unified.yaml @@ -0,0 +1,39 @@ +setup: + - do: + indices.create: + index: test + body: + mappings: + unified: + "properties": + "text": + "type": "text" + "fields": + "fvh": + "type": "text" + "term_vector": "with_positions_offsets" + "postings": + "type": "text" + "index_options": "offsets" + - do: + index: + index: test + type: unified + id: 1 + body: + "text" : "The quick brown fox is brown." + - do: + indices.refresh: {} + +--- +"Basic": + - skip: + version: " - 5.2.99" + reason: this uses a new highlighter that has been added in 5.3 + - do: + search: + body: { "query" : {"multi_match" : { "query" : "quick brown fox", "fields" : [ "text*"] } }, "highlight" : { "type" : "unified", "fields" : { "*" : {} } } } + + - match: {hits.hits.0.highlight.text.0: "The quick brown fox is brown."} + - match: {hits.hits.0.highlight.text\.fvh.0: "The quick brown fox is brown."} + - match: {hits.hits.0.highlight.text\.postings.0: "The quick brown fox is brown."}