Mirror of https://github.com/honeymoose/OpenSearch.git (synced 2025-03-30 11:58:36 +00:00)
Improve highlighting perf (a bit) by reusing some constructs across hits, closes #1513.
This commit is contained in:
parent c93ddd9b61
commit be282cc4c8
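In short: FetchPhase now hands each sub-phase a HitContext that also carries the top-level IndexReader and docId, and HighlightPhase keeps a cache (in HitContext.cache()) of the heavy highlighting constructs, keyed by field mapper, so they are reused across hits instead of rebuilt for every hit. Below is a minimal sketch of that reuse pattern, using only the JDK; the names PerRequestCache, Entry, and buildEntry are illustrative stand-ins, not the actual API.

import java.util.HashMap;
import java.util.Map;

// Sketch of the reuse pattern this commit introduces. "Entry" stands in for
// the heavy per-field constructs (the rewritten FieldQuery for the
// FastVectorHighlighter, the plain Highlighter, the frag builders) that used
// to be rebuilt for every hit.
class PerRequestCache {
    private final Map<String, Entry> cache = new HashMap<String, Entry>();

    // called once per highlighted field per hit; only the first call pays
    Entry get(String field) {
        Entry entry = cache.get(field);
        if (entry == null) {
            entry = buildEntry(field); // the expensive part, now done once
            cache.put(field, entry);
        }
        return entry;
    }

    private Entry buildEntry(String field) {
        return new Entry();
    }

    static class Entry {
        // e.g. Highlighter, FieldQuery, FragListBuilder, FragmentsBuilder
    }
}

With N hits and one highlighted field, the expensive build runs once instead of N times, which is exactly the "a bit" of perf the commit message promises.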
@@ -202,7 +202,7 @@ public class FetchPhase implements SearchPhase {
             for (FetchSubPhase fetchSubPhase : fetchSubPhases) {
                 FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
                 if (fetchSubPhase.hitExecutionNeeded(context)) {
-                    hitContext.reset(searchHit, subReader, subDoc, doc);
+                    hitContext.reset(searchHit, subReader, subDoc, context.searcher().getIndexReader(), docId, doc);
                     fetchSubPhase.hitExecute(context, hitContext);
                 }
             }
@@ -22,6 +22,7 @@ package org.elasticsearch.search.fetch;

 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.collect.Maps;
 import org.elasticsearch.search.SearchParseElement;
 import org.elasticsearch.search.internal.InternalSearchHit;
 import org.elasticsearch.search.internal.SearchContext;

@@ -35,14 +36,19 @@ public interface FetchSubPhase {

     public static class HitContext {
         private InternalSearchHit hit;
+        private IndexReader topLevelReader;
+        private int topLevelDocId;
         private IndexReader reader;
         private int docId;
         private Document doc;
         private Map<String, Object> cache;

-        public void reset(InternalSearchHit hit, IndexReader reader, int docId, Document doc) {
+        public void reset(InternalSearchHit hit, IndexReader reader, int docId, IndexReader topLevelReader, int topLevelDocId, Document doc) {
             this.hit = hit;
             this.reader = reader;
             this.docId = docId;
+            this.topLevelReader = topLevelReader;
+            this.topLevelDocId = topLevelDocId;
             this.doc = doc;
         }

@@ -58,9 +64,24 @@ public interface FetchSubPhase {
             return docId;
         }

+        public IndexReader topLevelReader() {
+            return topLevelReader;
+        }
+
+        public int topLevelDocId() {
+            return topLevelDocId;
+        }
+
         public Document doc() {
             return doc;
         }

+        public Map<String, Object> cache() {
+            if (cache == null) {
+                cache = Maps.newHashMap();
+            }
+            return cache;
+        }
     }

     Map<String, ? extends SearchParseElement> parseElements();
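The widened reset(...) signature distinguishes the segment-level reader and docId (used to load the hit's stored fields) from the top-level reader and docId (used, per the comment in HighlightPhase below, to rewrite the query once against all readers so the result can be cached). The two docId spaces differ only by the segment's doc base; here is a hypothetical helper, not part of the commit, to make the relationship concrete.

// Illustrative helper only: in Lucene, a top-level docId is the segment
// docId offset by the segment's position (docBase) in the composite reader.
final class DocIds {
    static int toTopLevel(int segmentDocId, int docBase) {
        return docBase + segmentDocId;
    }

    static int toSegment(int topLevelDocId, int docBase) {
        return topLevelDocId - docBase;
    }
}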
@@ -24,7 +24,6 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.highlight.*;

@@ -39,6 +38,7 @@ import org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder;
 import org.apache.lucene.search.vectorhighlight.SingleFragListBuilder;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.collect.ImmutableMap;
+import org.elasticsearch.common.collect.Maps;
 import org.elasticsearch.common.io.FastStringReader;
 import org.elasticsearch.common.lucene.document.SingleFieldSelector;
 import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery;

@@ -55,7 +55,6 @@ import org.elasticsearch.search.internal.InternalSearchHit;
 import org.elasticsearch.search.internal.SearchContext;
 import org.elasticsearch.search.lookup.SearchLookup;

-import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;

@@ -90,7 +89,13 @@ public class HighlightPhase implements FetchSubPhase {
     }

     @Override public void hitExecute(SearchContext context, HitContext hitContext) throws ElasticSearchException {
+        try {
+            // we use a cache to cache heavy things, mainly the rewrite in FieldQuery for FVH
+            Map<FieldMapper, HighlightEntry> cache = (Map<FieldMapper, HighlightEntry>) hitContext.cache().get("highlight");
+            if (cache == null) {
+                cache = Maps.newHashMap();
+                hitContext.cache().put("highlight", cache);
+            }

         DocumentMapper documentMapper = context.mapperService().documentMapper(hitContext.hit().type());

         Map<String, HighlightField> highlightFields = newHashMap();

@@ -120,6 +125,8 @@ public class HighlightPhase implements FetchSubPhase {
             // if we can do highlighting using Term Vectors, use FastVectorHighlighter, otherwise, use the
             // slower plain highlighter
             if (mapper.termVector() != Field.TermVector.WITH_POSITIONS_OFFSETS) {
+                HighlightEntry entry = cache.get(mapper);
+                if (entry == null) {
                 // Don't use the context.query() since it might be rewritten, and we need to pass the non rewritten queries to
                 // let the highlighter handle MultiTerm ones

@@ -147,8 +154,12 @@ public class HighlightPhase implements FetchSubPhase {
                     Formatter formatter = new SimpleHTMLFormatter(field.preTags()[0], field.postTags()[0]);


-                    Highlighter highlighter = new Highlighter(formatter, encoder, queryScorer);
-                    highlighter.setTextFragmenter(fragmenter);
+                    entry = new HighlightEntry();
+                    entry.highlighter = new Highlighter(formatter, encoder, queryScorer);
+                    entry.highlighter.setTextFragmenter(fragmenter);
+
+                    cache.put(mapper, entry);
+                }

                 List<Object> textsToHighlight;
                 if (mapper.stored()) {

@@ -178,7 +189,7 @@ public class HighlightPhase implements FetchSubPhase {
                         String text = textToHighlight.toString();
                         Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
                         TokenStream tokenStream = analyzer.reusableTokenStream(mapper.names().indexName(), new FastStringReader(text));
-                        TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, text, false, numberOfFragments);
+                        TextFragment[] bestTextFragments = entry.highlighter.getBestTextFragments(tokenStream, text, false, numberOfFragments);
                         for (TextFragment bestTextFragment : bestTextFragments) {
                             if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
                                 fragsList.add(bestTextFragment);

@@ -216,6 +227,9 @@ public class HighlightPhase implements FetchSubPhase {
                     highlightFields.put(highlightField.name(), highlightField);
                 }
             } else {
+                try {
+                    HighlightEntry entry = cache.get(mapper);
+                    if (entry == null) {
                 FragListBuilder fragListBuilder;
                 FragmentsBuilder fragmentsBuilder;
                 if (field.numberOfFragments() == 0) {

@@ -246,33 +260,44 @@ public class HighlightPhase implements FetchSubPhase {
                         }
                     }
                 }
+                    entry = new HighlightEntry();
+                    entry.fragListBuilder = fragListBuilder;
+                    entry.fragmentsBuilder = fragmentsBuilder;
+                    entry.fvh = new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder);
+                    CustomFieldQuery.highlightFilters.set(field.highlightFilter());
+                    // we use top level reader to rewrite the query against all readers, with use caching it across hits (and across readers...)
+                    entry.fieldQuery = new CustomFieldQuery(context.parsedQuery().query(), hitContext.topLevelReader(), entry.fvh);
+
+                    cache.put(mapper, entry);
+                }

                 String[] fragments;
-                try {
-                    FastVectorHighlighter highlighter = new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder);
-                    FieldQuery fieldQuery = buildFieldQuery(highlighter, context.parsedQuery().query(), hitContext.reader(), field);

                 // a HACK to make highlighter do highlighting, even though its using the single frag list builder
                 int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments();
-                    fragments = highlighter.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments,
-                            fragListBuilder, fragmentsBuilder, field.preTags(), field.postTags(), encoder);
-                } catch (IOException e) {
-                    throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
-                }
+                // we highlight against the low level reader and docId, because if we load source, we want to reuse it if possible
+                fragments = entry.fvh.getBestFragments(entry.fieldQuery, hitContext.reader(), hitContext.docId(), mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments,
+                        entry.fragListBuilder, entry.fragmentsBuilder, field.preTags(), field.postTags(), encoder);

                 if (fragments != null && fragments.length > 0) {
                     HighlightField highlightField = new HighlightField(field.field(), fragments);
                     highlightFields.put(highlightField.name(), highlightField);
                 }
+                } catch (Exception e) {
+                    throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
+                }
             }
         }

         hitContext.hit().highlightFields(highlightFields);
+        } finally {
+            CustomFieldQuery.highlightFilters.remove();
+        }
     }

-    private FieldQuery buildFieldQuery(FastVectorHighlighter highlighter, Query query, IndexReader indexReader, SearchContextHighlight.Field field) throws IOException {
-        CustomFieldQuery.highlightFilters.set(field.highlightFilter());
-        return new CustomFieldQuery(query, indexReader, highlighter);
-    }
+    static class HighlightEntry {
+        public FastVectorHighlighter fvh;
+        public FieldQuery fieldQuery;
+        public FragListBuilder fragListBuilder;
+        public FragmentsBuilder fragmentsBuilder;

+        public Highlighter highlighter;
+    }
 }
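One subtlety worth calling out: CustomFieldQuery reads its highlightFilters flag from a ThreadLocal, so the commit wraps the body of hitExecute in try/finally and clears the flag on exit. Search threads are pooled, and a value left behind would leak into whatever request runs next on the same thread. A stripped-down sketch of that discipline, assuming only the JDK (the names here are illustrative, not the codebase's):

// Set-use-clear discipline around a ThreadLocal flag; the finally block
// guarantees the pooled thread does not carry the flag into the next request.
final class ThreadLocalFlag {
    static final ThreadLocal<Boolean> FLAG = new ThreadLocal<Boolean>();

    static void runWithFlag(boolean value, Runnable work) {
        FLAG.set(value);
        try {
            work.run(); // stands in for building CustomFieldQuery and extracting fragments
        } finally {
            FLAG.remove(); // always clear, even when highlighting throws
        }
    }
}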
@@ -97,9 +97,10 @@ public class HighlighterSearchTests extends AbstractNodesTests {
                 .addHighlightedField("title", -1, 0)
                 .execute().actionGet();

+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+
         assertThat(search.hits().totalHits(), equalTo(5l));
         assertThat(search.hits().hits().length, equalTo(5));
-        assertThat(search.getFailedShards(), equalTo(0));

         for (SearchHit hit : search.hits()) {
             assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));

@@ -110,11 +111,10 @@ public class HighlighterSearchTests extends AbstractNodesTests {
                 .addHighlightedField("attachments.body", -1, 0)
                 .execute().actionGet();

-        System.out.println(search);
+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));

         assertThat(search.hits().totalHits(), equalTo(5l));
         assertThat(search.hits().hits().length, equalTo(5));
-        assertThat(search.getFailedShards(), equalTo(0));

         for (SearchHit hit : search.hits()) {
             assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1 <em>attachment</em> 2"));

@@ -150,9 +150,10 @@ public class HighlighterSearchTests extends AbstractNodesTests {
                 .addHighlightedField("title", -1, 0)
                 .execute().actionGet();

+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+
         assertThat(search.hits().totalHits(), equalTo(5l));
         assertThat(search.hits().hits().length, equalTo(5));
-        assertThat(search.getFailedShards(), equalTo(0));

         for (SearchHit hit : search.hits()) {
             assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch "));

@@ -163,9 +164,10 @@ public class HighlighterSearchTests extends AbstractNodesTests {
                 .addHighlightedField("attachments.body", -1, 0)
                 .execute().actionGet();

+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+
         assertThat(search.hits().totalHits(), equalTo(5l));
         assertThat(search.hits().hits().length, equalTo(5));
-        assertThat(search.getFailedShards(), equalTo(0));

         for (SearchHit hit : search.hits()) {
             assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1 <em>attachment</em> 2 "));

@@ -428,9 +430,10 @@ public class HighlighterSearchTests extends AbstractNodesTests {
                 .addHighlightedField("title", 50, 1, 10)
                 .execute().actionGet();

+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+
         assertThat(search.hits().totalHits(), equalTo(5l));
         assertThat(search.hits().hits().length, equalTo(5));
-        assertThat(search.getFailedShards(), equalTo(0));

         for (SearchHit hit : search.hits()) {
             // LUCENE 3.1 UPGRADE: Caused adding the space at the end...

@@ -456,21 +459,17 @@ public class HighlighterSearchTests extends AbstractNodesTests {
             client.prepareIndex("test", "type1", Integer.toString(i))
                     .setSource("title", "This is a html escaping highlighting test for *&? elasticsearch").setRefresh(true).execute().actionGet();
         }
-        SearchSourceBuilder source = searchSource()
-                .query(termQuery("field1", "test"))
-                .from(0).size(60).explain(true)
-                .highlight(highlight().field("field1", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>"));


         SearchResponse search = client.prepareSearch()
                 .setQuery(fieldQuery("title", "test")).setEncoder("html")
                 .addHighlightedField("title", 50, 1, 10)
                 .execute().actionGet();

+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+
         assertThat(search.hits().totalHits(), equalTo(5l));
         assertThat(search.hits().hits().length, equalTo(5));
-        assertThat(search.getFailedShards(), equalTo(0));

         for (SearchHit hit : search.hits()) {
             // LUCENE 3.1 UPGRADE: Caused adding the space at the end...

@@ -503,9 +502,10 @@ public class HighlighterSearchTests extends AbstractNodesTests {
                 .execute().actionGet();


+        assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
+
         assertThat(search.hits().totalHits(), equalTo(5l));
         assertThat(search.hits().hits().length, equalTo(5));
-        assertThat(search.getFailedShards(), equalTo(0));

         for (SearchHit hit : search.hits()) {
             // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
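The test changes all follow one idiom: the stray System.out.println is dropped, and the trailing getFailedShards() check becomes an up-front Hamcrest assertThat(reason, actual, matcher) whose reason string carries the stringified shard failures, so a failing shard yields a self-describing message rather than a bare count mismatch. The same idiom in isolation, with illustrative stand-in values:

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;

// Hamcrest prints the first argument only when the matcher fails, turning an
// opaque "expected 0 but was 1" into a message that includes the failure details.
public class ReasonedAssertExample {
    public static void main(String[] args) {
        String shardFailures = "[]"; // stands in for Arrays.toString(search.shardFailures())
        int failedShards = 0;        // stands in for search.failedShards()
        assertThat(shardFailures, failedShards, equalTo(0));
    }
}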