From cc1eac147a5278b46dfa18481dbbd2ed2461aa19 Mon Sep 17 00:00:00 2001 From: kimchy Date: Sun, 5 Sep 2010 20:47:38 +0300 Subject: [PATCH] Allow to specify highlighter parameters on a per field level basis, closes #356. --- .../SingleFragListBuilder.java | 20 +++++ .../search/highlight/HighlightPhase.java | 64 +++++-------- .../highlight/HighlighterParseElement.java | 69 +++++++------- .../highlight/SearchContextHighlight.java | 90 +++++++++---------- 4 files changed, 116 insertions(+), 127 deletions(-) diff --git a/modules/elasticsearch/src/main/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java b/modules/elasticsearch/src/main/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java index b24591756a2..441ea1c9ebb 100644 --- a/modules/elasticsearch/src/main/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java +++ b/modules/elasticsearch/src/main/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java @@ -1,3 +1,22 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.lucene.search.vectorhighlight; import java.util.ArrayList; @@ -9,6 +28,7 @@ import java.util.List; * http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java * This class in not available in 3.0.2 release yet. */ +// LUCENE MONITOR public class SingleFragListBuilder implements FragListBuilder { @Override public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java index 008e9d97562..037bf161eb9 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java @@ -24,7 +24,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.vectorhighlight.*; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.collect.ImmutableMap; -import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.search.SearchHit; @@ -35,10 +34,9 @@ import org.elasticsearch.search.internal.InternalSearchHit; import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; -import java.util.Arrays; import java.util.Map; -import static org.elasticsearch.common.collect.Maps.newHashMap; +import static org.elasticsearch.common.collect.Maps.*; /** * @author kimchy (shay.banon) @@ -57,9 +55,6 @@ public class HighlightPhase implements SearchPhase { return; } - Map highlighterMap = newHashMap(); - Map fieldQueryMap = newHashMap(); - for (SearchHit hit : context.fetchResult().hits().hits()) { InternalSearchHit internalHit = (InternalSearchHit) hit; @@ -67,24 +62,23 @@ public class HighlightPhase implements SearchPhase { int docId = internalHit.docId(); Map highlightFields = newHashMap(); - for (SearchContextHighlight.ParsedHighlightField parsedHighlightField : context.highlight().fields()) { - String fieldName = parsedHighlightField.field(); - FieldMapper mapper = documentMapper.mappers().smartNameFieldMapper(parsedHighlightField.field()); + for (SearchContextHighlight.Field field : context.highlight().fields()) { + String fieldName = field.field(); + FieldMapper mapper = documentMapper.mappers().smartNameFieldMapper(field.field()); if (mapper != null) { fieldName = mapper.names().indexName(); } - Tuple highlighterTuple = getHighlighter(highlighterMap, parsedHighlightField.settings()); - FastVectorHighlighter highlighter = highlighterTuple.v2(); - FieldQuery fieldQuery = getFieldQuery(highlighterTuple.v1(), fieldQueryMap, highlighter, context.query(), context.searcher().getIndexReader(), parsedHighlightField.settings()); - + FastVectorHighlighter highlighter = buildHighlighter(field); + FieldQuery fieldQuery = buildFieldQuery(highlighter, context.query(), context.searcher().getIndexReader(), field); + String[] fragments; try { - fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, fieldName, parsedHighlightField.settings().fragmentCharSize(), parsedHighlightField.settings().numberOfFragments()); + fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, fieldName, field.fragmentCharSize(), field.numberOfFragments()); } catch (IOException e) { - throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + parsedHighlightField.field() + "]", e); + throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e); } - HighlightField highlightField = new HighlightField(parsedHighlightField.field(), fragments); + HighlightField highlightField = new HighlightField(field.field(), fragments); highlightFields.put(highlightField.name(), highlightField); } @@ -92,43 +86,27 @@ public class HighlightPhase implements SearchPhase { } } - private FieldQuery getFieldQuery(int key, Map fieldQueryMap, FastVectorHighlighter highlighter, Query query, IndexReader indexReader, SearchContextHighlight.ParsedHighlightSettings settings) { - FieldQuery fq = fieldQueryMap.get(key); - if (fq == null) { - CustomFieldQuery.reader.set(indexReader); - CustomFieldQuery.highlightFilters.set(settings.highlightFilter()); - fq = new CustomFieldQuery(query, highlighter); - fieldQueryMap.put(key,fq); - } - return fq; + private FieldQuery buildFieldQuery(FastVectorHighlighter highlighter, Query query, IndexReader indexReader, SearchContextHighlight.Field field) { + CustomFieldQuery.reader.set(indexReader); + CustomFieldQuery.highlightFilters.set(field.highlightFilter()); + return new CustomFieldQuery(query, highlighter); } - private Tuple getHighlighter(Map highlighterMap, SearchContextHighlight.ParsedHighlightSettings settings) { - + private FastVectorHighlighter buildHighlighter(SearchContextHighlight.Field field) { FragListBuilder fragListBuilder; FragmentsBuilder fragmentsBuilder; - if (!settings.fragmentsAllowed()) { + if (field.numberOfFragments() == 0) { fragListBuilder = new SingleFragListBuilder(); - fragmentsBuilder = new SimpleFragmentsBuilder(settings.preTags(), settings.postTags()); + fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags()); } else { fragListBuilder = new SimpleFragListBuilder(); - if (settings.scoreOrdered()) { - fragmentsBuilder = new ScoreOrderFragmentsBuilder(settings.preTags(), settings.postTags()); + if (field.scoreOrdered()) { + fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags()); } else { - fragmentsBuilder = new SimpleFragmentsBuilder(settings.preTags(), settings.postTags()); + fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags()); } } - // highlighter key is determined by tags and FragList and Fragment builder classes. - String[] mask = Arrays.copyOf(settings.preTags(), settings.preTags().length + settings.postTags().length); - System.arraycopy(settings.postTags(), 0, mask, settings.preTags().length, settings.postTags().length); - int key = (Arrays.toString(mask)+fragListBuilder.getClass().getSimpleName()+fragmentsBuilder.getClass().getSimpleName()).hashCode(); - - FastVectorHighlighter highlighter = highlighterMap.get(key); - if (highlighter == null) { - highlighter = new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder); - highlighterMap.put(key,highlighter); - } - return Tuple.tuple(key, highlighter); + return new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java index de5bc7c6c6d..4bcb2749db8 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java @@ -64,15 +64,14 @@ public class HighlighterParseElement implements SearchParseElement { @Override public void parse(XContentParser parser, SearchContext context) throws Exception { XContentParser.Token token; String topLevelFieldName = null; - List fields = newArrayList(); - + List fields = newArrayList(); + String[] globalPreTags = DEFAULT_PRE_TAGS; String[] globalPostTags = DEFAULT_POST_TAGS; boolean globalScoreOrdered = false; boolean globalHighlightFilter = true; int globalFragmentSize = 100; int globalNumOfFragments = 5; - boolean globalFragmentsAllowed = true; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -105,9 +104,7 @@ public class HighlighterParseElement implements SearchParseElement { } else if ("fragment_size".equals(topLevelFieldName) || "fragmentSize".equals(topLevelFieldName)) { globalFragmentSize = parser.intValue(); } else if ("number_of_fragments".equals(topLevelFieldName) || "numberOfFragments".equals(topLevelFieldName)) { - globalNumOfFragments = parser.intValue(); - } else if ("fragment_type".equals(topLevelFieldName) || "fragmentType".equals(topLevelFieldName)) { - globalFragmentsAllowed = !("content".equals(parser.text())); + globalNumOfFragments = parser.intValue(); } } else if (token == XContentParser.Token.START_OBJECT) { if ("fields".equals(topLevelFieldName)) { @@ -116,16 +113,8 @@ public class HighlighterParseElement implements SearchParseElement { if (token == XContentParser.Token.FIELD_NAME) { highlightFieldName = parser.currentName(); } else if (token == XContentParser.Token.START_OBJECT) { + SearchContextHighlight.Field field = new SearchContextHighlight.Field(highlightFieldName); String fieldName = null; - - int fragmentSize = globalFragmentSize; - int numOfFragments = globalNumOfFragments; - boolean highlightFilter = globalHighlightFilter; - boolean scoreOrdered = globalScoreOrdered; - boolean fragmentsAllowed = globalFragmentsAllowed; - String[] preTags = globalPreTags; - String[] postTags = globalPostTags; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { fieldName = parser.currentName(); @@ -135,33 +124,27 @@ public class HighlighterParseElement implements SearchParseElement { while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { preTagsList.add(parser.text()); } - preTags = preTagsList.toArray(new String[preTagsList.size()]); + field.preTags(preTagsList.toArray(new String[preTagsList.size()])); } else if ("post_tags".equals(fieldName) || "postTags".equals(fieldName)) { List postTagsList = Lists.newArrayList(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { postTagsList.add(parser.text()); } - postTags = postTagsList.toArray(new String[postTagsList.size()]); + field.postTags(postTagsList.toArray(new String[postTagsList.size()])); } } else if (token.isValue()) { if ("fragment_size".equals(fieldName) || "fragmentSize".equals(fieldName)) { - fragmentSize = parser.intValue(); + field.fragmentCharSize(parser.intValue()); } else if ("number_of_fragments".equals(fieldName) || "numberOfFragments".equals(fieldName)) { - numOfFragments = parser.intValue(); + field.numberOfFragments(parser.intValue()); } else if ("highlight_filter".equals(fieldName) || "highlightFilter".equals(fieldName)) { - highlightFilter = parser.booleanValue(); + field.highlightFilter(parser.booleanValue()); } else if ("score".equals(fieldName)) { - scoreOrdered = "score".equals(parser.text());; - } else if ("fragment_type".equals(fieldName) || "fragmentType".equals(fieldName)) { - fragmentsAllowed = !("content".equals(parser.text())); + field.scoreOrdered("score".equals(parser.text())); } } } - fields.add(new SearchContextHighlight.ParsedHighlightField( - highlightFieldName, - new SearchContextHighlight.ParsedHighlightSettings( - fragmentSize, numOfFragments, preTags, postTags, - scoreOrdered, highlightFilter, fragmentsAllowed))); + fields.add(field); } } } @@ -170,11 +153,29 @@ public class HighlighterParseElement implements SearchParseElement { if (globalPreTags != null && globalPostTags == null) { throw new SearchParseException(context, "Highlighter global preTags are set, but global postTags are not set"); } - context.highlight(new SearchContextHighlight( - fields, - new SearchContextHighlight.ParsedHighlightSettings( - globalFragmentSize, globalNumOfFragments, globalPreTags, globalPostTags, - globalScoreOrdered, globalHighlightFilter, globalFragmentsAllowed)) - ); + + // now, go over and fill all fields with default values from the global state + for (SearchContextHighlight.Field field : fields) { + if (field.preTags() == null) { + field.preTags(globalPreTags); + } + if (field.postTags() == null) { + field.postTags(globalPostTags); + } + if (field.highlightFilter() == null) { + field.highlightFilter(globalHighlightFilter); + } + if (field.scoreOrdered() == null) { + field.scoreOrdered(globalScoreOrdered); + } + if (field.fragmentCharSize() == -1) { + field.fragmentCharSize(globalFragmentSize); + } + if (field.numberOfFragments() == -1) { + field.numberOfFragments(globalNumOfFragments); + } + } + + context.highlight(new SearchContextHighlight(fields)); } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java index fdd72e7adb2..6d8c95f2138 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/SearchContextHighlight.java @@ -26,96 +26,86 @@ import java.util.List; */ public class SearchContextHighlight { - private final ParsedHighlightSettings global; + private final List fields; - private final List fields; - - public SearchContextHighlight(List fields, ParsedHighlightSettings settings) { + public SearchContextHighlight(List fields) { this.fields = fields; - this.global = settings; } - public List fields() { + public List fields() { return fields; } - public ParsedHighlightSettings global() { - return global; - } - - public static class ParsedHighlightField { + public static class Field { private final String field; - private final ParsedHighlightSettings settings; + private int fragmentCharSize = -1; - public ParsedHighlightField(String field, ParsedHighlightSettings settings) { + private int numberOfFragments = -1; + + private String[] preTags; + + private String[] postTags; + + private Boolean scoreOrdered; + + private Boolean highlightFilter; + + public Field(String field) { this.field = field; - this.settings = settings; } public String field() { return field; } - public ParsedHighlightSettings settings() { - return settings; - } - } - - public static class ParsedHighlightSettings { - - private final int fragmentCharSize; - - private final int numberOfFragments; - - private final String[] preTags; - - private final String[] postTags; - - private boolean scoreOrdered = false; - - private boolean highlightFilter = true; - - private boolean fragmentsAllowed = true; - - public ParsedHighlightSettings(int fragmentCharSize, int numberOfFragments, String[] preTags, String[] postTags, - boolean scoreOrdered, boolean highlightFilter, boolean fragmentsAllowed) { - this.fragmentCharSize = fragmentCharSize; - this.numberOfFragments = numberOfFragments; - this.preTags = preTags; - this.postTags = postTags; - this.scoreOrdered = scoreOrdered; - this.highlightFilter = highlightFilter; - this.fragmentsAllowed = fragmentsAllowed; - } - public int fragmentCharSize() { return fragmentCharSize; } + public void fragmentCharSize(int fragmentCharSize) { + this.fragmentCharSize = fragmentCharSize; + } + public int numberOfFragments() { return numberOfFragments; } + public void numberOfFragments(int numberOfFragments) { + this.numberOfFragments = numberOfFragments; + } + public String[] preTags() { return preTags; } + public void preTags(String[] preTags) { + this.preTags = preTags; + } + public String[] postTags() { return postTags; } - public boolean scoreOrdered() { + public void postTags(String[] postTags) { + this.postTags = postTags; + } + + public Boolean scoreOrdered() { return scoreOrdered; } - public boolean highlightFilter() { + public void scoreOrdered(boolean scoreOrdered) { + this.scoreOrdered = scoreOrdered; + } + + public Boolean highlightFilter() { return highlightFilter; } - public boolean fragmentsAllowed() { - return fragmentsAllowed; + public void highlightFilter(boolean highlightFilter) { + this.highlightFilter = highlightFilter; } } }