Added the `force_source` option to highlighting, which enforces the use of the _source even if there are stored fields.

The percolator uses this option to deal with the fact that the MemoryIndex doesn't support stored fields;
this is possible because the _source of the document being percolated is always present.

Closes #4348
This commit is contained in:
Martijn van Groningen 2013-12-09 11:57:59 +01:00
parent 59e4e58683
commit 10e2528cce
12 changed files with 145 additions and 8 deletions

View File

@ -110,6 +110,24 @@ The following is an example that forces the use of the plain highlighter:
} }
-------------------------------------------------- --------------------------------------------------
==== Force highlighting on source
added[1.0.0.RC1]
Forces the highlighting to highlight fields based on the source even if fields are
stored separately. Defaults to `false`.
[source,js]
--------------------------------------------------
{
"query" : {...},
"highlight" : {
"fields" : {
"content" : {"force_source" : true}
}
}
}
--------------------------------------------------
[[tags]] [[tags]]
==== Highlighting Tags ==== Highlighting Tags

View File

@ -82,6 +82,7 @@ import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.InternalFacets; import org.elasticsearch.search.facet.InternalFacets;
import org.elasticsearch.search.highlight.HighlightField; import org.elasticsearch.search.highlight.HighlightField;
import org.elasticsearch.search.highlight.HighlightPhase; import org.elasticsearch.search.highlight.HighlightPhase;
import org.elasticsearch.search.highlight.SearchContextHighlight;
import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException; import java.io.IOException;
@ -309,6 +310,11 @@ public class PercolatorService extends AbstractComponent {
// We need to get the actual source from the request body for highlighting, so parse the request body again // We need to get the actual source from the request body for highlighting, so parse the request body again
// and only get the doc source. // and only get the doc source.
if (context.highlight() != null) { if (context.highlight() != null) {
// Enforce highlighting by source, because MemoryIndex doesn't support stored fields.
for (SearchContextHighlight.Field field : context.highlight().fields()) {
field.forceSource(true);
}
parser.close(); parser.close();
currentFieldName = null; currentFieldName = null;
parser = XContentFactory.xContent(source).createParser(source); parser = XContentFactory.xContent(source).createParser(source);

View File

@ -104,7 +104,7 @@ public class FastVectorHighlighter implements Highlighter {
if (field.numberOfFragments() == 0) { if (field.numberOfFragments() == 0) {
fragListBuilder = new SingleFragListBuilder(); fragListBuilder = new SingleFragListBuilder();
if (mapper.fieldType().stored()) { if (!field.forceSource() && mapper.fieldType().stored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.preTags(), field.postTags(), boundaryScanner); fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.preTags(), field.postTags(), boundaryScanner);
} else { } else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner); fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner);
@ -112,13 +112,13 @@ public class FastVectorHighlighter implements Highlighter {
} else { } else {
fragListBuilder = field.fragmentOffset() == -1 ? new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fragmentOffset()); fragListBuilder = field.fragmentOffset() == -1 ? new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fragmentOffset());
if (field.scoreOrdered()) { if (field.scoreOrdered()) {
if (mapper.fieldType().stored()) { if (!field.forceSource() && mapper.fieldType().stored()) {
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags(), boundaryScanner); fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.preTags(), field.postTags(), boundaryScanner);
} else { } else {
fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner); fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner);
} }
} else { } else {
if (mapper.fieldType().stored()) { if (!field.forceSource() && mapper.fieldType().stored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.preTags(), field.postTags(), boundaryScanner); fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.preTags(), field.postTags(), boundaryScanner);
} else { } else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner); fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner);

View File

@ -60,6 +60,8 @@ public class HighlightBuilder implements ToXContent {
private Map<String, Object> options; private Map<String, Object> options;
private Boolean forceSource;
/** /**
* Adds a field to be highlighted with default fragment size of 100 characters, and * Adds a field to be highlighted with default fragment size of 100 characters, and
* default number of fragments of 5 using the default encoder * default number of fragments of 5 using the default encoder
@ -233,6 +235,14 @@ public class HighlightBuilder implements ToXContent {
return this; return this;
} }
/**
* Forces the highlighting to highlight fields based on the source even if fields are stored separately.
* When a value has been set, it is written as the {@code force_source} entry of the highlight object.
*
* @param forceSource {@code true} to highlight based on the source, {@code false} otherwise
* @return this builder, so that calls can be chained
*/
public HighlightBuilder forceSource(boolean forceSource) {
this.forceSource = forceSource;
return this;
}
@Override @Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject("highlight"); builder.startObject("highlight");
@ -269,6 +279,9 @@ public class HighlightBuilder implements ToXContent {
if (options != null && options.size() > 0) { if (options != null && options.size() > 0) {
builder.field("options", options); builder.field("options", options);
} }
if (forceSource != null) {
builder.field("force_source", forceSource);
}
if (fields != null) { if (fields != null) {
builder.startObject("fields"); builder.startObject("fields");
for (Field field : fields) { for (Field field : fields) {
@ -321,6 +334,9 @@ public class HighlightBuilder implements ToXContent {
if (field.options != null && field.options.size() > 0) { if (field.options != null && field.options.size() > 0) {
builder.field("options", field.options); builder.field("options", field.options);
} }
// Serialize the per-field setting. The value written must be the field's own flag,
// not the enclosing builder's global forceSource (which may be null or differ).
if (field.forceSource != null) {
    builder.field("force_source", field.forceSource);
}
builder.endObject(); builder.endObject();
} }
@ -349,6 +365,7 @@ public class HighlightBuilder implements ToXContent {
Integer noMatchSize; Integer noMatchSize;
String[] matchedFields; String[] matchedFields;
Map<String, Object> options; Map<String, Object> options;
Boolean forceSource;
public Field(String name) { public Field(String name) {
this.name = name; this.name = name;
@ -479,5 +496,14 @@ public class HighlightBuilder implements ToXContent {
this.matchedFields = matchedFields; this.matchedFields = matchedFields;
return this; return this;
} }
/**
* Forces the highlighting to highlight this field based on the source even if this field is stored separately.
*
* @param forceSource {@code true} to highlight this field based on the source, {@code false} otherwise
* @return this field, so that calls can be chained
*/
public Field forceSource(boolean forceSource) {
this.forceSource = forceSource;
return this;
}
} }
} }

View File

@ -31,6 +31,7 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.InternalSearchHit; import org.elasticsearch.search.internal.InternalSearchHit;
@ -85,6 +86,13 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
fieldNamesToHighlight = ImmutableSet.of(field.field()); fieldNamesToHighlight = ImmutableSet.of(field.field());
} }
if (field.forceSource()) {
SourceFieldMapper sourceFieldMapper = context.mapperService().documentMapper(hitContext.hit().type()).sourceMapper();
if (!sourceFieldMapper.enabled()) {
throw new ElasticSearchIllegalArgumentException("source is forced for field [" + field.field() + "] but type [" + hitContext.hit().type() + "] has disabled _source");
}
}
for (String fieldName : fieldNamesToHighlight) { for (String fieldName : fieldNamesToHighlight) {
FieldMapper<?> fieldMapper = getMapperForField(fieldName, context, hitContext); FieldMapper<?> fieldMapper = getMapperForField(fieldName, context, hitContext);
if (fieldMapper == null) { if (fieldMapper == null) {

View File

@ -41,9 +41,9 @@ public final class HighlightUtils {
} }
static List<Object> loadFieldValues(FieldMapper<?> mapper, SearchContext searchContext, FetchSubPhase.HitContext hitContext) throws IOException { static List<Object> loadFieldValues(FieldMapper<?> mapper, SearchContext searchContext, FetchSubPhase.HitContext hitContext, boolean forceSource) throws IOException {
List<Object> textsToHighlight; List<Object> textsToHighlight;
if (mapper.fieldType().stored()) { if (!forceSource && mapper.fieldType().stored()) {
CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(ImmutableSet.of(mapper.names().indexName()), false); CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(ImmutableSet.of(mapper.names().indexName()), false);
hitContext.reader().document(hitContext.docId(), fieldVisitor); hitContext.reader().document(hitContext.docId(), fieldVisitor);
textsToHighlight = fieldVisitor.fields().get(mapper.names().indexName()); textsToHighlight = fieldVisitor.fields().get(mapper.names().indexName());

View File

@ -75,6 +75,7 @@ public class HighlighterParseElement implements SearchParseElement {
boolean globalScoreOrdered = false; boolean globalScoreOrdered = false;
boolean globalHighlightFilter = false; boolean globalHighlightFilter = false;
boolean globalRequireFieldMatch = false; boolean globalRequireFieldMatch = false;
boolean globalForceSource = false;
int globalFragmentSize = 100; int globalFragmentSize = 100;
int globalNumOfFragments = 5; int globalNumOfFragments = 5;
String globalEncoder = "default"; String globalEncoder = "default";
@ -136,6 +137,8 @@ public class HighlighterParseElement implements SearchParseElement {
globalFragmenter = parser.text(); globalFragmenter = parser.text();
} else if ("no_match_size".equals(topLevelFieldName) || "noMatchSize".equals(topLevelFieldName)) { } else if ("no_match_size".equals(topLevelFieldName) || "noMatchSize".equals(topLevelFieldName)) {
globalNoMatchSize = parser.intValue(); globalNoMatchSize = parser.intValue();
} else if ("force_source".equals(topLevelFieldName) || "forceSource".equals(topLevelFieldName)) {
globalForceSource = parser.booleanValue();
} }
} else if (token == XContentParser.Token.START_OBJECT && "options".equals(topLevelFieldName)) { } else if (token == XContentParser.Token.START_OBJECT && "options".equals(topLevelFieldName)) {
globalOptions = parser.map(); globalOptions = parser.map();
@ -199,6 +202,8 @@ public class HighlighterParseElement implements SearchParseElement {
field.fragmenter(parser.text()); field.fragmenter(parser.text());
} else if ("no_match_size".equals(fieldName) || "noMatchSize".equals(fieldName)) { } else if ("no_match_size".equals(fieldName) || "noMatchSize".equals(fieldName)) {
field.noMatchSize(parser.intValue()); field.noMatchSize(parser.intValue());
} else if ("force_source".equals(fieldName) || "forceSource".equals(fieldName)) {
field.forceSource(parser.booleanValue());
} }
} else if (token == XContentParser.Token.START_OBJECT) { } else if (token == XContentParser.Token.START_OBJECT) {
if ("highlight_query".equals(fieldName) || "highlightQuery".equals(fieldName)) { if ("highlight_query".equals(fieldName) || "highlightQuery".equals(fieldName)) {
@ -267,6 +272,9 @@ public class HighlighterParseElement implements SearchParseElement {
if (field.noMatchSize() == -1) { if (field.noMatchSize() == -1) {
field.noMatchSize(globalNoMatchSize); field.noMatchSize(globalNoMatchSize);
} }
if (field.forceSource() == null) {
field.forceSource(globalForceSource);
}
} }
context.highlight(new SearchContextHighlight(fields)); context.highlight(new SearchContextHighlight(fields));

View File

@ -99,7 +99,7 @@ public class PlainHighlighter implements Highlighter {
List<Object> textsToHighlight; List<Object> textsToHighlight;
try { try {
textsToHighlight = HighlightUtils.loadFieldValues(mapper, context, hitContext); textsToHighlight = HighlightUtils.loadFieldValues(mapper, context, hitContext, field.forceSource());
for (Object textToHighlight : textsToHighlight) { for (Object textToHighlight : textsToHighlight) {
String text = textToHighlight.toString(); String text = textToHighlight.toString();

View File

@ -93,7 +93,7 @@ public class PostingsHighlighter implements Highlighter {
try { try {
//we manually load the field values (from source if needed) //we manually load the field values (from source if needed)
List<Object> textsToHighlight = HighlightUtils.loadFieldValues(fieldMapper, context, hitContext); List<Object> textsToHighlight = HighlightUtils.loadFieldValues(fieldMapper, context, hitContext, field.forceSource());
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(mapperHighlighterEntry.passageFormatter, textsToHighlight, mergeValues, Integer.MAX_VALUE-1, field.noMatchSize()); CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(mapperHighlighterEntry.passageFormatter, textsToHighlight, mergeValues, Integer.MAX_VALUE-1, field.noMatchSize());
if (field.numberOfFragments() == 0) { if (field.numberOfFragments() == 0) {

View File

@ -64,6 +64,8 @@ public class SearchContextHighlight {
private String highlighterType; private String highlighterType;
private Boolean forceSource;
private String fragmenter; private String fragmenter;
private int boundaryMaxScan = -1; private int boundaryMaxScan = -1;
@ -166,6 +168,14 @@ public class SearchContextHighlight {
this.highlighterType = type; this.highlighterType = type;
} }
/**
* Returns the force-source flag configured for this field.
*
* @return the configured flag, or {@code null} if no value has been set
*/
public Boolean forceSource() {
return forceSource;
}
/**
* Sets the force-source flag for this field.
*
* @param forceSource {@code true} to force highlighting based on the source, {@code false} otherwise
*/
public void forceSource(boolean forceSource) {
this.forceSource = forceSource;
}
public String fragmenter() { public String fragmenter() {
return fragmenter; return fragmenter;
} }

View File

@ -1305,11 +1305,24 @@ public class PercolatorTests extends ElasticsearchIntegrationTest {
ensureGreen(); ensureGreen();
if (randomBoolean()) { if (randomBoolean()) {
// FVH HL
client.admin().indices().preparePutMapping("test").setType("type") client.admin().indices().preparePutMapping("test").setType("type")
.setSource( .setSource(
jsonBuilder().startObject().startObject("type") jsonBuilder().startObject().startObject("type")
.startObject("properties") .startObject("properties")
.startObject("field1").field("type", "string").field("term_vector", "with_positions_offsets").endObject() .startObject("field1").field("type", "string").field("store", randomBoolean())
.field("term_vector", "with_positions_offsets").endObject()
.endObject()
.endObject().endObject()
)
.execute().actionGet();
} if (randomBoolean()) {
// plain hl with stored fields
client.admin().indices().preparePutMapping("test").setType("type")
.setSource(
jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field1").field("type", "string").field("store", true).endObject()
.endObject() .endObject()
.endObject().endObject() .endObject().endObject()
) )

View File

@ -506,6 +506,54 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("this is another <xxx>test</xxx>")); assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("this is another <xxx>test</xxx>"));
} }
@Test
public void testPlainHighlighterForceSource() throws Exception {
    // field1 is mapped as stored, with term vectors and offsets in the postings
    prepareCreate("test")
            .addMapping("type1", "field1", "type=string,store=yes,term_vector=with_positions_offsets,index_options=offsets")
            .get();
    ensureGreen();

    client().prepareIndex("test", "type1")
            .setSource("field1", "The quick brown fox jumps over the lazy dog").get();
    refresh();

    // Run every highlighter type first with force_source enabled, then with it disabled;
    // the highlighted output is expected to be the same in all six combinations.
    final boolean[] forceSourceValues = {true, false};
    final String[] highlighterTypes = {"fvh", "plain", "postings"};
    for (boolean forced : forceSourceValues) {
        for (String highlighterType : highlighterTypes) {
            Field highlightedField = new Field("field1")
                    .preTags("<xxx>")
                    .postTags("</xxx>")
                    .highlighterType(highlighterType)
                    .forceSource(forced);
            SearchResponse response = client().prepareSearch("test")
                    .setQuery(termQuery("field1", "quick"))
                    .addHighlightedField(highlightedField)
                    .get();
            assertHighlight(response, 0, "field1", 0, 1, equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
        }
    }
}
@Test @Test
public void testPlainHighlighter() throws Exception { public void testPlainHighlighter() throws Exception {
createIndex("test"); createIndex("test");