Highlighting: Allow highlighting on fields without term vectors, closes #585.

kimchy 2010-12-31 18:57:26 +02:00
parent 7c959e7ec3
commit e6b4834768
9 changed files with 339 additions and 384 deletions
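
Before this change, highlighting effectively required the field to be indexed with term vectors (with_positions_offsets), since the highlight phase only drove the FastVectorHighlighter. This commit adds a fallback to the plain Lucene Highlighter, which re-analyzes the field text, taken from the stored field when available and otherwise loaded from _source. A minimal usage sketch in the style of the new tests further down; the index, type and field names are illustrative:

    // Map field1 as a plain string: no term vectors, not stored.
    client.prepareIndex("test", "type1", "1")
            .setSource("field1", "this is a test")
            .setRefresh(true).execute().actionGet();

    // Highlighting field1 now works; the text is re-analyzed from _source.
    SearchResponse response = client.prepareSearch()
            .setIndices("test")
            .setQuery(termQuery("field1", "test"))
            .addHighlightedField("field1")
            .setHighlighterPreTags("<em>").setHighlighterPostTags("</em>")
            .execute().actionGet();
    // Expected fragment: "this is a <em>test</em>"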

View File

@@ -5,6 +5,8 @@
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-core/jars/lucene-core-3.0.3.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-queries/jars/lucene-queries-3.0.3.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-analyzers/jars/lucene-analyzers-3.0.3.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-highlighter/jars/lucene-highlighter-3.0.3.jar!/" />
<root url="jar://$GRADLE_REPOSITORY$/org.apache.lucene/lucene-memory/jars/lucene-memory-3.0.3.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES>
@@ -12,6 +14,8 @@
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/contrib/fast-vector-highlighter/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/contrib/analyzers/common/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/contrib/highlighter/src/java" />
<root url="file://$USER_HOME$/opt/lucene/3.0.3.src/contrib/memory/src/java" />
</SOURCES>
</library>
</component>

View File

@@ -41,6 +41,8 @@ dependencies {
compile 'org.apache.lucene:lucene-analyzers:3.0.3'
compile 'org.apache.lucene:lucene-queries:3.0.3'
compile 'org.apache.lucene:lucene-fast-vector-highlighter:3.0.3'
compile 'org.apache.lucene:lucene-memory:3.0.3'
compile 'org.apache.lucene:lucene-highlighter:3.0.3'
}
configurations {

View File

@@ -19,11 +19,20 @@
package org.elasticsearch.search.highlight;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.vectorhighlight.*;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.lucene.document.SingleFieldSelector;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.Uid;
@@ -35,9 +44,10 @@ import org.elasticsearch.search.highlight.vectorhighlight.SourceScoreOrderFragme
import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;
import java.io.IOException;
import java.util.Map;
import java.util.*;
import static org.elasticsearch.common.collect.Maps.*;
@@ -46,6 +56,8 @@ import static org.elasticsearch.common.collect.Maps.*;
*/
public class HighlightPhase implements SearchHitPhase {
private static final Encoder DEFAULT_ENCODER = new DefaultEncoder();
@Override public Map<String, ? extends SearchParseElement> parseElements() {
return ImmutableMap.of("highlight", new HighlighterParseElement());
}
@@ -65,19 +77,91 @@ public class HighlightPhase implements SearchHitPhase {
throw new SearchException(context.shardTarget(), "No mapping found for [" + field.field() + "]");
}
FastVectorHighlighter highlighter = buildHighlighter(context, mapper, field);
FieldQuery fieldQuery = buildFieldQuery(highlighter, context.query(), reader, field);
// if we can highlight using term vectors, use the FastVectorHighlighter; otherwise, fall back to the
// slower plain highlighter
if (mapper.termVector() != Field.TermVector.WITH_POSITIONS_OFFSETS) {
if (!context.queryRewritten()) {
try {
context.updateRewriteQuery(context.searcher().rewrite(context.query()));
} catch (IOException e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
}
}
// Don't use context.query() since it might be rewritten; we need to pass the non-rewritten queries to
// let the highlighter handle MultiTermQuery instances itself
QueryScorer queryScorer = new QueryScorer(context.parsedQuery().query(), null);
queryScorer.setExpandMultiTermQuery(true);
Fragmenter fragmenter;
if (field.numberOfFragments() == 0) {
fragmenter = new NullFragmenter();
} else {
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
}
Formatter formatter = new SimpleHTMLFormatter(field.preTags()[0], field.postTags()[0]);
Highlighter highlighter = new Highlighter(formatter, DEFAULT_ENCODER, queryScorer);
highlighter.setTextFragmenter(fragmenter);
String[] fragments;
try {
// a HACK to make the highlighter do highlighting, even though it's using the single frag list builder
int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments();
fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments);
} catch (IOException e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
List<Object> textsToHighlight;
if (mapper.stored()) {
try {
Document doc = reader.document(docId, new SingleFieldSelector(mapper.names().indexName()));
textsToHighlight = new ArrayList<Object>(doc.getFields().size());
for (Fieldable docField : doc.getFields()) {
if (docField.stringValue() != null) {
textsToHighlight.add(docField.stringValue());
}
}
} catch (Exception e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
}
} else {
SearchLookup lookup = context.lookup();
lookup.setNextReader(reader);
lookup.setNextDocId(docId);
textsToHighlight = lookup.source().getValues(mapper.names().fullName());
}
ArrayList<TextFragment> fragsList = new ArrayList<TextFragment>();
try {
for (Object textToHighlight : textsToHighlight) {
String text = textToHighlight.toString();
Analyzer analyzer = context.mapperService().documentMapper(hit.type()).mappers().indexAnalyzer();
TokenStream tokenStream = analyzer.reusableTokenStream(mapper.names().indexName(), new FastStringReader(text));
TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, text, false, field.numberOfFragments());
Collections.addAll(fragsList, bestTextFragments);
}
} catch (Exception e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
}
if (field.scoreOrdered()) {
Collections.sort(fragsList, new Comparator<TextFragment>() {
public int compare(TextFragment o1, TextFragment o2) {
return Math.round(o2.getScore() - o1.getScore());
}
});
}
int numberOfFragments = fragsList.size() < field.numberOfFragments() ? fragsList.size() : field.numberOfFragments();
String[] fragments = new String[numberOfFragments];
for (int i = 0; i < fragments.length; i++) {
fragments[i] = fragsList.get(i).toString();
}
HighlightField highlightField = new HighlightField(field.field(), fragments);
highlightFields.put(highlightField.name(), highlightField);
} else {
FastVectorHighlighter highlighter = buildHighlighter(context, mapper, field);
FieldQuery fieldQuery = buildFieldQuery(highlighter, context.query(), reader, field);
String[] fragments;
try {
// a HACK to make the highlighter do highlighting, even though it's using the single frag list builder
int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments();
fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments);
} catch (IOException e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
}
HighlightField highlightField = new HighlightField(field.field(), fragments);
highlightFields.put(highlightField.name(), highlightField);
}
HighlightField highlightField = new HighlightField(field.field(), fragments);
highlightFields.put(highlightField.name(), highlightField);
}
hit.highlightFields(highlightFields);
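
Condensed, the per-field dispatch this hunk introduces works roughly as follows; a sketch rather than the literal diff, and the two helper names are hypothetical (the real code inlines both branches):

    String[] fragments;
    if (mapper.termVector() != Field.TermVector.WITH_POSITIONS_OFFSETS) {
        // No positions+offsets term vectors: fall back to the plain Lucene Highlighter.
        // The text is read from the stored field when available, otherwise from _source
        // via SearchLookup, and re-analyzed with the type's index analyzer.
        fragments = highlightWithPlainHighlighter(context, hit, mapper, field, reader, docId);
    } else {
        // Term vectors with positions and offsets exist: keep the FastVectorHighlighter path.
        fragments = highlightWithFastVectorHighlighter(context, mapper, field, reader, docId);
    }
    HighlightField highlightField = new HighlightField(field.field(), fragments);
    highlightFields.put(highlightField.name(), highlightField);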

View File

@@ -26,6 +26,7 @@ import org.elasticsearch.common.collect.ImmutableList;
import org.elasticsearch.common.collect.Lists;
import org.elasticsearch.common.lease.Releasable;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.cache.field.data.FieldDataCache;
import org.elasticsearch.index.cache.filter.FilterCache;
import org.elasticsearch.index.cache.id.IdCache;
@@ -249,6 +250,10 @@ public class SearchContext implements Releasable {
return indexService.mapperService();
}
public AnalysisService analysisService() {
return indexService.analysisService();
}
public IndexQueryParserService queryParserService() {
return indexService.queryParserService();
}

View File

@@ -1,173 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.search.highlight;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.IOException;
import java.util.Arrays;
import static org.elasticsearch.action.search.SearchType.*;
import static org.elasticsearch.client.Requests.*;
import static org.elasticsearch.common.unit.TimeValue.*;
import static org.elasticsearch.common.xcontent.XContentFactory.*;
import static org.elasticsearch.index.query.xcontent.QueryBuilders.*;
import static org.elasticsearch.search.builder.SearchSourceBuilder.*;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
/**
* @author kimchy (shay.banon)
*/
public class HighlightSearchTests extends AbstractNodesTests {
private Client client;
@BeforeClass public void createNodes() throws Exception {
startNode("server1");
startNode("server2");
client = getClient();
client.admin().indices().create(createIndexRequest("test")).actionGet();
logger.info("Update mapping (_all to store and have term vectors)");
client.admin().indices().putMapping(putMappingRequest("test").source(mapping())).actionGet();
for (int i = 0; i < 100; i++) {
index(client("server1"), Integer.toString(i), "test", i);
}
client.admin().indices().refresh(refreshRequest("test")).actionGet();
}
@AfterClass public void closeNodes() {
client.close();
closeAllNodes();
}
protected Client getClient() {
return client("server1");
}
@Test public void testSimpleHighlighting() throws Exception {
SearchResponse searchResponse = client.prepareSearch()
.setIndices("test")
.setSearchType(DFS_QUERY_THEN_FETCH)
.setQuery(termQuery("_all", "test"))
.setFrom(0).setSize(60)
.addHighlightedField("_all").setHighlighterOrder("score").setHighlighterPreTags("<xxx>").setHighlighterPostTags("</xxx>")
.setScroll(timeValueMinutes(10))
.execute().actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(100l));
assertThat(searchResponse.hits().hits().length, equalTo(60));
for (int i = 0; i < 60; i++) {
SearchHit hit = searchResponse.hits().hits()[i];
// System.out.println(hit.target() + ": " + hit.explanation());
assertThat("id[" + hit.id() + "]", hit.id(), equalTo(Integer.toString(100 - i - 1)));
// System.out.println(hit.shard() + ": " + hit.highlightFields());
assertThat(hit.highlightFields().size(), equalTo(1));
assertThat(hit.highlightFields().get("_all").fragments().length, greaterThan(0));
}
searchResponse = client.prepareSearchScroll(searchResponse.scrollId()).execute().actionGet();
assertThat(searchResponse.hits().totalHits(), equalTo(100l));
assertThat(searchResponse.hits().hits().length, equalTo(40));
for (int i = 0; i < 40; i++) {
SearchHit hit = searchResponse.hits().hits()[i];
assertThat("id[" + hit.id() + "]", hit.id(), equalTo(Integer.toString(100 - 60 - 1 - i)));
}
}
@Test public void testPrefixHighlightingOnSpecificField() throws Exception {
SearchSourceBuilder source = searchSource()
.query(prefixQuery("multi", "te"))
.from(0).size(60).explain(true)
.highlight(highlight().field("_all").order("score").preTags("<xxx>").postTags("</xxx>"));
SearchResponse searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(100l));
assertThat(searchResponse.hits().hits().length, equalTo(60));
for (int i = 0; i < 60; i++) {
SearchHit hit = searchResponse.hits().hits()[i];
// System.out.println(hit.target() + ": " + hit.explanation());
// assertThat("id[" + hit.id() + "]", hit.id(), equalTo(Integer.toString(100 - i - 1)));
// System.out.println(hit.shard() + ": " + hit.highlightFields());
assertThat(hit.highlightFields().size(), equalTo(1));
assertThat(hit.highlightFields().get("_all").fragments().length, greaterThan(0));
}
}
@Test public void testPrefixHighlightingOnAllField() throws Exception {
SearchSourceBuilder source = searchSource()
.query(prefixQuery("_all", "te"))
.from(0).size(60).explain(true)
.highlight(highlight().field("_all").order("score").preTags("<xxx>").postTags("</xxx>"));
SearchResponse searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(100l));
assertThat(searchResponse.hits().hits().length, equalTo(60));
for (int i = 0; i < 60; i++) {
SearchHit hit = searchResponse.hits().hits()[i];
// System.out.println(hit.target() + ": " + hit.explanation());
// assertThat("id[" + hit.id() + "]", hit.id(), equalTo(Integer.toString(100 - i - 1)));
// System.out.println(hit.shard() + ": " + hit.highlightFields());
assertThat(hit.highlightFields().size(), equalTo(1));
assertThat(hit.highlightFields().get("_all").fragments().length, greaterThan(0));
}
}
private void index(Client client, String id, String nameValue, int age) throws IOException {
client.index(Requests.indexRequest("test").type("type1").id(id).source(source(id, nameValue, age))).actionGet();
}
public XContentBuilder mapping() throws IOException {
return XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("_all").field("store", "yes").field("termVector", "with_positions_offsets").endObject()
.endObject().endObject();
}
private XContentBuilder source(String id, String nameValue, int age) throws IOException {
StringBuilder multi = new StringBuilder().append(nameValue);
for (int i = 0; i < age; i++) {
multi.append(" ").append(nameValue);
}
return jsonBuilder().startObject()
.field("id", id)
.field("name", nameValue + id)
.field("age", age)
.field("multi", multi.toString())
.field("_boost", age * 10)
.endObject();
}
}

View File

@@ -1,9 +0,0 @@
cluster:
routing:
schedule: 100ms
index:
number_of_shards: 3
number_of_replicas: 0
routing :
# Use simple hashing since we want even distribution and our ids are simple incremented number based
hash.type : simple

View File

@@ -0,0 +1,232 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.search.highlight;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.indices.IndexMissingException;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.IOException;
import java.util.Arrays;
import static org.elasticsearch.action.search.SearchType.*;
import static org.elasticsearch.client.Requests.*;
import static org.elasticsearch.common.unit.TimeValue.*;
import static org.elasticsearch.common.xcontent.XContentFactory.*;
import static org.elasticsearch.index.query.xcontent.QueryBuilders.*;
import static org.elasticsearch.search.builder.SearchSourceBuilder.*;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
/**
* @author kimchy (shay.banon)
*/
public class HighlighterSearchTests extends AbstractNodesTests {
private Client client;
@BeforeClass public void createNodes() throws Exception {
startNode("server1");
startNode("server2");
client = getClient();
}
@AfterClass public void closeNodes() {
client.close();
closeAllNodes();
}
protected Client getClient() {
return client("server1");
}
@Test public void testPlainHighlighter() throws Exception {
try {
client.admin().indices().prepareDelete("test").execute().actionGet();
} catch (IndexMissingException e) {
// it's ok
}
client.admin().indices().prepareCreate("test").execute().actionGet();
client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
client.prepareIndex("test", "type1")
.setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog")
.setRefresh(true).execute().actionGet();
logger.info("--> highlighting and searching on field1");
SearchSourceBuilder source = searchSource()
.query(termQuery("field1", "test"))
.from(0).size(60).explain(true)
.highlight(highlight().field("field1").order("score").preTags("<xxx>").postTags("</xxx>"));
SearchResponse searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field1").fragments()[0], equalTo("this is a <xxx>test</xxx>"));
logger.info("--> searching on _all, highlighting on field1");
source = searchSource()
.query(termQuery("_all", "test"))
.from(0).size(60).explain(true)
.highlight(highlight().field("field1").order("score").preTags("<xxx>").postTags("</xxx>"));
searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field1").fragments()[0], equalTo("this is a <xxx>test</xxx>"));
logger.info("--> searching on _all, highlighting on field2");
source = searchSource()
.query(termQuery("_all", "quick"))
.from(0).size(60).explain(true)
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>"));
searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field2").fragments()[0], equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
logger.info("--> searching on _all, highlighting on field2");
source = searchSource()
.query(prefixQuery("_all", "qui"))
.from(0).size(60).explain(true)
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>"));
searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field2").fragments()[0], equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
}
@Test public void testFastVectorHighlighter() throws Exception {
try {
client.admin().indices().prepareDelete("test").execute().actionGet();
} catch (IndexMissingException e) {
// it's ok
}
client.admin().indices().prepareCreate("test").addMapping("type1", type1TermVectorMapping()).execute().actionGet();
client.admin().cluster().prepareHealth("test").setWaitForGreenStatus().execute().actionGet();
client.prepareIndex("test", "type1")
.setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog")
.setRefresh(true).execute().actionGet();
logger.info("--> highlighting and searching on field1");
SearchSourceBuilder source = searchSource()
.query(termQuery("field1", "test"))
.from(0).size(60).explain(true)
.highlight(highlight().field("field1", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>"));
SearchResponse searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field1").fragments()[0], equalTo("this is a <xxx>test</xxx>"));
logger.info("--> searching on _all, highlighting on field1");
source = searchSource()
.query(termQuery("_all", "test"))
.from(0).size(60).explain(true)
.highlight(highlight().field("field1", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>"));
searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field1").fragments()[0], equalTo("this is a <xxx>test</xxx>"));
logger.info("--> searching on _all, highlighting on field2");
source = searchSource()
.query(termQuery("_all", "quick"))
.from(0).size(60).explain(true)
.highlight(highlight().field("field2", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>"));
searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field2").fragments()[0], equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
logger.info("--> searching on _all, highlighting on field2");
source = searchSource()
.query(prefixQuery("_all", "qui"))
.from(0).size(60).explain(true)
.highlight(highlight().field("field2", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>"));
searchResponse = client.search(searchRequest("test").source(source).searchType(QUERY_THEN_FETCH).scroll(timeValueMinutes(10))).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
assertThat(searchResponse.hits().getAt(0).highlightFields().get("field2").fragments()[0], equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
}
public XContentBuilder type1TermVectorMapping() throws IOException {
return XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("_all").field("store", "yes").field("termVector", "with_positions_offsets").endObject()
.startObject("properties")
.startObject("field1").field("type", "string").field("termVector", "with_positions_offsets").endObject()
.startObject("field2").field("type", "string").field("termVector", "with_positions_offsets").endObject()
.endObject()
.endObject().endObject();
}
@Test public void testSameContent() throws Exception {
try {
client.admin().indices().prepareDelete("test").execute().actionGet();
} catch (Exception e) {
// ignore
}
client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2))
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("title").field("type", "string").field("store", "yes").field("term_vector", "with_positions_offsets").endObject()
.endObject().endObject().endObject())
.execute().actionGet();
for (int i = 0; i < 5; i++) {
client.prepareIndex("test", "type1", Integer.toString(i))
.setSource("title", "This is a test on the highlighting bug present in elasticsearch").setRefresh(true).execute().actionGet();
}
SearchResponse search = client.prepareSearch()
.setQuery(fieldQuery("title", "bug"))
.addHighlightedField("title", -1, 0)
.execute().actionGet();
assertThat(search.hits().totalHits(), equalTo(5l));
for (SearchHit hit : search.hits()) {
assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
}
}
}

View File

@@ -1,86 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.search.highlight;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import static org.elasticsearch.common.xcontent.XContentFactory.*;
import static org.elasticsearch.index.query.xcontent.QueryBuilders.*;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
/**
* @author kimchy (shay.banon)
*/
public class SameContentHighlightingTests extends AbstractNodesTests {
private Client client;
@BeforeClass public void createNodes() throws Exception {
startNode("node1");
client = getClient();
}
@AfterClass public void closeNodes() {
client.close();
closeAllNodes();
}
protected Client getClient() {
return client("node1");
}
@Test public void testSameContent() throws Exception {
try {
client.admin().indices().prepareDelete("test").execute().actionGet();
} catch (Exception e) {
// ignore
}
client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2))
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("title").field("type", "string").field("store", "yes").field("term_vector", "with_positions_offsets").endObject()
.endObject().endObject().endObject())
.execute().actionGet();
for (int i = 0; i < 5; i++) {
client.prepareIndex("test", "type1", Integer.toString(i))
.setSource("title", "This is a test on the highlighting bug present in elasticsearch").setRefresh(true).execute().actionGet();
}
SearchResponse search = client.prepareSearch()
.setQuery(fieldQuery("title", "bug"))
.addHighlightedField("title", -1, 0)
.execute().actionGet();
assertThat(search.hits().totalHits(), equalTo(5l));
for (SearchHit hit : search.hits()) {
assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
}
}
}

View File

@@ -1,104 +0,0 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.search.highlight;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.test.integration.AbstractNodesTests;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import static org.elasticsearch.common.xcontent.XContentFactory.*;
import static org.elasticsearch.index.query.xcontent.QueryBuilders.*;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
/**
* @author kimchy (shay.banon)
*/
public class SourceFieldHighlightingTests extends AbstractNodesTests {
private Client client;
@BeforeClass public void createNodes() throws Exception {
startNode("node1");
client = getClient();
}
@AfterClass public void closeNodes() {
client.close();
closeAllNodes();
}
protected Client getClient() {
return client("node1");
}
@Test public void testSourceLookupHighlighting() throws Exception {
try {
client.admin().indices().prepareDelete("test").execute().actionGet();
} catch (Exception e) {
// ignore
}
client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2))
.addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
// we don't store title; now let's see if it works...
.startObject("title").field("type", "string").field("store", "no").field("term_vector", "with_positions_offsets").endObject()
.startObject("attachments").startObject("properties").startObject("body").field("type", "string").field("term_vector", "with_positions_offsets").endObject().endObject().endObject()
.endObject().endObject().endObject())
.execute().actionGet();
for (int i = 0; i < 5; i++) {
client.prepareIndex("test", "type1", Integer.toString(i))
.setSource(XContentFactory.jsonBuilder().startObject()
.field("title", "This is a test on the highlighting bug present in elasticsearch")
.startArray("attachments").startObject().field("body", "attachment 1").endObject().startObject().field("body", "attachment 2").endObject().endArray()
.endObject())
.setRefresh(true).execute().actionGet();
}
SearchResponse search = client.prepareSearch()
.setQuery(fieldQuery("title", "bug"))
.addHighlightedField("title", -1, 0)
.execute().actionGet();
assertThat(search.hits().totalHits(), equalTo(5l));
for (SearchHit hit : search.hits()) {
assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
}
search = client.prepareSearch()
.setQuery(fieldQuery("attachments.body", "attachment"))
.addHighlightedField("attachments.body", -1, 0)
.execute().actionGet();
assertThat(search.hits().totalHits(), equalTo(5l));
for (SearchHit hit : search.hits()) {
assertThat(hit.highlightFields().get("attachments.body").fragments()[0], equalTo("<em>attachment</em> 1 <em>attachment</em> 2"));
}
}
}