Don't fail hard if broken analysis is used.
Today an analysis chain containing broken token filters or tokenizers such as WordDelimiterFilter can produce term vectors that violate the positions/offsets contract: the offsets of highlight tokens decrease even though their positions increase. When the FastVectorHighlighter consumes such term vectors, this causes `StringIndexOutOfBoundsExceptions`. If we detect such a situation we can instead re-sort the tokens by their offsets; this might produce somewhat odd highlights, but it no longer fails hard with a StringIndexOutOfBoundsException. Closes #3006
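For illustration only (this class is not part of the commit and its name is invented), a small diagnostic built on the plain Lucene token-stream API can make the violated contract visible: it walks the tokens an analyzer emits and flags every token whose start offset moves backwards while its position still advances, which is the pattern a chain such as an nGram tokenizer combined with word_delimiter (as in the test added below) can produce.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class TokenOffsetOrderCheck {

    /** Prints every token and flags the ones whose start offset goes backwards
     *  even though the position keeps increasing. */
    public static void dump(Analyzer analyzer, String field, String text) throws IOException {
        TokenStream ts = analyzer.tokenStream(field, new StringReader(text));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        PositionIncrementAttribute posIncr = ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        int position = -1;
        int previousStart = -1;
        while (ts.incrementToken()) {
            position += posIncr.getPositionIncrement();
            boolean offsetWentBackwards = offset.startOffset() < previousStart;
            System.out.printf("pos=%d start=%d end=%d term=%s%s%n",
                    position, offset.startOffset(), offset.endOffset(), term.toString(),
                    offsetWentBackwards ? "   <-- violates the offsets contract" : "");
            previousStart = offset.startOffset();
        }
        ts.end();
        ts.close();
    }
}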
commit e1b66b34ea
parent 14ae2fb765
FragmentBuilderHelper.java (new file)
@@ -0,0 +1,102 @@
/*
 * Licensed to Elastic Search and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elastic Search licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.highlight.vectorhighlight;

import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory;
import org.elasticsearch.index.mapper.FieldMapper;

/**
 * Simple helper class for {@link FastVectorHighlighter} {@link FragmentsBuilder} implementations.
 */
public final class FragmentBuilderHelper {

    private FragmentBuilderHelper() {
        // no instance
    }

    /**
     * Fixes problems with broken analysis chains if positions and offsets are messed up that can lead to
     * {@link StringIndexOutOfBoundsException} in the {@link FastVectorHighlighter}
     */
    public static WeightedFragInfo fixWeightedFragInfo(FieldMapper<?> mapper, Field[] values, WeightedFragInfo fragInfo) {
        assert fragInfo != null : "FragInfo must not be null";
        assert mapper.names().indexName().equals(values[0].name()) : "Expected FieldMapper for field " + values[0].name();
        if (!fragInfo.getSubInfos().isEmpty() && (containsBrokenAnalysis(mapper.indexAnalyzer()))) {
            /* This is a special case where broken analysis like WDF is used for term-vector creation at index-time
             * which can potentially mess up the offsets. To prevent a StringIndexOutOfBoundsException we need to resort
             * the fragments based on their offsets rather than using solely the positions as it is done in
             * the FastVectorHighlighter. Yet, this is really a lucene problem and should be fixed in lucene rather
             * than in this hack... aka. "we are working on it!" */
            final List<SubInfo> subInfos = fragInfo.getSubInfos();
            Collections.sort(subInfos, new Comparator<SubInfo>() {
                @Override
                public int compare(SubInfo o1, SubInfo o2) {
                    int startOffset = o1.getTermsOffsets().get(0).getStartOffset();
                    int startOffset2 = o2.getTermsOffsets().get(0).getStartOffset();
                    return Integer.compare(startOffset, startOffset2);
                }
            });
            return new WeightedFragInfo(Math.min(fragInfo.getSubInfos().get(0).getTermsOffsets().get(0).getStartOffset(),
                    fragInfo.getStartOffset()), fragInfo.getEndOffset(), subInfos, fragInfo.getTotalBoost());
        } else {
            return fragInfo;
        }
    }

    private static boolean containsBrokenAnalysis(Analyzer analyzer) {
        // TODO maybe we need a getter on NamedAnalyzer that tells if this uses broken Analysis
        if (analyzer instanceof NamedAnalyzer) {
            analyzer = ((NamedAnalyzer) analyzer).analyzer();
        }
        if (analyzer instanceof CustomAnalyzer) {
            final CustomAnalyzer a = (CustomAnalyzer) analyzer;
            if (a.tokenizerFactory() instanceof EdgeNGramTokenizerFactory) {
                return true;
            }
            TokenFilterFactory[] tokenFilters = a.tokenFilters();
            for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
                if (tokenFilterFactory instanceof WordDelimiterTokenFilterFactory
                        || tokenFilterFactory instanceof EdgeNGramTokenFilterFactory) {
                    return true;
                }
            }
        }
        return false;
    }
}
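A toy rendering of the re-sort above may help (invented numbers, plain ints standing in for Lucene's SubInfo objects; not part of the commit): once the sub infos are ordered by their first term's start offset, fragments are cut from left to right again, and the fragment window is widened with Math.min so the earliest highlighted term still falls inside it.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class FixWeightedFragInfoToy {
    public static void main(String[] args) {
        int fragmentStart = 10;  // fragment window as originally computed, e.g. [10, 60]
        // first-term start offsets of the sub infos, walked in position order by a broken chain
        List<Integer> subInfoStarts = new ArrayList<>(Arrays.asList(42, 17, 55, 3));

        Collections.sort(subInfoStarts);  // 3, 17, 42, 55 -> offsets increase again
        int fixedStart = Math.min(subInfoStarts.get(0), fragmentStart);

        // prints: [3, 17, 42, 55] fragment now starts at 3
        System.out.println(subInfoStarts + " fragment now starts at " + fixedStart);
    }
}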
SourceScoreOrderFragmentsBuilder.java
@@ -19,29 +19,38 @@
 
 package org.elasticsearch.search.highlight.vectorhighlight;
 
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.ngram.NGramTokenizerFactory;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.highlight.Encoder;
 import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
 import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
+import org.elasticsearch.index.analysis.CustomAnalyzer;
+import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.search.internal.SearchContext;
 import org.elasticsearch.search.lookup.SearchLookup;
 
-import java.io.IOException;
-import java.util.List;
-
 /**
  *
  */
 public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder {
 
-    private final FieldMapper mapper;
+    private final FieldMapper<?> mapper;
 
     private final SearchContext searchContext;
 
-    public SourceScoreOrderFragmentsBuilder(FieldMapper mapper, SearchContext searchContext,
+    public SourceScoreOrderFragmentsBuilder(FieldMapper<?> mapper, SearchContext searchContext,
             String[] preTags, String[] postTags, BoundaryScanner boundaryScanner) {
         super(preTags, postTags, boundaryScanner);
         this.mapper = mapper;
@@ -62,4 +71,9 @@ public class SourceScoreOrderFragmentsBuilder extends ScoreOrderFragmentsBuilder
         }
         return fields;
     }
+
+    protected String makeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
+            String[] preTags, String[] postTags, Encoder encoder) {
+        return super.makeFragment(buffer, index, values, FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo), preTags, postTags, encoder);
+    }
 }
SourceSimpleFragmentsBuilder.java
@@ -23,13 +23,18 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.highlight.Encoder;
 import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
 import org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.search.internal.SearchContext;
 import org.elasticsearch.search.lookup.SearchLookup;
 
 import java.io.IOException;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.List;
 
 /**

@@ -68,4 +73,9 @@ public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder
         return fields;
     }
+
+    protected String makeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
+            String[] preTags, String[] postTags, Encoder encoder) {
+        return super.makeFragment(buffer, index, values, FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo), preTags, postTags, encoder);
+    }
 
 }
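Any other FragmentsBuilder can opt in to the same protection the way the two builders above do; a minimal sketch, not part of the commit (the class name MyFragmentsBuilder is made up and the constructor is simplified relative to the Elasticsearch builders):

import org.apache.lucene.document.Field;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
import org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.highlight.vectorhighlight.FragmentBuilderHelper;

// Sketch only: override the protected makeFragment hook and sanitize the WeightedFragInfo
// before the fragment text is cut, exactly as the Source*FragmentsBuilder classes do.
public class MyFragmentsBuilder extends SimpleFragmentsBuilder {

    private final FieldMapper<?> mapper;

    public MyFragmentsBuilder(FieldMapper<?> mapper, String[] preTags, String[] postTags) {
        super(preTags, postTags);
        this.mapper = mapper;
    }

    @Override
    protected String makeFragment(StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
            String[] preTags, String[] postTags, Encoder encoder) {
        return super.makeFragment(buffer, index, values,
                FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo), preTags, postTags, encoder);
    }
}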
ElasticsearchAssertions.java
@@ -21,6 +21,8 @@ package org.elasticsearch.test.hamcrest;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.*;
+
+import java.util.Arrays;
 
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.search.SearchHit;
 import org.hamcrest.Matcher;

@@ -61,8 +63,8 @@ public class ElasticsearchAssertions {
     }
 
     public static void assertHighlight(SearchResponse resp, int hit, String field, int fragment, Matcher<String> matcher) {
-        assertThat(resp.getShardFailures().length, equalTo(0));
-        assertThat(resp.getHits().hits().length, greaterThan(hit));
+        assertThat("Unexpected ShardFailures: " + Arrays.toString(resp.getShardFailures()), resp.getShardFailures().length, equalTo(0));
+        assertThat("not enough hits", resp.getHits().hits().length, greaterThan(hit));
         assertThat(resp.getHits().hits()[hit].getHighlightFields().get(field), notNullValue());
         assertThat(resp.getHits().hits()[hit].getHighlightFields().get(field).fragments().length, greaterThan(fragment));
         assertThat(resp.getHits().hits()[hit].highlightFields().get(field).fragments()[fragment].string(), matcher);
HighlighterSearchTests.java
@@ -19,12 +19,31 @@
 
 package org.elasticsearch.test.integration.search.highlight;
 
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.PhraseQuery;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.vectorhighlight.FieldQuery;
+import static org.elasticsearch.action.search.SearchType.QUERY_THEN_FETCH;
+import static org.elasticsearch.client.Requests.searchRequest;
+import static org.elasticsearch.common.unit.TimeValue.timeValueMinutes;
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
+import static org.elasticsearch.index.query.QueryBuilders.boostingQuery;
+import static org.elasticsearch.index.query.QueryBuilders.commonTerms;
+import static org.elasticsearch.index.query.QueryBuilders.constantScoreQuery;
+import static org.elasticsearch.index.query.QueryBuilders.fieldQuery;
+import static org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
+import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
+import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
+import static org.elasticsearch.index.query.QueryBuilders.prefixQuery;
+import static org.elasticsearch.index.query.QueryBuilders.termQuery;
+import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
+import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
+import static org.testng.Assert.fail;
+
+import java.io.IOException;
+import java.util.Arrays;
+
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.action.search.SearchPhaseExecutionException;
 import org.elasticsearch.action.search.SearchResponse;
@@ -37,6 +56,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.index.query.FilterBuilders;
 import org.elasticsearch.index.query.MatchQueryBuilder;
+import org.elasticsearch.index.query.MatchQueryBuilder.Operator;
 import org.elasticsearch.index.query.MatchQueryBuilder.Type;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.indices.IndexMissingException;
@@ -45,31 +65,10 @@ import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.builder.SearchSourceBuilder;
 import org.elasticsearch.search.highlight.HighlightBuilder;
 import org.elasticsearch.test.integration.AbstractNodesTests;
-import org.hamcrest.Matcher;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
-import java.io.IOException;
-import java.util.Arrays;
-
-import static org.elasticsearch.action.search.SearchType.QUERY_THEN_FETCH;
-import static org.elasticsearch.client.Requests.searchRequest;
-import static org.elasticsearch.common.unit.TimeValue.timeValueMinutes;
-import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
-import static org.elasticsearch.index.query.QueryBuilders.*;
-import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
-import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
-import static org.hamcrest.MatcherAssert.assertThat;
-import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.greaterThan;
-import static org.hamcrest.Matchers.notNullValue;
-
-import static org.hamcrest.Matchers.instanceOf;
-import static org.testng.Assert.fail;
-
 /**
  *
  */
@@ -94,6 +93,70 @@ public class HighlighterSearchTests extends AbstractNodesTests {
         return client("server1");
     }
 
+    @Test
+    public void testNgramHighlightingWithBrokenPositions() throws ElasticSearchException, IOException {
+        try {
+            client.admin().indices().prepareDelete("test").execute().actionGet();
+        } catch (Exception e) {
+            // ignore
+        }
+
+        client.admin().indices().prepareCreate("test")
+                .addMapping("test", jsonBuilder()
+                        .startObject()
+                            .startObject("test")
+                                .startObject("properties")
+                                    .startObject("name")
+                                        .startObject("fields")
+                                            .startObject("autocomplete")
+                                                .field("type", "string")
+                                                .field("index_analyzer", "autocomplete")
+                                                .field("search_analyzer", "search_autocomplete")
+                                                .field("term_vector", "with_positions_offsets")
+                                            .endObject()
+                                            .startObject("name")
+                                                .field("type", "string")
+                                            .endObject()
+                                        .endObject()
+                                        .field("type", "multi_field")
+                                    .endObject()
+                                .endObject()
+                            .endObject()
+                        .endObject())
+                .setSettings(ImmutableSettings.settingsBuilder()
+                        .put("index.number_of_shards", 1)
+                        .put("analysis.tokenizer.autocomplete.max_gram", 20)
+                        .put("analysis.tokenizer.autocomplete.min_gram", 1)
+                        .put("analysis.tokenizer.autocomplete.type", "nGram")
+                        .put("analysis.filter.wordDelimiter.type", "word_delimiter")
+                        .putArray("analysis.filter.wordDelimiter.type_table",
+                                "& => ALPHANUM", "| => ALPHANUM", "! => ALPHANUM",
+                                "? => ALPHANUM", ". => ALPHANUM", "- => ALPHANUM", "# => ALPHANUM", "% => ALPHANUM",
+                                "+ => ALPHANUM", ", => ALPHANUM", "~ => ALPHANUM", ": => ALPHANUM", "/ => ALPHANUM",
+                                "^ => ALPHANUM", "$ => ALPHANUM", "@ => ALPHANUM", ") => ALPHANUM", "( => ALPHANUM",
+                                "] => ALPHANUM", "[ => ALPHANUM", "} => ALPHANUM", "{ => ALPHANUM")
+
+                        .put("analysis.filter.wordDelimiter.type.split_on_numerics", false)
+                        .put("analysis.filter.wordDelimiter.generate_word_parts", true)
+                        .put("analysis.filter.wordDelimiter.generate_number_parts", false)
+                        .put("analysis.filter.wordDelimiter.catenate_words", true)
+                        .put("analysis.filter.wordDelimiter.catenate_numbers", true)
+                        .put("analysis.filter.wordDelimiter.catenate_all", false)
+
+                        .put("analysis.analyzer.autocomplete.tokenizer", "autocomplete")
+                        .putArray("analysis.analyzer.autocomplete.filter", "lowercase", "wordDelimiter")
+                        .put("analysis.analyzer.search_autocomplete.tokenizer", "whitespace")
+                        .putArray("analysis.analyzer.search_autocomplete.filter", "lowercase", "wordDelimiter"))
+                .execute().actionGet();
+        client.prepareIndex("test", "test", "1")
+                .setSource(XContentFactory.jsonBuilder()
+                        .startObject()
+                            .field("name", "ARCOTEL Hotels Deutschland")
+                        .endObject())
+                .setRefresh(true).execute().actionGet();
+        SearchResponse search = client.prepareSearch("test").setTypes("test").setQuery(matchQuery("name.autocomplete", "deut tel").operator(Operator.OR)).addHighlightedField("name.autocomplete").execute().actionGet();
+        assertHighlight(search, 0, "name.autocomplete", 0, equalTo("ARCO<em>TEL</em> Ho<em>tel</em>s <em>Deut</em>schland"));
+    }
+
     @Test
     public void testNgramHighlighting() throws ElasticSearchException, IOException {