Index phrases (#30450)

Specifying `index_phrases: true` on a text field mapping will add a subsidiary
[field]._index_phrase field, indexing two-term shingles from the parent field.
The parent analysis chain is re-used, wrapped with a FixedShingleFilter.

At query time, if a phrase match query is executed, the mapping will redirect it
to run against the subsidiary field.

This should give faster phrase querying at the cost of a larger index and
longer indexing times.

Relates to #27049
This commit is contained in:
Alan Woodward 2018-06-04 08:50:35 +01:00 committed by GitHub
parent dc8a4fb460
commit 0427339ab0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 457 additions and 14 deletions

View File

@ -96,6 +96,14 @@ The following parameters are accepted by `text` fields:
the expense of a larger index. Accepts an
<<index-prefix-config,`index-prefix configuration block`>>
<<index-phrases,`index_phrases`>>::
If enabled, two-term word combinations ('shingles') are indexed into a separate
field. This allows exact phrase queries to run more efficiently, at the expense
of a larger index. Note that this works best when stopwords are not removed,
as phrases containing stopwords will not use the subsidiary field and will fall
back to a standard phrase query. Accepts `true` or `false` (default).
<<norms,`norms`>>::
Whether field-length should be taken into account when scoring queries.

View File

@ -0,0 +1,67 @@
---
"search with indexed phrases":
- skip:
version: " - 6.99.99"
reason: index_phrases is only available as of 7.0.0
- do:
indices.create:
index: test
body:
mappings:
test:
properties:
text:
type: text
index_phrases: true
- do:
index:
index: test
type: test
id: 1
body: { text: "peter piper picked a peck of pickled peppers" }
- do:
indices.refresh:
index: [test]
- do:
search:
index: test
body:
query:
match_phrase:
text:
query: "peter piper"
- match: {hits.total: 1}
- do:
search:
index: test
q: '"peter piper"~1'
df: text
- match: {hits.total: 1}
- do:
search:
index: test
body:
query:
match_phrase:
text: "peter piper picked"
- match: {hits.total: 1}
- do:
search:
index: test
body:
query:
match_phrase:
text: "piper"
- match: {hits.total: 1}

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.mapper;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
@ -43,6 +44,7 @@ import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.QueryShardException;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.search.DocValueFormat;
import org.joda.time.DateTimeZone;
@ -353,6 +355,14 @@ public abstract class MappedFieldType extends FieldType {
public abstract Query existsQuery(QueryShardContext context);
/**
 * Default implementation for field types that do not support phrase queries:
 * always rejects the request.
 *
 * @throws IllegalArgumentException always, naming this field and its type
 */
public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
    final String message =
        "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]";
    throw new IllegalArgumentException(message);
}
/**
 * Default implementation for field types that do not support multi-phrase queries:
 * always rejects the request.
 *
 * @throws IllegalArgumentException always, naming this field and its type
 */
public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
    final String message =
        "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]";
    throw new IllegalArgumentException(message);
}
/**
* An enum used to describe the relation between the range of terms in a
* shard when compared with a query range

View File

@ -19,20 +19,29 @@
package org.elasticsearch.index.mapper;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.shingle.FixedShingleFilter;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
@ -43,7 +52,7 @@ import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
import org.elasticsearch.index.query.QueryShardContext;
import java.io.IOException;
import java.util.Collections;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@ -54,9 +63,13 @@ import static org.elasticsearch.index.mapper.TypeParsers.parseTextField;
/** A {@link FieldMapper} for full-text fields. */
public class TextFieldMapper extends FieldMapper {
private static final Logger logger = ESLoggerFactory.getLogger(TextFieldMapper.class);
public static final String CONTENT_TYPE = "text";
private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;
public static final String FAST_PHRASE_SUFFIX = "._index_phrase";
public static class Defaults {
public static final double FIELDDATA_MIN_FREQUENCY = 0;
public static final double FIELDDATA_MAX_FREQUENCY = Integer.MAX_VALUE;
@ -105,6 +118,11 @@ public class TextFieldMapper extends FieldMapper {
return builder;
}
/**
 * Records on the field type being built whether two-term phrases should be indexed.
 *
 * @param indexPhrases the value passed on to {@code setIndexPhrases}
 * @return the builder, so calls can be chained
 */
public Builder indexPhrases(boolean indexPhrases) {
fieldType().setIndexPhrases(indexPhrases);
return builder;
}
@Override
public Builder docValues(boolean docValues) {
if (docValues) {
@ -166,8 +184,16 @@ public class TextFieldMapper extends FieldMapper {
prefixFieldType.setAnalyzer(fieldType.indexAnalyzer());
prefixMapper = new PrefixFieldMapper(prefixFieldType, context.indexSettings());
}
if (fieldType().indexPhrases) {
if (fieldType().isSearchable() == false) {
throw new IllegalArgumentException("Cannot set index_phrases on unindexed field [" + name() + "]");
}
if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
throw new IllegalArgumentException("Cannot set index_phrases on field [" + name() + "] if positions are not enabled");
}
}
return new TextFieldMapper(
name, fieldType, defaultFieldType, positionIncrementGap, prefixMapper,
name, fieldType(), defaultFieldType, positionIncrementGap, prefixMapper,
context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
}
}
@ -211,12 +237,35 @@ public class TextFieldMapper extends FieldMapper {
builder.indexPrefixes(minChars, maxChars);
DocumentMapperParser.checkNoRemainingFields(propName, indexPrefix, parserContext.indexVersionCreated());
iterator.remove();
} else if (propName.equals("index_phrases")) {
builder.indexPhrases(XContentMapValues.nodeBooleanValue(propNode, "index_phrases"));
iterator.remove();
}
}
return builder;
}
}
/**
 * Wraps another analyzer and appends a two-term {@link FixedShingleFilter}
 * to the token stream it produces. The delegate's reuse strategy is kept.
 */
private static class PhraseWrappedAnalyzer extends AnalyzerWrapper {

    private final Analyzer delegate;

    PhraseWrappedAnalyzer(Analyzer delegate) {
        super(delegate.getReuseStrategy());
        this.delegate = delegate;
    }

    /** The same delegate is used whatever the field name. */
    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        return delegate;
    }

    /** Keeps the delegate's tokenizer and adds a shingle filter (size 2) on top of its token stream. */
    @Override
    protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
        TokenStream shingled = new FixedShingleFilter(components.getTokenStream(), 2);
        return new TokenStreamComponents(components.getTokenizer(), shingled);
    }
}
private static class PrefixWrappedAnalyzer extends AnalyzerWrapper {
private final int minChars;
@ -242,6 +291,46 @@ public class TextFieldMapper extends FieldMapper {
}
}
/**
 * Field type of the subsidiary {@code ._index_phrase} field. Its settings are
 * derived from the parent text field type: same index analyzer (wrapped so
 * that it emits two-term shingles), positions always indexed, and offsets /
 * term vectors enabled when the parent has them.
 */
private static final class PhraseFieldType extends StringFieldType {

    final TextFieldType parent;

    PhraseFieldType(TextFieldType parent) {
        setTokenized(true);

        // Positions are always indexed; offsets only if the parent indexes them too.
        boolean parentIndexesOffsets = parent.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
        if (parentIndexesOffsets) {
            setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        } else {
            setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        }

        if (parent.storeTermVectorOffsets()) {
            setStoreTermVectors(true);
            setStoreTermVectorPositions(true);
            setStoreTermVectorOffsets(true);
        }

        NamedAnalyzer parentAnalyzer = parent.indexAnalyzer();
        setAnalyzer(parentAnalyzer.name(), parentAnalyzer.analyzer());
        setName(parent.name() + FAST_PHRASE_SUFFIX);
        this.parent = parent;
    }

    /** Installs {@code delegate}, wrapped in a {@link PhraseWrappedAnalyzer}, as the index analyzer. */
    void setAnalyzer(String name, Analyzer delegate) {
        Analyzer shingling = new PhraseWrappedAnalyzer(delegate);
        setIndexAnalyzer(new NamedAnalyzer(name, AnalyzerScope.INDEX, shingling));
    }

    /** A clone is rebuilt from the same parent rather than copied field by field. */
    @Override
    public MappedFieldType clone() {
        return new PhraseFieldType(parent);
    }

    @Override
    public String typeName() {
        return "phrase";
    }

    /** Exists queries are not supported on the phrase subfield. */
    @Override
    public Query existsQuery(QueryShardContext context) {
        throw new UnsupportedOperationException();
    }
}
static final class PrefixFieldType extends StringFieldType {
final int minChars;
@ -310,6 +399,23 @@ public class TextFieldMapper extends FieldMapper {
}
}
/**
 * Mapper for the subsidiary phrase field. It is created with no multi-fields and
 * no copy-to targets, and uses the phrase field type as both its field type and
 * its default field type.
 */
private static final class PhraseFieldMapper extends FieldMapper {
PhraseFieldMapper(PhraseFieldType fieldType, Settings indexSettings) {
super(fieldType.name(), fieldType, fieldType, indexSettings, MultiFields.empty(), CopyTo.empty());
}
/**
 * Not supported: this mapper never parses a field itself.
 *
 * @throws UnsupportedOperationException always
 */
@Override
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
throw new UnsupportedOperationException();
}
/** @return the fixed content type name {@code "phrase"} */
@Override
protected String contentType() {
return "phrase";
}
}
private static final class PrefixFieldMapper extends FieldMapper {
protected PrefixFieldMapper(PrefixFieldType fieldType, Settings indexSettings) {
@ -343,6 +449,7 @@ public class TextFieldMapper extends FieldMapper {
private double fielddataMaxFrequency;
private int fielddataMinSegmentSize;
private PrefixFieldType prefixFieldType;
private boolean indexPhrases = false;
public TextFieldType() {
setTokenized(true);
@ -358,6 +465,7 @@ public class TextFieldMapper extends FieldMapper {
this.fielddataMinFrequency = ref.fielddataMinFrequency;
this.fielddataMaxFrequency = ref.fielddataMaxFrequency;
this.fielddataMinSegmentSize = ref.fielddataMinSegmentSize;
this.indexPhrases = ref.indexPhrases;
if (ref.prefixFieldType != null) {
this.prefixFieldType = ref.prefixFieldType.clone();
}
@ -374,6 +482,7 @@ public class TextFieldMapper extends FieldMapper {
}
TextFieldType that = (TextFieldType) o;
return fielddata == that.fielddata
&& indexPhrases == that.indexPhrases
&& Objects.equals(prefixFieldType, that.prefixFieldType)
&& fielddataMinFrequency == that.fielddataMinFrequency
&& fielddataMaxFrequency == that.fielddataMaxFrequency
@ -382,7 +491,7 @@ public class TextFieldMapper extends FieldMapper {
@Override
public int hashCode() {
return Objects.hash(super.hashCode(), fielddata, prefixFieldType,
return Objects.hash(super.hashCode(), fielddata, indexPhrases, prefixFieldType,
fielddataMinFrequency, fielddataMaxFrequency, fielddataMinSegmentSize);
}
@ -427,6 +536,11 @@ public class TextFieldMapper extends FieldMapper {
this.prefixFieldType = prefixFieldType;
}
/**
 * Sets the {@code indexPhrases} flag on this field type.
 *
 * @param indexPhrases the new value of the flag
 */
void setIndexPhrases(boolean indexPhrases) {
// NOTE(review): presumably rejects the change once the field type is frozen - confirm against FieldType
checkIfFrozen();
this.indexPhrases = indexPhrases;
}
public PrefixFieldType getPrefixFieldType() {
return this.prefixFieldType;
}
@ -458,6 +572,93 @@ public class TextFieldMapper extends FieldMapper {
}
}
/**
 * Builds a phrase query from an analyzed token stream.
 * <p>
 * If phrases are indexed for this field, the query has no slop and the stream
 * contains no position gaps, the tokens are combined into two-term shingles and
 * the query targets the {@code ._index_phrase} subfield instead of {@code field}.
 * In every other case a regular phrase query against {@code field} is built.
 *
 * @param field the name of the field to query
 * @param stream the analyzed query text
 * @param slop the phrase slop
 * @param enablePosIncrements if {@code true} term positions follow the stream's position
 *                            increments, otherwise each token is placed one position after the last
 * @return the phrase query
 * @throws IOException if reading the token stream fails
 */
@Override
public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {
    if (indexPhrases && slop == 0) {
        // hasGaps() reads the stream to its end. Keep hold of the caching wrapper so that the
        // tokens can be replayed below even when the caller did not pass a CachingTokenFilter;
        // re-reading an already consumed raw stream would violate the TokenStream contract.
        CachingTokenFilter cached = cache(stream);
        stream = cached;
        if (hasGaps(cached) == false) {
            stream = new FixedShingleFilter(cached, 2);
            field = field + FAST_PHRASE_SUFFIX;
        }
    }
    PhraseQuery.Builder builder = new PhraseQuery.Builder();
    builder.setSlop(slop);
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    int position = -1;
    stream.reset();
    while (stream.incrementToken()) {
        if (enablePosIncrements) {
            position += posIncrAtt.getPositionIncrement();
        } else {
            position += 1;
        }
        builder.add(new Term(field, termAtt.getBytesRef()), position);
    }
    return builder.build();
}
/**
 * Builds a multi-phrase query from an analyzed token stream in which several
 * tokens may share a position (a token with position increment 0 is grouped
 * with the preceding token).
 * <p>
 * If phrases are indexed for this field, the query has no slop and the stream
 * contains no position gaps, the tokens are combined into two-term shingles and
 * the query targets the {@code ._index_phrase} subfield instead of {@code field}.
 *
 * @param field the name of the field to query
 * @param stream the analyzed query text
 * @param slop the phrase slop
 * @param enablePositionIncrements if {@code true} each group of terms is added at its explicit
 *                                 position, otherwise groups are added without a position
 * @return the multi-phrase query
 * @throws IOException if reading the token stream fails
 */
@Override
public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
    if (indexPhrases && slop == 0) {
        // hasGaps() reads the stream to its end. Keep hold of the caching wrapper so that the
        // tokens can be replayed below even when the caller did not pass a CachingTokenFilter;
        // re-reading an already consumed raw stream would violate the TokenStream contract.
        CachingTokenFilter cached = cache(stream);
        stream = cached;
        if (hasGaps(cached) == false) {
            stream = new FixedShingleFilter(cached, 2);
            field = field + FAST_PHRASE_SUFFIX;
        }
    }
    MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
    mpqb.setSlop(slop);
    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    int position = -1;
    List<Term> multiTerms = new ArrayList<>();
    stream.reset();
    while (stream.incrementToken()) {
        int positionIncrement = posIncrAtt.getPositionIncrement();
        // A positive increment starts a new position: flush the terms collected for the previous one.
        if (positionIncrement > 0 && multiTerms.size() > 0) {
            if (enablePositionIncrements) {
                mpqb.add(multiTerms.toArray(new Term[0]), position);
            } else {
                mpqb.add(multiTerms.toArray(new Term[0]));
            }
            multiTerms.clear();
        }
        position += positionIncrement;
        multiTerms.add(new Term(field, termAtt.getBytesRef()));
    }
    // Flush the terms of the final position.
    if (enablePositionIncrements) {
        mpqb.add(multiTerms.toArray(new Term[0]), position);
    } else {
        mpqb.add(multiTerms.toArray(new Term[0]));
    }
    return mpqb.build();
}
/**
 * Returns {@code in} itself when it already is a {@link CachingTokenFilter},
 * otherwise a new {@link CachingTokenFilter} wrapping it.
 */
private static CachingTokenFilter cache(TokenStream in) {
    return in instanceof CachingTokenFilter
        ? (CachingTokenFilter) in
        : new CachingTokenFilter(in);
}
/**
 * Resets the stream and scans it for a token whose position increment is
 * greater than one, stopping at the first such token.
 *
 * @return {@code true} if a token with a position increment above one was found
 * @throws IOException if reading the stream fails
 */
private static boolean hasGaps(CachingTokenFilter stream) throws IOException {
    PositionIncrementAttribute increments = stream.getAttribute(PositionIncrementAttribute.class);
    stream.reset();
    boolean gapFound = false;
    // Short-circuits as soon as a gap is seen, so no further tokens are pulled.
    while (gapFound == false && stream.incrementToken()) {
        gapFound = increments.getPositionIncrement() > 1;
    }
    return gapFound;
}
@Override
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
if (fielddata == false) {
@ -472,6 +673,9 @@ public class TextFieldMapper extends FieldMapper {
public void checkCompatibility(MappedFieldType other, List<String> conflicts) {
super.checkCompatibility(other, conflicts);
TextFieldType tft = (TextFieldType) other;
if (tft.indexPhrases != this.indexPhrases) {
conflicts.add("mapper [" + name() + "] has different [index_phrases] values");
}
if (Objects.equals(this.prefixFieldType, tft.prefixFieldType) == false) {
if (this.prefixFieldType == null) {
conflicts.add("mapper [" + name()
@ -490,8 +694,9 @@ public class TextFieldMapper extends FieldMapper {
private int positionIncrementGap;
private PrefixFieldMapper prefixFieldMapper;
private PhraseFieldMapper phraseFieldMapper;
protected TextFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
protected TextFieldMapper(String simpleName, TextFieldType fieldType, MappedFieldType defaultFieldType,
int positionIncrementGap, PrefixFieldMapper prefixFieldMapper,
Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
@ -502,6 +707,7 @@ public class TextFieldMapper extends FieldMapper {
}
this.positionIncrementGap = positionIncrementGap;
this.prefixFieldMapper = prefixFieldMapper;
this.phraseFieldMapper = fieldType.indexPhrases ? new PhraseFieldMapper(new PhraseFieldType(fieldType), indexSettings) : null;
}
@Override
@ -535,15 +741,25 @@ public class TextFieldMapper extends FieldMapper {
if (prefixFieldMapper != null) {
prefixFieldMapper.addField(value, fields);
}
if (phraseFieldMapper != null) {
fields.add(new Field(phraseFieldMapper.fieldType.name(), value, phraseFieldMapper.fieldType));
}
}
}
@Override
public Iterator<Mapper> iterator() {
if (prefixFieldMapper == null) {
List<Mapper> subIterators = new ArrayList<>();
if (prefixFieldMapper != null) {
subIterators.add(prefixFieldMapper);
}
if (phraseFieldMapper != null) {
subIterators.add(phraseFieldMapper);
}
if (subIterators.size() == 0) {
return super.iterator();
}
return Iterators.concat(super.iterator(), Collections.singleton(prefixFieldMapper).iterator());
return Iterators.concat(super.iterator(), subIterators.iterator());
}
@Override
@ -562,6 +778,10 @@ public class TextFieldMapper extends FieldMapper {
throw new IllegalArgumentException("mapper [" + name() + "] has different index_prefix settings, current ["
+ this.prefixFieldMapper + "], merged [" + mw.prefixFieldMapper + "]");
}
else if (this.fieldType().indexPhrases != mw.fieldType().indexPhrases) {
throw new IllegalArgumentException("mapper [" + name() + "] has different index_phrases settings, current ["
+ this.fieldType().indexPhrases + "], merged [" + mw.fieldType().indexPhrases + "]");
}
}
@Override
@ -602,5 +822,8 @@ public class TextFieldMapper extends FieldMapper {
if (fieldType().prefixFieldType != null) {
fieldType().prefixFieldType.doXContent(builder);
}
if (fieldType().indexPhrases) {
builder.field("index_phrases", fieldType().indexPhrases);
}
}
}

View File

@ -28,6 +28,7 @@ import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.search.MatchQuery.ZeroTermsQuery;

View File

@ -352,16 +352,14 @@ public class MatchQuery {
@Override
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
if (hasPositions(mapper) == false) {
IllegalStateException exc =
new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery");
IllegalStateException e = checkForPositions(field);
if (e != null) {
if (lenient) {
return newLenientFieldQuery(field, exc);
} else {
throw exc;
return newLenientFieldQuery(field, e);
}
throw e;
}
Query query = super.analyzePhrase(field, stream, slop);
Query query = mapper.phraseQuery(field, stream, slop, enablePositionIncrements);
if (query instanceof PhraseQuery) {
// synonyms that expand to multiple terms can return a phrase query.
return blendPhraseQuery((PhraseQuery) query, mapper);
@ -369,6 +367,25 @@ public class MatchQuery {
return query;
}
/**
 * Delegates multi-phrase query construction to the field's mapper, after
 * checking that the field has position data. When it does not, a lenient
 * field query is returned if {@code lenient} is set; otherwise the error is thrown.
 */
@Override
protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
    IllegalStateException positionsError = checkForPositions(field);
    if (positionsError == null) {
        return mapper.multiPhraseQuery(field, stream, slop, enablePositionIncrements);
    }
    if (lenient == false) {
        throw positionsError;
    }
    return newLenientFieldQuery(field, positionsError);
}
/**
 * Creates, without throwing, the error that describes a phrase query on a field lacking positions.
 *
 * @return {@code null} when {@code hasPositions(mapper)} is true, otherwise the exception to report
 */
private IllegalStateException checkForPositions(String field) {
    return hasPositions(mapper)
        ? null
        : new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery");
}
/**
* Checks if graph analysis should be enabled for the field depending
* on the provided {@link Analyzer}

View File

@ -19,6 +19,8 @@
package org.elasticsearch.index.mapper;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
@ -29,6 +31,8 @@ import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@ -38,6 +42,7 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.lucene.uid.Versions;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
@ -47,7 +52,9 @@ import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
@ -65,6 +72,7 @@ import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.core.Is.is;
public class TextFieldMapperTests extends ESSingleNodeTestCase {
@ -73,7 +81,13 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
@Before
public void setup() {
indexService = createIndex("test");
Settings settings = Settings.builder()
.put("index.analysis.filter.mySynonyms.type", "synonym")
.putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto"))
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
.put("index.analysis.analyzer.synonym.filter", "mySynonyms")
.build();
indexService = createIndex("test", settings);
parser = indexService.mapperService().documentMapperParser();
}
@ -670,6 +684,102 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
}
}
/**
 * Checks the index_phrases mapping option end to end: the mapping round-trips,
 * match_phrase queries are redirected to the "._index_phrase" subfield only when
 * expected, documents get the shingled subfield, and invalid configurations are rejected.
 */
public void testFastPhraseMapping() throws IOException {
QueryShardContext queryShardContext = indexService.newQueryShardContext(
randomInt(20), null, () -> {
throw new UnsupportedOperationException();
}, null);
// Two text fields with index_phrases enabled: one using the english analyzer, one using the
// "synonym" analyzer.
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties")
.startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("index_phrases", true)
.endObject()
.startObject("synfield")
.field("type", "text")
.field("analyzer", "synonym")
.field("index_phrases", true)
.endObject()
.endObject()
.endObject().endObject());
// The parsed mapping must serialize back to the same source, including index_phrases.
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
// Two-term phrase without slop: a single shingle on the phrase subfield.
Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext);
assertThat(q, is(new PhraseQuery("field._index_phrase", "two word")));
// Three terms: two overlapping shingles on the phrase subfield.
Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext);
assertThat(q2, is(new PhraseQuery("field._index_phrase", "three word", "word here")));
// With slop the query stays on the parent field.
Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext);
assertThat(q3, is(new PhraseQuery(1, "field", "two", "word")));
// A single term yields a plain term query on the parent field.
Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext);
assertThat(q4, is(new TermQuery(new Term("field", "singleton"))));
// A position gap in the analyzed text (second term at position 2) keeps the query on the parent field.
Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext);
assertThat(q5,
is(new PhraseQuery.Builder().add(new Term("field", "sparkl")).add(new Term("field", "stopword"), 2).build()));
// Synonyms produce a multi-phrase query whose alternatives are shingles on the phrase subfield.
Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext);
assertThat(q6, is(new MultiPhraseQuery.Builder()
.add(new Term[]{
new Term("synfield._index_phrase", "motor car"),
new Term("synfield._index_phrase", "motor auto")})
.build()));
// Indexing a document adds exactly one phrase subfield whose first token is a two-term shingle.
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
.bytes(XContentFactory.jsonBuilder()
.startObject()
.field("field", "Some English text that is going to be very useful")
.endObject()),
XContentType.JSON));
IndexableField[] fields = doc.rootDoc().getFields("field._index_phrase");
assertEquals(1, fields.length);
try (TokenStream ts = fields[0].tokenStream(queryShardContext.getMapperService().indexAnalyzer(), null)) {
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
assertTrue(ts.incrementToken());
assertEquals("some english", termAtt.toString());
}
// index_phrases on an unindexed field is rejected.
{
String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("index", "false")
.field("index_phrases", true)
.endObject().endObject()
.endObject().endObject());
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> parser.parse("type", new CompressedXContent(badConfigMapping))
);
assertThat(e.getMessage(), containsString("Cannot set index_phrases on unindexed field [field]"));
}
// index_phrases on a field indexed without positions is rejected.
{
String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("index_options", "freqs")
.field("index_phrases", true)
.endObject().endObject()
.endObject().endObject());
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> parser.parse("type", new CompressedXContent(badConfigMapping))
);
assertThat(e.getMessage(), containsString("Cannot set index_phrases on field [field] if positions are not enabled"));
}
}
public void testIndexPrefixMapping() throws IOException {
QueryShardContext queryShardContext = indexService.newQueryShardContext(

View File

@ -68,6 +68,13 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
tft.setFielddataMinSegmentSize(1000);
}
});
addModifier(new Modifier("index_phrases", false) {
@Override
public void modify(MappedFieldType ft) {
TextFieldMapper.TextFieldType tft = (TextFieldMapper.TextFieldType) ft;
tft.setIndexPhrases(true);
}
});
addModifier(new Modifier("index_prefixes", false) {
@Override
public void modify(MappedFieldType ft) {