Index phrases (#30450)
Specifying `index_phrases: true` on a text field mapping will add a subsidiary [field]._index_phrase field, indexing two-term shingles from the parent field. The parent analysis chain is re-used, wrapped with a FixedShingleFilter. At query time, if a phrase match query is executed, the mapping will redirect it to run against the subsidiary field. This should trade faster phrase querying for a larger index and longer indexing times. Relates to #27049
This commit is contained in:
parent
dc8a4fb460
commit
0427339ab0
|
@ -96,6 +96,14 @@ The following parameters are accepted by `text` fields:
|
|||
the expense of a larger index. Accepts an
|
||||
<<index-prefix-config,`index-prefix configuration block`>>
|
||||
|
||||
<<index-phrases,`index_phrases`>>::
|
||||
|
||||
If enabled, two-term word combinations ('shingles') are indexed into a separate
|
||||
field. This allows exact phrase queries to run more efficiently, at the expense
|
||||
of a larger index. Note that this works best when stopwords are not removed,
|
||||
as phrases containing stopwords will not use the subsidiary field and will fall
|
||||
back to a standard phrase query. Accepts `true` or `false` (default).
|
||||
|
||||
<<norms,`norms`>>::
|
||||
|
||||
Whether field-length should be taken into account when scoring queries.
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
---
# REST test for the index_phrases text-field option: phrase queries on a field
# mapped with index_phrases:true should still match documents correctly,
# whether they hit the shingled subsidiary field (exact phrase, no slop) or
# fall back to a standard phrase query (sloppy phrase, single term).
"search with indexed phrases":
  - skip:
      version: " - 6.99.99"
      reason: index_phrase is only available as of 7.0.0

  - do:
      indices.create:
          index: test
          body:
              mappings:
                  test:
                      properties:
                          text:
                              type: text
                              index_phrases: true

  - do:
      index:
          index: test
          type: test
          id: 1
          body: { text: "peter piper picked a peck of pickled peppers" }

  - do:
      indices.refresh:
          index: [test]

  # Exact two-term phrase: served by the ._index_phrase sub-field
  - do:
      search:
          index: test
          body:
            query:
              match_phrase:
                text:
                  query: "peter piper"

  - match: {hits.total: 1}

  # Sloppy phrase (~1): must fall back to a standard phrase query
  - do:
      search:
          index: test
          q: '"peter piper"~1'
          df: text

  - match: {hits.total: 1}

  # Three-term phrase: rewritten as two overlapping shingles
  - do:
      search:
          index: test
          body:
            query:
              match_phrase:
                text: "peter piper picked"

  - match: {hits.total: 1}

  # Single-term "phrase": degenerates to a term query on the parent field
  - do:
      search:
          index: test
          body:
            query:
              match_phrase:
                text: "piper"

  - match: {hits.total: 1}
|
||||
|
||||
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -43,6 +44,7 @@ import org.elasticsearch.index.fielddata.IndexFieldData;
|
|||
import org.elasticsearch.index.query.QueryRewriteContext;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.index.query.QueryShardException;
|
||||
import org.elasticsearch.index.search.MatchQuery;
|
||||
import org.elasticsearch.index.similarity.SimilarityProvider;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
import org.joda.time.DateTimeZone;
|
||||
|
@ -353,6 +355,14 @@ public abstract class MappedFieldType extends FieldType {
|
|||
|
||||
public abstract Query existsQuery(QueryShardContext context);
|
||||
|
||||
/**
 * Builds a phrase query over the analyzed terms in {@code stream}.
 * <p>
 * The base implementation rejects the call: only text-like field types
 * override this to support phrase querying.
 *
 * @param field                    the field name to query
 * @param stream                   the analyzed token stream of the phrase
 * @param slop                     the permitted phrase slop
 * @param enablePositionIncrements whether position increments are honoured
 * @throws IllegalArgumentException always, for non-text field types
 */
public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
    throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]");
}
|
||||
|
||||
/**
 * Builds a multi-phrase query (phrase with multiple terms per position,
 * e.g. from synonyms) over the analyzed terms in {@code stream}.
 * <p>
 * The base implementation rejects the call: only text-like field types
 * override this to support phrase querying.
 *
 * @throws IllegalArgumentException always, for non-text field types
 */
public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
    throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]");
}
|
||||
|
||||
/**
|
||||
* An enum used to describe the relation between the range of terms in a
|
||||
* shard when compared with a query range
|
||||
|
|
|
@ -19,20 +19,29 @@
|
|||
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.AnalyzerWrapper;
|
||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
||||
import org.apache.lucene.analysis.shingle.FixedShingleFilter;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.NormsFieldExistsQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.elasticsearch.common.collect.Iterators;
|
||||
import org.elasticsearch.common.logging.ESLoggerFactory;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.support.XContentMapValues;
|
||||
|
@ -43,7 +52,7 @@ import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
|
|||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -54,9 +63,13 @@ import static org.elasticsearch.index.mapper.TypeParsers.parseTextField;
|
|||
/** A {@link FieldMapper} for full-text fields. */
|
||||
public class TextFieldMapper extends FieldMapper {
|
||||
|
||||
private static final Logger logger = ESLoggerFactory.getLogger(TextFieldMapper.class);
|
||||
|
||||
public static final String CONTENT_TYPE = "text";
|
||||
private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;
|
||||
|
||||
public static final String FAST_PHRASE_SUFFIX = "._index_phrase";
|
||||
|
||||
public static class Defaults {
|
||||
public static final double FIELDDATA_MIN_FREQUENCY = 0;
|
||||
public static final double FIELDDATA_MAX_FREQUENCY = Integer.MAX_VALUE;
|
||||
|
@ -105,6 +118,11 @@ public class TextFieldMapper extends FieldMapper {
|
|||
return builder;
|
||||
}
|
||||
|
||||
/**
 * Enables or disables indexing of two-term shingles into a subsidiary
 * [field]._index_phrase field, used to speed up exact phrase queries.
 */
public Builder indexPhrases(boolean indexPhrases) {
    fieldType().setIndexPhrases(indexPhrases);
    return builder;
}
|
||||
|
||||
@Override
|
||||
public Builder docValues(boolean docValues) {
|
||||
if (docValues) {
|
||||
|
@ -166,8 +184,16 @@ public class TextFieldMapper extends FieldMapper {
|
|||
prefixFieldType.setAnalyzer(fieldType.indexAnalyzer());
|
||||
prefixMapper = new PrefixFieldMapper(prefixFieldType, context.indexSettings());
|
||||
}
|
||||
if (fieldType().indexPhrases) {
|
||||
if (fieldType().isSearchable() == false) {
|
||||
throw new IllegalArgumentException("Cannot set index_phrases on unindexed field [" + name() + "]");
|
||||
}
|
||||
if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
|
||||
throw new IllegalArgumentException("Cannot set index_phrases on field [" + name() + "] if positions are not enabled");
|
||||
}
|
||||
}
|
||||
return new TextFieldMapper(
|
||||
name, fieldType, defaultFieldType, positionIncrementGap, prefixMapper,
|
||||
name, fieldType(), defaultFieldType, positionIncrementGap, prefixMapper,
|
||||
context.indexSettings(), multiFieldsBuilder.build(this, context), copyTo);
|
||||
}
|
||||
}
|
||||
|
@ -211,12 +237,35 @@ public class TextFieldMapper extends FieldMapper {
|
|||
builder.indexPrefixes(minChars, maxChars);
|
||||
DocumentMapperParser.checkNoRemainingFields(propName, indexPrefix, parserContext.indexVersionCreated());
|
||||
iterator.remove();
|
||||
} else if (propName.equals("index_phrases")) {
|
||||
builder.indexPhrases(XContentMapValues.nodeBooleanValue(propNode, "index_phrases"));
|
||||
iterator.remove();
|
||||
}
|
||||
}
|
||||
return builder;
|
||||
}
|
||||
}
|
||||
|
||||
private static class PhraseWrappedAnalyzer extends AnalyzerWrapper {
|
||||
|
||||
private final Analyzer delegate;
|
||||
|
||||
PhraseWrappedAnalyzer(Analyzer delegate) {
|
||||
super(delegate.getReuseStrategy());
|
||||
this.delegate = delegate;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Analyzer getWrappedAnalyzer(String fieldName) {
|
||||
return delegate;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
|
||||
return new TokenStreamComponents(components.getTokenizer(), new FixedShingleFilter(components.getTokenStream(), 2));
|
||||
}
|
||||
}
|
||||
|
||||
private static class PrefixWrappedAnalyzer extends AnalyzerWrapper {
|
||||
|
||||
private final int minChars;
|
||||
|
@ -242,6 +291,46 @@ public class TextFieldMapper extends FieldMapper {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Field type for the hidden [field]._index_phrase sub-field that stores
 * two-term shingles of the parent text field. Index options and term-vector
 * settings mirror the parent's where relevant, and the parent's analyzer is
 * re-used wrapped in a {@link PhraseWrappedAnalyzer}.
 */
private static final class PhraseFieldType extends StringFieldType {

    final TextFieldType parent;

    PhraseFieldType(TextFieldType parent) {
        setTokenized(true);
        // positions are required: this field exists to serve phrase queries
        setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        // mirror the parent's offsets so e.g. highlighting stays consistent
        if (parent.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
            setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        }
        if (parent.storeTermVectorOffsets()) {
            setStoreTermVectors(true);
            setStoreTermVectorPositions(true);
            setStoreTermVectorOffsets(true);
        }
        // re-use the parent analysis chain, wrapped with a shingle filter
        setAnalyzer(parent.indexAnalyzer().name(), parent.indexAnalyzer().analyzer());
        setName(parent.name() + FAST_PHRASE_SUFFIX);
        this.parent = parent;
    }

    // wraps the delegate so two-term shingles are emitted at index time
    void setAnalyzer(String name, Analyzer delegate) {
        setIndexAnalyzer(new NamedAnalyzer(name, AnalyzerScope.INDEX, new PhraseWrappedAnalyzer(delegate)));
    }

    @Override
    public MappedFieldType clone() {
        return new PhraseFieldType(parent);
    }

    @Override
    public String typeName() {
        return "phrase";
    }

    @Override
    public Query existsQuery(QueryShardContext context) {
        // hidden internal field: existence is queried via the parent field
        throw new UnsupportedOperationException();
    }
}
|
||||
|
||||
static final class PrefixFieldType extends StringFieldType {
|
||||
|
||||
final int minChars;
|
||||
|
@ -310,6 +399,23 @@ public class TextFieldMapper extends FieldMapper {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Mapper for the hidden [field]._index_phrase sub-field. It never parses
 * documents itself; the owning TextFieldMapper adds the phrase field's
 * {@code Field} instances directly during parsing.
 */
private static final class PhraseFieldMapper extends FieldMapper {

    PhraseFieldMapper(PhraseFieldType fieldType, Settings indexSettings) {
        super(fieldType.name(), fieldType, fieldType, indexSettings, MultiFields.empty(), CopyTo.empty());
    }

    @Override
    protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
        // never called: the parent TextFieldMapper emits the phrase field itself
        throw new UnsupportedOperationException();
    }

    @Override
    protected String contentType() {
        return "phrase";
    }
}
|
||||
|
||||
private static final class PrefixFieldMapper extends FieldMapper {
|
||||
|
||||
protected PrefixFieldMapper(PrefixFieldType fieldType, Settings indexSettings) {
|
||||
|
@ -343,6 +449,7 @@ public class TextFieldMapper extends FieldMapper {
|
|||
private double fielddataMaxFrequency;
|
||||
private int fielddataMinSegmentSize;
|
||||
private PrefixFieldType prefixFieldType;
|
||||
private boolean indexPhrases = false;
|
||||
|
||||
public TextFieldType() {
|
||||
setTokenized(true);
|
||||
|
@ -358,6 +465,7 @@ public class TextFieldMapper extends FieldMapper {
|
|||
this.fielddataMinFrequency = ref.fielddataMinFrequency;
|
||||
this.fielddataMaxFrequency = ref.fielddataMaxFrequency;
|
||||
this.fielddataMinSegmentSize = ref.fielddataMinSegmentSize;
|
||||
this.indexPhrases = ref.indexPhrases;
|
||||
if (ref.prefixFieldType != null) {
|
||||
this.prefixFieldType = ref.prefixFieldType.clone();
|
||||
}
|
||||
|
@ -374,6 +482,7 @@ public class TextFieldMapper extends FieldMapper {
|
|||
}
|
||||
TextFieldType that = (TextFieldType) o;
|
||||
return fielddata == that.fielddata
|
||||
&& indexPhrases == that.indexPhrases
|
||||
&& Objects.equals(prefixFieldType, that.prefixFieldType)
|
||||
&& fielddataMinFrequency == that.fielddataMinFrequency
|
||||
&& fielddataMaxFrequency == that.fielddataMaxFrequency
|
||||
|
@ -382,7 +491,7 @@ public class TextFieldMapper extends FieldMapper {
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(super.hashCode(), fielddata, prefixFieldType,
|
||||
return Objects.hash(super.hashCode(), fielddata, indexPhrases, prefixFieldType,
|
||||
fielddataMinFrequency, fielddataMaxFrequency, fielddataMinSegmentSize);
|
||||
}
|
||||
|
||||
|
@ -427,6 +536,11 @@ public class TextFieldMapper extends FieldMapper {
|
|||
this.prefixFieldType = prefixFieldType;
|
||||
}
|
||||
|
||||
/**
 * Enables or disables phrase-shingle indexing for this field type.
 * Must be called before the field type is frozen.
 */
void setIndexPhrases(boolean indexPhrases) {
    checkIfFrozen();
    this.indexPhrases = indexPhrases;
}
|
||||
|
||||
public PrefixFieldType getPrefixFieldType() {
|
||||
return this.prefixFieldType;
|
||||
}
|
||||
|
@ -458,6 +572,93 @@ public class TextFieldMapper extends FieldMapper {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {

    // Redirect exact (slop == 0) phrases with no position gaps to the shingled
    // ._index_phrase sub-field; phrases containing gaps (e.g. removed
    // stopwords) fall back to a standard phrase query on the parent field.
    if (indexPhrases && slop == 0 && hasGaps(cache(stream)) == false) {
        stream = new FixedShingleFilter(stream, 2);
        field = field + FAST_PHRASE_SUFFIX;
    }
    PhraseQuery.Builder builder = new PhraseQuery.Builder();
    builder.setSlop(slop);

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    // starts at -1 so the first token's increment lands it on position 0
    int position = -1;

    stream.reset();
    while (stream.incrementToken()) {
        if (enablePosIncrements) {
            position += posIncrAtt.getPositionIncrement();
        }
        else {
            // ignore gaps: every term occupies the next consecutive position
            position += 1;
        }
        builder.add(new Term(field, termAtt.getBytesRef()), position);
    }

    return builder.build();
}
|
||||
|
||||
@Override
public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {

    // Same redirection as phraseQuery: exact gap-free phrases are run against
    // the shingled ._index_phrase sub-field when index_phrases is enabled.
    if (indexPhrases && slop == 0 && hasGaps(cache(stream)) == false) {
        stream = new FixedShingleFilter(stream, 2);
        field = field + FAST_PHRASE_SUFFIX;
    }

    MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder();
    mpqb.setSlop(slop);

    TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);

    PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
    int position = -1;

    // terms stacked at the same position (increment 0), e.g. synonyms
    List<Term> multiTerms = new ArrayList<>();
    stream.reset();
    while (stream.incrementToken()) {
        int positionIncrement = posIncrAtt.getPositionIncrement();

        // a positive increment means the previous position's term group is
        // complete: flush it into the builder before starting a new group
        if (positionIncrement > 0 && multiTerms.size() > 0) {
            if (enablePositionIncrements) {
                mpqb.add(multiTerms.toArray(new Term[0]), position);
            } else {
                mpqb.add(multiTerms.toArray(new Term[0]));
            }
            multiTerms.clear();
        }
        position += positionIncrement;
        multiTerms.add(new Term(field, termAtt.getBytesRef()));
    }

    // flush the final term group
    if (enablePositionIncrements) {
        mpqb.add(multiTerms.toArray(new Term[0]), position);
    } else {
        mpqb.add(multiTerms.toArray(new Term[0]));
    }
    return mpqb.build();
}
|
||||
|
||||
private static CachingTokenFilter cache(TokenStream in) {
|
||||
if (in instanceof CachingTokenFilter) {
|
||||
return (CachingTokenFilter) in;
|
||||
}
|
||||
return new CachingTokenFilter(in);
|
||||
}
|
||||
|
||||
private static boolean hasGaps(CachingTokenFilter stream) throws IOException {
|
||||
PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class);
|
||||
stream.reset();
|
||||
while (stream.incrementToken()) {
|
||||
if (posIncAtt.getPositionIncrement() > 1) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
|
||||
if (fielddata == false) {
|
||||
|
@ -472,6 +673,9 @@ public class TextFieldMapper extends FieldMapper {
|
|||
public void checkCompatibility(MappedFieldType other, List<String> conflicts) {
|
||||
super.checkCompatibility(other, conflicts);
|
||||
TextFieldType tft = (TextFieldType) other;
|
||||
if (tft.indexPhrases != this.indexPhrases) {
|
||||
conflicts.add("mapper [" + name() + "] has different [index_phrases] values");
|
||||
}
|
||||
if (Objects.equals(this.prefixFieldType, tft.prefixFieldType) == false) {
|
||||
if (this.prefixFieldType == null) {
|
||||
conflicts.add("mapper [" + name()
|
||||
|
@ -490,8 +694,9 @@ public class TextFieldMapper extends FieldMapper {
|
|||
|
||||
private int positionIncrementGap;
|
||||
private PrefixFieldMapper prefixFieldMapper;
|
||||
private PhraseFieldMapper phraseFieldMapper;
|
||||
|
||||
protected TextFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
|
||||
protected TextFieldMapper(String simpleName, TextFieldType fieldType, MappedFieldType defaultFieldType,
|
||||
int positionIncrementGap, PrefixFieldMapper prefixFieldMapper,
|
||||
Settings indexSettings, MultiFields multiFields, CopyTo copyTo) {
|
||||
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
|
||||
|
@ -502,6 +707,7 @@ public class TextFieldMapper extends FieldMapper {
|
|||
}
|
||||
this.positionIncrementGap = positionIncrementGap;
|
||||
this.prefixFieldMapper = prefixFieldMapper;
|
||||
this.phraseFieldMapper = fieldType.indexPhrases ? new PhraseFieldMapper(new PhraseFieldType(fieldType), indexSettings) : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -535,15 +741,25 @@ public class TextFieldMapper extends FieldMapper {
|
|||
if (prefixFieldMapper != null) {
|
||||
prefixFieldMapper.addField(value, fields);
|
||||
}
|
||||
if (phraseFieldMapper != null) {
|
||||
fields.add(new Field(phraseFieldMapper.fieldType.name(), value, phraseFieldMapper.fieldType));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<Mapper> iterator() {
|
||||
if (prefixFieldMapper == null) {
|
||||
List<Mapper> subIterators = new ArrayList<>();
|
||||
if (prefixFieldMapper != null) {
|
||||
subIterators.add(prefixFieldMapper);
|
||||
}
|
||||
if (phraseFieldMapper != null) {
|
||||
subIterators.add(phraseFieldMapper);
|
||||
}
|
||||
if (subIterators.size() == 0) {
|
||||
return super.iterator();
|
||||
}
|
||||
return Iterators.concat(super.iterator(), Collections.singleton(prefixFieldMapper).iterator());
|
||||
return Iterators.concat(super.iterator(), subIterators.iterator());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -562,6 +778,10 @@ public class TextFieldMapper extends FieldMapper {
|
|||
throw new IllegalArgumentException("mapper [" + name() + "] has different index_prefix settings, current ["
|
||||
+ this.prefixFieldMapper + "], merged [" + mw.prefixFieldMapper + "]");
|
||||
}
|
||||
else if (this.fieldType().indexPhrases != mw.fieldType().indexPhrases) {
|
||||
throw new IllegalArgumentException("mapper [" + name() + "] has different index_phrases settings, current ["
|
||||
+ this.fieldType().indexPhrases + "], merged [" + mw.fieldType().indexPhrases + "]");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -602,5 +822,8 @@ public class TextFieldMapper extends FieldMapper {
|
|||
if (fieldType().prefixFieldType != null) {
|
||||
fieldType().prefixFieldType.doXContent(builder);
|
||||
}
|
||||
if (fieldType().indexPhrases) {
|
||||
builder.field("index_phrases", fieldType().indexPhrases);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.elasticsearch.common.io.stream.StreamInput;
|
|||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.search.MatchQuery;
|
||||
import org.elasticsearch.index.search.MatchQuery.ZeroTermsQuery;
|
||||
|
||||
|
|
|
@ -352,16 +352,14 @@ public class MatchQuery {
|
|||
|
||||
@Override
|
||||
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
|
||||
if (hasPositions(mapper) == false) {
|
||||
IllegalStateException exc =
|
||||
new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery");
|
||||
IllegalStateException e = checkForPositions(field);
|
||||
if (e != null) {
|
||||
if (lenient) {
|
||||
return newLenientFieldQuery(field, exc);
|
||||
} else {
|
||||
throw exc;
|
||||
return newLenientFieldQuery(field, e);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
Query query = super.analyzePhrase(field, stream, slop);
|
||||
Query query = mapper.phraseQuery(field, stream, slop, enablePositionIncrements);
|
||||
if (query instanceof PhraseQuery) {
|
||||
// synonyms that expand to multiple terms can return a phrase query.
|
||||
return blendPhraseQuery((PhraseQuery) query, mapper);
|
||||
|
@ -369,6 +367,25 @@ public class MatchQuery {
|
|||
return query;
|
||||
}
|
||||
|
||||
@Override
protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
    // Positions are required to run any phrase query; in lenient mode return
    // a lenient query carrying the error instead of failing outright.
    IllegalStateException e = checkForPositions(field);
    if (e != null) {
        if (lenient) {
            return newLenientFieldQuery(field, e);
        }
        throw e;
    }
    // delegate to the field type so text fields can redirect the query to
    // their ._index_phrase sub-field
    return mapper.multiPhraseQuery(field, stream, slop, enablePositionIncrements);
}
|
||||
|
||||
private IllegalStateException checkForPositions(String field) {
|
||||
if (hasPositions(mapper) == false) {
|
||||
return new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if graph analysis should be enabled for the field depending
|
||||
* on the provided {@link Analyzer}
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
|
@ -29,6 +31,8 @@ import org.apache.lucene.index.PostingsEnum;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
@ -38,6 +42,7 @@ import org.elasticsearch.common.Strings;
|
|||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.compress.CompressedXContent;
|
||||
import org.elasticsearch.common.lucene.uid.Versions;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
|
@ -47,7 +52,9 @@ import org.elasticsearch.index.VersionType;
|
|||
import org.elasticsearch.index.engine.Engine;
|
||||
import org.elasticsearch.index.mapper.MapperService.MergeReason;
|
||||
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
|
||||
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.index.search.MatchQuery;
|
||||
import org.elasticsearch.index.shard.IndexShard;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||
|
@ -65,6 +72,7 @@ import static org.apache.lucene.search.MultiTermQuery.CONSTANT_SCORE_REWRITE;
|
|||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
import static org.hamcrest.core.Is.is;
|
||||
|
||||
public class TextFieldMapperTests extends ESSingleNodeTestCase {
|
||||
|
||||
|
@ -73,7 +81,13 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
|
|||
|
||||
@Before
|
||||
public void setup() {
|
||||
indexService = createIndex("test");
|
||||
Settings settings = Settings.builder()
|
||||
.put("index.analysis.filter.mySynonyms.type", "synonym")
|
||||
.putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto"))
|
||||
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
|
||||
.put("index.analysis.analyzer.synonym.filter", "mySynonyms")
|
||||
.build();
|
||||
indexService = createIndex("test", settings);
|
||||
parser = indexService.mapperService().documentMapperParser();
|
||||
}
|
||||
|
||||
|
@ -670,6 +684,102 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Verifies the index_phrases mapping option: exact phrase queries are
 * rewritten onto the shingled [field]._index_phrase sub-field, sloppy or
 * single-term queries fall back to the parent field, synonyms produce a
 * multi-phrase query, documents index a shingled sub-field, and invalid
 * configurations (unindexed field, positions disabled) are rejected.
 */
public void testFastPhraseMapping() throws IOException {

    QueryShardContext queryShardContext = indexService.newQueryShardContext(
        randomInt(20), null, () -> {
            throw new UnsupportedOperationException();
        }, null);

    // "field" uses the english (stemming) analyzer; "synfield" uses the
    // synonym analyzer configured in setup() (car, auto)
    String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
        .startObject("properties")
        .startObject("field")
        .field("type", "text")
        .field("analyzer", "english")
        .field("index_phrases", true)
        .endObject()
        .startObject("synfield")
        .field("type", "text")
        .field("analyzer", "synonym")
        .field("index_phrases", true)
        .endObject()
        .endObject()
        .endObject().endObject());

    DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
    assertEquals(mapping, mapper.mappingSource().toString());

    queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);

    // exact two-term phrase -> single shingle on the sub-field ("words" stems to "word")
    Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext);
    assertThat(q, is(new PhraseQuery("field._index_phrase", "two word")));

    // three terms -> two overlapping shingles
    Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext);
    assertThat(q2, is(new PhraseQuery("field._index_phrase", "three word", "word here")));

    // slop > 0 -> falls back to a standard phrase query on the parent field
    Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext);
    assertThat(q3, is(new PhraseQuery(1, "field", "two", "word")));

    // single term -> plain term query, no phrase machinery
    Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext);
    assertThat(q4, is(new TermQuery(new Term("field", "singleton"))));

    // stopword removal leaves a position gap -> fall back to the parent field
    Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext);
    assertThat(q5,
        is(new PhraseQuery.Builder().add(new Term("field", "sparkl")).add(new Term("field", "stopword"), 2).build()));

    // synonyms expand to multiple shingles at the same position
    Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext);
    assertThat(q6, is(new MultiPhraseQuery.Builder()
        .add(new Term[]{
            new Term("synfield._index_phrase", "motor car"),
            new Term("synfield._index_phrase", "motor auto")})
        .build()));

    ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
        .bytes(XContentFactory.jsonBuilder()
            .startObject()
            .field("field", "Some English text that is going to be very useful")
            .endObject()),
        XContentType.JSON));

    // parsing adds exactly one hidden ._index_phrase field to the document
    IndexableField[] fields = doc.rootDoc().getFields("field._index_phrase");
    assertEquals(1, fields.length);

    // the sub-field's token stream emits two-term shingles
    try (TokenStream ts = fields[0].tokenStream(queryShardContext.getMapperService().indexAnalyzer(), null)) {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        assertTrue(ts.incrementToken());
        assertEquals("some english", termAtt.toString());
    }

    {
        // index_phrases on an unindexed field must be rejected
        String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field")
            .field("type", "text")
            .field("index", "false")
            .field("index_phrases", true)
            .endObject().endObject()
            .endObject().endObject());
        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
            () -> parser.parse("type", new CompressedXContent(badConfigMapping))
        );
        assertThat(e.getMessage(), containsString("Cannot set index_phrases on unindexed field [field]"));
    }

    {
        // index_phrases without positions must be rejected
        String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field")
            .field("type", "text")
            .field("index_options", "freqs")
            .field("index_phrases", true)
            .endObject().endObject()
            .endObject().endObject());
        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
            () -> parser.parse("type", new CompressedXContent(badConfigMapping))
        );
        assertThat(e.getMessage(), containsString("Cannot set index_phrases on field [field] if positions are not enabled"));
    }
}
|
||||
|
||||
public void testIndexPrefixMapping() throws IOException {
|
||||
|
||||
QueryShardContext queryShardContext = indexService.newQueryShardContext(
|
||||
|
|
|
@ -68,6 +68,13 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
|
|||
tft.setFielddataMinSegmentSize(1000);
|
||||
}
|
||||
});
|
||||
addModifier(new Modifier("index_phrases", false) {
|
||||
@Override
|
||||
public void modify(MappedFieldType ft) {
|
||||
TextFieldMapper.TextFieldType tft = (TextFieldMapper.TextFieldType) ft;
|
||||
tft.setIndexPhrases(true);
|
||||
}
|
||||
});
|
||||
addModifier(new Modifier("index_prefixes", false) {
|
||||
@Override
|
||||
public void modify(MappedFieldType ft) {
|
||||
|
|
Loading…
Reference in New Issue