Added prefix suggestions based on AnalyzingSuggester

This commit introduces near-realtime suggestions. For more information about
their usage, refer to GitHub issue #3376.

From an implementation point of view, a custom AnalyzingSuggester is used
in combination with a custom postings format (which is internal and not
exposed to users anywhere).

Closes #3376
This commit is contained in:
Alexander Reelsen 2013-08-01 08:44:09 +02:00
parent fd15b6278b
commit 4f4f3a2b10
21 changed files with 3512 additions and 3 deletions

File diff suppressed because it is too large Load Diff

View File

@ -92,6 +92,7 @@ public class DocumentMapperParser extends AbstractIndexComponent {
.put(ObjectMapper.CONTENT_TYPE, new ObjectMapper.TypeParser()) .put(ObjectMapper.CONTENT_TYPE, new ObjectMapper.TypeParser())
.put(ObjectMapper.NESTED_CONTENT_TYPE, new ObjectMapper.TypeParser()) .put(ObjectMapper.NESTED_CONTENT_TYPE, new ObjectMapper.TypeParser())
.put(MultiFieldMapper.CONTENT_TYPE, new MultiFieldMapper.TypeParser()) .put(MultiFieldMapper.CONTENT_TYPE, new MultiFieldMapper.TypeParser())
.put(CompletionFieldMapper.CONTENT_TYPE, new CompletionFieldMapper.TypeParser())
.put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser()); .put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser());
if (ShapesAvailability.JTS_AVAILABLE) { if (ShapesAvailability.JTS_AVAILABLE) {

View File

@ -0,0 +1,317 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper.core;
import com.google.common.collect.Lists;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.Mapper;
import org.elasticsearch.index.mapper.MapperException;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider;
import org.elasticsearch.search.suggest.completion.CompletionPostingsFormatProvider;
import org.elasticsearch.search.suggest.completion.CompletionTokenStream;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
*
*/
public class CompletionFieldMapper extends AbstractFieldMapper<String> {
public static final String CONTENT_TYPE = "completion";
/**
* Default field type and default option values of the completion field mapping.
*/
public static class Defaults extends AbstractFieldMapper.Defaults {
// starts as a copy of the inherited default field type; norms are switched off below
public static final FieldType FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE);
static {
FIELD_TYPE.setOmitNorms(true);
// freeze the shared instance so it cannot be reconfigured later
FIELD_TYPE.freeze();
}
// initial values the Builder uses for the corresponding mapping options
public static final boolean DEFAULT_PRESERVE_SEPARATORS = true;
public static final boolean DEFAULT_POSITION_INCREMENTS = true;
public static final boolean DEFAULT_HAS_PAYLOADS = false;
}
/**
* Property names of the completion mapping, as written by toXContent and
* looked up by the TypeParser.
*/
public static class Fields {
public static final String INDEX_ANALYZER = "index_analyzer";
public static final String SEARCH_ANALYZER = "search_analyzer";
public static final String PRESERVE_SEPARATORS = "preserve_separators";
public static final String PRESERVE_POSITION_INCREMENTS = "preserve_position_increments";
public static final String PAYLOADS = "payloads";
public static final String TYPE = "type";
}
/**
 * Fluent builder for {@link CompletionFieldMapper}. Every option starts at the
 * value defined in {@link Defaults}; the analyzers start out unset.
 */
public static class Builder extends AbstractFieldMapper.OpenBuilder<Builder, CompletionFieldMapper> {

    private NamedAnalyzer indexAnalyzer;
    private NamedAnalyzer searchAnalyzer;
    private boolean payloads = Defaults.DEFAULT_HAS_PAYLOADS;
    private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS;
    private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;

    public Builder(String name) {
        super(name, Defaults.FIELD_TYPE);
    }

    /** Sets the analyzer used at index time. */
    public Builder indexAnalyzer(NamedAnalyzer indexAnalyzer) {
        this.indexAnalyzer = indexAnalyzer;
        return this;
    }

    /** Sets the analyzer used at search time. */
    public Builder searchAnalyzer(NamedAnalyzer searchAnalyzer) {
        this.searchAnalyzer = searchAnalyzer;
        return this;
    }

    /** Enables or disables storing payloads with the suggestions. */
    public Builder payloads(boolean payloads) {
        this.payloads = payloads;
        return this;
    }

    /** Sets the {@code preserve_separators} option. */
    public Builder preserveSeparators(boolean preserveSeparators) {
        this.preserveSeparators = preserveSeparators;
        return this;
    }

    /** Sets the {@code preserve_position_increments} option. */
    public Builder preservePositionIncrements(boolean preservePositionIncrements) {
        this.preservePositionIncrements = preservePositionIncrements;
        return this;
    }

    /**
     * Creates the mapper from the collected settings; {@code provider} and
     * {@code similarity} are inherited from the parent builder.
     */
    @Override
    public CompletionFieldMapper build(Mapper.BuilderContext context) {
        return new CompletionFieldMapper(
                buildNames(context),
                indexAnalyzer,
                searchAnalyzer,
                provider,
                similarity,
                payloads,
                preserveSeparators,
                preservePositionIncrements);
    }
}
/**
 * Turns the mapping definition of a {@code completion} field into a {@link Builder}.
 */
public static class TypeParser implements Mapper.TypeParser {

    /**
     * Reads the analyzers, the payload flag and the separator / position increment
     * options from the mapping node. Unknown properties are ignored. Analyzers that
     * are not configured default to the {@code simple} analyzer.
     *
     * @param name          name of the field being mapped
     * @param node          mapping definition of the field
     * @param parserContext gives access to the analysis and postings format services
     * @return the configured builder
     * @throws MapperParsingException if a configured analyzer name cannot be resolved
     */
    @Override
    public Mapper.Builder<?, ?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
        CompletionFieldMapper.Builder builder = new CompletionFieldMapper.Builder(name);
        for (Map.Entry<String, Object> entry : node.entrySet()) {
            String fieldName = entry.getKey();
            Object fieldNode = entry.getValue();
            if (fieldName.equals(Fields.TYPE)) {
                continue;
            }
            if (fieldName.equals(Fields.INDEX_ANALYZER) || fieldName.equals("indexAnalyzer")) {
                builder.indexAnalyzer(resolveAnalyzer(parserContext, name, fieldNode));
            } else if (fieldName.equals(Fields.SEARCH_ANALYZER) || fieldName.equals("searchAnalyzer")) {
                builder.searchAnalyzer(resolveAnalyzer(parserContext, name, fieldNode));
            } else if (fieldName.equals(Fields.PAYLOADS)) {
                builder.payloads(Boolean.parseBoolean(fieldNode.toString()));
            } else if (fieldName.equals(Fields.PRESERVE_SEPARATORS) || fieldName.equals("preserveSeparators")) {
                builder.preserveSeparators(Boolean.parseBoolean(fieldNode.toString()));
            } else if (fieldName.equals(Fields.PRESERVE_POSITION_INCREMENTS) || fieldName.equals("preservePositionIncrements")) {
                builder.preservePositionIncrements(Boolean.parseBoolean(fieldNode.toString()));
            }
        }

        // only analyzers that were not configured at all fall back to "simple"
        if (builder.searchAnalyzer == null) {
            builder.searchAnalyzer(parserContext.analysisService().analyzer("simple"));
        }
        if (builder.indexAnalyzer == null) {
            builder.indexAnalyzer(parserContext.analysisService().analyzer("simple"));
        }
        // we are just using this as the default to be wrapped by the CompletionPostingsFormatProvider in the CompletionFieldMapper ctor
        builder.postingsFormat(parserContext.postingFormatService().get("default"));
        return builder;
    }

    /**
     * Looks up an explicitly configured analyzer and fails when it does not exist,
     * instead of letting a misspelled name silently fall back to the default.
     */
    private static NamedAnalyzer resolveAnalyzer(ParserContext parserContext, String fieldName, Object analyzerNode) {
        NamedAnalyzer analyzer = analyzerNode == null ? null : parserContext.analysisService().analyzer(analyzerNode.toString());
        if (analyzer == null) {
            throw new MapperParsingException("Analyzer [" + analyzerNode + "] not found for field [" + fieldName + "]");
        }
        return analyzer;
    }
}
private static final BytesRef EMPTY = new BytesRef();
private final CompletionPostingsFormatProvider completionPostingsFormatProvider;
private final AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider;
private final boolean payloads;
private final boolean preservePositionIncrements;
private final boolean preserveSeparators;
public CompletionFieldMapper(Names names, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer, PostingsFormatProvider provider, SimilarityProvider similarity, boolean payloads,
boolean preserveSeparators, boolean preservePositionIncrements) {
super(names, 1.0f, Defaults.FIELD_TYPE, indexAnalyzer, searchAnalyzer, provider, similarity, null);
analyzingSuggestLookupProvider = new AnalyzingCompletionLookupProvider(preserveSeparators, false, preservePositionIncrements, payloads);
this.completionPostingsFormatProvider = new CompletionPostingsFormatProvider("completion", provider, analyzingSuggestLookupProvider);
this.preserveSeparators = preserveSeparators;
this.payloads = payloads;
this.preservePositionIncrements = preservePositionIncrements;
}
/**
* Returns the completion postings format provider built in the constructor,
* which wraps the provider that was passed to it.
*/
@Override
public PostingsFormatProvider postingsFormatProvider() {
return this.completionPostingsFormatProvider;
}
/**
 * Parses one completion value and adds a suggest field per input to the document.
 * <p>
 * The value is either a plain string, which is used as the only input, or an
 * object with the keys {@code input} (string or array), {@code output},
 * {@code weight} and {@code payload}. If no {@code output} is given, every input
 * is used as its own surface form.
 *
 * @param context parse context positioned on the value of this field
 * @throws MapperParsingException if the value is neither a string nor an object
 * @throws MapperException if a payload is sent although payloads are disabled in the mapping
 * @throws ElasticSearchIllegalArgumentException if the weight is outside {@code [0..Integer.MAX_VALUE]}
 */
@Override
public void parse(ParseContext context) throws IOException {
    XContentParser parser = context.parser();
    XContentParser.Token token = parser.currentToken();

    // Anything but a string or an object (null, number, boolean, ...) must be rejected here:
    // the object loop below reads until the next END_OBJECT and would otherwise consume
    // tokens that belong to the enclosing document.
    if (token != XContentParser.Token.VALUE_STRING && token != XContentParser.Token.START_OBJECT) {
        throw new MapperParsingException("completion field [" + names().fullName()
                + "] expects a string or an object but found [" + token + "]");
    }

    String surfaceForm = null;
    BytesRef payload = null;
    long weight = -1; // -1 is handed to buildPayload unchanged when the document specifies no weight
    List<String> inputs = Lists.newArrayListWithExpectedSize(4);

    if (token == XContentParser.Token.VALUE_STRING) {
        inputs.add(parser.text());
    } else {
        String currentFieldName = null;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if ("payload".equals(currentFieldName)) {
                if (!isStoringPayloads()) {
                    throw new MapperException("Payloads disabled in mapping");
                }
                // only object payloads are stored; other payload tokens are skipped
                if (token == XContentParser.Token.START_OBJECT) {
                    XContentBuilder payloadBuilder = XContentFactory.contentBuilder(parser.contentType());
                    try {
                        payloadBuilder.copyCurrentStructure(parser);
                        payload = payloadBuilder.bytes().toBytesRef();
                    } finally {
                        // release the builder even if copying the structure fails
                        payloadBuilder.close();
                    }
                }
            } else if (token == XContentParser.Token.VALUE_STRING) {
                if ("output".equals(currentFieldName)) {
                    surfaceForm = parser.text();
                }
                if ("input".equals(currentFieldName)) {
                    inputs.add(parser.text());
                }
            } else if (token == XContentParser.Token.VALUE_NUMBER) {
                if ("weight".equals(currentFieldName)) {
                    weight = parser.longValue(); // always parse a long to make sure we don't get the overflow value
                    if (weight < 0 || weight > Integer.MAX_VALUE) {
                        throw new ElasticSearchIllegalArgumentException("Weight must be in the interval [0..2147483647] but was " + weight);
                    }
                }
            } else if (token == XContentParser.Token.START_ARRAY) {
                if ("input".equals(currentFieldName)) {
                    while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
                        inputs.add(parser.text());
                    }
                }
            }
        }
    }

    payload = payload == null ? EMPTY : payload;
    if (surfaceForm == null) { // no surface form, every input is its own surface form
        for (String input : inputs) {
            BytesRef suggestPayload = analyzingSuggestLookupProvider.buildPayload(new BytesRef(input), weight, payload);
            context.doc().add(getCompletionField(input, suggestPayload));
        }
    } else {
        // all inputs share the payload built from the explicit surface form
        BytesRef suggestPayload = analyzingSuggestLookupProvider.buildPayload(new BytesRef(surfaceForm), weight, payload);
        for (String input : inputs) {
            context.doc().add(getCompletionField(input, suggestPayload));
        }
    }
}
/**
* Creates the Lucene field for a single input. The field is named after this
* mapper's full name, uses this mapper's field type and carries the given payload.
*/
public Field getCompletionField(String input, BytesRef payload) {
return new SuggestField(names().fullName(), input, this.fieldType, payload, analyzingSuggestLookupProvider);
}
/**
* Builds the payload for a suggestion by delegating to the lookup provider's
* buildPayload with the same arguments.
*/
public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
return analyzingSuggestLookupProvider.buildPayload(
surfaceForm, weight, payload);
}
/**
 * Field whose token stream is wrapped in a {@link CompletionTokenStream}, so that
 * the payload and the finite-strings conversion travel with the analyzed input.
 */
private static final class SuggestField extends Field {

    private final BytesRef payload;
    private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;

    public SuggestField(String name, String value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
        super(name, value, type);
        this.toFiniteStrings = toFiniteStrings;
        this.payload = payload;
    }

    /** Wraps the regular token stream of this field in a {@link CompletionTokenStream}. */
    @Override
    public TokenStream tokenStream(Analyzer analyzer) throws IOException {
        final TokenStream analyzed = super.tokenStream(analyzer);
        return new CompletionTokenStream(analyzed, this.payload, this.toFiniteStrings);
    }
}
/**
 * Writes the mapping of this field: type, both analyzer names, the payload flag
 * and the separator / position increment options.
 */
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
    builder.startObject(name());
    builder.field(Fields.TYPE, CONTENT_TYPE);
    builder.field(Fields.INDEX_ANALYZER, indexAnalyzer.name());
    builder.field(Fields.SEARCH_ANALYZER, searchAnalyzer.name());
    builder.field(Fields.PAYLOADS, this.payloads);
    builder.field(Fields.PRESERVE_SEPARATORS, this.preserveSeparators);
    builder.field(Fields.PRESERVE_POSITION_INCREMENTS, this.preservePositionIncrements);
    return builder.endObject();
}
/**
* Always returns null: this mapper overrides parse(ParseContext) and adds its
* fields to the document there.
*/
@Override
protected Field parseCreateField(ParseContext context) throws IOException {
return null;
}
/**
* Returns the mapping type name of this mapper, {@link #CONTENT_TYPE}.
*/
@Override
protected String contentType() {
return CONTENT_TYPE;
}
/**
* Returns the frozen default field type declared in Defaults.
*/
@Override
public FieldType defaultFieldType() {
return Defaults.FIELD_TYPE;
}
/**
* Returns null; this mapper declares no default field data type.
*/
@Override
public FieldDataType defaultFieldDataType() {
return null;
}
/**
 * Converts a value to its string form.
 *
 * @param value the value to convert, may be {@code null}
 * @return {@code value.toString()}, or {@code null} if {@code value} is {@code null}
 */
@Override
public String value(Object value) {
    return value != null ? value.toString() : null;
}
/**
* Returns whether payloads are enabled for this field; parse(ParseContext)
* rejects documents that carry a payload when this is false.
*/
public boolean isStoringPayloads() {
return payloads;
}
}

View File

@ -29,6 +29,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.common.xcontent.XContentBuilderString;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry; import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option; import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.elasticsearch.search.suggest.term.TermSuggestion; import org.elasticsearch.search.suggest.term.TermSuggestion;
import java.io.IOException; import java.io.IOException;
@ -114,6 +115,9 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
case TermSuggestion.TYPE: case TermSuggestion.TYPE:
suggestion = new TermSuggestion(); suggestion = new TermSuggestion();
break; break;
case CompletionSuggestion.TYPE:
suggestion = new CompletionSuggestion();
break;
default: default:
suggestion = new Suggestion<Entry<Option>>(); suggestion = new Suggestion<Entry<Option>>();
break; break;
@ -522,6 +526,10 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
return score; return score;
} }
protected void setScore(float score) {
this.score = score;
}
@Override @Override
public void readFrom(StreamInput in) throws IOException { public void readFrom(StreamInput in) throws IOException {
text = in.readText(); text = in.readText();
@ -567,8 +575,8 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
return text.hashCode(); return text.hashCode();
} }
} }
} }
public enum Sort { public enum Sort {
/** /**
@ -600,8 +608,6 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
throw new ElasticSearchException("Illegal suggest sort " + id); throw new ElasticSearchException("Illegal suggest sort " + id);
} }
} }
} }
} }
} }

View File

@ -21,6 +21,7 @@ package org.elasticsearch.search.suggest;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import org.elasticsearch.common.inject.AbstractModule; import org.elasticsearch.common.inject.AbstractModule;
import org.elasticsearch.common.inject.multibindings.Multibinder; import org.elasticsearch.common.inject.multibindings.Multibinder;
import org.elasticsearch.search.suggest.completion.CompletionSuggester;
import org.elasticsearch.search.suggest.phrase.PhraseSuggester; import org.elasticsearch.search.suggest.phrase.PhraseSuggester;
import org.elasticsearch.search.suggest.term.TermSuggester; import org.elasticsearch.search.suggest.term.TermSuggester;
@ -36,6 +37,7 @@ public class SuggestModule extends AbstractModule {
public SuggestModule() { public SuggestModule() {
registerSuggester(PhraseSuggester.class); registerSuggester(PhraseSuggester.class);
registerSuggester(TermSuggester.class); registerSuggester(TermSuggester.class);
registerSuggester(CompletionSuggester.class);
} }
public void registerSuggester(Class<? extends Suggester> suggester) { public void registerSuggester(Class<? extends Suggester> suggester) {

View File

@ -159,6 +159,7 @@ public final class SuggestUtils {
numTokens++; numTokens++;
} }
consumer.end(); consumer.end();
stream.close();
return numTokens; return numTokens;
} }

View File

@ -0,0 +1,260 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest.completion;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.codecs.*;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.*;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider;
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory;
import java.io.IOException;
import java.util.*;
public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider {
// for serialization
public static final int SERIALIZE_PRESERVE_SEPERATORS = 1;
public static final int SERIALIZE_HAS_PAYLOADS = 2;
public static final int SERIALIZE_PRESERVE_POSITION_INCREMENTS = 4;
private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256;
private static final int MAX_GRAPH_EXPANSIONS = -1;
public static final String CODEC_NAME = "analyzing";
public static final int CODEC_VERSION = 1;
private boolean preserveSep;
private boolean preservePositionIncrements;
private int maxSurfaceFormsPerAnalyzedForm;
private int maxGraphExpansions;
private boolean hasPayloads;
private final XAnalyzingSuggester prototype;
/**
 * Creates a provider with the given analysis options and a prototype suggester
 * (without analyzers or FST) that is used by {@link #toFiniteStrings(TokenStream)}.
 * The {@code exactFirst} argument is currently not applied, see the note below.
 */
public AnalyzingCompletionLookupProvider(boolean preserveSep, boolean exactFirst, boolean preservePositionIncrements, boolean hasPayloads) {
    this.maxSurfaceFormsPerAnalyzedForm = MAX_SURFACE_FORMS_PER_ANALYZED_FORM;
    this.maxGraphExpansions = MAX_GRAPH_EXPANSIONS;
    this.preserveSep = preserveSep;
    this.preservePositionIncrements = preservePositionIncrements;
    this.hasPayloads = hasPayloads;

    int options = 0;
    if (preserveSep) {
        options = XAnalyzingSuggester.PRESERVE_SEP;
    }
    // exact-first needs to be fixed in the suggester first before it can be supported
    //options |= exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0;
    this.prototype = new XAnalyzingSuggester(null, null, options, this.maxSurfaceFormsPerAnalyzedForm, this.maxGraphExpansions, null, false, 1);
    this.prototype.setPreservePositionIncrements(preservePositionIncrements);
}
/**
* Returns the name of this lookup provider, "analyzing".
*/
@Override
public String getName() {
return "analyzing";
}
/**
* Creates the consumer that writes the suggest data to the given output.
* <p>
* Layout as written by this method: codec header; then, per field, the FST
* followed by maxAnalyzedPathsForOneInput (vInt), maxSurfaceFormsPerAnalyzedForm
* (vInt), maxGraphExpansions (int) and the option bits (vInt); on close, a field
* table (count as vInt, then name and vLong offset per field) followed by a long
* holding the position where that table starts.
*/
@Override
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
return new FieldsConsumer() {
// file position at which each field's FST was written
private Map<FieldInfo, Long> fieldOffsets = new HashMap<FieldInfo, Long>();
@Override
public void close() throws IOException {
try { /*
* write the offsets per field such that we know where
* we need to load the FSTs from
*/
long pointer = output.getFilePointer();
output.writeVInt(fieldOffsets.size());
for (Map.Entry<FieldInfo, Long> entry : fieldOffsets.entrySet()) {
output.writeString(entry.getKey().name);
output.writeVLong(entry.getValue());
}
// written last, so the reader can find the field table from the end of the file
output.writeLong(pointer);
output.flush();
} finally {
IOUtils.close(output);
}
}
@Override
public TermsConsumer addField(final FieldInfo field) throws IOException {
return new TermsConsumer() {
// one FST builder and one postings consumer per field
final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(maxSurfaceFormsPerAnalyzedForm, hasPayloads);
final CompletionPostingsConsumer postingsConsumer = new CompletionPostingsConsumer(AnalyzingCompletionLookupProvider.this, builder);
@Override
public PostingsConsumer startTerm(BytesRef text) throws IOException {
builder.startTerm(text);
return postingsConsumer;
}
@Override
public Comparator<BytesRef> getComparator() throws IOException {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
builder.finishTerm(stats.docFreq); // use doc freq as a fallback
}
@Override
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
/*
* Here we are done processing the field and we can
* build the FST and write it to disk.
*/
FST<Pair<Long, BytesRef>> build = builder.build();
// remember where this field's FST starts before saving it
fieldOffsets.put(field, output.getFilePointer());
build.save(output);
/* write some more meta-info */
output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput());
output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
output.writeInt(maxGraphExpansions); // can be negative
// pack the boolean settings into the SERIALIZE_* bit flags
int options = 0;
options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0;
options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
output.writeVInt(options);
}
};
}
};
}
/**
 * Postings consumer that feeds the payload of every position into the FST builder
 * and tracks the largest {@code position + 1} it has seen.
 */
private static final class CompletionPostingsConsumer extends PostingsConsumer {

    private final SuggestPayload spare = new SuggestPayload();
    private final AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider;
    private final XAnalyzingSuggester.XBuilder builder;
    private int maxAnalyzedPathsForOneInput = 0;

    public CompletionPostingsConsumer(AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider, XAnalyzingSuggester.XBuilder builder) {
        this.builder = builder;
        this.analyzingSuggestLookupProvider = analyzingSuggestLookupProvider;
    }

    @Override
    public void startDoc(int docID, int freq) throws IOException {
        // nothing to do per document
    }

    @Override
    public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
        // decode surface form, weight and payload into the reusable spare and hand them to the builder
        analyzingSuggestLookupProvider.parsePayload(payload, spare);
        builder.addSurface(spare.surfaceForm, spare.payload, spare.weight);
        // keep the maximum over all positions seen so far (not a sum)
        final int pathsForThisPosition = position + 1;
        if (pathsForThisPosition > maxAnalyzedPathsForOneInput) {
            maxAnalyzedPathsForOneInput = pathsForThisPosition;
        }
    }

    @Override
    public void finishDoc() throws IOException {
        // nothing to do per document
    }

    /** Returns the largest {@code position + 1} passed to {@link #addPosition}. */
    public int getMaxAnalyzedPathsForOneInput() {
        return maxAnalyzedPathsForOneInput;
    }
}
/**
 * Reads the suggest data written by {@link #consumer(IndexOutput)}: the field table
 * is located through the long stored in the last 8 bytes of the input, then the FST
 * and its settings are read for every field.
 *
 * @return a factory that creates a suggester per field, or {@code null} from
 *         {@code getLookup} for fields that have no suggest data in this input
 */
@Override
public LookupFactory load(IndexInput input) throws IOException {
    CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION);
    final Map<String, AnalyzingSuggestHolder> holdersByField = new HashMap<String, AnalyzingSuggestHolder>();

    // the last long of the input points at the field table
    input.seek(input.length() - 8);
    final long fieldTableStart = input.readLong();
    input.seek(fieldTableStart);

    final int fieldCount = input.readVInt();
    // sorted by offset so the FSTs are read in file order
    final Map<Long, String> fieldsByOffset = new TreeMap<Long, String>();
    for (int read = 0; read < fieldCount; read++) {
        final String fieldName = input.readString();
        final long fstOffset = input.readVLong();
        fieldsByOffset.put(fstOffset, fieldName);
    }

    for (Map.Entry<Long, String> field : fieldsByOffset.entrySet()) {
        input.seek(field.getKey());
        final FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, new PairOutputs<Long, BytesRef>(
                PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
        // settings follow the FST in exactly the order they were written
        final int storedMaxAnalyzedPaths = input.readVInt();
        final int storedMaxSurfaceForms = input.readVInt();
        final int storedMaxGraphExpansions = input.readInt();
        final int optionBits = input.readVInt();
        final boolean storedPreserveSep = (optionBits & SERIALIZE_PRESERVE_SEPERATORS) != 0;
        final boolean storedHasPayloads = (optionBits & SERIALIZE_HAS_PAYLOADS) != 0;
        final boolean storedPreservePosInc = (optionBits & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;
        final AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(storedPreserveSep, storedPreservePosInc,
                storedMaxSurfaceForms, storedMaxGraphExpansions, storedHasPayloads, storedMaxAnalyzedPaths, fst);
        holdersByField.put(field.getValue(), holder);
    }

    return new LookupFactory() {
        @Override
        public Lookup getLookup(FieldMapper<?> mapper, boolean exactFirst) {
            final AnalyzingSuggestHolder holder = holdersByField.get(mapper.names().fullName());
            if (holder == null) {
                return null;
            }
            int flags = 0;
            if (exactFirst) {
                flags |= XAnalyzingSuggester.EXACT_FIRST;
            }
            if (holder.preserveSep) {
                flags |= XAnalyzingSuggester.PRESERVE_SEP;
            }
            final XAnalyzingSuggester suggester = new XAnalyzingSuggester(
                    mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
                    holder.maxSurfaceFormsPerAnalyzedForm, holder.maxGraphExpansions,
                    holder.fst, holder.hasPayloads, holder.maxAnalyzedPathsForOneInput);
            suggester.setPreservePositionIncrements(holder.preservePositionIncrements);
            return suggester;
        }
    };
}
/**
* Immutable holder for what load() reads back per field: the FST and the
* settings stored next to it.
*/
static class AnalyzingSuggestHolder {
final boolean preserveSep;
final boolean preservePositionIncrements;
final int maxSurfaceFormsPerAnalyzedForm;
final int maxGraphExpansions;
final boolean hasPayloads;
final int maxAnalyzedPathsForOneInput;
final FST<Pair<Long, BytesRef>> fst;
public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads,
int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst) {
this.preserveSep = preserveSep;
this.preservePositionIncrements = preservePositionIncrements;
this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
this.maxGraphExpansions = maxGraphExpansions;
this.hasPayloads = hasPayloads;
this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
this.fst = fst;
}
}
/**
* Converts the token stream into a set of finite strings by delegating to the
* prototype suggester and its TokenStreamToAutomaton.
*/
@Override
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
return prototype.toFiniteStrings(prototype.getTokenStreamToAutomaton(), stream);
}
}

View File

@ -0,0 +1,332 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest.completion;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMap.Builder;
import org.apache.lucene.codecs.*;
import org.apache.lucene.index.*;
import org.apache.lucene.index.FilterAtomicReader.FilterTerms;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.store.IOContext.Context;
import org.apache.lucene.store.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.suggest.completion.CompletionTokenStream.ToFiniteStrings;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
/**
* This {@link PostingsFormat} is basically a T-Sink for a default postings
* format that is used to store postings on disk fitting the lucene APIs and
* builds a suggest FST as an auxiliary data structure next to the actual
* postings format. It uses the delegate postings format for simplicity to
* handle all the merge operations. The auxiliary suggest FST data structure is
* only loaded if a FieldsProducer is requested for reading, for merging it uses
* the low memory delegate postings format.
*
*/
public class Completion090PostingsFormat extends PostingsFormat {
public static final String CODEC_NAME = "completion090";
public static final int SUGGEST_CODEC_VERSION = 1;
public static final String EXTENSION = "cmp";
private PostingsFormat delegatePostingsFormat;
private final static Map<String, CompletionLookupProvider> providers;
private CompletionLookupProvider writeProvider;
static {
final CompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, false);
final Builder<String, CompletionLookupProvider> builder = ImmutableMap.builder();
providers = builder.put(provider.getName(), provider).build();
}
/**
* Creates a postings format for writing: postings go to the given delegate
* format and the suggest data is written through the given provider.
* Both arguments are expected to be non-null (checked by assertion only).
*/
public Completion090PostingsFormat(PostingsFormat delegatePostingsFormat, CompletionLookupProvider provider) {
super(CODEC_NAME);
this.delegatePostingsFormat = delegatePostingsFormat;
this.writeProvider = provider;
assert delegatePostingsFormat != null && writeProvider != null;
}
/*
* Used only by core Lucene at read-time via Service Provider instantiation;
* do not use at write-time in application code. An instance created this way
* has no delegate postings format, so fieldsConsumer() throws
* UnsupportedOperationException.
*/
public Completion090PostingsFormat() {
super(CODEC_NAME);
}
/**
 * Creates the consumer that writes both the delegate postings and the suggest data.
 *
 * @throws UnsupportedOperationException if this instance was created without a
 *         delegate postings format
 */
@Override
public SuggestFieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    if (delegatePostingsFormat != null) {
        assert writeProvider != null;
        return new SuggestFieldsConsumer(state);
    }
    final String className = getClass().getName();
    throw new UnsupportedOperationException("Error - " + className
            + " has been constructed without a choice of PostingsFormat");
}
/**
* Creates the producer used for reading a segment written with this format.
*/
@Override
public CompletionFieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
return new CompletionFieldsProducer(state);
}
/**
 * Fields consumer that forwards everything to two consumers: the one of the
 * delegate postings format and the suggest consumer created by the write provider.
 * The suggest data goes to a separate segment file with the extension
 * {@link #EXTENSION}.
 */
private class SuggestFieldsConsumer extends FieldsConsumer {

    private FieldsConsumer delegatesFieldsConsumer;
    private FieldsConsumer suggestFieldsConsumer;

    /**
     * Opens the delegate consumer and the suggest output. If setting up the suggest
     * output fails, both the output and the already opened delegate consumer are
     * closed, so that no open files are left behind.
     */
    public SuggestFieldsConsumer(SegmentWriteState state) throws IOException {
        this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
        String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
        IndexOutput output = null;
        boolean success = false;
        try {
            output = state.directory.createOutput(suggestFSTFile, state.context);
            CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_CODEC_VERSION);
            /*
             * we write the delegate postings format name so we can load it
             * without getting an instance in the ctor
             */
            output.writeString(delegatePostingsFormat.getName());
            output.writeString(writeProvider.getName());
            this.suggestFieldsConsumer = writeProvider.consumer(output);
            success = true;
        } finally {
            if (!success) {
                // the delegate consumer was opened before the try block and must be released as well
                IOUtils.closeWhileHandlingException(output, delegatesFieldsConsumer);
            }
        }
    }

    /**
     * Registers the field with both consumers and returns a terms consumer that
     * fans every call out to both of them.
     */
    @Override
    public TermsConsumer addField(final FieldInfo field) throws IOException {
        final TermsConsumer delegateConsumer = delegatesFieldsConsumer.addField(field);
        final TermsConsumer suggestTermConsumer = suggestFieldsConsumer.addField(field);
        final GroupedPostingsConsumer groupedPostingsConsumer = new GroupedPostingsConsumer(delegateConsumer, suggestTermConsumer);
        return new TermsConsumer() {
            @Override
            public PostingsConsumer startTerm(BytesRef text) throws IOException {
                groupedPostingsConsumer.startTerm(text);
                return groupedPostingsConsumer;
            }

            @Override
            public Comparator<BytesRef> getComparator() throws IOException {
                return delegateConsumer.getComparator();
            }

            @Override
            public void finishTerm(BytesRef text, TermStats stats) throws IOException {
                suggestTermConsumer.finishTerm(text, stats);
                delegateConsumer.finishTerm(text, stats);
            }

            @Override
            public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
                suggestTermConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
                delegateConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
            }
        };
    }

    /** Closes both underlying consumers. */
    @Override
    public void close() throws IOException {
        IOUtils.close(delegatesFieldsConsumer, suggestFieldsConsumer);
    }
}
/**
 * A postings consumer that broadcasts every call to a group of postings consumers,
 * one per wrapped terms consumer. {@link #startTerm(BytesRef)} must be called to
 * obtain the per-term postings consumers before any posting is forwarded.
 */
private class GroupedPostingsConsumer extends PostingsConsumer {
    private TermsConsumer[] targets;
    private PostingsConsumer[] current;

    /**
     * @param termsConsumersArgs the terms consumers whose postings consumers form the group
     */
    public GroupedPostingsConsumer(TermsConsumer... termsConsumersArgs) {
        this.targets = termsConsumersArgs;
        this.current = new PostingsConsumer[termsConsumersArgs.length];
    }

    /**
     * Starts the given term on every wrapped terms consumer and remembers the
     * postings consumer each of them hands back.
     */
    public void startTerm(BytesRef text) throws IOException {
        int slot = 0;
        for (TermsConsumer target : targets) {
            current[slot++] = target.startTerm(text);
        }
    }

    @Override
    public void startDoc(int docID, int freq) throws IOException {
        for (int i = 0; i < current.length; i++) {
            current[i].startDoc(docID, freq);
        }
    }

    @Override
    public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
        for (int i = 0; i < current.length; i++) {
            current[i].addPosition(position, payload, startOffset, endOffset);
        }
    }

    @Override
    public void finishDoc() throws IOException {
        for (int i = 0; i < current.length; i++) {
            current[i].finishDoc();
        }
    }
}
/**
 * Read side of the completion postings format. Wraps the producer of the delegate
 * postings format and, unless the segment is opened for merging, the lookup factory
 * loaded from the suggest file of the segment.
 */
private class CompletionFieldsProducer extends FieldsProducer {
    private FieldsProducer delegateProducer;
    // stays null when the segment was opened with Context.MERGE
    private LookupFactory lookupFactory;

    /**
     * Opens the suggest file of the segment, resolves the delegate postings format and
     * the lookup provider recorded in it, and loads the lookup data.
     *
     * @param state read state of the segment to open
     * @throws IOException if the suggest file or the delegate producer cannot be read
     * @throws ElasticSearchIllegalStateException if the recorded provider name is not registered
     */
    public CompletionFieldsProducer(SegmentReadState state) throws IOException {
        String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
        IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
        boolean success = false;
        try {
            // header validation happens inside the try so that a corrupt or mismatching
            // header does not leave the input open
            CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_CODEC_VERSION);
            PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
            String providerName = input.readString();
            CompletionLookupProvider completionLookupProvider = providers.get(providerName);
            if (completionLookupProvider == null) {
                throw new ElasticSearchIllegalStateException("no provider with name [" + providerName + "] registered");
            }
            // TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unnecessary heap usage?
            this.delegateProducer = delegatePostingsFormat.fieldsProducer(state);
            /*
             * If we are merging we don't load the FSTs at all such that we
             * don't consume so much memory during merge
             */
            if (state.context.context != Context.MERGE) {
                // TODO: maybe we can do this in a fully lazy fashion based on some configuration
                // eventually we should have some kind of circuit breaker that prevents us from going OOM here
                // with some configuration
                this.lookupFactory = completionLookupProvider.load(input);
            }
            success = true;
        } finally {
            if (!success) {
                IOUtils.closeWhileHandlingException(delegateProducer, input);
            } else {
                // the suggest input is only needed while loading
                IOUtils.close(input);
            }
        }
    }

    /** Closes the delegate producer. */
    @Override
    public void close() throws IOException {
        IOUtils.close(delegateProducer);
    }

    @Override
    public Iterator<String> iterator() {
        return delegateProducer.iterator();
    }

    /**
     * Returns the delegate's terms for the field wrapped in a {@link CompletionTerms},
     * or {@code null} if the delegate has no terms for it.
     */
    @Override
    public Terms terms(String field) throws IOException {
        Terms terms = delegateProducer.terms(field);
        if (terms == null) {
            return terms;
        }
        return new CompletionTerms(terms, this.lookupFactory);
    }

    @Override
    public int size() {
        return delegateProducer.size();
    }
}
/**
 * Terms view that passes everything through to the wrapped terms and additionally
 * gives access to a suggest {@link Lookup} built by the supplied factory.
 */
public static final class CompletionTerms extends FilterTerms {
    private final LookupFactory lookupFactory;

    /**
     * @param delegate the terms to wrap
     * @param lookup   factory used by {@link #getLookup(FieldMapper, boolean)}
     */
    public CompletionTerms(Terms delegate, LookupFactory lookup) {
        super(delegate);
        this.lookupFactory = lookup;
    }

    /**
     * Asks the factory for a lookup for the given mapper.
     *
     * @param mapper     field mapper handed to the factory
     * @param exactFirst flag handed to the factory unchanged
     * @return whatever the factory returns
     */
    public Lookup getLookup(FieldMapper<?> mapper, boolean exactFirst) {
        final Lookup result = lookupFactory.getLookup(mapper, exactFirst);
        return result;
    }
}
/**
 * Base class for the pluggable parts of the completion postings format: it creates
 * the consumer that writes the suggest data, loads that data back as a
 * {@link LookupFactory}, and defines the binary layout of a suggestion payload.
 *
 * Payload layout, in order: weight + 1 as vLong, surface form length as vInt, surface
 * form bytes, payload length as vInt, payload bytes.
 */
public static abstract class CompletionLookupProvider implements PayloadProcessor, ToFiniteStrings {
    /** Reserved character; surface forms containing it are rejected. */
    public static final char UNIT_SEPARATOR = '\u001f';

    /** Creates the consumer that writes suggest data to the given output. */
    public abstract FieldsConsumer consumer(IndexOutput output) throws IOException;

    /** Name under which this provider is recorded in the suggest file. */
    public abstract String getName();

    /** Loads the suggest data from the given input. */
    public abstract LookupFactory load(IndexInput input) throws IOException;

    /**
     * Serializes weight, surface form and payload into a single byte sequence.
     *
     * @param surfaceForm the surface form; must not contain {@link #UNIT_SEPARATOR}
     * @param weight      the weight, between -1 and {@code Integer.MAX_VALUE} inclusive
     * @param payload     the user payload bytes
     * @return a new {@link BytesRef} holding the serialized form
     * @throws IllegalArgumentException if the weight is out of range or the surface form
     *         contains the reserved separator
     */
    @Override
    public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
        if (weight < -1 || weight > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("weight must be >= -1 && <= Integer.MAX_VALUE");
        }
        // scan the valid slice only: the bytes of a BytesRef start at its offset
        final int end = surfaceForm.offset + surfaceForm.length;
        for (int i = surfaceForm.offset; i < end; i++) {
            if (surfaceForm.bytes[i] == UNIT_SEPARATOR) {
                throw new IllegalArgumentException(
                        "surface form cannot contain unit separator character U+001F; this character is reserved");
            }
        }
        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream);
        // shifted by one so that the smallest allowed weight (-1) is written as 0
        output.writeVLong(weight + 1);
        output.writeVInt(surfaceForm.length);
        output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
        output.writeVInt(payload.length);
        // honour the payload's offset, just like for the surface form
        output.writeBytes(payload.bytes, payload.offset, payload.length);
        output.close();
        return new BytesRef(byteArrayOutputStream.toByteArray());
    }

    /**
     * Reverses {@link #buildPayload(BytesRef, long, BytesRef)}: reads weight, surface
     * form and payload from the given bytes into {@code ref}.
     *
     * @param payload the serialized bytes
     * @param ref     receives the decoded values; its buffers are grown as needed
     */
    @Override
    public void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException {
        ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(payload.bytes, payload.offset, payload.length);
        InputStreamDataInput input = new InputStreamDataInput(byteArrayInputStream);
        ref.weight = input.readVLong() - 1;
        int len = input.readVInt();
        ref.surfaceForm.grow(len);
        ref.surfaceForm.length = len;
        input.readBytes(ref.surfaceForm.bytes, ref.surfaceForm.offset, ref.surfaceForm.length);
        len = input.readVInt();
        ref.payload.grow(len);
        ref.payload.length = len;
        input.readBytes(ref.payload.bytes, ref.payload.offset, ref.payload.length);
        input.close();
    }
}
/**
 * Produces {@link Lookup} instances for a field mapper. Instances are obtained from
 * {@code CompletionLookupProvider#load(IndexInput)}.
 */
public abstract static class LookupFactory {
    /**
     * @param mapper     the field mapper the lookup is requested for
     * @param exactFirst implementation-defined flag passed through by callers
     * @return the lookup for the given mapper
     */
    public abstract Lookup getLookup(FieldMapper<?> mapper, boolean exactFirst);
}
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest.completion;
import org.apache.lucene.codecs.PostingsFormat;
import org.elasticsearch.index.codec.postingsformat.AbstractPostingsFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
/**
 * Postings format provider that always hands out one {@link Completion090PostingsFormat},
 * built once from the delegate provider's format and a lookup provider.
 */
public final class CompletionPostingsFormatProvider extends AbstractPostingsFormatProvider {
    private final Completion090PostingsFormat completionFormat;

    /**
     * @param name     name of this provider
     * @param delegate provider of the postings format that is wrapped
     * @param provider lookup provider passed to the completion postings format
     */
    public CompletionPostingsFormatProvider(String name, PostingsFormatProvider delegate, Completion090PostingsFormat.CompletionLookupProvider provider) {
        super(name);
        final PostingsFormat wrapped = delegate.get();
        this.completionFormat = new Completion090PostingsFormat(wrapped, provider);
    }

    /** Returns the completion postings format created in the constructor. */
    @Override
    public PostingsFormat get() {
        return completionFormat;
    }
}

View File

@ -0,0 +1,60 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest.completion;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.search.suggest.SuggestContextParser;
import org.elasticsearch.search.suggest.SuggestionSearchContext;
import java.io.IOException;
import static org.elasticsearch.search.suggest.SuggestUtils.parseSuggestContext;
/**
 * Parses the body of a {@code completion} suggestion request into a
 * {@link CompletionSuggestionContext}.
 */
public class CompletionSuggestParser implements SuggestContextParser {
    private final CompletionSuggester completionSuggester;

    /**
     * @param completionSuggester the suggester the parsed contexts are bound to
     */
    public CompletionSuggestParser(CompletionSuggester completionSuggester) {
        this.completionSuggester = completionSuggester;
    }

    /**
     * Consumes tokens up to the closing END_OBJECT. Every value token is handed to
     * the common suggest-context parsing, after which the field mapper for the
     * context's current field is looked up again and stored on the context.
     *
     * @throws ElasticSearchIllegalArgumentException on any token that is neither a
     *         field name nor a value
     */
    @Override
    public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, MapperService mapperService) throws IOException {
        final CompletionSuggestionContext context = new CompletionSuggestionContext(completionSuggester);
        String currentField = null;
        for (XContentParser.Token tok = parser.nextToken(); tok != XContentParser.Token.END_OBJECT; tok = parser.nextToken()) {
            if (tok == XContentParser.Token.FIELD_NAME) {
                currentField = parser.currentName();
                continue;
            }
            if (!tok.isValue()) {
                throw new ElasticSearchIllegalArgumentException("suggester[completion] doesn't support field [" + currentField + "]");
            }
            parseSuggestContext(parser, mapperService, currentField, context);
            context.mapper(mapperService.smartNameFieldMapper(context.getField()));
        }
        return context;
    }
}

View File

@ -0,0 +1,112 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest.completion;
import com.google.common.collect.Maps;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.SuggestContextParser;
import org.elasticsearch.search.suggest.Suggester;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
/**
 * Suggester registered under the name {@code completion}. It runs a prefix lookup
 * against every segment whose terms for the field are
 * {@link Completion090PostingsFormat.CompletionTerms}, keeps the best-scoring option
 * per suggestion text across segments, and returns the top options by score.
 */
public class CompletionSuggester implements Suggester<CompletionSuggestionContext> {
    private static final ScoreComparator scoreComparator = new ScoreComparator();

    /**
     * Executes the suggestion.
     *
     * @param name              name of the suggestion in the response
     * @param suggestionContext parsed request; supplies text, field, size and field mapper
     * @param indexReader       reader whose leaves are searched
     * @param spare             unused by this suggester
     * @return a suggestion with a single entry holding at most {@code size} options
     * @throws ElasticSearchException if the context's mapper is not a {@link CompletionFieldMapper}
     */
    @Override
    public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> execute(String name,
            CompletionSuggestionContext suggestionContext, IndexReader indexReader, CharsRef spare) throws IOException {
        // decode once; the entry length must be the length of the text itself and not of
        // BytesRef#toString(), which renders the bytes as a hex dump
        final String prefix = suggestionContext.getText().utf8ToString();
        CompletionSuggestion completionSuggestionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
        CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(new StringText(prefix), 0, prefix.length());
        completionSuggestionSuggestion.addTerm(completionSuggestEntry);
        String fieldName = suggestionContext.getField();
        // instanceof is false for null, so this also covers a missing mapper
        if (!(suggestionContext.mapper() instanceof CompletionFieldMapper)) {
            throw new ElasticSearchException("Field [" + suggestionContext.getField() + "] is not a completion suggest field");
        }
        // keyed by suggestion text so the same suggestion coming from several segments is reported once
        Map<String, CompletionSuggestion.Entry.Option> results = Maps.newHashMapWithExpectedSize(indexReader.leaves().size() * suggestionContext.getSize());
        for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
            AtomicReader atomicReader = atomicReaderContext.reader();
            Terms terms = atomicReader.fields().terms(fieldName);
            if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
                Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms;
                Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), false);
                List<Lookup.LookupResult> lookupResults = lookup.lookup(prefix, false, suggestionContext.getSize());
                for (Lookup.LookupResult res : lookupResults) {
                    final String key = res.key.toString();
                    final float score = res.value;
                    final Option value = results.get(key);
                    if (value == null) {
                        final Option option = new CompletionSuggestion.Entry.Option(new StringText(key), score, res.payload == null ? null
                                : new BytesArray(res.payload));
                        results.put(key, option);
                    } else if (value.getScore() < score) {
                        // a higher scoring duplicate wins, together with its payload
                        value.setScore(score);
                        value.setPayload(res.payload == null ? null : new BytesArray(res.payload));
                    }
                }
            }
        }
        final List<CompletionSuggestion.Entry.Option> options = new ArrayList<CompletionSuggestion.Entry.Option>(results.values());
        CollectionUtil.introSort(options, scoreComparator);
        final int limit = Math.min(suggestionContext.getSize(), options.size());
        for (int i = 0; i < limit; i++) {
            completionSuggestEntry.addOption(options.get(i));
        }
        return completionSuggestionSuggestion;
    }

    @Override
    public String[] names() {
        return new String[] { "completion" };
    }

    @Override
    public SuggestContextParser getContextParser() {
        return new CompletionSuggestParser(this);
    }

    /** Orders options by descending score. */
    public static class ScoreComparator implements Comparator<CompletionSuggestion.Entry.Option> {
        @Override
        public int compare(Option o1, Option o2) {
            return Float.compare(o2.getScore(), o1.getScore());
        }
    }
}

View File

@ -0,0 +1,117 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest.completion;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.suggest.Suggest;
import java.io.IOException;
/**
 * Suggestion type produced by the completion suggester. Its options carry an
 * optional payload in addition to text and score.
 */
public class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestion.Entry> {
    /** Type id returned by {@link #getType()}. */
    public static final int TYPE = 2;

    public CompletionSuggestion() {
    }

    public CompletionSuggestion(String name, int size) {
        super(name, size);
    }

    @Override
    public int getType() {
        return TYPE;
    }

    @Override
    protected Entry newEntry() {
        return new Entry();
    }

    /** Entry of a completion suggestion; creates {@link Option} instances. */
    public static class Entry extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry<CompletionSuggestion.Entry.Option> {
        protected Entry() {
            super();
        }

        public Entry(Text text, int offset, int length) {
            super(text, offset, length);
        }

        @Override
        protected Option newOption() {
            return new Option();
        }

        /** A single suggested completion with an optional payload. */
        public static class Option extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option {
            private BytesReference payload;

            protected Option() {
                super();
            }

            public Option(Text text, float score,BytesReference payload) {
                super(text, score);
                this.payload = payload;
            }

            public BytesReference getPayload() {
                return payload;
            }

            public void setPayload(BytesReference payload) {
                this.payload = payload;
            }

            /** Publicly exposes the score setter of the parent option. */
            public void setScore(float score) {
                super.setScore(score);
            }

            /**
             * Writes the parent's fields and, when a non-empty payload is present,
             * the payload as the raw field {@code payload}.
             */
            @Override
            protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
                super.innerToXContent(builder, params);
                final boolean hasPayload = payload != null && payload.length() > 0;
                if (hasPayload) {
                    builder.rawField("payload", payload);
                }
                return builder;
            }

            /** Reads the parent's state followed by the payload. */
            @Override
            public void readFrom(StreamInput in) throws IOException {
                super.readFrom(in);
                this.payload = in.readBytesReference();
            }

            /** Writes the parent's state followed by the payload. */
            @Override
            public void writeTo(StreamOutput out) throws IOException {
                super.writeTo(out);
                out.writeBytesReference(this.payload);
            }
        }
    }
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest.completion;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.suggest.SuggestBuilder;
import java.io.IOException;
/**
 * Builder for a {@code completion} suggestion request. It adds no options of its
 * own beyond those of the base suggestion builder.
 */
public class CompletionSuggestionBuilder extends SuggestBuilder.SuggestionBuilder<CompletionSuggestionBuilder> {

    /**
     * @param name name of the suggestion
     */
    public CompletionSuggestionBuilder(String name) {
        super(name, "completion");
    }

    /** Contributes nothing to the builder and returns it unchanged. */
    @Override
    protected XContentBuilder innerToXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
        final XContentBuilder unchanged = builder;
        return unchanged;
    }
}

View File

@ -0,0 +1,43 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest.completion;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.suggest.Suggester;
import org.elasticsearch.search.suggest.SuggestionSearchContext;
/**
 * Suggestion context of the completion suggester; in addition to the common
 * settings it carries the field mapper of the field to suggest on.
 */
public class CompletionSuggestionContext extends SuggestionSearchContext.SuggestionContext {
    private FieldMapper<?> fieldMapper;

    public CompletionSuggestionContext(Suggester suggester) {
        super(suggester);
    }

    /** Stores the field mapper; {@code null} is accepted and stored as is. */
    public void mapper(FieldMapper<?> mapper) {
        this.fieldMapper = mapper;
    }

    /** Returns the stored field mapper, or {@code null} if none was set. */
    public FieldMapper<?> mapper() {
        return fieldMapper;
    }
}

View File

@ -0,0 +1,145 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest.completion;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Util;
import java.io.IOException;
import java.util.Iterator;
import java.util.Set;
/**
 * Token stream that turns an input stream into the set of finite strings computed
 * by a {@link ToFiniteStrings} and emits each of them as one token, with the
 * configured payload attached.
 */
public final class CompletionTokenStream extends TokenStream {
    private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);
    private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class);
    private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
    private static final int MAX_PATHS = 256;
    private final TokenStream input;
    private final BytesRef payload;
    private final ToFiniteStrings toFiniteStrings;
    private final BytesRef scratch = new BytesRef();
    private Iterator<IntsRef> finiteStrings;
    // -1 until the finite strings have been computed, see end() and close()
    private int posInc = -1;

    /**
     * @param input           stream handed to {@code toFiniteStrings}
     * @param payload         payload set on every emitted token, may be {@code null}
     * @param toFiniteStrings computes the finite strings of the input
     */
    public CompletionTokenStream(TokenStream input, BytesRef payload, ToFiniteStrings toFiniteStrings) throws IOException {
        this.input = input;
        this.payload = payload;
        this.toFiniteStrings = toFiniteStrings;
    }

    /**
     * Emits the next finite string. The strings are computed on the first call after
     * construction or {@link #reset()}.
     *
     * @throws IllegalArgumentException if the input expands to more than 256 finite strings
     */
    @Override
    public boolean incrementToken() throws IOException {
        clearAttributes();
        if (finiteStrings == null) {
            final Set<IntsRef> expanded = toFiniteStrings.toFiniteStrings(input);
            final int paths = expanded.size();
            if (paths > MAX_PATHS) {
                throw new IllegalArgumentException("TokenStream expanded to " + expanded.size() + " finite strings. Only <= " + MAX_PATHS
                        + " finite strings are supported");
            }
            posInc = paths;
            finiteStrings = expanded.iterator();
        }
        if (!finiteStrings.hasNext()) {
            return false;
        }
        posAttr.setPositionIncrement(posInc);
        /*
         * this posInc encodes the number of paths that this surface form
         * produced. Multi Fields have the same surface form and therefore sum up
         */
        posInc = 0;
        Util.toBytesRef(finiteStrings.next(), scratch); // now we have UTF-8
        bytesAtt.setBytesRef(scratch);
        if (payload != null) {
            payloadAttr.setPayload(this.payload);
        }
        return true;
    }

    /**
     * Ends the input only if the finite strings were never computed.
     * NOTE(review): presumably {@code toFiniteStrings} ends and closes the input itself
     * once it has consumed it; confirm against its implementations.
     */
    @Override
    public void end() throws IOException {
        final boolean inputUntouched = posInc == -1;
        if (inputUntouched) {
            input.end();
        }
    }

    /** Closes the input only if the finite strings were never computed. */
    @Override
    public void close() throws IOException {
        final boolean inputUntouched = posInc == -1;
        if (inputUntouched) {
            input.close();
        }
    }

    /** Computes the set of finite strings a token stream expands to. */
    public static interface ToFiniteStrings {
        public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException;
    }

    /** Discards the computed finite strings so the next token recomputes them. */
    @Override
    public void reset() throws IOException {
        super.reset();
        posInc = -1;
        finiteStrings = null;
    }

    /** Term attribute whose bytes are set directly from a {@link BytesRef}. */
    public interface ByteTermAttribute extends TermToBytesRefAttribute {
        public void setBytesRef(BytesRef bytes);
    }

    /** Implementation that keeps a reference to the bytes it is given (no copy). */
    public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
        private BytesRef bytes;

        @Override
        public void setBytesRef(BytesRef bytes) {
            this.bytes = bytes;
        }

        @Override
        public BytesRef getBytesRef() {
            return bytes;
        }

        /** Nothing to fill, the bytes are already set; returns their hash code. */
        @Override
        public int fillBytesRef() {
            return bytes.hashCode();
        }

        /** Intentionally a no-op: the bytes reference is kept. */
        @Override
        public void clear() {
        }

        @Override
        public void copyTo(AttributeImpl target) {
            final ByteTermAttributeImpl destination = (ByteTermAttributeImpl) target;
            destination.bytes = this.bytes;
        }
    }
}

View File

@ -0,0 +1,35 @@
package org.elasticsearch.search.suggest.completion;
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
/**
 * Converts between the parts of a suggestion (surface form, weight, payload) and a
 * single serialized {@link BytesRef}.
 */
interface PayloadProcessor {

    /** Mutable holder that {@link #parsePayload(BytesRef, SuggestPayload)} fills. */
    class SuggestPayload {
        final BytesRef payload = new BytesRef();
        long weight = 0;
        final BytesRef surfaceForm = new BytesRef();
    }

    /**
     * Serializes the given parts into one byte sequence.
     *
     * @param surfaceForm the surface form of the suggestion
     * @param weight      the weight of the suggestion
     * @param payload     the payload of the suggestion
     */
    BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException;

    /**
     * Decodes a serialized byte sequence into {@code ref}.
     *
     * @param payload the serialized bytes
     * @param ref     receives the decoded parts
     */
    void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException;
}

View File

@ -1,2 +1,3 @@
org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat
org.elasticsearch.index.codec.postingsformat.ElasticSearch090PostingsFormat org.elasticsearch.index.codec.postingsformat.ElasticSearch090PostingsFormat
org.elasticsearch.search.suggest.completion.Completion090PostingsFormat

View File

@ -0,0 +1,251 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.search.suggest;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.spell.TermFreqIterator;
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.codec.postingsformat.ElasticSearch090PostingsFormat;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.codec.postingsformat.PreBuiltPostingsFormatProvider;
import org.elasticsearch.index.mapper.FieldMapper.Names;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider;
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat;
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory;
import org.elasticsearch.test.integration.ElasticsearchTestCase;
import org.junit.Test;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import static org.hamcrest.Matchers.equalTo;
public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
/**
 * Writes two analyzed forms of one suggestion through the provider's consumer,
 * loads them back and checks that a prefix lookup returns the surface form and
 * payload that were written.
 */
@Test
public void testCompletionPostingsFormat() throws IOException {
    AnalyzingCompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, true);
    RAMDirectory dir = new RAMDirectory();
    IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT);
    FieldsConsumer consumer = provider.consumer(output);
    FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            DocValuesType.SORTED, DocValuesType.BINARY, new HashMap<String, String>());
    TermsConsumer addField = consumer.addField(fieldInfo);

    PostingsConsumer postingsConsumer = addField.startTerm(new BytesRef("foofightersgenerator"));
    postingsConsumer.startDoc(0, 1);
    postingsConsumer.addPosition(256 - 2, provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0,
            1);
    postingsConsumer.finishDoc();
    addField.finishTerm(new BytesRef("foofightersgenerator"), new TermStats(1, 1));

    // use the consumer handed back for this term instead of silently reusing the previous one
    postingsConsumer = addField.startTerm(new BytesRef("generator"));
    postingsConsumer.startDoc(0, 1);
    postingsConsumer.addPosition(256 - 1, provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0,
            1);
    postingsConsumer.finishDoc();
    addField.finishTerm(new BytesRef("generator"), new TermStats(1, 1));
    addField.finish(1, 1, 1);
    consumer.close();
    output.close();

    IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
    LookupFactory load;
    try {
        load = provider.load(input);
    } finally {
        // the input is only needed while loading, mirroring how the fields producer uses it
        input.close();
    }
    PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat());
    NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
    Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true), false);
    List<LookupResult> result = lookup.lookup("ge", false, 10);
    assertThat(result.get(0).key.toString(), equalTo("Generator - Foo Fighters"));
    assertThat(result.get(0).payload.utf8ToString(), equalTo("id:10"));
    dir.close();
}
/**
 * Duels the lookup read back through the completion postings format against a reference
 * {@link XAnalyzingSuggester} that is built directly from the same titles and weights.
 * <p>
 * Separator preservation, position-increment preservation and payload usage are picked at random.
 * For a random-length prefix of the first analyzed term of every title both lookups have to return
 * the same keys, weights and payloads in the same order.
 */
@Test
public void testDuellCompletions() throws IOException, NoSuchFieldException, SecurityException, IllegalArgumentException,
        IllegalAccessException {
    final boolean preserveSeparators = getRandom().nextBoolean();
    final boolean preservePositionIncrements = getRandom().nextBoolean();
    final boolean usePayloads = getRandom().nextBoolean();
    final int options;
    if (preserveSeparators) {
        options = AnalyzingSuggester.PRESERVE_SEP;
    } else {
        options = 0;
    }

    // reference suggester, fed directly from the iterator defined below
    StandardAnalyzer referenceIndexAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
    StandardAnalyzer referenceQueryAnalyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
    XAnalyzingSuggester reference = new XAnalyzingSuggester(referenceIndexAnalyzer, referenceQueryAnalyzer, options, 256, -1, null,
            false, 1);
    reference.setPreservePositionIncrements(preservePositionIncrements);

    // collect random titles, each with a random weight
    LineFileDocs docs = new LineFileDocs(getRandom());
    int num = atLeast(150);
    final String[] titles = new String[num];
    final long[] weights = new long[num];
    for (int slot = 0; slot < num; slot++) {
        Document nextDoc = docs.nextDoc();
        IndexableField titleField = nextDoc.getField("title");
        titles[slot] = titleField.stringValue();
        weights[slot] = between(0, 100);
    }
    docs.close();

    // walks titles/weights in parallel; weight() reports the weight of the term returned last
    final TermFreqIterator primaryIter = new TermFreqIterator() {
        int cursor = 0;
        long lastWeight = -1;

        @Override
        public Comparator<BytesRef> getComparator() {
            return null;
        }

        @Override
        public BytesRef next() throws IOException {
            if (cursor >= titles.length) {
                return null;
            }
            lastWeight = weights[cursor];
            BytesRef term = new BytesRef(titles[cursor]);
            cursor++;
            return term;
        }

        @Override
        public long weight() {
            return lastWeight;
        }
    };

    TermFreqIterator iter;
    if (!usePayloads) {
        iter = primaryIter;
    } else {
        // same terms and weights, plus the weight rendered as a string as payload
        iter = new TermFreqPayloadIterator() {
            @Override
            public long weight() {
                return primaryIter.weight();
            }

            @Override
            public Comparator<BytesRef> getComparator() {
                return primaryIter.getComparator();
            }

            @Override
            public BytesRef next() throws IOException {
                return primaryIter.next();
            }

            @Override
            public BytesRef payload() {
                return new BytesRef(Long.toString(weight()));
            }
        };
    }
    reference.build(iter);

    // candidate lookup, built by indexing through the completion field mapper
    PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat());
    NamedAnalyzer namedAnalyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
    final CompletionFieldMapper mapper = new CompletionFieldMapper(new Names("foo"), namedAnalyzer, namedAnalyzer, provider, null,
            usePayloads, preserveSeparators, preservePositionIncrements);
    Lookup candidate = buildAnalyzingLookup(mapper, titles, titles, weights);

    // both sides must agree on the (non-public) maxAnalyzedPathsForOneInput value
    Field candidateMaxPaths = candidate.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
    candidateMaxPaths.setAccessible(true);
    Field referenceMaxPaths = reference.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
    referenceMaxPaths.setAccessible(true);
    assertThat(referenceMaxPaths.get(reference), equalTo(candidateMaxPaths.get(candidate)));

    for (String title : titles) {
        int res = between(1, 10);

        // remember only the first token the analyzer emits for this title
        final StringBuilder firstTermBuffer = new StringBuilder();
        SuggestUtils.analyze(namedAnalyzer.tokenStream("foo", title), new SuggestUtils.TokenConsumer() {
            @Override
            public void nextToken() throws IOException {
                if (firstTermBuffer.length() == 0) {
                    firstTermBuffer.append(this.charTermAttr.toString());
                }
            }
        });
        String firstTerm = firstTermBuffer.toString();

        String prefix;
        if (firstTerm.isEmpty()) {
            prefix = "";
        } else {
            prefix = firstTerm.substring(0, between(1, firstTerm.length()));
        }

        List<LookupResult> refLookup = reference.lookup(prefix, false, res);
        List<LookupResult> lookup = candidate.lookup(prefix, false, res);
        assertThat(refLookup.toString(),lookup.size(), equalTo(refLookup.size()));
        for (int j = 0; j < refLookup.size(); j++) {
            LookupResult actual = lookup.get(j);
            LookupResult expected = refLookup.get(j);
            assertThat(actual.key, equalTo(expected.key));
            assertThat("prefix: " + prefix + " " + j + " -- missmatch cost: " + actual.key + " - " + actual.value + " | " + expected.key + " - " + expected.value ,
                    actual.value, equalTo(expected.value));
            assertThat(actual.payload, equalTo(expected.payload));
            if (usePayloads) {
                assertThat(actual.payload.utf8ToString(), equalTo(Long.toString(actual.value)));
            }
        }
    }
}
/**
 * Indexes one completion field per entry into an in-memory index that uses the mapper's postings
 * format, force-merges it down to a single segment and returns the lookup exposed by that
 * segment's completion terms.
 *
 * @param mapper   completion field mapper providing analyzer, postings format and payload encoding
 * @param terms    the inputs to index; read at the same indices as {@code weights}
 * @param surfaces the surface forms stored in the payload; read at the same indices as {@code weights}
 * @param weights  the weights; the length of this array determines how many documents are indexed
 * @return the lookup obtained from the single merged segment
 * @throws IOException if indexing or reading the index fails
 */
public Lookup buildAnalyzingLookup(final CompletionFieldMapper mapper, String[] terms, String[] surfaces, long[] weights)
        throws IOException {
    RAMDirectory dir = new RAMDirectory();
    try {
        // default codec, except that postings are written with the mapper's postings format
        FilterCodec filterCodec = new FilterCodec("filtered", Codec.getDefault()) {
            @Override
            public PostingsFormat postingsFormat() {
                return mapper.postingsFormatProvider().get();
            }
        };
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, mapper.indexAnalyzer());
        indexWriterConfig.setCodec(filterCodec);
        IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
        try {
            for (int i = 0; i < weights.length; i++) {
                Document doc = new Document();
                BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i], new BytesRef(Long.toString(weights[i])));
                doc.add(mapper.getCompletionField(terms[i], payload));
                // commit at random points; presumably to spread the documents over several segments before merging
                if (randomBoolean()) {
                    writer.commit();
                }
                writer.addDocument(doc);
            }
            writer.commit();
            writer.forceMerge(1);
            writer.commit();
            DirectoryReader reader = DirectoryReader.open(writer, true);
            try {
                assertThat(reader.leaves().size(), equalTo(1));
                assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length));
                AtomicReaderContext atomicReaderContext = reader.leaves().get(0);
                Terms luceneTerms = atomicReaderContext.reader().terms(mapper.name());
                return ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper, false);
            } finally {
                // release reader, writer and directory (in that order) even when an assertion above fails
                reader.close();
            }
        } finally {
            writer.close();
        }
    } finally {
        dir.close();
    }
}
// TODO: add more unit tests
}

View File

@ -0,0 +1,454 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.search.suggest;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import com.google.common.collect.Lists;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.suggest.SuggestResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.mapper.MapperException;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
import org.elasticsearch.test.integration.AbstractSharedClusterTest;
import org.junit.Test;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.*;
public class CompletionSuggestSearchTests extends AbstractSharedClusterTest {
private static final String INDEX = "test";
private static final String TYPE = "testType";
private static final String FIELD = "testField";
/**
 * Indexes documents that only carry inputs (no output, weight or payload), several of them
 * repeatedly, and checks that the prefixes "f" and "t" return the expected suggestions in any order.
 */
@Test
public void testSimple() throws Exception{
    createIndexAndMapping();
    String[][] input = {{"Foo Fighters"}, {"Foo Fighters"}, {"Foo Fighters"}, {"Foo Fighters"},
            {"Generator", "Foo Fighters Generator"}, {"Learn to Fly", "Foo Fighters Learn to Fly" },
            {"The Prodigy"}, {"The Prodigy"}, {"The Prodigy"}, {"Firestarter", "The Prodigy Firestarter"},
            {"Turbonegro"}, {"Turbonegro"}, {"Get it on", "Turbonegro Get it on"}}; // work with frequencies

    int docId = 0;
    for (String[] inputs : input) {
        IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, "" + docId)
                .setSource(jsonBuilder()
                        .startObject().startObject(FIELD)
                        .startArray("input").value(inputs).endArray()
                        .endObject()
                        .endObject()
                );
        request.execute().actionGet();
        docId++;
    }

    refresh();

    assertSuggestionsNotInOrder("f", "Foo Fighters", "Firestarter", "Foo Fighters Generator", "Foo Fighters Learn to Fly");
    assertSuggestionsNotInOrder("t", "The Prodigy", "Turbonegro", "Turbonegro Get it on", "The Prodigy Firestarter");
}
/**
 * Indexes the sample data twice, the first time followed by flush and optimize and the second time
 * without, and checks the strictly ordered suggestions for a few prefixes after each round.
 */
@Test
public void testBasicPrefixSuggestion() throws Exception {
    createIndexAndMapping();
    // first round optimizes after indexing, second round does not
    for (boolean optimize : new boolean[] {true, false}) {
        createData(optimize);
        assertSuggestions("f", "Firestarter - The Prodigy", "Foo Fighters", "Generator - Foo Fighters", "Learn to Fly - Foo Fighters");
        assertSuggestions("ge", "Generator - Foo Fighters", "Get it on - Turbonegro");
        assertSuggestions("ge", "Generator - Foo Fighters", "Get it on - Turbonegro");
        assertSuggestions("t", "The Prodigy", "Firestarter - The Prodigy", "Get it on - Turbonegro", "Turbonegro");
    }
}
/**
 * Indexes names that all match the prefix "the" with a weight derived from their length and
 * checks that the suggestions come back ordered by that weight.
 */
@Test
public void testThatWeightsAreWorking() throws Exception {
    createIndexAndMapping();

    List<String> similarNames = Lists.newArrayList("the", "The Prodigy", "The Verve", "The the");
    for (String similarName : similarNames) {
        // the weight is 1000 divided by the string length, so the results are easy to check
        int weight = 1000 / similarName.length();
        IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, similarName)
                .setSource(jsonBuilder()
                        .startObject().startObject(FIELD)
                        .startArray("input").value(similarName).endArray()
                        .field("weight", weight)
                        .endObject().endObject()
                );
        request.get();
    }

    refresh();

    assertSuggestions("the", "the", "The the", "The Verve", "The Prodigy");
}
/**
 * Checks that "input" may be given as a plain string field instead of an array; the configured
 * output is what is expected back as suggestion.
 */
@Test
public void testThatInputCanBeAStringInsteadOfAnArray() throws Exception {
    createIndexAndMapping();

    IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, "1")
            .setSource(jsonBuilder()
                    .startObject().startObject(FIELD)
                    .field("input", "Foo Fighters")
                    .field("output", "Boo Fighters")
                    .endObject().endObject()
            );
    request.get();

    refresh();

    assertSuggestions("f", "Boo Fighters");
}
/**
 * Indexes a suggestion whose payload is a JSON object (a string field plus an array) and checks
 * that the payload returned with the suggestion option parses back to the same content.
 */
@Test
public void testThatPayloadsAreArbitraryJsonObjects() throws Exception {
    createIndexAndMapping();

    IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, "1")
            .setSource(jsonBuilder()
                    .startObject().startObject(FIELD)
                    .startArray("input").value("Foo Fighters").endArray()
                    .field("output", "Boo Fighters")
                    .startObject("payload").field("foo", "bar").startArray("test").value("spam").value("eggs").endArray().endObject()
                    .endObject().endObject()
            );
    request.get();

    refresh();

    SuggestResponse suggestResponse = client().prepareSuggest(INDEX)
            .addSuggestion(new CompletionSuggestionBuilder("testSuggestions").field(FIELD).text("foo").size(10))
            .execute().actionGet();

    assertSuggestions(suggestResponse, "testSuggestions", "Boo Fighters");

    // the first option of the first entry has to be a completion option that carries a payload
    Suggest.Suggestion.Entry.Option option = suggestResponse.getSuggest().getSuggestion("testSuggestions").getEntries().get(0).getOptions().get(0);
    assertThat(option, is(instanceOf(CompletionSuggestion.Entry.Option.class)));
    CompletionSuggestion.Entry.Option prefixOption = (CompletionSuggestion.Entry.Option) option;
    assertThat(prefixOption.getPayload(), is(notNullValue()));

    // parse JSON
    Map<String, Object> jsonMap = JsonXContent.jsonXContent.createParser(prefixOption.getPayload()).mapAndClose();
    assertThat(jsonMap.size(), is(2));
    assertThat(jsonMap.get("foo").toString(), is("bar"));
    assertThat(jsonMap.get("test"), is(instanceOf(List.class)));
    // the "test" entry was indexed above as an array of the strings "spam" and "eggs"
    @SuppressWarnings("unchecked")
    List<String> listValues = (List<String>) jsonMap.get("test");
    assertThat(listValues, hasItems("spam", "eggs"));
}
/**
 * Creates the mapping with payloads disabled and indexes a document that nevertheless contains a
 * payload; a {@link MapperException} is expected.
 */
@Test(expected = MapperException.class)
public void testThatExceptionIsThrownWhenPayloadsAreDisabledButInIndexRequest() throws Exception {
    // arguments: index analyzer, search analyzer, payloads, preserve separators, preserve position increments
    createIndexAndMapping("simple", "simple", false, false, true);

    IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, "1")
            .setSource(jsonBuilder()
                    .startObject().startObject(FIELD)
                    .startArray("input").value("Foo Fighters").endArray()
                    .field("output", "Boo Fighters")
                    .startArray("payload").value("spam").value("eggs").endArray()
                    .endObject().endObject()
            );
    request.get();
}
/**
 * With preserve_separators disabled the text "foof" is expected to suggest both "Foof" and
 * "Foo Fighters", the higher weighted "Foof" first.
 */
@Test
public void testDisabledPreserveSeperators() throws Exception {
    // arguments: index analyzer, search analyzer, payloads, preserve separators, preserve position increments
    createIndexAndMapping("simple", "simple", true, false, true);

    IndexRequestBuilder fooFighters = client().prepareIndex(INDEX, TYPE, "1")
            .setSource(jsonBuilder()
                    .startObject().startObject(FIELD)
                    .startArray("input").value("Foo Fighters").endArray()
                    .field("weight", 10)
                    .endObject().endObject()
            );
    fooFighters.get();

    IndexRequestBuilder foof = client().prepareIndex(INDEX, TYPE, "2")
            .setSource(jsonBuilder()
                    .startObject().startObject(FIELD)
                    .startArray("input").value("Foof").endArray()
                    .field("weight", 20)
                    .endObject().endObject()
            );
    foof.get();

    refresh();

    assertSuggestions("foof", "Foof", "Foo Fighters");
}
/**
 * With preserve_separators enabled the text "foof" is expected to suggest "Foof" only, although
 * "Foo Fighters" is indexed as well.
 */
@Test
public void testEnabledPreserveSeperators() throws Exception {
    // arguments: index analyzer, search analyzer, payloads, preserve separators, preserve position increments
    createIndexAndMapping("simple", "simple", true, true, true);

    IndexRequestBuilder fooFighters = client().prepareIndex(INDEX, TYPE, "1")
            .setSource(jsonBuilder()
                    .startObject().startObject(FIELD)
                    .startArray("input").value("Foo Fighters").endArray()
                    .endObject().endObject()
            );
    fooFighters.get();

    IndexRequestBuilder foof = client().prepareIndex(INDEX, TYPE, "2")
            .setSource(jsonBuilder()
                    .startObject().startObject(FIELD)
                    .startArray("input").value("Foof").endArray()
                    .endObject().endObject()
            );
    foof.get();

    refresh();

    assertSuggestions("foof", "Foof");
}
/**
 * Indexes one document with two inputs and a single output and checks that a prefix of either
 * input returns that output.
 */
@Test
public void testThatMultipleInputsAreSuppored() throws Exception {
    // arguments: index analyzer, search analyzer, payloads, preserve separators, preserve position increments
    createIndexAndMapping("simple", "simple", false, false, true);

    IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, "1")
            .setSource(jsonBuilder()
                    .startObject().startObject(FIELD)
                    .startArray("input").value("Foo Fighters").value("Fu Fighters").endArray()
                    .field("output", "The incredible Foo Fighters")
                    .endObject().endObject()
            );
    request.get();

    refresh();

    assertSuggestions("foo", "The incredible Foo Fighters");
    assertSuggestions("fu", "The incredible Foo Fighters");
}
/**
 * Checks the short syntax, in which the completion field itself is a plain array of inputs; every
 * input is expected back as its own suggestion.
 */
@Test
public void testThatShortSyntaxIsWorking() throws Exception {
    createIndexAndMapping();

    IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, "1")
            .setSource(jsonBuilder()
                    .startObject().startArray(FIELD)
                    .value("The Prodigy Firestarter").value("Firestarter")
                    .endArray().endObject()
            );
    request.get();

    refresh();

    assertSuggestions("t", "The Prodigy Firestarter");
    assertSuggestions("f", "Firestarter");
}
/**
 * With preserve_position_increments disabled and the "standard" analyzer, the prefix "b" is
 * expected to suggest "The Beatles".
 */
@Test
public void testThatDisablingPositionIncrementsWorkForStopwords() throws Exception {
    // analyzer which removes stopwords... so may not be the simple one
    // arguments: index analyzer, search analyzer, payloads, preserve separators, preserve position increments
    createIndexAndMapping("standard", "standard", false, false, false);

    IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, "1")
            .setSource(jsonBuilder()
                    .startObject().startObject(FIELD)
                    .startArray("input").value("The Beatles").endArray()
                    .endObject().endObject()
            );
    request.get();

    refresh();

    assertSuggestions("b", "The Beatles");
}
/**
 * Configures a custom analyzer with a synonym filter ("foo,renamed") for the completion field and
 * checks that a prefix of the synonym returns the indexed input.
 */
@Test
public void testThatSynonymsWork() throws Exception {
    Settings.Builder analysisSettings = settingsBuilder()
            .put("analysis.analyzer.suggest_analyzer_synonyms.type", "custom")
            .put("analysis.analyzer.suggest_analyzer_synonyms.tokenizer", "standard")
            .putArray("analysis.analyzer.suggest_analyzer_synonyms.filter", "standard", "lowercase", "my_synonyms")
            .put("analysis.filter.my_synonyms.type", "synonym")
            .putArray("analysis.filter.my_synonyms.synonyms", "foo,renamed");
    // the synonym analyzer is used both at index and at search time
    createIndexAndMappingAndSettings(analysisSettings, "suggest_analyzer_synonyms", "suggest_analyzer_synonyms", false, false, true);

    IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, "1")
            .setSource(jsonBuilder()
                    .startObject().startObject(FIELD)
                    .startArray("input").value("Foo Fighters").endArray()
                    .endObject().endObject()
            );
    request.get();

    refresh();

    // get suggestions for renamed
    assertSuggestions("r", "Foo Fighters");
}
/**
 * Indexes a document into a plain field, then changes the mapping of that field to a multi_field
 * with an additional "suggest" completion sub field. No suggestions are expected right after the
 * mapping change; after indexing the document again the suggestion has to show up.
 */
@Test
public void testThatUpgradeToMultiFieldWorks() throws Exception {
    client().admin().indices().prepareDelete().get();

    int randomShardNumber = between(1, 5);
    int randomReplicaNumber = between(0, 2);
    Settings.Builder indexSettings = settingsBuilder()
            .put(SETTING_NUMBER_OF_SHARDS, randomShardNumber)
            .put(SETTING_NUMBER_OF_REPLICAS, randomReplicaNumber);

    client().admin().indices().prepareCreate(INDEX).setSettings(indexSettings).get();
    ensureYellow();

    // index once before the completion sub field exists
    client().prepareIndex(INDEX, TYPE, "1").setRefresh(true)
            .setSource(jsonBuilder().startObject().field(FIELD, "Foo Fighters").endObject())
            .get();

    PutMappingResponse putMappingResponse = client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
            .startObject(TYPE).startObject("properties")
            .startObject(FIELD)
            .field("type", "multi_field")
            .startObject("fields")
            .startObject(FIELD).field("type", "string").endObject()
            .startObject("suggest").field("type", "completion").field("index_analyzer", "simple").field("search_analyzer", "simple").endObject()
            .endObject()
            .endObject()
            .endObject().endObject()
            .endObject())
            .get();
    assertThat(putMappingResponse.isAcknowledged(), is(true));

    // nothing was indexed into the new sub field yet, so no options are expected
    SuggestResponse suggestResponse = client().prepareSuggest(INDEX)
            .addSuggestion(new CompletionSuggestionBuilder("suggs").field(FIELD + ".suggest").text("f").size(10))
            .execute().actionGet();
    assertSuggestions(suggestResponse, "suggs");

    // index the same document again, now with the multi field mapping in place
    client().prepareIndex(INDEX, TYPE, "1").setRefresh(true)
            .setSource(jsonBuilder().startObject().field(FIELD, "Foo Fighters").endObject())
            .get();
    waitForRelocation(ClusterHealthStatus.GREEN);

    SuggestResponse afterReindexingResponse = client().prepareSuggest(INDEX)
            .addSuggestion(new CompletionSuggestionBuilder("suggs").field(FIELD + ".suggest").text("f").size(10))
            .execute().actionGet();
    assertSuggestions(afterReindexingResponse, "suggs", "Foo Fighters");
}
/**
 * Runs a completion suggest request (size 10) against the test field and asserts that exactly the
 * given suggestions are returned, in the given order.
 *
 * @param suggestion  the text to get suggestions for
 * @param suggestions the expected suggestion texts, in the expected order
 */
public void assertSuggestions(String suggestion, String ... suggestions) {
    // draw the name from the test's seeded random source instead of an unseeded java.util.Random,
    // so that a run can be reproduced from its seed
    String suggestionName = RandomStrings.randomAsciiOfLength(getRandom(), 10);
    SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
            new CompletionSuggestionBuilder(suggestionName).field(FIELD).text(suggestion).size(10)
    ).execute().actionGet();

    assertSuggestions(suggestResponse, suggestionName, suggestions);
}
/**
 * Runs a completion suggest request (size 10) against the test field and asserts that the number
 * of returned options matches and that every given suggestion is among them, in any order.
 *
 * @param suggestString the text to get suggestions for
 * @param suggestions   the expected suggestion texts
 */
public void assertSuggestionsNotInOrder(String suggestString, String ... suggestions) {
    // draw the name from the test's seeded random source instead of an unseeded java.util.Random,
    // so that a run can be reproduced from its seed
    String suggestionName = RandomStrings.randomAsciiOfLength(getRandom(), 10);
    SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
            new CompletionSuggestionBuilder(suggestionName).field(FIELD).text(suggestString).size(10)
    ).execute().actionGet();

    assertSuggestions(suggestResponse, false, suggestionName, suggestions);
}
/**
 * Asserts that the named suggestion of the response contains exactly the given suggestions in the
 * given order.
 */
private void assertSuggestions(SuggestResponse suggestResponse, String name, String... suggestions) {
    final boolean strictOrder = true;
    assertSuggestions(suggestResponse, strictOrder, name, suggestions);
}
/**
 * Asserts that no shard failed, that the named suggestion exists and that the options of its first
 * entry match the expectation.
 *
 * @param suggestResponse       the response to inspect
 * @param suggestionOrderStrict if {@code true} the option at every position must equal the expected
 *                              text at that position; otherwise each expected text only has to be
 *                              among the returned texts
 * @param name                  the name of the suggestion inside the response
 * @param suggestions           the expected suggestion texts; their number must equal the number of options
 */
private void assertSuggestions(SuggestResponse suggestResponse, boolean suggestionOrderStrict, String name, String... suggestions) {
    assertThat(suggestResponse.getFailedShards(), is(0));
    assertThat(suggestResponse.getSuggest().getSuggestion(name), is(notNullValue()));

    Suggest.Suggestion<Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option>> suggestion = suggestResponse.getSuggest().getSuggestion(name);
    Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> firstEntry = suggestion.getEntries().get(0);
    List<String> suggestionList = getNames(firstEntry);
    List<Suggest.Suggestion.Entry.Option> options = firstEntry.getOptions();

    String assertMsg = String.format(Locale.ROOT, "Expected options %s length to be %s, but was %s", suggestionList, suggestions.length, options.size());
    assertThat(assertMsg, options.size(), is(suggestions.length));

    if (!suggestionOrderStrict) {
        // order does not matter, every expected text only has to be present
        for (String expectedSuggestion : suggestions) {
            String errMsg = String.format(Locale.ROOT, "Expected elem %s to be in list %s", expectedSuggestion, suggestionList);
            assertThat(errMsg, suggestionList, hasItem(expectedSuggestion));
        }
        return;
    }

    // strict order, compare position by position
    for (int position = 0; position < suggestions.length; position++) {
        String errMsg = String.format(Locale.ROOT, "Expected elem %s in list %s to be [%s] score: %s", position, suggestionList, suggestions[position], options.get(position).getScore());
        assertThat(errMsg, options.get(position).getText().toString(), is(suggestions[position]));
    }
}
/**
 * Collects the texts of all options of the given suggest entry, in option order.
 */
private List<String> getNames(Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> suggestEntry) {
    List<String> optionTexts = Lists.newArrayList();
    List<Suggest.Suggestion.Entry.Option> options = suggestEntry.getOptions();
    for (int i = 0; i < options.size(); i++) {
        optionTexts.add(options.get(i).getText().string());
    }
    return optionTexts;
}
/**
 * Creates the test index with the settings most tests use: "simple" analyzers, payloads enabled,
 * separators not preserved, position increments preserved.
 */
private void createIndexAndMapping() throws IOException {
    final String analyzer = "simple";
    createIndexAndMapping(analyzer, analyzer, true, false, true);
}
/**
 * Deletes the existing indices, creates the test index with the given settings and puts a mapping
 * that declares the test field as completion field with the given options.
 *
 * @param settingsBuilder            index settings used on index creation
 * @param indexAnalyzer              value of the "index_analyzer" mapping option
 * @param searchAnalyzer             value of the "search_analyzer" mapping option
 * @param payloads                   value of the "payloads" mapping option
 * @param preserveSeparators         value of the "preserve_separators" mapping option
 * @param preservePositionIncrements value of the "preserve_position_increments" mapping option
 */
private void createIndexAndMappingAndSettings(Settings.Builder settingsBuilder, String indexAnalyzer, String searchAnalyzer, boolean payloads, boolean preserveSeparators, boolean preservePositionIncrements) throws IOException {
    // start from a clean slate
    client().admin().indices().prepareDelete().get();

    client().admin().indices().prepareCreate(INDEX).setSettings(settingsBuilder).get();

    PutMappingResponse mappingResponse = client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
            .startObject(TYPE).startObject("properties")
            .startObject(FIELD)
            .field("type", "completion")
            .field("index_analyzer", indexAnalyzer)
            .field("search_analyzer", searchAnalyzer)
            .field("payloads", payloads)
            .field("preserve_separators", preserveSeparators)
            .field("preserve_position_increments", preservePositionIncrements)
            .endObject()
            .endObject().endObject()
            .endObject())
            .get();

    assertThat(mappingResponse.isAcknowledged(), is(true));
    ensureYellow();
}
/**
 * Creates the test index with a random number of shards (1 to 5) and replicas (0 to 2) and the
 * completion field mapping described by the arguments.
 */
private void createIndexAndMapping(String indexAnalyzer, String searchAnalyzer, boolean payloads, boolean preserveSeparators, boolean preservePositionIncrements) throws IOException {
    int shards = between(1, 5);
    int replicas = between(0, 2);
    Settings.Builder indexSettings = settingsBuilder();
    indexSettings = indexSettings.put(SETTING_NUMBER_OF_SHARDS, shards);
    indexSettings = indexSettings.put(SETTING_NUMBER_OF_REPLICAS, replicas);
    createIndexAndMappingAndSettings(indexSettings, indexAnalyzer, searchAnalyzer, payloads, preserveSeparators, preservePositionIncrements);
}
/**
 * Indexes the sample suggestions twice: first under the ids "0".."6" with a bogus weight of 1, then
 * under the ids "n0".."n6" with the real weights, and refreshes the index afterwards.
 *
 * @param optimize if {@code true} the index is additionally flushed and optimized
 */
private void createData(boolean optimize) throws IOException, InterruptedException, ExecutionException {
    String[][] input = {{"Foo Fighters"}, {"Generator", "Foo Fighters Generator"}, {"Learn to Fly", "Foo Fighters Learn to Fly" }, {"The Prodigy"}, {"Firestarter", "The Prodigy Firestarter"}, {"Turbonegro"}, {"Get it on", "Turbonegro Get it on"}};
    String[] surface = {"Foo Fighters", "Generator - Foo Fighters", "Learn to Fly - Foo Fighters", "The Prodigy", "Firestarter - The Prodigy", "Turbonegro", "Get it on - Turbonegro"};
    int[] weight = {10, 9, 8, 12, 11, 6, 7};
    IndexRequestBuilder[] builders = new IndexRequestBuilder[input.length];

    // first pass
    for (int doc = 0; doc < builders.length; doc++) {
        IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, "" + doc);
        builders[doc] = request.setSource(jsonBuilder()
                .startObject().startObject(FIELD)
                .startArray("input").value(input[doc]).endArray()
                .field("output",surface[doc])
                .field("payload", "id: " + doc)
                .field("weight", 1) // WE FORCEFULLY INDEX A BOGUS WEIGHT
                .endObject()
                .endObject()
        );
    }
    indexRandom(INDEX, false, builders);

    // second pass: add them again to make sure we deduplicate on the surface form
    for (int doc = 0; doc < builders.length; doc++) {
        IndexRequestBuilder request = client().prepareIndex(INDEX, TYPE, "n" + doc);
        builders[doc] = request.setSource(jsonBuilder()
                .startObject().startObject(FIELD)
                .startArray("input").value(input[doc]).endArray()
                .field("output",surface[doc])
                .field("payload", "id: " + doc)
                .field("weight", weight[doc])
                .endObject()
                .endObject()
        );
    }
    indexRandom(INDEX, false, builders);

    client().admin().indices().prepareRefresh(INDEX).execute().actionGet();
    if (!optimize) {
        return;
    }
    // make sure merging works just fine
    client().admin().indices().prepareFlush(INDEX).execute().actionGet();
    client().admin().indices().prepareOptimize(INDEX).execute().actionGet();
}
}

View File

@ -0,0 +1,169 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.integration.search.suggest;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.synonym.SynonymMap.Builder;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRef;
import org.elasticsearch.search.suggest.completion.CompletionTokenStream;
import org.elasticsearch.search.suggest.completion.CompletionTokenStream.ByteTermAttribute;
import org.junit.Test;
import java.io.IOException;
import java.io.StringReader;
import java.util.Set;
public class CompletionTokenStreamTest extends BaseTokenStreamTestCase {
final XAnalyzingSuggester suggester = new XAnalyzingSuggester(new SimpleAnalyzer(TEST_VERSION_CURRENT));
/**
 * Runs a single keyword through the completion token stream and checks that one token with the
 * keyword as term, position increment 1 and the given payload is produced.
 */
@Test
public void testSuggestTokenFilter() throws Exception {
    TokenStream tokenStream = new MockTokenizer(new StringReader("mykeyword"), MockTokenizer.WHITESPACE, true);
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");

    // delegates the expansion into finite strings to the shared suggester
    CompletionTokenStream.ToFiniteStrings toFiniteStrings = new CompletionTokenStream.ToFiniteStrings() {
        @Override
        public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
            return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
        }
    };
    TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(new CompletionTokenStream(tokenStream, payload, toFiniteStrings));

    String[] expectedTerms = new String[] {"mykeyword"};
    // the helper filter copies the payload into the type attribute
    String[] expectedTypes = new String[] {"Surface keyword|friggin payload|10"};
    int[] expectedPositionIncrements = new int[] { 1 };
    assertTokenStreamContents(suggestTokenStream, expectedTerms, null, null, expectedTypes, expectedPositionIncrements, null, null);
}
/**
 * Runs a keyword that has one synonym through the completion token stream and checks that both the
 * synonym and the keyword are produced, each carrying the payload.
 */
@Test
public void testSuggestTokenFilterWithSynonym() throws Exception {
    Builder synonyms = new SynonymMap.Builder(true);
    synonyms.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);

    MockTokenizer tokenizer = new MockTokenizer(new StringReader("mykeyword"), MockTokenizer.WHITESPACE, true);
    SynonymFilter synonymFilter = new SynonymFilter(tokenizer, synonyms.build(), true);

    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");

    // delegates the expansion into finite strings to the shared suggester
    CompletionTokenStream.ToFiniteStrings toFiniteStrings = new CompletionTokenStream.ToFiniteStrings() {
        @Override
        public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
            return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
        }
    };
    TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(new CompletionTokenStream(synonymFilter, payload, toFiniteStrings));

    String[] expectedTerms = new String[] {"mysynonym", "mykeyword"};
    // the helper filter copies the payload into the type attribute
    String[] expectedTypes = new String[] {"Surface keyword|friggin payload|10", "Surface keyword|friggin payload|10"};
    int[] expectedPositionIncrements = new int[] { 2, 0 };
    assertTokenStreamContents(suggestTokenStream, expectedTerms, null, null, expectedTypes, expectedPositionIncrements, null, null);
}
/**
 * Analyzes eight tokens, each of which has one synonym, and checks that the completion token
 * stream emits 256 non-empty terms whose position increments add up to the number of terms.
 */
@Test
public void testValidNumberOfExpansions() throws IOException {
    // every number from 1 to 256 gets the synonym "1000 + number"; the original token is kept
    Builder builder = new SynonymMap.Builder(true);
    for (int i = 0; i < 256; i++) {
        builder.add(new CharsRef("" + (i+1)), new CharsRef("" + (1000 + (i+1))), true);
    }

    // input text "1 2 3 4 5 6 7 8 "
    StringBuilder valueBuilder = new StringBuilder();
    for (int i = 0 ; i < 8 ; i++) {
        valueBuilder.append(i+1);
        valueBuilder.append(" ");
    }
    MockTokenizer tokenizer = new MockTokenizer(new StringReader(valueBuilder.toString()), MockTokenizer.WHITESPACE, true);
    SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);

    TokenStream suggestTokenStream = new CompletionTokenStream(filter, new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() {
        @Override
        public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
            return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
        }
    });

    suggestTokenStream.reset();
    ByteTermAttribute attr = suggestTokenStream.addAttribute(ByteTermAttribute.class);
    PositionIncrementAttribute posAttr = suggestTokenStream.addAttribute(PositionIncrementAttribute.class);
    int maxPos = 0;
    int count = 0;
    while(suggestTokenStream.incrementToken()) {
        count++;
        assertNotNull(attr.getBytesRef());
        assertTrue(attr.getBytesRef().length > 0);
        maxPos += posAttr.getPositionIncrement();
    }
    suggestTokenStream.close();

    // expected value goes first so that a failure message reads "expected:<256> but was:<n>"
    assertEquals(256, count);
    assertEquals(count, maxPos);
}
/**
 * Analyzes nine tokens, each of which has one synonym, and expects the completion token stream to
 * reject the input with an {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testInValidNumberOfExpansions() throws IOException {
    // every number from 1 to 256 gets the synonym "1000 + number"; the original token is kept
    Builder synonyms = new SynonymMap.Builder(true);
    for (int number = 1; number <= 256; number++) {
        synonyms.add(new CharsRef("" + number), new CharsRef("" + (1000 + number)), true);
    }

    StringBuilder text = new StringBuilder();
    for (int number = 1; number <= 9; number++) { // 9 -> expands to 512
        text.append(number);
        text.append(" ");
    }

    MockTokenizer tokenizer = new MockTokenizer(new StringReader(text.toString()), MockTokenizer.WHITESPACE, true);
    SynonymFilter synonymFilter = new SynonymFilter(tokenizer, synonyms.build(), true);
    BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");

    TokenStream suggestTokenStream = new CompletionTokenStream(synonymFilter, payload, new CompletionTokenStream.ToFiniteStrings() {
        @Override
        public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
            return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
        }
    });

    suggestTokenStream.reset();
    suggestTokenStream.incrementToken();
    suggestTokenStream.close();
}
/**
 * Test helper filter that makes the output of a completion token stream checkable through the
 * regular char term and type attributes: the bytes of the {@link ByteTermAttribute} are copied
 * into the {@link CharTermAttribute} and the payload is copied into the {@link TypeAttribute}.
 */
public final static class ByteTermAttrToCharTermAttrFilter extends TokenFilter {
    private final CharTermAttribute attr = addAttribute(CharTermAttribute.class);
    private final ByteTermAttribute byteAttr = addAttribute(ByteTermAttribute.class);
    private final PayloadAttribute payload = addAttribute(PayloadAttribute.class);
    private final TypeAttribute type = addAttribute(TypeAttribute.class);

    protected ByteTermAttrToCharTermAttrFilter(TokenStream input) {
        super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
        if (input.incrementToken()) {
            BytesRef bytesRef = byteAttr.getBytesRef();
            // reset the term first so the text of a previous token can never leak into this one,
            // even if the wrapped stream did not clear the attribute
            attr.setEmpty().append(bytesRef.utf8ToString());
            // we move them over so we can assert them more easily in the tests
            type.setType(payload.getPayload().utf8ToString());
            return true;
        }
        return false;
    }
}
}

View File

@ -0,0 +1,88 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.index.mapper.completion;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
import org.elasticsearch.test.unit.index.mapper.MapperTestUtils;
import org.junit.Test;
import java.io.IOException;
import java.util.Map;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
/**
 * Unit tests for parsing and serializing the mapping of a completion field.
 */
public class CompletionFieldMapperTests {

    /**
     * A completion field without further options must not store payloads.
     */
    @Test
    public void testDefaultConfiguration() throws IOException {
        String mapping = jsonBuilder().startObject().startObject("type1")
                .startObject("properties").startObject("completion")
                .field("type", "completion")
                .endObject().endObject()
                .endObject().endObject().string();

        CompletionFieldMapper completionFieldMapper = parseCompletionMapper(mapping);

        assertThat(completionFieldMapper.isStoringPayloads(), is(false));
    }

    /**
     * Every option set in the mapping has to show up again, with the same value, when the mapper is
     * serialized.
     */
    @Test
    public void testThatSerializationIncludesAllElements() throws Exception {
        String mapping = jsonBuilder().startObject().startObject("type1")
                .startObject("properties").startObject("completion")
                .field("type", "completion")
                .field("index_analyzer", "simple")
                .field("search_analyzer", "standard")
                .field("payloads", true)
                .field("preserve_separators", false)
                .field("preserve_position_increments", true)
                .endObject().endObject()
                .endObject().endObject().string();

        CompletionFieldMapper completionFieldMapper = parseCompletionMapper(mapping);

        // serialize the mapper and read the result back as a map
        XContentBuilder builder = jsonBuilder().startObject();
        completionFieldMapper.toXContent(builder, null).endObject();
        builder.close();
        Map<String, Object> serializedMap = JsonXContent.jsonXContent.createParser(builder.bytes()).mapAndClose();
        Map<String, Object> configMap = (Map<String, Object>) serializedMap.get("completion");

        assertThat(configMap.get("index_analyzer").toString(), is("simple"));
        assertThat(configMap.get("search_analyzer").toString(), is("standard"));
        assertThat(Boolean.valueOf(configMap.get("payloads").toString()), is(true));
        assertThat(Boolean.valueOf(configMap.get("preserve_separators").toString()), is(false));
        assertThat(Boolean.valueOf(configMap.get("preserve_position_increments").toString()), is(true));
    }

    /**
     * Parses the mapping and returns the mapper registered under the field name "completion", after
     * asserting that it is a {@link CompletionFieldMapper}.
     */
    private static CompletionFieldMapper parseCompletionMapper(String mapping) throws IOException {
        DocumentMapper documentMapper = MapperTestUtils.newParser().parse(mapping);
        FieldMapper fieldMapper = documentMapper.mappers().name("completion").mapper();
        assertThat(fieldMapper, instanceOf(CompletionFieldMapper.class));
        return (CompletionFieldMapper) fieldMapper;
    }
}