Added prefix suggestions based on AnalyzingSuggester
This commit introduces near realtime suggestions. For more information about its usage refer to github issue #3376 From the implementation point of view, a custom AnalyzingSuggester is used in combination with a custom postingsformat (which is not exposed to the user anywhere for him to use). Closes #3376
This commit is contained in:
parent
fd15b6278b
commit
4f4f3a2b10
File diff suppressed because it is too large
Load Diff
|
@ -92,6 +92,7 @@ public class DocumentMapperParser extends AbstractIndexComponent {
|
|||
.put(ObjectMapper.CONTENT_TYPE, new ObjectMapper.TypeParser())
|
||||
.put(ObjectMapper.NESTED_CONTENT_TYPE, new ObjectMapper.TypeParser())
|
||||
.put(MultiFieldMapper.CONTENT_TYPE, new MultiFieldMapper.TypeParser())
|
||||
.put(CompletionFieldMapper.CONTENT_TYPE, new CompletionFieldMapper.TypeParser())
|
||||
.put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser());
|
||||
|
||||
if (ShapesAvailability.JTS_AVAILABLE) {
|
||||
|
|
|
@ -0,0 +1,317 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.mapper.core;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
|
||||
import org.elasticsearch.index.fielddata.FieldDataType;
|
||||
import org.elasticsearch.index.mapper.Mapper;
|
||||
import org.elasticsearch.index.mapper.MapperException;
|
||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||
import org.elasticsearch.index.mapper.ParseContext;
|
||||
import org.elasticsearch.index.similarity.SimilarityProvider;
|
||||
import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionPostingsFormatProvider;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionTokenStream;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class CompletionFieldMapper extends AbstractFieldMapper<String> {
|
||||
|
||||
public static final String CONTENT_TYPE = "completion";
|
||||
|
||||
public static class Defaults extends AbstractFieldMapper.Defaults {
|
||||
public static final FieldType FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE);
|
||||
|
||||
static {
|
||||
FIELD_TYPE.setOmitNorms(true);
|
||||
FIELD_TYPE.freeze();
|
||||
}
|
||||
|
||||
public static final boolean DEFAULT_PRESERVE_SEPARATORS = true;
|
||||
public static final boolean DEFAULT_POSITION_INCREMENTS = true;
|
||||
public static final boolean DEFAULT_HAS_PAYLOADS = false;
|
||||
}
|
||||
|
||||
public static class Fields {
|
||||
public static final String INDEX_ANALYZER = "index_analyzer";
|
||||
public static final String SEARCH_ANALYZER = "search_analyzer";
|
||||
public static final String PRESERVE_SEPARATORS = "preserve_separators";
|
||||
public static final String PRESERVE_POSITION_INCREMENTS = "preserve_position_increments";
|
||||
public static final String PAYLOADS = "payloads";
|
||||
public static final String TYPE = "type";
|
||||
}
|
||||
|
||||
public static class Builder extends AbstractFieldMapper.OpenBuilder<Builder, CompletionFieldMapper> {
|
||||
|
||||
private NamedAnalyzer searchAnalyzer;
|
||||
private NamedAnalyzer indexAnalyzer;
|
||||
private boolean preserveSeparators = Defaults.DEFAULT_PRESERVE_SEPARATORS;
|
||||
private boolean payloads = Defaults.DEFAULT_HAS_PAYLOADS;
|
||||
private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS;
|
||||
|
||||
public Builder(String name) {
|
||||
super(name, Defaults.FIELD_TYPE);
|
||||
}
|
||||
|
||||
public Builder searchAnalyzer(NamedAnalyzer searchAnalyzer) {
|
||||
this.searchAnalyzer = searchAnalyzer;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder indexAnalyzer(NamedAnalyzer indexAnalyzer) {
|
||||
this.indexAnalyzer = indexAnalyzer;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder payloads(boolean payloads) {
|
||||
this.payloads = payloads;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder preserveSeparators(boolean preserveSeparators) {
|
||||
this.preserveSeparators = preserveSeparators;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder preservePositionIncrements(boolean preservePositionIncrements) {
|
||||
this.preservePositionIncrements = preservePositionIncrements;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletionFieldMapper build(Mapper.BuilderContext context) {
|
||||
return new CompletionFieldMapper(buildNames(context), indexAnalyzer, searchAnalyzer, provider, similarity, payloads, preserveSeparators, preservePositionIncrements);
|
||||
}
|
||||
}
|
||||
|
||||
public static class TypeParser implements Mapper.TypeParser {
|
||||
|
||||
@Override
|
||||
public Mapper.Builder<?, ?> parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
|
||||
CompletionFieldMapper.Builder builder = new CompletionFieldMapper.Builder(name);
|
||||
for (Map.Entry<String, Object> entry : node.entrySet()) {
|
||||
String fieldName = entry.getKey();
|
||||
Object fieldNode = entry.getValue();
|
||||
if (fieldName.equals("type")) {
|
||||
continue;
|
||||
}
|
||||
if (fieldName.equals(Fields.INDEX_ANALYZER) || fieldName.equals("indexAnalyzer")) {
|
||||
builder.indexAnalyzer(parserContext.analysisService().analyzer(fieldNode.toString()));
|
||||
} else if (fieldName.equals(Fields.SEARCH_ANALYZER) || fieldName.equals("searchAnalyzer")) {
|
||||
builder.searchAnalyzer(parserContext.analysisService().analyzer(fieldNode.toString()));
|
||||
} else if (fieldName.equals(Fields.PAYLOADS)) {
|
||||
builder.payloads(Boolean.parseBoolean(fieldNode.toString()));
|
||||
} else if (fieldName.equals(Fields.PRESERVE_SEPARATORS) || fieldName.equals("preserveSeparators")) {
|
||||
builder.preserveSeparators(Boolean.parseBoolean(fieldNode.toString()));
|
||||
} else if (fieldName.equals(Fields.PRESERVE_POSITION_INCREMENTS) || fieldName.equals("preservePositionIncrements")) {
|
||||
builder.preservePositionIncrements(Boolean.parseBoolean(fieldNode.toString()));
|
||||
}
|
||||
}
|
||||
|
||||
if (builder.searchAnalyzer == null) {
|
||||
builder.searchAnalyzer(parserContext.analysisService().analyzer("simple"));
|
||||
}
|
||||
|
||||
if (builder.indexAnalyzer == null) {
|
||||
builder.indexAnalyzer(parserContext.analysisService().analyzer("simple"));
|
||||
}
|
||||
// we are just using this as the default to be wrapped by the CompletionPostingsFormatProvider in the SuggesteFieldMapper ctor
|
||||
builder.postingsFormat(parserContext.postingFormatService().get("default"));
|
||||
return builder;
|
||||
}
|
||||
}
|
||||
|
||||
private static final BytesRef EMPTY = new BytesRef();
|
||||
|
||||
private final CompletionPostingsFormatProvider completionPostingsFormatProvider;
|
||||
private final AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider;
|
||||
private final boolean payloads;
|
||||
private final boolean preservePositionIncrements;
|
||||
private final boolean preserveSeparators;
|
||||
|
||||
public CompletionFieldMapper(Names names, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer, PostingsFormatProvider provider, SimilarityProvider similarity, boolean payloads,
|
||||
boolean preserveSeparators, boolean preservePositionIncrements) {
|
||||
super(names, 1.0f, Defaults.FIELD_TYPE, indexAnalyzer, searchAnalyzer, provider, similarity, null);
|
||||
analyzingSuggestLookupProvider = new AnalyzingCompletionLookupProvider(preserveSeparators, false, preservePositionIncrements, payloads);
|
||||
this.completionPostingsFormatProvider = new CompletionPostingsFormatProvider("completion", provider, analyzingSuggestLookupProvider);
|
||||
this.preserveSeparators = preserveSeparators;
|
||||
this.payloads = payloads;
|
||||
this.preservePositionIncrements = preservePositionIncrements;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public PostingsFormatProvider postingsFormatProvider() {
|
||||
return this.completionPostingsFormatProvider;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parse(ParseContext context) throws IOException {
|
||||
XContentParser parser = context.parser();
|
||||
XContentParser.Token token = parser.currentToken();
|
||||
|
||||
String surfaceForm = null;
|
||||
BytesRef payload = null;
|
||||
long weight = -1;
|
||||
List<String> inputs = Lists.newArrayListWithExpectedSize(4);
|
||||
|
||||
if (token == XContentParser.Token.VALUE_STRING) {
|
||||
inputs.add(parser.text());
|
||||
} else {
|
||||
String currentFieldName = null;
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
currentFieldName = parser.currentName();
|
||||
} else if ("payload".equals(currentFieldName)) {
|
||||
if (!isStoringPayloads()) {
|
||||
throw new MapperException("Payloads disabled in mapping");
|
||||
}
|
||||
if (token == XContentParser.Token.START_OBJECT) {
|
||||
XContentBuilder payloadBuilder = XContentFactory.contentBuilder(parser.contentType()).copyCurrentStructure(parser);
|
||||
payload = payloadBuilder.bytes().toBytesRef();
|
||||
payloadBuilder.close();
|
||||
}
|
||||
} else if (token == XContentParser.Token.VALUE_STRING) {
|
||||
if ("output".equals(currentFieldName)) {
|
||||
surfaceForm = parser.text();
|
||||
}
|
||||
if ("input".equals(currentFieldName)) {
|
||||
inputs.add(parser.text());
|
||||
}
|
||||
} else if (token == XContentParser.Token.VALUE_NUMBER) {
|
||||
if ("weight".equals(currentFieldName)) {
|
||||
weight = parser.longValue(); // always parse a long to make sure we don't get the overflow value
|
||||
if (weight < 0 || weight > Integer.MAX_VALUE) {
|
||||
throw new ElasticSearchIllegalArgumentException("Weight must be in the interval [0..2147483647] but was " + weight);
|
||||
}
|
||||
}
|
||||
} else if (token == XContentParser.Token.START_ARRAY) {
|
||||
if ("input".equals(currentFieldName)) {
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
inputs.add(parser.text());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
payload = payload == null ? EMPTY: payload;
|
||||
if (surfaceForm == null) { // no surface form use the input
|
||||
for (String input : inputs) {
|
||||
BytesRef suggestPayload = analyzingSuggestLookupProvider.buildPayload(new BytesRef(
|
||||
input), weight, payload);
|
||||
context.doc().add(getCompletionField(input, suggestPayload));
|
||||
}
|
||||
} else {
|
||||
BytesRef suggestPayload = analyzingSuggestLookupProvider.buildPayload(new BytesRef(
|
||||
surfaceForm), weight, payload);
|
||||
for (String input : inputs) {
|
||||
context.doc().add(getCompletionField(input, suggestPayload));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Field getCompletionField(String input, BytesRef payload) {
|
||||
return new SuggestField(names().fullName(), input, this.fieldType, payload, analyzingSuggestLookupProvider);
|
||||
}
|
||||
|
||||
public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
|
||||
return analyzingSuggestLookupProvider.buildPayload(
|
||||
surfaceForm, weight, payload);
|
||||
}
|
||||
|
||||
private static final class SuggestField extends Field {
|
||||
private final BytesRef payload;
|
||||
private final CompletionTokenStream.ToFiniteStrings toFiniteStrings;
|
||||
|
||||
public SuggestField(String name, String value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) {
|
||||
super(name, value, type);
|
||||
this.payload = payload;
|
||||
this.toFiniteStrings = toFiniteStrings;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(Analyzer analyzer) throws IOException {
|
||||
TokenStream ts = super.tokenStream(analyzer);
|
||||
return new CompletionTokenStream(ts, payload, toFiniteStrings);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
return builder.startObject(name())
|
||||
.field(Fields.TYPE, CONTENT_TYPE)
|
||||
.field(Fields.INDEX_ANALYZER, indexAnalyzer.name())
|
||||
.field(Fields.SEARCH_ANALYZER, searchAnalyzer.name())
|
||||
.field(Fields.PAYLOADS, this.payloads)
|
||||
.field(Fields.PRESERVE_SEPARATORS, this.preserveSeparators)
|
||||
.field(Fields.PRESERVE_POSITION_INCREMENTS, this.preservePositionIncrements)
|
||||
.endObject();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Field parseCreateField(ParseContext context) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
protected String contentType() {
|
||||
return CONTENT_TYPE;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public FieldType defaultFieldType() {
|
||||
return Defaults.FIELD_TYPE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldDataType defaultFieldDataType() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String value(Object value) {
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
return value.toString();
|
||||
}
|
||||
|
||||
public boolean isStoringPayloads() {
|
||||
return payloads;
|
||||
}
|
||||
}
|
|
@ -29,6 +29,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
|
|||
import org.elasticsearch.common.xcontent.XContentBuilderString;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry;
|
||||
import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
|
||||
import org.elasticsearch.search.suggest.term.TermSuggestion;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -114,6 +115,9 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
case TermSuggestion.TYPE:
|
||||
suggestion = new TermSuggestion();
|
||||
break;
|
||||
case CompletionSuggestion.TYPE:
|
||||
suggestion = new CompletionSuggestion();
|
||||
break;
|
||||
default:
|
||||
suggestion = new Suggestion<Entry<Option>>();
|
||||
break;
|
||||
|
@ -522,6 +526,10 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
return score;
|
||||
}
|
||||
|
||||
protected void setScore(float score) {
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
text = in.readText();
|
||||
|
@ -567,8 +575,8 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
return text.hashCode();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public enum Sort {
|
||||
|
||||
/**
|
||||
|
@ -600,8 +608,6 @@ public class Suggest implements Iterable<Suggest.Suggestion<? extends Entry<? ex
|
|||
throw new ElasticSearchException("Illegal suggest sort " + id);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.elasticsearch.search.suggest;
|
|||
import com.google.common.collect.Lists;
|
||||
import org.elasticsearch.common.inject.AbstractModule;
|
||||
import org.elasticsearch.common.inject.multibindings.Multibinder;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionSuggester;
|
||||
import org.elasticsearch.search.suggest.phrase.PhraseSuggester;
|
||||
import org.elasticsearch.search.suggest.term.TermSuggester;
|
||||
|
||||
|
@ -36,6 +37,7 @@ public class SuggestModule extends AbstractModule {
|
|||
public SuggestModule() {
|
||||
registerSuggester(PhraseSuggester.class);
|
||||
registerSuggester(TermSuggester.class);
|
||||
registerSuggester(CompletionSuggester.class);
|
||||
}
|
||||
|
||||
public void registerSuggester(Class<? extends Suggester> suggester) {
|
||||
|
|
|
@ -159,6 +159,7 @@ public final class SuggestUtils {
|
|||
numTokens++;
|
||||
}
|
||||
consumer.end();
|
||||
stream.close();
|
||||
return numTokens;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,260 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.codecs.*;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.fst.*;
|
||||
import org.apache.lucene.util.fst.PairOutputs.Pair;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider;
|
||||
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider {
|
||||
|
||||
// for serialization
|
||||
public static final int SERIALIZE_PRESERVE_SEPERATORS = 1;
|
||||
public static final int SERIALIZE_HAS_PAYLOADS = 2;
|
||||
public static final int SERIALIZE_PRESERVE_POSITION_INCREMENTS = 4;
|
||||
|
||||
private static final int MAX_SURFACE_FORMS_PER_ANALYZED_FORM = 256;
|
||||
private static final int MAX_GRAPH_EXPANSIONS = -1;
|
||||
|
||||
public static final String CODEC_NAME = "analyzing";
|
||||
public static final int CODEC_VERSION = 1;
|
||||
|
||||
private boolean preserveSep;
|
||||
private boolean preservePositionIncrements;
|
||||
private int maxSurfaceFormsPerAnalyzedForm;
|
||||
private int maxGraphExpansions;
|
||||
private boolean hasPayloads;
|
||||
private final XAnalyzingSuggester prototype;
|
||||
|
||||
public AnalyzingCompletionLookupProvider(boolean preserveSep, boolean exactFirst, boolean preservePositionIncrements, boolean hasPayloads) {
|
||||
this.preserveSep = preserveSep;
|
||||
this.preservePositionIncrements = preservePositionIncrements;
|
||||
this.hasPayloads = hasPayloads;
|
||||
this.maxSurfaceFormsPerAnalyzedForm = MAX_SURFACE_FORMS_PER_ANALYZED_FORM;
|
||||
this.maxGraphExpansions = MAX_GRAPH_EXPANSIONS;
|
||||
int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;
|
||||
// needs to fixed in the suggester first before it can be supported
|
||||
//options |= exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0;
|
||||
prototype = new XAnalyzingSuggester(null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, null, false, 1);
|
||||
prototype.setPreservePositionIncrements(preservePositionIncrements);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return "analyzing";
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldsConsumer consumer(final IndexOutput output) throws IOException {
|
||||
CodecUtil.writeHeader(output, CODEC_NAME, CODEC_VERSION);
|
||||
return new FieldsConsumer() {
|
||||
private Map<FieldInfo, Long> fieldOffsets = new HashMap<FieldInfo, Long>();
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try { /*
|
||||
* write the offsets per field such that we know where
|
||||
* we need to load the FSTs from
|
||||
*/
|
||||
long pointer = output.getFilePointer();
|
||||
output.writeVInt(fieldOffsets.size());
|
||||
for (Map.Entry<FieldInfo, Long> entry : fieldOffsets.entrySet()) {
|
||||
output.writeString(entry.getKey().name);
|
||||
output.writeVLong(entry.getValue());
|
||||
}
|
||||
output.writeLong(pointer);
|
||||
output.flush();
|
||||
} finally {
|
||||
IOUtils.close(output);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsConsumer addField(final FieldInfo field) throws IOException {
|
||||
|
||||
return new TermsConsumer() {
|
||||
final XAnalyzingSuggester.XBuilder builder = new XAnalyzingSuggester.XBuilder(maxSurfaceFormsPerAnalyzedForm, hasPayloads);
|
||||
final CompletionPostingsConsumer postingsConsumer = new CompletionPostingsConsumer(AnalyzingCompletionLookupProvider.this, builder);
|
||||
|
||||
@Override
|
||||
public PostingsConsumer startTerm(BytesRef text) throws IOException {
|
||||
builder.startTerm(text);
|
||||
return postingsConsumer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() throws IOException {
|
||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
|
||||
builder.finishTerm(stats.docFreq); // use doc freq as a fallback
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
|
||||
/*
|
||||
* Here we are done processing the field and we can
|
||||
* buid the FST and write it to disk.
|
||||
*/
|
||||
FST<Pair<Long, BytesRef>> build = builder.build();
|
||||
fieldOffsets.put(field, output.getFilePointer());
|
||||
build.save(output);
|
||||
/* write some more meta-info */
|
||||
output.writeVInt(postingsConsumer.getMaxAnalyzedPathsForOneInput());
|
||||
output.writeVInt(maxSurfaceFormsPerAnalyzedForm);
|
||||
output.writeInt(maxGraphExpansions); // can be negative
|
||||
int options = 0;
|
||||
options |= preserveSep ? SERIALIZE_PRESERVE_SEPERATORS : 0;
|
||||
options |= hasPayloads ? SERIALIZE_HAS_PAYLOADS : 0;
|
||||
options |= preservePositionIncrements ? SERIALIZE_PRESERVE_POSITION_INCREMENTS : 0;
|
||||
output.writeVInt(options);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static final class CompletionPostingsConsumer extends PostingsConsumer {
|
||||
private final SuggestPayload spare = new SuggestPayload();
|
||||
private AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider;
|
||||
private XAnalyzingSuggester.XBuilder builder;
|
||||
private int maxAnalyzedPathsForOneInput = 0;
|
||||
|
||||
public CompletionPostingsConsumer(AnalyzingCompletionLookupProvider analyzingSuggestLookupProvider, XAnalyzingSuggester.XBuilder builder) {
|
||||
this.analyzingSuggestLookupProvider = analyzingSuggestLookupProvider;
|
||||
this.builder = builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startDoc(int docID, int freq) throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
|
||||
analyzingSuggestLookupProvider.parsePayload(payload, spare);
|
||||
builder.addSurface(spare.surfaceForm, spare.payload, spare.weight);
|
||||
// multi fields have the same surface form so we sum up here
|
||||
maxAnalyzedPathsForOneInput = Math.max(maxAnalyzedPathsForOneInput, position+1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishDoc() throws IOException {
|
||||
}
|
||||
|
||||
public int getMaxAnalyzedPathsForOneInput() {
|
||||
return maxAnalyzedPathsForOneInput;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@Override
|
||||
public LookupFactory load(IndexInput input) throws IOException {
|
||||
CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION, CODEC_VERSION);
|
||||
final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<String, AnalyzingSuggestHolder>();
|
||||
input.seek(input.length() - 8);
|
||||
long metaPointer = input.readLong();
|
||||
input.seek(metaPointer);
|
||||
int numFields = input.readVInt();
|
||||
|
||||
Map<Long, String> meta = new TreeMap<Long, String>();
|
||||
for (int i = 0; i < numFields; i++) {
|
||||
String name = input.readString();
|
||||
long offset = input.readVLong();
|
||||
meta.put(offset, name);
|
||||
}
|
||||
|
||||
for (Map.Entry<Long, String> entry : meta.entrySet()) {
|
||||
input.seek(entry.getKey());
|
||||
FST<Pair<Long, BytesRef>> fst = new FST<Pair<Long, BytesRef>>(input, new PairOutputs<Long, BytesRef>(
|
||||
PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()));
|
||||
int maxAnalyzedPathsForOneInput = input.readVInt();
|
||||
int maxSurfaceFormsPerAnalyzedForm = input.readVInt();
|
||||
int maxGraphExpansions = input.readInt();
|
||||
int options = input.readVInt();
|
||||
boolean preserveSep = (options & SERIALIZE_PRESERVE_SEPERATORS) != 0;
|
||||
boolean hasPayloads = (options & SERIALIZE_HAS_PAYLOADS) != 0;
|
||||
boolean preservePositionIncrements = (options & SERIALIZE_PRESERVE_POSITION_INCREMENTS) != 0;
|
||||
lookupMap.put(entry.getValue(), new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
|
||||
hasPayloads, maxAnalyzedPathsForOneInput, fst));
|
||||
}
|
||||
return new LookupFactory() {
|
||||
@Override
|
||||
public Lookup getLookup(FieldMapper<?> mapper, boolean exactFirst) {
|
||||
AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().fullName());
|
||||
if (analyzingSuggestHolder == null) {
|
||||
return null;
|
||||
}
|
||||
int flags = exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0;
|
||||
if (analyzingSuggestHolder.preserveSep) {
|
||||
flags |= XAnalyzingSuggester.PRESERVE_SEP;
|
||||
}
|
||||
XAnalyzingSuggester suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
|
||||
analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
|
||||
analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput);
|
||||
suggester.setPreservePositionIncrements(analyzingSuggestHolder.preservePositionIncrements);
|
||||
return suggester;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static class AnalyzingSuggestHolder {
|
||||
final boolean preserveSep;
|
||||
final boolean preservePositionIncrements;
|
||||
final int maxSurfaceFormsPerAnalyzedForm;
|
||||
final int maxGraphExpansions;
|
||||
final boolean hasPayloads;
|
||||
final int maxAnalyzedPathsForOneInput;
|
||||
final FST<Pair<Long, BytesRef>> fst;
|
||||
|
||||
public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads,
|
||||
int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst) {
|
||||
this.preserveSep = preserveSep;
|
||||
this.preservePositionIncrements = preservePositionIncrements;
|
||||
this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
|
||||
this.maxGraphExpansions = maxGraphExpansions;
|
||||
this.hasPayloads = hasPayloads;
|
||||
this.maxAnalyzedPathsForOneInput = maxAnalyzedPathsForOneInput;
|
||||
this.fst = fst;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
|
||||
return prototype.toFiniteStrings(prototype.getTokenStreamToAutomaton(), stream);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,332 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.ImmutableMap.Builder;
|
||||
import org.apache.lucene.codecs.*;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.FilterAtomicReader.FilterTerms;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.store.IOContext.Context;
|
||||
import org.apache.lucene.store.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.ElasticSearchIllegalStateException;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionTokenStream.ToFiniteStrings;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* This {@link PostingsFormat} is basically a T-Sink for a default postings
|
||||
* format that is used to store postings on disk fitting the lucene APIs and
|
||||
* builds a suggest FST as an auxiliary data structure next to the actual
|
||||
* postings format. It uses the delegate postings format for simplicity to
|
||||
* handle all the merge operations. The auxiliary suggest FST data structure is
|
||||
* only loaded if a FieldsProducer is requested for reading, for merging it uses
|
||||
* the low memory delegate postings format.
|
||||
*
|
||||
*/
|
||||
public class Completion090PostingsFormat extends PostingsFormat {
|
||||
|
||||
public static final String CODEC_NAME = "completion090";
|
||||
public static final int SUGGEST_CODEC_VERSION = 1;
|
||||
public static final String EXTENSION = "cmp";
|
||||
|
||||
private PostingsFormat delegatePostingsFormat;
|
||||
private final static Map<String, CompletionLookupProvider> providers;
|
||||
private CompletionLookupProvider writeProvider;
|
||||
|
||||
static {
|
||||
final CompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, false);
|
||||
final Builder<String, CompletionLookupProvider> builder = ImmutableMap.builder();
|
||||
providers = builder.put(provider.getName(), provider).build();
|
||||
}
|
||||
|
||||
public Completion090PostingsFormat(PostingsFormat delegatePostingsFormat, CompletionLookupProvider provider) {
|
||||
super(CODEC_NAME);
|
||||
this.delegatePostingsFormat = delegatePostingsFormat;
|
||||
this.writeProvider = provider;
|
||||
assert delegatePostingsFormat != null && writeProvider != null;
|
||||
}
|
||||
|
||||
/*
|
||||
* Used only by core Lucene at read-time via Service Provider instantiation
|
||||
* do not use at Write-time in application code.
|
||||
*/
|
||||
public Completion090PostingsFormat() {
|
||||
super(CODEC_NAME);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SuggestFieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
if (delegatePostingsFormat == null) {
|
||||
throw new UnsupportedOperationException("Error - " + getClass().getName()
|
||||
+ " has been constructed without a choice of PostingsFormat");
|
||||
}
|
||||
assert writeProvider != null;
|
||||
return new SuggestFieldsConsumer(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletionFieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||
return new CompletionFieldsProducer(state);
|
||||
}
|
||||
|
||||
private class SuggestFieldsConsumer extends FieldsConsumer {
|
||||
|
||||
private FieldsConsumer delegatesFieldsConsumer;
|
||||
private FieldsConsumer suggestFieldsConsumer;
|
||||
|
||||
public SuggestFieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
this.delegatesFieldsConsumer = delegatePostingsFormat.fieldsConsumer(state);
|
||||
String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
|
||||
IndexOutput output = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
output = state.directory.createOutput(suggestFSTFile, state.context);
|
||||
CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_CODEC_VERSION);
|
||||
/*
|
||||
* we write the delegate postings format name so we can load it
|
||||
* without getting an instance in the ctor
|
||||
*/
|
||||
output.writeString(delegatePostingsFormat.getName());
|
||||
output.writeString(writeProvider.getName());
|
||||
this.suggestFieldsConsumer = writeProvider.consumer(output);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsConsumer addField(final FieldInfo field) throws IOException {
|
||||
final TermsConsumer delegateConsumer = delegatesFieldsConsumer.addField(field);
|
||||
final TermsConsumer suggestTermConsumer = suggestFieldsConsumer.addField(field);
|
||||
final GroupedPostingsConsumer groupedPostingsConsumer = new GroupedPostingsConsumer(delegateConsumer, suggestTermConsumer);
|
||||
|
||||
return new TermsConsumer() {
|
||||
@Override
|
||||
public PostingsConsumer startTerm(BytesRef text) throws IOException {
|
||||
groupedPostingsConsumer.startTerm(text);
|
||||
return groupedPostingsConsumer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() throws IOException {
|
||||
return delegateConsumer.getComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
|
||||
suggestTermConsumer.finishTerm(text, stats);
|
||||
delegateConsumer.finishTerm(text, stats);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finish(long sumTotalTermFreq, long sumDocFreq, int docCount) throws IOException {
|
||||
suggestTermConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
|
||||
delegateConsumer.finish(sumTotalTermFreq, sumDocFreq, docCount);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
IOUtils.close(delegatesFieldsConsumer, suggestFieldsConsumer);
|
||||
}
|
||||
}
|
||||
|
||||
private class GroupedPostingsConsumer extends PostingsConsumer {
|
||||
|
||||
private TermsConsumer[] termsConsumers;
|
||||
private PostingsConsumer[] postingsConsumers;
|
||||
|
||||
public GroupedPostingsConsumer(TermsConsumer... termsConsumersArgs) {
|
||||
termsConsumers = termsConsumersArgs;
|
||||
postingsConsumers = new PostingsConsumer[termsConsumersArgs.length];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startDoc(int docID, int freq) throws IOException {
|
||||
for (PostingsConsumer postingsConsumer : postingsConsumers) {
|
||||
postingsConsumer.startDoc(docID, freq);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
|
||||
for (PostingsConsumer postingsConsumer : postingsConsumers) {
|
||||
postingsConsumer.addPosition(position, payload, startOffset, endOffset);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishDoc() throws IOException {
|
||||
for (PostingsConsumer postingsConsumer : postingsConsumers) {
|
||||
postingsConsumer.finishDoc();
|
||||
}
|
||||
}
|
||||
|
||||
public void startTerm(BytesRef text) throws IOException {
|
||||
for (int i = 0; i < termsConsumers.length; i++) {
|
||||
postingsConsumers[i] = termsConsumers[i].startTerm(text);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class CompletionFieldsProducer extends FieldsProducer {
|
||||
|
||||
private FieldsProducer delegateProducer;
|
||||
private LookupFactory lookupFactory;
|
||||
|
||||
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
|
||||
String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
|
||||
IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
|
||||
CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_CODEC_VERSION);
|
||||
boolean success = false;
|
||||
try {
|
||||
PostingsFormat delegatePostingsFormat = PostingsFormat.forName(input.readString());
|
||||
String providerName = input.readString();
|
||||
CompletionLookupProvider completionLookupProvider = providers.get(providerName);
|
||||
if (completionLookupProvider == null) {
|
||||
throw new ElasticSearchIllegalStateException("no provider with name [" + providerName + "] registered");
|
||||
}
|
||||
// TODO: we could clone the ReadState and make it always forward IOContext.MERGE to prevent unecessary heap usage?
|
||||
this.delegateProducer = delegatePostingsFormat.fieldsProducer(state);
|
||||
/*
|
||||
* If we are merging we don't load the FSTs at all such that we
|
||||
* don't consume so much memory during merge
|
||||
*/
|
||||
if (state.context.context != Context.MERGE) {
|
||||
// TODO: maybe we can do this in a fully lazy fashion based on some configuration
|
||||
// eventually we should have some kind of curciut breaker that prevents us from going OOM here
|
||||
// with some configuration
|
||||
this.lookupFactory = completionLookupProvider.load(input);
|
||||
}
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(delegateProducer, input);
|
||||
} else {
|
||||
IOUtils.close(input);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
IOUtils.close(delegateProducer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return delegateProducer.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
Terms terms = delegateProducer.terms(field);
|
||||
if (terms == null) {
|
||||
return terms;
|
||||
}
|
||||
return new CompletionTerms(terms, this.lookupFactory);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return delegateProducer.size();
|
||||
}
|
||||
}
|
||||
|
||||
public static final class CompletionTerms extends FilterTerms {
|
||||
private final LookupFactory lookup;
|
||||
|
||||
public CompletionTerms(Terms delegate, LookupFactory lookup) {
|
||||
super(delegate);
|
||||
this.lookup = lookup;
|
||||
}
|
||||
|
||||
public Lookup getLookup(FieldMapper<?> mapper, boolean exactFirst) {
|
||||
return lookup.getLookup(mapper, exactFirst);
|
||||
}
|
||||
}
|
||||
|
||||
public static abstract class CompletionLookupProvider implements PayloadProcessor, ToFiniteStrings {
|
||||
|
||||
public static final char UNIT_SEPARATOR = '\u001f';
|
||||
|
||||
public abstract FieldsConsumer consumer(IndexOutput output) throws IOException;
|
||||
|
||||
public abstract String getName();
|
||||
|
||||
public abstract LookupFactory load(IndexInput input) throws IOException;
|
||||
|
||||
@Override
|
||||
public BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException {
|
||||
if (weight < -1 || weight > Integer.MAX_VALUE) {
|
||||
throw new IllegalArgumentException("weight must be >= -1 && <= Integer.MAX_VALUE");
|
||||
}
|
||||
for (int i = 0; i < surfaceForm.length; i++) {
|
||||
if (surfaceForm.bytes[i] == UNIT_SEPARATOR) {
|
||||
throw new IllegalArgumentException(
|
||||
"surface form cannot contain unit separator character U+001F; this character is reserved");
|
||||
}
|
||||
}
|
||||
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
||||
OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream);
|
||||
output.writeVLong(weight + 1);
|
||||
output.writeVInt(surfaceForm.length);
|
||||
output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
|
||||
output.writeVInt(payload.length);
|
||||
output.writeBytes(payload.bytes, 0, payload.length);
|
||||
|
||||
output.close();
|
||||
return new BytesRef(byteArrayOutputStream.toByteArray());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException {
|
||||
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(payload.bytes, payload.offset, payload.length);
|
||||
InputStreamDataInput input = new InputStreamDataInput(byteArrayInputStream);
|
||||
ref.weight = input.readVLong() - 1;
|
||||
int len = input.readVInt();
|
||||
ref.surfaceForm.grow(len);
|
||||
ref.surfaceForm.length = len;
|
||||
input.readBytes(ref.surfaceForm.bytes, ref.surfaceForm.offset, ref.surfaceForm.length);
|
||||
len = input.readVInt();
|
||||
ref.payload.grow(len);
|
||||
ref.payload.length = len;
|
||||
input.readBytes(ref.payload.bytes, ref.payload.offset, ref.payload.length);
|
||||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static abstract class LookupFactory {
|
||||
public abstract Lookup getLookup(FieldMapper<?> mapper, boolean exactFirst);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.elasticsearch.index.codec.postingsformat.AbstractPostingsFormatProvider;
|
||||
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
/**
 * A {@link PostingsFormatProvider} that wraps the delegate provider's postings
 * format in a {@link Completion090PostingsFormat}, so the suggest FST side-car
 * is built next to the delegate's regular postings.
 */
public final class CompletionPostingsFormatProvider extends AbstractPostingsFormatProvider {

    private final Completion090PostingsFormat postingsFormat;

    public CompletionPostingsFormatProvider(String name, PostingsFormatProvider delegate, Completion090PostingsFormat.CompletionLookupProvider provider) {
        super(name);
        this.postingsFormat = new Completion090PostingsFormat(delegate.get(), provider);
    }

    @Override
    public PostingsFormat get() {
        return postingsFormat;
    }
}
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.search.suggest.SuggestContextParser;
|
||||
import org.elasticsearch.search.suggest.SuggestionSearchContext;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.elasticsearch.search.suggest.SuggestUtils.parseSuggestContext;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class CompletionSuggestParser implements SuggestContextParser {
|
||||
|
||||
private CompletionSuggester completionSuggester;
|
||||
|
||||
public CompletionSuggestParser(CompletionSuggester completionSuggester) {
|
||||
this.completionSuggester = completionSuggester;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SuggestionSearchContext.SuggestionContext parse(XContentParser parser, MapperService mapperService) throws IOException {
|
||||
XContentParser.Token token;
|
||||
String fieldName = null;
|
||||
CompletionSuggestionContext suggestion = new CompletionSuggestionContext(completionSuggester);
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
fieldName = parser.currentName();
|
||||
} else if (token.isValue()) {
|
||||
parseSuggestContext(parser, mapperService, fieldName, suggestion);
|
||||
suggestion.mapper(mapperService.smartNameFieldMapper(suggestion.getField()));
|
||||
} else {
|
||||
throw new ElasticSearchIllegalArgumentException("suggester[completion] doesn't support field [" + fieldName + "]");
|
||||
}
|
||||
}
|
||||
return suggestion;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
import org.elasticsearch.ElasticSearchException;
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
import org.elasticsearch.common.text.StringText;
|
||||
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
|
||||
import org.elasticsearch.search.suggest.Suggest;
|
||||
import org.elasticsearch.search.suggest.SuggestContextParser;
|
||||
import org.elasticsearch.search.suggest.Suggester;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionSuggestion.Entry.Option;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
public class CompletionSuggester implements Suggester<CompletionSuggestionContext> {
|
||||
|
||||
private static final ScoreComparator scoreComparator = new ScoreComparator();
|
||||
|
||||
@Override
|
||||
public Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> execute(String name,
|
||||
CompletionSuggestionContext suggestionContext, IndexReader indexReader, CharsRef spare) throws IOException {
|
||||
CompletionSuggestion completionSuggestionSuggestion = new CompletionSuggestion(name, suggestionContext.getSize());
|
||||
CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(new StringText(suggestionContext.getText()
|
||||
.utf8ToString()), 0, suggestionContext.getText().toString().length());
|
||||
completionSuggestionSuggestion.addTerm(completionSuggestEntry);
|
||||
String fieldName = suggestionContext.getField();
|
||||
|
||||
if (suggestionContext.mapper() == null || !(suggestionContext.mapper() instanceof CompletionFieldMapper)) {
|
||||
throw new ElasticSearchException("Field [" + suggestionContext.getField() + "] is not a completion suggest field");
|
||||
}
|
||||
String prefix = suggestionContext.getText().utf8ToString();
|
||||
|
||||
Map<String, CompletionSuggestion.Entry.Option> results = Maps.newHashMapWithExpectedSize(indexReader.leaves().size() * suggestionContext.getSize());
|
||||
for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
|
||||
AtomicReader atomicReader = atomicReaderContext.reader();
|
||||
Terms terms = atomicReader.fields().terms(fieldName);
|
||||
if (terms instanceof Completion090PostingsFormat.CompletionTerms) {
|
||||
Completion090PostingsFormat.CompletionTerms lookupTerms = (Completion090PostingsFormat.CompletionTerms) terms;
|
||||
Lookup lookup = lookupTerms.getLookup(suggestionContext.mapper(), false);
|
||||
List<Lookup.LookupResult> lookupResults = lookup.lookup(prefix, false, suggestionContext.getSize());
|
||||
for (Lookup.LookupResult res : lookupResults) {
|
||||
|
||||
final String key = res.key.toString();
|
||||
final float score = res.value;
|
||||
final Option value = results.get(key);
|
||||
if (value == null) {
|
||||
final Option option = new CompletionSuggestion.Entry.Option(new StringText(key), score, res.payload == null ? null
|
||||
: new BytesArray(res.payload));
|
||||
results.put(key, option);
|
||||
} else if (value.getScore() < score) {
|
||||
value.setScore(score);
|
||||
value.setPayload(res.payload == null ? null : new BytesArray(res.payload));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
final List<CompletionSuggestion.Entry.Option> options = new ArrayList<CompletionSuggestion.Entry.Option>(results.values());
|
||||
CollectionUtil.introSort(options, scoreComparator);
|
||||
|
||||
for (int i = 0 ; i < Math.min(suggestionContext.getSize(), options.size()) ; i++) {
|
||||
completionSuggestEntry.addOption(options.get(i));
|
||||
}
|
||||
|
||||
return completionSuggestionSuggestion;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String[] names() {
|
||||
return new String[] { "completion" };
|
||||
}
|
||||
|
||||
@Override
|
||||
public SuggestContextParser getContextParser() {
|
||||
return new CompletionSuggestParser(this);
|
||||
}
|
||||
|
||||
public static class ScoreComparator implements Comparator<CompletionSuggestion.Entry.Option> {
|
||||
@Override
|
||||
public int compare(Option o1, Option o2) {
|
||||
return Float.compare(o2.getScore(), o1.getScore());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.text.Text;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.search.suggest.Suggest;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class CompletionSuggestion extends Suggest.Suggestion<CompletionSuggestion.Entry> {
|
||||
|
||||
public static final int TYPE = 2;
|
||||
|
||||
public CompletionSuggestion() {
|
||||
}
|
||||
|
||||
public CompletionSuggestion(String name, int size) {
|
||||
super(name, size);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getType() {
|
||||
return TYPE;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Entry newEntry() {
|
||||
return new Entry();
|
||||
}
|
||||
|
||||
public static class Entry extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry<CompletionSuggestion.Entry.Option> {
|
||||
|
||||
public Entry(Text text, int offset, int length) {
|
||||
super(text, offset, length);
|
||||
}
|
||||
|
||||
protected Entry() {
|
||||
super();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Option newOption() {
|
||||
return new Option();
|
||||
}
|
||||
|
||||
public static class Option extends org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option {
|
||||
private BytesReference payload;
|
||||
|
||||
public Option(Text text, float score,BytesReference payload) {
|
||||
super(text, score);
|
||||
this.payload = payload;
|
||||
}
|
||||
|
||||
|
||||
protected Option() {
|
||||
super();
|
||||
}
|
||||
|
||||
public void setPayload(BytesReference payload) {
|
||||
this.payload = payload;
|
||||
}
|
||||
|
||||
public BytesReference getPayload() {
|
||||
return payload;
|
||||
}
|
||||
|
||||
public void setScore(float score) {
|
||||
super.setScore(score);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
super.innerToXContent(builder, params);
|
||||
if (payload != null && payload.length() > 0) {
|
||||
builder.rawField("payload", payload);
|
||||
}
|
||||
return builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
super.readFrom(in);
|
||||
payload = in.readBytesReference();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
super.writeTo(out);
|
||||
out.writeBytesReference(payload);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
/**
 * Request-side builder for a {@code completion} suggestion. The completion
 * suggester currently takes no options beyond the common suggest parameters,
 * so the inner XContent section is empty.
 */
public class CompletionSuggestionBuilder extends SuggestBuilder.SuggestionBuilder<CompletionSuggestionBuilder> {

    public CompletionSuggestionBuilder(String name) {
        super(name, "completion");
    }

    @Override
    protected XContentBuilder innerToXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
        // no suggester-specific options to serialize
        return builder;
    }

}
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.search.suggest.Suggester;
|
||||
import org.elasticsearch.search.suggest.SuggestionSearchContext;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
/**
 * Per-request context for the {@code completion} suggester. In addition to the
 * common suggest options it carries the mapper of the target completion field.
 */
public class CompletionSuggestionContext extends SuggestionSearchContext.SuggestionContext {

    // mapper of the field being suggested on; set by CompletionSuggestParser
    private FieldMapper<?> mapper;

    public CompletionSuggestionContext(Suggester suggester) {
        super(suggester);
    }

    public FieldMapper<?> mapper() {
        return this.mapper;
    }

    public void mapper(FieldMapper<?> mapper) {
        this.mapper = mapper;
    }
}
|
|
@ -0,0 +1,145 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public final class CompletionTokenStream extends TokenStream {
|
||||
|
||||
private final PayloadAttribute payloadAttr = addAttribute(PayloadAttribute.class);;
|
||||
private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class);
|
||||
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
|
||||
|
||||
private final TokenStream input;
|
||||
private BytesRef payload;
|
||||
private Iterator<IntsRef> finiteStrings;
|
||||
private ToFiniteStrings toFiniteStrings;
|
||||
private int posInc = -1;
|
||||
private static final int MAX_PATHS = 256;
|
||||
private final BytesRef scratch = new BytesRef();
|
||||
|
||||
public CompletionTokenStream(TokenStream input, BytesRef payload, ToFiniteStrings toFiniteStrings) throws IOException {
|
||||
this.input = input;
|
||||
this.payload = payload;
|
||||
this.toFiniteStrings = toFiniteStrings;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
clearAttributes();
|
||||
if (finiteStrings == null) {
|
||||
Set<IntsRef> strings = toFiniteStrings.toFiniteStrings(input);
|
||||
|
||||
if (strings.size() > MAX_PATHS) {
|
||||
throw new IllegalArgumentException("TokenStream expanded to " + strings.size() + " finite strings. Only <= " + MAX_PATHS
|
||||
+ " finite strings are supported");
|
||||
}
|
||||
posInc = strings.size();
|
||||
finiteStrings = strings.iterator();
|
||||
}
|
||||
if (finiteStrings.hasNext()) {
|
||||
posAttr.setPositionIncrement(posInc);
|
||||
/*
|
||||
* this posInc encodes the number of paths that this surface form
|
||||
* produced. Multi Fields have the same surface form and therefore sum up
|
||||
*/
|
||||
posInc = 0;
|
||||
Util.toBytesRef(finiteStrings.next(), scratch); // now we have UTF-8
|
||||
bytesAtt.setBytesRef(scratch);
|
||||
if (payload != null) {
|
||||
payloadAttr.setPayload(this.payload);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void end() throws IOException {
|
||||
if (posInc == -1) {
|
||||
input.end();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (posInc == -1) {
|
||||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static interface ToFiniteStrings {
|
||||
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
finiteStrings = null;
|
||||
posInc = -1;
|
||||
}
|
||||
|
||||
public interface ByteTermAttribute extends TermToBytesRefAttribute {
|
||||
public void setBytesRef(BytesRef bytes);
|
||||
}
|
||||
|
||||
public static final class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute, TermToBytesRefAttribute {
|
||||
private BytesRef bytes;
|
||||
|
||||
@Override
|
||||
public int fillBytesRef() {
|
||||
return bytes.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytesRef() {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBytesRef(BytesRef bytes) {
|
||||
this.bytes = bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
|
||||
other.bytes = bytes;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
package org.elasticsearch.search.suggest.completion;
|
||||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
interface PayloadProcessor {
|
||||
|
||||
BytesRef buildPayload(BytesRef surfaceForm, long weight, BytesRef payload) throws IOException;
|
||||
|
||||
void parsePayload(BytesRef payload, SuggestPayload ref) throws IOException;
|
||||
|
||||
static class SuggestPayload {
|
||||
final BytesRef payload = new BytesRef();
|
||||
long weight = 0;
|
||||
final BytesRef surfaceForm = new BytesRef();
|
||||
}
|
||||
}
|
|
@ -1,2 +1,3 @@
|
|||
org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat
|
||||
org.elasticsearch.index.codec.postingsformat.ElasticSearch090PostingsFormat
|
||||
org.elasticsearch.search.suggest.completion.Completion090PostingsFormat
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.test.integration.search.suggest;
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.codecs.*;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.FieldInfo.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.search.spell.TermFreqIterator;
|
||||
import org.apache.lucene.search.spell.TermFreqPayloadIterator;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.search.suggest.Lookup.LookupResult;
|
||||
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
|
||||
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.codec.postingsformat.ElasticSearch090PostingsFormat;
|
||||
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
|
||||
import org.elasticsearch.index.codec.postingsformat.PreBuiltPostingsFormatProvider;
|
||||
import org.elasticsearch.index.mapper.FieldMapper.Names;
|
||||
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider;
|
||||
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat;
|
||||
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory;
|
||||
import org.elasticsearch.test.integration.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
|
||||
|
||||
@Test
|
||||
public void testCompletionPostingsFormat() throws IOException {
|
||||
AnalyzingCompletionLookupProvider provider = new AnalyzingCompletionLookupProvider(true, false, true, true);
|
||||
RAMDirectory dir = new RAMDirectory();
|
||||
IndexOutput output = dir.createOutput("foo.txt", IOContext.DEFAULT);
|
||||
FieldsConsumer consumer = provider.consumer(output);
|
||||
FieldInfo fieldInfo = new FieldInfo("foo", true, 1, false, true, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
|
||||
DocValuesType.SORTED, DocValuesType.BINARY, new HashMap<String, String>());
|
||||
TermsConsumer addField = consumer.addField(fieldInfo);
|
||||
|
||||
PostingsConsumer postingsConsumer = addField.startTerm(new BytesRef("foofightersgenerator"));
|
||||
postingsConsumer.startDoc(0, 1);
|
||||
postingsConsumer.addPosition(256 - 2, provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0,
|
||||
1);
|
||||
postingsConsumer.finishDoc();
|
||||
addField.finishTerm(new BytesRef("foofightersgenerator"), new TermStats(1, 1));
|
||||
addField.startTerm(new BytesRef("generator"));
|
||||
postingsConsumer.startDoc(0, 1);
|
||||
postingsConsumer.addPosition(256 - 1, provider.buildPayload(new BytesRef("Generator - Foo Fighters"), 9, new BytesRef("id:10")), 0,
|
||||
1);
|
||||
postingsConsumer.finishDoc();
|
||||
addField.finishTerm(new BytesRef("generator"), new TermStats(1, 1));
|
||||
addField.finish(1, 1, 1);
|
||||
consumer.close();
|
||||
output.close();
|
||||
|
||||
IndexInput input = dir.openInput("foo.txt", IOContext.DEFAULT);
|
||||
LookupFactory load = provider.load(input);
|
||||
PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat());
|
||||
NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
|
||||
Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true), false);
|
||||
List<LookupResult> result = lookup.lookup("ge", false, 10);
|
||||
assertThat(result.get(0).key.toString(), equalTo("Generator - Foo Fighters"));
|
||||
assertThat(result.get(0).payload.utf8ToString(), equalTo("id:10"));
|
||||
dir.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDuellCompletions() throws IOException, NoSuchFieldException, SecurityException, IllegalArgumentException,
|
||||
IllegalAccessException {
|
||||
final boolean preserveSeparators = getRandom().nextBoolean();
|
||||
final boolean preservePositionIncrements = getRandom().nextBoolean();
|
||||
final boolean usePayloads = getRandom().nextBoolean();
|
||||
final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0;
|
||||
|
||||
XAnalyzingSuggester reference = new XAnalyzingSuggester(new StandardAnalyzer(TEST_VERSION_CURRENT), new StandardAnalyzer(
|
||||
TEST_VERSION_CURRENT), options, 256, -1, null, false, 1);
|
||||
reference.setPreservePositionIncrements(preservePositionIncrements);
|
||||
LineFileDocs docs = new LineFileDocs(getRandom());
|
||||
int num = atLeast(150);
|
||||
final String[] titles = new String[num];
|
||||
final long[] weights = new long[num];
|
||||
for (int i = 0; i < titles.length; i++) {
|
||||
Document nextDoc = docs.nextDoc();
|
||||
IndexableField field = nextDoc.getField("title");
|
||||
titles[i] = field.stringValue();
|
||||
weights[i] = between(0, 100);
|
||||
|
||||
}
|
||||
docs.close();
|
||||
final TermFreqIterator primaryIter = new TermFreqIterator() {
|
||||
int index = 0;
|
||||
long currentWeight = -1;
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
if (index < titles.length) {
|
||||
currentWeight = weights[index];
|
||||
return new BytesRef(titles[index++]);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long weight() {
|
||||
return currentWeight;
|
||||
}
|
||||
|
||||
};
|
||||
TermFreqIterator iter;
|
||||
if (usePayloads) {
|
||||
iter = new TermFreqPayloadIterator() {
|
||||
@Override
|
||||
public long weight() {
|
||||
return primaryIter.weight();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return primaryIter.getComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
return primaryIter.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef payload() {
|
||||
return new BytesRef(Long.toString(weight()));
|
||||
}
|
||||
};
|
||||
} else {
|
||||
iter = primaryIter;
|
||||
}
|
||||
reference.build(iter);
|
||||
PostingsFormatProvider provider = new PreBuiltPostingsFormatProvider(new ElasticSearch090PostingsFormat());
|
||||
|
||||
NamedAnalyzer namedAnalzyer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT));
|
||||
final CompletionFieldMapper mapper = new CompletionFieldMapper(new Names("foo"), namedAnalzyer, namedAnalzyer, provider, null, usePayloads,
|
||||
preserveSeparators, preservePositionIncrements);
|
||||
Lookup buildAnalyzingLookup = buildAnalyzingLookup(mapper, titles, titles, weights);
|
||||
Field field = buildAnalyzingLookup.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
|
||||
field.setAccessible(true);
|
||||
Field refField = reference.getClass().getDeclaredField("maxAnalyzedPathsForOneInput");
|
||||
refField.setAccessible(true);
|
||||
assertThat(refField.get(reference), equalTo(field.get(buildAnalyzingLookup)));
|
||||
|
||||
for (int i = 0; i < titles.length; i++) {
|
||||
int res = between(1, 10);
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
SuggestUtils.analyze(namedAnalzyer.tokenStream("foo", titles[i]), new SuggestUtils.TokenConsumer() {
|
||||
@Override
|
||||
public void nextToken() throws IOException {
|
||||
if (builder.length() == 0) {
|
||||
builder.append(this.charTermAttr.toString());
|
||||
}
|
||||
}
|
||||
});
|
||||
String firstTerm = builder.toString();
|
||||
String prefix = firstTerm.isEmpty() ? "" : firstTerm.substring(0, between(1, firstTerm.length()));
|
||||
List<LookupResult> refLookup = reference.lookup(prefix, false, res);
|
||||
List<LookupResult> lookup = buildAnalyzingLookup.lookup(prefix, false, res);
|
||||
assertThat(refLookup.toString(),lookup.size(), equalTo(refLookup.size()));
|
||||
for (int j = 0; j < refLookup.size(); j++) {
|
||||
assertThat(lookup.get(j).key, equalTo(refLookup.get(j).key));
|
||||
assertThat("prefix: " + prefix + " " + j + " -- missmatch cost: " + lookup.get(j).key + " - " + lookup.get(j).value + " | " + refLookup.get(j).key + " - " + refLookup.get(j).value ,
|
||||
lookup.get(j).value, equalTo(refLookup.get(j).value));
|
||||
assertThat(lookup.get(j).payload, equalTo(refLookup.get(j).payload));
|
||||
if (usePayloads) {
|
||||
assertThat(lookup.get(j).payload.utf8ToString(), equalTo(Long.toString(lookup.get(j).value)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Lookup buildAnalyzingLookup(final CompletionFieldMapper mapper, String[] terms, String[] surfaces, long[] weights)
|
||||
throws IOException {
|
||||
RAMDirectory dir = new RAMDirectory();
|
||||
FilterCodec filterCodec = new FilterCodec("filtered", Codec.getDefault()) {
|
||||
public PostingsFormat postingsFormat() {
|
||||
return mapper.postingsFormatProvider().get();
|
||||
}
|
||||
};
|
||||
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, mapper.indexAnalyzer());
|
||||
|
||||
indexWriterConfig.setCodec(filterCodec);
|
||||
IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
|
||||
for (int i = 0; i < weights.length; i++) {
|
||||
Document doc = new Document();
|
||||
BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i], new BytesRef(Long.toString(weights[i])));
|
||||
doc.add(mapper.getCompletionField(terms[i], payload));
|
||||
if (randomBoolean()) {
|
||||
writer.commit();
|
||||
}
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.commit();
|
||||
writer.forceMerge(1);
|
||||
writer.commit();
|
||||
DirectoryReader reader = DirectoryReader.open(writer, true);
|
||||
assertThat(reader.leaves().size(), equalTo(1));
|
||||
assertThat(reader.leaves().get(0).reader().numDocs(), equalTo(weights.length));
|
||||
AtomicReaderContext atomicReaderContext = reader.leaves().get(0);
|
||||
Terms luceneTerms = atomicReaderContext.reader().terms(mapper.name());
|
||||
Lookup lookup = ((Completion090PostingsFormat.CompletionTerms) luceneTerms).getLookup(mapper, false);
|
||||
reader.close();
|
||||
writer.close();
|
||||
dir.close();
|
||||
return lookup;
|
||||
}
|
||||
|
||||
// TODO ADD more unittests
|
||||
}
|
|
@ -0,0 +1,454 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.test.integration.search.suggest;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
|
||||
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
|
||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||
import org.elasticsearch.action.suggest.SuggestResponse;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.json.JsonXContent;
|
||||
import org.elasticsearch.index.mapper.MapperException;
|
||||
import org.elasticsearch.search.suggest.Suggest;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionSuggestion;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder;
|
||||
import org.elasticsearch.test.integration.AbstractSharedClusterTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
|
||||
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
|
||||
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
|
||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
public class CompletionSuggestSearchTests extends AbstractSharedClusterTest {
|
||||
|
||||
private static final String INDEX = "test";
|
||||
private static final String TYPE = "testType";
|
||||
private static final String FIELD = "testField";
|
||||
|
||||
@Test
|
||||
public void testSimple() throws Exception{
|
||||
createIndexAndMapping();
|
||||
String[][] input = {{"Foo Fighters"}, {"Foo Fighters"}, {"Foo Fighters"}, {"Foo Fighters"},
|
||||
{"Generator", "Foo Fighters Generator"}, {"Learn to Fly", "Foo Fighters Learn to Fly" },
|
||||
{"The Prodigy"}, {"The Prodigy"}, {"The Prodigy"}, {"Firestarter", "The Prodigy Firestarter"},
|
||||
{"Turbonegro"}, {"Turbonegro"}, {"Get it on", "Turbonegro Get it on"}}; // work with frequencies
|
||||
for (int i = 0; i < input.length; i++) {
|
||||
client().prepareIndex(INDEX, TYPE, "" + i)
|
||||
.setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value(input[i]).endArray()
|
||||
.endObject()
|
||||
.endObject()
|
||||
)
|
||||
.execute().actionGet();
|
||||
}
|
||||
|
||||
refresh();
|
||||
|
||||
assertSuggestionsNotInOrder("f", "Foo Fighters", "Firestarter", "Foo Fighters Generator", "Foo Fighters Learn to Fly");
|
||||
assertSuggestionsNotInOrder("t", "The Prodigy", "Turbonegro", "Turbonegro Get it on", "The Prodigy Firestarter");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicPrefixSuggestion() throws Exception {
|
||||
createIndexAndMapping();
|
||||
for (int i = 0; i < 2; i++) {
|
||||
createData(i==0);
|
||||
assertSuggestions("f", "Firestarter - The Prodigy", "Foo Fighters", "Generator - Foo Fighters", "Learn to Fly - Foo Fighters");
|
||||
assertSuggestions("ge", "Generator - Foo Fighters", "Get it on - Turbonegro");
|
||||
assertSuggestions("ge", "Generator - Foo Fighters", "Get it on - Turbonegro");
|
||||
assertSuggestions("t", "The Prodigy", "Firestarter - The Prodigy", "Get it on - Turbonegro", "Turbonegro");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatWeightsAreWorking() throws Exception {
|
||||
createIndexAndMapping();
|
||||
|
||||
List<String> similarNames = Lists.newArrayList("the", "The Prodigy", "The Verve", "The the");
|
||||
// the weight is 1000 divided by string length, so the results are easy to to check
|
||||
for (String similarName : similarNames) {
|
||||
client().prepareIndex(INDEX, TYPE, similarName).setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value(similarName).endArray()
|
||||
.field("weight", 1000 / similarName.length())
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
}
|
||||
|
||||
refresh();
|
||||
|
||||
assertSuggestions("the", "the", "The the", "The Verve", "The Prodigy");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatInputCanBeAStringInsteadOfAnArray() throws Exception {
|
||||
createIndexAndMapping();
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.field("input", "Foo Fighters")
|
||||
.field("output", "Boo Fighters")
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
assertSuggestions("f", "Boo Fighters");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatPayloadsAreArbitraryJsonObjects() throws Exception {
|
||||
createIndexAndMapping();
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Foo Fighters").endArray()
|
||||
.field("output", "Boo Fighters")
|
||||
.startObject("payload").field("foo", "bar").startArray("test").value("spam").value("eggs").endArray().endObject()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionBuilder("testSuggestions").field(FIELD).text("foo").size(10)
|
||||
).execute().actionGet();
|
||||
|
||||
assertSuggestions(suggestResponse, "testSuggestions", "Boo Fighters");
|
||||
Suggest.Suggestion.Entry.Option option = suggestResponse.getSuggest().getSuggestion("testSuggestions").getEntries().get(0).getOptions().get(0);
|
||||
assertThat(option, is(instanceOf(CompletionSuggestion.Entry.Option.class)));
|
||||
CompletionSuggestion.Entry.Option prefixOption = (CompletionSuggestion.Entry.Option) option;
|
||||
assertThat(prefixOption.getPayload(), is(notNullValue()));
|
||||
|
||||
// parse JSON
|
||||
Map<String, Object> jsonMap = JsonXContent.jsonXContent.createParser(prefixOption.getPayload()).mapAndClose();
|
||||
assertThat(jsonMap.size(), is(2));
|
||||
assertThat(jsonMap.get("foo").toString(), is("bar"));
|
||||
assertThat(jsonMap.get("test"), is(instanceOf(List.class)));
|
||||
List<String> listValues = (List<String>) jsonMap.get("test");
|
||||
assertThat(listValues, hasItems("spam", "eggs"));
|
||||
}
|
||||
|
||||
@Test(expected = MapperException.class)
|
||||
public void testThatExceptionIsThrownWhenPayloadsAreDisabledButInIndexRequest() throws Exception {
|
||||
createIndexAndMapping("simple", "simple", false, false, true);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Foo Fighters").endArray()
|
||||
.field("output", "Boo Fighters")
|
||||
.startArray("payload").value("spam").value("eggs").endArray()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDisabledPreserveSeperators() throws Exception {
|
||||
createIndexAndMapping("simple", "simple", true, false, true);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Foo Fighters").endArray()
|
||||
.field("weight", 10)
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "2").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Foof").endArray()
|
||||
.field("weight", 20)
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
assertSuggestions("foof", "Foof", "Foo Fighters");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEnabledPreserveSeperators() throws Exception {
|
||||
createIndexAndMapping("simple", "simple", true, true, true);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Foo Fighters").endArray()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "2").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Foof").endArray()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
assertSuggestions("foof", "Foof");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatMultipleInputsAreSuppored() throws Exception {
|
||||
createIndexAndMapping("simple", "simple", false, false, true);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Foo Fighters").value("Fu Fighters").endArray()
|
||||
.field("output", "The incredible Foo Fighters")
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
assertSuggestions("foo", "The incredible Foo Fighters");
|
||||
assertSuggestions("fu", "The incredible Foo Fighters");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatShortSyntaxIsWorking() throws Exception {
|
||||
createIndexAndMapping();
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startArray(FIELD)
|
||||
.value("The Prodigy Firestarter").value("Firestarter")
|
||||
.endArray().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
assertSuggestions("t", "The Prodigy Firestarter");
|
||||
assertSuggestions("f", "Firestarter");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatDisablingPositionIncrementsWorkForStopwords() throws Exception {
|
||||
// analyzer which removes stopwords... so may not be the simple one
|
||||
createIndexAndMapping("standard", "standard", false, false, false);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("The Beatles").endArray()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
assertSuggestions("b", "The Beatles");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatSynonymsWork() throws Exception {
|
||||
Settings.Builder settingsBuilder = settingsBuilder()
|
||||
.put("analysis.analyzer.suggest_analyzer_synonyms.type", "custom")
|
||||
.put("analysis.analyzer.suggest_analyzer_synonyms.tokenizer", "standard")
|
||||
.putArray("analysis.analyzer.suggest_analyzer_synonyms.filter", "standard", "lowercase", "my_synonyms")
|
||||
.put("analysis.filter.my_synonyms.type", "synonym")
|
||||
.putArray("analysis.filter.my_synonyms.synonyms", "foo,renamed");
|
||||
createIndexAndMappingAndSettings(settingsBuilder, "suggest_analyzer_synonyms", "suggest_analyzer_synonyms", false, false, true);
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value("Foo Fighters").endArray()
|
||||
.endObject().endObject()
|
||||
).get();
|
||||
|
||||
refresh();
|
||||
|
||||
// get suggestions for renamed
|
||||
assertSuggestions("r", "Foo Fighters");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatUpgradeToMultiFieldWorks() throws Exception {
|
||||
client().admin().indices().prepareDelete().get();
|
||||
int randomShardNumber = between(1, 5);
|
||||
int randomReplicaNumber = between(0, 2);
|
||||
Settings.Builder settingsBuilder = settingsBuilder().put(SETTING_NUMBER_OF_SHARDS, randomShardNumber).put(SETTING_NUMBER_OF_REPLICAS, randomReplicaNumber);
|
||||
|
||||
client().admin().indices().prepareCreate(INDEX).setSettings(settingsBuilder).get();
|
||||
ensureYellow();
|
||||
client().prepareIndex(INDEX, TYPE, "1").setRefresh(true).setSource(jsonBuilder().startObject().field(FIELD, "Foo Fighters").endObject()).get();
|
||||
|
||||
PutMappingResponse putMappingResponse = client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
|
||||
.startObject(TYPE).startObject("properties")
|
||||
.startObject(FIELD)
|
||||
.field("type", "multi_field")
|
||||
.startObject("fields")
|
||||
.startObject(FIELD).field("type", "string").endObject()
|
||||
.startObject("suggest").field("type", "completion").field("index_analyzer", "simple").field("search_analyzer", "simple").endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject().endObject()
|
||||
.endObject())
|
||||
.get();
|
||||
assertThat(putMappingResponse.isAcknowledged(), is(true));
|
||||
|
||||
SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionBuilder("suggs").field(FIELD + ".suggest").text("f").size(10)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(suggestResponse, "suggs");
|
||||
|
||||
client().prepareIndex(INDEX, TYPE, "1").setRefresh(true).setSource(jsonBuilder().startObject().field(FIELD, "Foo Fighters").endObject()).get();
|
||||
waitForRelocation(ClusterHealthStatus.GREEN);
|
||||
|
||||
SuggestResponse afterReindexingResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionBuilder("suggs").field(FIELD + ".suggest").text("f").size(10)
|
||||
).execute().actionGet();
|
||||
assertSuggestions(afterReindexingResponse, "suggs", "Foo Fighters");
|
||||
}
|
||||
|
||||
public void assertSuggestions(String suggestion, String ... suggestions) {
|
||||
String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10);
|
||||
SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionBuilder(suggestionName).field(FIELD).text(suggestion).size(10)
|
||||
).execute().actionGet();
|
||||
|
||||
assertSuggestions(suggestResponse, suggestionName, suggestions);
|
||||
}
|
||||
|
||||
public void assertSuggestionsNotInOrder(String suggestString, String ... suggestions) {
|
||||
String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10);
|
||||
SuggestResponse suggestResponse = client().prepareSuggest(INDEX).addSuggestion(
|
||||
new CompletionSuggestionBuilder(suggestionName).field(FIELD).text(suggestString).size(10)
|
||||
).execute().actionGet();
|
||||
|
||||
assertSuggestions(suggestResponse, false, suggestionName, suggestions);
|
||||
}
|
||||
|
||||
private void assertSuggestions(SuggestResponse suggestResponse, String name, String... suggestions) {
|
||||
assertSuggestions(suggestResponse, true, name, suggestions);
|
||||
}
|
||||
|
||||
private void assertSuggestions(SuggestResponse suggestResponse, boolean suggestionOrderStrict, String name, String... suggestions) {
|
||||
assertThat(suggestResponse.getFailedShards(), is(0));
|
||||
|
||||
assertThat(suggestResponse.getSuggest().getSuggestion(name), is(notNullValue()));
|
||||
Suggest.Suggestion<Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option>> suggestion = suggestResponse.getSuggest().getSuggestion(name);
|
||||
|
||||
List<String> suggestionList = getNames(suggestion.getEntries().get(0));
|
||||
List<Suggest.Suggestion.Entry.Option> options = suggestion.getEntries().get(0).getOptions();
|
||||
|
||||
String assertMsg = String.format(Locale.ROOT, "Expected options %s length to be %s, but was %s", suggestionList, suggestions.length, options.size());
|
||||
assertThat(assertMsg, options.size(), is(suggestions.length));
|
||||
if (suggestionOrderStrict) {
|
||||
for (int i = 0; i < suggestions.length; i++) {
|
||||
String errMsg = String.format(Locale.ROOT, "Expected elem %s in list %s to be [%s] score: %s", i, suggestionList, suggestions[i], options.get(i).getScore());
|
||||
assertThat(errMsg, options.get(i).getText().toString(), is(suggestions[i]));
|
||||
}
|
||||
} else {
|
||||
for (String expectedSuggestion : suggestions) {
|
||||
String errMsg = String.format(Locale.ROOT, "Expected elem %s to be in list %s", expectedSuggestion, suggestionList);
|
||||
assertThat(errMsg, suggestionList, hasItem(expectedSuggestion));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private List<String> getNames(Suggest.Suggestion.Entry<Suggest.Suggestion.Entry.Option> suggestEntry) {
|
||||
List<String> names = Lists.newArrayList();
|
||||
for (Suggest.Suggestion.Entry.Option entry : suggestEntry.getOptions()) {
|
||||
names.add(entry.getText().string());
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
/**
 * Creates the test index with the default completion mapping: "simple"
 * analyzer on both the index and search side, payloads enabled, separators
 * not preserved, position increments preserved.
 */
private void createIndexAndMapping() throws IOException {
    createIndexAndMapping("simple", "simple", true, false, true);
}
|
||||
|
||||
/**
 * Deletes all indices, creates {@code INDEX} with the given settings, and puts
 * a mapping for {@code TYPE} that declares {@code FIELD} as a completion field
 * configured from the remaining arguments.
 *
 * @param settingsBuilder              index settings (e.g. shard/replica counts)
 * @param indexAnalyzer                analyzer name written as "index_analyzer"
 * @param searchAnalyzer               analyzer name written as "search_analyzer"
 * @param payloads                     value written for "payloads"
 * @param preserveSeparators           value written for "preserve_separators"
 * @param preservePositionIncrements   value written for "preserve_position_increments"
 */
private void createIndexAndMappingAndSettings(Settings.Builder settingsBuilder, String indexAnalyzer, String searchAnalyzer, boolean payloads, boolean preserveSeparators, boolean preservePositionIncrements) throws IOException {
    // Wipe everything left over from a previous test method.
    client().admin().indices().prepareDelete().get();
    client().admin().indices().prepareCreate(INDEX)
            .setSettings(settingsBuilder)
            .get();
    PutMappingResponse putMappingResponse = client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
            .startObject(TYPE).startObject("properties")
            .startObject(FIELD)
            .field("type", "completion")
            .field("index_analyzer", indexAnalyzer)
            .field("search_analyzer", searchAnalyzer)
            .field("payloads", payloads)
            .field("preserve_separators", preserveSeparators)
            .field("preserve_position_increments", preservePositionIncrements)
            .endObject()
            .endObject().endObject()
            .endObject())
            .get();
    // The mapping must be acknowledged before any test indexes documents.
    assertThat(putMappingResponse.isAcknowledged(), is(true));
    ensureYellow();
}
|
||||
|
||||
private void createIndexAndMapping(String indexAnalyzer, String searchAnalyzer, boolean payloads, boolean preserveSeparators, boolean preservePositionIncrements) throws IOException {
|
||||
int randomShardNumber = between(1, 5);
|
||||
int randomReplicaNumber = between(0, 2);
|
||||
Settings.Builder settingsBuilder = settingsBuilder().put(SETTING_NUMBER_OF_SHARDS, randomShardNumber).put(SETTING_NUMBER_OF_REPLICAS, randomReplicaNumber);
|
||||
createIndexAndMappingAndSettings(settingsBuilder, indexAnalyzer, searchAnalyzer, payloads, preserveSeparators, preservePositionIncrements);
|
||||
}
|
||||
|
||||
private void createData(boolean optimize) throws IOException, InterruptedException, ExecutionException {
|
||||
String[][] input = {{"Foo Fighters"}, {"Generator", "Foo Fighters Generator"}, {"Learn to Fly", "Foo Fighters Learn to Fly" }, {"The Prodigy"}, {"Firestarter", "The Prodigy Firestarter"}, {"Turbonegro"}, {"Get it on", "Turbonegro Get it on"}};
|
||||
String[] surface = {"Foo Fighters", "Generator - Foo Fighters", "Learn to Fly - Foo Fighters", "The Prodigy", "Firestarter - The Prodigy", "Turbonegro", "Get it on - Turbonegro"};
|
||||
int[] weight = {10, 9, 8, 12, 11, 6, 7};
|
||||
IndexRequestBuilder[] builders = new IndexRequestBuilder[input.length];
|
||||
for (int i = 0; i < builders.length; i++) {
|
||||
builders[i] = client().prepareIndex(INDEX, TYPE, "" + i)
|
||||
.setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value(input[i]).endArray()
|
||||
.field("output",surface[i])
|
||||
.field("payload", "id: " + i)
|
||||
.field("weight", 1) // WE FORCEFULLY INDEX A BOGUS WEIGHT
|
||||
.endObject()
|
||||
.endObject()
|
||||
);
|
||||
}
|
||||
indexRandom(INDEX, false, builders);
|
||||
|
||||
for (int i = 0; i < builders.length; i++) { // add them again to make sure we deduplicate on the surface form
|
||||
builders[i] = client().prepareIndex(INDEX, TYPE, "n" + i)
|
||||
.setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value(input[i]).endArray()
|
||||
.field("output",surface[i])
|
||||
.field("payload", "id: " + i)
|
||||
.field("weight", weight[i])
|
||||
.endObject()
|
||||
.endObject()
|
||||
);
|
||||
}
|
||||
indexRandom(INDEX, false, builders);
|
||||
|
||||
client().admin().indices().prepareRefresh(INDEX).execute().actionGet();
|
||||
if (optimize) {
|
||||
// make sure merging works just fine
|
||||
client().admin().indices().prepareFlush(INDEX).execute().actionGet();
|
||||
client().admin().indices().prepareOptimize(INDEX).execute().actionGet();
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,169 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.test.integration.search.suggest;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.SimpleAnalyzer;
|
||||
import org.apache.lucene.analysis.synonym.SynonymFilter;
|
||||
import org.apache.lucene.analysis.synonym.SynonymMap;
|
||||
import org.apache.lucene.analysis.synonym.SynonymMap.Builder;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionTokenStream;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionTokenStream.ByteTermAttribute;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.Set;
|
||||
|
||||
public class CompletionTokenStreamTest extends BaseTokenStreamTestCase {
|
||||
|
||||
final XAnalyzingSuggester suggester = new XAnalyzingSuggester(new SimpleAnalyzer(TEST_VERSION_CURRENT));
|
||||
|
||||
@Test
|
||||
public void testSuggestTokenFilter() throws Exception {
|
||||
TokenStream tokenStream = new MockTokenizer(new StringReader("mykeyword"), MockTokenizer.WHITESPACE, true);
|
||||
BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
|
||||
TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(new CompletionTokenStream(tokenStream, payload, new CompletionTokenStream.ToFiniteStrings() {
|
||||
@Override
|
||||
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
|
||||
return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
|
||||
}
|
||||
}));
|
||||
assertTokenStreamContents(suggestTokenStream, new String[] {"mykeyword"}, null, null, new String[] {"Surface keyword|friggin payload|10"}, new int[] { 1 }, null, null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSuggestTokenFilterWithSynonym() throws Exception {
|
||||
Builder builder = new SynonymMap.Builder(true);
|
||||
builder.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);
|
||||
|
||||
MockTokenizer tokenizer = new MockTokenizer(new StringReader("mykeyword"), MockTokenizer.WHITESPACE, true);
|
||||
SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
|
||||
|
||||
BytesRef payload = new BytesRef("Surface keyword|friggin payload|10");
|
||||
TokenStream suggestTokenStream = new ByteTermAttrToCharTermAttrFilter(new CompletionTokenStream(filter, payload, new CompletionTokenStream.ToFiniteStrings() {
|
||||
@Override
|
||||
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
|
||||
return suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
|
||||
}
|
||||
}));
|
||||
assertTokenStreamContents(suggestTokenStream, new String[] {"mysynonym", "mykeyword"}, null, null, new String[] {"Surface keyword|friggin payload|10", "Surface keyword|friggin payload|10"}, new int[] { 2, 0 }, null, null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidNumberOfExpansions() throws IOException {
|
||||
Builder builder = new SynonymMap.Builder(true);
|
||||
for (int i = 0; i < 256; i++) {
|
||||
builder.add(new CharsRef("" + (i+1)), new CharsRef("" + (1000 + (i+1))), true);
|
||||
}
|
||||
StringBuilder valueBuilder = new StringBuilder();
|
||||
for (int i = 0 ; i < 8 ; i++) {
|
||||
valueBuilder.append(i+1);
|
||||
valueBuilder.append(" ");
|
||||
}
|
||||
MockTokenizer tokenizer = new MockTokenizer(new StringReader(valueBuilder.toString()), MockTokenizer.WHITESPACE, true);
|
||||
SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
|
||||
|
||||
TokenStream suggestTokenStream = new CompletionTokenStream(filter, new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() {
|
||||
@Override
|
||||
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
|
||||
Set<IntsRef> finiteStrings = suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
|
||||
return finiteStrings;
|
||||
}
|
||||
});
|
||||
|
||||
suggestTokenStream.reset();
|
||||
ByteTermAttribute attr = suggestTokenStream.addAttribute(ByteTermAttribute.class);
|
||||
PositionIncrementAttribute posAttr = suggestTokenStream.addAttribute(PositionIncrementAttribute.class);
|
||||
int maxPos = 0;
|
||||
int count = 0;
|
||||
while(suggestTokenStream.incrementToken()) {
|
||||
count++;
|
||||
assertNotNull(attr.getBytesRef());
|
||||
assertTrue(attr.getBytesRef().length > 0);
|
||||
maxPos += posAttr.getPositionIncrement();
|
||||
}
|
||||
suggestTokenStream.close();
|
||||
assertEquals(count, 256);
|
||||
assertEquals(count, maxPos);
|
||||
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testInValidNumberOfExpansions() throws IOException {
|
||||
Builder builder = new SynonymMap.Builder(true);
|
||||
for (int i = 0; i < 256; i++) {
|
||||
builder.add(new CharsRef("" + (i+1)), new CharsRef("" + (1000 + (i+1))), true);
|
||||
}
|
||||
StringBuilder valueBuilder = new StringBuilder();
|
||||
for (int i = 0 ; i < 9 ; i++) { // 9 -> expands to 512
|
||||
valueBuilder.append(i+1);
|
||||
valueBuilder.append(" ");
|
||||
}
|
||||
MockTokenizer tokenizer = new MockTokenizer(new StringReader(valueBuilder.toString()), MockTokenizer.WHITESPACE, true);
|
||||
SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
|
||||
|
||||
TokenStream suggestTokenStream = new CompletionTokenStream(filter, new BytesRef("Surface keyword|friggin payload|10"), new CompletionTokenStream.ToFiniteStrings() {
|
||||
@Override
|
||||
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException {
|
||||
Set<IntsRef> finiteStrings = suggester.toFiniteStrings(suggester.getTokenStreamToAutomaton(), stream);
|
||||
return finiteStrings;
|
||||
}
|
||||
});
|
||||
|
||||
suggestTokenStream.reset();
|
||||
suggestTokenStream.incrementToken();
|
||||
suggestTokenStream.close();
|
||||
|
||||
}
|
||||
|
||||
public final static class ByteTermAttrToCharTermAttrFilter extends TokenFilter {
|
||||
private CharTermAttribute attr = addAttribute(CharTermAttribute.class);
|
||||
private ByteTermAttribute byteAttr = addAttribute(ByteTermAttribute.class);
|
||||
private PayloadAttribute payload = addAttribute(PayloadAttribute.class);
|
||||
private TypeAttribute type = addAttribute(TypeAttribute.class);
|
||||
protected ByteTermAttrToCharTermAttrFilter(TokenStream input) {
|
||||
super(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
if (input.incrementToken()) {
|
||||
BytesRef bytesRef = byteAttr.getBytesRef();
|
||||
attr.append(bytesRef.utf8ToString());
|
||||
// we move them over so we can assert them more easily in the tests
|
||||
type.setType(payload.getPayload().utf8ToString());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,88 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.test.unit.index.mapper.completion;
|
||||
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.common.xcontent.json.JsonXContent;
|
||||
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
|
||||
import org.elasticsearch.test.unit.index.mapper.MapperTestUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
|
||||
public class CompletionFieldMapperTests {
|
||||
|
||||
@Test
|
||||
public void testDefaultConfiguration() throws IOException {
|
||||
String mapping = jsonBuilder().startObject().startObject("type1")
|
||||
.startObject("properties").startObject("completion")
|
||||
.field("type", "completion")
|
||||
.endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper defaultMapper = MapperTestUtils.newParser().parse(mapping);
|
||||
|
||||
FieldMapper fieldMapper = defaultMapper.mappers().name("completion").mapper();
|
||||
assertThat(fieldMapper, instanceOf(CompletionFieldMapper.class));
|
||||
|
||||
CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper;
|
||||
assertThat(completionFieldMapper.isStoringPayloads(), is(false));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testThatSerializationIncludesAllElements() throws Exception {
|
||||
String mapping = jsonBuilder().startObject().startObject("type1")
|
||||
.startObject("properties").startObject("completion")
|
||||
.field("type", "completion")
|
||||
.field("index_analyzer", "simple")
|
||||
.field("search_analyzer", "standard")
|
||||
.field("payloads", true)
|
||||
.field("preserve_separators", false)
|
||||
.field("preserve_position_increments", true)
|
||||
.endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper defaultMapper = MapperTestUtils.newParser().parse(mapping);
|
||||
|
||||
FieldMapper fieldMapper = defaultMapper.mappers().name("completion").mapper();
|
||||
assertThat(fieldMapper, instanceOf(CompletionFieldMapper.class));
|
||||
|
||||
CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper;
|
||||
XContentBuilder builder = jsonBuilder().startObject();
|
||||
completionFieldMapper.toXContent(builder, null).endObject();
|
||||
builder.close();
|
||||
Map<String, Object> serializedMap = JsonXContent.jsonXContent.createParser(builder.bytes()).mapAndClose();
|
||||
Map<String, Object> configMap = (Map<String, Object>) serializedMap.get("completion");
|
||||
assertThat(configMap.get("index_analyzer").toString(), is("simple"));
|
||||
assertThat(configMap.get("search_analyzer").toString(), is("standard"));
|
||||
assertThat(Boolean.valueOf(configMap.get("payloads").toString()), is(true));
|
||||
assertThat(Boolean.valueOf(configMap.get("preserve_separators").toString()), is(false));
|
||||
assertThat(Boolean.valueOf(configMap.get("preserve_position_increments").toString()), is(true));
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue