From 81e537bd5eb56f66b8ddd9ac59e5dd4a1630b931 Mon Sep 17 00:00:00 2001 From: Florian Schilling Date: Tue, 8 Oct 2013 13:55:25 +0200 Subject: [PATCH] ContextSuggester ================ This commit extends the `CompletionSuggester` by context informations. In example such a context informations can be a simple string representing a category reducing the suggestions in order to this category. Three base implementations of these context informations have been setup in this commit. - a Category Context - a Geo Context All the mapping for these context informations are specified within a context field in the completion field that should use this kind of information. --- docs/reference/search/suggesters.asciidoc | 2 + .../suggesters/completion-suggest.asciidoc | 3 + .../suggesters/context-suggest.asciidoc | 319 +++++++++ .../lucene/analysis/PrefixAnalyzer.java | 128 ++++ .../analyzing/XAnalyzingSuggester.java | 24 +- .../suggest/analyzing/XFuzzySuggester.java | 11 +- .../common/geo/GeoHashUtils.java | 42 +- .../elasticsearch/common/geo/GeoPoint.java | 12 +- .../common/geo/GeohashPathIterator.java | 59 ++ .../common/xcontent/XContentBuilder.java | 8 + .../mapper/core/CompletionFieldMapper.java | 122 +++- .../index/query/GeohashCellFilter.java | 5 +- .../search/suggest/SuggestBuilder.java | 74 ++ .../suggest/SuggestionSearchContext.java | 11 +- .../AnalyzingCompletionLookupProvider.java | 41 +- .../Completion090PostingsFormat.java | 9 +- .../completion/CompletionSuggestParser.java | 70 +- .../CompletionSuggestionContext.java | 27 +- .../completion/CompletionTokenStream.java | 2 +- .../context/CategoryContextMapping.java | 341 +++++++++ .../suggest/context/ContextBuilder.java | 127 ++++ .../suggest/context/ContextMapping.java | 316 +++++++++ .../context/GeolocationContextMapping.java | 664 ++++++++++++++++++ .../search/geo/GeoFilterTests.java | 38 +- .../suggest/ContextSuggestSearchTests.java | 640 +++++++++++++++++ .../AnalyzingCompletionLookupProviderV1.java | 26 +- 
.../CompletionPostingsFormatTest.java | 15 +- .../hamcrest/ElasticsearchGeoAssertions.java | 19 + 28 files changed, 3021 insertions(+), 134 deletions(-) create mode 100644 docs/reference/search/suggesters/context-suggest.asciidoc create mode 100644 src/main/java/org/apache/lucene/analysis/PrefixAnalyzer.java create mode 100644 src/main/java/org/elasticsearch/common/geo/GeohashPathIterator.java create mode 100644 src/main/java/org/elasticsearch/search/suggest/context/CategoryContextMapping.java create mode 100644 src/main/java/org/elasticsearch/search/suggest/context/ContextBuilder.java create mode 100644 src/main/java/org/elasticsearch/search/suggest/context/ContextMapping.java create mode 100644 src/main/java/org/elasticsearch/search/suggest/context/GeolocationContextMapping.java create mode 100644 src/test/java/org/elasticsearch/search/suggest/ContextSuggestSearchTests.java diff --git a/docs/reference/search/suggesters.asciidoc b/docs/reference/search/suggesters.asciidoc index e753927db10..92997249db6 100644 --- a/docs/reference/search/suggesters.asciidoc +++ b/docs/reference/search/suggesters.asciidoc @@ -272,4 +272,6 @@ include::suggesters/phrase-suggest.asciidoc[] include::suggesters/completion-suggest.asciidoc[] +include::suggesters/context-suggest.asciidoc[] + diff --git a/docs/reference/search/suggesters/completion-suggest.asciidoc b/docs/reference/search/suggesters/completion-suggest.asciidoc index 41da0b5de6b..03fe636ad2d 100644 --- a/docs/reference/search/suggesters/completion-suggest.asciidoc +++ b/docs/reference/search/suggesters/completion-suggest.asciidoc @@ -179,6 +179,9 @@ is used as `score`. Also the `text` field uses the `output` of your indexed suggestion, if configured, otherwise the matched part of the `input` field. +NOTE: The completion suggester considers all documents in the index. +See <> for an explanation of how to query a subset of +documents instead. 
[[fuzzy]] ==== Fuzzy queries diff --git a/docs/reference/search/suggesters/context-suggest.asciidoc b/docs/reference/search/suggesters/context-suggest.asciidoc new file mode 100644 index 00000000000..b3095fc4306 --- /dev/null +++ b/docs/reference/search/suggesters/context-suggest.asciidoc @@ -0,0 +1,319 @@ +[[suggester-context]] +== Context Suggester + +The context suggester is an extension to the suggest API of Elasticsearch. Namely the +suggester system provides a very fast way of searching documents by handling these +entirely in memory. But this special treatenment does not allow the handling of +traditional queries and filters, because those would have notable impact on the +performance. So the context extension is designed to take so-called context information +into account to specify a more accurate way of searching within the suggester system. +Instead of using the traditional query and filter system a predefined ``context`` is +configured to limit suggestions to a particular subset of suggestions. +Such a context is defined by a set of context mappings which can either be a simple +*category* or a *geo location*. The information used by the context suggester is +configured in the type mapping with the `context` parameter, which lists all of the +contexts that need to be specified in each document and in each suggestion request. 
+For instance: + +[source,js] +-------------------------------------------------- +PUT services/service/_mapping +{ + "service": { + "properties": { + "name": { + "type" : "string" + }, + "tag": { + "type" : "string" + }, + "suggest_field": { + "type": "completion", + "context": { + "color": { <1> + "type": "category", + "path": "color_field" + "default": ["red", "green", "blue"] + }, + "location": { <2> + "type": "geo", + "precision": "5m", + "neighbors": true, + "default": "u33" + } + } + } + } + } +} +-------------------------------------------------- +<1> See <> +<2> See <> + +However contexts are specified (as type `category` or `geo`, which are discussed below), each +context value generates a new sub-set of documents which can be queried by the completion +suggester. All three types accept a `default` parameter which provides a default value to use +if the corresponding context value is absent. + +The basic structure of this element is that each field forms a new context and the fieldname +is used to reference this context information later on during indexing or querying. All context +mappings have the `default` and the `type` option in common. The value of the `default` field +is used, when ever no specific is provided for the certain context. Note that a context is +defined by at least one value. The `type` option defines the kind of information hold by this +context. These type will be explained further in the following sections. + +[[suggester-context-category]] +[float] +=== Category Context +The `category` context allows you to specify one or more categories in the document at index time. +The document will be assigned to each named category, which can then be queried later. The category +type also allows to specify a field to extract the categories from. The `path` parameter is used to +specify this field of the documents that should be used. If the referenced field contains multiple +values, all these values will be used as alternative categories. 
+ +[float] +==== Category Mapping + +The mapping for a category is simply defined by its `default` values. These can either be +defined as list of *default* categories: + +[source,js] +-------------------------------------------------- +"context": { + "color": { + "type": "category", + "default": ["red", "orange"] + } +} +-------------------------------------------------- + +or as a single value + +[source,js] +-------------------------------------------------- +"context": { + "color": { + "type": "category", + "default": "red" + } +} +-------------------------------------------------- + +or as reference to another field within the documents indexed: + +[source,js] +-------------------------------------------------- +"context": { + "color": { + "type": "category", + "default": "red" + "path": "color_field" + } +} +-------------------------------------------------- + +in this case the *default* categories will only be used, if the given field does not +exist within the document. In the example above the categories are received from a +field named `color_field`. If this field does not exist a category *red* is assumed for +the context *color*. + +[float] +==== Indexing category contexts +Within a document the category is specified either as an `array` of values, a +single value or `null`. A list of values is interpreted as alternative categories. So +a document belongs to all the categories defined. If the category is `null` or remains +unset the categories will be retrieved from the documents field addressed by the `path` +parameter. If this value is not set or the field is missing, the default values of the +mapping will be assigned to the context. 
+ +[source,js] +-------------------------------------------------- +PUT services/service/1 +{ + "name": "knapsack", + "suggest_field": { + "input": ["knacksack", "backpack", "daypack"], + "context": { + "color": ["red", "yellow"] + } + } +} +-------------------------------------------------- + +[float] +==== Category Query +A query within a category works similar to the configuration. If the value is `null` +the mappings default categories will be used. Otherwise the suggestion takes place +for all documents that have at least one category in common with the query. + +[source,js] +-------------------------------------------------- +POST services/_suggest?pretty' +{ + "suggest" : { + "text" : "m", + "completion" : { + "field" : "suggest_field", + "size": 10, + "context": { + "color": "red" + } + } + } +} +-------------------------------------------------- + +[[suggester-context-geo]] +[float] +=== Geo location Context +A `geo` context allows you to limit results to those that lie within a certain distance +of a specified geolocation. At index time, a lat/long geo point is converted into a +geohash of a certain precision, which provides the context. + +[float] +==== Geo location Mapping +The mapping for a geo context accepts four settings: + +[horizontal] +`precision`:: This defines the precision of the geohash and can be specified as `5m`, `10km`, + or as a raw geohash precision: `1`..`12`. It's also possible to setup multiple + precisions by defining a list of precisions: `["5m", "10km"]` + (default is a geohash level of 12) +`neighbors`:: Geohashes are rectangles, so a geolocation, which in reality is only 1 metre + away from the specified point, may fall into the neighbouring rectangle. Set + `neighbours` to `true` to include the neighbouring geohashes in the context. + (default is *on*) +`path`:: Optionally specify a field to use to look up the geopoint. +`default`:: The geopoint to use if no geopoint has been specified. 
+ +Since all locations of this mapping are translated into geohashes, each location matches +a geohash cell. So some results that lie within the specified range but not in the same +cell as the query location will not match. To avoid this the `neighbors` option allows a +matching of cells that join the bordering regions of the documents location. This option +is turned on by default. +If a document or a query doesn't define a location a value to use instead can defined by +the `default` option. The value of this option supports all the ways a `geo_point` can be +defined. The `path` refers to another field within the document to retrieve the +location. If this field contains multiple values, the document will be linked to all these +locations. + +[source,js] +-------------------------------------------------- +"context": { + "location": { + "type": "geo", + "precision": ["1km", "5m"], + "neighbors": true, + "path": "pin", + "default": { + "lat": 0.0, + "lon": 0.0 + } + } +} +-------------------------------------------------- + +[float] +==== Geo location Config + +Within a document a geo location retrieved from the mapping definition can be overridden +by another location. In this case the context mapped to a geo location supports all +variants of defining a `geo_point`. + +[source,js] +-------------------------------------------------- +PUT services/service/1 +{ + "name": "some hotel 1", + "suggest_field": { + "input": ["my hotel", "this hotel"], + "context": { + "location": { + "lat": 0, + "lon": 0 + } + } + } +} +-------------------------------------------------- + +[float] +==== Geo location Query + +Like in the configuration, querying with a geo location in context, the geo location +query supports all representations of a `geo_point` to define the location. In this +simple case all precision values defined in the mapping will be applied to the given +location. 
+ +[source,js] +-------------------------------------------------- +POST services/_suggest +{ + "suggest" : { + "text" : "m", + "completion" : { + "field" : "suggest_field", + "size": 10, + "context": { + "location": { + "lat": 0, + "lon": 0 + } + } + } + } +} +-------------------------------------------------- + +But it also possible to set a subset of the precisions set in the mapping, by using the +`precision` parameter. Like in the mapping, this parameter is allowed to be set to a +single precision value or a list of these. + +[source,js] +-------------------------------------------------- +POST services/_suggest +{ + "suggest" : { + "text" : "m", + "completion" : { + "field" : "suggest_field", + "size": 10, + "context": { + "location": { + "value": { + "lat": 0, + "lon": 0 + }, + "precision": "1km" + } + } + } + } +} +-------------------------------------------------- + +A special form of the query is definied by an extension of the object representation of +the `geo_point`. Using this representation allows to set the `precision` parameter within +the location itself: + +[source,js] +-------------------------------------------------- +POST services/_suggest +{ + "suggest" : { + "text" : "m", + "completion" : { + "field" : "suggest_field", + "size": 10, + "context": { + "location": { + "lat": 0, + "lon": 0, + "precision": "1km" + } + } + } + } +} +-------------------------------------------------- + diff --git a/src/main/java/org/apache/lucene/analysis/PrefixAnalyzer.java b/src/main/java/org/apache/lucene/analysis/PrefixAnalyzer.java new file mode 100644 index 00000000000..c39fcc50917 --- /dev/null +++ b/src/main/java/org/apache/lucene/analysis/PrefixAnalyzer.java @@ -0,0 +1,128 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.lucene.analysis; + +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + +import java.io.IOException; +import java.io.Reader; +import java.util.Collections; +import java.util.Iterator; + +/** + * This {@link Analyzer} wraps another analyzer and adds a set of prefixes to the + * underlying TokenStream. While these prefixes are iterated the position attribute + * will not be incremented. Also each prefix will be separated from the other tokens + * by a separator character. + * NOTE: The sequence of prefixes needs to be not empty + */ +public class PrefixAnalyzer extends Analyzer { + + private final char separator; + private final Iterable prefix; + private final Analyzer analyzer; + + /** + * Create a new {@link PrefixAnalyzer}. The separator will be set to the DEFAULT_SEPARATOR. + * + * @param analyzer {@link Analyzer} to wrap + * @param prefix Single prefix + */ + public PrefixAnalyzer(Analyzer analyzer, char separator, CharSequence prefix) { + this(analyzer, separator, Collections.singleton(prefix)); + } + + /** + * Create a new {@link PrefixAnalyzer}. The separator will be set to the DEFAULT_SEPARATOR. 
+ * + * @param analyzer {@link Analyzer} to wrap + * @param prefix {@link Iterable} of {@link CharSequence} which keeps all prefixes + */ + public PrefixAnalyzer(Analyzer analyzer, char separator, Iterable prefix) { + super(); + this.analyzer = analyzer; + this.prefix = prefix; + this.separator = separator; + } + + @Override + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + TokenStreamComponents createComponents = analyzer.createComponents(fieldName, reader); + TokenStream stream = new PrefixTokenFilter(createComponents.getTokenStream(), separator, prefix); + TokenStreamComponents tsc = new TokenStreamComponents(createComponents.getTokenizer(), stream); + return tsc; + } + + /** + * The {@link PrefixTokenFilter} wraps a {@link TokenStream} and adds a set + * prefixes ahead. The position attribute will not be incremented for the prefixes. + */ + public static final class PrefixTokenFilter extends TokenFilter { + + private final char separator; + private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class); + private final Iterable prefixes; + + private Iterator currentPrefix; + + /** + * Create a new {@link PrefixTokenFilter}. The separator will be set to the DEFAULT_SEPARATOR. 
+ * + * @param input {@link TokenStream} to wrap + * @param separator Character used separate prefixes from other tokens + * @param prefixes {@link Iterable} of {@link CharSequence} which keeps all prefixes + */ + public PrefixTokenFilter(TokenStream input, char separator, Iterable prefixes) { + super(input); + this.prefixes = prefixes; + this.currentPrefix = null; + this.separator = separator; + assert (prefixes != null && prefixes.iterator().hasNext()) : "one or more prefix needed"; + } + + @Override + public boolean incrementToken() throws IOException { + if (currentPrefix != null) { + if (!currentPrefix.hasNext()) { + return input.incrementToken(); + } else { + posAttr.setPositionIncrement(0); + } + } else { + currentPrefix = prefixes.iterator(); + termAttr.setEmpty(); + posAttr.setPositionIncrement(1); + assert (currentPrefix.hasNext()) : "one or more prefixes needed"; + } + termAttr.setEmpty(); + termAttr.append(currentPrefix.next()); + termAttr.append(separator); + return true; + } + + @Override + public void reset() throws IOException { + super.reset(); + currentPrefix = null; + } + } +} diff --git a/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java b/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java index 4c80716d856..4b2f2c53491 100644 --- a/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java +++ b/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java @@ -26,8 +26,6 @@ import org.apache.lucene.search.suggest.InputIterator; import org.apache.lucene.search.suggest.Lookup; import org.apache.lucene.search.suggest.Sort; import org.apache.lucene.store.*; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.DataOutput; import org.apache.lucene.util.*; import org.apache.lucene.util.automaton.*; import org.apache.lucene.util.fst.*; @@ -36,7 +34,10 @@ import org.apache.lucene.util.fst.PairOutputs.Pair; import 
org.apache.lucene.util.fst.Util.MinResult; import org.elasticsearch.common.collect.HppcMaps; -import java.io.*; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.*; /** @@ -167,6 +168,8 @@ public class XAnalyzingSuggester extends Lookup { public static final int PAYLOAD_SEP = '\u001F'; public static final int HOLE_CHARACTER = '\u001E'; + + private final Automaton queryPrefix; /** Whether position holes should appear in the automaton. */ private boolean preservePositionIncrements; @@ -180,7 +183,7 @@ public class XAnalyzingSuggester extends Lookup { * PRESERVE_SEP, 256, -1)} */ public XAnalyzingSuggester(Analyzer analyzer) { - this(analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); + this(analyzer, null, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); } /** @@ -189,7 +192,7 @@ public class XAnalyzingSuggester extends Lookup { * PRESERVE_SEP, 256, -1)} */ public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) { - this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); + this(indexAnalyzer, null, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); } /** @@ -208,7 +211,7 @@ public class XAnalyzingSuggester extends Lookup { * to expand from the analyzed form. Set this to -1 for * no limit. 
*/ - public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, + public XAnalyzingSuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean preservePositionIncrements, FST> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput, int sepLabel, int payloadSep, int endByte, int holeCharacter) { // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput @@ -222,6 +225,9 @@ public class XAnalyzingSuggester extends Lookup { this.exactFirst = (options & EXACT_FIRST) != 0; this.preserveSep = (options & PRESERVE_SEP) != 0; + // FLORIAN EDIT: I added queryPrefix for context dependent suggestions + this.queryPrefix = queryPrefix; + // NOTE: this is just an implementation limitation; if // somehow this is a problem we could fix it by using // more than one byte to disambiguate ... but 256 seems @@ -298,6 +304,10 @@ public class XAnalyzingSuggester extends Lookup { } protected Automaton convertAutomaton(Automaton a) { + if (queryPrefix != null) { + a = Automaton.concatenate(Arrays.asList(queryPrefix, a)); + BasicOperations.determinize(a); + } return a; } @@ -1081,4 +1091,4 @@ public class XAnalyzingSuggester extends Lookup { } } -} +} \ No newline at end of file diff --git a/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java b/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java index 4379cdabb70..c030cd913a5 100644 --- a/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java +++ b/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java @@ -121,7 +121,7 @@ public final class XFuzzySuggester extends XAnalyzingSuggester { * Analyzer that will be used for analyzing query text during lookup */ public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) { - 
this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS, + this(indexAnalyzer, null, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS, DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); } @@ -151,11 +151,11 @@ public final class XFuzzySuggester extends XAnalyzingSuggester { * @param payloadSep payload separator byte * @param endByte end byte marker byte */ - public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, + public XFuzzySuggester(Analyzer indexAnalyzer, Automaton queryPrefix, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware, FST> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput, int sepLabel, int payloadSep, int endByte, int holeCharacter) { - super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter); + super(indexAnalyzer, queryPrefix, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter); if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } @@ -202,11 +202,12 @@ public final class XFuzzySuggester extends XAnalyzingSuggester { @Override protected Automaton convertAutomaton(Automaton a) { if (unicodeAware) { - Automaton utf8automaton = new UTF32ToUTF8().convert(a); + // FLORIAN EDIT: get converted Automaton from superclass + 
Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a)); BasicOperations.determinize(utf8automaton); return utf8automaton; } else { - return a; + return super.convertAutomaton(a); } } diff --git a/src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java b/src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java index d24cfbfd775..7c97e0614b5 100644 --- a/src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java +++ b/src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java @@ -20,7 +20,8 @@ package org.elasticsearch.common.geo; import org.elasticsearch.ElasticsearchIllegalArgumentException; import java.util.ArrayList; -import java.util.List; +import java.util.Collection; +import java.util.Iterator; /** @@ -113,13 +114,30 @@ public class GeoHashUtils { /** * Calculate all neighbors of a given geohash cell. * - * @param geohash Geohash of the defines cell + * @param geohash Geohash of the defined cell * @return geohashes of all neighbor cells */ - public static List neighbors(String geohash) { - return addNeighbors(geohash, geohash.length(), new ArrayList(8)); + public static Collection neighbors(String geohash) { + return addNeighbors(geohash, geohash.length(), new ArrayList(8)); } + /** + * Create an {@link Iterable} which allows to iterate over the cells that + * contain a given geohash + * + * @param geohash Geohash of a cell + * + * @return {@link Iterable} of path + */ + public static Iterable path(final String geohash) { + return new Iterable() { + @Override + public Iterator iterator() { + return new GeohashPathIterator(geohash); + } + }; + } + /** * Calculate the geohash of a neighbor of a geohash * @@ -164,7 +182,7 @@ public class GeoHashUtils { final int yLimit = ((level % 2) == 0) ? 3 : 7; // if the defined neighbor has the same parent a the current cell - // encode the cell direcly. Otherwise find the cell next to this + // encode the cell directly. Otherwise find the cell next to this // cell recursively. 
Since encoding wraps around within a cell // it can be encoded here. if (nx >= 0 && nx <= xLimit && ny >= 0 && ny < yLimit) { @@ -180,6 +198,17 @@ public class GeoHashUtils { } } + /** + * Add all geohashes of the cells next to a given geohash to a list. + * + * @param geohash Geohash of a specified cell + * @param neighbors list to add the neighbors to + * @return the given list + */ + public static final > E addNeighbors(String geohash, E neighbors) { + return addNeighbors(geohash, geohash.length(), neighbors); + } + /** * Add all geohashes of the cells next to a given geohash to a list. * @@ -188,10 +217,9 @@ public class GeoHashUtils { * @param neighbors list to add the neighbors to * @return the given list */ - private static final List addNeighbors(String geohash, int length, List neighbors) { + public static final > E addNeighbors(String geohash, int length, E neighbors) { String south = neighbor(geohash, length, 0, -1); String north = neighbor(geohash, length, 0, +1); - if (north != null) { neighbors.add(neighbor(north, length, -1, 0)); neighbors.add(north); diff --git a/src/main/java/org/elasticsearch/common/geo/GeoPoint.java b/src/main/java/org/elasticsearch/common/geo/GeoPoint.java index 434aae5010e..ba568ab5510 100644 --- a/src/main/java/org/elasticsearch/common/geo/GeoPoint.java +++ b/src/main/java/org/elasticsearch/common/geo/GeoPoint.java @@ -29,7 +29,7 @@ import java.io.IOException; /** * */ -public class GeoPoint { +public final class GeoPoint { public static final String LATITUDE = GeoPointFieldMapper.Names.LAT; public static final String LONGITUDE = GeoPointFieldMapper.Names.LON; @@ -41,6 +41,16 @@ public class GeoPoint { public GeoPoint() { } + /** + * Create a new Geopointform a string. This String must either be a geohash + * or a lat-lon tuple. 
+ * + * @param value String to create the point from + */ + public GeoPoint(String value) { + this.resetFromString(value); + } + public GeoPoint(double lat, double lon) { this.lat = lat; this.lon = lon; diff --git a/src/main/java/org/elasticsearch/common/geo/GeohashPathIterator.java b/src/main/java/org/elasticsearch/common/geo/GeohashPathIterator.java new file mode 100644 index 00000000000..6e6821ee81b --- /dev/null +++ b/src/main/java/org/elasticsearch/common/geo/GeohashPathIterator.java @@ -0,0 +1,59 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.geo; + +import java.util.Iterator; + +/** + * This class iterates over the cells of a given geohash. Assume geohashes + * form a tree, this iterator traverses this tree form a leaf (actual gehash) + * to the root (geohash of length 1). 
+ */ +public final class GeohashPathIterator implements Iterator { + + private final String geohash; + private int currentLength; + + /** + * Create a new {@link GeohashPathIterator} for a given geohash + * @param geohash The geohash to traverse + */ + public GeohashPathIterator(String geohash) { + this.geohash = geohash; + this.currentLength = geohash.length(); + } + + @Override + public boolean hasNext() { + return currentLength > 0; + } + + @Override + public String next() { + String result = geohash.substring(0, currentLength); + currentLength--; + return result; + } + + @Override + public void remove() { + throw new UnsupportedOperationException("unable to remove a geohash from this path"); + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java b/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java index d4c3b38ce3e..a76c848618e 100644 --- a/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java +++ b/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java @@ -1036,6 +1036,14 @@ public final class XContentBuilder implements BytesStream { return this; } + public XContentBuilder latlon(String name, double lat, double lon) throws IOException { + return startObject(name).field("lat", lat).field("lon", lon).endObject(); + } + + public XContentBuilder latlon(double lat, double lon) throws IOException { + return startObject().field("lat", lat).field("lon", lon).endObject(); + } + public XContentBuilder copyCurrentStructure(XContentParser parser) throws IOException { generator.copyCurrentStructure(parser); return this; diff --git a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java index f5f3c2cdff5..8607895cfac 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java +++ 
b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.mapper.core; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; @@ -27,11 +28,11 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.*; import org.elasticsearch.common.xcontent.XContentParser.NumberType; +import org.elasticsearch.common.xcontent.XContentParser.Token; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider; import org.elasticsearch.index.fielddata.FieldDataType; @@ -40,13 +41,13 @@ import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider; import org.elasticsearch.search.suggest.completion.CompletionPostingsFormatProvider; import org.elasticsearch.search.suggest.completion.CompletionTokenStream; +import org.elasticsearch.search.suggest.context.ContextBuilder; +import org.elasticsearch.search.suggest.context.ContextMapping; +import org.elasticsearch.search.suggest.context.ContextMapping.ContextConfig; import java.io.IOException; import java.io.Reader; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; +import java.util.*; import static 
org.elasticsearch.index.mapper.MapperBuilders.completionField; import static org.elasticsearch.index.mapper.core.TypeParsers.parseMultiField; @@ -87,10 +88,11 @@ public class CompletionFieldMapper extends AbstractFieldMapper { public static final String CONTENT_FIELD_NAME_OUTPUT = "output"; public static final String CONTENT_FIELD_NAME_PAYLOAD = "payload"; public static final String CONTENT_FIELD_NAME_WEIGHT = "weight"; + public static final String CONTEXT = "context"; } public static Set ALLOWED_CONTENT_FIELD_NAMES = Sets.newHashSet(Fields.CONTENT_FIELD_NAME_INPUT, - Fields.CONTENT_FIELD_NAME_OUTPUT, Fields.CONTENT_FIELD_NAME_PAYLOAD, Fields.CONTENT_FIELD_NAME_WEIGHT); + Fields.CONTENT_FIELD_NAME_OUTPUT, Fields.CONTENT_FIELD_NAME_PAYLOAD, Fields.CONTENT_FIELD_NAME_WEIGHT, Fields.CONTEXT); public static class Builder extends AbstractFieldMapper.Builder { @@ -98,6 +100,7 @@ public class CompletionFieldMapper extends AbstractFieldMapper { private boolean payloads = Defaults.DEFAULT_HAS_PAYLOADS; private boolean preservePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS; private int maxInputLength = Defaults.DEFAULT_MAX_INPUT_LENGTH; + private SortedMap contextMapping = ContextMapping.EMPTY_MAPPING; public Builder(String name) { super(name, new FieldType(Defaults.FIELD_TYPE)); @@ -127,11 +130,17 @@ public class CompletionFieldMapper extends AbstractFieldMapper { return this; } + public Builder contextMapping(SortedMap contextMapping) { + this.contextMapping = contextMapping; + return this; + } + @Override public CompletionFieldMapper build(Mapper.BuilderContext context) { return new CompletionFieldMapper(buildNames(context), indexAnalyzer, searchAnalyzer, postingsProvider, similarity, payloads, - preserveSeparators, preservePositionIncrements, maxInputLength, multiFieldsBuilder.build(this, context), copyTo); + preserveSeparators, preservePositionIncrements, maxInputLength, multiFieldsBuilder.build(this, context), copyTo, this.contextMapping); } + } public 
static class TypeParser implements Mapper.TypeParser { @@ -163,6 +172,8 @@ public class CompletionFieldMapper extends AbstractFieldMapper { builder.maxInputLength(Integer.parseInt(fieldNode.toString())); } else if ("fields".equals(fieldName) || "path".equals(fieldName)) { parseMultiField(builder, name, node, parserContext, fieldName, fieldNode); + } else if (fieldName.equals(Fields.CONTEXT)) { + builder.contextMapping(ContextBuilder.loadMappings(fieldNode)); } else { throw new MapperParsingException("Unknown field [" + fieldName + "]"); } @@ -197,9 +208,14 @@ public class CompletionFieldMapper extends AbstractFieldMapper { private final boolean preservePositionIncrements; private final boolean preserveSeparators; private int maxInputLength; + private final SortedMap contextMapping; + /** + * Creates a new completion field mapper. + * @param contextMappings Configuration of context types. If none should be used set {@link ContextMapping#EMPTY_MAPPING} + */ public CompletionFieldMapper(Names names, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer, PostingsFormatProvider postingsProvider, SimilarityProvider similarity, boolean payloads, - boolean preserveSeparators, boolean preservePositionIncrements, int maxInputLength, MultiFields multiFields, CopyTo copyTo) { + boolean preserveSeparators, boolean preservePositionIncrements, int maxInputLength, MultiFields multiFields, CopyTo copyTo, SortedMap contextMappings) { super(names, 1.0f, Defaults.FIELD_TYPE, null, indexAnalyzer, searchAnalyzer, postingsProvider, null, similarity, null, null, null, multiFields, copyTo); analyzingSuggestLookupProvider = new AnalyzingCompletionLookupProvider(preserveSeparators, false, preservePositionIncrements, payloads); this.completionPostingsFormatProvider = new CompletionPostingsFormatProvider("completion", postingsProvider, analyzingSuggestLookupProvider); @@ -207,9 +223,9 @@ public class CompletionFieldMapper extends AbstractFieldMapper { this.payloads = payloads; this.preservePositionIncrements =
preservePositionIncrements; this.maxInputLength = maxInputLength; + this.contextMapping = contextMappings; } - @Override public PostingsFormatProvider postingsFormatProvider() { return this.completionPostingsFormatProvider; @@ -225,6 +241,8 @@ public class CompletionFieldMapper extends AbstractFieldMapper { long weight = -1; List inputs = Lists.newArrayListWithExpectedSize(4); + SortedMap contextConfig = null; + if (token == XContentParser.Token.VALUE_STRING) { inputs.add(parser.text()); multiFields.parse(this, context); @@ -236,6 +254,28 @@ public class CompletionFieldMapper extends AbstractFieldMapper { if (!ALLOWED_CONTENT_FIELD_NAMES.contains(currentFieldName)) { throw new ElasticsearchIllegalArgumentException("Unknown field name[" + currentFieldName + "], must be one of " + ALLOWED_CONTENT_FIELD_NAMES); } + } else if (Fields.CONTEXT.equals(currentFieldName)) { + SortedMap configs = Maps.newTreeMap(); + + if (token == Token.START_OBJECT) { + while ((token = parser.nextToken()) != Token.END_OBJECT) { + String name = parser.text(); + ContextMapping mapping = contextMapping.get(name); + if (mapping == null) { + throw new ElasticsearchParseException("context [" + name + "] is not defined"); + } else { + token = parser.nextToken(); + configs.put(name, mapping.parseContext(context, parser)); + } + } + contextConfig = Maps.newTreeMap(); + for (ContextMapping mapping : contextMapping.values()) { + ContextConfig config = configs.get(mapping.name()); + contextConfig.put(mapping.name(), config==null ? 
mapping.defaultConfig() : config); + } + } else { + throw new ElasticsearchParseException("context must be an object"); + } } else if (Fields.CONTENT_FIELD_NAME_PAYLOAD.equals(currentFieldName)) { if (!isStoringPayloads()) { throw new MapperException("Payloads disabled in mapping"); @@ -276,23 +316,49 @@ public class CompletionFieldMapper extends AbstractFieldMapper { } } } + + if(contextConfig == null) { + contextConfig = Maps.newTreeMap(); + for (ContextMapping mapping : contextMapping.values()) { + contextConfig.put(mapping.name(), mapping.defaultConfig()); + } + } + + final ContextMapping.Context ctx = new ContextMapping.Context(contextConfig, context.doc()); + payload = payload == null ? EMPTY : payload; if (surfaceForm == null) { // no surface form use the input for (String input : inputs) { BytesRef suggestPayload = analyzingSuggestLookupProvider.buildPayload(new BytesRef( input), weight, payload); - context.doc().add(getCompletionField(input, suggestPayload)); + context.doc().add(getCompletionField(ctx, input, suggestPayload)); } } else { BytesRef suggestPayload = analyzingSuggestLookupProvider.buildPayload(new BytesRef( surfaceForm), weight, payload); for (String input : inputs) { - context.doc().add(getCompletionField(input, suggestPayload)); + context.doc().add(getCompletionField(ctx, input, suggestPayload)); } } } + /** + * Get the context mapping associated with this completion field. 
+ */ + public SortedMap getContextMapping() { + return contextMapping; + } + + /** @return true if a context mapping has been defined */ + public boolean requiresContext() { + return !contextMapping.isEmpty(); + } + public Field getCompletionField(String input, BytesRef payload) { + return getCompletionField(ContextMapping.EMPTY_CONTEXT, input, payload); + } + + public Field getCompletionField(ContextMapping.Context ctx, String input, BytesRef payload) { final String originalInput = input; if (input.length() > maxInputLength) { final int len = correctSubStringLen(input, Math.min(maxInputLength, input.length())); @@ -305,7 +371,7 @@ public class CompletionFieldMapper extends AbstractFieldMapper { + "] at position " + i + " is a reserved character"); } } - return new SuggestField(names.indexName(), input, this.fieldType, payload, analyzingSuggestLookupProvider); + return new SuggestField(names.indexName(), ctx, input, this.fieldType, payload, analyzingSuggestLookupProvider); } public static int correctSubStringLen(String input, int len) { @@ -324,27 +390,29 @@ public class CompletionFieldMapper extends AbstractFieldMapper { private static final class SuggestField extends Field { private final BytesRef payload; private final CompletionTokenStream.ToFiniteStrings toFiniteStrings; + private final ContextMapping.Context ctx; - public SuggestField(String name, Reader value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) { + public SuggestField(String name, ContextMapping.Context ctx, Reader value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) { super(name, value, type); this.payload = payload; this.toFiniteStrings = toFiniteStrings; + this.ctx = ctx; } - public SuggestField(String name, String value, FieldType type, BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) { + public SuggestField(String name, ContextMapping.Context ctx, String value, FieldType type, 
BytesRef payload, CompletionTokenStream.ToFiniteStrings toFiniteStrings) { super(name, value, type); this.payload = payload; this.toFiniteStrings = toFiniteStrings; + this.ctx = ctx; } @Override public TokenStream tokenStream(Analyzer analyzer) throws IOException { - TokenStream ts = super.tokenStream(analyzer); + TokenStream ts = ctx.wrapTokenStream(super.tokenStream(analyzer)); return new CompletionTokenStream(ts, payload, toFiniteStrings); } } - - + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(name()) @@ -360,6 +428,15 @@ public class CompletionFieldMapper extends AbstractFieldMapper { builder.field(Fields.PRESERVE_POSITION_INCREMENTS.getPreferredName(), this.preservePositionIncrements); builder.field(Fields.MAX_INPUT_LENGTH.getPreferredName(), this.maxInputLength); multiFields.toXContent(builder, params); + + if(!contextMapping.isEmpty()) { + builder.startObject(Fields.CONTEXT); + for (ContextMapping mapping : contextMapping.values()) { + builder.value(mapping); + } + builder.endObject(); + } + return builder.endObject(); } @@ -367,7 +444,6 @@ public class CompletionFieldMapper extends AbstractFieldMapper { protected void parseCreateField(ParseContext context, List fields) throws IOException { } - @Override protected String contentType() { return CONTENT_TYPE; @@ -418,6 +494,9 @@ public class CompletionFieldMapper extends AbstractFieldMapper { if (preserveSeparators != fieldMergeWith.preserveSeparators) { mergeContext.addConflict("mapper [" + names.fullName() + "] has different 'preserve_separators' values"); } + if(!ContextMapping.mappingsAreEqual(getContextMapping(), fieldMergeWith.getContextMapping())) { + mergeContext.addConflict("mapper [" + names.fullName() + "] has different 'context_mapping' values"); + } if (!mergeContext.mergeFlags().simulate()) { this.maxInputLength = fieldMergeWith.maxInputLength; } @@ -434,9 +513,10 @@ public class CompletionFieldMapper extends 
AbstractFieldMapper { case XAnalyzingSuggester.END_BYTE: case XAnalyzingSuggester.SEP_LABEL: case XAnalyzingSuggester.HOLE_CHARACTER: + case ContextMapping.SEPARATOR: return true; default: return false; - } + } } } diff --git a/src/main/java/org/elasticsearch/index/query/GeohashCellFilter.java b/src/main/java/org/elasticsearch/index/query/GeohashCellFilter.java index b41ba3be111..98568b1e8af 100644 --- a/src/main/java/org/elasticsearch/index/query/GeohashCellFilter.java +++ b/src/main/java/org/elasticsearch/index/query/GeohashCellFilter.java @@ -38,6 +38,7 @@ import org.elasticsearch.index.mapper.core.StringFieldMapper; import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper; import java.io.IOException; +import java.util.ArrayList; import java.util.List; /** @@ -71,7 +72,7 @@ public class GeohashCellFilter { * @param geohashes optional array of additional geohashes * @return a new GeoBoundinboxfilter */ - public static Filter create(QueryParseContext context, GeoPointFieldMapper fieldMapper, String geohash, @Nullable List geohashes) { + public static Filter create(QueryParseContext context, GeoPointFieldMapper fieldMapper, String geohash, @Nullable List geohashes) { if (fieldMapper.geoHashStringMapper() == null) { throw new ElasticsearchIllegalArgumentException("geohash filter needs geohash_prefix to be enabled"); } @@ -254,7 +255,7 @@ public class GeohashCellFilter { } if (neighbors) { - return create(parseContext, geoMapper, geohash, GeoHashUtils.neighbors(geohash)); + return create(parseContext, geoMapper, geohash, GeoHashUtils.addNeighbors(geohash, new ArrayList(8))); } else { return create(parseContext, geoMapper, geohash, null); } diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java b/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java index a4609e8cca0..1fd4e4246f6 100644 --- a/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java +++ 
b/src/main/java/org/elasticsearch/search/suggest/SuggestBuilder.java @@ -25,6 +25,9 @@ import java.util.List; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery; +import org.elasticsearch.search.suggest.context.CategoryContextMapping; +import org.elasticsearch.search.suggest.context.GeolocationContextMapping; import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder; import org.elasticsearch.search.suggest.term.TermSuggestionBuilder; @@ -123,12 +126,75 @@ public class SuggestBuilder implements ToXContent { private String analyzer; private Integer size; private Integer shardSize; + + private List contextQueries = new ArrayList(); public SuggestionBuilder(String name, String suggester) { this.name = name; this.suggester = suggester; } + @SuppressWarnings("unchecked") + private T addContextQuery(ContextQuery ctx) { + this.contextQueries.add(ctx); + return (T) this; + } + + /** + * Setup a Geolocation for suggestions. See {@link GeolocationContextMapping}. + * @param lat Latitude of the location + * @param lon Longitude of the location + * @return this + */ + public T addGeoLocation(String name, double lat, double lon) { + return addContextQuery(GeolocationContextMapping.query(name, lat, lon)); + } + + /** + * Setup a Geolocation for suggestions. See {@link GeolocationContextMapping}. + * @param geohash Geohash of the location + * @return this + */ + public T addGeoLocation(String name, String geohash) { + return addContextQuery(GeolocationContextMapping.query(name, geohash)); + } + + /** + * Setup a Category for suggestions. See {@link CategoryContextMapping}.
+ * @param name name of the category + * @return this + */ + public T addCategory(String name, CharSequence...categories) { + return addContextQuery(CategoryContextMapping.query(name, categories)); + } + + /** + * Setup a Category for suggestions. See {@link CategoryContextMapping}. + * @param name name of the category + * @return this + */ + public T addCategory(String name, Iterable categories) { + return addContextQuery(CategoryContextMapping.query(name, categories)); + } + + /** + * Setup a Context Field for suggestions. See {@link CategoryContextMapping}. + * @param name name of the context field + * @return this + */ + public T addContextField(String name, CharSequence...fieldvalues) { + return addContextQuery(CategoryContextMapping.query(name, fieldvalues)); + } + + /** + * Setup a Context Field for suggestions. See {@link CategoryContextMapping}. + * @param name name of the context field + * @return this + */ + public T addContextField(String name, Iterable fieldvalues) { + return addContextQuery(CategoryContextMapping.query(name, fieldvalues)); + } + /** * Same as in {@link SuggestBuilder#setText(String)}, but in the suggestion scope.
*/ @@ -157,6 +223,14 @@ public class SuggestBuilder implements ToXContent { if (shardSize != null) { builder.field("shard_size", shardSize); } + + if (!contextQueries.isEmpty()) { + builder.startObject("context"); + for (ContextQuery query : contextQueries) { + query.toXContent(builder, params); + } + builder.endObject(); + } builder = innerToXContent(builder, params); builder.endObject(); builder.endObject(); diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java b/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java index a45f9797044..24a2c34f69e 100644 --- a/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java +++ b/src/main/java/org/elasticsearch/search/suggest/SuggestionSearchContext.java @@ -18,13 +18,12 @@ */ package org.elasticsearch.search.suggest; -import java.util.LinkedHashMap; -import java.util.Map; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import java.util.LinkedHashMap; +import java.util.Map; /** */ public class SuggestionSearchContext { @@ -49,7 +48,7 @@ public class SuggestionSearchContext { private int shardSize = -1; private int shardId; private String index; - + public BytesRef getText() { return text; } @@ -61,7 +60,7 @@ public class SuggestionSearchContext { public SuggestionContext(Suggester suggester) { this.suggester = suggester; } - + public Suggester getSuggester() { return this.suggester; } @@ -121,6 +120,4 @@ public class SuggestionSearchContext { } } - - } diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java b/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java index 6714eed9224..bda9c29f742 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java +++ 
b/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java @@ -31,15 +31,14 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.fst.ByteSequenceOutputs; -import org.apache.lucene.util.fst.FST; -import org.apache.lucene.util.fst.PairOutputs; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.fst.*; import org.apache.lucene.util.fst.PairOutputs.Pair; -import org.apache.lucene.util.fst.PositiveIntOutputs; import org.elasticsearch.common.regex.Regex; -import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.core.CompletionFieldMapper; import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider; import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory; +import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery; import java.io.IOException; import java.util.*; @@ -74,7 +73,7 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0; // needs to fixed in the suggester first before it can be supported //options |= exactFirst ? 
XAnalyzingSuggester.EXACT_FIRST : 0; - prototype = new XAnalyzingSuggester(null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); + prototype = new XAnalyzingSuggester(null, null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } @Override @@ -199,9 +198,6 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider } } - ; - - @Override public LookupFactory load(IndexInput input) throws IOException { long sizeInBytes = 0; @@ -256,25 +252,26 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider final long ramBytesUsed = sizeInBytes; return new LookupFactory() { @Override - public Lookup getLookup(FieldMapper mapper, CompletionSuggestionContext suggestionContext) { + public Lookup getLookup(CompletionFieldMapper mapper, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName()); if (analyzingSuggestHolder == null) { return null; } - int flags = analyzingSuggestHolder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0; + int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0; + + final XAnalyzingSuggester suggester; + final Automaton queryPrefix = mapper.requiresContext() ? 
ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; - XAnalyzingSuggester suggester; if (suggestionContext.isFuzzy()) { - suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, + suggester = new XFuzzySuggester(mapper.indexAnalyzer(), queryPrefix, mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(), analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); - } else { - suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, + suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), queryPrefix, mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, @@ -309,7 +306,7 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider } @Override - AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper mapper) { + AnalyzingSuggestHolder getAnalyzingSuggestHolder(CompletionFieldMapper mapper) { return lookupMap.get(mapper.names().indexName()); } @@ -351,6 +348,18 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider this.endByte = endByte; this.holeCharacter = holeCharacter; } 
+ + public boolean getPreserveSeparator() { + return preserveSep; + } + + public boolean getPreservePositionIncrements() { + return preservePositionIncrements; + } + + public boolean hasPayloads() { + return hasPayloads; + } } @Override diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java b/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java index 8162160c706..5bbd5a25deb 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/Completion090PostingsFormat.java @@ -28,11 +28,10 @@ import org.apache.lucene.store.IOContext.Context; import org.apache.lucene.store.*; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.ElasticsearchIllegalStateException; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.Loggers; -import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.core.CompletionFieldMapper; import org.elasticsearch.search.suggest.completion.CompletionTokenStream.ToFiniteStrings; import java.io.ByteArrayInputStream; @@ -284,7 +283,7 @@ public class Completion090PostingsFormat extends PostingsFormat { this.lookup = lookup; } - public Lookup getLookup(FieldMapper mapper, CompletionSuggestionContext suggestionContext) { + public Lookup getLookup(CompletionFieldMapper mapper, CompletionSuggestionContext suggestionContext) { return lookup.getLookup(mapper, suggestionContext); } @@ -364,9 +363,9 @@ public class Completion090PostingsFormat extends PostingsFormat { } public static abstract class LookupFactory { - public abstract Lookup getLookup(FieldMapper mapper, CompletionSuggestionContext suggestionContext); + public abstract Lookup getLookup(CompletionFieldMapper mapper, CompletionSuggestionContext 
suggestionContext); public abstract CompletionStats stats(String ... fields); - abstract AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper mapper); + abstract AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder getAnalyzingSuggestHolder(CompletionFieldMapper mapper); public abstract long ramBytesUsed(); } } diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java index 2be279c59e2..aad44e31da5 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestParser.java @@ -20,13 +20,19 @@ package org.elasticsearch.search.suggest.completion; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.core.CompletionFieldMapper; import org.elasticsearch.search.suggest.SuggestContextParser; import org.elasticsearch.search.suggest.SuggestionSearchContext; +import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery; import java.io.IOException; +import java.util.List; import static org.elasticsearch.search.suggest.SuggestUtils.parseSuggestContext; @@ -47,6 +53,9 @@ public class CompletionSuggestParser implements SuggestContextParser { XContentParser.Token token; String fieldName = null; CompletionSuggestionContext suggestion = new CompletionSuggestionContext(completionSuggester); + + XContentParser contextParser = null; + while ((token = 
parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { fieldName = parser.currentName(); @@ -56,32 +65,57 @@ public class CompletionSuggestParser implements SuggestContextParser { suggestion.setFuzzy(parser.booleanValue()); } } - } else if (token == XContentParser.Token.START_OBJECT && "fuzzy".equals(fieldName)) { - suggestion.setFuzzy(true); - String fuzzyConfigName = null; - while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - if (token == XContentParser.Token.FIELD_NAME) { - fuzzyConfigName = parser.currentName(); - } else if (token.isValue()) { - if (FUZZINESS.match(fuzzyConfigName, ParseField.EMPTY_FLAGS)) { - suggestion.setFuzzyEditDistance(Fuzziness.parse(parser).asDistance()); - } else if ("transpositions".equals(fuzzyConfigName)) { - suggestion.setFuzzyTranspositions(parser.booleanValue()); - } else if ("min_length".equals(fuzzyConfigName) || "minLength".equals(fuzzyConfigName)) { - suggestion.setFuzzyMinLength(parser.intValue()); - } else if ("prefix_length".equals(fuzzyConfigName) || "prefixLength".equals(fuzzyConfigName)) { - suggestion.setFuzzyPrefixLength(parser.intValue()); - } else if ("unicode_aware".equals(fuzzyConfigName) || "unicodeAware".equals(fuzzyConfigName)) { - suggestion.setFuzzyUnicodeAware(parser.booleanValue()); + } else if (token == XContentParser.Token.START_OBJECT) { + if("fuzzy".equals(fieldName)) { + suggestion.setFuzzy(true); + String fuzzyConfigName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + fuzzyConfigName = parser.currentName(); + } else if (token.isValue()) { + if (FUZZINESS.match(fuzzyConfigName, ParseField.EMPTY_FLAGS)) { + suggestion.setFuzzyEditDistance(Fuzziness.parse(parser).asDistance()); + } else if ("transpositions".equals(fuzzyConfigName)) { + suggestion.setFuzzyTranspositions(parser.booleanValue()); + } else if ("min_length".equals(fuzzyConfigName) || 
"minLength".equals(fuzzyConfigName)) { + suggestion.setFuzzyMinLength(parser.intValue()); + } else if ("prefix_length".equals(fuzzyConfigName) || "prefixLength".equals(fuzzyConfigName)) { + suggestion.setFuzzyPrefixLength(parser.intValue()); + } else if ("unicode_aware".equals(fuzzyConfigName) || "unicodeAware".equals(fuzzyConfigName)) { + suggestion.setFuzzyUnicodeAware(parser.booleanValue()); + } } } + } else if("context".equals(fieldName)) { + // Copy the current structure. We will parse, once the mapping is provided + XContentBuilder builder = XContentFactory.contentBuilder(parser.contentType()); + builder.copyCurrentStructure(parser); + BytesReference bytes = builder.bytes(); + contextParser = parser.contentType().xContent().createParser(bytes); + } else { + throw new ElasticsearchIllegalArgumentException("suggester [completion] doesn't support field [" + fieldName + "]"); } } else { throw new ElasticsearchIllegalArgumentException("suggester[completion] doesn't support field [" + fieldName + "]"); } } - suggestion.mapper(mapperService.smartNameFieldMapper(suggestion.getField())); + + suggestion.mapper((CompletionFieldMapper)mapperService.smartNameFieldMapper(suggestion.getField())); + CompletionFieldMapper mapper = suggestion.mapper(); + if (mapper != null) { + if (mapper.requiresContext()) { + if (contextParser == null) { + throw new ElasticsearchIllegalArgumentException("suggester [completion] requires context to be setup"); + } else { + contextParser.nextToken(); + List contextQueries = ContextQuery.parseQueries(mapper.getContextMapping(), contextParser); + suggestion.setContextQuery(contextQueries); + } + } else if (contextParser != null) { + throw new ElasticsearchIllegalArgumentException("suggester [completion] doesn't expect any context"); + } + } return suggestion; } diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java 
b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java index adf6911a29b..a60ad16dd8a 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggestionContext.java @@ -19,32 +19,37 @@ package org.elasticsearch.search.suggest.completion; import org.apache.lucene.search.suggest.analyzing.XFuzzySuggester; -import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.core.CompletionFieldMapper; import org.elasticsearch.search.suggest.Suggester; import org.elasticsearch.search.suggest.SuggestionSearchContext; +import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery; + +import java.util.Collections; +import java.util.List; /** * */ public class CompletionSuggestionContext extends SuggestionSearchContext.SuggestionContext { - private FieldMapper mapper; + private CompletionFieldMapper mapper; private int fuzzyEditDistance = XFuzzySuggester.DEFAULT_MAX_EDITS; private boolean fuzzyTranspositions = XFuzzySuggester.DEFAULT_TRANSPOSITIONS; private int fuzzyMinLength = XFuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH; private int fuzzyPrefixLength = XFuzzySuggester.DEFAULT_NON_FUZZY_PREFIX; private boolean fuzzy = false; private boolean fuzzyUnicodeAware = XFuzzySuggester.DEFAULT_UNICODE_AWARE; - + private List contextQueries = Collections.emptyList(); + public CompletionSuggestionContext(Suggester suggester) { super(suggester); } - - public FieldMapper mapper() { + + public CompletionFieldMapper mapper() { return this.mapper; } - - public void mapper(FieldMapper mapper) { + + public void mapper(CompletionFieldMapper mapper) { this.mapper = mapper; } @@ -95,4 +100,12 @@ public class CompletionSuggestionContext extends SuggestionSearchContext.Suggest public boolean isFuzzyUnicodeAware() { return fuzzyUnicodeAware; } + + public void setContextQuery(List queries) { + 
this.contextQueries = queries; + } + + public List getContextQueries() { + return this.contextQueries; + } } diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java index 18306782ddd..0c92b3e79c8 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java @@ -157,4 +157,4 @@ public final class CompletionTokenStream extends TokenStream { return charsRef; } } -} \ No newline at end of file +} diff --git a/src/main/java/org/elasticsearch/search/suggest/context/CategoryContextMapping.java b/src/main/java/org/elasticsearch/search/suggest/context/CategoryContextMapping.java new file mode 100644 index 00000000000..d529848c2e0 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/suggest/context/CategoryContextMapping.java @@ -0,0 +1,341 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.suggest.context; + +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import org.apache.lucene.analysis.PrefixAnalyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.BasicAutomata; +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.ParseContext.Document; + +import java.io.IOException; +import java.util.*; + +/** + * The {@link CategoryContextMapping} is used to define a {@link ContextMapping} that + * references a field within a document. The value of the field in turn will be + * used to setup the suggestions made by the completion suggester. 
+ */ +public class CategoryContextMapping extends ContextMapping { + + protected static final String TYPE = "field"; + + private static final String FIELD_FIELDNAME = "path"; + private static final String DEFAULT_FIELDNAME = "_type"; + + private static final Iterable EMPTY_VALUES = Collections.emptyList(); + + private final String fieldName; + private final Iterable defaultValues; + private final FieldConfig defaultConfig; + + /** + * Create a new {@link CategoryContextMapping} with the default field + * [_type] + */ + public CategoryContextMapping(String name) { + this(name, DEFAULT_FIELDNAME, EMPTY_VALUES); + } + + /** + * Create a new {@link CategoryContextMapping} with the default field + * [_type] + */ + public CategoryContextMapping(String name, String fieldName) { + this(name, fieldName, EMPTY_VALUES); + } + + /** + * Create a new {@link CategoryContextMapping} with the default field + * [_type] + */ + public CategoryContextMapping(String name, Iterable defaultValues) { + this(name, DEFAULT_FIELDNAME, defaultValues); + } + + /** + * Create a new {@link CategoryContextMapping} with the default field + * [_type] + */ + public CategoryContextMapping(String name, String fieldName, Iterable defaultValues) { + super(TYPE, name); + this.fieldName = fieldName; + this.defaultValues = defaultValues; + this.defaultConfig = new FieldConfig(fieldName, defaultValues, null); + } + + /** + * Name of the field used by this {@link CategoryContextMapping} + */ + public String getFieldName() { + return fieldName; + } + + public Iterable getDefaultValues() { + return defaultValues; + } + + @Override + public FieldConfig defaultConfig() { + return defaultConfig; + } + + /** + * Load the specification of a {@link CategoryContextMapping} + * + * @param field + * name of the field to use. 
If null default field + * will be used + * @return new {@link CategoryContextMapping} + */ + protected static CategoryContextMapping load(String name, Map config) throws ElasticsearchParseException { + CategoryContextMapping.Builder mapping = new CategoryContextMapping.Builder(name); + + Object fieldName = config.get(FIELD_FIELDNAME); + Object defaultValues = config.get(FIELD_MISSING); + + if (fieldName != null) { + mapping.fieldName(fieldName.toString()); + } + + if (defaultValues != null) { + if (defaultValues instanceof Iterable) { + for (Object value : (Iterable) defaultValues) { + mapping.addDefaultValue(value.toString()); + } + } else { + mapping.addDefaultValue(defaultValues.toString()); + } + } + + return mapping.build(); + } + + @Override + protected XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException { + if (fieldName != null) { + builder.field(FIELD_FIELDNAME, fieldName); + } + builder.startArray(FIELD_MISSING); + for (CharSequence value : defaultValues) { + builder.value(value); + } + builder.endArray(); + return builder; + } + + @Override + public ContextConfig parseContext(ParseContext parseContext, XContentParser parser) throws IOException, ElasticsearchParseException { + Token token = parser.currentToken(); + if (token == Token.VALUE_NULL) { + return new FieldConfig(fieldName, defaultValues, null); + } else if (token == Token.VALUE_STRING) { + return new FieldConfig(fieldName, null, Collections.singleton(parser.text())); + } else if (token == Token.VALUE_NUMBER) { + return new FieldConfig(fieldName, null, Collections.singleton(parser.text())); + } else if (token == Token.VALUE_BOOLEAN) { + return new FieldConfig(fieldName, null, Collections.singleton(parser.text())); + } else if (token == Token.START_ARRAY) { + ArrayList values = Lists.newArrayList(); + while((token = parser.nextToken()) != Token.END_ARRAY) { + values.add(parser.text()); + } + if(values.isEmpty()) { + throw new 
ElasticsearchParseException("FieldConfig must contain a least one category"); + } + return new FieldConfig(fieldName, null, values); + } else { + throw new ElasticsearchParseException("FieldConfig must be either [null], a string or a list of strings"); + } + } + + @Override + public FieldQuery parseQuery(String name, XContentParser parser) throws IOException, ElasticsearchParseException { + Iterable values; + Token token = parser.currentToken(); + if (token == Token.START_ARRAY) { + ArrayList list = new ArrayList(); + while ((token = parser.nextToken()) != Token.END_ARRAY) { + list.add(parser.text()); + } + values = list; + } else if (token == Token.VALUE_NULL) { + values = defaultValues; + } else { + values = Collections.singleton(parser.text()); + } + + return new FieldQuery(name, values); + } + + public static FieldQuery query(String name, CharSequence... fieldvalues) { + return query(name, Arrays.asList(fieldvalues)); + } + + public static FieldQuery query(String name, Iterable fieldvalues) { + return new FieldQuery(name, fieldvalues); + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof CategoryContextMapping) { + CategoryContextMapping other = (CategoryContextMapping) obj; + if (this.fieldName.equals(other.fieldName)) { + return Iterables.elementsEqual(this.defaultValues, other.defaultValues); + } + } + return false; + } + + private static class FieldConfig extends ContextConfig { + + private final String fieldname; + private final Iterable defaultValues; + private final Iterable values; + + public FieldConfig(String fieldname, Iterable defaultValues, Iterable values) { + this.fieldname = fieldname; + this.defaultValues = defaultValues; + this.values = values; + } + + @Override + protected TokenStream wrapTokenStream(Document doc, TokenStream stream) { + if(values != null) { + return new PrefixAnalyzer.PrefixTokenFilter(stream, ContextMapping.SEPARATOR, values); + } else { + IndexableField[] fields = doc.getFields(fieldname); + 
ArrayList values = new ArrayList(fields.length); + + for (int i = 0; i < fields.length; i++) { + values.add(fields[i].stringValue()); + } + + return new PrefixAnalyzer.PrefixTokenFilter(stream, ContextMapping.SEPARATOR, values); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("FieldConfig(" + fieldname + " = ["); + Iterator value = this.defaultValues.iterator(); + if (value.hasNext()) { + sb.append(value.next()); + while (value.hasNext()) { + sb.append(", ").append(value.next()); + } + } + return sb.append("])").toString(); + } + + } + + private static class FieldQuery extends ContextQuery { + + private final Iterable values; + + public FieldQuery(String name, Iterable values) { + super(name); + this.values = values; + } + + public Automaton toAutomaton() { + List automatons = new ArrayList(); + for (CharSequence value : values) { + automatons.add(BasicAutomata.makeString(value.toString())); + } + return Automaton.union(automatons); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startArray(name); + for (CharSequence value : values) { + builder.value(value); + } + builder.endArray(); + return builder; + } + } + + public static class Builder extends ContextBuilder { + + private String fieldname; + private List defaultValues = new ArrayList(); + + public Builder(String name) { + this(name, DEFAULT_FIELDNAME); + } + + public Builder(String name, String fieldname) { + super(name); + this.fieldname = fieldname; + } + + /** + * Set the name of the field to use + */ + public Builder fieldName(String fieldname) { + this.fieldname = fieldname; + return this; + } + + /** + * Add value to the default values of the mapping + */ + public Builder addDefaultValue(CharSequence defaultValue) { + this.defaultValues.add(defaultValue); + return this; + } + + /** + * Add set of default values to the mapping + */ + public Builder addDefaultValues(CharSequence... 
defaultValues) { + for (CharSequence defaultValue : defaultValues) { + this.defaultValues.add(defaultValue); + } + return this; + } + + /** + * Add set of default values to the mapping + */ + public Builder addDefaultValues(Iterable defaultValues) { + for (CharSequence defaultValue : defaultValues) { + this.defaultValues.add(defaultValue); + } + return this; + } + + @Override + public CategoryContextMapping build() { + return new CategoryContextMapping(name, fieldname, defaultValues); + } + } +} diff --git a/src/main/java/org/elasticsearch/search/suggest/context/ContextBuilder.java b/src/main/java/org/elasticsearch/search/suggest/context/ContextBuilder.java new file mode 100644 index 00000000000..dde313615f1 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/suggest/context/ContextBuilder.java @@ -0,0 +1,127 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.suggest.context; + +import com.google.common.collect.Maps; +import org.elasticsearch.ElasticsearchParseException; + +import java.util.Map; +import java.util.Map.Entry; +import java.util.SortedMap; + +public abstract class ContextBuilder { + + protected String name; + + public ContextBuilder(String name) { + this.name = name; + } + + public abstract E build(); + + /** + * Create a new {@link GeolocationContextMapping} + */ + public static GeolocationContextMapping.Builder location(String name) { + return new GeolocationContextMapping.Builder(name); + } + + /** + * Create a new {@link GeolocationContextMapping} with given precision and + * neighborhood usage + * + * @param precision geohash length + * @param neighbors use neighbor cells + */ + public static GeolocationContextMapping.Builder location(String name, int precision, boolean neighbors) { + return new GeolocationContextMapping.Builder(name, neighbors, precision); + } + + /** + * Create a new {@link CategoryMapping} + */ + public static CategoryContextMapping.Builder category(String name) { + return new CategoryContextMapping.Builder(name, null); + } + + /** + * Create a new {@link CategoryMapping} with default category + * + * @param defaultCategory category to use, if it is not provided + */ + public static CategoryContextMapping.Builder category(String name, String defaultCategory) { + return new CategoryContextMapping.Builder(name, null).addDefaultValue(defaultCategory); + } + + /** + * Create a new {@link CategoryContextMapping} + * + * @param fieldname + * name of the field to use + */ + public static CategoryContextMapping.Builder reference(String name, String fieldname) { + return new CategoryContextMapping.Builder(name, fieldname); + } + + /** + * Create a new {@link CategoryContextMapping} + * + * @param fieldname name of the field to use + * @param defaultValues values to use, if the document not provides + * a field with the given name + */ + public static 
CategoryContextMapping.Builder reference(String name, String fieldname, Iterable defaultValues) { + return new CategoryContextMapping.Builder(name, fieldname).addDefaultValues(defaultValues); + } + + public static SortedMap loadMappings(Object configuration) throws ElasticsearchParseException { + if (configuration instanceof Map) { + Map configurations = (Map)configuration; + SortedMap mappings = Maps.newTreeMap(); + for (Entry config : configurations.entrySet()) { + String name = config.getKey(); + mappings.put(name, loadMapping(name, (Map) config.getValue())); + } + return mappings; + } else if (configuration == null) { + return ContextMapping.EMPTY_MAPPING; + } else { + throw new ElasticsearchParseException("no valid context configuration"); + } + } + + protected static ContextMapping loadMapping(String name, Map config) throws ElasticsearchParseException { + final Object argType = config.get(ContextMapping.FIELD_TYPE); + + if (argType == null) { + throw new ElasticsearchParseException("missing [" + ContextMapping.FIELD_TYPE + "] in context mapping"); + } + + final String type = argType.toString(); + + if (GeolocationContextMapping.TYPE.equals(type)) { + return GeolocationContextMapping.load(name, config); + } else if (CategoryContextMapping.TYPE.equals(type)) { + return CategoryContextMapping.load(name, config); + } else { + throw new ElasticsearchParseException("unknown context type[" + type + "]"); + } + } +} diff --git a/src/main/java/org/elasticsearch/search/suggest/context/ContextMapping.java b/src/main/java/org/elasticsearch/search/suggest/context/ContextMapping.java new file mode 100644 index 00000000000..89f896972de --- /dev/null +++ b/src/main/java/org/elasticsearch/search/suggest/context/ContextMapping.java @@ -0,0 +1,316 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.suggest.context; + +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.search.suggest.analyzing.XAnalyzingSuggester; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.BasicAutomata; +import org.apache.lucene.util.automaton.BasicOperations; +import org.apache.lucene.util.fst.FST; +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.ParseContext.Document; + +import java.io.IOException; +import java.util.*; + +/** + * A {@link ContextMapping} is used t define a context that may used + * in conjunction with a suggester. To define a suggester that depends on a + * specific context derived class of {@link ContextMapping} will be + * used to specify the kind of additional information required in order to make + * suggestions. 
+ */ +public abstract class ContextMapping implements ToXContent { + + /** Character used to separate several contexts */ + public static final char SEPARATOR = '\u001D'; + + /** Dummy Context Mapping that should be used if no context is used*/ + public static final SortedMap EMPTY_MAPPING = Maps.newTreeMap(); + + /** Dummy Context Config matching the Dummy Mapping by providing an empty context*/ + public static final SortedMap EMPTY_CONFIG = Maps.newTreeMap(); + + /** Dummy Context matching the Dummy Mapping by not wrapping a {@link TokenStream} */ + public static final Context EMPTY_CONTEXT = new Context(EMPTY_CONFIG, null); + + public static final String FIELD_VALUE = "value"; + public static final String FIELD_MISSING = "default"; + public static final String FIELD_TYPE = "type"; + + protected final String type; // Type of the Contextmapping + protected final String name; + + /** + * Define a new context mapping of a specific type + * + * @param type + * name of the new context mapping + */ + protected ContextMapping(String type, String name) { + super(); + this.type = type; + this.name = name; + } + + /** + * @return the type name of the context + */ + protected String type() { + return type; + } + + /** + * @return the name/id of the context + */ + public String name() { + return name; + } + + @Override + public final XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(name); + builder.field(FIELD_TYPE, type); + toInnerXContent(builder, params); + builder.endObject(); + return builder; + } + + /** + * A {@link ContextMapping} combined with the information provided by a document + * form a {@link ContextConfig} which is used to build the underlying FST. 
+ * + * @param parseContext context of parsing phase + * @param parser {@link XContentParser} used to read and setup the configuration + * @return A {@link ContextConfig} related to this mapping + * + * @throws IOException + * @throws ElasticsearchParseException + */ + public abstract ContextConfig parseContext(ParseContext parseContext, XContentParser parser) throws IOException, ElasticsearchParseException; + + public abstract ContextConfig defaultConfig(); + + /** + * Parse a query according to the context. Parsing starts at parsers current position + * + * @param name name of the context + * @param parser {@link XContentParser} providing the data of the query + * + * @return {@link ContextQuery} according to this mapping + * + * @throws IOException + * @throws ElasticsearchParseException + */ + public abstract ContextQuery parseQuery(String name, XContentParser parser) throws IOException, ElasticsearchParseException; + + /** + * Since every context mapping is assumed to have a name given by the field name of an context object, this + * method is used to build the value used to serialize the mapping + * + * @param builder builder to append the mapping to + * @param params parameters passed to the builder + * + * @return the builder used + * + * @throws IOException + */ + protected abstract XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException; + + /** + * Test equality of two mapping + * + * @param thisMappings first mapping + * @param otherMappings second mapping + * + * @return true if both arguments are equal + */ + public static boolean mappingsAreEqual(SortedMap thisMappings, SortedMap otherMappings) { + return Iterables.elementsEqual(thisMappings.entrySet(), otherMappings.entrySet()); + } + + @Override + public String toString() { + try { + return toXContent(JsonXContent.contentBuilder(), ToXContent.EMPTY_PARAMS).string(); + } catch (IOException e) { + return super.toString(); + } + } + + /** + * A collection of {@link 
ContextMapping}s, their {@link ContextConfig}uration and a + * Document form a complete {@link Context}. Since this Object provides all information used + * to setup a suggestion, it can be used to wrap the entire {@link TokenStream} used to build a + * path within the {@link FST}. + */ + public static class Context { + + final SortedMap contexts; + final Document doc; + + public Context(SortedMap contexts, Document doc) { + super(); + this.contexts = contexts; + this.doc = doc; + } + + /** + * Wrap the {@link TokenStream} according to the provided informations of {@link ContextConfig} + * and a related {@link Document}. + * + * @param tokenStream {@link TokenStream} to wrap + * + * @return wrapped token stream + */ + public TokenStream wrapTokenStream(TokenStream tokenStream) { + for (ContextConfig context : contexts.values()) { + tokenStream = context.wrapTokenStream(doc, tokenStream); + } + return tokenStream; + } + } + + /** + * A {@link ContextMapping} combined with the information provided by a document + * form a {@link ContextConfig} which is used to build the underlying {@link FST}. This class hold + * a simple method wrapping a {@link TokenStream} by provided document informations. + */ + public static abstract class ContextConfig { + + /** + * Wrap a {@link TokenStream} for building suggestions to use context informations + * provided by a document or a {@link ContextMapping} + * + * @param doc document related to the stream + * @param stream original stream used to build the underlying {@link FST} + * + * @return A new {@link TokenStream} providing additional context information + */ + protected abstract TokenStream wrapTokenStream(Document doc, TokenStream stream); + + } + + /** + * A {@link ContextQuery} defines the context information for a specific {@link ContextMapping} + * defined within a suggestion request. 
According to the parameters set in the request and the + * {@link ContextMapping} such a query is used to wrap the {@link TokenStream} of the actual + * suggestion request into a {@link TokenStream} with the context settings + */ + public static abstract class ContextQuery implements ToXContent { + + protected final String name; + + protected ContextQuery(String name) { + this.name = name; + } + + public String name() { + return name; + } + + /** + * Create a automaton for a given context query this automaton will be used + * to find the matching paths with the fst + * + * @param preserveSep set an additional char (XAnalyzingSuggester.SEP_LABEL) between each context query + * @param queries list of {@link ContextQuery} defining the lookup context + * + * @return Automaton matching the given Query + */ + public static Automaton toAutomaton(boolean preserveSep, Iterable queries) { + Automaton a = BasicAutomata.makeEmptyString(); + + Automaton gap = BasicAutomata.makeChar(ContextMapping.SEPARATOR); + if (preserveSep) { + // if separators are preserved the fst contains a SEP_LABEL + // behind each gap. To have a matching automaton, we need to + // include the SEP_LABEL in the query as well + gap = BasicOperations.concatenate(gap, BasicAutomata.makeChar(XAnalyzingSuggester.SEP_LABEL)); + } + + for (ContextQuery query : queries) { + a = Automaton.concatenate(Arrays.asList(query.toAutomaton(), gap, a)); + } + BasicOperations.determinize(a); + return a; + } + + /** + * Build a LookUp Automaton for this context. + * @return LookUp Automaton + */ + protected abstract Automaton toAutomaton(); + + /** + * Parse a set of {@link ContextQuery} according to a given mapping + * @param mappings List of mapping defined y the suggest field + * @param parser parser holding the settings of the queries. The parsers + * current token is assumed hold an array. The number of elements + * in this array must match the number of elements in the mappings. 
+ * @return List of context queries + * + * @throws IOException if something unexpected happened on the underlying stream + * @throws ElasticsearchParseException if the list of queries could not be parsed + */ + public static List parseQueries(Map mappings, XContentParser parser) + throws IOException, ElasticsearchParseException { + + Map querySet = new HashMap(); + Token token = parser.currentToken(); + if(token == Token.START_OBJECT) { + while ((token = parser.nextToken()) != Token.END_OBJECT) { + String name = parser.text(); + ContextMapping mapping = mappings.get(name); + if (mapping == null) { + throw new ElasticsearchParseException("no mapping defined for [" + name + "]"); + } + parser.nextToken(); + querySet.put(name, mapping.parseQuery(name, parser)); + } + } + + List queries = Lists.newArrayListWithExpectedSize(mappings.size()); + for (ContextMapping mapping : mappings.values()) { + queries.add(querySet.get(mapping.name)); + } + return queries; + } + + @Override + public String toString() { + try { + return toXContent(JsonXContent.contentBuilder(), ToXContent.EMPTY_PARAMS).string(); + } catch (IOException e) { + return super.toString(); + } + } + } +} diff --git a/src/main/java/org/elasticsearch/search/suggest/context/GeolocationContextMapping.java b/src/main/java/org/elasticsearch/search/suggest/context/GeolocationContextMapping.java new file mode 100644 index 00000000000..38ebb1137c3 --- /dev/null +++ b/src/main/java/org/elasticsearch/search/suggest/context/GeolocationContextMapping.java @@ -0,0 +1,664 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.suggest.context; + +import com.carrotsearch.hppc.IntOpenHashSet; +import com.google.common.collect.Lists; +import org.apache.lucene.analysis.PrefixAnalyzer.PrefixTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.BasicAutomata; +import org.apache.lucene.util.automaton.BasicOperations; +import org.apache.lucene.util.fst.FST; +import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.common.geo.GeoHashUtils; +import org.elasticsearch.common.geo.GeoPoint; +import org.elasticsearch.common.geo.GeoUtils; +import org.elasticsearch.common.unit.DistanceUnit; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentParser.Token; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.ParseContext.Document; +import org.elasticsearch.index.mapper.geo.GeoPointFieldMapper; + +import java.io.IOException; +import java.util.*; + +/** + * The {@link GeolocationContextMapping} allows to take GeoInfomation into account + * during building suggestions. The mapping itself works with geohashes + * explicitly and is configured by three parameters: + *
    + *
+ * <ul>
+ * <li><code>precision</code>: length of the geohash indexed as prefix of the completion field</li>
+ * <li><code>neighbors</code>: should the neighbor cells of the deepest geohash level also be indexed as alternatives to the actual geohash</li>
+ * <li><code>location</code>: (optional) location assumed if it is not provided</li>
+ * </ul>
+ *
+ * Internally this mapping wraps the suggestions into a form + * [geohash][suggestion]. If the neighbor option is set the cells + * next to the cell on the deepest geohash level ( precision) will + * be indexed as well. The {@link TokenStream} used to build the {@link FST} for + * suggestion will be wrapped into a {@link PrefixTokenFilter} managing these + * geohases as prefixes. + */ +public class GeolocationContextMapping extends ContextMapping { + + public static final String TYPE = "geo"; + + public static final String FIELD_PRECISION = "precision"; + public static final String FIELD_NEIGHBORS = "neighbors"; + public static final String FIELD_FIELDNAME = "path"; + + private final Collection defaultLocations; + private final int[] precision; + private final boolean neighbors; + private final String fieldName; + private final GeoConfig defaultConfig; + + /** + * Create a new {@link GeolocationContextMapping} with a given precision + * + * @param precision + * length of the geohashes + * @param neighbors + * should neighbors be indexed + * @param defaultLocation + * location to use, if it is not provided by the document + */ + protected GeolocationContextMapping(String name, int[] precision, boolean neighbors, Collection defaultLocations, String fieldName) { + super(TYPE, name); + this.precision = precision; + this.neighbors = neighbors; + this.defaultLocations = defaultLocations; + this.fieldName = fieldName; + this.defaultConfig = new GeoConfig(this, defaultLocations); + } + + /** + * load a {@link GeolocationContextMapping} by configuration. Such a configuration + * can set the parameters + *
+ * <ul>
+ * <li><code>precision</code> [<code>String</code>, <code>Double</code>,
+ * <code>Float</code> or <code>Integer</code>] defines the length of the
+ * underlying geohash</li>
+ * <li><code>defaultLocation</code> [<code>String</code>] defines the location to use if
+ * it is not provided by the document</li>
+ * <li><code>neighbors</code> [<code>Boolean</code>] defines if the last level of the
+ * geohash should be extended by neighbor cells</li>
+ * </ul>
+ * + * @param config + * Configuration for {@link GeolocationContextMapping} + * @return new {@link GeolocationContextMapping} configured by the parameters of + * config + */ + protected static GeolocationContextMapping load(String name, Map config) { + final GeolocationContextMapping.Builder builder = new GeolocationContextMapping.Builder(name); + + if (config != null) { + final Object configPrecision = config.get(FIELD_PRECISION); + if (configPrecision == null) { + // ignore precision + } else if (configPrecision instanceof Integer) { + builder.precision((Integer) configPrecision); + } else if (configPrecision instanceof Long) { + builder.precision((Long) configPrecision); + } else if (configPrecision instanceof Double) { + builder.precision((Double) configPrecision); + } else if (configPrecision instanceof Float) { + builder.precision((Float) configPrecision); + } else if (configPrecision instanceof Iterable) { + for (Object precision : (Iterable)configPrecision) { + if (precision instanceof Integer) { + builder.precision((Integer) precision); + } else if (precision instanceof Long) { + builder.precision((Long) precision); + } else if (precision instanceof Double) { + builder.precision((Double) precision); + } else if (precision instanceof Float) { + builder.precision((Float) precision); + } else { + builder.precision(precision.toString()); + } + } + } else { + builder.precision(configPrecision.toString()); + } + + final Object configNeighbors = config.get(FIELD_NEIGHBORS); + if (configNeighbors != null) { + builder.neighbors((Boolean) configNeighbors); + } + + final Object def = config.get(FIELD_MISSING); + if (def != null) { + if (def instanceof Iterable) { + for (Object location : (Iterable)def) { + builder.addDefaultLocation(location.toString()); + } + } else if (def instanceof String) { + builder.addDefaultLocation(def.toString()); + } else { + throw new ElasticsearchParseException("field [" + FIELD_MISSING + "] must be of type string or list"); + } + } + 
+ final Object fieldName = config.get(FIELD_FIELDNAME); + if (fieldName != null) { + builder.field(fieldName.toString()); + } + } + return builder.build(); + } + + @Override + protected XContentBuilder toInnerXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(FIELD_PRECISION, precision); + builder.field(FIELD_NEIGHBORS, neighbors); + if (defaultLocations != null) { + builder.startArray(FIELD_MISSING); + for (String defaultLocation : defaultLocations) { + builder.value(defaultLocation); + } + builder.endArray(); + } + if (fieldName != null) { + builder.field(FIELD_FIELDNAME, fieldName); + } + return builder; + } + + protected static Collection parseSinglePointOrList(XContentParser parser) throws IOException { + Token token = parser.currentToken(); + if(token == Token.START_ARRAY) { + token = parser.nextToken(); + // Test if value is a single point in [lon, lat] format + if(token == Token.VALUE_NUMBER) { + double lon = parser.doubleValue(); + if(parser.nextToken() == Token.VALUE_NUMBER) { + double lat = parser.doubleValue(); + if(parser.nextToken() == Token.END_ARRAY) { + return Collections.singleton(GeoHashUtils.encode(lat, lon)); + } else { + throw new ElasticsearchParseException("only two values expected"); + } + } else { + throw new ElasticsearchParseException("latitue must be a numeric value"); + } + } else { + // otherwise it's a list of locations + ArrayList result = Lists.newArrayList(); + while (token != Token.END_ARRAY) { + result.add(GeoPoint.parse(parser).geohash()); + } + return result; + } + } else { + // or a single location + return Collections.singleton(GeoPoint.parse(parser).geohash()); + } + } + + @Override + public ContextConfig defaultConfig() { + return defaultConfig; + } + + @Override + public ContextConfig parseContext(ParseContext parseContext, XContentParser parser) throws IOException, ElasticsearchParseException { + + if(fieldName != null) { + FieldMapper mapper = 
parseContext.docMapper().mappers().fullName(fieldName).mapper(); + if(!(mapper instanceof GeoPointFieldMapper)) { + throw new ElasticsearchParseException("referenced field must be mapped to geo_point"); + } + } + + Collection locations; + if(parser.currentToken() == Token.VALUE_NULL) { + locations = null; + } else { + locations = parseSinglePointOrList(parser); + } + return new GeoConfig(this, locations); + } + + /** + * Create a new geolocation query from a given GeoPoint + * + * @param point + * query location + * @return new geolocation query + */ + public static GeoQuery query(String name, GeoPoint point) { + return query(name, point.getGeohash()); + } + + /** + * Create a new geolocation query from a given geocoordinate + * + * @param lat + * latitude of the location + * @param lon + * longitude of the location + * @return new geolocation query + */ + public static GeoQuery query(String name, double lat, double lon) { + return query(name, GeoHashUtils.encode(lat, lon)); + } + + /** + * Create a new geolocation query from a given geohash + * + * @param geohash + * geohash of the location + * @return new geolocation query + */ + public static GeoQuery query(String name, String geohash) { + return new GeoQuery(name, geohash); + } + + private static final int parsePrecision(XContentParser parser) throws IOException, ElasticsearchParseException { + switch (parser.currentToken()) { + case VALUE_STRING: + return GeoUtils.geoHashLevelsForPrecision(parser.text()); + case VALUE_NUMBER: + switch (parser.numberType()) { + case INT: + case LONG: + return parser.intValue(); + default: + return GeoUtils.geoHashLevelsForPrecision(parser.doubleValue()); + } + default: + throw new ElasticsearchParseException("invalid precision value"); + } + } + + @Override + public GeoQuery parseQuery(String name, XContentParser parser) throws IOException, ElasticsearchParseException { + if (parser.currentToken() == Token.START_OBJECT) { + double lat = Double.NaN; + double lon = Double.NaN; + 
GeoPoint point = null; + int[] precision = null; + + while (parser.nextToken() != Token.END_OBJECT) { + final String fieldName = parser.text(); + if("lat".equals(fieldName)) { + if(point == null) { + if (parser.nextToken() == Token.VALUE_NUMBER) { + lat = parser.doubleValue(); + } else { + throw new ElasticsearchParseException("latitude must be a number"); + } + } else { + throw new ElasticsearchParseException("only lat/lon or [" + FIELD_VALUE + "] is allowed"); + } + } else if ("lon".equals(fieldName)) { + if(point == null) { + if(parser.nextToken() == Token.VALUE_NUMBER) { + lon = parser.doubleValue(); + } else { + throw new ElasticsearchParseException("longitude must be a number"); + } + } else { + throw new ElasticsearchParseException("only lat/lon or [" + FIELD_VALUE + "] is allowed"); + } + } else if (FIELD_PRECISION.equals(fieldName)) { + if(parser.nextToken() == Token.START_ARRAY) { + IntOpenHashSet precisions = new IntOpenHashSet(); + while(parser.nextToken() != Token.END_ARRAY) { + precisions.add(parsePrecision(parser)); + } + precision = precisions.toArray(); + } else { + precision = new int[] { parsePrecision(parser) }; + } + } else if (FIELD_VALUE.equals(fieldName)) { + if(lat == Double.NaN && lon == Double.NaN) { + point = GeoPoint.parse(parser); + } else { + throw new ElasticsearchParseException("only lat/lon or [" + FIELD_VALUE + "] is allowed"); + } + } else { + throw new ElasticsearchParseException("unexpected fieldname [" + fieldName + "]"); + } + } + + if (point == null) { + if (lat == Double.NaN || lon == Double.NaN) { + throw new ElasticsearchParseException("location is missing"); + } else { + point = new GeoPoint(lat, lon); + } + } + + return new GeoQuery(name, point.geohash(), precision); + } else { + return new GeoQuery(name, GeoPoint.parse(parser).getGeohash(), precision); + } + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((defaultLocations == null) ? 
0 : defaultLocations.hashCode()); + result = prime * result + ((fieldName == null) ? 0 : fieldName.hashCode()); + result = prime * result + (neighbors ? 1231 : 1237); + result = prime * result + Arrays.hashCode(precision); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + GeolocationContextMapping other = (GeolocationContextMapping) obj; + if (defaultLocations == null) { + if (other.defaultLocations != null) + return false; + } else if (!defaultLocations.equals(other.defaultLocations)) + return false; + if (fieldName == null) { + if (other.fieldName != null) + return false; + } else if (!fieldName.equals(other.fieldName)) + return false; + if (neighbors != other.neighbors) + return false; + if (!Arrays.equals(precision, other.precision)) + return false; + return true; + } + + + + + public static class Builder extends ContextBuilder { + + private IntOpenHashSet precisions = new IntOpenHashSet(); + private boolean neighbors; // take neighbor cell on the lowest level into account + private HashSet defaultLocations = new HashSet(); + private String fieldName = null; + + protected Builder(String name) { + this(name, true, null); + } + + protected Builder(String name, boolean neighbors, int...levels) { + super(name); + neighbors(neighbors); + if (levels != null) { + for (int level : levels) { + precision(level); + } + } + } + + /** + * Set the precision use o make suggestions + * + * @param precision + * precision as distance with {@link DistanceUnit}. 
Default: + * meters + * @return this + */ + public Builder precision(String precision) { + return precision(DistanceUnit.parse(precision, DistanceUnit.METERS, DistanceUnit.METERS)); + } + + /** + * Set the precision use o make suggestions + * + * @param precision + * precision value + * @param unit + * {@link DistanceUnit} to use + * @return this + */ + public Builder precision(double precision, DistanceUnit unit) { + return precision(unit.toMeters(precision)); + } + + /** + * Set the precision use o make suggestions + * + * @param precision + * precision as distance in meters + * @return this + */ + public Builder precision(double meters) { + int level = GeoUtils.geoHashLevelsForPrecision(meters); + // Ceiling precision: we might return more results + if (GeoUtils.geoHashCellSize(level) < meters) { + level = Math.max(1, level - 1); + } + return precision(level); + } + + /** + * Set the precision use o make suggestions + * + * @param precision + * maximum length of geohashes + * @return this + */ + public Builder precision(int level) { + this.precisions.add(level); + return this; + } + + /** + * Set neighborhood usage + * + * @param neighbors + * should neighbor cells also be valid + * @return this + */ + public Builder neighbors(boolean neighbors) { + this.neighbors = neighbors; + return this; + } + + /** + * Set a default location that should be used, if no location is + * provided by the query + * + * @param geohash + * geohash of the default location + * @return this + */ + public Builder addDefaultLocation(String geohash) { + this.defaultLocations.add(geohash); + return this; + } + + /** + * Set a default location that should be used, if no location is + * provided by the query + * + * @param geohash + * geohash of the default location + * @return this + */ + public Builder addDefaultLocations(Collection geohashes) { + this.defaultLocations.addAll(geohashes); + return this; + } + + /** + * Set a default location that should be used, if no location is + * 
provided by the query + * + * @param lat + * latitude of the default location + * @param lon + * longitude of the default location + * @return this + */ + public Builder addDefaultLocation(double lat, double lon) { + this.defaultLocations.add(GeoHashUtils.encode(lat, lon)); + return this; + } + + /** + * Set a default location that should be used, if no location is + * provided by the query + * + * @param point + * location + * @return this + */ + public Builder defaultLocation(GeoPoint point) { + this.defaultLocations.add(point.geohash()); + return this; + } + + /** + * Set the name of the field containing a geolocation to use + * @param fieldName name of the field + * @return this + */ + public Builder field(String fieldName) { + this.fieldName = fieldName; + return this; + } + + @Override + public GeolocationContextMapping build() { + if(precisions.isEmpty()) { + precisions.add(GeoHashUtils.PRECISION); + } + return new GeolocationContextMapping(name, precisions.toArray(), neighbors, defaultLocations, fieldName); + } + + } + + private static class GeoConfig extends ContextConfig { + + private final GeolocationContextMapping mapping; + private final Collection locations; + + public GeoConfig(GeolocationContextMapping mapping, Collection locations) { + this.locations = locations; + this.mapping = mapping; + } + + @Override + protected TokenStream wrapTokenStream(Document doc, TokenStream stream) { + Collection geohashes; + + if(locations == null | locations.size() == 0) { + if(mapping.fieldName != null) { + IndexableField[] fields = doc.getFields(mapping.fieldName); + if(fields.length > 0) { + geohashes = new ArrayList(fields.length); + GeoPoint spare = new GeoPoint(); + for (IndexableField field : fields) { + spare.resetFromString(field.stringValue()); + geohashes.add(spare.geohash()); + } + } else { + geohashes = mapping.defaultLocations; + } + } else { + geohashes = mapping.defaultLocations; + } + } else { + geohashes = locations; + } + + Collection locations = 
new HashSet(); + for (String geohash : geohashes) { + for (int p : mapping.precision) { + int precision = Math.min(p, geohash.length()); + geohash = geohash.substring(0, precision); + if(mapping.neighbors) { + GeoHashUtils.addNeighbors(geohash, precision, locations); + } + locations.add(geohash); + } + } + + return new PrefixTokenFilter(stream, ContextMapping.SEPARATOR, locations); + } + + public String toString() { + StringBuilder sb = new StringBuilder("GeoConfig(location = ["); + Iterator location = this.locations.iterator(); + if (location.hasNext()) { + sb.append(location.next()); + while (location.hasNext()) { + sb.append(", ").append(location.next()); + } + } + return sb.append("])").toString(); + } + } + + private static class GeoQuery extends ContextQuery { + private final String location; + private final int[] precisions; + + public GeoQuery(String name, String location, int...precisions) { + super(name); + this.location = location; + this.precisions = precisions; + } + + @Override + public Automaton toAutomaton() { + Automaton automaton; + if(precisions == null || precisions.length == 0) { + automaton = BasicAutomata.makeString(location); + } else { + automaton = BasicAutomata.makeString(location.substring(0, Math.max(1, Math.min(location.length(), precisions[0])))); + for (int i = 1; i < precisions.length; i++) { + final String cell = location.substring(0, Math.max(1, Math.min(location.length(), precisions[0]))); + automaton = BasicOperations.union(automaton, BasicAutomata.makeString(cell)); + } + } + return automaton; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if(precisions == null || precisions.length == 0) { + builder.field(name, location); + } else { + builder.startObject(name); + builder.field(FIELD_VALUE, location); + builder.field(FIELD_PRECISION, precisions); + builder.endObject(); + } + return builder; + } + } +} diff --git 
a/src/test/java/org/elasticsearch/search/geo/GeoFilterTests.java b/src/test/java/org/elasticsearch/search/geo/GeoFilterTests.java index efb2e130c6a..eb63a5d2321 100644 --- a/src/test/java/org/elasticsearch/search/geo/GeoFilterTests.java +++ b/src/test/java/org/elasticsearch/search/geo/GeoFilterTests.java @@ -54,7 +54,9 @@ import java.io.ByteArrayOutputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.util.List; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; import java.util.Random; import java.util.zip.GZIPInputStream; @@ -468,8 +470,8 @@ public class GeoFilterTests extends ElasticsearchIntegrationTest { String geohash = randomhash(10); logger.info("Testing geohash_cell filter for [{}]", geohash); - List neighbors = GeoHashUtils.neighbors(geohash); - List parentNeighbors = GeoHashUtils.neighbors(geohash.substring(0, geohash.length() - 1)); + Collection neighbors = GeoHashUtils.neighbors(geohash); + Collection parentNeighbors = GeoHashUtils.neighbors(geohash.substring(0, geohash.length() - 1)); logger.info("Neighbors {}", neighbors); logger.info("Parent Neighbors {}", parentNeighbors); @@ -482,16 +484,18 @@ public class GeoFilterTests extends ElasticsearchIntegrationTest { client().prepareIndex("locations", "location", "1").setCreate(true).setSource("pin", geohash).execute().actionGet(); // index neighbors - for (int i = 0; i < neighbors.size(); i++) { - client().prepareIndex("locations", "location", "N" + i).setCreate(true).setSource("pin", neighbors.get(i)).execute().actionGet(); + Iterator iterator = neighbors.iterator(); + for (int i = 0; iterator.hasNext(); i++) { + client().prepareIndex("locations", "location", "N" + i).setCreate(true).setSource("pin", iterator.next()).execute().actionGet(); } // Index parent cell client().prepareIndex("locations", "location", "p").setCreate(true).setSource("pin", geohash.substring(0, geohash.length() - 
1)).execute().actionGet(); // index neighbors - for (int i = 0; i < parentNeighbors.size(); i++) { - client().prepareIndex("locations", "location", "p" + i).setCreate(true).setSource("pin", parentNeighbors.get(i)).execute().actionGet(); + iterator = parentNeighbors.iterator(); + for (int i = 0; iterator.hasNext(); i++) { + client().prepareIndex("locations", "location", "p" + i).setCreate(true).setSource("pin", iterator.next()).execute().actionGet(); } client().admin().indices().prepareRefresh("locations").execute().actionGet(); @@ -531,24 +535,24 @@ public class GeoFilterTests extends ElasticsearchIntegrationTest { @Test public void testNeighbors() { // Simple root case - assertThat(GeoHashUtils.neighbors("7"), containsInAnyOrder("4", "5", "6", "d", "e", "h", "k", "s")); + assertThat(GeoHashUtils.addNeighbors("7", new ArrayList()), containsInAnyOrder("4", "5", "6", "d", "e", "h", "k", "s")); // Root cases (Outer cells) - assertThat(GeoHashUtils.neighbors("0"), containsInAnyOrder("1", "2", "3", "p", "r")); - assertThat(GeoHashUtils.neighbors("b"), containsInAnyOrder("8", "9", "c", "x", "z")); - assertThat(GeoHashUtils.neighbors("p"), containsInAnyOrder("n", "q", "r", "0", "2")); - assertThat(GeoHashUtils.neighbors("z"), containsInAnyOrder("8", "b", "w", "x", "y")); + assertThat(GeoHashUtils.addNeighbors("0", new ArrayList()), containsInAnyOrder("1", "2", "3", "p", "r")); + assertThat(GeoHashUtils.addNeighbors("b", new ArrayList()), containsInAnyOrder("8", "9", "c", "x", "z")); + assertThat(GeoHashUtils.addNeighbors("p", new ArrayList()), containsInAnyOrder("n", "q", "r", "0", "2")); + assertThat(GeoHashUtils.addNeighbors("z", new ArrayList()), containsInAnyOrder("8", "b", "w", "x", "y")); // Root crossing dateline - assertThat(GeoHashUtils.neighbors("2"), containsInAnyOrder("0", "1", "3", "8", "9", "p", "r", "x")); - assertThat(GeoHashUtils.neighbors("r"), containsInAnyOrder("0", "2", "8", "n", "p", "q", "w", "x")); + assertThat(GeoHashUtils.addNeighbors("2", new 
ArrayList()), containsInAnyOrder("0", "1", "3", "8", "9", "p", "r", "x")); + assertThat(GeoHashUtils.addNeighbors("r", new ArrayList()), containsInAnyOrder("0", "2", "8", "n", "p", "q", "w", "x")); // level1: simple case - assertThat(GeoHashUtils.neighbors("dk"), containsInAnyOrder("d5", "d7", "de", "dh", "dj", "dm", "ds", "dt")); + assertThat(GeoHashUtils.addNeighbors("dk", new ArrayList()), containsInAnyOrder("d5", "d7", "de", "dh", "dj", "dm", "ds", "dt")); // Level1: crossing cells - assertThat(GeoHashUtils.neighbors("d5"), containsInAnyOrder("d4", "d6", "d7", "dh", "dk", "9f", "9g", "9u")); - assertThat(GeoHashUtils.neighbors("d0"), containsInAnyOrder("d1", "d2", "d3", "9b", "9c", "6p", "6r", "3z")); + assertThat(GeoHashUtils.addNeighbors("d5", new ArrayList()), containsInAnyOrder("d4", "d6", "d7", "dh", "dk", "9f", "9g", "9u")); + assertThat(GeoHashUtils.addNeighbors("d0", new ArrayList()), containsInAnyOrder("d1", "d2", "d3", "9b", "9c", "6p", "6r", "3z")); } public static double distance(double lat1, double lon1, double lat2, double lon2) { diff --git a/src/test/java/org/elasticsearch/search/suggest/ContextSuggestSearchTests.java b/src/test/java/org/elasticsearch/search/suggest/ContextSuggestSearchTests.java new file mode 100644 index 00000000000..e83fd601775 --- /dev/null +++ b/src/test/java/org/elasticsearch/search/suggest/ContextSuggestSearchTests.java @@ -0,0 +1,640 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.suggest; + +import com.carrotsearch.randomizedtesting.generators.RandomStrings; +import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse; +import org.elasticsearch.action.suggest.SuggestRequestBuilder; +import org.elasticsearch.action.suggest.SuggestResponse; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.suggest.Suggest.Suggestion; +import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry; +import org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option; +import org.elasticsearch.search.suggest.completion.CompletionSuggestion; +import org.elasticsearch.search.suggest.completion.CompletionSuggestionBuilder; +import org.elasticsearch.search.suggest.completion.CompletionSuggestionFuzzyBuilder; +import org.elasticsearch.search.suggest.context.ContextBuilder; +import org.elasticsearch.search.suggest.context.ContextMapping; +import org.elasticsearch.test.ElasticsearchIntegrationTest; +import org.hamcrest.Matchers; +import org.junit.Test; + +import java.io.IOException; +import java.util.*; + +import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; +import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; +import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; +import static 
org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.test.hamcrest.ElasticsearchGeoAssertions.assertDistance; +import static org.hamcrest.Matchers.is; + +public class ContextSuggestSearchTests extends ElasticsearchIntegrationTest { + + private static final String INDEX = "test"; + private static final String TYPE = "testType"; + private static final String FIELD = "testField"; + + private static final String[][] HEROS = { + { "Afari, Jamal", "Jamal Afari", "Jamal" }, + { "Allerdyce, St. John", "Allerdyce, John", "St. John", "St. John Allerdyce" }, + { "Beaubier, Jean-Paul", "Jean-Paul Beaubier", "Jean-Paul" }, + { "Beaubier, Jeanne-Marie", "Jeanne-Marie Beaubier", "Jeanne-Marie" }, + { "Braddock, Elizabeth \"Betsy\"", "Betsy", "Braddock, Elizabeth", "Elizabeth Braddock", "Elizabeth" }, + { "Cody Mushumanski gun Man", "the hunter", "gun man", "Cody Mushumanski" }, + { "Corbo, Adrian", "Adrian Corbo", "Adrian" }, + { "Corbo, Jared", "Jared Corbo", "Jared" }, + { "Creel, Carl \"Crusher\"", "Creel, Carl", "Crusher", "Carl Creel", "Carl" }, + { "Crichton, Lady Jacqueline Falsworth", "Lady Jacqueline Falsworth Crichton", "Lady Jacqueline Falsworth", + "Jacqueline Falsworth" }, { "Crichton, Kenneth", "Kenneth Crichton", "Kenneth" }, + { "MacKenzie, Al", "Al MacKenzie", "Al" }, + { "MacPherran, Mary \"Skeeter\"", "Mary MacPherran \"Skeeter\"", "MacPherran, Mary", "Skeeter", "Mary MacPherran" }, + { "MacTaggert, Moira", "Moira MacTaggert", "Moira" }, { "Rasputin, Illyana", "Illyana Rasputin", "Illyana" }, + { "Rasputin, Mikhail", "Mikhail Rasputin", "Mikhail" }, { "Rasputin, Piotr", "Piotr Rasputin", "Piotr" }, + { "Smythe, Alistair", "Alistair Smythe", "Alistair" }, { "Smythe, Spencer", "Spencer Smythe", "Spencer" }, + { "Whitemane, Aelfyre", "Aelfyre Whitemane", "Aelfyre" }, { "Whitemane, Kofi", "Kofi Whitemane", "Kofi" } }; + + @Test + public void testBasicGeo() throws Exception { + createIndexAndSettings(); + 
createMapping(TYPE, ContextBuilder.location("st").precision("5km").neighbors(true)); + XContentBuilder source1 = jsonBuilder() + .startObject() + .startObject(FIELD) + .array("input", "Hotel Amsterdam", "Amsterdam") + .field("output", "Hotel Amsterdam in Berlin") + .startObject("context").latlon("st", 52.529172, 13.407333).endObject() + .endObject() + .endObject(); + client().prepareIndex(INDEX, TYPE, "1").setSource(source1).execute().actionGet(); + + XContentBuilder source2 = jsonBuilder() + .startObject() + .startObject(FIELD) + .array("input", "Hotel Berlin", "Berlin") + .field("output", "Hotel Berlin in Amsterdam") + .startObject("context").latlon("st", 52.363389, 4.888695).endObject() + .endObject() + .endObject(); + client().prepareIndex(INDEX, TYPE, "2").setSource(source2).execute().actionGet(); + + client().admin().indices().prepareRefresh(INDEX).get(); + + String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10); + CompletionSuggestionBuilder context = new CompletionSuggestionBuilder(suggestionName).field(FIELD).text("h").size(10) + .addGeoLocation("st", 52.52, 13.4); + + SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); + SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); + + assertEquals(suggestResponse.getSuggest().size(), 1); + assertEquals("Hotel Amsterdam in Berlin", suggestResponse.getSuggest().getSuggestion(suggestionName).iterator().next().getOptions().iterator().next().getText().string()); + } + + @Test + public void testGeoField() throws Exception { + createIndexAndSettings(); + + XContentBuilder mapping = jsonBuilder(); + mapping.startObject(); + mapping.startObject(TYPE); + mapping.startObject("properties"); + mapping.startObject("pin"); + mapping.field("type", "geo_point"); + mapping.endObject(); + mapping.startObject(FIELD); + mapping.field("type", "completion"); + mapping.field("index_analyzer", "simple"); + mapping.field("search_analyzer", "simple"); 
+ + mapping.startObject("context"); + mapping.value(ContextBuilder.location("st", 5, true).field("pin").build()); + mapping.endObject(); + + mapping.endObject(); + mapping.endObject(); + mapping.endObject(); + mapping.endObject(); + + PutMappingResponse putMappingResponse = client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(mapping).get(); + + assertThat(putMappingResponse.isAcknowledged(), is(true)); + ensureYellow(); + + XContentBuilder source1 = jsonBuilder() + .startObject() + .latlon("pin", 52.529172, 13.407333) + .startObject(FIELD) + .array("input", "Hotel Amsterdam", "Amsterdam") + .field("output", "Hotel Amsterdam in Berlin") + .startObject("context").endObject() + .endObject() + .endObject(); + client().prepareIndex(INDEX, TYPE, "1").setSource(source1).execute().actionGet(); + + XContentBuilder source2 = jsonBuilder() + .startObject() + .latlon("pin", 52.363389, 4.888695) + .startObject(FIELD) + .array("input", "Hotel Berlin", "Berlin") + .field("output", "Hotel Berlin in Amsterdam") + .startObject("context").endObject() + .endObject() + .endObject(); + client().prepareIndex(INDEX, TYPE, "2").setSource(source2).execute().actionGet(); + + client().admin().indices().prepareRefresh(INDEX).get(); + + String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10); + CompletionSuggestionBuilder context = new CompletionSuggestionBuilder(suggestionName).field(FIELD).text("h").size(10) + .addGeoLocation("st", 52.52, 13.4); + SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); + SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); + + assertEquals(suggestResponse.getSuggest().size(), 1); + assertEquals("Hotel Amsterdam in Berlin", suggestResponse.getSuggest().getSuggestion(suggestionName).iterator().next().getOptions().iterator().next().getText().string()); + } + + @Test + public void testSimpleGeo() throws Exception { + String reinickendorf = "u337p3mp11e2"; + 
String pankow = "u33e0cyyjur4"; + String koepenick = "u33dm4f7fn40"; + String bernau = "u33etnjf1yjn"; + String berlin = "u33dc1v0xupz"; + String mitte = "u33dc0cpke4q"; + String steglitz = "u336m36rjh2p"; + String wilmersdorf = "u336wmw0q41s"; + String spandau = "u336uqek7gh6"; + String tempelhof = "u33d91jh3by0"; + String schoeneberg = "u336xdrkzbq7"; + String treptow = "u33d9unn7fp7"; + + double precision = 100.0; // meters + createIndexAndSettings(); + createMapping(TYPE, ContextBuilder.location("st").precision(precision).neighbors(true)); + + String[] locations = { reinickendorf, pankow, koepenick, bernau, berlin, mitte, steglitz, wilmersdorf, spandau, tempelhof, + schoeneberg, treptow }; + + String[][] input = { { "pizza - reinickendorf", "pizza", "food" }, { "pizza - pankow", "pizza", "food" }, + { "pizza - koepenick", "pizza", "food" }, { "pizza - bernau", "pizza", "food" }, { "pizza - berlin", "pizza", "food" }, + { "pizza - mitte", "pizza - berlin mitte", "pizza", "food" }, + { "pizza - steglitz", "pizza - Berlin-Steglitz", "pizza", "food" }, { "pizza - wilmersdorf", "pizza", "food" }, + { "pizza - spandau", "spandau bei berlin", "pizza", "food" }, + { "pizza - tempelhof", "pizza - berlin-tempelhof", "pizza", "food" }, + { "pizza - schoeneberg", "pizza - schöneberg", "pizza - berlin schoeneberg", "pizza", "food" }, + { "pizza - treptow", "pizza", "food" } }; + + for (int i = 0; i < locations.length; i++) { + XContentBuilder source = jsonBuilder().startObject().startObject(FIELD).startArray("input").value(input[i]).endArray() + .startObject("context").field("st", locations[i]).endObject().field("payload", locations[i]).endObject().endObject(); + client().prepareIndex(INDEX, TYPE, "" + i).setSource(source).execute().actionGet(); + } + + refresh(); + + assertGeoSuggestionsInRange(berlin, "pizza", precision); + assertGeoSuggestionsInRange(reinickendorf, "pizza", precision); + assertGeoSuggestionsInRange(spandau, "pizza", precision); + 
assertGeoSuggestionsInRange(koepenick, "pizza", precision); + assertGeoSuggestionsInRange(schoeneberg, "pizza", precision); + assertGeoSuggestionsInRange(tempelhof, "pizza", precision); + assertGeoSuggestionsInRange(bernau, "pizza", precision); + assertGeoSuggestionsInRange(pankow, "pizza", precision); + assertGeoSuggestionsInRange(mitte, "pizza", precision); + assertGeoSuggestionsInRange(steglitz, "pizza", precision); + assertGeoSuggestionsInRange(mitte, "pizza", precision); + assertGeoSuggestionsInRange(wilmersdorf, "pizza", precision); + assertGeoSuggestionsInRange(treptow, "pizza", precision); + } + + @Test + public void testSimplePrefix() throws Exception { + createIndexAndSettings(); + createMapping(TYPE, ContextBuilder.category("st")); + + for (int i = 0; i < HEROS.length; i++) { + XContentBuilder source = jsonBuilder().startObject().startObject(FIELD).startArray("input").value(HEROS[i]).endArray() + .startObject("context").field("st", i%3).endObject() + .startObject("payload").field("group", i % 3).field("id", i).endObject() + .endObject().endObject(); + client().prepareIndex(INDEX, TYPE, "" + i).setSource(source).execute().actionGet(); + } + + refresh(); + + assertPrefixSuggestions(0, "a", "Afari, Jamal", "Adrian Corbo", "Adrian"); + assertPrefixSuggestions(0, "b", "Beaubier, Jeanne-Marie"); + assertPrefixSuggestions(0, "c", "Corbo, Adrian", "Crichton, Lady Jacqueline Falsworth"); + assertPrefixSuggestions(0, "mary", "Mary MacPherran \"Skeeter\"", "Mary MacPherran"); + assertPrefixSuggestions(0, "s", "Skeeter", "Smythe, Spencer", "Spencer Smythe", "Spencer"); + assertPrefixSuggestions(1, "s", "St. John", "St. 
John Allerdyce"); + assertPrefixSuggestions(2, "s", "Smythe, Alistair"); + assertPrefixSuggestions(1, "w", "Whitemane, Aelfyre"); + assertPrefixSuggestions(2, "w", "Whitemane, Kofi"); + } + + @Test + public void testBasic() throws Exception { + createIndexAndSettings(); + createMapping(TYPE, false, ContextBuilder.reference("st", "_type"), ContextBuilder.reference("nd", "_type")); + + client().prepareIndex(INDEX, TYPE, "1") + .setSource( + jsonBuilder().startObject().startObject(FIELD).startArray("input").value("my hotel").value("this hotel").endArray() + .startObject("context").endObject() + .field("payload", TYPE + "|" + TYPE).endObject().endObject()).execute() + .actionGet(); + + refresh(); + + assertDoubleFieldSuggestions(TYPE, TYPE, "m", "my hotel"); + } + + @Test + public void testSimpleField() throws Exception { + createIndexAndSettings(); + createMapping(TYPE, ContextBuilder.reference("st", "category")); + + for (int i = 0; i < HEROS.length; i++) { + client().prepareIndex(INDEX, TYPE, "" + i) + .setSource( + jsonBuilder().startObject().field("category", Integer.toString(i % 3)).startObject(FIELD).startArray("input") + .value(HEROS[i]).endArray().startObject("context").endObject().field("payload", Integer.toString(i % 3)) + .endObject().endObject()).execute().actionGet(); + } + + refresh(); + + assertFieldSuggestions("0", "a", "Afari, Jamal", "Adrian Corbo", "Adrian"); + assertFieldSuggestions("0", "b", "Beaubier, Jeanne-Marie"); + assertFieldSuggestions("0", "c", "Corbo, Adrian", "Crichton, Lady Jacqueline Falsworth"); + assertFieldSuggestions("0", "mary", "Mary MacPherran \"Skeeter\"", "Mary MacPherran"); + assertFieldSuggestions("0", "s", "Skeeter", "Smythe, Spencer", "Spencer Smythe", "Spencer"); + assertFieldSuggestions("1", "s", "St. John", "St. 
John Allerdyce"); + assertFieldSuggestions("2", "s", "Smythe, Alistair"); + assertFieldSuggestions("1", "w", "Whitemane, Aelfyre"); + assertFieldSuggestions("2", "w", "Whitemane, Kofi"); + + } + + @Test + public void testMultiValueField() throws Exception { + createIndexAndSettings(); + createMapping(TYPE, ContextBuilder.reference("st", "category")); + + for (int i = 0; i < HEROS.length; i++) { + client().prepareIndex(INDEX, TYPE, "" + i) + .setSource( + jsonBuilder().startObject().startArray("category").value(Integer.toString(i % 3)).value("other").endArray() + .startObject(FIELD).startArray("input").value(HEROS[i]).endArray().startObject("context").endObject() + .field("payload", Integer.toString(i % 3)).endObject().endObject()).execute().actionGet(); + } + + refresh(); + + assertFieldSuggestions("0", "a", "Afari, Jamal", "Adrian Corbo", "Adrian"); + assertFieldSuggestions("0", "b", "Beaubier, Jeanne-Marie"); + assertFieldSuggestions("0", "c", "Corbo, Adrian", "Crichton, Lady Jacqueline Falsworth"); + assertFieldSuggestions("0", "mary", "Mary MacPherran \"Skeeter\"", "Mary MacPherran"); + assertFieldSuggestions("0", "s", "Skeeter", "Smythe, Spencer", "Spencer Smythe", "Spencer"); + assertFieldSuggestions("1", "s", "St. John", "St. 
John Allerdyce"); + assertFieldSuggestions("2", "s", "Smythe, Alistair"); + assertFieldSuggestions("1", "w", "Whitemane, Aelfyre"); + assertFieldSuggestions("2", "w", "Whitemane, Kofi"); + + } + + @Test + public void testMultiContext() throws Exception { + createIndexAndSettings(); + createMapping(TYPE, ContextBuilder.reference("st", "categoryA"), ContextBuilder.reference("nd", "categoryB")); + + for (int i = 0; i < HEROS.length; i++) { + client().prepareIndex(INDEX, TYPE, "" + i) + .setSource( + jsonBuilder().startObject().field("categoryA").value("" + (char) ('0' + (i % 3))).field("categoryB") + .value("" + (char) ('A' + (i % 3))).startObject(FIELD).startArray("input").value(HEROS[i]).endArray() + .startObject("context").endObject().field("payload", ((char) ('0' + (i % 3))) + "" + (char) ('A' + (i % 3))) + .endObject().endObject()).execute().actionGet(); + } + + refresh(); + + assertMultiContextSuggestions("0", "A", "a", "Afari, Jamal", "Adrian Corbo", "Adrian"); + assertMultiContextSuggestions("0", "A", "b", "Beaubier, Jeanne-Marie"); + assertMultiContextSuggestions("0", "A", "c", "Corbo, Adrian", "Crichton, Lady Jacqueline Falsworth"); + assertMultiContextSuggestions("0", "A", "mary", "Mary MacPherran \"Skeeter\"", "Mary MacPherran"); + assertMultiContextSuggestions("0", "A", "s", "Skeeter", "Smythe, Spencer", "Spencer Smythe", "Spencer"); + assertMultiContextSuggestions("1", "B", "s", "St. John", "St. 
John Allerdyce"); + assertMultiContextSuggestions("2", "C", "s", "Smythe, Alistair"); + assertMultiContextSuggestions("1", "B", "w", "Whitemane, Aelfyre"); + assertMultiContextSuggestions("2", "C", "w", "Whitemane, Kofi"); + } + + @Test + public void testMultiContextWithFuzzyLogic() throws Exception { + createIndexAndSettings(); + createMapping(TYPE, ContextBuilder.reference("st", "categoryA"), ContextBuilder.reference("nd", "categoryB")); + + for (int i = 0; i < HEROS.length; i++) { + String source = jsonBuilder().startObject().field("categoryA", "" + (char) ('0' + (i % 3))) + .field("categoryB", "" + (char) ('a' + (i % 3))).startObject(FIELD).array("input", HEROS[i]) + .startObject("context").endObject().startObject("payload").field("categoryA", "" + (char) ('0' + (i % 3))) + .field("categoryB", "" + (char) ('a' + (i % 3))).endObject().endObject().endObject().string(); + client().prepareIndex(INDEX, TYPE, "" + i).setSource(source).execute().actionGet(); + } + + refresh(); + + String[] prefix1 = { "0", "1", "2" }; + String[] prefix2 = { "a", "b", "c" }; + String[] prefix3 = { "0", "1" }; + String[] prefix4 = { "a", "b" }; + + assertContextWithFuzzySuggestions(prefix1, prefix2, "mary", "MacKenzie, Al", "MacPherran, Mary", "MacPherran, Mary \"Skeeter\"", + "MacTaggert, Moira", "Mary MacPherran", "Mary MacPherran \"Skeeter\""); + assertContextWithFuzzySuggestions(prefix1, prefix2, "mac", "Mikhail", "Mary MacPherran \"Skeeter\"", "MacTaggert, Moira", + "Moira MacTaggert", "Moira", "MacKenzie, Al", "Mary MacPherran", "Mikhail Rasputin", "MacPherran, Mary", + "MacPherran, Mary \"Skeeter\""); + assertContextWithFuzzySuggestions(prefix3, prefix4, "mary", "MacPherran, Mary", "MacPherran, Mary \"Skeeter\"", + "MacTaggert, Moira", "Mary MacPherran", "Mary MacPherran \"Skeeter\""); + assertContextWithFuzzySuggestions(prefix3, prefix4, "mac", "MacPherran, Mary", "MacPherran, Mary \"Skeeter\"", "MacTaggert, Moira", + "Mary MacPherran", "Mary MacPherran \"Skeeter\"", "Mikhail", 
"Mikhail Rasputin", "Moira", "Moira MacTaggert"); + } + + @Test + public void testSimpleType() throws Exception { + String[] types = { TYPE + "A", TYPE + "B", TYPE + "C" }; + + createIndexAndSettings(); + for (int i = 0; i < types.length; i++) { + createMapping(types[i], ContextBuilder.reference("st", "_type")); + } + + for (int i = 0; i < HEROS.length; i++) { + String type = types[i % types.length]; + client().prepareIndex(INDEX, type, "" + i) + .setSource( + jsonBuilder().startObject().startObject(FIELD).startArray("input").value(HEROS[i]).endArray() + .startObject("context").endObject().field("payload", type).endObject().endObject()).execute().actionGet(); + } + + refresh(); + + assertFieldSuggestions(types[0], "a", "Afari, Jamal", "Adrian Corbo", "Adrian"); + assertFieldSuggestions(types[0], "b", "Beaubier, Jeanne-Marie"); + assertFieldSuggestions(types[0], "c", "Corbo, Adrian", "Crichton, Lady Jacqueline Falsworth"); + assertFieldSuggestions(types[0], "mary", "Mary MacPherran \"Skeeter\"", "Mary MacPherran"); + assertFieldSuggestions(types[0], "s", "Skeeter", "Smythe, Spencer", "Spencer Smythe", "Spencer"); + assertFieldSuggestions(types[1], "s", "St. John", "St. 
John Allerdyce"); + assertFieldSuggestions(types[2], "s", "Smythe, Alistair"); + assertFieldSuggestions(types[1], "w", "Whitemane, Aelfyre"); + assertFieldSuggestions(types[2], "w", "Whitemane, Kofi"); + } + + public void assertGeoSuggestionsInRange(String location, String suggest, double precision) throws IOException { + + String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10); + CompletionSuggestionBuilder context = new CompletionSuggestionBuilder(suggestionName).field(FIELD).text(suggest).size(10) + .addGeoLocation("st", location); + SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); + SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); + + Suggest suggest2 = suggestResponse.getSuggest(); + assertTrue(suggest2.iterator().hasNext()); + for (Suggestion> s : suggest2) { + CompletionSuggestion suggestion = (CompletionSuggestion) s; + assertTrue(suggestion.iterator().hasNext()); + for (CompletionSuggestion.Entry entry : suggestion) { + List options = entry.getOptions(); + assertTrue(options.iterator().hasNext()); + for (CompletionSuggestion.Entry.Option option : options) { + String target = option.getPayloadAsString(); + assertDistance(location, target, Matchers.lessThanOrEqualTo(precision)); + } + } + } + } + + public void assertPrefixSuggestions(long prefix, String suggest, String... 
hits) throws IOException { + String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10); + CompletionSuggestionBuilder context = new CompletionSuggestionBuilder(suggestionName).field(FIELD).text(suggest) + .size(hits.length + 1).addCategory("st", Long.toString(prefix)); + SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); + SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); + ArrayList suggestions = new ArrayList(); + Suggest suggest2 = suggestResponse.getSuggest(); + assertTrue(suggest2.iterator().hasNext()); + for (Suggestion> s : suggest2) { + CompletionSuggestion suggestion = (CompletionSuggestion) s; + for (CompletionSuggestion.Entry entry : suggestion) { + List options = entry.getOptions(); + for (CompletionSuggestion.Entry.Option option : options) { + Map payload = option.getPayloadAsMap(); + int group = (Integer) payload.get("group"); + String text = option.getText().string(); + assertEquals(prefix, group); + suggestions.add(text); + } + } + } + assertSuggestionsMatch(suggestions, hits); + } + + public void assertContextWithFuzzySuggestions(String[] prefix1, String[] prefix2, String suggest, String... 
hits) throws IOException { + String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10); + CompletionSuggestionFuzzyBuilder context = new CompletionSuggestionFuzzyBuilder(suggestionName).field(FIELD).text(suggest) + .size(hits.length + 10).addContextField("st", prefix1).addContextField("nd", prefix2).setFuzziness(Fuzziness.TWO); + SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); + SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); + + ArrayList suggestions = new ArrayList(); + + Suggest suggest2 = suggestResponse.getSuggest(); + assertTrue(suggest2.iterator().hasNext()); + for (Suggestion> s : suggest2) { + CompletionSuggestion suggestion = (CompletionSuggestion) s; + for (CompletionSuggestion.Entry entry : suggestion) { + List options = entry.getOptions(); + for (CompletionSuggestion.Entry.Option option : options) { + Map payload = option.getPayloadAsMap(); + String text = option.getText().string(); + assertThat(prefix1, Matchers.hasItemInArray(payload.get("categoryA"))); + assertThat(prefix2, Matchers.hasItemInArray(payload.get("categoryB"))); + suggestions.add(text); + } + } + } + + assertSuggestionsMatch(suggestions, hits); + } + + public void assertFieldSuggestions(String value, String suggest, String... 
hits) throws IOException { + String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10); + CompletionSuggestionBuilder context = new CompletionSuggestionBuilder(suggestionName).field(FIELD).text(suggest).size(10) + .addContextField("st", value); + SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); + SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); + + ArrayList suggestions = new ArrayList(); + + Suggest suggest2 = suggestResponse.getSuggest(); + for (Suggestion> s : suggest2) { + CompletionSuggestion suggestion = (CompletionSuggestion) s; + for (CompletionSuggestion.Entry entry : suggestion) { + List options = entry.getOptions(); + for (CompletionSuggestion.Entry.Option option : options) { + String payload = option.getPayloadAsString(); + String text = option.getText().string(); + assertEquals(value, payload); + suggestions.add(text); + } + } + } + assertSuggestionsMatch(suggestions, hits); + } + + public void assertDoubleFieldSuggestions(String field1, String field2, String suggest, String... 
hits) throws IOException { + String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10); + CompletionSuggestionBuilder context = new CompletionSuggestionBuilder(suggestionName).field(FIELD).text(suggest).size(10) + .addContextField("st", field1).addContextField("nd", field2); + SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); + SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); + ArrayList suggestions = new ArrayList(); + + Suggest suggest2 = suggestResponse.getSuggest(); + for (Suggestion> s : suggest2) { + CompletionSuggestion suggestion = (CompletionSuggestion) s; + for (CompletionSuggestion.Entry entry : suggestion) { + List options = entry.getOptions(); + for (CompletionSuggestion.Entry.Option option : options) { + String payload = option.getPayloadAsString(); + String text = option.getText().string(); + assertEquals(field1 + "|" + field2, payload); + suggestions.add(text); + } + } + } + assertSuggestionsMatch(suggestions, hits); + } + + public void assertMultiContextSuggestions(String value1, String value2, String suggest, String... 
hits) throws IOException { + String suggestionName = RandomStrings.randomAsciiOfLength(new Random(), 10); + CompletionSuggestionBuilder context = new CompletionSuggestionBuilder(suggestionName).field(FIELD).text(suggest).size(10) + .addContextField("st", value1).addContextField("nd", value2); + + SuggestRequestBuilder suggestionRequest = client().prepareSuggest(INDEX).addSuggestion(context); + SuggestResponse suggestResponse = suggestionRequest.execute().actionGet(); + ArrayList suggestions = new ArrayList(); + + Suggest suggest2 = suggestResponse.getSuggest(); + for (Suggestion> s : suggest2) { + CompletionSuggestion suggestion = (CompletionSuggestion) s; + for (CompletionSuggestion.Entry entry : suggestion) { + List options = entry.getOptions(); + for (CompletionSuggestion.Entry.Option option : options) { + String payload = option.getPayloadAsString(); + String text = option.getText().string(); + assertEquals(value1 + value2, payload); + suggestions.add(text); + } + } + } + assertSuggestionsMatch(suggestions, hits); + } + + private void assertSuggestionsMatch(List suggestions, String... hits) { + boolean[] suggested = new boolean[hits.length]; + Arrays.sort(hits); + Arrays.fill(suggested, false); + int numSuggestions = 0; + + for (String suggestion : suggestions) { + int hitpos = Arrays.binarySearch(hits, suggestion); + + assertTrue(hitpos >= 0); + assertEquals(hits[hitpos], suggestion); + assertTrue(!suggested[hitpos]); + + suggested[hitpos] = true; + numSuggestions++; + + } + assertEquals(hits.length, numSuggestions); + } + + private void createMapping(String type, ContextBuilder... context) throws IOException { + createMapping(type, false, context); + } + + private void createMapping(String type, boolean preserveSeparators, ContextBuilder... 
context) throws IOException { + createMapping(type, "simple", "simple", true, preserveSeparators, true, context); + } + + private ImmutableSettings.Builder createDefaultSettings() { + int randomShardNumber = between(1, 5); + int randomReplicaNumber = between(0, cluster().size() - 1); + return settingsBuilder().put(SETTING_NUMBER_OF_SHARDS, randomShardNumber).put(SETTING_NUMBER_OF_REPLICAS, randomReplicaNumber); + } + + private void createIndexAndSettings() throws IOException { + createIndexAndSettings(createDefaultSettings()); + } + + private void createIndexAndSettings(Settings.Builder settingsBuilder) throws IOException { + client().admin().indices().prepareCreate(INDEX).setSettings(settingsBuilder).get(); + ensureYellow(); + } + + private void createMapping(String type, String indexAnalyzer, String searchAnalyzer, boolean payloads, boolean preserveSeparators, + boolean preservePositionIncrements, ContextBuilder... contexts) throws IOException { + XContentBuilder mapping = jsonBuilder(); + mapping.startObject(); + mapping.startObject(type); + mapping.startObject("properties"); + mapping.startObject(FIELD); + mapping.field("type", "completion"); + mapping.field("index_analyzer", indexAnalyzer); + mapping.field("search_analyzer", searchAnalyzer); + mapping.field("payloads", payloads); + mapping.field("preserve_separators", preserveSeparators); + mapping.field("preserve_position_increments", preservePositionIncrements); + + mapping.startObject("context"); + for (ContextBuilder context : contexts) { + mapping.value(context.build()); + } + mapping.endObject(); + + mapping.endObject(); + mapping.endObject(); + mapping.endObject(); + mapping.endObject(); + + PutMappingResponse putMappingResponse = client().admin().indices().preparePutMapping(INDEX).setType(type).setSource(mapping).get(); + + assertThat(putMappingResponse.isAcknowledged(), is(true)); + ensureYellow(); + } +} diff --git 
a/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java b/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java index 4a47b3dec30..f01af66d0cc 100644 --- a/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java +++ b/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java @@ -31,16 +31,15 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; -import org.apache.lucene.util.fst.ByteSequenceOutputs; -import org.apache.lucene.util.fst.FST; -import org.apache.lucene.util.fst.PairOutputs; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.fst.*; import org.apache.lucene.util.fst.PairOutputs.Pair; -import org.apache.lucene.util.fst.PositiveIntOutputs; import org.elasticsearch.common.regex.Regex; -import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.core.CompletionFieldMapper; +import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder; import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider; import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory; -import org.elasticsearch.search.suggest.completion.AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder; +import org.elasticsearch.search.suggest.context.ContextMapping.ContextQuery; import java.io.IOException; import java.util.*; @@ -84,7 +83,7 @@ public class AnalyzingCompletionLookupProviderV1 extends CompletionLookupProvide int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0; // needs to fixed in the suggester first before it can be supported //options |= exactFirst ? 
XAnalyzingSuggester.EXACT_FIRST : 0; - prototype = new XAnalyzingSuggester(null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, + prototype = new XAnalyzingSuggester(null,null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, null, false, 1, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } @@ -243,24 +242,25 @@ public class AnalyzingCompletionLookupProviderV1 extends CompletionLookupProvide final long ramBytesUsed = sizeInBytes; return new LookupFactory() { @Override - public Lookup getLookup(FieldMapper mapper, CompletionSuggestionContext suggestionContext) { + public Lookup getLookup(CompletionFieldMapper mapper, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(mapper.names().indexName()); if (analyzingSuggestHolder == null) { return null; } - int flags = analyzingSuggestHolder.preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0; + int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0; + + final Automaton queryPrefix = mapper.requiresContext() ? 
ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; XAnalyzingSuggester suggester; if (suggestionContext.isFuzzy()) { - suggester = new XFuzzySuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, + suggester = new XFuzzySuggester(mapper.indexAnalyzer(),queryPrefix, mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); - } else { - suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, + suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), queryPrefix, mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, @@ -294,7 +294,7 @@ public class AnalyzingCompletionLookupProviderV1 extends CompletionLookupProvide return new CompletionStats(sizeInBytes, completionFields); } @Override - AnalyzingSuggestHolder getAnalyzingSuggestHolder(FieldMapper mapper) { + AnalyzingSuggestHolder getAnalyzingSuggestHolder(CompletionFieldMapper mapper) { return lookupMap.get(mapper.names().indexName()); } diff --git a/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java b/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java index 650ac2ab352..411c27268ca 100644 --- a/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java +++ 
b/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java @@ -44,6 +44,7 @@ import org.elasticsearch.index.mapper.core.CompletionFieldMapper; import org.elasticsearch.index.merge.Merges; import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory; +import org.elasticsearch.search.suggest.context.ContextMapping; import org.elasticsearch.test.ElasticsearchTestCase; import org.junit.Test; @@ -72,7 +73,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase { LookupFactory load = currentProvider.load(input); PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat()); NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT)); - Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null), new CompletionSuggestionContext(null)); + Lookup lookup = load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING), new CompletionSuggestionContext(null)); List result = lookup.lookup("ge", false, 10); assertThat(result.get(0).key.toString(), equalTo("Generator - Foo Fighters")); assertThat(result.get(0).payload.utf8ToString(), equalTo("id:10")); @@ -91,7 +92,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase { LookupFactory load = currentProvider.load(input); PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat()); NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT)); - AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder analyzingSuggestHolder = 
load.getAnalyzingSuggestHolder(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null)); + AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder analyzingSuggestHolder = load.getAnalyzingSuggestHolder(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING)); assertThat(analyzingSuggestHolder.sepLabel, is(AnalyzingCompletionLookupProviderV1.SEP_LABEL)); assertThat(analyzingSuggestHolder.payloadSep, is(AnalyzingCompletionLookupProviderV1.PAYLOAD_SEP)); assertThat(analyzingSuggestHolder.endByte, is(AnalyzingCompletionLookupProviderV1.END_BYTE)); @@ -109,7 +110,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase { LookupFactory load = currentProvider.load(input); PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat()); NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT)); - AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder analyzingSuggestHolder = load.getAnalyzingSuggestHolder(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null)); + AnalyzingCompletionLookupProvider.AnalyzingSuggestHolder analyzingSuggestHolder = load.getAnalyzingSuggestHolder(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING)); assertThat(analyzingSuggestHolder.sepLabel, is(XAnalyzingSuggester.SEP_LABEL)); assertThat(analyzingSuggestHolder.payloadSep, is(XAnalyzingSuggester.PAYLOAD_SEP)); assertThat(analyzingSuggestHolder.endByte, is(XAnalyzingSuggester.END_BYTE)); @@ -124,7 +125,7 @@ public 
class CompletionPostingsFormatTest extends ElasticsearchTestCase { final boolean usePayloads = getRandom().nextBoolean(); final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0; - XAnalyzingSuggester reference = new XAnalyzingSuggester(new StandardAnalyzer(TEST_VERSION_CURRENT), new StandardAnalyzer( + XAnalyzingSuggester reference = new XAnalyzingSuggester(new StandardAnalyzer(TEST_VERSION_CURRENT), null, new StandardAnalyzer( TEST_VERSION_CURRENT), options, 256, -1, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); LineFileDocs docs = new LineFileDocs(getRandom()); int num = atLeast(150); @@ -208,7 +209,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase { NamedAnalyzer namedAnalzyer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT)); final CompletionFieldMapper mapper = new CompletionFieldMapper(new Names("foo"), namedAnalzyer, namedAnalzyer, provider, null, usePayloads, - preserveSeparators, preservePositionIncrements, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null); + preserveSeparators, preservePositionIncrements, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING); Lookup buildAnalyzingLookup = buildAnalyzingLookup(mapper, titles, titles, weights); Field field = buildAnalyzingLookup.getClass().getDeclaredField("maxAnalyzedPathsForOneInput"); field.setAccessible(true); @@ -259,7 +260,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase { for (int i = 0; i < weights.length; i++) { Document doc = new Document(); BytesRef payload = mapper.buildPayload(new BytesRef(surfaces[i]), weights[i], new BytesRef(Long.toString(weights[i]))); - doc.add(mapper.getCompletionField(terms[i], payload)); + doc.add(mapper.getCompletionField(ContextMapping.EMPTY_CONTEXT, terms[i], payload)); if 
(randomBoolean()) { writer.commit(); } @@ -297,7 +298,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase { LookupFactory load = provider.load(input); PostingsFormatProvider format = new PreBuiltPostingsFormatProvider(new Elasticsearch090PostingsFormat()); NamedAnalyzer analyzer = new NamedAnalyzer("foo", new StandardAnalyzer(TEST_VERSION_CURRENT)); - assertNull(load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null), new CompletionSuggestionContext(null))); + assertNull(load.getLookup(new CompletionFieldMapper(new Names("foo"), analyzer, analyzer, format, null, true, true, true, Integer.MAX_VALUE, AbstractFieldMapper.MultiFields.empty(), null, ContextMapping.EMPTY_MAPPING), new CompletionSuggestionContext(null))); dir.close(); } diff --git a/src/test/java/org/elasticsearch/test/hamcrest/ElasticsearchGeoAssertions.java b/src/test/java/org/elasticsearch/test/hamcrest/ElasticsearchGeoAssertions.java index fe6a06bb5d5..2b5daf4b92f 100644 --- a/src/test/java/org/elasticsearch/test/hamcrest/ElasticsearchGeoAssertions.java +++ b/src/test/java/org/elasticsearch/test/hamcrest/ElasticsearchGeoAssertions.java @@ -23,6 +23,10 @@ import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.jts.JtsGeometry; import com.spatial4j.core.shape.jts.JtsPoint; import com.vividsolutions.jts.geom.*; +import org.elasticsearch.common.geo.GeoDistance; +import org.elasticsearch.common.geo.GeoPoint; +import org.elasticsearch.common.unit.DistanceUnit; +import org.hamcrest.Matcher; import org.junit.Assert; import java.util.Arrays; @@ -201,4 +205,19 @@ public class ElasticsearchGeoAssertions { public static void assertMultiLineString(Shape shape) { assert(unwrap(shape) instanceof MultiLineString): "expected MultiLineString but found " + unwrap(shape).getClass().getName(); } + + public static void assertDistance(String geohash1, String 
geohash2, Matcher match) { + GeoPoint p1 = new GeoPoint(geohash1); + GeoPoint p2 = new GeoPoint(geohash2); + assertDistance(p1.lat(), p1.lon(), p2.lat(),p2.lon(), match); + } + + public static void assertDistance(double lat1, double lon1, double lat2, double lon2, Matcher match) { + assertThat(distance(lat1, lon1, lat2, lon2), match); + } + + private static double distance(double lat1, double lon1, double lat2, double lon2) { + return GeoDistance.ARC.calculate(lat1, lon1, lat2, lon2, DistanceUnit.DEFAULT); + } + }