Add ICUCollationFieldMapper (#24126)
Adds a new "icu_collation_keyword" field type that exposes Lucene's ICUCollationDocValuesField. ICUCollationDocValuesField is the replacement for ICUCollationKeyFilter, which has been deprecated since Lucene 5.
This commit is contained in:
parent
3f1ef488cd
commit
b24326271e
|
@ -57,7 +57,7 @@ public abstract class StringFieldType extends TermBasedFieldType {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions,
|
||||
public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions,
|
||||
boolean transpositions) {
|
||||
failIfNotIndexed();
|
||||
return new FuzzyQuery(new Term(name(), indexedValueForSearch(value)),
|
||||
|
@ -65,7 +65,7 @@ public abstract class StringFieldType extends TermBasedFieldType {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
failIfNotIndexed();
|
||||
PrefixQuery query = new PrefixQuery(new Term(name(), indexedValueForSearch(value)));
|
||||
if (method != null) {
|
||||
|
@ -75,7 +75,7 @@ public abstract class StringFieldType extends TermBasedFieldType {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final Query regexpQuery(String value, int flags, int maxDeterminizedStates,
|
||||
public Query regexpQuery(String value, int flags, int maxDeterminizedStates,
|
||||
MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
failIfNotIndexed();
|
||||
RegexpQuery query = new RegexpQuery(new Term(name(), indexedValueForSearch(value)), flags, maxDeterminizedStates);
|
||||
|
|
|
@ -302,50 +302,46 @@ PUT icu_sample
|
|||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
|
||||
[[analysis-icu-collation]]
|
||||
==== ICU Collation Token Filter
|
||||
|
||||
[WARNING]
|
||||
======
|
||||
This token filter has been deprecated since Lucene 5.0. Please use
|
||||
<<analysis-icu-collation-keyword-field, ICU Collation Keyword Field>>.
|
||||
======
|
||||
|
||||
[[analysis-icu-collation-keyword-field]]
|
||||
==== ICU Collation Keyword Field
|
||||
|
||||
Collations are used for sorting documents in a language-specific word order.
|
||||
The `icu_collation` token filter is available to all indices and defaults to
|
||||
using the
|
||||
{defguide}/sorting-collations.html#uca[DUCET collation],
|
||||
The `icu_collation_keyword` field type is available to all indices. It encodes
|
||||
the terms directly as bytes in a doc values field and as a single indexed token, just
|
||||
like a standard {ref}/keyword.html[Keyword Field].
|
||||
|
||||
It defaults to using {defguide}/sorting-collations.html#uca[DUCET collation],
|
||||
which is a best-effort attempt at language-neutral sorting.
|
||||
|
||||
Below is an example of how to set up a field for sorting German names in
|
||||
``phonebook'' order:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT /my_index
|
||||
--------------------------
|
||||
PUT my_index
|
||||
{
|
||||
"settings": {
|
||||
"analysis": {
|
||||
"filter": {
|
||||
"german_phonebook": {
|
||||
"type": "icu_collation",
|
||||
"language": "de",
|
||||
"country": "DE",
|
||||
"variant": "@collation=phonebook"
|
||||
}
|
||||
},
|
||||
"analyzer": {
|
||||
"german_phonebook": {
|
||||
"tokenizer": "keyword",
|
||||
"filter": [ "german_phonebook" ]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"user": {
|
||||
"properties": {
|
||||
"name": { <1>
|
||||
"name": { <1>
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"sort": { <2>
|
||||
"type": "text",
|
||||
"fielddata": true,
|
||||
"analyzer": "german_phonebook"
|
||||
"sort": { <2>
|
||||
"type": "icu_collation_keyword",
|
||||
"index": false,
|
||||
"language": "de",
|
||||
"country": "DE",
|
||||
"variant": "@collation=phonebook"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -364,15 +360,47 @@ GET _search <3>
|
|||
"sort": "name.sort"
|
||||
}
|
||||
|
||||
--------------------------------------------------
|
||||
--------------------------
|
||||
// CONSOLE
|
||||
|
||||
<1> The `name` field uses the `standard` analyzer, and so supports full-text queries.
|
||||
<2> The `name.sort` field uses the `keyword` analyzer to preserve the name as
|
||||
a single token, and applies the `german_phonebook` token filter to index
|
||||
the value in German phonebook sort order.
|
||||
<2> The `name.sort` field is an `icu_collation_keyword` field that preserves the name as
|
||||
a single token in doc values, and applies the German ``phonebook'' order.
|
||||
<3> An example query which searches the `name` field and sorts on the `name.sort` field.
|
||||
|
||||
==== Parameters for ICU Collation Keyword Fields
|
||||
|
||||
The following parameters are accepted by `icu_collation_keyword` fields:
|
||||
|
||||
[horizontal]
|
||||
|
||||
`doc_values`::
|
||||
|
||||
Should the field be stored on disk in a column-stride fashion, so that it
|
||||
can later be used for sorting, aggregations, or scripting? Accepts `true`
|
||||
(default) or `false`.
|
||||
|
||||
`index`::
|
||||
|
||||
Should the field be searchable? Accepts `true` (default) or `false`.
|
||||
|
||||
`null_value`::
|
||||
|
||||
Accepts a string value which is substituted for any explicit `null`
|
||||
values. Defaults to `null`, which means the field is treated as missing.
|
||||
|
||||
`store`::
|
||||
|
||||
Whether the field value should be stored and retrievable separately from
|
||||
the {ref}/mapping-source-field.html[`_source`] field. Accepts `true` or `false`
|
||||
(default).
|
||||
|
||||
`fields`::
|
||||
|
||||
Multi-fields allow the same string value to be indexed in multiple ways for
|
||||
different purposes, such as one field for search and a multi-field for
|
||||
sorting and aggregations.
|
||||
|
||||
===== Collation options
|
||||
|
||||
`strength`::
|
||||
|
@ -404,14 +432,14 @@ Possible values: `shifted` or `non-ignorable`. Sets the alternate handling for
|
|||
strength `quaternary` to be either shifted or non-ignorable, which boils down
|
||||
to ignoring punctuation and whitespace.
|
||||
|
||||
`caseLevel`::
|
||||
`case_level`::
|
||||
|
||||
Possible values: `true` or `false` (default). Whether case level sorting is
|
||||
required. When strength is set to `primary` this will ignore accent
|
||||
differences.
|
||||
|
||||
|
||||
`caseFirst`::
|
||||
`case_first`::
|
||||
|
||||
Possible values: `lower` or `upper`. Useful to control which case is sorted
|
||||
first when case is not ignored for strength `tertiary`. The default depends on
|
||||
|
@ -424,11 +452,11 @@ according to their numeric representation. For example the value `egg-9` is
|
|||
sorted before the value `egg-21`.
|
||||
|
||||
|
||||
`variableTop`::
|
||||
`variable_top`::
|
||||
|
||||
Single character or contraction. Controls what is variable for `alternate`.
|
||||
|
||||
`hiraganaQuaternaryMode`::
|
||||
`hiragana_quaternary_mode`::
|
||||
|
||||
Possible values: `true` or `false`. Distinguishing between Katakana and
|
||||
Hiragana characters in `quaternary` strength.
|
||||
|
|
|
@ -0,0 +1,746 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.RawCollationKey;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.support.XContentMapValues;
|
||||
import org.elasticsearch.index.analysis.IndexableBinaryStringTools;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
|
||||
import org.elasticsearch.index.query.QueryShardContext;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
import org.joda.time.DateTimeZone;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.function.LongSupplier;
|
||||
|
||||
public class ICUCollationKeywordFieldMapper extends FieldMapper {
|
||||
|
||||
public static final String CONTENT_TYPE = "icu_collation_keyword";
|
||||
|
||||
public static class Defaults {
|
||||
public static final MappedFieldType FIELD_TYPE = new CollationFieldType();
|
||||
|
||||
static {
|
||||
FIELD_TYPE.setTokenized(false);
|
||||
FIELD_TYPE.setOmitNorms(true);
|
||||
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
|
||||
FIELD_TYPE.freeze();
|
||||
}
|
||||
|
||||
public static final String NULL_VALUE = null;
|
||||
}
|
||||
|
||||
public static final class CollationFieldType extends StringFieldType {
|
||||
private Collator collator = null;
|
||||
|
||||
public CollationFieldType() {
|
||||
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
|
||||
setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
|
||||
}
|
||||
|
||||
protected CollationFieldType(CollationFieldType ref) {
|
||||
super(ref);
|
||||
this.collator = ref.collator;
|
||||
}
|
||||
|
||||
public CollationFieldType clone() {
|
||||
return new CollationFieldType(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
return super.equals(o) && Objects.equals(collator, ((CollationFieldType) o).collator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkCompatibility(MappedFieldType otherFT, List<String> conflicts, boolean strict) {
|
||||
super.checkCompatibility(otherFT, conflicts, strict);
|
||||
CollationFieldType other = (CollationFieldType) otherFT;
|
||||
if (!Objects.equals(collator, other.collator)) {
|
||||
conflicts.add("mapper [" + name() + "] has different [collator]");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 31 * super.hashCode() + Objects.hashCode(collator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String typeName() {
|
||||
return CONTENT_TYPE;
|
||||
}
|
||||
|
||||
public Collator collator() {
|
||||
return collator;
|
||||
}
|
||||
|
||||
public void setCollator(Collator collator) {
|
||||
checkIfFrozen();
|
||||
this.collator = collator.isFrozen() ? collator : collator.freeze();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query nullValueQuery() {
|
||||
if (nullValue() == null) {
|
||||
return null;
|
||||
}
|
||||
return termQuery(nullValue(), null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexFieldData.Builder fielddataBuilder() {
|
||||
failIfNoDocValues();
|
||||
return new DocValuesIndexFieldData.Builder();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BytesRef indexedValueForSearch(Object value) {
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
if (value instanceof BytesRef) {
|
||||
value = ((BytesRef) value).utf8ToString();
|
||||
}
|
||||
|
||||
if (collator != null) {
|
||||
RawCollationKey key = collator.getRawCollationKey(value.toString(), null);
|
||||
return new BytesRef(key.bytes, 0, key.size);
|
||||
} else {
|
||||
throw new IllegalStateException("collator is null");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions,
|
||||
boolean transpositions) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query regexpQuery(String value, int flags, int maxDeterminizedStates,
|
||||
MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public static DocValueFormat COLLATE_FORMAT = new DocValueFormat() {
|
||||
@Override
|
||||
public String getWriteableName() {
|
||||
return "collate";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String format(long value) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String format(double value) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String format(BytesRef value) {
|
||||
int encodedLength = IndexableBinaryStringTools.getEncodedLength(value.bytes, value.offset, value.length);
|
||||
char[] encoded = new char[encodedLength];
|
||||
IndexableBinaryStringTools.encode(value.bytes, value.offset, value.length, encoded, 0, encodedLength);
|
||||
return new String(encoded, 0, encodedLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long parseLong(String value, boolean roundUp, LongSupplier now) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public double parseDouble(String value, boolean roundUp, LongSupplier now) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef parseBytesRef(String value) {
|
||||
char[] encoded = value.toCharArray();
|
||||
int decodedLength = IndexableBinaryStringTools.getDecodedLength(encoded, 0, encoded.length);
|
||||
byte[] decoded = new byte[decodedLength];
|
||||
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0, decodedLength);
|
||||
return new BytesRef(decoded);
|
||||
}
|
||||
};
|
||||
|
||||
@Override
|
||||
public DocValueFormat docValueFormat(final String format, final DateTimeZone timeZone) {
|
||||
return COLLATE_FORMAT;
|
||||
}
|
||||
}
|
||||
|
||||
public static class Builder extends FieldMapper.Builder<Builder, ICUCollationKeywordFieldMapper> {
|
||||
private String rules = null;
|
||||
private String language = null;
|
||||
private String country = null;
|
||||
private String variant = null;
|
||||
private String strength = null;
|
||||
private String decomposition = null;
|
||||
private String alternate = null;
|
||||
private boolean caseLevel = false;
|
||||
private String caseFirst = null;
|
||||
private boolean numeric = false;
|
||||
private String variableTop = null;
|
||||
private boolean hiraganaQuaternaryMode = false;
|
||||
private String nullValue = Defaults.NULL_VALUE;
|
||||
|
||||
public Builder(String name) {
|
||||
super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
|
||||
builder = this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CollationFieldType fieldType() {
|
||||
return (CollationFieldType) super.fieldType();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Builder indexOptions(IndexOptions indexOptions) {
|
||||
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) > 0) {
|
||||
throw new IllegalArgumentException("The [" + CONTENT_TYPE + "] field does not support positions, got [index_options]="
|
||||
+ indexOptionToString(indexOptions));
|
||||
}
|
||||
|
||||
return super.indexOptions(indexOptions);
|
||||
}
|
||||
|
||||
public String rules() {
|
||||
return rules;
|
||||
}
|
||||
|
||||
public Builder rules(final String rules) {
|
||||
this.rules = rules;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String language() {
|
||||
return language;
|
||||
}
|
||||
|
||||
public Builder language(final String language) {
|
||||
this.language = language;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String country() {
|
||||
return country;
|
||||
}
|
||||
|
||||
public Builder country(final String country) {
|
||||
this.country = country;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String variant() {
|
||||
return variant;
|
||||
}
|
||||
|
||||
public Builder variant(final String variant) {
|
||||
this.variant = variant;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String strength() {
|
||||
return strength;
|
||||
}
|
||||
|
||||
public Builder strength(final String strength) {
|
||||
this.strength = strength;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String decomposition() {
|
||||
return decomposition;
|
||||
}
|
||||
|
||||
public Builder decomposition(final String decomposition) {
|
||||
this.decomposition = decomposition;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String alternate() {
|
||||
return alternate;
|
||||
}
|
||||
|
||||
public Builder alternate(final String alternate) {
|
||||
this.alternate = alternate;
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean caseLevel() {
|
||||
return caseLevel;
|
||||
}
|
||||
|
||||
public Builder caseLevel(final boolean caseLevel) {
|
||||
this.caseLevel = caseLevel;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String caseFirst() {
|
||||
return caseFirst;
|
||||
}
|
||||
|
||||
public Builder caseFirst(final String caseFirst) {
|
||||
this.caseFirst = caseFirst;
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean numeric() {
|
||||
return numeric;
|
||||
}
|
||||
|
||||
public Builder numeric(final boolean numeric) {
|
||||
this.numeric = numeric;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String variableTop() {
|
||||
return variableTop;
|
||||
}
|
||||
|
||||
public Builder variableTop(final String variableTop) {
|
||||
this.variableTop = variableTop;
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean hiraganaQuaternaryMode() {
|
||||
return hiraganaQuaternaryMode;
|
||||
}
|
||||
|
||||
public Builder hiraganaQuaternaryMode(final boolean hiraganaQuaternaryMode) {
|
||||
this.hiraganaQuaternaryMode = hiraganaQuaternaryMode;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Collator buildCollator() {
|
||||
Collator collator;
|
||||
if (rules != null) {
|
||||
try {
|
||||
collator = new RuleBasedCollator(rules);
|
||||
} catch (Exception e) {
|
||||
throw new IllegalArgumentException("Failed to parse collation rules", e);
|
||||
}
|
||||
} else {
|
||||
if (language != null) {
|
||||
ULocale locale;
|
||||
if (country != null) {
|
||||
if (variant != null) {
|
||||
locale = new ULocale(language, country, variant);
|
||||
} else {
|
||||
locale = new ULocale(language, country);
|
||||
}
|
||||
} else {
|
||||
locale = new ULocale(language);
|
||||
}
|
||||
collator = Collator.getInstance(locale);
|
||||
} else {
|
||||
collator = Collator.getInstance();
|
||||
}
|
||||
}
|
||||
|
||||
// set the strength flag, otherwise it will be the default.
|
||||
if (strength != null) {
|
||||
if (strength.equalsIgnoreCase("primary")) {
|
||||
collator.setStrength(Collator.PRIMARY);
|
||||
} else if (strength.equalsIgnoreCase("secondary")) {
|
||||
collator.setStrength(Collator.SECONDARY);
|
||||
} else if (strength.equalsIgnoreCase("tertiary")) {
|
||||
collator.setStrength(Collator.TERTIARY);
|
||||
} else if (strength.equalsIgnoreCase("quaternary")) {
|
||||
collator.setStrength(Collator.QUATERNARY);
|
||||
} else if (strength.equalsIgnoreCase("identical")) {
|
||||
collator.setStrength(Collator.IDENTICAL);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Invalid strength: " + strength);
|
||||
}
|
||||
}
|
||||
|
||||
// set the decomposition flag, otherwise it will be the default.
|
||||
if (decomposition != null) {
|
||||
if (decomposition.equalsIgnoreCase("no")) {
|
||||
collator.setDecomposition(Collator.NO_DECOMPOSITION);
|
||||
} else if (decomposition.equalsIgnoreCase("canonical")) {
|
||||
collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Invalid decomposition: " + decomposition);
|
||||
}
|
||||
}
|
||||
|
||||
// expert options: concrete subclasses are always a RuleBasedCollator
|
||||
RuleBasedCollator rbc = (RuleBasedCollator) collator;
|
||||
if (alternate != null) {
|
||||
if (alternate.equalsIgnoreCase("shifted")) {
|
||||
rbc.setAlternateHandlingShifted(true);
|
||||
} else if (alternate.equalsIgnoreCase("non-ignorable")) {
|
||||
rbc.setAlternateHandlingShifted(false);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Invalid alternate: " + alternate);
|
||||
}
|
||||
}
|
||||
|
||||
if (caseLevel) {
|
||||
rbc.setCaseLevel(true);
|
||||
}
|
||||
|
||||
if (caseFirst != null) {
|
||||
if (caseFirst.equalsIgnoreCase("lower")) {
|
||||
rbc.setLowerCaseFirst(true);
|
||||
} else if (caseFirst.equalsIgnoreCase("upper")) {
|
||||
rbc.setUpperCaseFirst(true);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Invalid caseFirst: " + caseFirst);
|
||||
}
|
||||
}
|
||||
|
||||
if (numeric) {
|
||||
rbc.setNumericCollation(true);
|
||||
}
|
||||
|
||||
if (variableTop != null) {
|
||||
rbc.setVariableTop(variableTop);
|
||||
}
|
||||
|
||||
if (hiraganaQuaternaryMode) {
|
||||
rbc.setHiraganaQuaternary(true);
|
||||
}
|
||||
|
||||
// freeze so thread-safe
|
||||
return collator.freeze();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ICUCollationKeywordFieldMapper build(BuilderContext context) {
|
||||
final Collator collator = buildCollator();
|
||||
fieldType().setCollator(collator);
|
||||
setupFieldType(context);
|
||||
return new ICUCollationKeywordFieldMapper(name, fieldType, defaultFieldType, context.indexSettings(),
|
||||
multiFieldsBuilder.build(this, context), copyTo, rules, language, country, variant, strength, decomposition,
|
||||
alternate, caseLevel, caseFirst, numeric, variableTop, hiraganaQuaternaryMode, collator);
|
||||
}
|
||||
}
|
||||
|
||||
public static class TypeParser implements Mapper.TypeParser {
|
||||
@Override
|
||||
public Mapper.Builder<?, ?> parse(String name, Map<String, Object> node, ParserContext parserContext)
|
||||
throws MapperParsingException {
|
||||
Builder builder = new Builder(name);
|
||||
TypeParsers.parseField(builder, name, node, parserContext);
|
||||
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext(); ) {
|
||||
Map.Entry<String, Object> entry = iterator.next();
|
||||
String fieldName = entry.getKey();
|
||||
Object fieldNode = entry.getValue();
|
||||
switch (fieldName) {
|
||||
case "null_value":
|
||||
if (fieldNode == null) {
|
||||
throw new MapperParsingException("Property [null_value] cannot be null.");
|
||||
}
|
||||
builder.nullValue(fieldNode.toString());
|
||||
iterator.remove();
|
||||
break;
|
||||
case "norms":
|
||||
builder.omitNorms(!XContentMapValues.nodeBooleanValue(fieldNode, "norms"));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "rules":
|
||||
builder.rules(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "language":
|
||||
builder.language(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "country":
|
||||
builder.country(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "variant":
|
||||
builder.variant(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "strength":
|
||||
builder.strength(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "decomposition":
|
||||
builder.decomposition(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "alternate":
|
||||
builder.alternate(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "case_level":
|
||||
builder.caseLevel(XContentMapValues.nodeBooleanValue(fieldNode, false));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "case_first":
|
||||
builder.caseFirst(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "numeric":
|
||||
builder.numeric(XContentMapValues.nodeBooleanValue(fieldNode, false));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "variable_top":
|
||||
builder.variableTop(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||
iterator.remove();
|
||||
break;
|
||||
case "hiragana_quaternary_mode":
|
||||
builder.hiraganaQuaternaryMode(XContentMapValues.nodeBooleanValue(fieldNode, false));
|
||||
iterator.remove();
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return builder;
|
||||
}
|
||||
}
|
||||
|
||||
private final String rules;
|
||||
private final String language;
|
||||
private final String country;
|
||||
private final String variant;
|
||||
private final String strength;
|
||||
private final String decomposition;
|
||||
private final String alternate;
|
||||
private final boolean caseLevel;
|
||||
private final String caseFirst;
|
||||
private final boolean numeric;
|
||||
private final String variableTop;
|
||||
private final boolean hiraganaQuaternaryMode;
|
||||
private final Collator collator;
|
||||
|
||||
protected ICUCollationKeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
|
||||
Settings indexSettings, MultiFields multiFields, CopyTo copyTo, String rules, String language,
|
||||
String country, String variant,
|
||||
String strength, String decomposition, String alternate, boolean caseLevel, String caseFirst,
|
||||
boolean numeric, String variableTop, boolean hiraganaQuaternaryMode, Collator collator) {
|
||||
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
|
||||
assert collator.isFrozen();
|
||||
this.rules = rules;
|
||||
this.language = language;
|
||||
this.country = country;
|
||||
this.variant = variant;
|
||||
this.strength = strength;
|
||||
this.decomposition = decomposition;
|
||||
this.alternate = alternate;
|
||||
this.caseLevel = caseLevel;
|
||||
this.caseFirst = caseFirst;
|
||||
this.numeric = numeric;
|
||||
this.variableTop = variableTop;
|
||||
this.hiraganaQuaternaryMode = hiraganaQuaternaryMode;
|
||||
this.collator = collator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CollationFieldType fieldType() {
|
||||
return (CollationFieldType) super.fieldType();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected String contentType() {
|
||||
return CONTENT_TYPE;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doMerge(Mapper mergeWith, boolean updateAllTypes) {
|
||||
super.doMerge(mergeWith, updateAllTypes);
|
||||
|
||||
List<String> conflicts = new ArrayList<>();
|
||||
ICUCollationKeywordFieldMapper icuMergeWith = (ICUCollationKeywordFieldMapper) mergeWith;
|
||||
|
||||
if (!Objects.equals(rules, icuMergeWith.rules)) {
|
||||
conflicts.add("Cannot update rules setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (!Objects.equals(language, icuMergeWith.language)) {
|
||||
conflicts.add("Cannot update language setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (!Objects.equals(country, icuMergeWith.country)) {
|
||||
conflicts.add("Cannot update country setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (!Objects.equals(variant, icuMergeWith.variant)) {
|
||||
conflicts.add("Cannot update variant setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (!Objects.equals(strength, icuMergeWith.strength)) {
|
||||
conflicts.add("Cannot update strength setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (!Objects.equals(decomposition, icuMergeWith.decomposition)) {
|
||||
conflicts.add("Cannot update decomposition setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (!Objects.equals(alternate, icuMergeWith.alternate)) {
|
||||
conflicts.add("Cannot update alternate setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (caseLevel != icuMergeWith.caseLevel) {
|
||||
conflicts.add("Cannot update case_level setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (!Objects.equals(caseFirst, icuMergeWith.caseFirst)) {
|
||||
conflicts.add("Cannot update case_first setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (numeric != icuMergeWith.numeric) {
|
||||
conflicts.add("Cannot update numeric setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (!Objects.equals(variableTop, icuMergeWith.variableTop)) {
|
||||
conflicts.add("Cannot update variable_top setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (hiraganaQuaternaryMode != icuMergeWith.hiraganaQuaternaryMode) {
|
||||
conflicts.add("Cannot update hiragana_quaternary_mode setting for [" + CONTENT_TYPE + "]");
|
||||
}
|
||||
|
||||
if (!conflicts.isEmpty()) {
|
||||
throw new IllegalArgumentException("Can't merge because of conflicts: " + conflicts);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
|
||||
super.doXContentBody(builder, includeDefaults, params);
|
||||
|
||||
if (includeDefaults || fieldType().nullValue() != null) {
|
||||
builder.field("null_value", fieldType().nullValue());
|
||||
}
|
||||
|
||||
if (includeDefaults || rules != null) {
|
||||
builder.field("rules", rules);
|
||||
}
|
||||
|
||||
if (includeDefaults || language != null) {
|
||||
builder.field("language", language);
|
||||
}
|
||||
|
||||
if (includeDefaults || country != null) {
|
||||
builder.field("country", country);
|
||||
}
|
||||
|
||||
if (includeDefaults || variant != null) {
|
||||
builder.field("variant", variant);
|
||||
}
|
||||
|
||||
if (includeDefaults || strength != null) {
|
||||
builder.field("strength", strength);
|
||||
}
|
||||
|
||||
if (includeDefaults || decomposition != null) {
|
||||
builder.field("decomposition", decomposition);
|
||||
}
|
||||
|
||||
if (includeDefaults || alternate != null) {
|
||||
builder.field("alternate", alternate);
|
||||
}
|
||||
|
||||
if (includeDefaults || caseLevel) {
|
||||
builder.field("case_level", caseLevel);
|
||||
}
|
||||
|
||||
if (includeDefaults || caseFirst != null) {
|
||||
builder.field("case_first", caseFirst);
|
||||
}
|
||||
|
||||
if (includeDefaults || numeric) {
|
||||
builder.field("numeric", numeric);
|
||||
}
|
||||
|
||||
if (includeDefaults || variableTop != null) {
|
||||
builder.field("variable_top", variableTop);
|
||||
}
|
||||
|
||||
if (includeDefaults || hiraganaQuaternaryMode) {
|
||||
builder.field("hiragana_quaternary_mode", hiraganaQuaternaryMode);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
|
||||
final String value;
|
||||
if (context.externalValueSet()) {
|
||||
value = context.externalValue().toString();
|
||||
} else {
|
||||
XContentParser parser = context.parser();
|
||||
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
|
||||
value = fieldType().nullValueAsString();
|
||||
} else {
|
||||
value = parser.textOrNull();
|
||||
}
|
||||
}
|
||||
|
||||
if (value == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
RawCollationKey key = collator.getRawCollationKey(value, null);
|
||||
final BytesRef binaryValue = new BytesRef(key.bytes, 0, key.size);
|
||||
|
||||
if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
|
||||
Field field = new Field(fieldType().name(), binaryValue, fieldType());
|
||||
fields.add(field);
|
||||
}
|
||||
|
||||
if (fieldType().hasDocValues()) {
|
||||
fields.add(new SortedDocValuesField(fieldType().name(), binaryValue));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -19,6 +19,9 @@
|
|||
|
||||
package org.elasticsearch.plugin.analysis.icu;
|
||||
|
||||
import static java.util.Collections.singletonMap;
|
||||
|
||||
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
|
||||
import org.elasticsearch.index.analysis.CharFilterFactory;
|
||||
import org.elasticsearch.index.analysis.IcuCollationTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.IcuFoldingTokenFilterFactory;
|
||||
|
@ -28,16 +31,20 @@ import org.elasticsearch.index.analysis.IcuTokenizerFactory;
|
|||
import org.elasticsearch.index.analysis.IcuTransformTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.TokenizerFactory;
|
||||
import org.elasticsearch.index.mapper.ICUCollationKeywordFieldMapper;
|
||||
import org.elasticsearch.index.mapper.Mapper;
|
||||
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
|
||||
import org.elasticsearch.plugins.AnalysisPlugin;
|
||||
import org.elasticsearch.plugins.MapperPlugin;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import static java.util.Collections.singletonMap;
|
||||
|
||||
public class AnalysisICUPlugin extends Plugin implements AnalysisPlugin {
|
||||
public class AnalysisICUPlugin extends Plugin implements AnalysisPlugin, MapperPlugin {
|
||||
@Override
|
||||
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
|
||||
return singletonMap("icu_normalizer", IcuNormalizerCharFilterFactory::new);
|
||||
|
@ -57,4 +64,20 @@ public class AnalysisICUPlugin extends Plugin implements AnalysisPlugin {
|
|||
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
|
||||
return singletonMap("icu_tokenizer", IcuTokenizerFactory::new);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, Mapper.TypeParser> getMappers() {
|
||||
return Collections.singletonMap(ICUCollationKeywordFieldMapper.CONTENT_TYPE, new ICUCollationKeywordFieldMapper.TypeParser());
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<NamedWriteableRegistry.Entry> getNamedWriteables() {
|
||||
return Collections.singletonList(
|
||||
new NamedWriteableRegistry.Entry(
|
||||
DocValueFormat.class,
|
||||
ICUCollationKeywordFieldMapper.CollationFieldType.COLLATE_FORMAT.getWriteableName(),
|
||||
in -> ICUCollationKeywordFieldMapper.CollationFieldType.COLLATE_FORMAT
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,145 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.RawCollationKey;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.TermInSetQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
import org.elasticsearch.index.mapper.ICUCollationKeywordFieldMapper.CollationFieldType;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class CollationFieldTypeTests extends FieldTypeTestCase {
|
||||
@Override
|
||||
protected MappedFieldType createDefaultFieldType() {
|
||||
return new CollationFieldType();
|
||||
}
|
||||
|
||||
public void testIsFieldWithinQuery() throws IOException {
|
||||
CollationFieldType ft = new CollationFieldType();
|
||||
// current impl ignores args and shourd always return INTERSECTS
|
||||
assertEquals(Relation.INTERSECTS, ft.isFieldWithinQuery(null,
|
||||
RandomStrings.randomAsciiOfLengthBetween(random(), 0, 5),
|
||||
RandomStrings.randomAsciiOfLengthBetween(random(), 0, 5),
|
||||
randomBoolean(), randomBoolean(), null, null, null));
|
||||
}
|
||||
|
||||
public void testTermQuery() {
|
||||
MappedFieldType ft = createDefaultFieldType();
|
||||
ft.setName("field");
|
||||
ft.setIndexOptions(IndexOptions.DOCS);
|
||||
|
||||
Collator collator = Collator.getInstance(new ULocale("tr"));
|
||||
collator.setStrength(Collator.PRIMARY);
|
||||
collator.freeze();
|
||||
((CollationFieldType) ft).setCollator(collator);
|
||||
|
||||
RawCollationKey key = collator.getRawCollationKey("ı will use turkish casıng", null);
|
||||
BytesRef expected = new BytesRef(key.bytes, 0, key.size);
|
||||
|
||||
assertEquals(new TermQuery(new Term("field", expected)), ft.termQuery("I WİLL USE TURKİSH CASING", null));
|
||||
|
||||
ft.setIndexOptions(IndexOptions.NONE);
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> ft.termQuery("bar", null));
|
||||
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||
}
|
||||
|
||||
public void testTermsQuery() {
|
||||
MappedFieldType ft = createDefaultFieldType();
|
||||
ft.setName("field");
|
||||
ft.setIndexOptions(IndexOptions.DOCS);
|
||||
|
||||
Collator collator = Collator.getInstance().freeze();
|
||||
((CollationFieldType) ft).setCollator(collator);
|
||||
|
||||
RawCollationKey fooKey = collator.getRawCollationKey("foo", null);
|
||||
RawCollationKey barKey = collator.getRawCollationKey("bar", null);
|
||||
|
||||
List<BytesRef> terms = new ArrayList<>();
|
||||
terms.add(new BytesRef(fooKey.bytes, 0, fooKey.size));
|
||||
terms.add(new BytesRef(barKey.bytes, 0, barKey.size));
|
||||
|
||||
assertEquals(new TermInSetQuery("field", terms),
|
||||
ft.termsQuery(Arrays.asList("foo", "bar"), null));
|
||||
|
||||
ft.setIndexOptions(IndexOptions.NONE);
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> ft.termsQuery(Arrays.asList("foo", "bar"), null));
|
||||
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||
}
|
||||
|
||||
public void testRegexpQuery() {
|
||||
MappedFieldType ft = createDefaultFieldType();
|
||||
ft.setName("field");
|
||||
ft.setIndexOptions(IndexOptions.DOCS);
|
||||
expectThrows(UnsupportedOperationException.class,
|
||||
() -> ft.regexpQuery("foo.*", 0, 10, null, null));
|
||||
}
|
||||
|
||||
public void testFuzzyQuery() {
|
||||
MappedFieldType ft = createDefaultFieldType();
|
||||
ft.setName("field");
|
||||
ft.setIndexOptions(IndexOptions.DOCS);
|
||||
expectThrows(UnsupportedOperationException.class,
|
||||
() -> ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));
|
||||
}
|
||||
|
||||
public void testPrefixQuery() {
|
||||
MappedFieldType ft = createDefaultFieldType();
|
||||
ft.setName("field");
|
||||
ft.setIndexOptions(IndexOptions.DOCS);
|
||||
expectThrows(UnsupportedOperationException.class,
|
||||
() -> ft.prefixQuery("prefix", null, null));
|
||||
}
|
||||
|
||||
public void testRangeQuery() {
|
||||
MappedFieldType ft = createDefaultFieldType();
|
||||
ft.setName("field");
|
||||
ft.setIndexOptions(IndexOptions.DOCS);
|
||||
|
||||
Collator collator = Collator.getInstance().freeze();
|
||||
((CollationFieldType) ft).setCollator(collator);
|
||||
|
||||
RawCollationKey aKey = collator.getRawCollationKey("a", null);
|
||||
RawCollationKey bKey = collator.getRawCollationKey("b", null);
|
||||
|
||||
TermRangeQuery expected = new TermRangeQuery("field", new BytesRef(aKey.bytes, 0, aKey.size),
|
||||
new BytesRef(bKey.bytes, 0, bKey.size), false, false);
|
||||
|
||||
assertEquals(expected, ft.rangeQuery("a", "b", false, false, null));
|
||||
|
||||
ft.setIndexOptions(IndexOptions.NONE);
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> ft.rangeQuery("a", "b", false, false, null));
|
||||
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,443 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertOrderedSearchHits;
|
||||
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.RuleBasedCollator;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import org.elasticsearch.action.search.SearchRequest;
|
||||
import org.elasticsearch.action.search.SearchResponse;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.search.sort.SortOrder;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
||||
public class ICUCollationKeywordFieldMapperIT extends ESIntegTestCase {
|
||||
|
||||
@Override
|
||||
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
||||
return Collections.singletonList(AnalysisICUPlugin.class);
|
||||
}
|
||||
|
||||
/*
|
||||
* Turkish has some funny casing.
|
||||
* This test shows how you can solve this kind of thing easily with collation.
|
||||
* Instead of using LowerCaseFilter, use a turkish collator with primary strength.
|
||||
* Then things will sort and match correctly.
|
||||
*/
|
||||
public void testBasicUsage() throws Exception {
|
||||
String index = "foo";
|
||||
String type = "mytype";
|
||||
|
||||
String[] equilavent = {"I WİLL USE TURKİSH CASING", "ı will use turkish casıng"};
|
||||
|
||||
XContentBuilder builder = jsonBuilder()
|
||||
.startObject().startObject("properties")
|
||||
.startObject("collate")
|
||||
.field("type", "icu_collation_keyword")
|
||||
.field("language", "tr")
|
||||
.field("strength", "primary")
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||
|
||||
// both values should collate to same value
|
||||
indexRandom(true,
|
||||
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"" + equilavent[0] + "\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[1] + "\"}", XContentType.JSON)
|
||||
);
|
||||
|
||||
// searching for either of the terms should return both results since they collate to the same value
|
||||
SearchRequest request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equilavent[0] : equilavent[1]))
|
||||
.sort("collate")
|
||||
.sort("_uid", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
|
||||
);
|
||||
|
||||
SearchResponse response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 2L);
|
||||
assertOrderedSearchHits(response, "2", "1");
|
||||
}
|
||||
|
||||
/*
|
||||
* Test usage of the decomposition option for unicode normalization.
|
||||
*/
|
||||
public void testNormalization() throws Exception {
|
||||
String index = "foo";
|
||||
String type = "mytype";
|
||||
|
||||
String[] equilavent = {"I W\u0049\u0307LL USE TURKİSH CASING", "ı will use turkish casıng"};
|
||||
|
||||
XContentBuilder builder = jsonBuilder()
|
||||
.startObject().startObject("properties")
|
||||
.startObject("collate")
|
||||
.field("type", "icu_collation_keyword")
|
||||
.field("language", "tr")
|
||||
.field("strength", "primary")
|
||||
.field("decomposition", "canonical")
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||
|
||||
indexRandom(true,
|
||||
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"" + equilavent[0] + "\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[1] + "\"}", XContentType.JSON)
|
||||
);
|
||||
|
||||
// searching for either of the terms should return both results since they collate to the same value
|
||||
SearchRequest request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equilavent[0] : equilavent[1]))
|
||||
.sort("collate")
|
||||
.sort("_uid", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
|
||||
);
|
||||
|
||||
SearchResponse response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 2L);
|
||||
assertOrderedSearchHits(response, "2", "1");
|
||||
}
|
||||
|
||||
/*
|
||||
* Test secondary strength, for english case is not significant.
|
||||
*/
|
||||
public void testSecondaryStrength() throws Exception {
|
||||
String index = "foo";
|
||||
String type = "mytype";
|
||||
|
||||
String[] equilavent = {"TESTING", "testing"};
|
||||
|
||||
XContentBuilder builder = jsonBuilder()
|
||||
.startObject().startObject("properties")
|
||||
.startObject("collate")
|
||||
.field("type", "icu_collation_keyword")
|
||||
.field("language", "en")
|
||||
.field("strength", "secondary")
|
||||
.field("decomposition", "no")
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||
|
||||
indexRandom(true,
|
||||
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"" + equilavent[0] + "\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[1] + "\"}", XContentType.JSON)
|
||||
);
|
||||
|
||||
SearchRequest request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equilavent[0] : equilavent[1]))
|
||||
.sort("collate")
|
||||
.sort("_uid", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
|
||||
);
|
||||
|
||||
SearchResponse response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 2L);
|
||||
assertOrderedSearchHits(response, "2", "1");
|
||||
}
|
||||
|
||||
/*
|
||||
* Setting alternate=shifted to shift whitespace, punctuation and symbols
|
||||
* to quaternary level
|
||||
*/
|
||||
public void testIgnorePunctuation() throws Exception {
|
||||
String index = "foo";
|
||||
String type = "mytype";
|
||||
|
||||
String[] equilavent = {"foo-bar", "foo bar"};
|
||||
|
||||
XContentBuilder builder = jsonBuilder()
|
||||
.startObject().startObject("properties")
|
||||
.startObject("collate")
|
||||
.field("type", "icu_collation_keyword")
|
||||
.field("language", "en")
|
||||
.field("strength", "primary")
|
||||
.field("alternate", "shifted")
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||
|
||||
indexRandom(true,
|
||||
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"" + equilavent[0] + "\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[1] + "\"}", XContentType.JSON)
|
||||
);
|
||||
|
||||
SearchRequest request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equilavent[0] : equilavent[1]))
|
||||
.sort("collate")
|
||||
.sort("_uid", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
|
||||
);
|
||||
|
||||
SearchResponse response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 2L);
|
||||
assertOrderedSearchHits(response, "2", "1");
|
||||
}
|
||||
|
||||
/*
|
||||
* Setting alternate=shifted and variableTop to shift whitespace, but not
|
||||
* punctuation or symbols, to quaternary level
|
||||
*/
|
||||
public void testIgnoreWhitespace() throws Exception {
|
||||
String index = "foo";
|
||||
String type = "mytype";
|
||||
|
||||
XContentBuilder builder = jsonBuilder()
|
||||
.startObject().startObject("properties")
|
||||
.startObject("collate")
|
||||
.field("type", "icu_collation_keyword")
|
||||
.field("language", "en")
|
||||
.field("strength", "primary")
|
||||
.field("alternate", "shifted")
|
||||
.field("variable_top", " ")
|
||||
.field("index", false)
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||
|
||||
indexRandom(true,
|
||||
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"foo bar\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"foobar\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "3").setSource("{\"collate\":\"foo-bar\"}", XContentType.JSON)
|
||||
);
|
||||
|
||||
SearchRequest request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.sort("collate", SortOrder.ASC)
|
||||
.sort("_uid", SortOrder.ASC) // secondary sort should kick in on docs 1 and 3 because same value collate value
|
||||
);
|
||||
|
||||
SearchResponse response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 3L);
|
||||
assertOrderedSearchHits(response, "3", "1", "2");
|
||||
}
|
||||
|
||||
/*
|
||||
* Setting numeric to encode digits with numeric value, so that
|
||||
* foobar-9 sorts before foobar-10
|
||||
*/
|
||||
public void testNumerics() throws Exception {
|
||||
String index = "foo";
|
||||
String type = "mytype";
|
||||
|
||||
XContentBuilder builder = jsonBuilder()
|
||||
.startObject().startObject("properties")
|
||||
.startObject("collate")
|
||||
.field("type", "icu_collation_keyword")
|
||||
.field("language", "en")
|
||||
.field("numeric", true)
|
||||
.field("index", false)
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||
|
||||
indexRandom(true,
|
||||
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"foobar-10\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"foobar-9\"}", XContentType.JSON)
|
||||
);
|
||||
|
||||
SearchRequest request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.sort("collate", SortOrder.ASC)
|
||||
);
|
||||
|
||||
SearchResponse response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 2L);
|
||||
assertOrderedSearchHits(response, "2", "1");
|
||||
}
|
||||
|
||||
/*
|
||||
* Setting caseLevel=true to create an additional case level between
|
||||
* secondary and tertiary
|
||||
*/
|
||||
public void testIgnoreAccentsButNotCase() throws Exception {
|
||||
String index = "foo";
|
||||
String type = "mytype";
|
||||
|
||||
XContentBuilder builder = jsonBuilder()
|
||||
.startObject().startObject("properties")
|
||||
.startObject("collate")
|
||||
.field("type", "icu_collation_keyword")
|
||||
.field("language", "en")
|
||||
.field("strength", "primary")
|
||||
.field("case_level", true)
|
||||
.field("index", false)
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||
|
||||
indexRandom(true,
|
||||
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"résumé\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"Resume\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "3").setSource("{\"collate\":\"resume\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "4").setSource("{\"collate\":\"Résumé\"}", XContentType.JSON)
|
||||
);
|
||||
|
||||
SearchRequest request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.sort("collate", SortOrder.ASC)
|
||||
.sort("_uid", SortOrder.DESC)
|
||||
);
|
||||
|
||||
SearchResponse response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 4L);
|
||||
assertOrderedSearchHits(response, "3", "1", "4", "2");
|
||||
}
|
||||
|
||||
/*
|
||||
* Setting caseFirst=upper to cause uppercase strings to sort
|
||||
* before lowercase ones.
|
||||
*/
|
||||
public void testUpperCaseFirst() throws Exception {
|
||||
String index = "foo";
|
||||
String type = "mytype";
|
||||
|
||||
XContentBuilder builder = jsonBuilder()
|
||||
.startObject().startObject("properties")
|
||||
.startObject("collate")
|
||||
.field("type", "icu_collation_keyword")
|
||||
.field("language", "en")
|
||||
.field("strength", "tertiary")
|
||||
.field("case_first", "upper")
|
||||
.field("index", false)
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||
|
||||
indexRandom(true,
|
||||
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"resume\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"Resume\"}", XContentType.JSON)
|
||||
);
|
||||
|
||||
SearchRequest request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.sort("collate", SortOrder.ASC)
|
||||
);
|
||||
|
||||
SearchResponse response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 2L);
|
||||
assertOrderedSearchHits(response, "2", "1");
|
||||
}
|
||||
|
||||
/*
|
||||
* For german, you might want oe to sort and match with o umlaut.
|
||||
* This is not the default, but you can make a customized ruleset to do this.
|
||||
*
|
||||
* The default is DIN 5007-1, this shows how to tailor a collator to get DIN 5007-2 behavior.
|
||||
* http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4423383
|
||||
*/
|
||||
public void testCustomRules() throws Exception {
|
||||
String index = "foo";
|
||||
String type = "mytype";
|
||||
|
||||
RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
|
||||
String DIN5007_2_tailorings =
|
||||
"& ae , a\u0308 & AE , A\u0308" +
|
||||
"& oe , o\u0308 & OE , O\u0308" +
|
||||
"& ue , u\u0308 & UE , u\u0308";
|
||||
|
||||
RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
|
||||
String tailoredRules = tailoredCollator.getRules();
|
||||
|
||||
String[] equilavent = {"Töne", "Toene"};
|
||||
|
||||
XContentBuilder builder = jsonBuilder()
|
||||
.startObject().startObject("properties")
|
||||
.startObject("collate")
|
||||
.field("type", "icu_collation_keyword")
|
||||
.field("rules", tailoredRules)
|
||||
.field("strength", "primary")
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||
|
||||
indexRandom(true,
|
||||
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"" + equilavent[0] + "\"}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[1] + "\"}", XContentType.JSON)
|
||||
);
|
||||
|
||||
SearchRequest request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equilavent[0] : equilavent[1]))
|
||||
.sort("collate", SortOrder.ASC)
|
||||
.sort("_uid", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
|
||||
);
|
||||
|
||||
SearchResponse response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 2L);
|
||||
assertOrderedSearchHits(response, "2", "1");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,342 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.index.mapper;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
import com.ibm.icu.text.Collator;
|
||||
import com.ibm.icu.text.RawCollationKey;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.IndexableFieldType;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.compress.CompressedXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
import org.elasticsearch.index.IndexService;
|
||||
import org.elasticsearch.index.mapper.MapperService.MergeReason;
|
||||
import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||
import org.elasticsearch.test.InternalSettingsPlugin;
|
||||
import org.junit.Before;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
||||
public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
|
||||
|
||||
private static final String FIELD_TYPE = "icu_collation_keyword";
|
||||
|
||||
@Override
|
||||
protected Collection<Class<? extends Plugin>> getPlugins() {
|
||||
return Arrays.asList(AnalysisICUPlugin.class, InternalSettingsPlugin.class);
|
||||
}
|
||||
|
||||
IndexService indexService;
|
||||
DocumentMapperParser parser;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
indexService = createIndex("test");
|
||||
parser = indexService.mapperService().documentMapperParser();
|
||||
}
|
||||
|
||||
public void testDefaults() throws Exception {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("field", "1234")
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(2, fields.length);
|
||||
|
||||
Collator collator = Collator.getInstance();
|
||||
RawCollationKey key = collator.getRawCollationKey("1234", null);
|
||||
BytesRef expected = new BytesRef(key.bytes, 0, key.size);
|
||||
|
||||
assertEquals(expected, fields[0].binaryValue());
|
||||
IndexableFieldType fieldType = fields[0].fieldType();
|
||||
assertThat(fieldType.omitNorms(), equalTo(true));
|
||||
assertFalse(fieldType.tokenized());
|
||||
assertFalse(fieldType.stored());
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
|
||||
assertThat(fieldType.storeTermVectors(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
|
||||
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
|
||||
|
||||
assertEquals(expected, fields[1].binaryValue());
|
||||
fieldType = fields[1].fieldType();
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
|
||||
assertEquals(DocValuesType.SORTED, fieldType.docValuesType());
|
||||
}
|
||||
|
||||
public void testNullValue() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.nullField("field")
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field"));
|
||||
|
||||
mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE)
|
||||
.field("null_value", "1234").endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(0, fields.length);
|
||||
|
||||
doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.nullField("field")
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
Collator collator = Collator.getInstance();
|
||||
RawCollationKey key = collator.getRawCollationKey("1234", null);
|
||||
BytesRef expected = new BytesRef(key.bytes, 0, key.size);
|
||||
|
||||
fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(2, fields.length);
|
||||
assertEquals(expected, fields[0].binaryValue());
|
||||
}
|
||||
|
||||
public void testEnableStore() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE)
|
||||
.field("store", true).endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("field", "1234")
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(2, fields.length);
|
||||
assertTrue(fields[0].fieldType().stored());
|
||||
}
|
||||
|
||||
public void testDisableIndex() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE)
|
||||
.field("index", false).endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("field", "1234")
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(1, fields.length);
|
||||
assertEquals(IndexOptions.NONE, fields[0].fieldType().indexOptions());
|
||||
assertEquals(DocValuesType.SORTED, fields[0].fieldType().docValuesType());
|
||||
}
|
||||
|
||||
public void testDisableDocValues() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE)
|
||||
.field("doc_values", false).endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("field", "1234")
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(1, fields.length);
|
||||
assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType());
|
||||
}
|
||||
|
||||
public void testIndexOptions() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE)
|
||||
.field("index_options", "freqs").endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("field", "1234")
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(2, fields.length);
|
||||
assertEquals(IndexOptions.DOCS_AND_FREQS, fields[0].fieldType().indexOptions());
|
||||
|
||||
for (String indexOptions : Arrays.asList("positions", "offsets")) {
|
||||
final String mapping2 = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE)
|
||||
.field("index_options", indexOptions).endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> parser.parse("type", new CompressedXContent(mapping2)));
|
||||
assertEquals("The [" + FIELD_TYPE + "] field does not support positions, got [index_options]=" + indexOptions,
|
||||
e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void testEnableNorms() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE)
|
||||
.field("norms", true).endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("field", "1234")
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(2, fields.length);
|
||||
assertFalse(fields[0].fieldType().omitNorms());
|
||||
}
|
||||
|
||||
public void testCollator() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field")
|
||||
.field("type", FIELD_TYPE)
|
||||
.field("language", "tr")
|
||||
.field("strength", "primary")
|
||||
.endObject().endObject().endObject().endObject().string();
|
||||
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("field", "I WİLL USE TURKİSH CASING")
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
Collator collator = Collator.getInstance(new ULocale("tr"));
|
||||
collator.setStrength(Collator.PRIMARY);
|
||||
RawCollationKey key = collator.getRawCollationKey("ı will use turkish casıng", null); // should collate to same value
|
||||
BytesRef expected = new BytesRef(key.bytes, 0, key.size);
|
||||
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(2, fields.length);
|
||||
|
||||
assertEquals(expected, fields[0].binaryValue());
|
||||
IndexableFieldType fieldType = fields[0].fieldType();
|
||||
assertThat(fieldType.omitNorms(), equalTo(true));
|
||||
assertFalse(fieldType.tokenized());
|
||||
assertFalse(fieldType.stored());
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
|
||||
assertThat(fieldType.storeTermVectors(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
|
||||
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
|
||||
|
||||
assertEquals(expected, fields[1].binaryValue());
|
||||
fieldType = fields[1].fieldType();
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
|
||||
assertEquals(DocValuesType.SORTED, fieldType.docValuesType());
|
||||
}
|
||||
|
||||
public void testUpdateCollator() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field")
|
||||
.field("type", FIELD_TYPE)
|
||||
.field("language", "tr")
|
||||
.field("strength", "primary")
|
||||
.endObject().endObject().endObject().endObject().string();
|
||||
indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, randomBoolean());
|
||||
|
||||
String mapping2 = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field")
|
||||
.field("type", FIELD_TYPE)
|
||||
.field("language", "en")
|
||||
.endObject().endObject().endObject().endObject().string();
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> indexService.mapperService().merge("type",
|
||||
new CompressedXContent(mapping2), MergeReason.MAPPING_UPDATE, randomBoolean()));
|
||||
assertEquals("Can't merge because of conflicts: [Cannot update language setting for [" + FIELD_TYPE
|
||||
+ "], Cannot update strength setting for [" + FIELD_TYPE + "]]", e.getMessage());
|
||||
}
|
||||
}
|
|
@ -175,13 +175,15 @@ public abstract class FieldTypeTestCase extends ESTestCase {
|
|||
// TODO: remove this once toString is no longer final on FieldType...
|
||||
protected void assertFieldTypeEquals(String property, MappedFieldType ft1, MappedFieldType ft2) {
|
||||
if (ft1.equals(ft2) == false) {
|
||||
fail("Expected equality, testing property " + property + "\nexpected: " + toString(ft1) + "; \nactual: " + toString(ft2) + "\n");
|
||||
fail("Expected equality, testing property " + property + "\nexpected: " + toString(ft1) + "; \nactual: " + toString(ft2)
|
||||
+ "\n");
|
||||
}
|
||||
}
|
||||
|
||||
protected void assertFieldTypeNotEquals(String property, MappedFieldType ft1, MappedFieldType ft2) {
|
||||
if (ft1.equals(ft2)) {
|
||||
fail("Expected inequality, testing property " + property + "\nfirst: " + toString(ft1) + "; \nsecond: " + toString(ft2) + "\n");
|
||||
fail("Expected inequality, testing property " + property + "\nfirst: " + toString(ft1) + "; \nsecond: " + toString(ft2)
|
||||
+ "\n");
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue