Add ICUCollationFieldMapper (#24126)
Adds a new "icu_collation_keyword" field type that exposes Lucene's ICUCollationDocValuesField. ICUCollationDocValuesField is the replacement for ICUCollationKeyFilter, which has been deprecated since Lucene 5.0.
parent 3f1ef488cd
commit b24326271e
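As context for the diff below: the new mapper turns each field value into an ICU collation sort key and stores that key in a binary doc values field, which is what makes locale-aware sorting work without an analyzer. The following standalone sketch is illustrative only (the field name `sort_key_demo` and the sample value are made up); it uses the same ICU4J and Lucene calls that the mapper's `parseCreateField` uses later in this commit.

[source,java]
--------------------------------------------------
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.util.ULocale;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.util.BytesRef;

public class CollationKeyDemo {
    public static void main(String[] args) {
        // Build a German "phonebook" collator, matching the documentation example in this commit.
        Collator collator = Collator.getInstance(new ULocale("de", "DE", "@collation=phonebook")).freeze();

        // Encode the value into a binary, locale-aware sort key.
        RawCollationKey key = collator.getRawCollationKey("Müller", null);
        BytesRef binaryValue = new BytesRef(key.bytes, 0, key.size);

        // The mapper stores this key as a SortedDocValuesField so it can be used for sorting.
        SortedDocValuesField dvField = new SortedDocValuesField("sort_key_demo", binaryValue);
        System.out.println(dvField);
    }
}
--------------------------------------------------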
@@ -57,7 +57,7 @@ public abstract class StringFieldType extends TermBasedFieldType {
     }

     @Override
-    public final Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions,
+    public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions,
             boolean transpositions) {
         failIfNotIndexed();
         return new FuzzyQuery(new Term(name(), indexedValueForSearch(value)),
@@ -65,7 +65,7 @@ public abstract class StringFieldType extends TermBasedFieldType {
     }

     @Override
-    public final Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
+    public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
         failIfNotIndexed();
         PrefixQuery query = new PrefixQuery(new Term(name(), indexedValueForSearch(value)));
         if (method != null) {
@@ -75,7 +75,7 @@ public abstract class StringFieldType extends TermBasedFieldType {
     }

     @Override
-    public final Query regexpQuery(String value, int flags, int maxDeterminizedStates,
+    public Query regexpQuery(String value, int flags, int maxDeterminizedStates,
             MultiTermQuery.RewriteMethod method, QueryShardContext context) {
         failIfNotIndexed();
         RegexpQuery query = new RegexpQuery(new Term(name(), indexedValueForSearch(value)), flags, maxDeterminizedStates);
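The only change in these hunks is dropping the `final` modifier so that subclasses may override the query methods; the `CollationFieldType` added later in this commit overrides them to reject fuzzy, prefix, and regexp queries, which are not meaningful on opaque collation keys. A minimal toy sketch of that pattern (all class and method names here are invented for illustration, not Elasticsearch APIs):

[source,java]
--------------------------------------------------
// Toy illustration of why `final` was removed: a base class exposes a default
// query method, and a specialised subclass overrides it to opt out.
class BaseFieldType {
    // Previously `final`; now overridable.
    public String fuzzyQuery(String value) {
        return "fuzzy(" + value + ")";
    }
}

class CollationLikeFieldType extends BaseFieldType {
    @Override
    public String fuzzyQuery(String value) {
        // Collation keys are opaque byte sequences, so fuzzy matching is rejected.
        throw new UnsupportedOperationException("fuzzy queries are not supported on collation keys");
    }
}

class FinalRemovalDemo {
    public static void main(String[] args) {
        BaseFieldType ft = new CollationLikeFieldType();
        try {
            ft.fuzzyQuery("foo");
        } catch (UnsupportedOperationException e) {
            System.out.println("rejected as expected: " + e.getMessage());
        }
    }
}
--------------------------------------------------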
@@ -302,50 +302,46 @@ PUT icu_sample
 --------------------------------------------------
 // CONSOLE


 [[analysis-icu-collation]]
 ==== ICU Collation Token Filter

+[WARNING]
+======
+This token filter has been deprecated since Lucene 5.0. Please use
+<<analysis-icu-collation-keyword-field, ICU Collation Keyword Field>>.
+======
+
+[[analysis-icu-collation-keyword-field]]
+==== ICU Collation Keyword Field
+
 Collations are used for sorting documents in a language-specific word order.
-The `icu_collation` token filter is available to all indices and defaults to
-using the
-{defguide}/sorting-collations.html#uca[DUCET collation],
+The `icu_collation_keyword` field type is available to all indices and will encode
+the terms directly as bytes in a doc values field and a single indexed token just
+like a standard {ref}/keyword.html[Keyword Field].
+
+Defaults to using {defguide}/sorting-collations.html#uca[DUCET collation],
 which is a best-effort attempt at language-neutral sorting.

 Below is an example of how to set up a field for sorting German names in
 ``phonebook'' order:

 [source,js]
---------------------------------------------------
-PUT /my_index
+--------------------------
+PUT my_index
 {
-  "settings": {
-    "analysis": {
-      "filter": {
-        "german_phonebook": {
-          "type": "icu_collation",
-          "language": "de",
-          "country": "DE",
-          "variant": "@collation=phonebook"
-        }
-      },
-      "analyzer": {
-        "german_phonebook": {
-          "tokenizer": "keyword",
-          "filter": [ "german_phonebook" ]
-        }
-      }
-    }
-  },
   "mappings": {
     "user": {
       "properties": {
         "name": {   <1>
           "type": "text",
           "fields": {
             "sort": {  <2>
-              "type": "text",
-              "fielddata": true,
-              "analyzer": "german_phonebook"
+              "type": "icu_collation_keyword",
+              "index": false,
+              "language": "de",
+              "country": "DE",
+              "variant": "@collation=phonebook"
             }
           }
         }
@@ -364,15 +360,47 @@ GET _search <3>
     "sort": "name.sort"
 }

---------------------------------------------------
+--------------------------
 // CONSOLE

 <1> The `name` field uses the `standard` analyzer, and so support full text queries.
-<2> The `name.sort` field uses the `keyword` analyzer to preserve the name as
-a single token, and applies the `german_phonebook` token filter to index
-the value in German phonebook sort order.
+<2> The `name.sort` field is an `icu_collation_keyword` field that will preserve the name as
+a single token doc values, and applies the German ``phonebook'' order.
 <3> An example query which searches the `name` field and sorts on the `name.sort` field.
+
+==== Parameters for ICU Collation Keyword Fields
+
+The following parameters are accepted by `icu_collation_keyword` fields:
+
+[horizontal]
+
+`doc_values`::
+
+    Should the field be stored on disk in a column-stride fashion, so that it
+    can later be used for sorting, aggregations, or scripting? Accepts `true`
+    (default) or `false`.
+
+`index`::
+
+    Should the field be searchable? Accepts `true` (default) or `false`.
+
+`null_value`::
+
+    Accepts a string value which is substituted for any explicit `null`
+    values. Defaults to `null`, which means the field is treated as missing.
+
+`store`::
+
+    Whether the field value should be stored and retrievable separately from
+    the {ref}/mapping-source-field.html[`_source`] field. Accepts `true` or `false`
+    (default).
+
+`fields`::
+
+    Multi-fields allow the same string value to be indexed in multiple ways for
+    different purposes, such as one field for search and a multi-field for
+    sorting and aggregations.

 ===== Collation options

 `strength`::
@@ -404,14 +432,14 @@ Possible values: `shifted` or `non-ignorable`. Sets the alternate handling for
 strength `quaternary` to be either shifted or non-ignorable. Which boils down
 to ignoring punctuation and whitespace.

-`caseLevel`::
+`case_level`::

 Possible values: `true` or `false` (default). Whether case level sorting is
 required. When strength is set to `primary` this will ignore accent
 differences.


-`caseFirst`::
+`case_first`::

 Possible values: `lower` or `upper`. Useful to control which case is sorted
 first when case is not ignored for strength `tertiary`. The default depends on
@@ -424,11 +452,11 @@ according to their numeric representation. For example the value `egg-9` is
 sorted before the value `egg-21`.


-`variableTop`::
+`variable_top`::

 Single character or contraction. Controls what is variable for `alternate`.

-`hiraganaQuaternaryMode`::
+`hiragana_quaternary_mode`::

 Possible values: `true` or `false`. Distinguishing between Katakana and
 Hiragana characters in `quaternary` strength.
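These option names map one-to-one onto ICU4J collator settings; the mapper's `buildCollator()` (in the new Java file below) applies them essentially as in this self-contained sketch. The locale and option values here are illustrative, not defaults.

[source,java]
--------------------------------------------------
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;

public class CollatorOptionsDemo {
    public static void main(String[] args) {
        // language/country/variant select the base collation, e.g. German phonebook order.
        Collator collator = Collator.getInstance(new ULocale("de", "DE", "@collation=phonebook"));

        // strength: primary ignores case and accent differences.
        collator.setStrength(Collator.PRIMARY);

        // decomposition: canonical enables Unicode normalization during collation.
        collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);

        // Expert options live on RuleBasedCollator (the concrete ICU implementation).
        RuleBasedCollator rbc = (RuleBasedCollator) collator;
        rbc.setAlternateHandlingShifted(true); // alternate: shifted
        rbc.setCaseLevel(true);                // case_level: true
        rbc.setNumericCollation(true);         // numeric: true

        // Freeze for thread-safe reuse, as the mapper does.
        rbc.freeze();
        System.out.println("collator strength = " + rbc.getStrength());
    }
}
--------------------------------------------------

Freezing the collator at the end is what allows the mapper to share one collator instance across indexing threads.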
@ -0,0 +1,746 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.mapper;
|
||||||
|
|
||||||
|
import com.ibm.icu.text.Collator;
|
||||||
|
import com.ibm.icu.text.RawCollationKey;
|
||||||
|
import com.ibm.icu.text.RuleBasedCollator;
|
||||||
|
import com.ibm.icu.util.ULocale;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.SortedDocValuesField;
|
||||||
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
import org.apache.lucene.index.IndexableField;
|
||||||
|
import org.apache.lucene.search.MultiTermQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||||
|
import org.elasticsearch.common.lucene.Lucene;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.common.unit.Fuzziness;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
|
import org.elasticsearch.common.xcontent.support.XContentMapValues;
|
||||||
|
import org.elasticsearch.index.analysis.IndexableBinaryStringTools;
|
||||||
|
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||||
|
import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData;
|
||||||
|
import org.elasticsearch.index.query.QueryShardContext;
|
||||||
|
import org.elasticsearch.search.DocValueFormat;
|
||||||
|
import org.joda.time.DateTimeZone;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Objects;
|
||||||
|
import java.util.function.LongSupplier;
|
||||||
|
|
||||||
|
public class ICUCollationKeywordFieldMapper extends FieldMapper {
|
||||||
|
|
||||||
|
public static final String CONTENT_TYPE = "icu_collation_keyword";
|
||||||
|
|
||||||
|
public static class Defaults {
|
||||||
|
public static final MappedFieldType FIELD_TYPE = new CollationFieldType();
|
||||||
|
|
||||||
|
static {
|
||||||
|
FIELD_TYPE.setTokenized(false);
|
||||||
|
FIELD_TYPE.setOmitNorms(true);
|
||||||
|
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
|
||||||
|
FIELD_TYPE.freeze();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final String NULL_VALUE = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final class CollationFieldType extends StringFieldType {
|
||||||
|
private Collator collator = null;
|
||||||
|
|
||||||
|
public CollationFieldType() {
|
||||||
|
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
|
||||||
|
setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected CollationFieldType(CollationFieldType ref) {
|
||||||
|
super(ref);
|
||||||
|
this.collator = ref.collator;
|
||||||
|
}
|
||||||
|
|
||||||
|
public CollationFieldType clone() {
|
||||||
|
return new CollationFieldType(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
return super.equals(o) && Objects.equals(collator, ((CollationFieldType) o).collator);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void checkCompatibility(MappedFieldType otherFT, List<String> conflicts, boolean strict) {
|
||||||
|
super.checkCompatibility(otherFT, conflicts, strict);
|
||||||
|
CollationFieldType other = (CollationFieldType) otherFT;
|
||||||
|
if (!Objects.equals(collator, other.collator)) {
|
||||||
|
conflicts.add("mapper [" + name() + "] has different [collator]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return 31 * super.hashCode() + Objects.hashCode(collator);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String typeName() {
|
||||||
|
return CONTENT_TYPE;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collator collator() {
|
||||||
|
return collator;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCollator(Collator collator) {
|
||||||
|
checkIfFrozen();
|
||||||
|
this.collator = collator.isFrozen() ? collator : collator.freeze();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query nullValueQuery() {
|
||||||
|
if (nullValue() == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return termQuery(nullValue(), null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public IndexFieldData.Builder fielddataBuilder() {
|
||||||
|
failIfNoDocValues();
|
||||||
|
return new DocValuesIndexFieldData.Builder();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected BytesRef indexedValueForSearch(Object value) {
|
||||||
|
if (value == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (value instanceof BytesRef) {
|
||||||
|
value = ((BytesRef) value).utf8ToString();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (collator != null) {
|
||||||
|
RawCollationKey key = collator.getRawCollationKey(value.toString(), null);
|
||||||
|
return new BytesRef(key.bytes, 0, key.size);
|
||||||
|
} else {
|
||||||
|
throw new IllegalStateException("collator is null");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query fuzzyQuery(Object value, Fuzziness fuzziness, int prefixLength, int maxExpansions,
|
||||||
|
boolean transpositions) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query regexpQuery(String value, int flags, int maxDeterminizedStates,
|
||||||
|
MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DocValueFormat COLLATE_FORMAT = new DocValueFormat() {
|
||||||
|
@Override
|
||||||
|
public String getWriteableName() {
|
||||||
|
return "collate";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeTo(StreamOutput out) throws IOException {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String format(long value) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String format(double value) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String format(BytesRef value) {
|
||||||
|
int encodedLength = IndexableBinaryStringTools.getEncodedLength(value.bytes, value.offset, value.length);
|
||||||
|
char[] encoded = new char[encodedLength];
|
||||||
|
IndexableBinaryStringTools.encode(value.bytes, value.offset, value.length, encoded, 0, encodedLength);
|
||||||
|
return new String(encoded, 0, encodedLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long parseLong(String value, boolean roundUp, LongSupplier now) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public double parseDouble(String value, boolean roundUp, LongSupplier now) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef parseBytesRef(String value) {
|
||||||
|
char[] encoded = value.toCharArray();
|
||||||
|
int decodedLength = IndexableBinaryStringTools.getDecodedLength(encoded, 0, encoded.length);
|
||||||
|
byte[] decoded = new byte[decodedLength];
|
||||||
|
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0, decodedLength);
|
||||||
|
return new BytesRef(decoded);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocValueFormat docValueFormat(final String format, final DateTimeZone timeZone) {
|
||||||
|
return COLLATE_FORMAT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class Builder extends FieldMapper.Builder<Builder, ICUCollationKeywordFieldMapper> {
|
||||||
|
private String rules = null;
|
||||||
|
private String language = null;
|
||||||
|
private String country = null;
|
||||||
|
private String variant = null;
|
||||||
|
private String strength = null;
|
||||||
|
private String decomposition = null;
|
||||||
|
private String alternate = null;
|
||||||
|
private boolean caseLevel = false;
|
||||||
|
private String caseFirst = null;
|
||||||
|
private boolean numeric = false;
|
||||||
|
private String variableTop = null;
|
||||||
|
private boolean hiraganaQuaternaryMode = false;
|
||||||
|
private String nullValue = Defaults.NULL_VALUE;
|
||||||
|
|
||||||
|
public Builder(String name) {
|
||||||
|
super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE);
|
||||||
|
builder = this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CollationFieldType fieldType() {
|
||||||
|
return (CollationFieldType) super.fieldType();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Builder indexOptions(IndexOptions indexOptions) {
|
||||||
|
if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) > 0) {
|
||||||
|
throw new IllegalArgumentException("The [" + CONTENT_TYPE + "] field does not support positions, got [index_options]="
|
||||||
|
+ indexOptionToString(indexOptions));
|
||||||
|
}
|
||||||
|
|
||||||
|
return super.indexOptions(indexOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String rules() {
|
||||||
|
return rules;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder rules(final String rules) {
|
||||||
|
this.rules = rules;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String language() {
|
||||||
|
return language;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder language(final String language) {
|
||||||
|
this.language = language;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String country() {
|
||||||
|
return country;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder country(final String country) {
|
||||||
|
this.country = country;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String variant() {
|
||||||
|
return variant;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder variant(final String variant) {
|
||||||
|
this.variant = variant;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String strength() {
|
||||||
|
return strength;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder strength(final String strength) {
|
||||||
|
this.strength = strength;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String decomposition() {
|
||||||
|
return decomposition;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder decomposition(final String decomposition) {
|
||||||
|
this.decomposition = decomposition;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String alternate() {
|
||||||
|
return alternate;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder alternate(final String alternate) {
|
||||||
|
this.alternate = alternate;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean caseLevel() {
|
||||||
|
return caseLevel;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder caseLevel(final boolean caseLevel) {
|
||||||
|
this.caseLevel = caseLevel;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String caseFirst() {
|
||||||
|
return caseFirst;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder caseFirst(final String caseFirst) {
|
||||||
|
this.caseFirst = caseFirst;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean numeric() {
|
||||||
|
return numeric;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder numeric(final boolean numeric) {
|
||||||
|
this.numeric = numeric;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String variableTop() {
|
||||||
|
return variableTop;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder variableTop(final String variableTop) {
|
||||||
|
this.variableTop = variableTop;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hiraganaQuaternaryMode() {
|
||||||
|
return hiraganaQuaternaryMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Builder hiraganaQuaternaryMode(final boolean hiraganaQuaternaryMode) {
|
||||||
|
this.hiraganaQuaternaryMode = hiraganaQuaternaryMode;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collator buildCollator() {
|
||||||
|
Collator collator;
|
||||||
|
if (rules != null) {
|
||||||
|
try {
|
||||||
|
collator = new RuleBasedCollator(rules);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IllegalArgumentException("Failed to parse collation rules", e);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (language != null) {
|
||||||
|
ULocale locale;
|
||||||
|
if (country != null) {
|
||||||
|
if (variant != null) {
|
||||||
|
locale = new ULocale(language, country, variant);
|
||||||
|
} else {
|
||||||
|
locale = new ULocale(language, country);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
locale = new ULocale(language);
|
||||||
|
}
|
||||||
|
collator = Collator.getInstance(locale);
|
||||||
|
} else {
|
||||||
|
collator = Collator.getInstance();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// set the strength flag, otherwise it will be the default.
|
||||||
|
if (strength != null) {
|
||||||
|
if (strength.equalsIgnoreCase("primary")) {
|
||||||
|
collator.setStrength(Collator.PRIMARY);
|
||||||
|
} else if (strength.equalsIgnoreCase("secondary")) {
|
||||||
|
collator.setStrength(Collator.SECONDARY);
|
||||||
|
} else if (strength.equalsIgnoreCase("tertiary")) {
|
||||||
|
collator.setStrength(Collator.TERTIARY);
|
||||||
|
} else if (strength.equalsIgnoreCase("quaternary")) {
|
||||||
|
collator.setStrength(Collator.QUATERNARY);
|
||||||
|
} else if (strength.equalsIgnoreCase("identical")) {
|
||||||
|
collator.setStrength(Collator.IDENTICAL);
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Invalid strength: " + strength);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// set the decomposition flag, otherwise it will be the default.
|
||||||
|
if (decomposition != null) {
|
||||||
|
if (decomposition.equalsIgnoreCase("no")) {
|
||||||
|
collator.setDecomposition(Collator.NO_DECOMPOSITION);
|
||||||
|
} else if (decomposition.equalsIgnoreCase("canonical")) {
|
||||||
|
collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Invalid decomposition: " + decomposition);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// expert options: concrete subclasses are always a RuleBasedCollator
|
||||||
|
RuleBasedCollator rbc = (RuleBasedCollator) collator;
|
||||||
|
if (alternate != null) {
|
||||||
|
if (alternate.equalsIgnoreCase("shifted")) {
|
||||||
|
rbc.setAlternateHandlingShifted(true);
|
||||||
|
} else if (alternate.equalsIgnoreCase("non-ignorable")) {
|
||||||
|
rbc.setAlternateHandlingShifted(false);
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Invalid alternate: " + alternate);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (caseLevel) {
|
||||||
|
rbc.setCaseLevel(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (caseFirst != null) {
|
||||||
|
if (caseFirst.equalsIgnoreCase("lower")) {
|
||||||
|
rbc.setLowerCaseFirst(true);
|
||||||
|
} else if (caseFirst.equalsIgnoreCase("upper")) {
|
||||||
|
rbc.setUpperCaseFirst(true);
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Invalid caseFirst: " + caseFirst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (numeric) {
|
||||||
|
rbc.setNumericCollation(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (variableTop != null) {
|
||||||
|
rbc.setVariableTop(variableTop);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hiraganaQuaternaryMode) {
|
||||||
|
rbc.setHiraganaQuaternary(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// freeze so thread-safe
|
||||||
|
return collator.freeze();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ICUCollationKeywordFieldMapper build(BuilderContext context) {
|
||||||
|
final Collator collator = buildCollator();
|
||||||
|
fieldType().setCollator(collator);
|
||||||
|
setupFieldType(context);
|
||||||
|
return new ICUCollationKeywordFieldMapper(name, fieldType, defaultFieldType, context.indexSettings(),
|
||||||
|
multiFieldsBuilder.build(this, context), copyTo, rules, language, country, variant, strength, decomposition,
|
||||||
|
alternate, caseLevel, caseFirst, numeric, variableTop, hiraganaQuaternaryMode, collator);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class TypeParser implements Mapper.TypeParser {
|
||||||
|
@Override
|
||||||
|
public Mapper.Builder<?, ?> parse(String name, Map<String, Object> node, ParserContext parserContext)
|
||||||
|
throws MapperParsingException {
|
||||||
|
Builder builder = new Builder(name);
|
||||||
|
TypeParsers.parseField(builder, name, node, parserContext);
|
||||||
|
for (Iterator<Map.Entry<String, Object>> iterator = node.entrySet().iterator(); iterator.hasNext(); ) {
|
||||||
|
Map.Entry<String, Object> entry = iterator.next();
|
||||||
|
String fieldName = entry.getKey();
|
||||||
|
Object fieldNode = entry.getValue();
|
||||||
|
switch (fieldName) {
|
||||||
|
case "null_value":
|
||||||
|
if (fieldNode == null) {
|
||||||
|
throw new MapperParsingException("Property [null_value] cannot be null.");
|
||||||
|
}
|
||||||
|
builder.nullValue(fieldNode.toString());
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "norms":
|
||||||
|
builder.omitNorms(!XContentMapValues.nodeBooleanValue(fieldNode, "norms"));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "rules":
|
||||||
|
builder.rules(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "language":
|
||||||
|
builder.language(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "country":
|
||||||
|
builder.country(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "variant":
|
||||||
|
builder.variant(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "strength":
|
||||||
|
builder.strength(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "decomposition":
|
||||||
|
builder.decomposition(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "alternate":
|
||||||
|
builder.alternate(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "case_level":
|
||||||
|
builder.caseLevel(XContentMapValues.nodeBooleanValue(fieldNode, false));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "case_first":
|
||||||
|
builder.caseFirst(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "numeric":
|
||||||
|
builder.numeric(XContentMapValues.nodeBooleanValue(fieldNode, false));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "variable_top":
|
||||||
|
builder.variableTop(XContentMapValues.nodeStringValue(fieldNode, null));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
case "hiragana_quaternary_mode":
|
||||||
|
builder.hiraganaQuaternaryMode(XContentMapValues.nodeBooleanValue(fieldNode, false));
|
||||||
|
iterator.remove();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return builder;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final String rules;
|
||||||
|
private final String language;
|
||||||
|
private final String country;
|
||||||
|
private final String variant;
|
||||||
|
private final String strength;
|
||||||
|
private final String decomposition;
|
||||||
|
private final String alternate;
|
||||||
|
private final boolean caseLevel;
|
||||||
|
private final String caseFirst;
|
||||||
|
private final boolean numeric;
|
||||||
|
private final String variableTop;
|
||||||
|
private final boolean hiraganaQuaternaryMode;
|
||||||
|
private final Collator collator;
|
||||||
|
|
||||||
|
protected ICUCollationKeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
|
||||||
|
Settings indexSettings, MultiFields multiFields, CopyTo copyTo, String rules, String language,
|
||||||
|
String country, String variant,
|
||||||
|
String strength, String decomposition, String alternate, boolean caseLevel, String caseFirst,
|
||||||
|
boolean numeric, String variableTop, boolean hiraganaQuaternaryMode, Collator collator) {
|
||||||
|
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
|
||||||
|
assert collator.isFrozen();
|
||||||
|
this.rules = rules;
|
||||||
|
this.language = language;
|
||||||
|
this.country = country;
|
||||||
|
this.variant = variant;
|
||||||
|
this.strength = strength;
|
||||||
|
this.decomposition = decomposition;
|
||||||
|
this.alternate = alternate;
|
||||||
|
this.caseLevel = caseLevel;
|
||||||
|
this.caseFirst = caseFirst;
|
||||||
|
this.numeric = numeric;
|
||||||
|
this.variableTop = variableTop;
|
||||||
|
this.hiraganaQuaternaryMode = hiraganaQuaternaryMode;
|
||||||
|
this.collator = collator;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CollationFieldType fieldType() {
|
||||||
|
return (CollationFieldType) super.fieldType();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String contentType() {
|
||||||
|
return CONTENT_TYPE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doMerge(Mapper mergeWith, boolean updateAllTypes) {
|
||||||
|
super.doMerge(mergeWith, updateAllTypes);
|
||||||
|
|
||||||
|
List<String> conflicts = new ArrayList<>();
|
||||||
|
ICUCollationKeywordFieldMapper icuMergeWith = (ICUCollationKeywordFieldMapper) mergeWith;
|
||||||
|
|
||||||
|
if (!Objects.equals(rules, icuMergeWith.rules)) {
|
||||||
|
conflicts.add("Cannot update rules setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Objects.equals(language, icuMergeWith.language)) {
|
||||||
|
conflicts.add("Cannot update language setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Objects.equals(country, icuMergeWith.country)) {
|
||||||
|
conflicts.add("Cannot update country setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Objects.equals(variant, icuMergeWith.variant)) {
|
||||||
|
conflicts.add("Cannot update variant setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Objects.equals(strength, icuMergeWith.strength)) {
|
||||||
|
conflicts.add("Cannot update strength setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Objects.equals(decomposition, icuMergeWith.decomposition)) {
|
||||||
|
conflicts.add("Cannot update decomposition setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Objects.equals(alternate, icuMergeWith.alternate)) {
|
||||||
|
conflicts.add("Cannot update alternate setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (caseLevel != icuMergeWith.caseLevel) {
|
||||||
|
conflicts.add("Cannot update case_level setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Objects.equals(caseFirst, icuMergeWith.caseFirst)) {
|
||||||
|
conflicts.add("Cannot update case_first setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (numeric != icuMergeWith.numeric) {
|
||||||
|
conflicts.add("Cannot update numeric setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!Objects.equals(variableTop, icuMergeWith.variableTop)) {
|
||||||
|
conflicts.add("Cannot update variable_top setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hiraganaQuaternaryMode != icuMergeWith.hiraganaQuaternaryMode) {
|
||||||
|
conflicts.add("Cannot update hiragana_quaternary_mode setting for [" + CONTENT_TYPE + "]");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!conflicts.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("Can't merge because of conflicts: " + conflicts);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException {
|
||||||
|
super.doXContentBody(builder, includeDefaults, params);
|
||||||
|
|
||||||
|
if (includeDefaults || fieldType().nullValue() != null) {
|
||||||
|
builder.field("null_value", fieldType().nullValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || rules != null) {
|
||||||
|
builder.field("rules", rules);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || language != null) {
|
||||||
|
builder.field("language", language);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || country != null) {
|
||||||
|
builder.field("country", country);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || variant != null) {
|
||||||
|
builder.field("variant", variant);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || strength != null) {
|
||||||
|
builder.field("strength", strength);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || decomposition != null) {
|
||||||
|
builder.field("decomposition", decomposition);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || alternate != null) {
|
||||||
|
builder.field("alternate", alternate);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || caseLevel) {
|
||||||
|
builder.field("case_level", caseLevel);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || caseFirst != null) {
|
||||||
|
builder.field("case_first", caseFirst);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || numeric) {
|
||||||
|
builder.field("numeric", numeric);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || variableTop != null) {
|
||||||
|
builder.field("variable_top", variableTop);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (includeDefaults || hiraganaQuaternaryMode) {
|
||||||
|
builder.field("hiragana_quaternary_mode", hiraganaQuaternaryMode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
|
||||||
|
final String value;
|
||||||
|
if (context.externalValueSet()) {
|
||||||
|
value = context.externalValue().toString();
|
||||||
|
} else {
|
||||||
|
XContentParser parser = context.parser();
|
||||||
|
if (parser.currentToken() == XContentParser.Token.VALUE_NULL) {
|
||||||
|
value = fieldType().nullValueAsString();
|
||||||
|
} else {
|
||||||
|
value = parser.textOrNull();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (value == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
RawCollationKey key = collator.getRawCollationKey(value, null);
|
||||||
|
final BytesRef binaryValue = new BytesRef(key.bytes, 0, key.size);
|
||||||
|
|
||||||
|
if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
|
||||||
|
Field field = new Field(fieldType().name(), binaryValue, fieldType());
|
||||||
|
fields.add(field);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fieldType().hasDocValues()) {
|
||||||
|
fields.add(new SortedDocValuesField(fieldType().name(), binaryValue));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@@ -19,6 +19,9 @@

 package org.elasticsearch.plugin.analysis.icu;

+import static java.util.Collections.singletonMap;
+
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.IcuCollationTokenFilterFactory;
 import org.elasticsearch.index.analysis.IcuFoldingTokenFilterFactory;
@@ -28,16 +31,20 @@ import org.elasticsearch.index.analysis.IcuTokenizerFactory;
 import org.elasticsearch.index.analysis.IcuTransformTokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenizerFactory;
+import org.elasticsearch.index.mapper.ICUCollationKeywordFieldMapper;
+import org.elasticsearch.index.mapper.Mapper;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.plugins.AnalysisPlugin;
+import org.elasticsearch.plugins.MapperPlugin;
 import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.search.DocValueFormat;

+import java.util.Collections;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;

-import static java.util.Collections.singletonMap;
-
-public class AnalysisICUPlugin extends Plugin implements AnalysisPlugin {
+public class AnalysisICUPlugin extends Plugin implements AnalysisPlugin, MapperPlugin {
     @Override
     public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
         return singletonMap("icu_normalizer", IcuNormalizerCharFilterFactory::new);
@@ -57,4 +64,20 @@ public class AnalysisICUPlugin extends Plugin implements AnalysisPlugin {
     public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
         return singletonMap("icu_tokenizer", IcuTokenizerFactory::new);
     }
+
+    @Override
+    public Map<String, Mapper.TypeParser> getMappers() {
+        return Collections.singletonMap(ICUCollationKeywordFieldMapper.CONTENT_TYPE, new ICUCollationKeywordFieldMapper.TypeParser());
+    }
+
+    @Override
+    public List<NamedWriteableRegistry.Entry> getNamedWriteables() {
+        return Collections.singletonList(
+            new NamedWriteableRegistry.Entry(
+                DocValueFormat.class,
+                ICUCollationKeywordFieldMapper.CollationFieldType.COLLATE_FORMAT.getWriteableName(),
+                in -> ICUCollationKeywordFieldMapper.CollationFieldType.COLLATE_FORMAT
+            )
+        );
+    }
 }
@ -0,0 +1,145 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.index.mapper;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
|
||||||
|
import com.ibm.icu.text.Collator;
|
||||||
|
import com.ibm.icu.text.RawCollationKey;
|
||||||
|
import com.ibm.icu.util.ULocale;
|
||||||
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.TermInSetQuery;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.TermRangeQuery;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.elasticsearch.common.unit.Fuzziness;
|
||||||
|
import org.elasticsearch.index.mapper.ICUCollationKeywordFieldMapper.CollationFieldType;
|
||||||
|
import org.elasticsearch.index.mapper.MappedFieldType.Relation;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class CollationFieldTypeTests extends FieldTypeTestCase {
|
||||||
|
@Override
|
||||||
|
protected MappedFieldType createDefaultFieldType() {
|
||||||
|
return new CollationFieldType();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testIsFieldWithinQuery() throws IOException {
|
||||||
|
CollationFieldType ft = new CollationFieldType();
|
||||||
|
// current impl ignores args and should always return INTERSECTS
|
||||||
|
assertEquals(Relation.INTERSECTS, ft.isFieldWithinQuery(null,
|
||||||
|
RandomStrings.randomAsciiOfLengthBetween(random(), 0, 5),
|
||||||
|
RandomStrings.randomAsciiOfLengthBetween(random(), 0, 5),
|
||||||
|
randomBoolean(), randomBoolean(), null, null, null));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTermQuery() {
|
||||||
|
MappedFieldType ft = createDefaultFieldType();
|
||||||
|
ft.setName("field");
|
||||||
|
ft.setIndexOptions(IndexOptions.DOCS);
|
||||||
|
|
||||||
|
Collator collator = Collator.getInstance(new ULocale("tr"));
|
||||||
|
collator.setStrength(Collator.PRIMARY);
|
||||||
|
collator.freeze();
|
||||||
|
((CollationFieldType) ft).setCollator(collator);
|
||||||
|
|
||||||
|
RawCollationKey key = collator.getRawCollationKey("ı will use turkish casıng", null);
|
||||||
|
BytesRef expected = new BytesRef(key.bytes, 0, key.size);
|
||||||
|
|
||||||
|
assertEquals(new TermQuery(new Term("field", expected)), ft.termQuery("I WİLL USE TURKİSH CASING", null));
|
||||||
|
|
||||||
|
ft.setIndexOptions(IndexOptions.NONE);
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> ft.termQuery("bar", null));
|
||||||
|
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTermsQuery() {
|
||||||
|
MappedFieldType ft = createDefaultFieldType();
|
||||||
|
ft.setName("field");
|
||||||
|
ft.setIndexOptions(IndexOptions.DOCS);
|
||||||
|
|
||||||
|
Collator collator = Collator.getInstance().freeze();
|
||||||
|
((CollationFieldType) ft).setCollator(collator);
|
||||||
|
|
||||||
|
RawCollationKey fooKey = collator.getRawCollationKey("foo", null);
|
||||||
|
RawCollationKey barKey = collator.getRawCollationKey("bar", null);
|
||||||
|
|
||||||
|
List<BytesRef> terms = new ArrayList<>();
|
||||||
|
terms.add(new BytesRef(fooKey.bytes, 0, fooKey.size));
|
||||||
|
terms.add(new BytesRef(barKey.bytes, 0, barKey.size));
|
||||||
|
|
||||||
|
assertEquals(new TermInSetQuery("field", terms),
|
||||||
|
ft.termsQuery(Arrays.asList("foo", "bar"), null));
|
||||||
|
|
||||||
|
ft.setIndexOptions(IndexOptions.NONE);
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> ft.termsQuery(Arrays.asList("foo", "bar"), null));
|
||||||
|
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRegexpQuery() {
|
||||||
|
MappedFieldType ft = createDefaultFieldType();
|
||||||
|
ft.setName("field");
|
||||||
|
ft.setIndexOptions(IndexOptions.DOCS);
|
||||||
|
expectThrows(UnsupportedOperationException.class,
|
||||||
|
() -> ft.regexpQuery("foo.*", 0, 10, null, null));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testFuzzyQuery() {
|
||||||
|
MappedFieldType ft = createDefaultFieldType();
|
||||||
|
ft.setName("field");
|
||||||
|
ft.setIndexOptions(IndexOptions.DOCS);
|
||||||
|
expectThrows(UnsupportedOperationException.class,
|
||||||
|
() -> ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testPrefixQuery() {
|
||||||
|
MappedFieldType ft = createDefaultFieldType();
|
||||||
|
ft.setName("field");
|
||||||
|
ft.setIndexOptions(IndexOptions.DOCS);
|
||||||
|
expectThrows(UnsupportedOperationException.class,
|
||||||
|
() -> ft.prefixQuery("prefix", null, null));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRangeQuery() {
|
||||||
|
MappedFieldType ft = createDefaultFieldType();
|
||||||
|
ft.setName("field");
|
||||||
|
ft.setIndexOptions(IndexOptions.DOCS);
|
||||||
|
|
||||||
|
Collator collator = Collator.getInstance().freeze();
|
||||||
|
((CollationFieldType) ft).setCollator(collator);
|
||||||
|
|
||||||
|
RawCollationKey aKey = collator.getRawCollationKey("a", null);
|
||||||
|
RawCollationKey bKey = collator.getRawCollationKey("b", null);
|
||||||
|
|
||||||
|
TermRangeQuery expected = new TermRangeQuery("field", new BytesRef(aKey.bytes, 0, aKey.size),
|
||||||
|
new BytesRef(bKey.bytes, 0, bKey.size), false, false);
|
||||||
|
|
||||||
|
assertEquals(expected, ft.rangeQuery("a", "b", false, false, null));
|
||||||
|
|
||||||
|
ft.setIndexOptions(IndexOptions.NONE);
|
||||||
|
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||||
|
() -> ft.rangeQuery("a", "b", false, false, null));
|
||||||
|
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,443 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.elasticsearch.index.mapper;
|
||||||
|
|
||||||
|
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||||
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||||
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
|
||||||
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
|
||||||
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertOrderedSearchHits;
|
||||||
|
|
||||||
|
import com.ibm.icu.text.Collator;
|
||||||
|
import com.ibm.icu.text.RuleBasedCollator;
|
||||||
|
import com.ibm.icu.util.ULocale;
|
||||||
|
import org.elasticsearch.action.search.SearchRequest;
|
||||||
|
import org.elasticsearch.action.search.SearchResponse;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
|
import org.elasticsearch.common.xcontent.XContentType;
|
||||||
|
import org.elasticsearch.index.query.QueryBuilders;
|
||||||
|
import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin;
|
||||||
|
import org.elasticsearch.plugins.Plugin;
|
||||||
|
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||||
|
import org.elasticsearch.search.sort.SortOrder;
|
||||||
|
import org.elasticsearch.test.ESIntegTestCase;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
public class ICUCollationKeywordFieldMapperIT extends ESIntegTestCase {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
||||||
|
return Collections.singletonList(AnalysisICUPlugin.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Turkish has some funny casing.
|
||||||
|
* This test shows how you can solve this kind of thing easily with collation.
|
||||||
|
* Instead of using LowerCaseFilter, use a turkish collator with primary strength.
|
||||||
|
* Then things will sort and match correctly.
|
||||||
|
*/
|
||||||
|
public void testBasicUsage() throws Exception {
|
||||||
|
String index = "foo";
|
||||||
|
String type = "mytype";
|
||||||
|
|
||||||
|
String[] equilavent = {"I WİLL USE TURKİSH CASING", "ı will use turkish casıng"};
|
||||||
|
|
||||||
|
XContentBuilder builder = jsonBuilder()
|
||||||
|
.startObject().startObject("properties")
|
||||||
|
.startObject("collate")
|
||||||
|
.field("type", "icu_collation_keyword")
|
||||||
|
.field("language", "tr")
|
||||||
|
.field("strength", "primary")
|
||||||
|
.endObject()
|
||||||
|
.endObject().endObject();
|
||||||
|
|
||||||
|
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||||
|
|
||||||
|
// both values should collate to same value
|
||||||
|
indexRandom(true,
|
||||||
|
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"" + equilavent[0] + "\"}", XContentType.JSON),
|
||||||
|
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[1] + "\"}", XContentType.JSON)
|
||||||
|
);
|
||||||
|
|
||||||
|
// searching for either of the terms should return both results since they collate to the same value
|
||||||
|
SearchRequest request = new SearchRequest()
|
||||||
|
.indices(index)
|
||||||
|
.types(type)
|
||||||
|
.source(new SearchSourceBuilder()
|
||||||
|
.fetchSource(false)
|
||||||
|
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equilavent[0] : equilavent[1]))
|
||||||
|
.sort("collate")
|
||||||
|
.sort("_uid", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
|
||||||
|
);
|
||||||
|
|
||||||
|
SearchResponse response = client().search(request).actionGet();
|
||||||
|
assertNoFailures(response);
|
||||||
|
assertHitCount(response, 2L);
|
||||||
|
assertOrderedSearchHits(response, "2", "1");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test usage of the decomposition option for unicode normalization.
|
||||||
|
*/
|
||||||
|
public void testNormalization() throws Exception {
|
||||||
|
String index = "foo";
|
||||||
|
String type = "mytype";
|
||||||
|
|
||||||
|
String[] equilavent = {"I W\u0049\u0307LL USE TURKİSH CASING", "ı will use turkish casıng"};
|
||||||
|
|
||||||
|
XContentBuilder builder = jsonBuilder()
|
||||||
|
.startObject().startObject("properties")
|
||||||
|
.startObject("collate")
|
||||||
|
.field("type", "icu_collation_keyword")
|
||||||
|
.field("language", "tr")
|
||||||
|
.field("strength", "primary")
|
||||||
|
.field("decomposition", "canonical")
|
||||||
|
.endObject()
|
||||||
|
.endObject().endObject();
|
||||||
|
|
||||||
|
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||||
|
|
||||||
|
indexRandom(true,
|
||||||
|
client().prepareIndex(index, type, "1").setSource("{\"collate\":\"" + equilavent[0] + "\"}", XContentType.JSON),
|
||||||
|
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[1] + "\"}", XContentType.JSON)
|
||||||
|
);
|
||||||
|
|
||||||
|
// searching for either of the terms should return both results since they collate to the same value
|
||||||
|
SearchRequest request = new SearchRequest()
|
||||||
|
.indices(index)
|
||||||
|
.types(type)
|
||||||
|
.source(new SearchSourceBuilder()
|
||||||
|
.fetchSource(false)
|
||||||
|
.query(QueryBuilders.termQuery("collate", randomBoolean() ? equilavent[0] : equilavent[1]))
|
||||||
|
.sort("collate")
|
||||||
|
.sort("_uid", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
|
||||||
|
);
|
||||||
|
|
||||||
|
SearchResponse response = client().search(request).actionGet();
|
||||||
|
assertNoFailures(response);
|
||||||
|
assertHitCount(response, 2L);
|
||||||
|
assertOrderedSearchHits(response, "2", "1");
|
||||||
|
}

    /*
     * Test secondary strength; for English, case is not significant.
     */
    public void testSecondaryStrength() throws Exception {
        String index = "foo";
        String type = "mytype";

        String[] equilavent = {"TESTING", "testing"};

        XContentBuilder builder = jsonBuilder()
            .startObject().startObject("properties")
            .startObject("collate")
            .field("type", "icu_collation_keyword")
            .field("language", "en")
            .field("strength", "secondary")
            .field("decomposition", "no")
            .endObject()
            .endObject().endObject();

        assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));

        indexRandom(true,
            client().prepareIndex(index, type, "1").setSource("{\"collate\":\"" + equilavent[0] + "\"}", XContentType.JSON),
            client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[1] + "\"}", XContentType.JSON)
        );

        SearchRequest request = new SearchRequest()
            .indices(index)
            .types(type)
            .source(new SearchSourceBuilder()
                .fetchSource(false)
                .query(QueryBuilders.termQuery("collate", randomBoolean() ? equilavent[0] : equilavent[1]))
                .sort("collate")
                .sort("_uid", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
            );

        SearchResponse response = client().search(request).actionGet();
        assertNoFailures(response);
        assertHitCount(response, 2L);
        assertOrderedSearchHits(response, "2", "1");
    }
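
    // Editor's illustrative sketch (not part of this commit): "strength": "secondary" compared
    // directly on an ICU4J Collator. Case is a tertiary difference, so it is invisible at secondary
    // strength. Method name is made up.
    private static void secondaryStrengthSketch() {
        Collator collator = Collator.getInstance(new ULocale("en"));
        collator.setStrength(Collator.SECONDARY);
        assert collator.compare("TESTING", "testing") == 0; // equal: case only differs at the tertiary level
    }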

    /*
     * Setting alternate=shifted to shift whitespace, punctuation and symbols
     * to quaternary level
     */
    public void testIgnorePunctuation() throws Exception {
        String index = "foo";
        String type = "mytype";

        String[] equilavent = {"foo-bar", "foo bar"};

        XContentBuilder builder = jsonBuilder()
            .startObject().startObject("properties")
            .startObject("collate")
            .field("type", "icu_collation_keyword")
            .field("language", "en")
            .field("strength", "primary")
            .field("alternate", "shifted")
            .endObject()
            .endObject().endObject();

        assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));

        indexRandom(true,
            client().prepareIndex(index, type, "1").setSource("{\"collate\":\"" + equilavent[0] + "\"}", XContentType.JSON),
            client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[1] + "\"}", XContentType.JSON)
        );

        SearchRequest request = new SearchRequest()
            .indices(index)
            .types(type)
            .source(new SearchSourceBuilder()
                .fetchSource(false)
                .query(QueryBuilders.termQuery("collate", randomBoolean() ? equilavent[0] : equilavent[1]))
                .sort("collate")
                .sort("_uid", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
            );

        SearchResponse response = client().search(request).actionGet();
        assertNoFailures(response);
        assertHitCount(response, 2L);
        assertOrderedSearchHits(response, "2", "1");
    }
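
    // Editor's illustrative sketch (not part of this commit): "alternate": "shifted" corresponds to
    // RuleBasedCollator#setAlternateHandlingShifted, which moves whitespace, punctuation and symbols
    // to the quaternary level so they are ignored at primary strength. Method name is made up.
    private static void shiftedAlternateSketch() {
        RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(new ULocale("en"));
        collator.setStrength(Collator.PRIMARY);
        collator.setAlternateHandlingShifted(true);
        assert collator.compare("foo-bar", "foo bar") == 0; // '-' and ' ' are both ignorable here
    }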

    /*
     * Setting alternate=shifted and variableTop to shift whitespace, but not
     * punctuation or symbols, to quaternary level
     */
    public void testIgnoreWhitespace() throws Exception {
        String index = "foo";
        String type = "mytype";

        XContentBuilder builder = jsonBuilder()
            .startObject().startObject("properties")
            .startObject("collate")
            .field("type", "icu_collation_keyword")
            .field("language", "en")
            .field("strength", "primary")
            .field("alternate", "shifted")
            .field("variable_top", " ")
            .field("index", false)
            .endObject()
            .endObject().endObject();

        assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));

        indexRandom(true,
            client().prepareIndex(index, type, "1").setSource("{\"collate\":\"foo bar\"}", XContentType.JSON),
            client().prepareIndex(index, type, "2").setSource("{\"collate\":\"foobar\"}", XContentType.JSON),
            client().prepareIndex(index, type, "3").setSource("{\"collate\":\"foo-bar\"}", XContentType.JSON)
        );

        SearchRequest request = new SearchRequest()
            .indices(index)
            .types(type)
            .source(new SearchSourceBuilder()
                .fetchSource(false)
                .sort("collate", SortOrder.ASC)
                .sort("_uid", SortOrder.ASC) // secondary sort should kick in on docs 1 and 2 because they have the same collate value
            );

        SearchResponse response = client().search(request).actionGet();
        assertNoFailures(response);
        assertHitCount(response, 3L);
        assertOrderedSearchHits(response, "3", "1", "2");
    }
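
    // Editor's illustrative sketch (not part of this commit): combining "alternate": "shifted" with
    // "variable_top": " " limits the ignorable range to whitespace, leaving punctuation significant.
    // setVariableTop(String) is the older ICU4J API this option maps onto; method name is made up.
    private static void variableTopSketch() {
        RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(new ULocale("en"));
        collator.setStrength(Collator.PRIMARY);
        collator.setAlternateHandlingShifted(true);
        collator.setVariableTop(" ");
        assert collator.compare("foo bar", "foobar") == 0;  // whitespace is shifted away
        assert collator.compare("foo-bar", "foobar") != 0;  // punctuation is still significant
    }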

    /*
     * Setting numeric to encode digits with numeric value, so that
     * foobar-9 sorts before foobar-10
     */
    public void testNumerics() throws Exception {
        String index = "foo";
        String type = "mytype";

        XContentBuilder builder = jsonBuilder()
            .startObject().startObject("properties")
            .startObject("collate")
            .field("type", "icu_collation_keyword")
            .field("language", "en")
            .field("numeric", true)
            .field("index", false)
            .endObject()
            .endObject().endObject();

        assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));

        indexRandom(true,
            client().prepareIndex(index, type, "1").setSource("{\"collate\":\"foobar-10\"}", XContentType.JSON),
            client().prepareIndex(index, type, "2").setSource("{\"collate\":\"foobar-9\"}", XContentType.JSON)
        );

        SearchRequest request = new SearchRequest()
            .indices(index)
            .types(type)
            .source(new SearchSourceBuilder()
                .fetchSource(false)
                .sort("collate", SortOrder.ASC)
            );

        SearchResponse response = client().search(request).actionGet();
        assertNoFailures(response);
        assertHitCount(response, 2L);
        assertOrderedSearchHits(response, "2", "1");
    }
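
    // Editor's illustrative sketch (not part of this commit): "numeric": true maps to
    // RuleBasedCollator#setNumericCollation, which compares digit runs by their numeric value
    // instead of code point order. Method name is made up.
    private static void numericCollationSketch() {
        RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(new ULocale("en"));
        collator.setNumericCollation(true);
        assert collator.compare("foobar-9", "foobar-10") < 0; // 9 < 10, although '1' < '9' as a character
    }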

    /*
     * Setting caseLevel=true to create an additional case level between
     * secondary and tertiary
     */
    public void testIgnoreAccentsButNotCase() throws Exception {
        String index = "foo";
        String type = "mytype";

        XContentBuilder builder = jsonBuilder()
            .startObject().startObject("properties")
            .startObject("collate")
            .field("type", "icu_collation_keyword")
            .field("language", "en")
            .field("strength", "primary")
            .field("case_level", true)
            .field("index", false)
            .endObject()
            .endObject().endObject();

        assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));

        indexRandom(true,
            client().prepareIndex(index, type, "1").setSource("{\"collate\":\"résumé\"}", XContentType.JSON),
            client().prepareIndex(index, type, "2").setSource("{\"collate\":\"Resume\"}", XContentType.JSON),
            client().prepareIndex(index, type, "3").setSource("{\"collate\":\"resume\"}", XContentType.JSON),
            client().prepareIndex(index, type, "4").setSource("{\"collate\":\"Résumé\"}", XContentType.JSON)
        );

        SearchRequest request = new SearchRequest()
            .indices(index)
            .types(type)
            .source(new SearchSourceBuilder()
                .fetchSource(false)
                .sort("collate", SortOrder.ASC)
                .sort("_uid", SortOrder.DESC)
            );

        SearchResponse response = client().search(request).actionGet();
        assertNoFailures(response);
        assertHitCount(response, 4L);
        assertOrderedSearchHits(response, "3", "1", "4", "2");
    }
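
    // Editor's illustrative sketch (not part of this commit): "case_level": true adds a case level
    // between secondary and tertiary, so with primary strength accents are ignored while case still
    // separates values. Method name is made up.
    private static void caseLevelSketch() {
        RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(new ULocale("en"));
        collator.setStrength(Collator.PRIMARY);
        collator.setCaseLevel(true);
        assert collator.compare("résumé", "resume") == 0; // accents ignored at primary strength
        assert collator.compare("resume", "Résumé") < 0;  // case still distinguishes via the case level
    }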

    /*
     * Setting caseFirst=upper to cause uppercase strings to sort
     * before lowercase ones.
     */
    public void testUpperCaseFirst() throws Exception {
        String index = "foo";
        String type = "mytype";

        XContentBuilder builder = jsonBuilder()
            .startObject().startObject("properties")
            .startObject("collate")
            .field("type", "icu_collation_keyword")
            .field("language", "en")
            .field("strength", "tertiary")
            .field("case_first", "upper")
            .field("index", false)
            .endObject()
            .endObject().endObject();

        assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));

        indexRandom(true,
            client().prepareIndex(index, type, "1").setSource("{\"collate\":\"resume\"}", XContentType.JSON),
            client().prepareIndex(index, type, "2").setSource("{\"collate\":\"Resume\"}", XContentType.JSON)
        );

        SearchRequest request = new SearchRequest()
            .indices(index)
            .types(type)
            .source(new SearchSourceBuilder()
                .fetchSource(false)
                .sort("collate", SortOrder.ASC)
            );

        SearchResponse response = client().search(request).actionGet();
        assertNoFailures(response);
        assertHitCount(response, 2L);
        assertOrderedSearchHits(response, "2", "1");
    }
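
    // Editor's illustrative sketch (not part of this commit): "case_first": "upper" maps to
    // RuleBasedCollator#setUpperCaseFirst and makes uppercase variants sort ahead of lowercase ones
    // at tertiary strength. Method name is made up.
    private static void upperCaseFirstSketch() {
        RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(new ULocale("en"));
        collator.setStrength(Collator.TERTIARY);
        collator.setUpperCaseFirst(true);
        assert collator.compare("Resume", "resume") < 0; // uppercase wins the tertiary comparison
    }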

    /*
     * For German, you might want oe to sort and match with o umlaut.
     * This is not the default, but you can make a customized ruleset to do this.
     *
     * The default is DIN 5007-1; this shows how to tailor a collator to get DIN 5007-2 behavior.
     * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4423383
     */
    public void testCustomRules() throws Exception {
        String index = "foo";
        String type = "mytype";

        RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
        String DIN5007_2_tailorings =
            "& ae , a\u0308 & AE , A\u0308" +
            "& oe , o\u0308 & OE , O\u0308" +
            "& ue , u\u0308 & UE , u\u0308";

        RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
        String tailoredRules = tailoredCollator.getRules();

        String[] equilavent = {"Töne", "Toene"};

        XContentBuilder builder = jsonBuilder()
            .startObject().startObject("properties")
            .startObject("collate")
            .field("type", "icu_collation_keyword")
            .field("rules", tailoredRules)
            .field("strength", "primary")
            .endObject()
            .endObject().endObject();

        assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));

        indexRandom(true,
            client().prepareIndex(index, type, "1").setSource("{\"collate\":\"" + equilavent[0] + "\"}", XContentType.JSON),
            client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[1] + "\"}", XContentType.JSON)
        );

        SearchRequest request = new SearchRequest()
            .indices(index)
            .types(type)
            .source(new SearchSourceBuilder()
                .fetchSource(false)
                .query(QueryBuilders.termQuery("collate", randomBoolean() ? equilavent[0] : equilavent[1]))
                .sort("collate", SortOrder.ASC)
                .sort("_uid", SortOrder.DESC) // secondary sort should kick in because both will collate to same value
            );

        SearchResponse response = client().search(request).actionGet();
        assertNoFailures(response);
        assertHitCount(response, 2L);
        assertOrderedSearchHits(response, "2", "1");
    }
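
    // Editor's illustrative sketch (not part of this commit): the same DIN 5007-2 tailoring applied
    // directly to a RuleBasedCollator. At primary strength the tailored rules make "Töne" and "Toene"
    // collate identically, which is what the "rules" mapping parameter above relies on. Method name
    // is made up; the rules string mirrors the one built in testCustomRules.
    private static void din5007_2Sketch() throws Exception {
        RuleBasedCollator base = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
        RuleBasedCollator tailored = new RuleBasedCollator(base.getRules()
            + "& ae , a\u0308 & AE , A\u0308"
            + "& oe , o\u0308 & OE , O\u0308"
            + "& ue , u\u0308 & UE , u\u0308");
        tailored.setStrength(Collator.PRIMARY);
        assert tailored.compare("Töne", "Toene") == 0; // oe matches o umlaut under the tailoring
    }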
}

@@ -0,0 +1,342 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.index.mapper;

import static org.hamcrest.Matchers.equalTo;

import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RawCollationKey;
import com.ibm.icu.util.ULocale;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
import org.junit.Before;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;

public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {

    private static final String FIELD_TYPE = "icu_collation_keyword";

    @Override
    protected Collection<Class<? extends Plugin>> getPlugins() {
        return Arrays.asList(AnalysisICUPlugin.class, InternalSettingsPlugin.class);
    }

    IndexService indexService;
    DocumentMapperParser parser;

    @Before
    public void setup() {
        indexService = createIndex("test");
        parser = indexService.mapperService().documentMapperParser();
    }

    public void testDefaults() throws Exception {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject()
            .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .field("field", "1234")
            .endObject()
            .bytes(),
            XContentType.JSON));

        IndexableField[] fields = doc.rootDoc().getFields("field");
        // the collation key is stored twice: once as a single indexed token and once as a sorted doc value
        assertEquals(2, fields.length);

        Collator collator = Collator.getInstance();
        RawCollationKey key = collator.getRawCollationKey("1234", null);
        BytesRef expected = new BytesRef(key.bytes, 0, key.size);

        assertEquals(expected, fields[0].binaryValue());
        IndexableFieldType fieldType = fields[0].fieldType();
        assertThat(fieldType.omitNorms(), equalTo(true));
        assertFalse(fieldType.tokenized());
        assertFalse(fieldType.stored());
        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
        assertThat(fieldType.storeTermVectors(), equalTo(false));
        assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
        assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
        assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
        assertEquals(DocValuesType.NONE, fieldType.docValuesType());

        assertEquals(expected, fields[1].binaryValue());
        fieldType = fields[1].fieldType();
        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
        assertEquals(DocValuesType.SORTED, fieldType.docValuesType());
    }

    public void testNullValue() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject()
            .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .nullField("field")
            .endObject()
            .bytes(),
            XContentType.JSON));
        assertArrayEquals(new IndexableField[0], doc.rootDoc().getFields("field"));

        mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", FIELD_TYPE)
            .field("null_value", "1234").endObject().endObject()
            .endObject().endObject().string();

        mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .endObject()
            .bytes(),
            XContentType.JSON));

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(0, fields.length);

        doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .nullField("field")
            .endObject()
            .bytes(),
            XContentType.JSON));

        Collator collator = Collator.getInstance();
        RawCollationKey key = collator.getRawCollationKey("1234", null);
        BytesRef expected = new BytesRef(key.bytes, 0, key.size);

        fields = doc.rootDoc().getFields("field");
        assertEquals(2, fields.length);
        assertEquals(expected, fields[0].binaryValue());
    }

    public void testEnableStore() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", FIELD_TYPE)
            .field("store", true).endObject().endObject()
            .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .field("field", "1234")
            .endObject()
            .bytes(),
            XContentType.JSON));

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(2, fields.length);
        assertTrue(fields[0].fieldType().stored());
    }

    public void testDisableIndex() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", FIELD_TYPE)
            .field("index", false).endObject().endObject()
            .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .field("field", "1234")
            .endObject()
            .bytes(),
            XContentType.JSON));

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(1, fields.length);
        assertEquals(IndexOptions.NONE, fields[0].fieldType().indexOptions());
        assertEquals(DocValuesType.SORTED, fields[0].fieldType().docValuesType());
    }

    public void testDisableDocValues() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", FIELD_TYPE)
            .field("doc_values", false).endObject().endObject()
            .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .field("field", "1234")
            .endObject()
            .bytes(),
            XContentType.JSON));

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(1, fields.length);
        assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType());
    }

    public void testIndexOptions() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", FIELD_TYPE)
            .field("index_options", "freqs").endObject().endObject()
            .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .field("field", "1234")
            .endObject()
            .bytes(),
            XContentType.JSON));

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(2, fields.length);
        assertEquals(IndexOptions.DOCS_AND_FREQS, fields[0].fieldType().indexOptions());

        for (String indexOptions : Arrays.asList("positions", "offsets")) {
            final String mapping2 = XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field").field("type", FIELD_TYPE)
                .field("index_options", indexOptions).endObject().endObject()
                .endObject().endObject().string();
            IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
                () -> parser.parse("type", new CompressedXContent(mapping2)));
            assertEquals("The [" + FIELD_TYPE + "] field does not support positions, got [index_options]=" + indexOptions,
                e.getMessage());
        }
    }

    public void testEnableNorms() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field").field("type", FIELD_TYPE)
            .field("norms", true).endObject().endObject()
            .endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .field("field", "1234")
            .endObject()
            .bytes(),
            XContentType.JSON));

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(2, fields.length);
        assertFalse(fields[0].fieldType().omitNorms());
    }

    public void testCollator() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field")
            .field("type", FIELD_TYPE)
            .field("language", "tr")
            .field("strength", "primary")
            .endObject().endObject().endObject().endObject().string();

        DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

        assertEquals(mapping, mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
            .startObject()
            .field("field", "I WİLL USE TURKİSH CASING")
            .endObject()
            .bytes(),
            XContentType.JSON));

        Collator collator = Collator.getInstance(new ULocale("tr"));
        collator.setStrength(Collator.PRIMARY);
        RawCollationKey key = collator.getRawCollationKey("ı will use turkish casıng", null); // should collate to same value
        BytesRef expected = new BytesRef(key.bytes, 0, key.size);

        IndexableField[] fields = doc.rootDoc().getFields("field");
        assertEquals(2, fields.length);

        assertEquals(expected, fields[0].binaryValue());
        IndexableFieldType fieldType = fields[0].fieldType();
        assertThat(fieldType.omitNorms(), equalTo(true));
        assertFalse(fieldType.tokenized());
        assertFalse(fieldType.stored());
        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
        assertThat(fieldType.storeTermVectors(), equalTo(false));
        assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
        assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
        assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
        assertEquals(DocValuesType.NONE, fieldType.docValuesType());

        assertEquals(expected, fields[1].binaryValue());
        fieldType = fields[1].fieldType();
        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
        assertEquals(DocValuesType.SORTED, fieldType.docValuesType());
    }

    public void testUpdateCollator() throws IOException {
        String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field")
            .field("type", FIELD_TYPE)
            .field("language", "tr")
            .field("strength", "primary")
            .endObject().endObject().endObject().endObject().string();
        indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE, randomBoolean());

        String mapping2 = XContentFactory.jsonBuilder().startObject().startObject("type")
            .startObject("properties").startObject("field")
            .field("type", FIELD_TYPE)
            .field("language", "en")
            .endObject().endObject().endObject().endObject().string();
        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
            () -> indexService.mapperService().merge("type",
                new CompressedXContent(mapping2), MergeReason.MAPPING_UPDATE, randomBoolean()));
        assertEquals("Can't merge because of conflicts: [Cannot update language setting for [" + FIELD_TYPE
            + "], Cannot update strength setting for [" + FIELD_TYPE + "]]", e.getMessage());
    }
}

@@ -175,13 +175,15 @@ public abstract class FieldTypeTestCase extends ESTestCase {
     // TODO: remove this once toString is no longer final on FieldType...
     protected void assertFieldTypeEquals(String property, MappedFieldType ft1, MappedFieldType ft2) {
         if (ft1.equals(ft2) == false) {
-            fail("Expected equality, testing property " + property + "\nexpected: " + toString(ft1) + "; \nactual: " + toString(ft2) + "\n");
+            fail("Expected equality, testing property " + property + "\nexpected: " + toString(ft1) + "; \nactual: " + toString(ft2)
+                + "\n");
         }
     }

     protected void assertFieldTypeNotEquals(String property, MappedFieldType ft1, MappedFieldType ft2) {
         if (ft1.equals(ft2)) {
-            fail("Expected inequality, testing property " + property + "\nfirst: " + toString(ft1) + "; \nsecond: " + toString(ft2) + "\n");
+            fail("Expected inequality, testing property " + property + "\nfirst: " + toString(ft1) + "; \nsecond: " + toString(ft2)
+                + "\n");
         }
     }