diff --git a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java index 54b79334ba7..655a6d32524 100644 --- a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java +++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java @@ -25,11 +25,13 @@ import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.util.ULocale; import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.Version; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; @@ -50,6 +52,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.BiFunction; import java.util.function.LongSupplier; public class ICUCollationKeywordFieldMapper extends FieldMapper { @@ -563,6 +566,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper { private final String variableTop; private final boolean hiraganaQuaternaryMode; private final Collator collator; + private final BiFunction getDVField; protected ICUCollationKeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings, MultiFields multiFields, CopyTo copyTo, String rules, String language, @@ -584,6 +588,11 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper { this.variableTop = variableTop; this.hiraganaQuaternaryMode = hiraganaQuaternaryMode; this.collator = collator; + if (indexCreatedVersion.onOrAfter(Version.V_5_6_0)) { + getDVField = SortedSetDocValuesField::new; + } else { + getDVField = SortedDocValuesField::new; + } } @Override @@ -740,7 +749,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper { } if (fieldType().hasDocValues()) { - fields.add(new SortedDocValuesField(fieldType().name(), binaryValue)); + fields.add(getDVField.apply(fieldType().name(), binaryValue)); } } } diff --git a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperIT.java b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperIT.java index 8a6e9b49ac9..5220d44dca3 100644 --- a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperIT.java +++ b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperIT.java @@ -35,6 +35,8 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.sort.SortBuilders; +import org.elasticsearch.search.sort.SortMode; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.ESIntegTestCase; @@ -94,6 +96,64 @@ public class ICUCollationKeywordFieldMapperIT extends ESIntegTestCase { assertOrderedSearchHits(response, "2", "1"); } + public void testMultipleValues() throws Exception { + String index = "foo"; + String type = "mytype"; + + String[] equilavent = {"a", "C", "a", "B"}; + + XContentBuilder builder = jsonBuilder() + .startObject().startObject("properties") + .startObject("collate") + .field("type", "icu_collation_keyword") + .field("language", "en") + .endObject() + .endObject().endObject(); + + assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder)); + + // everything should be indexed fine, no exceptions + indexRandom(true, + client().prepareIndex(index, type, "1").setSource("{\"collate\":[\"" + equilavent[0] + "\", \"" + + equilavent[1] + "\"]}", XContentType.JSON), + client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[2] + "\"}", XContentType.JSON) + ); + + // using sort mode = max, values B and C will be used for the sort + SearchRequest request = new SearchRequest() + .indices(index) + .types(type) + .source(new SearchSourceBuilder() + .fetchSource(false) + .query(QueryBuilders.termQuery("collate", "a")) + // if mode max we use c and b as sort values, if max we use "a" for both + .sort(SortBuilders.fieldSort("collate").sortMode(SortMode.MAX).order(SortOrder.DESC)) + .sort("_uid", SortOrder.DESC) // will be ignored + ); + + SearchResponse response = client().search(request).actionGet(); + assertNoFailures(response); + assertHitCount(response, 2L); + assertOrderedSearchHits(response, "1", "2"); + + // same thing, using different sort mode that will use a for both docs + request = new SearchRequest() + .indices(index) + .types(type) + .source(new SearchSourceBuilder() + .fetchSource(false) + .query(QueryBuilders.termQuery("collate", "a")) + // if mode max we use c and b as sort values, if max we use "a" for both + .sort(SortBuilders.fieldSort("collate").sortMode(SortMode.MIN).order(SortOrder.DESC)) + .sort("_uid", SortOrder.DESC) // will NOT be ignored and will determine order + ); + + response = client().search(request).actionGet(); + assertNoFailures(response); + assertHitCount(response, 2L); + assertOrderedSearchHits(response, "2", "1"); + } + /* * Test usage of the decomposition option for unicode normalization. */ diff --git a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperTests.java b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperTests.java index ebe909837e9..be9dff4bdec 100644 --- a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperTests.java +++ b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapperTests.java @@ -28,7 +28,9 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.Version; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexService; @@ -96,6 +98,51 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase { assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + assertEquals(expected, fields[1].binaryValue()); + fieldType = fields[1].fieldType(); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE)); + assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType()); + } + + public void testBackCompat() throws Exception { + indexService = createIndex("oldindex", Settings.builder().put("index.version.created", Version.V_5_5_0).build()); + parser = indexService.mapperService().documentMapperParser(); + + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("_all").field("enabled", false).endObject() + .startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping)); + + assertEquals(mapping, mapper.mappingSource().toString()); + + ParsedDocument doc = mapper.parse(SourceToParse.source("oldindex", "type", "1", XContentFactory.jsonBuilder() + .startObject() + .field("field", "1234") + .endObject() + .bytes(), + XContentType.JSON)); + + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(2, fields.length); + + Collator collator = Collator.getInstance(); + RawCollationKey key = collator.getRawCollationKey("1234", null); + BytesRef expected = new BytesRef(key.bytes, 0, key.size); + + assertEquals(expected, fields[0].binaryValue()); + IndexableFieldType fieldType = fields[0].fieldType(); + assertThat(fieldType.omitNorms(), equalTo(true)); + assertFalse(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertThat(fieldType.storeTermVectors(), equalTo(false)); + assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); + assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); + assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + assertEquals(expected, fields[1].binaryValue()); fieldType = fields[1].fieldType(); assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE)); @@ -194,7 +241,7 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase { IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(1, fields.length); assertEquals(IndexOptions.NONE, fields[0].fieldType().indexOptions()); - assertEquals(DocValuesType.SORTED, fields[0].fieldType().docValuesType()); + assertEquals(DocValuesType.SORTED_SET, fields[0].fieldType().docValuesType()); } public void testDisableDocValues() throws IOException { @@ -219,6 +266,68 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase { assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType()); } + public void testMultipleValues() throws IOException { + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject() + .endObject().endObject().string(); + + DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping)); + + assertEquals(mapping, mapper.mappingSource().toString()); + + ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder() + .startObject() + .field("field", Arrays.asList("1234", "5678")) + .endObject() + .bytes(), + XContentType.JSON)); + + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(4, fields.length); + + Collator collator = Collator.getInstance(); + RawCollationKey key = collator.getRawCollationKey("1234", null); + BytesRef expected = new BytesRef(key.bytes, 0, key.size); + + assertEquals(expected, fields[0].binaryValue()); + IndexableFieldType fieldType = fields[0].fieldType(); + assertThat(fieldType.omitNorms(), equalTo(true)); + assertFalse(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertThat(fieldType.storeTermVectors(), equalTo(false)); + assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); + assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); + assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + + assertEquals(expected, fields[1].binaryValue()); + fieldType = fields[1].fieldType(); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE)); + assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType()); + + collator = Collator.getInstance(); + key = collator.getRawCollationKey("5678", null); + expected = new BytesRef(key.bytes, 0, key.size); + + assertEquals(expected, fields[2].binaryValue()); + fieldType = fields[2].fieldType(); + assertThat(fieldType.omitNorms(), equalTo(true)); + assertFalse(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertThat(fieldType.storeTermVectors(), equalTo(false)); + assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); + assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); + assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + + assertEquals(expected, fields[3].binaryValue()); + fieldType = fields[3].fieldType(); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE)); + assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType()); + } + public void testIndexOptions() throws IOException { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field").field("type", FIELD_TYPE) @@ -316,7 +425,7 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase { assertEquals(expected, fields[1].binaryValue()); fieldType = fields[1].fieldType(); assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE)); - assertEquals(DocValuesType.SORTED, fieldType.docValuesType()); + assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType()); } public void testUpdateCollator() throws IOException {