ICUCollationKeywordFieldMapper use SortedSetDocValuesField (#26267)

Switch ICUCollationKeywordFieldMapper from using SortedDocValuesField to SortedSetDocValuesField
so we can support fields with multiple values.
This commit is contained in:
Matt Weber 2017-08-21 01:40:56 -07:00 committed by Adrien Grand
parent 41f81e2279
commit e89d9400c9
3 changed files with 181 additions and 3 deletions

View File

@ -25,11 +25,13 @@ import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
@ -50,6 +52,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.LongSupplier;
public class ICUCollationKeywordFieldMapper extends FieldMapper {
@ -563,6 +566,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
private final String variableTop;
private final boolean hiraganaQuaternaryMode;
private final Collator collator;
private final BiFunction<String, BytesRef, Field> getDVField;
protected ICUCollationKeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
Settings indexSettings, MultiFields multiFields, CopyTo copyTo, String rules, String language,
@ -584,6 +588,11 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
this.variableTop = variableTop;
this.hiraganaQuaternaryMode = hiraganaQuaternaryMode;
this.collator = collator;
if (indexCreatedVersion.onOrAfter(Version.V_5_6_0)) {
getDVField = SortedSetDocValuesField::new;
} else {
getDVField = SortedDocValuesField::new;
}
}
@Override
@ -740,7 +749,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
}
if (fieldType().hasDocValues()) {
fields.add(new SortedDocValuesField(fieldType().name(), binaryValue));
fields.add(getDVField.apply(fieldType().name(), binaryValue));
}
}
}

View File

@ -35,6 +35,8 @@ import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.search.sort.SortMode;
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.test.ESIntegTestCase;
@ -94,6 +96,64 @@ public class ICUCollationKeywordFieldMapperIT extends ESIntegTestCase {
assertOrderedSearchHits(response, "2", "1");
}
/**
 * Verifies that an {@code icu_collation_keyword} field accepts an array of values and that
 * the sort mode selects which of a document's values is used as its sort key.
 *
 * Document "1" holds the values "a" and "C"; document "2" holds only "a". Both match the
 * term query on "a".
 */
public void testMultipleValues() throws Exception {
    final String indexName = "foo";
    final String typeName = "mytype";
    // Only the first three entries are indexed below; the fourth entry is never referenced.
    final String[] values = {"a", "C", "a", "B"};

    final XContentBuilder mapping = jsonBuilder()
        .startObject()
            .startObject("properties")
                .startObject("collate")
                    .field("type", "icu_collation_keyword")
                    .field("language", "en")
                .endObject()
            .endObject()
        .endObject();
    assertAcked(client().admin().indices().prepareCreate(indexName).addMapping(typeName, mapping));

    // Indexing a multi-valued document must succeed without exceptions.
    final String multiValuedSource = "{\"collate\":[\"" + values[0] + "\", \"" + values[1] + "\"]}";
    final String singleValuedSource = "{\"collate\":\"" + values[2] + "\"}";
    indexRandom(true,
        client().prepareIndex(indexName, typeName, "1").setSource(multiValuedSource, XContentType.JSON),
        client().prepareIndex(indexName, typeName, "2").setSource(singleValuedSource, XContentType.JSON));

    // Sort mode MAX: document "1" sorts by "C" and document "2" by "a". The test expects "1"
    // first in descending order, so the secondary _uid sort does not decide the outcome.
    final SearchSourceBuilder sortByMax = new SearchSourceBuilder()
        .fetchSource(false)
        .query(QueryBuilders.termQuery("collate", "a"))
        .sort(SortBuilders.fieldSort("collate").sortMode(SortMode.MAX).order(SortOrder.DESC))
        .sort("_uid", SortOrder.DESC);
    final SearchRequest maxRequest = new SearchRequest().indices(indexName).types(typeName).source(sortByMax);

    final SearchResponse maxResponse = client().search(maxRequest).actionGet();
    assertNoFailures(maxResponse);
    assertHitCount(maxResponse, 2L);
    assertOrderedSearchHits(maxResponse, "1", "2");

    // Sort mode MIN: both documents sort by "a", so the primary keys tie and the descending
    // _uid sort determines the order.
    final SearchSourceBuilder sortByMin = new SearchSourceBuilder()
        .fetchSource(false)
        .query(QueryBuilders.termQuery("collate", "a"))
        .sort(SortBuilders.fieldSort("collate").sortMode(SortMode.MIN).order(SortOrder.DESC))
        .sort("_uid", SortOrder.DESC);
    final SearchRequest minRequest = new SearchRequest().indices(indexName).types(typeName).source(sortByMin);

    final SearchResponse minResponse = client().search(minRequest).actionGet();
    assertNoFailures(minResponse);
    assertHitCount(minResponse, 2L);
    assertOrderedSearchHits(minResponse, "2", "1");
}
/*
* Test usage of the decomposition option for unicode normalization.
*/

View File

@ -28,7 +28,9 @@ import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
@ -96,6 +98,51 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
assertEquals(expected, fields[1].binaryValue());
fieldType = fields[1].fieldType();
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
}
public void testBackCompat() throws Exception {
indexService = createIndex("oldindex", Settings.builder().put("index.version.created", Version.V_5_5_0).build());
parser = indexService.mapperService().documentMapperParser();
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("_all").field("enabled", false).endObject()
.startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject()
.endObject().endObject().string();
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
assertEquals(mapping, mapper.mappingSource().toString());
ParsedDocument doc = mapper.parse(SourceToParse.source("oldindex", "type", "1", XContentFactory.jsonBuilder()
.startObject()
.field("field", "1234")
.endObject()
.bytes(),
XContentType.JSON));
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(2, fields.length);
Collator collator = Collator.getInstance();
RawCollationKey key = collator.getRawCollationKey("1234", null);
BytesRef expected = new BytesRef(key.bytes, 0, key.size);
assertEquals(expected, fields[0].binaryValue());
IndexableFieldType fieldType = fields[0].fieldType();
assertThat(fieldType.omitNorms(), equalTo(true));
assertFalse(fieldType.tokenized());
assertFalse(fieldType.stored());
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
assertThat(fieldType.storeTermVectors(), equalTo(false));
assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
assertEquals(expected, fields[1].binaryValue());
fieldType = fields[1].fieldType();
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
@ -194,7 +241,7 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(1, fields.length);
assertEquals(IndexOptions.NONE, fields[0].fieldType().indexOptions());
assertEquals(DocValuesType.SORTED, fields[0].fieldType().docValuesType());
assertEquals(DocValuesType.SORTED_SET, fields[0].fieldType().docValuesType());
}
public void testDisableDocValues() throws IOException {
@ -219,6 +266,68 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType());
}
/**
 * Parses a document whose field holds two values and checks that each value yields a pair of
 * Lucene fields: an indexed field carrying the collation key, immediately followed by a
 * {@code SORTED_SET} doc-values field carrying the same key.
 */
public void testMultipleValues() throws IOException {
    final String mappingJson = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type")
                .startObject("properties")
                    .startObject("field").field("type", FIELD_TYPE).endObject()
                .endObject()
            .endObject()
        .endObject()
        .string();

    final DocumentMapper documentMapper = parser.parse("type", new CompressedXContent(mappingJson));
    assertEquals(mappingJson, documentMapper.mappingSource().toString());

    final String[] inputs = {"1234", "5678"};
    final ParsedDocument parsed = documentMapper.parse(SourceToParse.source("test", "type", "1",
        XContentFactory.jsonBuilder()
            .startObject()
            .field("field", Arrays.asList(inputs))
            .endObject()
            .bytes(),
        XContentType.JSON));

    // Two Lucene fields per input value: the indexed term, then its doc-values counterpart.
    final IndexableField[] fields = parsed.rootDoc().getFields("field");
    assertEquals(4, fields.length);

    for (int i = 0; i < inputs.length; i++) {
        final RawCollationKey rawKey = Collator.getInstance().getRawCollationKey(inputs[i], null);
        final BytesRef expectedKey = new BytesRef(rawKey.bytes, 0, rawKey.size);
        final IndexableField indexedField = fields[2 * i];
        final IndexableField docValuesField = fields[2 * i + 1];

        // The indexed field: docs-only postings, no norms, not stored, no term vectors, no doc values.
        assertEquals(expectedKey, indexedField.binaryValue());
        final IndexableFieldType indexedType = indexedField.fieldType();
        assertThat(indexedType.omitNorms(), equalTo(true));
        assertFalse(indexedType.tokenized());
        assertFalse(indexedType.stored());
        assertThat(indexedType.indexOptions(), equalTo(IndexOptions.DOCS));
        assertThat(indexedType.storeTermVectors(), equalTo(false));
        assertThat(indexedType.storeTermVectorOffsets(), equalTo(false));
        assertThat(indexedType.storeTermVectorPositions(), equalTo(false));
        assertThat(indexedType.storeTermVectorPayloads(), equalTo(false));
        assertEquals(DocValuesType.NONE, indexedType.docValuesType());

        // The doc-values field: same key, not indexed, SORTED_SET type.
        assertEquals(expectedKey, docValuesField.binaryValue());
        final IndexableFieldType docValuesFieldType = docValuesField.fieldType();
        assertThat(docValuesFieldType.indexOptions(), equalTo(IndexOptions.NONE));
        assertEquals(DocValuesType.SORTED_SET, docValuesFieldType.docValuesType());
    }
}
public void testIndexOptions() throws IOException {
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", FIELD_TYPE)
@ -316,7 +425,7 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
assertEquals(expected, fields[1].binaryValue());
fieldType = fields[1].fieldType();
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
assertEquals(DocValuesType.SORTED, fieldType.docValuesType());
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
}
public void testUpdateCollator() throws IOException {