mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-22 12:56:53 +00:00
ICUCollationKeywordFieldMapper use SortedSetDocValuesField (#26267)
Switch ICUCollationKeywordFieldMapper from using SortedDocValuesField to SortedSetDocValuesField so we can support fields with multiple values.
This commit is contained in:
parent
41f81e2279
commit
e89d9400c9
@ -25,11 +25,13 @@ import com.ibm.icu.text.RuleBasedCollator;
|
||||
import com.ibm.icu.util.ULocale;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
@ -50,6 +52,7 @@ import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.LongSupplier;
|
||||
|
||||
public class ICUCollationKeywordFieldMapper extends FieldMapper {
|
||||
@ -563,6 +566,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
|
||||
private final String variableTop;
|
||||
private final boolean hiraganaQuaternaryMode;
|
||||
private final Collator collator;
|
||||
private final BiFunction<String, BytesRef, Field> getDVField;
|
||||
|
||||
protected ICUCollationKeywordFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
|
||||
Settings indexSettings, MultiFields multiFields, CopyTo copyTo, String rules, String language,
|
||||
@ -584,6 +588,11 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
|
||||
this.variableTop = variableTop;
|
||||
this.hiraganaQuaternaryMode = hiraganaQuaternaryMode;
|
||||
this.collator = collator;
|
||||
if (indexCreatedVersion.onOrAfter(Version.V_5_6_0)) {
|
||||
getDVField = SortedSetDocValuesField::new;
|
||||
} else {
|
||||
getDVField = SortedDocValuesField::new;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -740,7 +749,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
|
||||
}
|
||||
|
||||
if (fieldType().hasDocValues()) {
|
||||
fields.add(new SortedDocValuesField(fieldType().name(), binaryValue));
|
||||
fields.add(getDVField.apply(fieldType().name(), binaryValue));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -35,6 +35,8 @@ import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.search.builder.SearchSourceBuilder;
|
||||
import org.elasticsearch.search.sort.SortBuilders;
|
||||
import org.elasticsearch.search.sort.SortMode;
|
||||
import org.elasticsearch.search.sort.SortOrder;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
|
||||
@ -94,6 +96,64 @@ public class ICUCollationKeywordFieldMapperIT extends ESIntegTestCase {
|
||||
assertOrderedSearchHits(response, "2", "1");
|
||||
}
|
||||
|
||||
public void testMultipleValues() throws Exception {
|
||||
String index = "foo";
|
||||
String type = "mytype";
|
||||
|
||||
String[] equilavent = {"a", "C", "a", "B"};
|
||||
|
||||
XContentBuilder builder = jsonBuilder()
|
||||
.startObject().startObject("properties")
|
||||
.startObject("collate")
|
||||
.field("type", "icu_collation_keyword")
|
||||
.field("language", "en")
|
||||
.endObject()
|
||||
.endObject().endObject();
|
||||
|
||||
assertAcked(client().admin().indices().prepareCreate(index).addMapping(type, builder));
|
||||
|
||||
// everything should be indexed fine, no exceptions
|
||||
indexRandom(true,
|
||||
client().prepareIndex(index, type, "1").setSource("{\"collate\":[\"" + equilavent[0] + "\", \""
|
||||
+ equilavent[1] + "\"]}", XContentType.JSON),
|
||||
client().prepareIndex(index, type, "2").setSource("{\"collate\":\"" + equilavent[2] + "\"}", XContentType.JSON)
|
||||
);
|
||||
|
||||
// using sort mode = max, value "C" (doc 1) and value "a" (doc 2) will be used for the sort
|
||||
SearchRequest request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.query(QueryBuilders.termQuery("collate", "a"))
|
||||
// if mode is max we use "C" (doc 1) and "a" (doc 2) as sort values, if mode is min we use "a" for both
|
||||
.sort(SortBuilders.fieldSort("collate").sortMode(SortMode.MAX).order(SortOrder.DESC))
|
||||
.sort("_uid", SortOrder.DESC) // will be ignored
|
||||
);
|
||||
|
||||
SearchResponse response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 2L);
|
||||
assertOrderedSearchHits(response, "1", "2");
|
||||
|
||||
// same thing, using different sort mode that will use a for both docs
|
||||
request = new SearchRequest()
|
||||
.indices(index)
|
||||
.types(type)
|
||||
.source(new SearchSourceBuilder()
|
||||
.fetchSource(false)
|
||||
.query(QueryBuilders.termQuery("collate", "a"))
|
||||
// if mode is max we use "C" (doc 1) and "a" (doc 2) as sort values, if mode is min we use "a" for both
|
||||
.sort(SortBuilders.fieldSort("collate").sortMode(SortMode.MIN).order(SortOrder.DESC))
|
||||
.sort("_uid", SortOrder.DESC) // will NOT be ignored and will determine order
|
||||
);
|
||||
|
||||
response = client().search(request).actionGet();
|
||||
assertNoFailures(response);
|
||||
assertHitCount(response, 2L);
|
||||
assertOrderedSearchHits(response, "2", "1");
|
||||
}
|
||||
|
||||
/*
|
||||
* Test usage of the decomposition option for unicode normalization.
|
||||
*/
|
||||
|
@ -28,7 +28,9 @@ import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.IndexableFieldType;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.compress.CompressedXContent;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
import org.elasticsearch.index.IndexService;
|
||||
@ -96,6 +98,51 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
|
||||
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
|
||||
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
|
||||
|
||||
assertEquals(expected, fields[1].binaryValue());
|
||||
fieldType = fields[1].fieldType();
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
|
||||
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
|
||||
}
|
||||
|
||||
public void testBackCompat() throws Exception {
|
||||
indexService = createIndex("oldindex", Settings.builder().put("index.version.created", Version.V_5_5_0).build());
|
||||
parser = indexService.mapperService().documentMapperParser();
|
||||
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("_all").field("enabled", false).endObject()
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("oldindex", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("field", "1234")
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(2, fields.length);
|
||||
|
||||
Collator collator = Collator.getInstance();
|
||||
RawCollationKey key = collator.getRawCollationKey("1234", null);
|
||||
BytesRef expected = new BytesRef(key.bytes, 0, key.size);
|
||||
|
||||
assertEquals(expected, fields[0].binaryValue());
|
||||
IndexableFieldType fieldType = fields[0].fieldType();
|
||||
assertThat(fieldType.omitNorms(), equalTo(true));
|
||||
assertFalse(fieldType.tokenized());
|
||||
assertFalse(fieldType.stored());
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
|
||||
assertThat(fieldType.storeTermVectors(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
|
||||
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
|
||||
|
||||
assertEquals(expected, fields[1].binaryValue());
|
||||
fieldType = fields[1].fieldType();
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
|
||||
@ -194,7 +241,7 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(1, fields.length);
|
||||
assertEquals(IndexOptions.NONE, fields[0].fieldType().indexOptions());
|
||||
assertEquals(DocValuesType.SORTED, fields[0].fieldType().docValuesType());
|
||||
assertEquals(DocValuesType.SORTED_SET, fields[0].fieldType().docValuesType());
|
||||
}
|
||||
|
||||
public void testDisableDocValues() throws IOException {
|
||||
@ -219,6 +266,68 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
|
||||
assertEquals(DocValuesType.NONE, fields[0].fieldType().docValuesType());
|
||||
}
|
||||
|
||||
public void testMultipleValues() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE).endObject().endObject()
|
||||
.endObject().endObject().string();
|
||||
|
||||
DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
|
||||
|
||||
assertEquals(mapping, mapper.mappingSource().toString());
|
||||
|
||||
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("field", Arrays.asList("1234", "5678"))
|
||||
.endObject()
|
||||
.bytes(),
|
||||
XContentType.JSON));
|
||||
|
||||
IndexableField[] fields = doc.rootDoc().getFields("field");
|
||||
assertEquals(4, fields.length);
|
||||
|
||||
Collator collator = Collator.getInstance();
|
||||
RawCollationKey key = collator.getRawCollationKey("1234", null);
|
||||
BytesRef expected = new BytesRef(key.bytes, 0, key.size);
|
||||
|
||||
assertEquals(expected, fields[0].binaryValue());
|
||||
IndexableFieldType fieldType = fields[0].fieldType();
|
||||
assertThat(fieldType.omitNorms(), equalTo(true));
|
||||
assertFalse(fieldType.tokenized());
|
||||
assertFalse(fieldType.stored());
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
|
||||
assertThat(fieldType.storeTermVectors(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
|
||||
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
|
||||
|
||||
assertEquals(expected, fields[1].binaryValue());
|
||||
fieldType = fields[1].fieldType();
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
|
||||
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
|
||||
|
||||
collator = Collator.getInstance();
|
||||
key = collator.getRawCollationKey("5678", null);
|
||||
expected = new BytesRef(key.bytes, 0, key.size);
|
||||
|
||||
assertEquals(expected, fields[2].binaryValue());
|
||||
fieldType = fields[2].fieldType();
|
||||
assertThat(fieldType.omitNorms(), equalTo(true));
|
||||
assertFalse(fieldType.tokenized());
|
||||
assertFalse(fieldType.stored());
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
|
||||
assertThat(fieldType.storeTermVectors(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
|
||||
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
|
||||
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
|
||||
|
||||
assertEquals(expected, fields[3].binaryValue());
|
||||
fieldType = fields[3].fieldType();
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
|
||||
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
|
||||
}
|
||||
|
||||
public void testIndexOptions() throws IOException {
|
||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||
.startObject("properties").startObject("field").field("type", FIELD_TYPE)
|
||||
@ -316,7 +425,7 @@ public class ICUCollationKeywordFieldMapperTests extends ESSingleNodeTestCase {
|
||||
assertEquals(expected, fields[1].binaryValue());
|
||||
fieldType = fields[1].fieldType();
|
||||
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
|
||||
assertEquals(DocValuesType.SORTED, fieldType.docValuesType());
|
||||
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());
|
||||
}
|
||||
|
||||
public void testUpdateCollator() throws IOException {
|
||||
|
Loading…
x
Reference in New Issue
Block a user