Save one utf8 conversion in KeywordFieldMapper. #19867

If a `keyword` field is both indexed and doc-valued, then we will convert the
input string to utf8 bytes twice: once for indexing/storing, and once for doc
values. This commit changes `keyword` fields to compute the utf8 representation
up-front and then feed both the inverted index and doc values with it.

Rather than adding version-based bw compat logic, I broke the `keyword` field
(they are now indexed/stored as a binary field rather than string), which is
fine since we are still on alpha releases for 5.0.
This commit is contained in:
Adrien Grand 2016-08-08 16:33:59 +02:00
parent 1aba907ea2
commit c44679d952
11 changed files with 68 additions and 33 deletions

View File

@ -368,10 +368,7 @@ public final class ShardGetService extends AbstractIndexShardComponent {
}
List<Object> values = searchLookup.source().extractRawValues(field);
if (!values.isEmpty()) {
for (int i = 0; i < values.size(); i++) {
values.set(i, fieldMapper.fieldType().valueForSearch(values.get(i)));
}
if (values.isEmpty() == false) {
value = values;
}
}

View File

@ -168,6 +168,16 @@ public final class KeywordFieldMapper extends FieldMapper implements AllFieldMap
failIfNoDocValues();
return new DocValuesIndexFieldData.Builder();
}
@Override
public Object valueForSearch(Object value) {
if (value == null) {
return null;
}
// keywords are internally stored as utf8 bytes
BytesRef binaryValue = (BytesRef) value;
return binaryValue.utf8ToString();
}
}
private Boolean includeInAll;
@ -252,12 +262,14 @@ public final class KeywordFieldMapper extends FieldMapper implements AllFieldMap
context.allEntries().addText(fieldType().name(), value, fieldType().boost());
}
// convert to utf8 only once before feeding postings/dv/stored fields
final BytesRef binaryValue = new BytesRef(value);
if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) {
Field field = new Field(fieldType().name(), value, fieldType());
Field field = new Field(fieldType().name(), binaryValue, fieldType());
fields.add(field);
}
if (fieldType().hasDocValues()) {
fields.add(new SortedSetDocValuesField(fieldType().name(), new BytesRef(value)));
fields.add(new SortedSetDocValuesField(fieldType().name(), binaryValue));
}
}

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.mapper.geo;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.geo.GeoHashUtils;
import org.apache.lucene.util.LegacyNumericUtils;
@ -47,6 +48,7 @@ import org.elasticsearch.index.mapper.core.LegacyDoubleFieldMapper;
import org.elasticsearch.index.mapper.core.KeywordFieldMapper;
import org.elasticsearch.index.mapper.core.LegacyNumberFieldMapper;
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.mapper.object.ArrayValueMapperParser;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.query.QueryShardException;
@ -149,7 +151,7 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
public abstract Y build(BuilderContext context, String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
Settings indexSettings, FieldMapper latMapper, FieldMapper lonMapper,
KeywordFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed, CopyTo copyTo);
FieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed, CopyTo copyTo);
public Y build(Mapper.BuilderContext context) {
GeoPointFieldType geoPointFieldType = (GeoPointFieldType)fieldType;
@ -176,10 +178,17 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
}
geoPointFieldType.setLatLonEnabled(latMapper.fieldType(), lonMapper.fieldType());
}
KeywordFieldMapper geoHashMapper = null;
FieldMapper geoHashMapper = null;
if (enableGeoHash || enableGeoHashPrefix) {
// TODO: possible also implicitly enable geohash if geohash precision is set
geoHashMapper = new KeywordFieldMapper.Builder(Names.GEOHASH).index(true).includeInAll(false).store(fieldType.stored()).build(context);
if (context.indexCreatedVersion().onOrAfter(Version.V_5_0_0_alpha1)) {
geoHashMapper = new KeywordFieldMapper.Builder(Names.GEOHASH)
.index(true).includeInAll(false).store(fieldType.stored()).build(context);
} else {
geoHashMapper = new StringFieldMapper.Builder(Names.GEOHASH)
.tokenized(false).index(true).omitNorms(true).indexOptions(IndexOptions.DOCS)
.includeInAll(false).store(fieldType.stored()).build(context);
}
geoPointFieldType.setGeoHashEnabled(geoHashMapper.fieldType(), geoHashPrecision, enableGeoHashPrefix);
}
context.path().remove();
@ -380,12 +389,12 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
protected FieldMapper lonMapper;
protected KeywordFieldMapper geoHashMapper;
protected FieldMapper geoHashMapper;
protected Explicit<Boolean> ignoreMalformed;
protected BaseGeoPointFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings,
FieldMapper latMapper, FieldMapper lonMapper, KeywordFieldMapper geoHashMapper,
FieldMapper latMapper, FieldMapper lonMapper, FieldMapper geoHashMapper,
MultiFields multiFields, Explicit<Boolean> ignoreMalformed, CopyTo copyTo) {
super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
this.latMapper = latMapper;
@ -556,7 +565,7 @@ public abstract class BaseGeoPointFieldMapper extends FieldMapper implements Arr
@Override
public FieldMapper updateFieldType(Map<String, MappedFieldType> fullNameToFieldType) {
BaseGeoPointFieldMapper updated = (BaseGeoPointFieldMapper) super.updateFieldType(fullNameToFieldType);
KeywordFieldMapper geoUpdated = geoHashMapper == null ? null : (KeywordFieldMapper) geoHashMapper.updateFieldType(fullNameToFieldType);
FieldMapper geoUpdated = geoHashMapper == null ? null : geoHashMapper.updateFieldType(fullNameToFieldType);
FieldMapper latUpdated = latMapper == null ? null : latMapper.updateFieldType(fullNameToFieldType);
FieldMapper lonUpdated = lonMapper == null ? null : lonMapper.updateFieldType(fullNameToFieldType);
if (updated == this

View File

@ -79,7 +79,7 @@ public class GeoPointFieldMapper extends BaseGeoPointFieldMapper {
@Override
public GeoPointFieldMapper build(BuilderContext context, String simpleName, MappedFieldType fieldType,
MappedFieldType defaultFieldType, Settings indexSettings, FieldMapper latMapper,
FieldMapper lonMapper, KeywordFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
FieldMapper lonMapper, FieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
CopyTo copyTo) {
fieldType.setTokenized(false);
if (context.indexCreatedVersion().before(Version.V_2_3_0)) {
@ -110,7 +110,7 @@ public class GeoPointFieldMapper extends BaseGeoPointFieldMapper {
public GeoPointFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings,
FieldMapper latMapper, FieldMapper lonMapper,
KeywordFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed, CopyTo copyTo) {
FieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed, CopyTo copyTo) {
super(simpleName, fieldType, defaultFieldType, indexSettings, latMapper, lonMapper, geoHashMapper, multiFields,
ignoreMalformed, copyTo);
}

View File

@ -25,7 +25,6 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Explicit;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.geo.GeoDistance;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.GeoUtils;
@ -40,7 +39,6 @@ import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.CustomDocValuesField;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.core.KeywordFieldMapper;
import org.elasticsearch.index.mapper.object.ArrayValueMapperParser;
import java.io.IOException;
@ -109,7 +107,7 @@ public class GeoPointFieldMapperLegacy extends BaseGeoPointFieldMapper implement
@Override
public GeoPointFieldMapperLegacy build(BuilderContext context, String simpleName, MappedFieldType fieldType,
MappedFieldType defaultFieldType, Settings indexSettings, FieldMapper latMapper,
FieldMapper lonMapper, KeywordFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
FieldMapper lonMapper, FieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
CopyTo copyTo) {
fieldType.setTokenized(false);
setupFieldType(context);
@ -267,7 +265,7 @@ public class GeoPointFieldMapperLegacy extends BaseGeoPointFieldMapper implement
public GeoPointFieldMapperLegacy(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, Settings indexSettings,
FieldMapper latMapper, FieldMapper lonMapper,
KeywordFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
FieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
Explicit<Boolean> coerce, CopyTo copyTo) {
super(simpleName, fieldType, defaultFieldType, indexSettings, latMapper, lonMapper, geoHashMapper, multiFields,
ignoreMalformed, copyTo);

View File

@ -78,7 +78,7 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(2, fields.length);
assertEquals("1234", fields[0].stringValue());
assertEquals(new BytesRef("1234"), fields[0].binaryValue());
IndexableFieldType fieldType = fields[0].fieldType();
assertThat(fieldType.omitNorms(), equalTo(true));
assertFalse(fieldType.tokenized());
@ -163,7 +163,7 @@ public class KeywordFieldMapperTests extends ESSingleNodeTestCase {
fields = doc.rootDoc().getFields("field");
assertEquals(2, fields.length);
assertEquals("uri", fields[0].stringValue());
assertEquals(new BytesRef("uri"), fields[0].binaryValue());
}
public void testEnableStore() throws IOException {

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.dynamictemplate.simple;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
@ -80,7 +81,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
IndexableField f = doc.getField("name");
assertThat(f.name(), equalTo("name"));
assertThat(f.stringValue(), equalTo("some name"));
assertThat(f.binaryValue(), equalTo(new BytesRef("some name")));
assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
assertThat(f.fieldType().tokenized(), equalTo(false));
@ -98,7 +99,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
f = doc.getField("multi1.org");
assertThat(f.name(), equalTo("multi1.org"));
assertThat(f.stringValue(), equalTo("multi 1"));
assertThat(f.binaryValue(), equalTo(new BytesRef("multi 1")));
assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
assertThat(f.fieldType().tokenized(), equalTo(false));
@ -116,7 +117,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
f = doc.getField("multi2.org");
assertThat(f.name(), equalTo("multi2.org"));
assertThat(f.stringValue(), equalTo("multi 2"));
assertThat(f.binaryValue(), equalTo(new BytesRef("multi 2")));
assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
assertThat(f.fieldType().tokenized(), equalTo(false));
@ -137,7 +138,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
IndexableField f = doc.getField("name");
assertThat(f.name(), equalTo("name"));
assertThat(f.stringValue(), equalTo("some name"));
assertThat(f.binaryValue(), equalTo(new BytesRef("some name")));
assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
assertThat(f.fieldType().tokenized(), equalTo(false));
@ -155,7 +156,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
f = doc.getField("multi1.org");
assertThat(f.name(), equalTo("multi1.org"));
assertThat(f.stringValue(), equalTo("multi 1"));
assertThat(f.binaryValue(), equalTo(new BytesRef("multi 1")));
assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
assertThat(f.fieldType().tokenized(), equalTo(false));
@ -173,7 +174,7 @@ public class SimpleDynamicTemplatesTests extends ESSingleNodeTestCase {
f = doc.getField("multi2.org");
assertThat(f.name(), equalTo("multi2.org"));
assertThat(f.stringValue(), equalTo("multi 2"));
assertThat(f.binaryValue(), equalTo(new BytesRef("multi 2")));
assertNotSame(IndexOptions.NONE, f.fieldType().indexOptions());
assertThat(f.fieldType().tokenized(), equalTo(false));

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.mapper.externalvalues;
import org.apache.lucene.spatial.geopoint.document.GeoPointField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.compress.CompressedXContent;
@ -155,7 +156,7 @@ public class SimpleExternalMappingTests extends ESSingleNodeTestCase {
assertThat(doc.rootDoc().getField("field.field").stringValue(), is("foo"));
assertThat(doc.rootDoc().getField("field.field.raw"), notNullValue());
assertThat(doc.rootDoc().getField("field.field.raw").stringValue(), is("foo"));
assertThat(doc.rootDoc().getField("field.field.raw").binaryValue(), is(new BytesRef("foo")));
}
public void testExternalValuesWithMultifieldTwoLevels() throws Exception {

View File

@ -20,6 +20,7 @@ package org.elasticsearch.index.mapper.geo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.spatial.geopoint.document.GeoPointField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
@ -112,7 +113,11 @@ public class GeoPointFieldMapperTests extends ESSingleNodeTestCase {
assertThat(doc.rootDoc().getField("point.lat"), notNullValue());
assertThat(doc.rootDoc().getField("point.lon"), notNullValue());
assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
if (version.onOrAfter(Version.V_5_0_0_alpha1)) {
assertThat(doc.rootDoc().getBinaryValue("point.geohash"), equalTo(new BytesRef(stringEncode(1.3, 1.2))));
} else {
assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
}
}
public void testLatLonInOneValueWithGeohash() throws Exception {
@ -132,7 +137,11 @@ public class GeoPointFieldMapperTests extends ESSingleNodeTestCase {
assertThat(doc.rootDoc().getField("point.lat"), notNullValue());
assertThat(doc.rootDoc().getField("point.lon"), notNullValue());
assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
if (version.onOrAfter(Version.V_5_0_0_alpha1)) {
assertThat(doc.rootDoc().getBinaryValue("point.geohash"), equalTo(new BytesRef(stringEncode(1.3, 1.2))));
} else {
assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
}
}
public void testGeoHashIndexValue() throws Exception {
@ -152,7 +161,11 @@ public class GeoPointFieldMapperTests extends ESSingleNodeTestCase {
assertThat(doc.rootDoc().getField("point.lat"), notNullValue());
assertThat(doc.rootDoc().getField("point.lon"), notNullValue());
assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
if (version.onOrAfter(Version.V_5_0_0_alpha1)) {
assertThat(doc.rootDoc().getBinaryValue("point.geohash"), equalTo(new BytesRef(stringEncode(1.3, 1.2))));
} else {
assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
}
}
public void testGeoHashValue() throws Exception {
@ -848,7 +861,9 @@ public class GeoPointFieldMapperTests extends ESSingleNodeTestCase {
final int numHashes = hashes.size();
for(int i=0; i<numHashes; ++i) {
assertEquals("dr5regy6rc6y".substring(0, numHashes-i), hashes.get(i));
String hash = "dr5regy6rc6y".substring(0, numHashes-i);
Object expected = version.before(Version.V_5_0_0_alpha1) ? hash : new BytesRef(hash);
assertEquals(expected, hashes.get(i));
}
}

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.mapper.geo;
import org.apache.lucene.spatial.geopoint.document.GeoPointField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.compress.CompressedXContent;
@ -117,7 +118,7 @@ public class GeohashMappingGeoPointTests extends ESSingleNodeTestCase {
assertThat(doc.rootDoc().getField("point.lat"), nullValue());
assertThat(doc.rootDoc().getField("point.lon"), nullValue());
assertThat(doc.rootDoc().get("point.geohash"), equalTo(stringEncode(1.3, 1.2)));
assertThat(doc.rootDoc().getBinaryValue("point.geohash"), equalTo(new BytesRef(stringEncode(1.3, 1.2))));
assertThat(doc.rootDoc().get("point"), notNullValue());
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.multifield;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.compress.CompressedXContent;
@ -89,7 +90,7 @@ public class MultiFieldTests extends ESSingleNodeTestCase {
f = doc.getField("object1.multi1.string");
assertThat(f.name(), equalTo("object1.multi1.string"));
assertThat(f.stringValue(), equalTo("2010-01-01"));
assertThat(f.binaryValue(), equalTo(new BytesRef("2010-01-01")));
assertThat(docMapper.mappers().getMapper("name"), notNullValue());
assertThat(docMapper.mappers().getMapper("name"), instanceOf(TextFieldMapper.class));