Move the `murmur3` field to a plugin and fix defaults.
This moves the `murmur3` field to the `mapper-murmur3` plugin and fixes its defaults so that values will not be indexed by default, as the only purpose of this field is to speed up `cardinality` aggregations on high-cardinality string fields, which only requires doc values. I also removed the `rehash` option from the `cardinality` aggregation as it doesn't bring much value (rehashing is cheap) and removing it allowed dropping the coupling between the `cardinality` aggregation and the `murmur3` field. Close #12874
This commit is contained in:
parent
7765b0497d
commit
a91b3fcbb9
|
@ -101,8 +101,7 @@ public class DocumentMapperParser {
|
||||||
.put(ObjectMapper.NESTED_CONTENT_TYPE, new ObjectMapper.TypeParser())
|
.put(ObjectMapper.NESTED_CONTENT_TYPE, new ObjectMapper.TypeParser())
|
||||||
.put(TypeParsers.MULTI_FIELD_CONTENT_TYPE, TypeParsers.multiFieldConverterTypeParser)
|
.put(TypeParsers.MULTI_FIELD_CONTENT_TYPE, TypeParsers.multiFieldConverterTypeParser)
|
||||||
.put(CompletionFieldMapper.CONTENT_TYPE, new CompletionFieldMapper.TypeParser())
|
.put(CompletionFieldMapper.CONTENT_TYPE, new CompletionFieldMapper.TypeParser())
|
||||||
.put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser())
|
.put(GeoPointFieldMapper.CONTENT_TYPE, new GeoPointFieldMapper.TypeParser());
|
||||||
.put(Murmur3FieldMapper.CONTENT_TYPE, new Murmur3FieldMapper.TypeParser());
|
|
||||||
|
|
||||||
if (ShapesAvailability.JTS_AVAILABLE) {
|
if (ShapesAvailability.JTS_AVAILABLE) {
|
||||||
typeParsersBuilder.put(GeoShapeFieldMapper.CONTENT_TYPE, new GeoShapeFieldMapper.TypeParser());
|
typeParsersBuilder.put(GeoShapeFieldMapper.CONTENT_TYPE, new GeoShapeFieldMapper.TypeParser());
|
||||||
|
|
|
@ -84,10 +84,6 @@ public final class MapperBuilders {
|
||||||
return new LongFieldMapper.Builder(name);
|
return new LongFieldMapper.Builder(name);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Murmur3FieldMapper.Builder murmur3Field(String name) {
|
|
||||||
return new Murmur3FieldMapper.Builder(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static FloatFieldMapper.Builder floatField(String name) {
|
public static FloatFieldMapper.Builder floatField(String name) {
|
||||||
return new FloatFieldMapper.Builder(name);
|
return new FloatFieldMapper.Builder(name);
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,6 +86,7 @@ public class PluginManager {
|
||||||
"elasticsearch-delete-by-query",
|
"elasticsearch-delete-by-query",
|
||||||
"elasticsearch-lang-javascript",
|
"elasticsearch-lang-javascript",
|
||||||
"elasticsearch-lang-python",
|
"elasticsearch-lang-python",
|
||||||
|
"elasticsearch-mapper-murmur3",
|
||||||
"elasticsearch-mapper-size"
|
"elasticsearch-mapper-size"
|
||||||
).build();
|
).build();
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,6 @@ import java.util.Map;
|
||||||
public class CardinalityAggregator extends NumericMetricsAggregator.SingleValue {
|
public class CardinalityAggregator extends NumericMetricsAggregator.SingleValue {
|
||||||
|
|
||||||
private final int precision;
|
private final int precision;
|
||||||
private final boolean rehash;
|
|
||||||
private final ValuesSource valuesSource;
|
private final ValuesSource valuesSource;
|
||||||
|
|
||||||
// Expensive to initialize, so we only initialize it when we have an actual value source
|
// Expensive to initialize, so we only initialize it when we have an actual value source
|
||||||
|
@ -66,11 +65,10 @@ public class CardinalityAggregator extends NumericMetricsAggregator.SingleValue
|
||||||
private Collector collector;
|
private Collector collector;
|
||||||
private ValueFormatter formatter;
|
private ValueFormatter formatter;
|
||||||
|
|
||||||
public CardinalityAggregator(String name, ValuesSource valuesSource, boolean rehash, int precision, ValueFormatter formatter,
|
public CardinalityAggregator(String name, ValuesSource valuesSource, int precision, ValueFormatter formatter,
|
||||||
AggregationContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
|
AggregationContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
|
||||||
super(name, context, parent, pipelineAggregators, metaData);
|
super(name, context, parent, pipelineAggregators, metaData);
|
||||||
this.valuesSource = valuesSource;
|
this.valuesSource = valuesSource;
|
||||||
this.rehash = rehash;
|
|
||||||
this.precision = precision;
|
this.precision = precision;
|
||||||
this.counts = valuesSource == null ? null : new HyperLogLogPlusPlus(precision, context.bigArrays(), 1);
|
this.counts = valuesSource == null ? null : new HyperLogLogPlusPlus(precision, context.bigArrays(), 1);
|
||||||
this.formatter = formatter;
|
this.formatter = formatter;
|
||||||
|
@ -85,13 +83,6 @@ public class CardinalityAggregator extends NumericMetricsAggregator.SingleValue
|
||||||
if (valuesSource == null) {
|
if (valuesSource == null) {
|
||||||
return new EmptyCollector();
|
return new EmptyCollector();
|
||||||
}
|
}
|
||||||
// if rehash is false then the value source is either already hashed, or the user explicitly
|
|
||||||
// requested not to hash the values (perhaps they already hashed the values themselves before indexing the doc)
|
|
||||||
// so we can just work with the original value source as is
|
|
||||||
if (!rehash) {
|
|
||||||
MurmurHash3Values hashValues = MurmurHash3Values.cast(((ValuesSource.Numeric) valuesSource).longValues(ctx));
|
|
||||||
return new DirectCollector(counts, hashValues);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (valuesSource instanceof ValuesSource.Numeric) {
|
if (valuesSource instanceof ValuesSource.Numeric) {
|
||||||
ValuesSource.Numeric source = (ValuesSource.Numeric) valuesSource;
|
ValuesSource.Numeric source = (ValuesSource.Numeric) valuesSource;
|
||||||
|
|
|
@ -19,7 +19,6 @@
|
||||||
|
|
||||||
package org.elasticsearch.search.aggregations.metrics.cardinality;
|
package org.elasticsearch.search.aggregations.metrics.cardinality;
|
||||||
|
|
||||||
import org.elasticsearch.search.aggregations.AggregationExecutionException;
|
|
||||||
import org.elasticsearch.search.aggregations.Aggregator;
|
import org.elasticsearch.search.aggregations.Aggregator;
|
||||||
import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregator;
|
import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregator;
|
||||||
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
||||||
|
@ -35,12 +34,10 @@ import java.util.Map;
|
||||||
final class CardinalityAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {
|
final class CardinalityAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {
|
||||||
|
|
||||||
private final long precisionThreshold;
|
private final long precisionThreshold;
|
||||||
private final boolean rehash;
|
|
||||||
|
|
||||||
CardinalityAggregatorFactory(String name, ValuesSourceConfig config, long precisionThreshold, boolean rehash) {
|
CardinalityAggregatorFactory(String name, ValuesSourceConfig config, long precisionThreshold) {
|
||||||
super(name, InternalCardinality.TYPE.name(), config);
|
super(name, InternalCardinality.TYPE.name(), config);
|
||||||
this.precisionThreshold = precisionThreshold;
|
this.precisionThreshold = precisionThreshold;
|
||||||
this.rehash = rehash;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private int precision(Aggregator parent) {
|
private int precision(Aggregator parent) {
|
||||||
|
@ -50,16 +47,13 @@ final class CardinalityAggregatorFactory extends ValuesSourceAggregatorFactory<V
|
||||||
@Override
|
@Override
|
||||||
protected Aggregator createUnmapped(AggregationContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
|
protected Aggregator createUnmapped(AggregationContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
return new CardinalityAggregator(name, null, true, precision(parent), config.formatter(), context, parent, pipelineAggregators, metaData);
|
return new CardinalityAggregator(name, null, precision(parent), config.formatter(), context, parent, pipelineAggregators, metaData);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Aggregator doCreateInternal(ValuesSource valuesSource, AggregationContext context, Aggregator parent,
|
protected Aggregator doCreateInternal(ValuesSource valuesSource, AggregationContext context, Aggregator parent,
|
||||||
boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
|
boolean collectsFromSingleBucket, List<PipelineAggregator> pipelineAggregators, Map<String, Object> metaData) throws IOException {
|
||||||
if (!(valuesSource instanceof ValuesSource.Numeric) && !rehash) {
|
return new CardinalityAggregator(name, valuesSource, precision(parent), config.formatter(), context, parent, pipelineAggregators,
|
||||||
throw new AggregationExecutionException("Turning off rehashing for cardinality aggregation [" + name + "] on non-numeric values in not allowed");
|
|
||||||
}
|
|
||||||
return new CardinalityAggregator(name, valuesSource, rehash, precision(parent), config.formatter(), context, parent, pipelineAggregators,
|
|
||||||
metaData);
|
metaData);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,11 +21,9 @@ package org.elasticsearch.search.aggregations.metrics.cardinality;
|
||||||
|
|
||||||
import org.elasticsearch.common.ParseField;
|
import org.elasticsearch.common.ParseField;
|
||||||
import org.elasticsearch.common.xcontent.XContentParser;
|
import org.elasticsearch.common.xcontent.XContentParser;
|
||||||
import org.elasticsearch.index.mapper.core.Murmur3FieldMapper;
|
|
||||||
import org.elasticsearch.search.SearchParseException;
|
import org.elasticsearch.search.SearchParseException;
|
||||||
import org.elasticsearch.search.aggregations.Aggregator;
|
import org.elasticsearch.search.aggregations.Aggregator;
|
||||||
import org.elasticsearch.search.aggregations.AggregatorFactory;
|
import org.elasticsearch.search.aggregations.AggregatorFactory;
|
||||||
import org.elasticsearch.search.aggregations.support.ValuesSourceConfig;
|
|
||||||
import org.elasticsearch.search.aggregations.support.ValuesSourceParser;
|
import org.elasticsearch.search.aggregations.support.ValuesSourceParser;
|
||||||
import org.elasticsearch.search.internal.SearchContext;
|
import org.elasticsearch.search.internal.SearchContext;
|
||||||
|
|
||||||
|
@ -35,6 +33,7 @@ import java.io.IOException;
|
||||||
public class CardinalityParser implements Aggregator.Parser {
|
public class CardinalityParser implements Aggregator.Parser {
|
||||||
|
|
||||||
private static final ParseField PRECISION_THRESHOLD = new ParseField("precision_threshold");
|
private static final ParseField PRECISION_THRESHOLD = new ParseField("precision_threshold");
|
||||||
|
private static final ParseField REHASH = new ParseField("rehash").withAllDeprecated("no replacement - values will always be rehashed");
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String type() {
|
public String type() {
|
||||||
|
@ -44,10 +43,9 @@ public class CardinalityParser implements Aggregator.Parser {
|
||||||
@Override
|
@Override
|
||||||
public AggregatorFactory parse(String name, XContentParser parser, SearchContext context) throws IOException {
|
public AggregatorFactory parse(String name, XContentParser parser, SearchContext context) throws IOException {
|
||||||
|
|
||||||
ValuesSourceParser vsParser = ValuesSourceParser.any(name, InternalCardinality.TYPE, context).formattable(false).build();
|
ValuesSourceParser<?> vsParser = ValuesSourceParser.any(name, InternalCardinality.TYPE, context).formattable(false).build();
|
||||||
|
|
||||||
long precisionThreshold = -1;
|
long precisionThreshold = -1;
|
||||||
Boolean rehash = null;
|
|
||||||
|
|
||||||
XContentParser.Token token;
|
XContentParser.Token token;
|
||||||
String currentFieldName = null;
|
String currentFieldName = null;
|
||||||
|
@ -57,8 +55,8 @@ public class CardinalityParser implements Aggregator.Parser {
|
||||||
} else if (vsParser.token(currentFieldName, token, parser)) {
|
} else if (vsParser.token(currentFieldName, token, parser)) {
|
||||||
continue;
|
continue;
|
||||||
} else if (token.isValue()) {
|
} else if (token.isValue()) {
|
||||||
if ("rehash".equals(currentFieldName)) {
|
if (context.parseFieldMatcher().match(currentFieldName, REHASH)) {
|
||||||
rehash = parser.booleanValue();
|
// ignore
|
||||||
} else if (context.parseFieldMatcher().match(currentFieldName, PRECISION_THRESHOLD)) {
|
} else if (context.parseFieldMatcher().match(currentFieldName, PRECISION_THRESHOLD)) {
|
||||||
precisionThreshold = parser.longValue();
|
precisionThreshold = parser.longValue();
|
||||||
} else {
|
} else {
|
||||||
|
@ -70,15 +68,7 @@ public class CardinalityParser implements Aggregator.Parser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ValuesSourceConfig<?> config = vsParser.config();
|
return new CardinalityAggregatorFactory(name, vsParser.config(), precisionThreshold);
|
||||||
|
|
||||||
if (rehash == null && config.fieldContext() != null && config.fieldContext().fieldType() instanceof Murmur3FieldMapper.Murmur3FieldType) {
|
|
||||||
rehash = false;
|
|
||||||
} else if (rehash == null) {
|
|
||||||
rehash = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return new CardinalityAggregatorFactory(name, config, precisionThreshold, rehash);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,7 @@ OFFICIAL PLUGINS
|
||||||
- elasticsearch-delete-by-query
|
- elasticsearch-delete-by-query
|
||||||
- elasticsearch-lang-javascript
|
- elasticsearch-lang-javascript
|
||||||
- elasticsearch-lang-python
|
- elasticsearch-lang-python
|
||||||
|
- elasticsearch-mapper-murmur3
|
||||||
- elasticsearch-mapper-size
|
- elasticsearch-mapper-size
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1116,7 +1116,7 @@ public class GetActionIT extends ESIntegTestCase {
|
||||||
@Test
|
@Test
|
||||||
public void testGeneratedNumberFieldsUnstored() throws IOException {
|
public void testGeneratedNumberFieldsUnstored() throws IOException {
|
||||||
indexSingleDocumentWithNumericFieldsGeneratedFromText(false, randomBoolean());
|
indexSingleDocumentWithNumericFieldsGeneratedFromText(false, randomBoolean());
|
||||||
String[] fieldsList = {"token_count", "text.token_count", "murmur", "text.murmur"};
|
String[] fieldsList = {"token_count", "text.token_count"};
|
||||||
// before refresh - document is only in translog
|
// before refresh - document is only in translog
|
||||||
assertGetFieldsAlwaysNull(indexOrAlias(), "doc", "1", fieldsList);
|
assertGetFieldsAlwaysNull(indexOrAlias(), "doc", "1", fieldsList);
|
||||||
refresh();
|
refresh();
|
||||||
|
@ -1130,7 +1130,7 @@ public class GetActionIT extends ESIntegTestCase {
|
||||||
@Test
|
@Test
|
||||||
public void testGeneratedNumberFieldsStored() throws IOException {
|
public void testGeneratedNumberFieldsStored() throws IOException {
|
||||||
indexSingleDocumentWithNumericFieldsGeneratedFromText(true, randomBoolean());
|
indexSingleDocumentWithNumericFieldsGeneratedFromText(true, randomBoolean());
|
||||||
String[] fieldsList = {"token_count", "text.token_count", "murmur", "text.murmur"};
|
String[] fieldsList = {"token_count", "text.token_count"};
|
||||||
// before refresh - document is only in translog
|
// before refresh - document is only in translog
|
||||||
assertGetFieldsNull(indexOrAlias(), "doc", "1", fieldsList);
|
assertGetFieldsNull(indexOrAlias(), "doc", "1", fieldsList);
|
||||||
assertGetFieldsException(indexOrAlias(), "doc", "1", fieldsList);
|
assertGetFieldsException(indexOrAlias(), "doc", "1", fieldsList);
|
||||||
|
@ -1159,10 +1159,6 @@ public class GetActionIT extends ESIntegTestCase {
|
||||||
" \"analyzer\": \"standard\",\n" +
|
" \"analyzer\": \"standard\",\n" +
|
||||||
" \"store\": \"" + storedString + "\"" +
|
" \"store\": \"" + storedString + "\"" +
|
||||||
" },\n" +
|
" },\n" +
|
||||||
" \"murmur\": {\n" +
|
|
||||||
" \"type\": \"murmur3\",\n" +
|
|
||||||
" \"store\": \"" + storedString + "\"" +
|
|
||||||
" },\n" +
|
|
||||||
" \"text\": {\n" +
|
" \"text\": {\n" +
|
||||||
" \"type\": \"string\",\n" +
|
" \"type\": \"string\",\n" +
|
||||||
" \"fields\": {\n" +
|
" \"fields\": {\n" +
|
||||||
|
@ -1170,10 +1166,6 @@ public class GetActionIT extends ESIntegTestCase {
|
||||||
" \"type\": \"token_count\",\n" +
|
" \"type\": \"token_count\",\n" +
|
||||||
" \"analyzer\": \"standard\",\n" +
|
" \"analyzer\": \"standard\",\n" +
|
||||||
" \"store\": \"" + storedString + "\"" +
|
" \"store\": \"" + storedString + "\"" +
|
||||||
" },\n" +
|
|
||||||
" \"murmur\": {\n" +
|
|
||||||
" \"type\": \"murmur3\",\n" +
|
|
||||||
" \"store\": \"" + storedString + "\"" +
|
|
||||||
" }\n" +
|
" }\n" +
|
||||||
" }\n" +
|
" }\n" +
|
||||||
" }" +
|
" }" +
|
||||||
|
@ -1185,7 +1177,6 @@ public class GetActionIT extends ESIntegTestCase {
|
||||||
assertAcked(prepareCreate("test").addAlias(new Alias("alias")).setSource(createIndexSource));
|
assertAcked(prepareCreate("test").addAlias(new Alias("alias")).setSource(createIndexSource));
|
||||||
ensureGreen();
|
ensureGreen();
|
||||||
String doc = "{\n" +
|
String doc = "{\n" +
|
||||||
" \"murmur\": \"Some value that can be hashed\",\n" +
|
|
||||||
" \"token_count\": \"A text with five words.\",\n" +
|
" \"token_count\": \"A text with five words.\",\n" +
|
||||||
" \"text\": \"A text with five words.\"\n" +
|
" \"text\": \"A text with five words.\"\n" +
|
||||||
"}\n";
|
"}\n";
|
||||||
|
|
|
@ -550,6 +550,7 @@ public class PluginManagerIT extends ESIntegTestCase {
|
||||||
PluginManager.checkForOfficialPlugins("elasticsearch-delete-by-query");
|
PluginManager.checkForOfficialPlugins("elasticsearch-delete-by-query");
|
||||||
PluginManager.checkForOfficialPlugins("elasticsearch-lang-javascript");
|
PluginManager.checkForOfficialPlugins("elasticsearch-lang-javascript");
|
||||||
PluginManager.checkForOfficialPlugins("elasticsearch-lang-python");
|
PluginManager.checkForOfficialPlugins("elasticsearch-lang-python");
|
||||||
|
PluginManager.checkForOfficialPlugins("elasticsearch-mapper-murmur3");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
PluginManager.checkForOfficialPlugins("elasticsearch-mapper-attachment");
|
PluginManager.checkForOfficialPlugins("elasticsearch-mapper-attachment");
|
||||||
|
|
|
@ -61,54 +61,23 @@ public class CardinalityIT extends ESIntegTestCase {
|
||||||
jsonBuilder().startObject().startObject("type").startObject("properties")
|
jsonBuilder().startObject().startObject("type").startObject("properties")
|
||||||
.startObject("str_value")
|
.startObject("str_value")
|
||||||
.field("type", "string")
|
.field("type", "string")
|
||||||
.startObject("fields")
|
|
||||||
.startObject("hash")
|
|
||||||
.field("type", "murmur3")
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
.endObject()
|
||||||
.startObject("str_values")
|
.startObject("str_values")
|
||||||
.field("type", "string")
|
.field("type", "string")
|
||||||
.startObject("fields")
|
|
||||||
.startObject("hash")
|
|
||||||
.field("type", "murmur3")
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
.endObject()
|
||||||
.startObject("l_value")
|
.startObject("l_value")
|
||||||
.field("type", "long")
|
.field("type", "long")
|
||||||
.startObject("fields")
|
|
||||||
.startObject("hash")
|
|
||||||
.field("type", "murmur3")
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
.endObject()
|
||||||
.startObject("l_values")
|
.startObject("l_values")
|
||||||
.field("type", "long")
|
.field("type", "long")
|
||||||
.startObject("fields")
|
|
||||||
.startObject("hash")
|
|
||||||
.field("type", "murmur3")
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
.endObject()
|
||||||
.startObject("d_value")
|
.startObject("d_value")
|
||||||
.field("type", "double")
|
.field("type", "double")
|
||||||
.startObject("fields")
|
.endObject()
|
||||||
.startObject("hash")
|
.startObject("d_values")
|
||||||
.field("type", "murmur3")
|
.field("type", "double")
|
||||||
.endObject()
|
.endObject()
|
||||||
.endObject()
|
.endObject().endObject().endObject()).execute().actionGet();
|
||||||
.endObject()
|
|
||||||
.startObject("d_values")
|
|
||||||
.field("type", "double")
|
|
||||||
.startObject("fields")
|
|
||||||
.startObject("hash")
|
|
||||||
.field("type", "murmur3")
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
|
||||||
.endObject()
|
|
||||||
.endObject().endObject()).execute().actionGet();
|
|
||||||
|
|
||||||
numDocs = randomIntBetween(2, 100);
|
numDocs = randomIntBetween(2, 100);
|
||||||
precisionThreshold = randomIntBetween(0, 1 << randomInt(20));
|
precisionThreshold = randomIntBetween(0, 1 << randomInt(20));
|
||||||
|
@ -145,12 +114,12 @@ public class CardinalityIT extends ESIntegTestCase {
|
||||||
assertThat(count.getValue(), greaterThan(0L));
|
assertThat(count.getValue(), greaterThan(0L));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
private String singleNumericField(boolean hash) {
|
private String singleNumericField() {
|
||||||
return (randomBoolean() ? "l_value" : "d_value") + (hash ? ".hash" : "");
|
return randomBoolean() ? "l_value" : "d_value";
|
||||||
}
|
}
|
||||||
|
|
||||||
private String multiNumericField(boolean hash) {
|
private String multiNumericField(boolean hash) {
|
||||||
return (randomBoolean() ? "l_values" : "d_values") + (hash ? ".hash" : "");
|
return randomBoolean() ? "l_values" : "d_values";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -195,24 +164,10 @@ public class CardinalityIT extends ESIntegTestCase {
|
||||||
assertCount(count, numDocs);
|
assertCount(count, numDocs);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void singleValuedStringHashed() throws Exception {
|
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
|
||||||
.addAggregation(cardinality("cardinality").precisionThreshold(precisionThreshold).field("str_value.hash"))
|
|
||||||
.execute().actionGet();
|
|
||||||
|
|
||||||
assertSearchResponse(response);
|
|
||||||
|
|
||||||
Cardinality count = response.getAggregations().get("cardinality");
|
|
||||||
assertThat(count, notNullValue());
|
|
||||||
assertThat(count.getName(), equalTo("cardinality"));
|
|
||||||
assertCount(count, numDocs);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void singleValuedNumeric() throws Exception {
|
public void singleValuedNumeric() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(cardinality("cardinality").precisionThreshold(precisionThreshold).field(singleNumericField(false)))
|
.addAggregation(cardinality("cardinality").precisionThreshold(precisionThreshold).field(singleNumericField()))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
assertSearchResponse(response);
|
assertSearchResponse(response);
|
||||||
|
@ -229,7 +184,7 @@ public class CardinalityIT extends ESIntegTestCase {
|
||||||
SearchResponse searchResponse = client().prepareSearch("idx").setQuery(matchAllQuery())
|
SearchResponse searchResponse = client().prepareSearch("idx").setQuery(matchAllQuery())
|
||||||
.addAggregation(
|
.addAggregation(
|
||||||
global("global").subAggregation(
|
global("global").subAggregation(
|
||||||
cardinality("cardinality").precisionThreshold(precisionThreshold).field(singleNumericField(false))))
|
cardinality("cardinality").precisionThreshold(precisionThreshold).field(singleNumericField())))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
assertSearchResponse(searchResponse);
|
assertSearchResponse(searchResponse);
|
||||||
|
@ -254,7 +209,7 @@ public class CardinalityIT extends ESIntegTestCase {
|
||||||
@Test
|
@Test
|
||||||
public void singleValuedNumericHashed() throws Exception {
|
public void singleValuedNumericHashed() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(cardinality("cardinality").precisionThreshold(precisionThreshold).field(singleNumericField(true)))
|
.addAggregation(cardinality("cardinality").precisionThreshold(precisionThreshold).field(singleNumericField()))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
assertSearchResponse(response);
|
assertSearchResponse(response);
|
||||||
|
@ -279,20 +234,6 @@ public class CardinalityIT extends ESIntegTestCase {
|
||||||
assertCount(count, numDocs * 2);
|
assertCount(count, numDocs * 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void multiValuedStringHashed() throws Exception {
|
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
|
||||||
.addAggregation(cardinality("cardinality").precisionThreshold(precisionThreshold).field("str_values.hash"))
|
|
||||||
.execute().actionGet();
|
|
||||||
|
|
||||||
assertSearchResponse(response);
|
|
||||||
|
|
||||||
Cardinality count = response.getAggregations().get("cardinality");
|
|
||||||
assertThat(count, notNullValue());
|
|
||||||
assertThat(count.getName(), equalTo("cardinality"));
|
|
||||||
assertCount(count, numDocs * 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void multiValuedNumeric() throws Exception {
|
public void multiValuedNumeric() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
|
@ -356,7 +297,7 @@ public class CardinalityIT extends ESIntegTestCase {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(
|
.addAggregation(
|
||||||
cardinality("cardinality").precisionThreshold(precisionThreshold).script(
|
cardinality("cardinality").precisionThreshold(precisionThreshold).script(
|
||||||
new Script("doc['" + singleNumericField(false) + "'].value")))
|
new Script("doc['" + singleNumericField() + "'].value")))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
assertSearchResponse(response);
|
assertSearchResponse(response);
|
||||||
|
@ -417,7 +358,7 @@ public class CardinalityIT extends ESIntegTestCase {
|
||||||
public void singleValuedNumericValueScript() throws Exception {
|
public void singleValuedNumericValueScript() throws Exception {
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
||||||
.addAggregation(
|
.addAggregation(
|
||||||
cardinality("cardinality").precisionThreshold(precisionThreshold).field(singleNumericField(false))
|
cardinality("cardinality").precisionThreshold(precisionThreshold).field(singleNumericField())
|
||||||
.script(new Script("_value")))
|
.script(new Script("_value")))
|
||||||
.execute().actionGet();
|
.execute().actionGet();
|
||||||
|
|
||||||
|
@ -464,23 +405,4 @@ public class CardinalityIT extends ESIntegTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void asSubAggHashed() throws Exception {
|
|
||||||
SearchResponse response = client().prepareSearch("idx").setTypes("type")
|
|
||||||
.addAggregation(terms("terms").field("str_value")
|
|
||||||
.collectMode(randomFrom(SubAggCollectionMode.values()))
|
|
||||||
.subAggregation(cardinality("cardinality").precisionThreshold(precisionThreshold).field("str_values.hash")))
|
|
||||||
.execute().actionGet();
|
|
||||||
|
|
||||||
assertSearchResponse(response);
|
|
||||||
|
|
||||||
Terms terms = response.getAggregations().get("terms");
|
|
||||||
for (Terms.Bucket bucket : terms.getBuckets()) {
|
|
||||||
Cardinality count = bucket.getAggregations().get("cardinality");
|
|
||||||
assertThat(count, notNullValue());
|
|
||||||
assertThat(count.getName(), equalTo("cardinality"));
|
|
||||||
assertCount(count, 2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
[[mapper-murmur3]]
|
||||||
|
=== Mapper Murmur3 Plugin
|
||||||
|
|
||||||
|
The mapper-murmur3 plugin provides the ability to compute hashes of field values
|
||||||
|
at index-time and store them in the index. This can sometimes be helpful when
|
||||||
|
running cardinality aggregations on high-cardinality and large string fields.
|
||||||
|
|
||||||
|
[[mapper-murmur3-install]]
|
||||||
|
[float]
|
||||||
|
==== Installation
|
||||||
|
|
||||||
|
This plugin can be installed using the plugin manager:
|
||||||
|
|
||||||
|
[source,sh]
|
||||||
|
----------------------------------------------------------------
|
||||||
|
sudo bin/plugin install mapper-murmur3
|
||||||
|
----------------------------------------------------------------
|
||||||
|
|
||||||
|
The plugin must be installed on every node in the cluster, and each node must
|
||||||
|
be restarted after installation.
|
||||||
|
|
||||||
|
[[mapper-murmur3-remove]]
|
||||||
|
[float]
|
||||||
|
==== Removal
|
||||||
|
|
||||||
|
The plugin can be removed with the following command:
|
||||||
|
|
||||||
|
[source,sh]
|
||||||
|
----------------------------------------------------------------
|
||||||
|
sudo bin/plugin remove mapper-murmur3
|
||||||
|
----------------------------------------------------------------
|
||||||
|
|
||||||
|
The node must be stopped before removing the plugin.
|
||||||
|
|
||||||
|
[[mapper-murmur3-usage]]
|
||||||
|
==== Using the `murmur3` field
|
||||||
|
|
||||||
|
The `murmur3` field is typically used within a multi-field, so that both the original
|
||||||
|
value and its hash are stored in the index:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------
|
||||||
|
PUT my_index
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"my_type": {
|
||||||
|
"properties": {
|
||||||
|
"my_field": {
|
||||||
|
"type": "string",
|
||||||
|
"fields": {
|
||||||
|
"hash": {
|
||||||
|
"type": "murmur3"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
|
||||||
|
Such a mapping would allow you to refer to `my_field.hash` in order to get hashes
|
||||||
|
of the values of the `my_field` field. This is only useful in order to run
|
||||||
|
`cardinality` aggregations:
|
||||||
|
|
||||||
|
[source,js]
|
||||||
|
--------------------------
|
||||||
|
# Example documents
|
||||||
|
PUT my_index/my_type/1
|
||||||
|
{
|
||||||
|
"my_field": "This is a document"
|
||||||
|
}
|
||||||
|
|
||||||
|
PUT my_index/my_type/2
|
||||||
|
{
|
||||||
|
"my_field": "This is another document"
|
||||||
|
}
|
||||||
|
|
||||||
|
GET my_index/_search
|
||||||
|
{
|
||||||
|
"aggs": {
|
||||||
|
"my_field_cardinality": {
|
||||||
|
"cardinality": {
|
||||||
|
"field": "my_field.hash" <1>
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
--------------------------
|
||||||
|
// AUTOSENSE
|
||||||
|
|
||||||
|
<1> Counting unique values on the `my_field.hash` field
|
||||||
|
|
||||||
|
Running a `cardinality` aggregation on the `my_field` field directly would
|
||||||
|
yield the same result, however using `my_field.hash` instead might result in
|
||||||
|
a speed-up if the field has a high cardinality. On the other hand, it is
|
||||||
|
discouraged to use the `murmur3` field on numeric fields and string fields
|
||||||
|
that are not almost unique as the use of a `murmur3` field is unlikely to
|
||||||
|
bring significant speed-ups, while increasing the amount of disk space required
|
||||||
|
to store the index.
|
|
@ -14,5 +14,10 @@ The mapper-size plugin provides the `_size` meta field which, when enabled,
|
||||||
indexes the size in bytes of the original
|
indexes the size in bytes of the original
|
||||||
{ref}/mapping-source-field.html[`_source`] field.
|
{ref}/mapping-source-field.html[`_source`] field.
|
||||||
|
|
||||||
include::mapper-size.asciidoc[]
|
<<mapper-murmur3>>::
|
||||||
|
|
||||||
|
The mapper-murmur3 plugin allows hashes to be computed at index-time and stored
|
||||||
|
in the index for later use with the `cardinality` aggregation.
|
||||||
|
|
||||||
|
include::mapper-size.asciidoc[]
|
||||||
|
include::mapper-murmur3.asciidoc[]
|
||||||
|
|
|
@ -23,9 +23,9 @@ match a query:
|
||||||
|
|
||||||
==== Precision control
|
==== Precision control
|
||||||
|
|
||||||
This aggregation also supports the `precision_threshold` and `rehash` options:
|
This aggregation also supports the `precision_threshold` option:
|
||||||
|
|
||||||
experimental[The `precision_threshold` and `rehash` options are specific to the current internal implementation of the `cardinality` agg, which may change in the future]
|
experimental[The `precision_threshold` option is specific to the current internal implementation of the `cardinality` agg, which may change in the future]
|
||||||
|
|
||||||
[source,js]
|
[source,js]
|
||||||
--------------------------------------------------
|
--------------------------------------------------
|
||||||
|
@ -34,8 +34,7 @@ experimental[The `precision_threshold` and `rehash` options are specific to the
|
||||||
"author_count" : {
|
"author_count" : {
|
||||||
"cardinality" : {
|
"cardinality" : {
|
||||||
"field" : "author_hash",
|
"field" : "author_hash",
|
||||||
"precision_threshold": 100, <1>
|
"precision_threshold": 100 <1>
|
||||||
"rehash": false <2>
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -49,11 +48,6 @@ supported value is 40000, thresholds above this number will have the same
|
||||||
effect as a threshold of 40000.
|
effect as a threshold of 40000.
|
||||||
Default value depends on the number of parent aggregations that create
|
Default value depends on the number of parent aggregations that create
|
||||||
multiple buckets (such as terms or histograms).
|
multiple buckets (such as terms or histograms).
|
||||||
<2> If you computed a hash on client-side, stored it into your documents and want
|
|
||||||
Elasticsearch to use them to compute counts using this hash function without
|
|
||||||
rehashing values, it is possible to specify `rehash: false`. Default value is
|
|
||||||
`true`. Please note that the hash must be indexed as a long when `rehash` is
|
|
||||||
false.
|
|
||||||
|
|
||||||
==== Counts are approximate
|
==== Counts are approximate
|
||||||
|
|
||||||
|
@ -86,47 +80,11 @@ counting millions of items.
|
||||||
|
|
||||||
==== Pre-computed hashes
|
==== Pre-computed hashes
|
||||||
|
|
||||||
If you don't want Elasticsearch to re-compute hashes on every run of this
|
On string fields that have a high cardinality, it might be faster to store the
|
||||||
aggregation, it is possible to use pre-computed hashes, either by computing a
|
hash of your field values in your index and then run the cardinality aggregation
|
||||||
hash on client-side, indexing it and specifying `rehash: false`, or by using
|
on this field. This can either be done by providing hash values from client-side
|
||||||
the special `murmur3` field mapper, typically in the context of a `multi-field`
|
or by letting Elasticsearch compute hash values for you by using the
|
||||||
in the mapping:
|
{plugins}/mapper-murmur3.html[`mapper-murmur3`] plugin.
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"author": {
|
|
||||||
"type": "string",
|
|
||||||
"fields": {
|
|
||||||
"hash": {
|
|
||||||
"type": "murmur3"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
With such a mapping, Elasticsearch is going to compute hashes of the `author`
|
|
||||||
field at indexing time and store them in the `author.hash` field. This
|
|
||||||
way, unique counts can be computed using the cardinality aggregation by only
|
|
||||||
loading the hashes into memory, not the values of the `author` field, and
|
|
||||||
without computing hashes on the fly:
|
|
||||||
|
|
||||||
[source,js]
|
|
||||||
--------------------------------------------------
|
|
||||||
{
|
|
||||||
"aggs" : {
|
|
||||||
"author_count" : {
|
|
||||||
"cardinality" : {
|
|
||||||
"field" : "author.hash"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
--------------------------------------------------
|
|
||||||
|
|
||||||
NOTE: `rehash` is automatically set to `false` when computing unique counts on
|
|
||||||
a `murmur3` field.
|
|
||||||
|
|
||||||
NOTE: Pre-computing hashes is usually only useful on very large and/or
|
NOTE: Pre-computing hashes is usually only useful on very large and/or
|
||||||
high-cardinality fields as it saves CPU and memory. However, on numeric
|
high-cardinality fields as it saves CPU and memory. However, on numeric
|
||||||
|
|
|
@ -33,6 +33,7 @@ document:
|
||||||
<<search-suggesters-completion,Completion datatype>>::
|
<<search-suggesters-completion,Completion datatype>>::
|
||||||
`completion` to provide auto-complete suggestions
|
`completion` to provide auto-complete suggestions
|
||||||
<<token-count>>:: `token_count` to count the number of tokens in a string
|
<<token-count>>:: `token_count` to count the number of tokens in a string
|
||||||
|
{plugins}/mapper-murmur3.html[`mapper-murmur3`]:: `murmur3` to compute hashes of values at index-time and store them in the index
|
||||||
|
|
||||||
Attachment datatype::
|
Attachment datatype::
|
||||||
|
|
||||||
|
|
|
@ -41,6 +41,16 @@ can install the plugin with:
|
||||||
The `_shutdown` API has been removed without a replacement. Nodes should be
|
The `_shutdown` API has been removed without a replacement. Nodes should be
|
||||||
managed via the operating system and the provided start/stop scripts.
|
managed via the operating system and the provided start/stop scripts.
|
||||||
|
|
||||||
|
==== `murmur3` is now a plugin
|
||||||
|
|
||||||
|
The `murmur3` field, which indexes hashes of the field values, has been moved
|
||||||
|
out of core and is available as a plugin. It can be installed with:
|
||||||
|
|
||||||
|
[source,sh]
|
||||||
|
------------------
|
||||||
|
./bin/plugin install mapper-murmur3
|
||||||
|
------------------
|
||||||
|
|
||||||
==== `_size` is now a plugin
|
==== `_size` is now a plugin
|
||||||
|
|
||||||
The `_size` meta-data field, which indexes the size in bytes of the original
|
The `_size` meta-data field, which indexes the size in bytes of the original
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
This plugin has no third party dependencies
|
|
@ -0,0 +1,43 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!-- Licensed to Elasticsearch under one or more contributor
|
||||||
|
license agreements. See the NOTICE file distributed with this work for additional
|
||||||
|
information regarding copyright ownership. ElasticSearch licenses this file to you
|
||||||
|
under the Apache License, Version 2.0 (the "License"); you may not use this
|
||||||
|
file except in compliance with the License. You may obtain a copy of the
|
||||||
|
License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by
|
||||||
|
applicable law or agreed to in writing, software distributed under the License
|
||||||
|
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the specific language
|
||||||
|
governing permissions and limitations under the License. -->
|
||||||
|
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>org.elasticsearch.plugin</groupId>
|
||||||
|
<artifactId>elasticsearch-plugin</artifactId>
|
||||||
|
<version>2.1.0-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>elasticsearch-mapper-murmur3</artifactId>
|
||||||
|
<name>Elasticsearch Mapper Murmur3 plugin</name>
|
||||||
|
<description>The Mapper Murmur3 plugin allows to compute hashes of a field's values at index-time and to store them in the index.</description>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<elasticsearch.plugin.classname>org.elasticsearch.plugin.mapper.MapperMurmur3Plugin</elasticsearch.plugin.classname>
|
||||||
|
<tests.rest.suite>mapper_murmur3</tests.rest.suite>
|
||||||
|
<tests.rest.load_packaged>false</tests.rest.load_packaged>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-assembly-plugin</artifactId>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
</project>
|
|
@ -0,0 +1,65 @@
|
||||||
|
# Integration tests for Mapper Murmur3 components
|
||||||
|
#
|
||||||
|
|
||||||
|
---
|
||||||
|
"Mapper Murmur3":
|
||||||
|
|
||||||
|
- do:
|
||||||
|
indices.create:
|
||||||
|
index: test
|
||||||
|
body:
|
||||||
|
mappings:
|
||||||
|
type1: { "properties": { "foo": { "type": "string", "fields": { "hash": { "type": "murmur3" } } } } }
|
||||||
|
|
||||||
|
- do:
|
||||||
|
index:
|
||||||
|
index: test
|
||||||
|
type: type1
|
||||||
|
id: 0
|
||||||
|
body: { "foo": null }
|
||||||
|
|
||||||
|
- do:
|
||||||
|
indices.refresh: {}
|
||||||
|
|
||||||
|
- do:
|
||||||
|
search:
|
||||||
|
body: { "aggs": { "foo_count": { "cardinality": { "field": "foo.hash" } } } }
|
||||||
|
|
||||||
|
- match: { aggregations.foo_count.value: 0 }
|
||||||
|
|
||||||
|
- do:
|
||||||
|
index:
|
||||||
|
index: test
|
||||||
|
type: type1
|
||||||
|
id: 1
|
||||||
|
body: { "foo": "bar" }
|
||||||
|
|
||||||
|
- do:
|
||||||
|
index:
|
||||||
|
index: test
|
||||||
|
type: type1
|
||||||
|
id: 2
|
||||||
|
body: { "foo": "baz" }
|
||||||
|
|
||||||
|
- do:
|
||||||
|
index:
|
||||||
|
index: test
|
||||||
|
type: type1
|
||||||
|
id: 3
|
||||||
|
body: { "foo": "quux" }
|
||||||
|
|
||||||
|
- do:
|
||||||
|
index:
|
||||||
|
index: test
|
||||||
|
type: type1
|
||||||
|
id: 4
|
||||||
|
body: { "foo": "bar" }
|
||||||
|
|
||||||
|
- do:
|
||||||
|
indices.refresh: {}
|
||||||
|
|
||||||
|
- do:
|
||||||
|
search:
|
||||||
|
body: { "aggs": { "foo_count": { "cardinality": { "field": "foo.hash" } } } }
|
||||||
|
|
||||||
|
- match: { aggregations.foo_count.value: 3 }
|
|
@ -17,9 +17,10 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.mapper.core;
|
package org.elasticsearch.index.mapper.murmur3;
|
||||||
|
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.index.IndexOptions;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.common.Explicit;
|
import org.elasticsearch.common.Explicit;
|
||||||
|
@ -31,12 +32,13 @@ import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
import org.elasticsearch.index.mapper.Mapper;
|
import org.elasticsearch.index.mapper.Mapper;
|
||||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||||
import org.elasticsearch.index.mapper.ParseContext;
|
import org.elasticsearch.index.mapper.ParseContext;
|
||||||
|
import org.elasticsearch.index.mapper.core.LongFieldMapper;
|
||||||
|
import org.elasticsearch.index.mapper.core.NumberFieldMapper;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import static org.elasticsearch.index.mapper.MapperBuilders.murmur3Field;
|
|
||||||
import static org.elasticsearch.index.mapper.core.TypeParsers.parseNumberField;
|
import static org.elasticsearch.index.mapper.core.TypeParsers.parseNumberField;
|
||||||
|
|
||||||
public class Murmur3FieldMapper extends LongFieldMapper {
|
public class Murmur3FieldMapper extends LongFieldMapper {
|
||||||
|
@ -45,6 +47,9 @@ public class Murmur3FieldMapper extends LongFieldMapper {
|
||||||
|
|
||||||
public static class Defaults extends LongFieldMapper.Defaults {
|
public static class Defaults extends LongFieldMapper.Defaults {
|
||||||
public static final MappedFieldType FIELD_TYPE = new Murmur3FieldType();
|
public static final MappedFieldType FIELD_TYPE = new Murmur3FieldType();
|
||||||
|
static {
|
||||||
|
FIELD_TYPE.freeze();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class Builder extends NumberFieldMapper.Builder<Builder, Murmur3FieldMapper> {
|
public static class Builder extends NumberFieldMapper.Builder<Builder, Murmur3FieldMapper> {
|
||||||
|
@ -65,6 +70,17 @@ public class Murmur3FieldMapper extends LongFieldMapper {
|
||||||
return fieldMapper;
|
return fieldMapper;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void setupFieldType(BuilderContext context) {
|
||||||
|
super.setupFieldType(context);
|
||||||
|
if (context.indexCreatedVersion().onOrAfter(Version.V_2_0_0)) {
|
||||||
|
fieldType.setIndexOptions(IndexOptions.NONE);
|
||||||
|
defaultFieldType.setIndexOptions(IndexOptions.NONE);
|
||||||
|
fieldType.setHasDocValues(true);
|
||||||
|
defaultFieldType.setHasDocValues(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
|
protected NamedAnalyzer makeNumberAnalyzer(int precisionStep) {
|
||||||
return NumericLongAnalyzer.buildNamedAnalyzer(precisionStep);
|
return NumericLongAnalyzer.buildNamedAnalyzer(precisionStep);
|
||||||
|
@ -80,7 +96,7 @@ public class Murmur3FieldMapper extends LongFieldMapper {
|
||||||
@Override
|
@Override
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
|
public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
|
||||||
Builder builder = murmur3Field(name);
|
Builder builder = new Builder(name);
|
||||||
|
|
||||||
// tweaking these settings is no longer allowed, the entire purpose of murmur3 fields is to store a hash
|
// tweaking these settings is no longer allowed, the entire purpose of murmur3 fields is to store a hash
|
||||||
if (parserContext.indexVersionCreated().onOrAfter(Version.V_2_0_0_beta1)) {
|
if (parserContext.indexVersionCreated().onOrAfter(Version.V_2_0_0_beta1)) {
|
||||||
|
@ -92,6 +108,10 @@ public class Murmur3FieldMapper extends LongFieldMapper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (parserContext.indexVersionCreated().before(Version.V_2_0_0)) {
|
||||||
|
builder.indexOptions(IndexOptions.DOCS);
|
||||||
|
}
|
||||||
|
|
||||||
parseNumberField(builder, name, node, parserContext);
|
parseNumberField(builder, name, node, parserContext);
|
||||||
// Because this mapper extends LongFieldMapper the null_value field will be added to the JSON when transferring cluster state
|
// Because this mapper extends LongFieldMapper the null_value field will be added to the JSON when transferring cluster state
|
||||||
// between nodes so we have to remove the entry here so that the validation doesn't fail
|
// between nodes so we have to remove the entry here so that the validation doesn't fail
|
||||||
|
@ -104,7 +124,8 @@ public class Murmur3FieldMapper extends LongFieldMapper {
|
||||||
|
|
||||||
// this only exists so a check can be done to match the field type to using murmur3 hashing...
|
// this only exists so a check can be done to match the field type to using murmur3 hashing...
|
||||||
public static class Murmur3FieldType extends LongFieldMapper.LongFieldType {
|
public static class Murmur3FieldType extends LongFieldMapper.LongFieldType {
|
||||||
public Murmur3FieldType() {}
|
public Murmur3FieldType() {
|
||||||
|
}
|
||||||
|
|
||||||
protected Murmur3FieldType(Murmur3FieldType ref) {
|
protected Murmur3FieldType(Murmur3FieldType ref) {
|
||||||
super(ref);
|
super(ref);
|
|
@ -0,0 +1,36 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.mapper.murmur3;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.index.AbstractIndexComponent;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.mapper.MapperService;
|
||||||
|
|
||||||
|
public class RegisterMurmur3FieldMapper extends AbstractIndexComponent {
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public RegisterMurmur3FieldMapper(Index index, Settings indexSettings, MapperService mapperService) {
|
||||||
|
super(index, indexSettings);
|
||||||
|
mapperService.documentMapperParser().putTypeParser(Murmur3FieldMapper.CONTENT_TYPE, new Murmur3FieldMapper.TypeParser());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.plugin.mapper;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.inject.AbstractModule;
|
||||||
|
import org.elasticsearch.index.mapper.murmur3.RegisterMurmur3FieldMapper;
|
||||||
|
|
||||||
|
public class MapperMurmur3IndexModule extends AbstractModule {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void configure() {
|
||||||
|
bind(RegisterMurmur3FieldMapper.class).asEagerSingleton();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.plugin.mapper;
|
||||||
|
|
||||||
|
import org.elasticsearch.common.inject.Module;
|
||||||
|
import org.elasticsearch.plugins.AbstractPlugin;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
public class MapperMurmur3Plugin extends AbstractPlugin {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String name() {
|
||||||
|
return "mapper-murmur3";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String description() {
|
||||||
|
return "A mapper that allows to precompute murmur3 hashes of values at index-time and store them in the index";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<Class<? extends Module>> indexModules() {
|
||||||
|
return Collections.<Class<? extends Module>>singleton(MapperMurmur3IndexModule.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elasticsearch under one or more contributor
|
||||||
|
* license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright
|
||||||
|
* ownership. Elasticsearch licenses this file to you under
|
||||||
|
* the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
* not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.mapper.murmur3;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.annotations.Name;
|
||||||
|
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||||
|
|
||||||
|
import org.elasticsearch.test.rest.ESRestTestCase;
|
||||||
|
import org.elasticsearch.test.rest.RestTestCandidate;
|
||||||
|
import org.elasticsearch.test.rest.parser.RestTestParseException;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class MapperMurmur3RestIT extends ESRestTestCase {
|
||||||
|
|
||||||
|
public MapperMurmur3RestIT(@Name("yaml") RestTestCandidate testCandidate) {
|
||||||
|
super(testCandidate);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ParametersFactory
|
||||||
|
public static Iterable<Object[]> parameters() throws IOException, RestTestParseException {
|
||||||
|
return createParameters(0, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -17,9 +17,11 @@
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.elasticsearch.index.mapper.core;
|
package org.elasticsearch.index.mapper.murmur3;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocValuesType;
|
||||||
import org.apache.lucene.index.IndexOptions;
|
import org.apache.lucene.index.IndexOptions;
|
||||||
|
import org.apache.lucene.index.IndexableField;
|
||||||
import org.elasticsearch.Version;
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -28,9 +30,12 @@ import org.elasticsearch.index.IndexService;
|
||||||
import org.elasticsearch.index.mapper.DocumentMapper;
|
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||||
import org.elasticsearch.index.mapper.DocumentMapperParser;
|
import org.elasticsearch.index.mapper.DocumentMapperParser;
|
||||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||||
|
import org.elasticsearch.index.mapper.ParsedDocument;
|
||||||
import org.elasticsearch.test.ESSingleNodeTestCase;
|
import org.elasticsearch.test.ESSingleNodeTestCase;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class Murmur3FieldMapperTests extends ESSingleNodeTestCase {
|
public class Murmur3FieldMapperTests extends ESSingleNodeTestCase {
|
||||||
|
|
||||||
IndexService indexService;
|
IndexService indexService;
|
||||||
|
@ -40,6 +45,22 @@ public class Murmur3FieldMapperTests extends ESSingleNodeTestCase {
|
||||||
public void before() {
|
public void before() {
|
||||||
indexService = createIndex("test");
|
indexService = createIndex("test");
|
||||||
parser = indexService.mapperService().documentMapperParser();
|
parser = indexService.mapperService().documentMapperParser();
|
||||||
|
parser.putTypeParser(Murmur3FieldMapper.CONTENT_TYPE, new Murmur3FieldMapper.TypeParser());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testDefaults() throws Exception {
|
||||||
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
|
.startObject("properties").startObject("field")
|
||||||
|
.field("type", "murmur3")
|
||||||
|
.endObject().endObject().endObject().endObject().string();
|
||||||
|
DocumentMapper mapper = parser.parse(mapping);
|
||||||
|
ParsedDocument parsedDoc = mapper.parse("test", "type", "1", XContentFactory.jsonBuilder().startObject().field("field", "value").endObject().bytes());
|
||||||
|
IndexableField[] fields = parsedDoc.rootDoc().getFields("field");
|
||||||
|
assertNotNull(fields);
|
||||||
|
assertEquals(Arrays.toString(fields), 1, fields.length);
|
||||||
|
IndexableField field = fields[0];
|
||||||
|
assertEquals(IndexOptions.NONE, field.fieldType().indexOptions());
|
||||||
|
assertEquals(DocValuesType.SORTED_NUMERIC, field.fieldType().docValuesType());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testDocValuesSettingNotAllowed() throws Exception {
|
public void testDocValuesSettingNotAllowed() throws Exception {
|
||||||
|
@ -100,6 +121,7 @@ public class Murmur3FieldMapperTests extends ESSingleNodeTestCase {
|
||||||
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2.id).build();
|
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2.id).build();
|
||||||
indexService = createIndex("test_bwc", settings);
|
indexService = createIndex("test_bwc", settings);
|
||||||
parser = indexService.mapperService().documentMapperParser();
|
parser = indexService.mapperService().documentMapperParser();
|
||||||
|
parser.putTypeParser(Murmur3FieldMapper.CONTENT_TYPE, new Murmur3FieldMapper.TypeParser());
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
.startObject("properties").startObject("field")
|
.startObject("properties").startObject("field")
|
||||||
.field("type", "murmur3")
|
.field("type", "murmur3")
|
||||||
|
@ -115,6 +137,7 @@ public class Murmur3FieldMapperTests extends ESSingleNodeTestCase {
|
||||||
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2.id).build();
|
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_1_4_2.id).build();
|
||||||
indexService = createIndex("test_bwc", settings);
|
indexService = createIndex("test_bwc", settings);
|
||||||
parser = indexService.mapperService().documentMapperParser();
|
parser = indexService.mapperService().documentMapperParser();
|
||||||
|
parser.putTypeParser(Murmur3FieldMapper.CONTENT_TYPE, new Murmur3FieldMapper.TypeParser());
|
||||||
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
String mapping = XContentFactory.jsonBuilder().startObject().startObject("type")
|
||||||
.startObject("properties").startObject("field")
|
.startObject("properties").startObject("field")
|
||||||
.field("type", "murmur3")
|
.field("type", "murmur3")
|
|
@ -436,6 +436,7 @@
|
||||||
<module>delete-by-query</module>
|
<module>delete-by-query</module>
|
||||||
<module>lang-python</module>
|
<module>lang-python</module>
|
||||||
<module>lang-javascript</module>
|
<module>lang-javascript</module>
|
||||||
|
<module>mapper-murmur3</module>
|
||||||
<module>mapper-size</module>
|
<module>mapper-size</module>
|
||||||
<module>jvm-example</module>
|
<module>jvm-example</module>
|
||||||
<module>site-example</module>
|
<module>site-example</module>
|
||||||
|
|
|
@ -333,6 +333,14 @@
|
||||||
<overWrite>true</overWrite>
|
<overWrite>true</overWrite>
|
||||||
</artifactItem>
|
</artifactItem>
|
||||||
|
|
||||||
|
<artifactItem>
|
||||||
|
<groupId>org.elasticsearch.plugin</groupId>
|
||||||
|
<artifactId>elasticsearch-mapper-murmur3</artifactId>
|
||||||
|
<version>${elasticsearch.version}</version>
|
||||||
|
<type>zip</type>
|
||||||
|
<overWrite>true</overWrite>
|
||||||
|
</artifactItem>
|
||||||
|
|
||||||
<artifactItem>
|
<artifactItem>
|
||||||
<groupId>org.elasticsearch.plugin</groupId>
|
<groupId>org.elasticsearch.plugin</groupId>
|
||||||
<artifactId>elasticsearch-mapper-size</artifactId>
|
<artifactId>elasticsearch-mapper-size</artifactId>
|
||||||
|
|
Loading…
Reference in New Issue