From 37acba1b57e5775eaa7f04aba814e1d70936adf1 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sun, 20 Jan 2013 07:10:50 +0100 Subject: [PATCH] terms stats to use new field data --- .../common/lucene/HashedBytesRef.java | 10 ++ .../fielddata/IndexNumericFieldData.java | 44 +++++++-- .../plain/LongArrayIndexFieldData.java | 2 +- .../termsstats/TermsStatsFacetProcessor.java | 41 ++++---- .../TermsStatsDoubleFacetCollector.java | 75 +++++--------- .../longs/TermsStatsLongFacetCollector.java | 81 +++++----------- .../InternalTermsStatsStringFacet.java | 7 +- .../TermsStatsStringFacetCollector.java | 97 +++++++------------ 8 files changed, 158 insertions(+), 199 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/HashedBytesRef.java b/src/main/java/org/elasticsearch/common/lucene/HashedBytesRef.java index 299de543e9a..44873602d2f 100644 --- a/src/main/java/org/elasticsearch/common/lucene/HashedBytesRef.java +++ b/src/main/java/org/elasticsearch/common/lucene/HashedBytesRef.java @@ -73,4 +73,14 @@ public class HashedBytesRef { public String toString() { return bytes.toString(); } + + public HashedBytesRef deepCopy() { + return deepCopyOf(this); + } + + public static HashedBytesRef deepCopyOf(HashedBytesRef other) { + BytesRef copy = new BytesRef(); + copy.copyBytes(other.bytes); + return new HashedBytesRef(copy, other.hash); + } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java index 90633cba92a..41873549e87 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/IndexNumericFieldData.java @@ -26,12 +26,44 @@ import org.apache.lucene.index.AtomicReaderContext; public interface IndexNumericFieldData extends IndexFieldData { static enum NumericType { - BYTE, - SHORT, - INT, - LONG, - FLOAT, - DOUBLE + BYTE { + @Override + public boolean isFloatingPoint() { + return false; + } + }, + SHORT { + @Override + public boolean isFloatingPoint() { + return false; + } + }, + INT { + @Override + public boolean isFloatingPoint() { + return false; + } + }, + LONG { + @Override + public boolean isFloatingPoint() { + return false; + } + }, + FLOAT { + @Override + public boolean isFloatingPoint() { + return true; + } + }, + DOUBLE { + @Override + public boolean isFloatingPoint() { + return true; + } + }; + + public abstract boolean isFloatingPoint(); } NumericType getNumericType(); diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/LongArrayIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/LongArrayIndexFieldData.java index 8e358f875c3..b38ead65e84 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/LongArrayIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/LongArrayIndexFieldData.java @@ -54,7 +54,7 @@ public class LongArrayIndexFieldData extends AbstractIndexFieldData params) { + private DoubleValues keyValues; + + public TermsStatsDoubleFacetCollector(String facetName, IndexNumericFieldData keyIndexFieldData, IndexNumericFieldData valueIndexFieldData, SearchScript script, + int size, TermsStatsFacet.ComparatorType comparatorType, SearchContext context) { super(facetName); - this.fieldDataCache = context.fieldDataCache(); this.size = size; this.comparatorType = comparatorType; this.numberOfShards = context.numberOfShards(); - - MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(keyFieldName); - if (smartMappers == null || !smartMappers.hasMapper()) { - this.keyFieldName = keyFieldName; - this.keyFieldDataType = FieldDataType.DefaultTypes.STRING; - } else { - // add type filter if there is exact doc mapper associated with it - if (smartMappers.explicitTypeInNameWithDocMapper()) { - setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); - } - - this.keyFieldName = smartMappers.mapper().names().indexName(); - this.keyFieldDataType = smartMappers.mapper().fieldDataType(); - } + this.keyIndexFieldData = keyIndexFieldData; + this.valueIndexFieldData = valueIndexFieldData; + this.script = script; if (script == null) { - smartMappers = context.smartFieldMappers(valueFieldName); - if (smartMappers == null || !smartMappers.hasMapper()) { - throw new ElasticSearchIllegalArgumentException("failed to find mappings for [" + valueFieldName + "]"); - } - this.valueFieldName = smartMappers.mapper().names().indexName(); - this.valueFieldDataType = smartMappers.mapper().fieldDataType(); - this.script = null; this.aggregator = new Aggregator(); } else { - this.valueFieldName = null; - this.valueFieldDataType = null; - this.script = context.scriptService().search(context.lookup(), scriptLang, script, params); this.aggregator = new ScriptAggregator(this.script); } } @@ -114,17 +79,17 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector { @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { - keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, context.reader(), keyFieldName); + keyValues = keyIndexFieldData.load(context).getDoubleValues(); if (script != null) { script.setNextReader(context); } else { - aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueFieldName); + aggregator.valueFieldData = valueIndexFieldData.load(context).getDoubleValues(); } } @Override protected void doCollect(int doc) throws IOException { - keyFieldData.forEachValueInDoc(doc, aggregator); + keyValues.forEachValueInDoc(doc, aggregator); } @Override @@ -153,12 +118,12 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector { return new InternalTermsStatsDoubleFacet(facetName, comparatorType, size, ordered, aggregator.missing); } - public static class Aggregator implements NumericFieldData.MissingDoubleValueInDocProc { + public static class Aggregator implements DoubleValues.ValueInDocProc { final ExtTDoubleObjectHashMap entries = CacheRecycler.popDoubleObjectMap(); int missing; - NumericFieldData valueFieldData; + DoubleValues valueFieldData; final ValueAggregator valueAggregator = new ValueAggregator(); @@ -179,10 +144,14 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector { missing++; } - public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc { + public static class ValueAggregator implements DoubleValues.ValueInDocProc { InternalTermsStatsDoubleFacet.DoubleEntry doubleEntry; + @Override + public void onMissing(int docId) { + } + @Override public void onValue(int docId, double value) { if (value < doubleEntry.min) { diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java index c36a89a3441..2dfab251d1b 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java @@ -23,13 +23,11 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; -import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.trove.ExtTLongObjectHashMap; -import org.elasticsearch.index.cache.field.data.FieldDataCache; -import org.elasticsearch.index.field.data.FieldDataType; -import org.elasticsearch.index.field.data.NumericFieldData; -import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.fielddata.DoubleValues; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.fielddata.LongValues; import org.elasticsearch.script.SearchScript; import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; @@ -40,68 +38,35 @@ import java.io.IOException; import java.util.Arrays; import java.util.Comparator; import java.util.List; -import java.util.Map; public class TermsStatsLongFacetCollector extends AbstractFacetCollector { private final TermsStatsFacet.ComparatorType comparatorType; - private final FieldDataCache fieldDataCache; - - private final String keyFieldName; - - private final String valueFieldName; - - private final int size; - - private final int numberOfShards; - - private final FieldDataType keyFieldDataType; - - private NumericFieldData keyFieldData; - - private final FieldDataType valueFieldDataType; - + private final IndexNumericFieldData keyIndexFieldData; + private final IndexNumericFieldData valueIndexFieldData; private final SearchScript script; + private final int size; + private final int numberOfShards; private final Aggregator aggregator; - public TermsStatsLongFacetCollector(String facetName, String keyFieldName, String valueFieldName, int size, TermsStatsFacet.ComparatorType comparatorType, - SearchContext context, String scriptLang, String script, Map params) { + private LongValues keyValues; + + public TermsStatsLongFacetCollector(String facetName, IndexNumericFieldData keyIndexFieldData, IndexNumericFieldData valueIndexFieldData, SearchScript script, + int size, TermsStatsFacet.ComparatorType comparatorType, SearchContext context) { super(facetName); - this.fieldDataCache = context.fieldDataCache(); this.size = size; this.comparatorType = comparatorType; this.numberOfShards = context.numberOfShards(); - - MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(keyFieldName); - if (smartMappers == null || !smartMappers.hasMapper()) { - this.keyFieldName = keyFieldName; - this.keyFieldDataType = FieldDataType.DefaultTypes.STRING; - } else { - // add type filter if there is exact doc mapper associated with it - if (smartMappers.explicitTypeInNameWithDocMapper()) { - setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); - } - - this.keyFieldName = smartMappers.mapper().names().indexName(); - this.keyFieldDataType = smartMappers.mapper().fieldDataType(); - } + this.keyIndexFieldData = keyIndexFieldData; + this.valueIndexFieldData = valueIndexFieldData; + this.script = script; if (script == null) { - smartMappers = context.smartFieldMappers(valueFieldName); - if (smartMappers == null || !smartMappers.hasMapper()) { - throw new ElasticSearchIllegalArgumentException("failed to find mappings for [" + valueFieldName + "]"); - } - this.valueFieldName = smartMappers.mapper().names().indexName(); - this.valueFieldDataType = smartMappers.mapper().fieldDataType(); - this.script = null; this.aggregator = new Aggregator(); } else { - this.valueFieldName = null; - this.valueFieldDataType = null; - this.script = context.scriptService().search(context.lookup(), scriptLang, script, params); this.aggregator = new ScriptAggregator(this.script); } } @@ -115,17 +80,17 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector { @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { - keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, context.reader(), keyFieldName); + keyValues = keyIndexFieldData.load(context).getLongValues(); if (script != null) { script.setNextReader(context); } else { - aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueFieldName); + aggregator.valueValues = valueIndexFieldData.load(context).getDoubleValues(); } } @Override protected void doCollect(int doc) throws IOException { - keyFieldData.forEachValueInDoc(doc, aggregator); + keyValues.forEachValueInDoc(doc, aggregator); } @Override @@ -155,13 +120,13 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector { return new InternalTermsStatsLongFacet(facetName, comparatorType, size, ordered, aggregator.missing); } - public static class Aggregator implements NumericFieldData.MissingLongValueInDocProc { + public static class Aggregator implements LongValues.ValueInDocProc { final ExtTLongObjectHashMap entries = CacheRecycler.popLongObjectMap(); int missing; - NumericFieldData valueFieldData; + DoubleValues valueValues; final ValueAggregator valueAggregator = new ValueAggregator(); @@ -174,7 +139,7 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector { } longEntry.count++; valueAggregator.longEntry = longEntry; - valueFieldData.forEachValueInDoc(docId, valueAggregator); + valueValues.forEachValueInDoc(docId, valueAggregator); } @Override @@ -182,10 +147,14 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector { missing++; } - public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc { + public static class ValueAggregator implements DoubleValues.ValueInDocProc { InternalTermsStatsLongFacet.LongEntry longEntry; + @Override + public void onMissing(int docId) { + } + @Override public void onValue(int docId, double value) { if (value < longEntry.min) { diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java index cc0836b4e74..b7375c3a300 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java @@ -20,12 +20,11 @@ package org.elasticsearch.search.facet.termsstats.strings; import com.google.common.collect.ImmutableList; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.HashedBytesRef; import org.elasticsearch.common.text.BytesText; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.trove.ExtTHashMap; @@ -69,8 +68,8 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { double min; double max; - public StringEntry(BytesRef term, long count, long totalCount, double total, double min, double max) { - this(new BytesText(new BytesArray(term)), count, totalCount, total, min, max); + public StringEntry(HashedBytesRef term, long count, long totalCount, double total, double min, double max) { + this(new BytesText(new BytesArray(term.bytes)), count, totalCount, total, min, max); } public StringEntry(Text term, long count, long totalCount, double total, double min, double max) { diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java index bdff3fb3714..a02b9d41dfa 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java @@ -23,15 +23,13 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; -import org.apache.lucene.util.BytesRef; -import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.lucene.HashedBytesRef; import org.elasticsearch.common.trove.ExtTHashMap; -import org.elasticsearch.index.cache.field.data.FieldDataCache; -import org.elasticsearch.index.field.data.FieldData; -import org.elasticsearch.index.field.data.FieldDataType; -import org.elasticsearch.index.field.data.NumericFieldData; -import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.fielddata.DoubleValues; +import org.elasticsearch.index.fielddata.HashedBytesValues; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; import org.elasticsearch.script.SearchScript; import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; @@ -42,68 +40,36 @@ import java.io.IOException; import java.util.Arrays; import java.util.Comparator; import java.util.List; -import java.util.Map; public class TermsStatsStringFacetCollector extends AbstractFacetCollector { private final TermsStatsFacet.ComparatorType comparatorType; - private final FieldDataCache fieldDataCache; - - private final String keyFieldName; - - private final String valueFieldName; + private final IndexFieldData keyIndexFieldData; + private final IndexNumericFieldData valueIndexFieldData; + private final SearchScript script; private final int size; - private final int numberOfShards; - private final FieldDataType keyFieldDataType; - - private FieldData keyFieldData; - - private final FieldDataType valueFieldDataType; - - private final SearchScript script; - private final Aggregator aggregator; - public TermsStatsStringFacetCollector(String facetName, String keyFieldName, String valueFieldName, int size, TermsStatsFacet.ComparatorType comparatorType, - SearchContext context, String scriptLang, String script, Map params) { + private HashedBytesValues keyValues; + + public TermsStatsStringFacetCollector(String facetName, IndexFieldData keyIndexFieldData, IndexNumericFieldData valueIndexFieldData, SearchScript valueScript, + int size, TermsStatsFacet.ComparatorType comparatorType, SearchContext context) { super(facetName); - this.fieldDataCache = context.fieldDataCache(); + this.keyIndexFieldData = keyIndexFieldData; + this.valueIndexFieldData = valueIndexFieldData; + this.script = valueScript; this.size = size; this.comparatorType = comparatorType; this.numberOfShards = context.numberOfShards(); - MapperService.SmartNameFieldMappers smartMappers = context.smartFieldMappers(keyFieldName); - if (smartMappers == null || !smartMappers.hasMapper()) { - this.keyFieldName = keyFieldName; - this.keyFieldDataType = FieldDataType.DefaultTypes.STRING; + if (script != null) { + this.aggregator = new ScriptAggregator(valueScript); } else { - // add type filter if there is exact doc mapper associated with it - if (smartMappers.explicitTypeInNameWithDocMapper()) { - setFilter(context.filterCache().cache(smartMappers.docMapper().typeFilter())); - } - - this.keyFieldName = smartMappers.mapper().names().indexName(); - this.keyFieldDataType = smartMappers.mapper().fieldDataType(); - } - - if (script == null) { - smartMappers = context.smartFieldMappers(valueFieldName); - if (smartMappers == null || !smartMappers.hasMapper()) { - throw new ElasticSearchIllegalArgumentException("failed to find mappings for [" + valueFieldName + "]"); - } - this.valueFieldName = smartMappers.mapper().names().indexName(); - this.valueFieldDataType = smartMappers.mapper().fieldDataType(); - this.script = null; this.aggregator = new Aggregator(); - } else { - this.valueFieldName = null; - this.valueFieldDataType = null; - this.script = context.scriptService().search(context.lookup(), scriptLang, script, params); - this.aggregator = new ScriptAggregator(this.script); } } @@ -116,17 +82,17 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { - keyFieldData = fieldDataCache.cache(keyFieldDataType, context.reader(), keyFieldName); + keyValues = keyIndexFieldData.load(context).getHashedBytesValues(); if (script != null) { script.setNextReader(context); } else { - aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueFieldName); + aggregator.valueValues = valueIndexFieldData.load(context).getDoubleValues(); } } @Override protected void doCollect(int doc) throws IOException { - keyFieldData.forEachValueInDoc(doc, aggregator); + keyValues.forEachValueInDoc(doc, aggregator); } @Override @@ -155,27 +121,28 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { return new InternalTermsStatsStringFacet(facetName, comparatorType, size, ordered, aggregator.missing); } - public static class Aggregator implements FieldData.StringValueInDocProc { + public static class Aggregator implements HashedBytesValues.ValueInDocProc { - // LUCENE 4 UPGRADE: check if hashcode is not too expensive - final ExtTHashMap entries = CacheRecycler.popHashMap(); + final ExtTHashMap entries = CacheRecycler.popHashMap(); int missing = 0; - NumericFieldData valueFieldData; + DoubleValues valueValues; ValueAggregator valueAggregator = new ValueAggregator(); @Override - public void onValue(int docId, BytesRef value) { + public void onValue(int docId, HashedBytesRef value) { InternalTermsStatsStringFacet.StringEntry stringEntry = entries.get(value); if (stringEntry == null) { + // we use "unsafe" hashedBytes, and only copy over if we "miss" on the map, and need to put it there + value = value.deepCopy(); stringEntry = new InternalTermsStatsStringFacet.StringEntry(value, 0, 0, 0, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY); entries.put(value, stringEntry); } stringEntry.count++; valueAggregator.stringEntry = stringEntry; - valueFieldData.forEachValueInDoc(docId, valueAggregator); + valueValues.forEachValueInDoc(docId, valueAggregator); } @Override @@ -183,10 +150,14 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { missing++; } - public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc { + public static class ValueAggregator implements DoubleValues.ValueInDocProc { InternalTermsStatsStringFacet.StringEntry stringEntry; + @Override + public void onMissing(int docId) { + } + @Override public void onValue(int docId, double value) { if (value < stringEntry.min) { @@ -209,9 +180,11 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { } @Override - public void onValue(int docId, BytesRef value) { + public void onValue(int docId, HashedBytesRef value) { InternalTermsStatsStringFacet.StringEntry stringEntry = entries.get(value); if (stringEntry == null) { + // we use "unsafe" hashedBytes, and only copy over if we "miss" on the map, and need to put it there + value = value.deepCopy(); stringEntry = new InternalTermsStatsStringFacet.StringEntry(value, 1, 0, 0, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY); entries.put(value, stringEntry); } else {