From 7fcf82af807608a3e05a388ae16e713b55982cd0 Mon Sep 17 00:00:00 2001 From: kimchy Date: Thu, 7 Apr 2011 14:44:34 +0300 Subject: [PATCH] faster handling of a case where each doc has multi valued values fields in terms stats --- .../facet/TermsFacetSearchBenchmark.java | 43 +++++++++++++++++++ .../TermsStatsDoubleFacetCollector.java | 31 ++++++++----- .../longs/TermsStatsLongFacetCollector.java | 31 ++++++++----- .../TermsStatsStringFacetCollector.java | 31 ++++++++----- 4 files changed, 103 insertions(+), 33 deletions(-) diff --git a/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/TermsFacetSearchBenchmark.java b/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/TermsFacetSearchBenchmark.java index ad0c0c01623..5877eff8396 100644 --- a/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/TermsFacetSearchBenchmark.java +++ b/modules/benchmark/micro/src/main/java/org/elasticsearch/benchmark/search/facet/TermsFacetSearchBenchmark.java @@ -154,6 +154,9 @@ public class TermsFacetSearchBenchmark { stats.add(terms("terms_lm", "lm_value", null)); stats.add(terms("terms_map_lm", "lm_value", "map")); + stats.add(termsStats("terms_stats_s_l", "s_value", "l_value", null)); + stats.add(termsStats("terms_stats_s_lm", "s_value", "lm_value", null)); + System.out.println("------------------ SUMMARY -------------------------------"); System.out.format("%25s%10s%10s\n", "name", "took", "millis"); for (StatsResult stat : stats) { @@ -216,4 +219,44 @@ public class TermsFacetSearchBenchmark { System.out.println("--> Terms Facet (" + field + "), hint(" + executionHint + "): " + (totalQueryTime / QUERY_COUNT) + "ms"); return new StatsResult(name, totalQueryTime); } + + private static StatsResult termsStats(String name, String keyField, String valueField, String executionHint) { + long totalQueryTime; + + client.admin().indices().prepareClearCache().setFieldDataCache(true).execute().actionGet(); + + 
System.out.println("--> Warmup (" + name + ")..."); + // run just the child query, warm up first + for (int j = 0; j < QUERY_WARMUP; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setSearchType(SearchType.COUNT) + .setQuery(matchAllQuery()) + .addFacet(termsStatsFacet(name).keyField(keyField).valueField(valueField)) + .execute().actionGet(); + if (j == 0) { + System.out.println("--> Loading (" + name + "): took: " + searchResponse.took()); + } + if (searchResponse.hits().totalHits() != COUNT) { + System.err.println("--> mismatch on hits"); + } + } + System.out.println("--> Warmup (" + name + ") DONE"); + + + System.out.println("--> Running (" + name + ")..."); + totalQueryTime = 0; + for (int j = 0; j < QUERY_COUNT; j++) { + SearchResponse searchResponse = client.prepareSearch() + .setSearchType(SearchType.COUNT) + .setQuery(matchAllQuery()) + .addFacet(termsStatsFacet(name).keyField(keyField).valueField(valueField)) + .execute().actionGet(); + if (searchResponse.hits().totalHits() != COUNT) { + System.err.println("--> mismatch on hits"); + } + totalQueryTime += searchResponse.tookInMillis(); + } + System.out.println("--> Terms Facet (" + name + "), hint(" + executionHint + "): " + (totalQueryTime / QUERY_COUNT) + "ms"); + return new StatsResult(name, totalQueryTime); + } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java index 89ae5fb5d74..5ace09588af 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java @@ -157,6 +157,8 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector { NumericFieldData valueFieldData; + 
final ValueAggregator valueAggregator = new ValueAggregator(); + @Override public void onValue(int docId, double value) { InternalTermsStatsDoubleFacet.DoubleEntry doubleEntry = entries.get(value); if (doubleEntry == null) { @@ -167,17 +169,8 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector { } if (valueFieldData.multiValued()) { - double[] valueValues = valueFieldData.doubleValues(docId); - doubleEntry.totalCount += valueValues.length; - for (double valueValue : valueValues) { - if (valueValue < doubleEntry.min) { - doubleEntry.min = valueValue; - } - if (valueValue > doubleEntry.max) { - doubleEntry.max = valueValue; - } - doubleEntry.total += valueValue; - } + valueAggregator.doubleEntry = doubleEntry; + valueFieldData.forEachValueInDoc(docId, valueAggregator); } else { double valueValue = valueFieldData.doubleValue(docId); if (valueValue < doubleEntry.min) { @@ -194,6 +187,22 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector { @Override public void onMissing(int docId) { missing++; } + + public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc { + + InternalTermsStatsDoubleFacet.DoubleEntry doubleEntry; + + @Override public void onValue(int docId, double value) { + if (value < doubleEntry.min) { + doubleEntry.min = value; + } + if (value > doubleEntry.max) { + doubleEntry.max = value; + } + doubleEntry.total += value; + doubleEntry.totalCount++; + } + } } public static class ScriptAggregator extends Aggregator { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java index 5c1e2154556..968cc5e444d 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java +++ 
b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java @@ -160,6 +160,8 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector { NumericFieldData valueFieldData; + final ValueAggregator valueAggregator = new ValueAggregator(); + @Override public void onValue(int docId, long value) { InternalTermsStatsLongFacet.LongEntry longEntry = entries.get(value); if (longEntry == null) { @@ -169,17 +171,8 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector { longEntry.count++; } if (valueFieldData.multiValued()) { - double[] valueValues = valueFieldData.doubleValues(docId); - longEntry.totalCount += valueValues.length; - for (double valueValue : valueValues) { - if (valueValue < longEntry.min) { - longEntry.min = valueValue; - } - if (valueValue > longEntry.max) { - longEntry.max = valueValue; - } - longEntry.total += valueValue; - } + valueAggregator.longEntry = longEntry; + valueFieldData.forEachValueInDoc(docId, valueAggregator); } else { double valueValue = valueFieldData.doubleValue(docId); if (valueValue < longEntry.min) { @@ -196,6 +189,22 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector { @Override public void onMissing(int docId) { missing++; } + + public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc { + + InternalTermsStatsLongFacet.LongEntry longEntry; + + @Override public void onValue(int docId, double value) { + if (value < longEntry.min) { + longEntry.min = value; + } + if (value > longEntry.max) { + longEntry.max = value; + } + longEntry.total += value; + longEntry.totalCount++; + } + } } public static class ScriptAggregator extends Aggregator { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java 
b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java index 3298a3ded49..71102668a3f 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java @@ -159,6 +159,8 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { NumericFieldData valueFieldData; + final ValueAggregator valueAggregator = new ValueAggregator(); + @Override public void onValue(int docId, String value) { InternalTermsStatsStringFacet.StringEntry stringEntry = entries.get(value); if (stringEntry == null) { @@ -168,17 +170,8 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { stringEntry.count++; } if (valueFieldData.multiValued()) { - double[] valueValues = valueFieldData.doubleValues(docId); - stringEntry.totalCount += valueValues.length; - for (double valueValue : valueValues) { - if (valueValue < stringEntry.min) { - stringEntry.min = valueValue; - } - if (valueValue > stringEntry.max) { - stringEntry.max = valueValue; - } - stringEntry.total += valueValue; - } + valueAggregator.stringEntry = stringEntry; + valueFieldData.forEachValueInDoc(docId, valueAggregator); } else { double valueValue = valueFieldData.doubleValue(docId); if (valueValue < stringEntry.min) { @@ -196,6 +189,22 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { @Override public void onMissing(int docId) { missing++; } + + public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc { + + InternalTermsStatsStringFacet.StringEntry stringEntry; + + @Override public void onValue(int docId, double value) { + if (value < stringEntry.min) { + stringEntry.min = value; + } + if (value > stringEntry.max) { + stringEntry.max = value; + } + stringEntry.total += value; + 
stringEntry.totalCount++; + } + } } public static class ScriptAggregator extends Aggregator {