Faster handling of the case where each doc has a multi-valued value field in terms stats facets

This commit is contained in:
kimchy 2011-04-07 14:44:34 +03:00
parent 16a046f686
commit 7fcf82af80
4 changed files with 103 additions and 33 deletions

View File

@ -154,6 +154,9 @@ public class TermsFacetSearchBenchmark {
stats.add(terms("terms_lm", "lm_value", null));
stats.add(terms("terms_map_lm", "lm_value", "map"));
stats.add(termsStats("terms_stats_s_l", "s_value", "l_value", null));
stats.add(termsStats("terms_stats_s_lm", "s_value", "lm_value", null));
System.out.println("------------------ SUMMARY -------------------------------");
System.out.format("%25s%10s%10s\n", "name", "took", "millis");
for (StatsResult stat : stats) {
@ -216,4 +219,44 @@ public class TermsFacetSearchBenchmark {
System.out.println("--> Terms Facet (" + field + "), hint(" + executionHint + "): " + (totalQueryTime / QUERY_COUNT) + "ms");
return new StatsResult(name, totalQueryTime);
}
/**
 * Benchmarks a terms stats facet: clears the field data cache, runs
 * {@code QUERY_WARMUP} warmup searches, then runs {@code QUERY_COUNT} measured
 * searches and sums their reported {@code tookInMillis()}.
 *
 * @param name          facet name; also used as the label of the returned result
 * @param keyField      field passed to the facet as its key field
 * @param valueField    field passed to the facet as its value field
 * @param executionHint only echoed in the summary line; it is not applied to the facet request
 * @return a {@code StatsResult} carrying {@code name} and the summed query time of the measured runs
 */
private static StatsResult termsStats(String name, String keyField, String valueField, String executionHint) {
    // Start from a cleared field data cache; the first warmup search below
    // prints its "took" time as the loading cost.
    client.admin().indices().prepareClearCache().setFieldDataCache(true).execute().actionGet();

    System.out.println("--> Warmup (" + name + ")...");
    for (int warmupRun = 0; warmupRun < QUERY_WARMUP; warmupRun++) {
        SearchResponse warmupResponse = executeTermsStatsSearch(name, keyField, valueField);
        if (warmupRun == 0) {
            System.out.println("--> Loading (" + name + "): took: " + warmupResponse.took());
        }
        if (warmupResponse.hits().totalHits() != COUNT) {
            System.err.println("--> mismatch on hits");
        }
    }
    System.out.println("--> Warmup (" + name + ") DONE");

    System.out.println("--> Running (" + name + ")...");
    long totalQueryTime = 0;
    for (int run = 0; run < QUERY_COUNT; run++) {
        SearchResponse measuredResponse = executeTermsStatsSearch(name, keyField, valueField);
        if (measuredResponse.hits().totalHits() != COUNT) {
            System.err.println("--> mismatch on hits");
        }
        totalQueryTime += measuredResponse.tookInMillis();
    }

    System.out.println("--> Terms Facet (" + name + "), hint(" + executionHint + "): " + (totalQueryTime / QUERY_COUNT) + "ms");
    return new StatsResult(name, totalQueryTime);
}

/**
 * Runs one match-all, count-only search carrying a single terms stats facet
 * and blocks until the response is available.
 */
private static SearchResponse executeTermsStatsSearch(String name, String keyField, String valueField) {
    return client.prepareSearch()
            .setSearchType(SearchType.COUNT)
            .setQuery(matchAllQuery())
            .addFacet(termsStatsFacet(name).keyField(keyField).valueField(valueField))
            .execute().actionGet();
}
}

View File

@ -157,6 +157,8 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector {
NumericFieldData valueFieldData;
final ValueAggregator valueAggregator = new ValueAggregator();
@Override public void onValue(int docId, double value) {
InternalTermsStatsDoubleFacet.DoubleEntry doubleEntry = entries.get(value);
if (doubleEntry == null) {
@ -167,17 +169,8 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector {
}
if (valueFieldData.multiValued()) {
double[] valueValues = valueFieldData.doubleValues(docId);
doubleEntry.totalCount += valueValues.length;
for (double valueValue : valueValues) {
if (valueValue < doubleEntry.min) {
doubleEntry.min = valueValue;
}
if (valueValue > doubleEntry.max) {
doubleEntry.max = valueValue;
}
doubleEntry.total += valueValue;
}
valueAggregator.doubleEntry = doubleEntry;
valueFieldData.forEachValueInDoc(docId, valueAggregator);
} else {
double valueValue = valueFieldData.doubleValue(docId);
if (valueValue < doubleEntry.min) {
@ -194,6 +187,22 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector {
/**
 * Missing-value callback (per the method name): adds one to the
 * {@code missing} counter. The document id is not used.
 */
@Override public void onMissing(int docId) {
    missing += 1;
}
/**
 * Per-value callback that folds every value it receives into the entry
 * currently held in {@link #doubleEntry}: min, max, running total and
 * value count.
 */
public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc {

    /** Entry that receives the updates; must be assigned before values are fed in. */
    InternalTermsStatsDoubleFacet.DoubleEntry doubleEntry;

    @Override public void onValue(int docId, double value) {
        final InternalTermsStatsDoubleFacet.DoubleEntry target = doubleEntry;
        // Plain comparisons rather than Math.min/max: a NaN value leaves min and max untouched.
        if (target.min > value) {
            target.min = value;
        }
        if (target.max < value) {
            target.max = value;
        }
        target.total += value;
        target.totalCount++;
    }
}
}
public static class ScriptAggregator extends Aggregator {

View File

@ -160,6 +160,8 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector {
NumericFieldData valueFieldData;
final ValueAggregator valueAggregator = new ValueAggregator();
@Override public void onValue(int docId, long value) {
InternalTermsStatsLongFacet.LongEntry longEntry = entries.get(value);
if (longEntry == null) {
@ -169,17 +171,8 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector {
longEntry.count++;
}
if (valueFieldData.multiValued()) {
double[] valueValues = valueFieldData.doubleValues(docId);
longEntry.totalCount += valueValues.length;
for (double valueValue : valueValues) {
if (valueValue < longEntry.min) {
longEntry.min = valueValue;
}
if (valueValue > longEntry.max) {
longEntry.max = valueValue;
}
longEntry.total += valueValue;
}
valueAggregator.longEntry = longEntry;
valueFieldData.forEachValueInDoc(docId, valueAggregator);
} else {
double valueValue = valueFieldData.doubleValue(docId);
if (valueValue < longEntry.min) {
@ -196,6 +189,22 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector {
/**
 * Missing-value callback (per the method name): adds one to the
 * {@code missing} counter. The document id is not used.
 */
@Override public void onMissing(int docId) {
    missing += 1;
}
/**
 * Per-value callback that folds every value it receives into the entry
 * currently held in {@link #longEntry}: min, max, running total and
 * value count.
 */
public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc {

    /** Entry that receives the updates; must be assigned before values are fed in. */
    InternalTermsStatsLongFacet.LongEntry longEntry;

    @Override public void onValue(int docId, double value) {
        final InternalTermsStatsLongFacet.LongEntry target = longEntry;
        // Plain comparisons rather than Math.min/max: a NaN value leaves min and max untouched.
        if (target.min > value) {
            target.min = value;
        }
        if (target.max < value) {
            target.max = value;
        }
        target.total += value;
        target.totalCount++;
    }
}
}
public static class ScriptAggregator extends Aggregator {

View File

@ -159,6 +159,8 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector {
NumericFieldData valueFieldData;
// Reusable callback handed to forEachValueInDoc for multi-valued value fields.
// Declared final, like the same field in the Double and Long collectors:
// only its stringEntry target is swapped per key, never the instance itself.
final ValueAggregator valueAggregator = new ValueAggregator();
@Override public void onValue(int docId, String value) {
InternalTermsStatsStringFacet.StringEntry stringEntry = entries.get(value);
if (stringEntry == null) {
@ -168,17 +170,8 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector {
stringEntry.count++;
}
if (valueFieldData.multiValued()) {
double[] valueValues = valueFieldData.doubleValues(docId);
stringEntry.totalCount += valueValues.length;
for (double valueValue : valueValues) {
if (valueValue < stringEntry.min) {
stringEntry.min = valueValue;
}
if (valueValue > stringEntry.max) {
stringEntry.max = valueValue;
}
stringEntry.total += valueValue;
}
valueAggregator.stringEntry = stringEntry;
valueFieldData.forEachValueInDoc(docId, valueAggregator);
} else {
double valueValue = valueFieldData.doubleValue(docId);
if (valueValue < stringEntry.min) {
@ -196,6 +189,22 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector {
/**
 * Missing-value callback (per the method name): adds one to the
 * {@code missing} counter. The document id is not used.
 */
@Override public void onMissing(int docId) {
    missing += 1;
}
/**
 * Per-value callback that folds every value it receives into the entry
 * currently held in {@link #stringEntry}: min, max, running total and
 * value count.
 */
public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc {

    /** Entry that receives the updates; must be assigned before values are fed in. */
    InternalTermsStatsStringFacet.StringEntry stringEntry;

    @Override public void onValue(int docId, double value) {
        final InternalTermsStatsStringFacet.StringEntry target = stringEntry;
        // Plain comparisons rather than Math.min/max: a NaN value leaves min and max untouched.
        if (target.min > value) {
            target.min = value;
        }
        if (target.max < value) {
            target.max = value;
        }
        target.total += value;
        target.totalCount++;
    }
}
}
public static class ScriptAggregator extends Aggregator {