Faster handling of the case where each doc has multi-valued value fields in terms stats

This commit is contained in:
kimchy 2011-04-07 14:44:34 +03:00
parent 16a046f686
commit 7fcf82af80
4 changed files with 103 additions and 33 deletions

View File

@ -154,6 +154,9 @@ public class TermsFacetSearchBenchmark {
stats.add(terms("terms_lm", "lm_value", null)); stats.add(terms("terms_lm", "lm_value", null));
stats.add(terms("terms_map_lm", "lm_value", "map")); stats.add(terms("terms_map_lm", "lm_value", "map"));
stats.add(termsStats("terms_stats_s_l", "s_value", "l_value", null));
stats.add(termsStats("terms_stats_s_lm", "s_value", "lm_value", null));
System.out.println("------------------ SUMMARY -------------------------------"); System.out.println("------------------ SUMMARY -------------------------------");
System.out.format("%25s%10s%10s\n", "name", "took", "millis"); System.out.format("%25s%10s%10s\n", "name", "took", "millis");
for (StatsResult stat : stats) { for (StatsResult stat : stats) {
@ -216,4 +219,44 @@ public class TermsFacetSearchBenchmark {
System.out.println("--> Terms Facet (" + field + "), hint(" + executionHint + "): " + (totalQueryTime / QUERY_COUNT) + "ms"); System.out.println("--> Terms Facet (" + field + "), hint(" + executionHint + "): " + (totalQueryTime / QUERY_COUNT) + "ms");
return new StatsResult(name, totalQueryTime); return new StatsResult(name, totalQueryTime);
} }
/**
 * Benchmarks a terms stats facet over a match-all, count-only search.
 *
 * <p>Clears the field data cache, runs {@code QUERY_WARMUP} warmup queries (printing the
 * time taken by the first one), then runs {@code QUERY_COUNT} measured queries and sums
 * their reported {@code tookInMillis()}.
 *
 * @param name          facet name; also used to label the console output and the result
 * @param keyField      field handed to the facet builder's {@code keyField(...)}
 * @param valueField    field handed to the facet builder's {@code valueField(...)}
 * @param executionHint only echoed in the final summary line; it is not applied to the request
 * @return a {@code StatsResult} built from {@code name} and the summed query time
 */
private static StatsResult termsStats(String name, String keyField, String valueField, String executionHint) {
    // Clear the field data cache first; the first warmup query's time is then printed as "Loading".
    client.admin().indices().prepareClearCache().setFieldDataCache(true).execute().actionGet();

    System.out.println("--> Warmup (" + name + ")...");
    for (int j = 0; j < QUERY_WARMUP; j++) {
        SearchResponse searchResponse = runTermsStatsQuery(name, keyField, valueField);
        if (j == 0) {
            System.out.println("--> Loading (" + name + "): took: " + searchResponse.took());
        }
        if (searchResponse.hits().totalHits() != COUNT) {
            System.err.println("--> mismatch on hits");
        }
    }
    System.out.println("--> Warmup (" + name + ") DONE");

    System.out.println("--> Running (" + name + ")...");
    long totalQueryTime = 0;
    for (int j = 0; j < QUERY_COUNT; j++) {
        SearchResponse searchResponse = runTermsStatsQuery(name, keyField, valueField);
        if (searchResponse.hits().totalHits() != COUNT) {
            System.err.println("--> mismatch on hits");
        }
        totalQueryTime += searchResponse.tookInMillis();
    }
    // Integer division: the printed average is truncated to whole milliseconds.
    System.out.println("--> Terms Facet (" + name + "), hint(" + executionHint + "): " + (totalQueryTime / QUERY_COUNT) + "ms");
    return new StatsResult(name, totalQueryTime);
}

/** Executes one count-only, match-all search carrying a single terms stats facet and waits for the response. */
private static SearchResponse runTermsStatsQuery(String name, String keyField, String valueField) {
    return client.prepareSearch()
            .setSearchType(SearchType.COUNT)
            .setQuery(matchAllQuery())
            .addFacet(termsStatsFacet(name).keyField(keyField).valueField(valueField))
            .execute().actionGet();
}
} }

View File

@ -157,6 +157,8 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector {
NumericFieldData valueFieldData; NumericFieldData valueFieldData;
final ValueAggregator valueAggregator = new ValueAggregator();
@Override public void onValue(int docId, double value) { @Override public void onValue(int docId, double value) {
InternalTermsStatsDoubleFacet.DoubleEntry doubleEntry = entries.get(value); InternalTermsStatsDoubleFacet.DoubleEntry doubleEntry = entries.get(value);
if (doubleEntry == null) { if (doubleEntry == null) {
@ -167,17 +169,8 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector {
} }
if (valueFieldData.multiValued()) { if (valueFieldData.multiValued()) {
double[] valueValues = valueFieldData.doubleValues(docId); valueAggregator.doubleEntry = doubleEntry;
doubleEntry.totalCount += valueValues.length; valueFieldData.forEachValueInDoc(docId, valueAggregator);
for (double valueValue : valueValues) {
if (valueValue < doubleEntry.min) {
doubleEntry.min = valueValue;
}
if (valueValue > doubleEntry.max) {
doubleEntry.max = valueValue;
}
doubleEntry.total += valueValue;
}
} else { } else {
double valueValue = valueFieldData.doubleValue(docId); double valueValue = valueFieldData.doubleValue(docId);
if (valueValue < doubleEntry.min) { if (valueValue < doubleEntry.min) {
@ -194,6 +187,22 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector {
@Override public void onMissing(int docId) { @Override public void onMissing(int docId) {
missing++; missing++;
} }
/**
 * Per-value callback that folds each value it receives into the statistics
 * (count of values, sum, min, max) of the currently selected entry.
 */
public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc {

    // Entry receiving the statistics; must be assigned before values are fed in.
    InternalTermsStatsDoubleFacet.DoubleEntry doubleEntry;

    @Override public void onValue(int docId, double value) {
        final InternalTermsStatsDoubleFacet.DoubleEntry target = doubleEntry;
        // One more value seen for this entry, and it contributes to the running sum.
        target.totalCount++;
        target.total += value;
        // Widen the observed range when the value falls outside it.
        if (target.max < value) {
            target.max = value;
        }
        if (target.min > value) {
            target.min = value;
        }
    }
}
} }
public static class ScriptAggregator extends Aggregator { public static class ScriptAggregator extends Aggregator {

View File

@ -160,6 +160,8 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector {
NumericFieldData valueFieldData; NumericFieldData valueFieldData;
final ValueAggregator valueAggregator = new ValueAggregator();
@Override public void onValue(int docId, long value) { @Override public void onValue(int docId, long value) {
InternalTermsStatsLongFacet.LongEntry longEntry = entries.get(value); InternalTermsStatsLongFacet.LongEntry longEntry = entries.get(value);
if (longEntry == null) { if (longEntry == null) {
@ -169,17 +171,8 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector {
longEntry.count++; longEntry.count++;
} }
if (valueFieldData.multiValued()) { if (valueFieldData.multiValued()) {
double[] valueValues = valueFieldData.doubleValues(docId); valueAggregator.longEntry = longEntry;
longEntry.totalCount += valueValues.length; valueFieldData.forEachValueInDoc(docId, valueAggregator);
for (double valueValue : valueValues) {
if (valueValue < longEntry.min) {
longEntry.min = valueValue;
}
if (valueValue > longEntry.max) {
longEntry.max = valueValue;
}
longEntry.total += valueValue;
}
} else { } else {
double valueValue = valueFieldData.doubleValue(docId); double valueValue = valueFieldData.doubleValue(docId);
if (valueValue < longEntry.min) { if (valueValue < longEntry.min) {
@ -196,6 +189,22 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector {
@Override public void onMissing(int docId) { @Override public void onMissing(int docId) {
missing++; missing++;
} }
/**
 * Per-value callback that folds each value it receives into the statistics
 * (count of values, sum, min, max) of the currently selected entry.
 */
public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc {

    // Entry receiving the statistics; must be assigned before values are fed in.
    InternalTermsStatsLongFacet.LongEntry longEntry;

    @Override public void onValue(int docId, double value) {
        final InternalTermsStatsLongFacet.LongEntry target = longEntry;
        // One more value seen for this entry, and it contributes to the running sum.
        target.totalCount++;
        target.total += value;
        // Widen the observed range when the value falls outside it.
        if (target.max < value) {
            target.max = value;
        }
        if (target.min > value) {
            target.min = value;
        }
    }
}
} }
public static class ScriptAggregator extends Aggregator { public static class ScriptAggregator extends Aggregator {

View File

@ -159,6 +159,8 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector {
NumericFieldData valueFieldData; NumericFieldData valueFieldData;
ValueAggregator valueAggregator = new ValueAggregator();
@Override public void onValue(int docId, String value) { @Override public void onValue(int docId, String value) {
InternalTermsStatsStringFacet.StringEntry stringEntry = entries.get(value); InternalTermsStatsStringFacet.StringEntry stringEntry = entries.get(value);
if (stringEntry == null) { if (stringEntry == null) {
@ -168,17 +170,8 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector {
stringEntry.count++; stringEntry.count++;
} }
if (valueFieldData.multiValued()) { if (valueFieldData.multiValued()) {
double[] valueValues = valueFieldData.doubleValues(docId); valueAggregator.stringEntry = stringEntry;
stringEntry.totalCount += valueValues.length; valueFieldData.forEachValueInDoc(docId, valueAggregator);
for (double valueValue : valueValues) {
if (valueValue < stringEntry.min) {
stringEntry.min = valueValue;
}
if (valueValue > stringEntry.max) {
stringEntry.max = valueValue;
}
stringEntry.total += valueValue;
}
} else { } else {
double valueValue = valueFieldData.doubleValue(docId); double valueValue = valueFieldData.doubleValue(docId);
if (valueValue < stringEntry.min) { if (valueValue < stringEntry.min) {
@ -196,6 +189,22 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector {
@Override public void onMissing(int docId) { @Override public void onMissing(int docId) {
missing++; missing++;
} }
/**
 * Per-value callback that folds each value it receives into the statistics
 * (count of values, sum, min, max) of the currently selected entry.
 */
public static class ValueAggregator implements NumericFieldData.DoubleValueInDocProc {

    // Entry receiving the statistics; must be assigned before values are fed in.
    InternalTermsStatsStringFacet.StringEntry stringEntry;

    @Override public void onValue(int docId, double value) {
        final InternalTermsStatsStringFacet.StringEntry target = stringEntry;
        // One more value seen for this entry, and it contributes to the running sum.
        target.totalCount++;
        target.total += value;
        // Widen the observed range when the value falls outside it.
        if (target.max < value) {
            target.max = value;
        }
        if (target.min > value) {
            target.min = value;
        }
    }
}
} }
public static class ScriptAggregator extends Aggregator { public static class ScriptAggregator extends Aggregator {