SOLR-7461: stats.field now supports individual local params for 'countDistinct' and 'distinctValues', 'calcdistinct' is still supported as an alias for both options

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1678422 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2015-05-08 18:44:03 +00:00
parent d42d7e68b5
commit d479fd7be1
5 changed files with 64 additions and 35 deletions

View File

@ -174,6 +174,8 @@ New Features
* SOLR-4392: Make it possible to specify AES encrypted password in dataconfig.xml (Noble Paul) * SOLR-4392: Make it possible to specify AES encrypted password in dataconfig.xml (Noble Paul)
* SOLR-7461: stats.field now supports individual local params for 'countDistinct' and 'distinctValues'.
'calcdistinct' is still supported as an alias for both options (hossman)
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -86,7 +86,8 @@ public class StatsField {
mean(false, sum, count), mean(false, sum, count),
sumOfSquares(true), sumOfSquares(true),
stddev(false, sum, count, sumOfSquares), stddev(false, sum, count, sumOfSquares),
calcdistinct(true), distinctValues(true),
countDistinct(false, distinctValues),
percentiles(true){ percentiles(true){
/** special for percentiles **/ /** special for percentiles **/
boolean parseParams(StatsField sf) { boolean parseParams(StatsField sf) {
@ -178,6 +179,13 @@ public class StatsField {
} }
/**
* the equivilent stats if "calcdistinct" is specified
* @see Stat#countDistinct
* @see Stat#distinctValues
*/
private static final EnumSet<Stat> CALCDISTINCT_PSUEDO_STAT = EnumSet.of(Stat.countDistinct, Stat.distinctValues);
/** /**
* The set of stats computed by default when no localparams are used to specify explicit stats * The set of stats computed by default when no localparams are used to specify explicit stats
*/ */
@ -524,26 +532,33 @@ public class StatsField {
statSpecifiedByLocalParam = true; statSpecifiedByLocalParam = true;
if (stat.parseParams(this)) { if (stat.parseParams(this)) {
statsInResponse.add(stat); statsInResponse.add(stat);
statsToCalculate.addAll(stat.getDistribDeps());
} }
} }
} }
// if no individual stat setting. // if no individual stat setting use the default set
if ( ! statSpecifiedByLocalParam ) { if ( ! ( statSpecifiedByLocalParam
// calcdistinct (as a local param) is a psuedo-stat, prevents default set
|| localParams.getBool("calcdistinct", false) ) ) {
statsInResponse.addAll(DEFAULT_STATS); statsInResponse.addAll(DEFAULT_STATS);
}
// calcDistinct is a psuedo-stat with optional top level param default behavior
// if not overridden by the specific individual stats
if (localParams.getBool("calcdistinct", topLevelCalcDistinct)) {
for (Stat stat : CALCDISTINCT_PSUEDO_STAT) {
// assume true, but don't include if specific stat overrides
if (localParams.getBool(stat.name(), true)) {
statsInResponse.add(stat);
}
}
}
for (Stat stat : statsInResponse) { for (Stat stat : statsInResponse) {
statsToCalculate.addAll(stat.getDistribDeps()); statsToCalculate.addAll(stat.getDistribDeps());
} }
} }
// calcDistinct has special "default" behavior using top level CalcDistinct param
if (topLevelCalcDistinct && localParams.getBool(Stat.calcdistinct.toString(), true)) {
statsInResponse.add(Stat.calcdistinct);
statsToCalculate.addAll(Stat.calcdistinct.getDistribDeps());
}
}
public boolean calculateStats(Stat stat) { public boolean calculateStats(Stat stat) {
return statsToCalculate.contains(stat); return statsToCalculate.contains(stat);
} }

View File

@ -3,7 +3,7 @@
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. * this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0 * The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at * the License. You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
@ -105,7 +105,7 @@ abstract class AbstractStatsValues<T> implements StatsValues {
// final booleans from StatsField to allow better inlining & JIT optimizing // final booleans from StatsField to allow better inlining & JIT optimizing
final protected boolean computeCount; final protected boolean computeCount;
final protected boolean computeMissing; final protected boolean computeMissing;
final protected boolean computeCalcDistinct; final protected boolean computeCalcDistinct; // needed for either countDistinct or distinctValues
final protected boolean computeMin; final protected boolean computeMin;
final protected boolean computeMax; final protected boolean computeMax;
final protected boolean computeMinOrMax; final protected boolean computeMinOrMax;
@ -148,7 +148,8 @@ abstract class AbstractStatsValues<T> implements StatsValues {
this.statsField = statsField; this.statsField = statsField;
this.computeCount = statsField.calculateStats(Stat.count); this.computeCount = statsField.calculateStats(Stat.count);
this.computeMissing = statsField.calculateStats(Stat.missing); this.computeMissing = statsField.calculateStats(Stat.missing);
this.computeCalcDistinct = statsField.calculateStats(Stat.calcdistinct); this.computeCalcDistinct = statsField.calculateStats(Stat.countDistinct)
|| statsField.calculateStats(Stat.distinctValues);
this.computeMin = statsField.calculateStats(Stat.min); this.computeMin = statsField.calculateStats(Stat.min);
this.computeMax = statsField.calculateStats(Stat.max); this.computeMax = statsField.calculateStats(Stat.max);
this.computeMinOrMax = computeMin || computeMax; this.computeMinOrMax = computeMin || computeMax;
@ -324,8 +325,10 @@ abstract class AbstractStatsValues<T> implements StatsValues {
if (statsField.includeInResponse(Stat.missing)) { if (statsField.includeInResponse(Stat.missing)) {
res.add("missing", missing); res.add("missing", missing);
} }
if (statsField.includeInResponse(Stat.calcdistinct)) { if (statsField.includeInResponse(Stat.distinctValues)) {
res.add("distinctValues", distinctValues); res.add("distinctValues", distinctValues);
}
if (statsField.includeInResponse(Stat.countDistinct)) {
res.add("countDistinct", countDistinct); res.add("countDistinct", countDistinct);
} }
if (statsField.includeInResponse(Stat.cardinality)) { if (statsField.includeInResponse(Stat.cardinality)) {

View File

@ -696,6 +696,12 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
params("stats.calcdistinct", "false", params("stats.calcdistinct", "false",
"f."+i1+".stats.calcdistinct", "false", "f."+i1+".stats.calcdistinct", "false",
"stats.field", "{!min=true calcdistinct=true}" + i1), "stats.field", "{!min=true calcdistinct=true}" + i1),
params("stats.calcdistinct", "false",
"f."+i1+".stats.calcdistinct", "false",
"stats.field", "{!min=true countDistinct=true distinctValues=true}" + i1),
params("stats.field", "{!min=true countDistinct=true distinctValues=true}" + i1),
params("yes", "true",
"stats.field", "{!min=$yes countDistinct=$yes distinctValues=$yes}" + i1),
}) { }) {
rsp = query(SolrParams.wrapDefaults rsp = query(SolrParams.wrapDefaults
@ -732,6 +738,9 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
params("stats.calcdistinct", "true", params("stats.calcdistinct", "true",
"f."+i1+".stats.calcdistinct", "true", "f."+i1+".stats.calcdistinct", "true",
"stats.field", "{!min=true calcdistinct=false}" + i1), "stats.field", "{!min=true calcdistinct=false}" + i1),
params("stats.calcdistinct", "true",
"f."+i1+".stats.calcdistinct", "true",
"stats.field", "{!min=true countDistinct=false distinctValues=false}" + i1),
}) { }) {
rsp = query(SolrParams.wrapDefaults rsp = query(SolrParams.wrapDefaults
@ -752,7 +761,6 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
assertNull(p+" expected null for sum", s.getSum()); assertNull(p+" expected null for sum", s.getSum());
assertNull(p+" expected null for percentiles", s.getPercentiles()); assertNull(p+" expected null for percentiles", s.getPercentiles());
assertNull(p+" expected null for cardinality", s.getCardinality()); assertNull(p+" expected null for cardinality", s.getCardinality());
} }
// this field doesn't exist in any doc in the result set. // this field doesn't exist in any doc in the result set.
@ -839,8 +847,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
} }
assertEquals("Sanity check failed: either test broke, or test changed, or you adjusted Stat enum" + assertEquals("Sanity check failed: either test broke, or test changed, or you adjusted Stat enum" +
" (adjust constant accordingly if intentional)", " (adjust constant accordingly if intentional)",
4235, numTotalStatQueries); 5082, numTotalStatQueries);
/*** TODO: the failure may come back in "exception" /*** TODO: the failure may come back in "exception"
try { try {
@ -1164,6 +1171,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
} }
private void validateCommonQueryParameters() throws Exception { private void validateCommonQueryParameters() throws Exception {
ignoreException("parameter cannot be negative");
try { try {
SolrQuery query = new SolrQuery(); SolrQuery query = new SolrQuery();
query.setStart(-1).setQuery("*"); query.setStart(-1).setQuery("*");
@ -1181,5 +1189,6 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
} catch (SolrException e) { } catch (SolrException e) {
assertEquals(ErrorCode.BAD_REQUEST.code, e.code()); assertEquals(ErrorCode.BAD_REQUEST.code, e.code());
} }
resetExceptionIgnores();
} }
} }

View File

@ -1115,8 +1115,11 @@ public class StatsComponentTest extends AbstractSolrTestCase {
SolrQueryRequest req2 = req(baseParams, SolrQueryRequest req2 = req(baseParams,
StatsParams.STATS_FIELD, StatsParams.STATS_FIELD,
"{!min=true, max=true, count=true, sum=true, mean=true, stddev=true, sumOfSquares=true, missing=true, calcdistinct=true}" + fieldName); "{!min=true, max=true, count=true, sum=true, mean=true, stddev=true, sumOfSquares=true, missing=true, calcdistinct=true}" + fieldName);
SolrQueryRequest req3 = req(baseParams,
StatsParams.STATS_FIELD,
"{!min=true, max=true, count=true, sum=true, mean=true, stddev=true, sumOfSquares=true, missing=true, countDistinct=true, distinctValues=true}" + fieldName);
for (SolrQueryRequest req : new SolrQueryRequest[] { req1, req2 }) { for (SolrQueryRequest req : new SolrQueryRequest[] { req1, req2, req3 }) {
assertQ("test status on docValues and multiValued: " + req.toString(), req assertQ("test status on docValues and multiValued: " + req.toString(), req
, "//lst[@name='" + fieldName + "']/double[@name='min'][.='-3.0']" , "//lst[@name='" + fieldName + "']/double[@name='min'][.='-3.0']"
, "//lst[@name='" + fieldName + "']/double[@name='max'][.='16.0']" , "//lst[@name='" + fieldName + "']/double[@name='max'][.='16.0']"
@ -1231,16 +1234,14 @@ public class StatsComponentTest extends AbstractSolrTestCase {
public final static String KPRE = XPRE + "lst[@name='stats_fields']/lst[@name='k']/"; public final static String KPRE = XPRE + "lst[@name='stats_fields']/lst[@name='k']/";
public final Stat stat; public final Stat stat;
public final String input; public final String input;
public final int numResponseKeys; // all because calcdistinct is obnoxious
public final List<String> perShardXpaths; public final List<String> perShardXpaths;
public final List<String> finalXpaths; public final List<String> finalXpaths;
public final static Map<Stat,ExpectedStat> ALL = new LinkedHashMap<Stat,ExpectedStat>(); public final static Map<Stat,ExpectedStat> ALL = new LinkedHashMap<Stat,ExpectedStat>();
private ExpectedStat(Stat stat, String input, int numResponseKeys, private ExpectedStat(Stat stat, String input,
List<String> perShardXpaths, List<String> finalXpaths) { List<String> perShardXpaths, List<String> finalXpaths) {
this.stat = stat; this.stat = stat;
this.input = input; this.input = input;
this.numResponseKeys = numResponseKeys;
this.perShardXpaths = perShardXpaths; this.perShardXpaths = perShardXpaths;
this.finalXpaths = finalXpaths; this.finalXpaths = finalXpaths;
} }
@ -1258,12 +1259,11 @@ public class StatsComponentTest extends AbstractSolrTestCase {
perShardXpaths.addAll(expectedDep.perShardXpaths); perShardXpaths.addAll(expectedDep.perShardXpaths);
} }
} }
ALL.put(stat, new ExpectedStat(stat, input, 1, ALL.put(stat, new ExpectedStat(stat, input, perShardXpaths, Collections.singletonList(xpath)));
perShardXpaths, Collections.singletonList(xpath)));
} }
public static void create(Stat stat, String input, int numResponseKeys, public static void create(Stat stat, String input,
List<String> perShardXpaths, List<String> finalXpaths) { List<String> perShardXpaths, List<String> finalXpaths) {
ALL.put(stat, new ExpectedStat(stat, input, numResponseKeys, perShardXpaths, finalXpaths)); ALL.put(stat, new ExpectedStat(stat, input, perShardXpaths, finalXpaths));
} }
} }
@ -1340,16 +1340,16 @@ public class StatsComponentTest extends AbstractSolrTestCase {
ExpectedStat.createSimple(Stat.mean, "true", "double", String.valueOf(sum / count)); ExpectedStat.createSimple(Stat.mean, "true", "double", String.valueOf(sum / count));
ExpectedStat.createSimple(Stat.sumOfSquares, "true", "double", String.valueOf(sumOfSquares)); ExpectedStat.createSimple(Stat.sumOfSquares, "true", "double", String.valueOf(sumOfSquares));
ExpectedStat.createSimple(Stat.stddev, "true", "double", String.valueOf(stddev)); ExpectedStat.createSimple(Stat.stddev, "true", "double", String.valueOf(stddev));
final String countDistinctXpath = kpre + "long[@name='countDistinct'][.='10']"; final String distinctValsXpath = "count(" + kpre + "arr[@name='distinctValues']/*)=10";
ExpectedStat.create(Stat.calcdistinct, "true", 2, ExpectedStat.create(Stat.distinctValues, "true",
Arrays.asList("count(" + kpre + "arr[@name='distinctValues']/*)=10", Collections.singletonList(distinctValsXpath),
countDistinctXpath), Collections.singletonList(distinctValsXpath));
Collections.singletonList(countDistinctXpath)); ExpectedStat.createSimple(Stat.countDistinct, "true", "long", "10");
final String percentileShardXpath = kpre + "str[@name='percentiles'][.='" final String percentileShardXpath = kpre + "str[@name='percentiles'][.='"
+ Base64.byteArrayToBase64(tdigestBuf.array(), 0, tdigestBuf.array().length) + "']"; + Base64.byteArrayToBase64(tdigestBuf.array(), 0, tdigestBuf.array().length) + "']";
final String p90 = "" + tdigest.quantile(0.90D); final String p90 = "" + tdigest.quantile(0.90D);
final String p99 = "" + tdigest.quantile(0.99D); final String p99 = "" + tdigest.quantile(0.99D);
ExpectedStat.create(Stat.percentiles, "'90, 99'", 1, ExpectedStat.create(Stat.percentiles, "'90, 99'",
Collections.singletonList(percentileShardXpath), Collections.singletonList(percentileShardXpath),
Arrays.asList("count(" + kpre + "lst[@name='percentiles']/*)=2", Arrays.asList("count(" + kpre + "lst[@name='percentiles']/*)=2",
kpre + "lst[@name='percentiles']/double[@name='90.0'][.="+p90+"]", kpre + "lst[@name='percentiles']/double[@name='90.0'][.="+p90+"]",
@ -1357,7 +1357,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
final String cardinalityShardXpath = kpre + "str[@name='cardinality'][.='" final String cardinalityShardXpath = kpre + "str[@name='cardinality'][.='"
+ Base64.byteArrayToBase64(hllBytes, 0, hllBytes.length) + "']"; + Base64.byteArrayToBase64(hllBytes, 0, hllBytes.length) + "']";
final String cardinalityXpath = kpre + "long[@name='cardinality'][.='10']"; final String cardinalityXpath = kpre + "long[@name='cardinality'][.='10']";
ExpectedStat.create(Stat.cardinality, "true", 1, ExpectedStat.create(Stat.cardinality, "true",
Collections.singletonList(cardinalityShardXpath), Collections.singletonList(cardinalityShardXpath),
Collections.singletonList(cardinalityXpath)); Collections.singletonList(cardinalityXpath));
@ -1377,7 +1377,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
int numKeysExpected = 0; int numKeysExpected = 0;
EnumSet<Stat> distribDeps = stat.getDistribDeps(); EnumSet<Stat> distribDeps = stat.getDistribDeps();
for (Stat perShardDep : distribDeps) { for (Stat perShardDep : distribDeps) {
numKeysExpected += ExpectedStat.ALL.get(perShardDep).numResponseKeys; numKeysExpected++;
// even if we go out of our way to exclude the dependent stats, // even if we go out of our way to exclude the dependent stats,
// the shard should return them since they are a dependency for the requested stat // the shard should return them since they are a dependency for the requested stat
@ -1413,7 +1413,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
paras.append(stat + "=" + expect.input + " "); paras.append(stat + "=" + expect.input + " ");
numKeysExpected += expect.numResponseKeys; numKeysExpected++;
testXpaths.addAll(expect.finalXpaths); testXpaths.addAll(expect.finalXpaths);
} }