SOLR-7461: stats.field now supports individual local params for 'countDistinct' and 'distinctValues', 'calcdistinct' is still supported as an alias for both options

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1678422 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2015-05-08 18:44:03 +00:00
parent d42d7e68b5
commit d479fd7be1
5 changed files with 64 additions and 35 deletions

View File

@ -174,6 +174,8 @@ New Features
* SOLR-4392: Make it possible to specify AES encrypted password in dataconfig.xml (Noble Paul)
* SOLR-7461: stats.field now supports individual local params for 'countDistinct' and 'distinctValues'.
'calcdistinct' is still supported as an alias for both options (hossman)
Bug Fixes
----------------------

View File

@ -86,7 +86,8 @@ public class StatsField {
mean(false, sum, count),
sumOfSquares(true),
stddev(false, sum, count, sumOfSquares),
calcdistinct(true),
distinctValues(true),
countDistinct(false, distinctValues),
percentiles(true){
/** special for percentiles **/
boolean parseParams(StatsField sf) {
@ -178,6 +179,13 @@ public class StatsField {
}
/**
* the equivilent stats if "calcdistinct" is specified
* @see Stat#countDistinct
* @see Stat#distinctValues
*/
private static final EnumSet<Stat> CALCDISTINCT_PSUEDO_STAT = EnumSet.of(Stat.countDistinct, Stat.distinctValues);
/**
* The set of stats computed by default when no localparams are used to specify explicit stats
*/
@ -524,23 +532,30 @@ public class StatsField {
statSpecifiedByLocalParam = true;
if (stat.parseParams(this)) {
statsInResponse.add(stat);
statsToCalculate.addAll(stat.getDistribDeps());
}
}
}
// if no individual stat setting.
if ( ! statSpecifiedByLocalParam ) {
// if no individual stat setting use the default set
if ( ! ( statSpecifiedByLocalParam
// calcdistinct (as a local param) is a psuedo-stat, prevents default set
|| localParams.getBool("calcdistinct", false) ) ) {
statsInResponse.addAll(DEFAULT_STATS);
for (Stat stat : statsInResponse) {
statsToCalculate.addAll(stat.getDistribDeps());
}
// calcDistinct is a psuedo-stat with optional top level param default behavior
// if not overridden by the specific individual stats
if (localParams.getBool("calcdistinct", topLevelCalcDistinct)) {
for (Stat stat : CALCDISTINCT_PSUEDO_STAT) {
// assume true, but don't include if specific stat overrides
if (localParams.getBool(stat.name(), true)) {
statsInResponse.add(stat);
}
}
}
// calcDistinct has special "default" behavior using top level CalcDistinct param
if (topLevelCalcDistinct && localParams.getBool(Stat.calcdistinct.toString(), true)) {
statsInResponse.add(Stat.calcdistinct);
statsToCalculate.addAll(Stat.calcdistinct.getDistribDeps());
for (Stat stat : statsInResponse) {
statsToCalculate.addAll(stat.getDistribDeps());
}
}

View File

@ -3,7 +3,7 @@
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
@ -105,7 +105,7 @@ abstract class AbstractStatsValues<T> implements StatsValues {
// final booleans from StatsField to allow better inlining & JIT optimizing
final protected boolean computeCount;
final protected boolean computeMissing;
final protected boolean computeCalcDistinct;
final protected boolean computeCalcDistinct; // needed for either countDistinct or distinctValues
final protected boolean computeMin;
final protected boolean computeMax;
final protected boolean computeMinOrMax;
@ -148,7 +148,8 @@ abstract class AbstractStatsValues<T> implements StatsValues {
this.statsField = statsField;
this.computeCount = statsField.calculateStats(Stat.count);
this.computeMissing = statsField.calculateStats(Stat.missing);
this.computeCalcDistinct = statsField.calculateStats(Stat.calcdistinct);
this.computeCalcDistinct = statsField.calculateStats(Stat.countDistinct)
|| statsField.calculateStats(Stat.distinctValues);
this.computeMin = statsField.calculateStats(Stat.min);
this.computeMax = statsField.calculateStats(Stat.max);
this.computeMinOrMax = computeMin || computeMax;
@ -324,8 +325,10 @@ abstract class AbstractStatsValues<T> implements StatsValues {
if (statsField.includeInResponse(Stat.missing)) {
res.add("missing", missing);
}
if (statsField.includeInResponse(Stat.calcdistinct)) {
if (statsField.includeInResponse(Stat.distinctValues)) {
res.add("distinctValues", distinctValues);
}
if (statsField.includeInResponse(Stat.countDistinct)) {
res.add("countDistinct", countDistinct);
}
if (statsField.includeInResponse(Stat.cardinality)) {

View File

@ -696,6 +696,12 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
params("stats.calcdistinct", "false",
"f."+i1+".stats.calcdistinct", "false",
"stats.field", "{!min=true calcdistinct=true}" + i1),
params("stats.calcdistinct", "false",
"f."+i1+".stats.calcdistinct", "false",
"stats.field", "{!min=true countDistinct=true distinctValues=true}" + i1),
params("stats.field", "{!min=true countDistinct=true distinctValues=true}" + i1),
params("yes", "true",
"stats.field", "{!min=$yes countDistinct=$yes distinctValues=$yes}" + i1),
}) {
rsp = query(SolrParams.wrapDefaults
@ -732,6 +738,9 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
params("stats.calcdistinct", "true",
"f."+i1+".stats.calcdistinct", "true",
"stats.field", "{!min=true calcdistinct=false}" + i1),
params("stats.calcdistinct", "true",
"f."+i1+".stats.calcdistinct", "true",
"stats.field", "{!min=true countDistinct=false distinctValues=false}" + i1),
}) {
rsp = query(SolrParams.wrapDefaults
@ -752,7 +761,6 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
assertNull(p+" expected null for sum", s.getSum());
assertNull(p+" expected null for percentiles", s.getPercentiles());
assertNull(p+" expected null for cardinality", s.getCardinality());
}
// this field doesn't exist in any doc in the result set.
@ -839,8 +847,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
}
assertEquals("Sanity check failed: either test broke, or test changed, or you adjusted Stat enum" +
" (adjust constant accordingly if intentional)",
4235, numTotalStatQueries);
5082, numTotalStatQueries);
/*** TODO: the failure may come back in "exception"
try {
@ -1164,6 +1171,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
}
private void validateCommonQueryParameters() throws Exception {
ignoreException("parameter cannot be negative");
try {
SolrQuery query = new SolrQuery();
query.setStart(-1).setQuery("*");
@ -1180,6 +1188,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
fail("Expected the last query to fail, but got response: " + resp);
} catch (SolrException e) {
assertEquals(ErrorCode.BAD_REQUEST.code, e.code());
}
}
resetExceptionIgnores();
}
}

View File

@ -1115,8 +1115,11 @@ public class StatsComponentTest extends AbstractSolrTestCase {
SolrQueryRequest req2 = req(baseParams,
StatsParams.STATS_FIELD,
"{!min=true, max=true, count=true, sum=true, mean=true, stddev=true, sumOfSquares=true, missing=true, calcdistinct=true}" + fieldName);
SolrQueryRequest req3 = req(baseParams,
StatsParams.STATS_FIELD,
"{!min=true, max=true, count=true, sum=true, mean=true, stddev=true, sumOfSquares=true, missing=true, countDistinct=true, distinctValues=true}" + fieldName);
for (SolrQueryRequest req : new SolrQueryRequest[] { req1, req2 }) {
for (SolrQueryRequest req : new SolrQueryRequest[] { req1, req2, req3 }) {
assertQ("test status on docValues and multiValued: " + req.toString(), req
, "//lst[@name='" + fieldName + "']/double[@name='min'][.='-3.0']"
, "//lst[@name='" + fieldName + "']/double[@name='max'][.='16.0']"
@ -1231,16 +1234,14 @@ public class StatsComponentTest extends AbstractSolrTestCase {
public final static String KPRE = XPRE + "lst[@name='stats_fields']/lst[@name='k']/";
public final Stat stat;
public final String input;
public final int numResponseKeys; // all because calcdistinct is obnoxious
public final List<String> perShardXpaths;
public final List<String> finalXpaths;
public final static Map<Stat,ExpectedStat> ALL = new LinkedHashMap<Stat,ExpectedStat>();
private ExpectedStat(Stat stat, String input, int numResponseKeys,
private ExpectedStat(Stat stat, String input,
List<String> perShardXpaths, List<String> finalXpaths) {
this.stat = stat;
this.input = input;
this.numResponseKeys = numResponseKeys;
this.perShardXpaths = perShardXpaths;
this.finalXpaths = finalXpaths;
}
@ -1258,12 +1259,11 @@ public class StatsComponentTest extends AbstractSolrTestCase {
perShardXpaths.addAll(expectedDep.perShardXpaths);
}
}
ALL.put(stat, new ExpectedStat(stat, input, 1,
perShardXpaths, Collections.singletonList(xpath)));
ALL.put(stat, new ExpectedStat(stat, input, perShardXpaths, Collections.singletonList(xpath)));
}
public static void create(Stat stat, String input, int numResponseKeys,
public static void create(Stat stat, String input,
List<String> perShardXpaths, List<String> finalXpaths) {
ALL.put(stat, new ExpectedStat(stat, input, numResponseKeys, perShardXpaths, finalXpaths));
ALL.put(stat, new ExpectedStat(stat, input, perShardXpaths, finalXpaths));
}
}
@ -1340,16 +1340,16 @@ public class StatsComponentTest extends AbstractSolrTestCase {
ExpectedStat.createSimple(Stat.mean, "true", "double", String.valueOf(sum / count));
ExpectedStat.createSimple(Stat.sumOfSquares, "true", "double", String.valueOf(sumOfSquares));
ExpectedStat.createSimple(Stat.stddev, "true", "double", String.valueOf(stddev));
final String countDistinctXpath = kpre + "long[@name='countDistinct'][.='10']";
ExpectedStat.create(Stat.calcdistinct, "true", 2,
Arrays.asList("count(" + kpre + "arr[@name='distinctValues']/*)=10",
countDistinctXpath),
Collections.singletonList(countDistinctXpath));
final String distinctValsXpath = "count(" + kpre + "arr[@name='distinctValues']/*)=10";
ExpectedStat.create(Stat.distinctValues, "true",
Collections.singletonList(distinctValsXpath),
Collections.singletonList(distinctValsXpath));
ExpectedStat.createSimple(Stat.countDistinct, "true", "long", "10");
final String percentileShardXpath = kpre + "str[@name='percentiles'][.='"
+ Base64.byteArrayToBase64(tdigestBuf.array(), 0, tdigestBuf.array().length) + "']";
final String p90 = "" + tdigest.quantile(0.90D);
final String p99 = "" + tdigest.quantile(0.99D);
ExpectedStat.create(Stat.percentiles, "'90, 99'", 1,
ExpectedStat.create(Stat.percentiles, "'90, 99'",
Collections.singletonList(percentileShardXpath),
Arrays.asList("count(" + kpre + "lst[@name='percentiles']/*)=2",
kpre + "lst[@name='percentiles']/double[@name='90.0'][.="+p90+"]",
@ -1357,7 +1357,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
final String cardinalityShardXpath = kpre + "str[@name='cardinality'][.='"
+ Base64.byteArrayToBase64(hllBytes, 0, hllBytes.length) + "']";
final String cardinalityXpath = kpre + "long[@name='cardinality'][.='10']";
ExpectedStat.create(Stat.cardinality, "true", 1,
ExpectedStat.create(Stat.cardinality, "true",
Collections.singletonList(cardinalityShardXpath),
Collections.singletonList(cardinalityXpath));
@ -1377,7 +1377,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
int numKeysExpected = 0;
EnumSet<Stat> distribDeps = stat.getDistribDeps();
for (Stat perShardDep : distribDeps) {
numKeysExpected += ExpectedStat.ALL.get(perShardDep).numResponseKeys;
numKeysExpected++;
// even if we go out of our way to exclude the dependent stats,
// the shard should return them since they are a dependency for the requested stat
@ -1413,7 +1413,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
paras.append(stat + "=" + expect.input + " ");
numKeysExpected += expect.numResponseKeys;
numKeysExpected++;
testXpaths.addAll(expect.finalXpaths);
}