mirror of https://github.com/apache/lucene.git
SOLR-7461: stats.field now supports individual local params for 'countDistinct' and 'distinctValues', 'calcdistinct' is still supported as an alias for both options
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1678422 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d42d7e68b5
commit
d479fd7be1
|
@ -174,6 +174,8 @@ New Features
|
||||||
|
|
||||||
* SOLR-4392: Make it possible to specify AES encrypted password in dataconfig.xml (Noble Paul)
|
* SOLR-4392: Make it possible to specify AES encrypted password in dataconfig.xml (Noble Paul)
|
||||||
|
|
||||||
|
* SOLR-7461: stats.field now supports individual local params for 'countDistinct' and 'distinctValues'.
|
||||||
|
'calcdistinct' is still supported as an alias for both options (hossman)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
|
|
|
@ -86,7 +86,8 @@ public class StatsField {
|
||||||
mean(false, sum, count),
|
mean(false, sum, count),
|
||||||
sumOfSquares(true),
|
sumOfSquares(true),
|
||||||
stddev(false, sum, count, sumOfSquares),
|
stddev(false, sum, count, sumOfSquares),
|
||||||
calcdistinct(true),
|
distinctValues(true),
|
||||||
|
countDistinct(false, distinctValues),
|
||||||
percentiles(true){
|
percentiles(true){
|
||||||
/** special for percentiles **/
|
/** special for percentiles **/
|
||||||
boolean parseParams(StatsField sf) {
|
boolean parseParams(StatsField sf) {
|
||||||
|
@ -178,6 +179,13 @@ public class StatsField {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the equivilent stats if "calcdistinct" is specified
|
||||||
|
* @see Stat#countDistinct
|
||||||
|
* @see Stat#distinctValues
|
||||||
|
*/
|
||||||
|
private static final EnumSet<Stat> CALCDISTINCT_PSUEDO_STAT = EnumSet.of(Stat.countDistinct, Stat.distinctValues);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The set of stats computed by default when no localparams are used to specify explicit stats
|
* The set of stats computed by default when no localparams are used to specify explicit stats
|
||||||
*/
|
*/
|
||||||
|
@ -524,26 +532,33 @@ public class StatsField {
|
||||||
statSpecifiedByLocalParam = true;
|
statSpecifiedByLocalParam = true;
|
||||||
if (stat.parseParams(this)) {
|
if (stat.parseParams(this)) {
|
||||||
statsInResponse.add(stat);
|
statsInResponse.add(stat);
|
||||||
statsToCalculate.addAll(stat.getDistribDeps());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// if no individual stat setting.
|
// if no individual stat setting use the default set
|
||||||
if ( ! statSpecifiedByLocalParam ) {
|
if ( ! ( statSpecifiedByLocalParam
|
||||||
|
// calcdistinct (as a local param) is a psuedo-stat, prevents default set
|
||||||
|
|| localParams.getBool("calcdistinct", false) ) ) {
|
||||||
statsInResponse.addAll(DEFAULT_STATS);
|
statsInResponse.addAll(DEFAULT_STATS);
|
||||||
|
}
|
||||||
|
|
||||||
|
// calcDistinct is a psuedo-stat with optional top level param default behavior
|
||||||
|
// if not overridden by the specific individual stats
|
||||||
|
if (localParams.getBool("calcdistinct", topLevelCalcDistinct)) {
|
||||||
|
for (Stat stat : CALCDISTINCT_PSUEDO_STAT) {
|
||||||
|
// assume true, but don't include if specific stat overrides
|
||||||
|
if (localParams.getBool(stat.name(), true)) {
|
||||||
|
statsInResponse.add(stat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (Stat stat : statsInResponse) {
|
for (Stat stat : statsInResponse) {
|
||||||
statsToCalculate.addAll(stat.getDistribDeps());
|
statsToCalculate.addAll(stat.getDistribDeps());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// calcDistinct has special "default" behavior using top level CalcDistinct param
|
|
||||||
if (topLevelCalcDistinct && localParams.getBool(Stat.calcdistinct.toString(), true)) {
|
|
||||||
statsInResponse.add(Stat.calcdistinct);
|
|
||||||
statsToCalculate.addAll(Stat.calcdistinct.getDistribDeps());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean calculateStats(Stat stat) {
|
public boolean calculateStats(Stat stat) {
|
||||||
return statsToCalculate.contains(stat);
|
return statsToCalculate.contains(stat);
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
* this work for additional information regarding copyright ownership.
|
* this work for additional information regarding copyright ownership.
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
@ -105,7 +105,7 @@ abstract class AbstractStatsValues<T> implements StatsValues {
|
||||||
// final booleans from StatsField to allow better inlining & JIT optimizing
|
// final booleans from StatsField to allow better inlining & JIT optimizing
|
||||||
final protected boolean computeCount;
|
final protected boolean computeCount;
|
||||||
final protected boolean computeMissing;
|
final protected boolean computeMissing;
|
||||||
final protected boolean computeCalcDistinct;
|
final protected boolean computeCalcDistinct; // needed for either countDistinct or distinctValues
|
||||||
final protected boolean computeMin;
|
final protected boolean computeMin;
|
||||||
final protected boolean computeMax;
|
final protected boolean computeMax;
|
||||||
final protected boolean computeMinOrMax;
|
final protected boolean computeMinOrMax;
|
||||||
|
@ -148,7 +148,8 @@ abstract class AbstractStatsValues<T> implements StatsValues {
|
||||||
this.statsField = statsField;
|
this.statsField = statsField;
|
||||||
this.computeCount = statsField.calculateStats(Stat.count);
|
this.computeCount = statsField.calculateStats(Stat.count);
|
||||||
this.computeMissing = statsField.calculateStats(Stat.missing);
|
this.computeMissing = statsField.calculateStats(Stat.missing);
|
||||||
this.computeCalcDistinct = statsField.calculateStats(Stat.calcdistinct);
|
this.computeCalcDistinct = statsField.calculateStats(Stat.countDistinct)
|
||||||
|
|| statsField.calculateStats(Stat.distinctValues);
|
||||||
this.computeMin = statsField.calculateStats(Stat.min);
|
this.computeMin = statsField.calculateStats(Stat.min);
|
||||||
this.computeMax = statsField.calculateStats(Stat.max);
|
this.computeMax = statsField.calculateStats(Stat.max);
|
||||||
this.computeMinOrMax = computeMin || computeMax;
|
this.computeMinOrMax = computeMin || computeMax;
|
||||||
|
@ -324,8 +325,10 @@ abstract class AbstractStatsValues<T> implements StatsValues {
|
||||||
if (statsField.includeInResponse(Stat.missing)) {
|
if (statsField.includeInResponse(Stat.missing)) {
|
||||||
res.add("missing", missing);
|
res.add("missing", missing);
|
||||||
}
|
}
|
||||||
if (statsField.includeInResponse(Stat.calcdistinct)) {
|
if (statsField.includeInResponse(Stat.distinctValues)) {
|
||||||
res.add("distinctValues", distinctValues);
|
res.add("distinctValues", distinctValues);
|
||||||
|
}
|
||||||
|
if (statsField.includeInResponse(Stat.countDistinct)) {
|
||||||
res.add("countDistinct", countDistinct);
|
res.add("countDistinct", countDistinct);
|
||||||
}
|
}
|
||||||
if (statsField.includeInResponse(Stat.cardinality)) {
|
if (statsField.includeInResponse(Stat.cardinality)) {
|
||||||
|
|
|
@ -696,6 +696,12 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
||||||
params("stats.calcdistinct", "false",
|
params("stats.calcdistinct", "false",
|
||||||
"f."+i1+".stats.calcdistinct", "false",
|
"f."+i1+".stats.calcdistinct", "false",
|
||||||
"stats.field", "{!min=true calcdistinct=true}" + i1),
|
"stats.field", "{!min=true calcdistinct=true}" + i1),
|
||||||
|
params("stats.calcdistinct", "false",
|
||||||
|
"f."+i1+".stats.calcdistinct", "false",
|
||||||
|
"stats.field", "{!min=true countDistinct=true distinctValues=true}" + i1),
|
||||||
|
params("stats.field", "{!min=true countDistinct=true distinctValues=true}" + i1),
|
||||||
|
params("yes", "true",
|
||||||
|
"stats.field", "{!min=$yes countDistinct=$yes distinctValues=$yes}" + i1),
|
||||||
}) {
|
}) {
|
||||||
|
|
||||||
rsp = query(SolrParams.wrapDefaults
|
rsp = query(SolrParams.wrapDefaults
|
||||||
|
@ -732,6 +738,9 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
||||||
params("stats.calcdistinct", "true",
|
params("stats.calcdistinct", "true",
|
||||||
"f."+i1+".stats.calcdistinct", "true",
|
"f."+i1+".stats.calcdistinct", "true",
|
||||||
"stats.field", "{!min=true calcdistinct=false}" + i1),
|
"stats.field", "{!min=true calcdistinct=false}" + i1),
|
||||||
|
params("stats.calcdistinct", "true",
|
||||||
|
"f."+i1+".stats.calcdistinct", "true",
|
||||||
|
"stats.field", "{!min=true countDistinct=false distinctValues=false}" + i1),
|
||||||
}) {
|
}) {
|
||||||
|
|
||||||
rsp = query(SolrParams.wrapDefaults
|
rsp = query(SolrParams.wrapDefaults
|
||||||
|
@ -752,7 +761,6 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
||||||
assertNull(p+" expected null for sum", s.getSum());
|
assertNull(p+" expected null for sum", s.getSum());
|
||||||
assertNull(p+" expected null for percentiles", s.getPercentiles());
|
assertNull(p+" expected null for percentiles", s.getPercentiles());
|
||||||
assertNull(p+" expected null for cardinality", s.getCardinality());
|
assertNull(p+" expected null for cardinality", s.getCardinality());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// this field doesn't exist in any doc in the result set.
|
// this field doesn't exist in any doc in the result set.
|
||||||
|
@ -839,8 +847,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
||||||
}
|
}
|
||||||
assertEquals("Sanity check failed: either test broke, or test changed, or you adjusted Stat enum" +
|
assertEquals("Sanity check failed: either test broke, or test changed, or you adjusted Stat enum" +
|
||||||
" (adjust constant accordingly if intentional)",
|
" (adjust constant accordingly if intentional)",
|
||||||
4235, numTotalStatQueries);
|
5082, numTotalStatQueries);
|
||||||
|
|
||||||
|
|
||||||
/*** TODO: the failure may come back in "exception"
|
/*** TODO: the failure may come back in "exception"
|
||||||
try {
|
try {
|
||||||
|
@ -1164,6 +1171,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void validateCommonQueryParameters() throws Exception {
|
private void validateCommonQueryParameters() throws Exception {
|
||||||
|
ignoreException("parameter cannot be negative");
|
||||||
try {
|
try {
|
||||||
SolrQuery query = new SolrQuery();
|
SolrQuery query = new SolrQuery();
|
||||||
query.setStart(-1).setQuery("*");
|
query.setStart(-1).setQuery("*");
|
||||||
|
@ -1181,5 +1189,6 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
||||||
} catch (SolrException e) {
|
} catch (SolrException e) {
|
||||||
assertEquals(ErrorCode.BAD_REQUEST.code, e.code());
|
assertEquals(ErrorCode.BAD_REQUEST.code, e.code());
|
||||||
}
|
}
|
||||||
|
resetExceptionIgnores();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1115,8 +1115,11 @@ public class StatsComponentTest extends AbstractSolrTestCase {
|
||||||
SolrQueryRequest req2 = req(baseParams,
|
SolrQueryRequest req2 = req(baseParams,
|
||||||
StatsParams.STATS_FIELD,
|
StatsParams.STATS_FIELD,
|
||||||
"{!min=true, max=true, count=true, sum=true, mean=true, stddev=true, sumOfSquares=true, missing=true, calcdistinct=true}" + fieldName);
|
"{!min=true, max=true, count=true, sum=true, mean=true, stddev=true, sumOfSquares=true, missing=true, calcdistinct=true}" + fieldName);
|
||||||
|
SolrQueryRequest req3 = req(baseParams,
|
||||||
|
StatsParams.STATS_FIELD,
|
||||||
|
"{!min=true, max=true, count=true, sum=true, mean=true, stddev=true, sumOfSquares=true, missing=true, countDistinct=true, distinctValues=true}" + fieldName);
|
||||||
|
|
||||||
for (SolrQueryRequest req : new SolrQueryRequest[] { req1, req2 }) {
|
for (SolrQueryRequest req : new SolrQueryRequest[] { req1, req2, req3 }) {
|
||||||
assertQ("test status on docValues and multiValued: " + req.toString(), req
|
assertQ("test status on docValues and multiValued: " + req.toString(), req
|
||||||
, "//lst[@name='" + fieldName + "']/double[@name='min'][.='-3.0']"
|
, "//lst[@name='" + fieldName + "']/double[@name='min'][.='-3.0']"
|
||||||
, "//lst[@name='" + fieldName + "']/double[@name='max'][.='16.0']"
|
, "//lst[@name='" + fieldName + "']/double[@name='max'][.='16.0']"
|
||||||
|
@ -1231,16 +1234,14 @@ public class StatsComponentTest extends AbstractSolrTestCase {
|
||||||
public final static String KPRE = XPRE + "lst[@name='stats_fields']/lst[@name='k']/";
|
public final static String KPRE = XPRE + "lst[@name='stats_fields']/lst[@name='k']/";
|
||||||
public final Stat stat;
|
public final Stat stat;
|
||||||
public final String input;
|
public final String input;
|
||||||
public final int numResponseKeys; // all because calcdistinct is obnoxious
|
|
||||||
public final List<String> perShardXpaths;
|
public final List<String> perShardXpaths;
|
||||||
public final List<String> finalXpaths;
|
public final List<String> finalXpaths;
|
||||||
|
|
||||||
public final static Map<Stat,ExpectedStat> ALL = new LinkedHashMap<Stat,ExpectedStat>();
|
public final static Map<Stat,ExpectedStat> ALL = new LinkedHashMap<Stat,ExpectedStat>();
|
||||||
private ExpectedStat(Stat stat, String input, int numResponseKeys,
|
private ExpectedStat(Stat stat, String input,
|
||||||
List<String> perShardXpaths, List<String> finalXpaths) {
|
List<String> perShardXpaths, List<String> finalXpaths) {
|
||||||
this.stat = stat;
|
this.stat = stat;
|
||||||
this.input = input;
|
this.input = input;
|
||||||
this.numResponseKeys = numResponseKeys;
|
|
||||||
this.perShardXpaths = perShardXpaths;
|
this.perShardXpaths = perShardXpaths;
|
||||||
this.finalXpaths = finalXpaths;
|
this.finalXpaths = finalXpaths;
|
||||||
}
|
}
|
||||||
|
@ -1258,12 +1259,11 @@ public class StatsComponentTest extends AbstractSolrTestCase {
|
||||||
perShardXpaths.addAll(expectedDep.perShardXpaths);
|
perShardXpaths.addAll(expectedDep.perShardXpaths);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ALL.put(stat, new ExpectedStat(stat, input, 1,
|
ALL.put(stat, new ExpectedStat(stat, input, perShardXpaths, Collections.singletonList(xpath)));
|
||||||
perShardXpaths, Collections.singletonList(xpath)));
|
|
||||||
}
|
}
|
||||||
public static void create(Stat stat, String input, int numResponseKeys,
|
public static void create(Stat stat, String input,
|
||||||
List<String> perShardXpaths, List<String> finalXpaths) {
|
List<String> perShardXpaths, List<String> finalXpaths) {
|
||||||
ALL.put(stat, new ExpectedStat(stat, input, numResponseKeys, perShardXpaths, finalXpaths));
|
ALL.put(stat, new ExpectedStat(stat, input, perShardXpaths, finalXpaths));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1340,16 +1340,16 @@ public class StatsComponentTest extends AbstractSolrTestCase {
|
||||||
ExpectedStat.createSimple(Stat.mean, "true", "double", String.valueOf(sum / count));
|
ExpectedStat.createSimple(Stat.mean, "true", "double", String.valueOf(sum / count));
|
||||||
ExpectedStat.createSimple(Stat.sumOfSquares, "true", "double", String.valueOf(sumOfSquares));
|
ExpectedStat.createSimple(Stat.sumOfSquares, "true", "double", String.valueOf(sumOfSquares));
|
||||||
ExpectedStat.createSimple(Stat.stddev, "true", "double", String.valueOf(stddev));
|
ExpectedStat.createSimple(Stat.stddev, "true", "double", String.valueOf(stddev));
|
||||||
final String countDistinctXpath = kpre + "long[@name='countDistinct'][.='10']";
|
final String distinctValsXpath = "count(" + kpre + "arr[@name='distinctValues']/*)=10";
|
||||||
ExpectedStat.create(Stat.calcdistinct, "true", 2,
|
ExpectedStat.create(Stat.distinctValues, "true",
|
||||||
Arrays.asList("count(" + kpre + "arr[@name='distinctValues']/*)=10",
|
Collections.singletonList(distinctValsXpath),
|
||||||
countDistinctXpath),
|
Collections.singletonList(distinctValsXpath));
|
||||||
Collections.singletonList(countDistinctXpath));
|
ExpectedStat.createSimple(Stat.countDistinct, "true", "long", "10");
|
||||||
final String percentileShardXpath = kpre + "str[@name='percentiles'][.='"
|
final String percentileShardXpath = kpre + "str[@name='percentiles'][.='"
|
||||||
+ Base64.byteArrayToBase64(tdigestBuf.array(), 0, tdigestBuf.array().length) + "']";
|
+ Base64.byteArrayToBase64(tdigestBuf.array(), 0, tdigestBuf.array().length) + "']";
|
||||||
final String p90 = "" + tdigest.quantile(0.90D);
|
final String p90 = "" + tdigest.quantile(0.90D);
|
||||||
final String p99 = "" + tdigest.quantile(0.99D);
|
final String p99 = "" + tdigest.quantile(0.99D);
|
||||||
ExpectedStat.create(Stat.percentiles, "'90, 99'", 1,
|
ExpectedStat.create(Stat.percentiles, "'90, 99'",
|
||||||
Collections.singletonList(percentileShardXpath),
|
Collections.singletonList(percentileShardXpath),
|
||||||
Arrays.asList("count(" + kpre + "lst[@name='percentiles']/*)=2",
|
Arrays.asList("count(" + kpre + "lst[@name='percentiles']/*)=2",
|
||||||
kpre + "lst[@name='percentiles']/double[@name='90.0'][.="+p90+"]",
|
kpre + "lst[@name='percentiles']/double[@name='90.0'][.="+p90+"]",
|
||||||
|
@ -1357,7 +1357,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
|
||||||
final String cardinalityShardXpath = kpre + "str[@name='cardinality'][.='"
|
final String cardinalityShardXpath = kpre + "str[@name='cardinality'][.='"
|
||||||
+ Base64.byteArrayToBase64(hllBytes, 0, hllBytes.length) + "']";
|
+ Base64.byteArrayToBase64(hllBytes, 0, hllBytes.length) + "']";
|
||||||
final String cardinalityXpath = kpre + "long[@name='cardinality'][.='10']";
|
final String cardinalityXpath = kpre + "long[@name='cardinality'][.='10']";
|
||||||
ExpectedStat.create(Stat.cardinality, "true", 1,
|
ExpectedStat.create(Stat.cardinality, "true",
|
||||||
Collections.singletonList(cardinalityShardXpath),
|
Collections.singletonList(cardinalityShardXpath),
|
||||||
Collections.singletonList(cardinalityXpath));
|
Collections.singletonList(cardinalityXpath));
|
||||||
|
|
||||||
|
@ -1377,7 +1377,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
|
||||||
int numKeysExpected = 0;
|
int numKeysExpected = 0;
|
||||||
EnumSet<Stat> distribDeps = stat.getDistribDeps();
|
EnumSet<Stat> distribDeps = stat.getDistribDeps();
|
||||||
for (Stat perShardDep : distribDeps) {
|
for (Stat perShardDep : distribDeps) {
|
||||||
numKeysExpected += ExpectedStat.ALL.get(perShardDep).numResponseKeys;
|
numKeysExpected++;
|
||||||
|
|
||||||
// even if we go out of our way to exclude the dependent stats,
|
// even if we go out of our way to exclude the dependent stats,
|
||||||
// the shard should return them since they are a dependency for the requested stat
|
// the shard should return them since they are a dependency for the requested stat
|
||||||
|
@ -1413,7 +1413,7 @@ public class StatsComponentTest extends AbstractSolrTestCase {
|
||||||
|
|
||||||
paras.append(stat + "=" + expect.input + " ");
|
paras.append(stat + "=" + expect.input + " ");
|
||||||
|
|
||||||
numKeysExpected += expect.numResponseKeys;
|
numKeysExpected++;
|
||||||
testXpaths.addAll(expect.finalXpaths);
|
testXpaths.addAll(expect.finalXpaths);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue