mirror of https://github.com/apache/lucene.git
SOLR-8420: Fix long overflow in sumOfSquares for Date statistics
Casted operations to double. Changed the test to support a percentage error given the FUZZY flag in doubles
This commit is contained in:
parent
dc95d5248a
commit
730d10f145
|
@ -218,6 +218,9 @@ Bug Fixes
|
|||
* SOLR-8696: Start the Overseer before actions that need the overseer on init and when reconnecting after
|
||||
zk expiration and improve init logic. (Scott Blum, Mark Miller)
|
||||
|
||||
* SOLR-8420: Fix long overflow in sumOfSquares for Date statistics. (Tom Hill, Christine Poerschke,
|
||||
Tomás Fernández Löbbe)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been
|
||||
|
|
|
@ -751,7 +751,7 @@ class DateStatsValues extends AbstractStatsValues<Date> {
|
|||
public void updateTypeSpecificStats(Date v, int count) {
|
||||
long value = v.getTime();
|
||||
if (computeSumOfSquares) {
|
||||
sumOfSquares += (value * value * count); // for std deviation
|
||||
sumOfSquares += ((double)value * value * count); // for std deviation
|
||||
}
|
||||
if (computeSum) {
|
||||
sum += value * count;
|
||||
|
@ -807,7 +807,7 @@ class DateStatsValues extends AbstractStatsValues<Date> {
|
|||
if (count <= 1) {
|
||||
return 0.0D;
|
||||
}
|
||||
return Math.sqrt(((count * sumOfSquares) - (sum * sum))
|
||||
return Math.sqrt(((count * sumOfSquares) - (sum * (double)sum))
|
||||
/ (count * (count - 1.0D)));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -425,8 +425,13 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
|
||||
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", "stats_dt");
|
||||
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", i1);
|
||||
|
||||
handle.put("stddev", FUZZY);
|
||||
handle.put("sumOfSquares", FUZZY);
|
||||
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_a);
|
||||
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_b);
|
||||
handle.remove("stddev");
|
||||
handle.remove("sumOfSquares");
|
||||
|
||||
|
||||
rsp = query("q", "*:*", "sort", i1 + " desc", "stats", "true",
|
||||
|
@ -522,6 +527,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
"stats.field", "{!key=special_key}stats_dt",
|
||||
"stats.field", "{!ex=xxx}stats_dt");
|
||||
|
||||
handle.put("stddev", FUZZY);
|
||||
handle.put("sumOfSquares", FUZZY);
|
||||
query("q","*:*", "sort",i1+" desc", "stats", "true",
|
||||
// do a really simple query so distributed IDF doesn't cause problems
|
||||
// when comparing with control collection
|
||||
|
@ -838,7 +845,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
|
||||
paras.append("}").append(field);
|
||||
numTotalStatQueries++;
|
||||
rsp = query("q","*:*", "rows", "0", "stats", "true",
|
||||
rsp = query("q", q, "rows", "0", "stats", "true",
|
||||
"stats.field", paras.toString());
|
||||
// simple assert, mostly relying on comparison with single shard
|
||||
FieldStatsInfo s = rsp.getFieldStatsInfo().get("k");
|
||||
|
@ -850,6 +857,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
handle.remove("stddev");
|
||||
handle.remove("sumOfSquares");
|
||||
assertEquals("Sanity check failed: either test broke, or test changed, or you adjusted Stat enum" +
|
||||
" (adjust constant accordingly if intentional)",
|
||||
5082, numTotalStatQueries);
|
||||
|
|
|
@ -54,7 +54,6 @@ import org.apache.solr.util.AbstractSolrTestCase;
|
|||
|
||||
import org.apache.commons.math3.util.Combinations;
|
||||
import com.tdunning.math.stats.AVLTreeDigest;
|
||||
import com.google.common.hash.Hashing;
|
||||
import com.google.common.hash.HashFunction;
|
||||
import org.apache.solr.util.hll.HLL;
|
||||
|
||||
|
@ -465,6 +464,47 @@ public class StatsComponentTest extends AbstractSolrTestCase {
|
|||
|
||||
}
|
||||
|
||||
// Check for overflow of sumOfSquares
|
||||
public void testFieldStatisticsResultsDateFieldOverflow() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
|
||||
assertU(adoc("id", "1", "active_dt", "2015-12-14T09:00:00Z"));
|
||||
assertU(commit());
|
||||
|
||||
Map<String, String> args = new HashMap<>();
|
||||
args.put(CommonParams.Q, "*:*");
|
||||
args.put(StatsParams.STATS, "true");
|
||||
args.put(StatsParams.STATS_FIELD, "active_dt");
|
||||
args.put("indent", "true");
|
||||
SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args));
|
||||
|
||||
assertQ("test date statistics values", req,
|
||||
"//long[@name='count'][.='1']",
|
||||
"//date[@name='min'][.='2015-12-14T09:00:00Z']",
|
||||
"//date[@name='max'][.='2015-12-14T09:00:00Z']",
|
||||
"//date[@name='sum'][.='2015-12-14T09:00:00Z']",
|
||||
"//date[@name='mean'][.='2015-12-14T09:00:00Z']",
|
||||
"//double[@name='sumOfSquares'][.='" + Double.toString(2102742446988960000000000.0)+"']"
|
||||
);
|
||||
|
||||
assertU(adoc("id", "2", "active_dt", "2115-12-14T09:00:00Z"));
|
||||
assertU(adoc("id", "3", "active_dt", "2215-12-14T09:00:00Z"));
|
||||
assertU(commit());
|
||||
|
||||
assertQ("test date statistics values", req,
|
||||
"//long[@name='count'][.='3']",
|
||||
"//date[@name='min'][.='2015-12-14T09:00:00Z']",
|
||||
"//date[@name='max'][.='2215-12-14T09:00:00Z']",
|
||||
"//date[@name='sum'][.='2407-11-08T03:00:00Z']",
|
||||
"//date[@name='mean'][.='2115-12-14T09:00:00Z']",
|
||||
"//double[@name='sumOfSquares'][.='" + Double.toString(83555549895529430000000000.0)+"']",
|
||||
// The following number matches the number returned by the current solr
|
||||
// implementation of standard deviation. Should be 3155673600000.
|
||||
// That number is not precise, and the implementation should be fixed.
|
||||
"//double[@name='stddev'][.='" + Double.toString(3155673599999.999)+"']"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
public void doTestFieldStatisticsMissingResult(String f, SolrParams[] baseParamsSet) throws Exception {
|
||||
assertU(adoc("id", "1", f, "-10"));
|
||||
|
|
|
@ -240,6 +240,13 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
|
|||
public static int SKIP = 2;
|
||||
public static int SKIPVAL = 4;
|
||||
public static int UNORDERED = 8;
|
||||
|
||||
/**
|
||||
* When this flag is set, Double values will be allowed a difference ratio of 1E-8
|
||||
* between the non-distributed and the distributed returned values
|
||||
*/
|
||||
public static int FUZZY = 16;
|
||||
private static final double DOUBLE_RATIO_LIMIT = 1E-8;
|
||||
|
||||
protected int flags;
|
||||
protected Map<String, Integer> handle = new HashMap<>();
|
||||
|
@ -876,6 +883,31 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
if ((flags & FUZZY) != 0) {
|
||||
if ((a instanceof Double && b instanceof Double)) {
|
||||
double aaa = ((Double) a).doubleValue();
|
||||
double bbb = ((Double) b).doubleValue();
|
||||
if (aaa == bbb || ((Double) a).isNaN() && ((Double) b).isNaN()) {
|
||||
return null;
|
||||
}
|
||||
if ((aaa == 0.0) || (bbb == 0.0)) {
|
||||
return ":" + a + "!=" + b;
|
||||
}
|
||||
|
||||
double diff = Math.abs(aaa - bbb);
|
||||
// When stats computations are done on multiple shards, there may
|
||||
// be small differences in the results. Allow a small difference
|
||||
// between the result of the computations.
|
||||
|
||||
double ratio = Math.max(Math.abs(diff / aaa), Math.abs(diff / bbb));
|
||||
if (ratio > DOUBLE_RATIO_LIMIT) {
|
||||
return ":" + a + "!=" + b;
|
||||
} else {
|
||||
return null;// close enough.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!(a.equals(b))) {
|
||||
return ":" + a + "!=" + b;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue