SOLR-8420: Fix long overflow in sumOfSquares for Date statistics

Casted operations to double. Changed the test to support a percentage error given the FUZZY flag in doubles
This commit is contained in:
Tomas Fernandez Lobbe 2016-02-24 19:02:17 -08:00
parent dc95d5248a
commit 730d10f145
5 changed files with 88 additions and 4 deletions

View File

@ -218,6 +218,9 @@ Bug Fixes
* SOLR-8696: Start the Overseer before actions that need the overseer on init and when reconnecting after
zk expiration and improve init logic. (Scott Blum, Mark Miller)
* SOLR-8420: Fix long overflow in sumOfSquares for Date statistics. (Tom Hill, Christine Poerschke,
Tomás Fernández Löbbe)
Optimizations
----------------------
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been

View File

@ -751,7 +751,7 @@ class DateStatsValues extends AbstractStatsValues<Date> {
public void updateTypeSpecificStats(Date v, int count) {
long value = v.getTime();
if (computeSumOfSquares) {
sumOfSquares += (value * value * count); // for std deviation
sumOfSquares += ((double)value * value * count); // for std deviation
}
if (computeSum) {
sum += value * count;
@ -807,7 +807,7 @@ class DateStatsValues extends AbstractStatsValues<Date> {
if (count <= 1) {
return 0.0D;
}
return Math.sqrt(((count * sumOfSquares) - (sum * sum))
return Math.sqrt(((count * sumOfSquares) - (sum * (double)sum))
/ (count * (count - 1.0D)));
}
}

View File

@ -425,8 +425,13 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", "stats_dt");
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", i1);
handle.put("stddev", FUZZY);
handle.put("sumOfSquares", FUZZY);
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_a);
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_b);
handle.remove("stddev");
handle.remove("sumOfSquares");
rsp = query("q", "*:*", "sort", i1 + " desc", "stats", "true",
@ -522,6 +527,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
"stats.field", "{!key=special_key}stats_dt",
"stats.field", "{!ex=xxx}stats_dt");
handle.put("stddev", FUZZY);
handle.put("sumOfSquares", FUZZY);
query("q","*:*", "sort",i1+" desc", "stats", "true",
// do a really simple query so distributed IDF doesn't cause problems
// when comparing with control collection
@ -838,7 +845,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
paras.append("}").append(field);
numTotalStatQueries++;
rsp = query("q","*:*", "rows", "0", "stats", "true",
rsp = query("q", q, "rows", "0", "stats", "true",
"stats.field", paras.toString());
// simple assert, mostly relying on comparison with single shard
FieldStatsInfo s = rsp.getFieldStatsInfo().get("k");
@ -850,6 +857,8 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
}
}
}
handle.remove("stddev");
handle.remove("sumOfSquares");
assertEquals("Sanity check failed: either test broke, or test changed, or you adjusted Stat enum" +
" (adjust constant accordingly if intentional)",
5082, numTotalStatQueries);

View File

@ -54,7 +54,6 @@ import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.commons.math3.util.Combinations;
import com.tdunning.math.stats.AVLTreeDigest;
import com.google.common.hash.Hashing;
import com.google.common.hash.HashFunction;
import org.apache.solr.util.hll.HLL;
@ -465,6 +464,47 @@ public class StatsComponentTest extends AbstractSolrTestCase {
}
// Check for overflow of sumOfSquares
public void testFieldStatisticsResultsDateFieldOverflow() throws Exception {
SolrCore core = h.getCore();
assertU(adoc("id", "1", "active_dt", "2015-12-14T09:00:00Z"));
assertU(commit());
Map<String, String> args = new HashMap<>();
args.put(CommonParams.Q, "*:*");
args.put(StatsParams.STATS, "true");
args.put(StatsParams.STATS_FIELD, "active_dt");
args.put("indent", "true");
SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args));
assertQ("test date statistics values", req,
"//long[@name='count'][.='1']",
"//date[@name='min'][.='2015-12-14T09:00:00Z']",
"//date[@name='max'][.='2015-12-14T09:00:00Z']",
"//date[@name='sum'][.='2015-12-14T09:00:00Z']",
"//date[@name='mean'][.='2015-12-14T09:00:00Z']",
"//double[@name='sumOfSquares'][.='" + Double.toString(2102742446988960000000000.0)+"']"
);
assertU(adoc("id", "2", "active_dt", "2115-12-14T09:00:00Z"));
assertU(adoc("id", "3", "active_dt", "2215-12-14T09:00:00Z"));
assertU(commit());
assertQ("test date statistics values", req,
"//long[@name='count'][.='3']",
"//date[@name='min'][.='2015-12-14T09:00:00Z']",
"//date[@name='max'][.='2215-12-14T09:00:00Z']",
"//date[@name='sum'][.='2407-11-08T03:00:00Z']",
"//date[@name='mean'][.='2115-12-14T09:00:00Z']",
"//double[@name='sumOfSquares'][.='" + Double.toString(83555549895529430000000000.0)+"']",
// The following number matches the number returned by the current solr
// implementation of standard deviation. Should be 3155673600000.
// That number is not precise, and the implementation should be fixed.
"//double[@name='stddev'][.='" + Double.toString(3155673599999.999)+"']"
);
}
public void doTestFieldStatisticsMissingResult(String f, SolrParams[] baseParamsSet) throws Exception {
assertU(adoc("id", "1", f, "-10"));

View File

@ -240,6 +240,13 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
public static int SKIP = 2;
public static int SKIPVAL = 4;
public static int UNORDERED = 8;
/**
* When this flag is set, Double values will be allowed a difference ratio of 1E-8
* between the non-distributed and the distributed returned values
*/
public static int FUZZY = 16;
private static final double DOUBLE_RATIO_LIMIT = 1E-8;
protected int flags;
protected Map<String, Integer> handle = new HashMap<>();
@ -876,6 +883,31 @@ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {
}
if ((flags & FUZZY) != 0) {
if ((a instanceof Double && b instanceof Double)) {
double aaa = ((Double) a).doubleValue();
double bbb = ((Double) b).doubleValue();
if (aaa == bbb || ((Double) a).isNaN() && ((Double) b).isNaN()) {
return null;
}
if ((aaa == 0.0) || (bbb == 0.0)) {
return ":" + a + "!=" + b;
}
double diff = Math.abs(aaa - bbb);
// When stats computations are done on multiple shards, there may
// be small differences in the results. Allow a small difference
// between the result of the computations.
double ratio = Math.max(Math.abs(diff / aaa), Math.abs(diff / bbb));
if (ratio > DOUBLE_RATIO_LIMIT) {
return ":" + a + "!=" + b;
} else {
return null;// close enough.
}
}
}
if (!(a.equals(b))) {
return ":" + a + "!=" + b;
}