diff --git a/CHANGES.txt b/CHANGES.txt index c602bc88a58..3fca114ca29 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -594,6 +594,10 @@ Bug Fixes 68. SOLR-1468: SolrJ's XML response parsing threw an exception for null names, such as those produced when facet.missing=true (yonik) +69. SOLR-1471: Fixed issue with calculating missing values for facets in single valued cases in Stats Component. + This is not correctly calculated for the multivalued case. (James Miller, gsingers) + + Other Changes ---------------------- 1. Upgraded to Lucene 2.4.0 (yonik) diff --git a/src/java/org/apache/solr/handler/component/FieldFacetStats.java b/src/java/org/apache/solr/handler/component/FieldFacetStats.java index f385a09b9ea..2d966489555 100644 --- a/src/java/org/apache/solr/handler/component/FieldFacetStats.java +++ b/src/java/org/apache/solr/handler/component/FieldFacetStats.java @@ -82,8 +82,6 @@ public class FieldFacetStats { public boolean facet(int docID, Double v) { - if (v == null) return false; - int term = termNum[docID]; int arrIdx = term - startTermIndex; if (arrIdx >= 0 && arrIdx < nTerms) { @@ -93,7 +91,12 @@ public class FieldFacetStats { stats = new StatsValues(); facetStatsValues.put(key, stats); } - stats.accumulate(v); + if (v != null) { + stats.accumulate(v); + } else { + stats.missing++; + return false; + } return true; } return false; diff --git a/src/java/org/apache/solr/request/UnInvertedField.java b/src/java/org/apache/solr/request/UnInvertedField.java index 00df1891981..dfbecc1ce14 100755 --- a/src/java/org/apache/solr/request/UnInvertedField.java +++ b/src/java/org/apache/solr/request/UnInvertedField.java @@ -634,33 +634,33 @@ public class UnInvertedField { //functionality between the two and refactor code somewhat use.incrementAndGet(); - FieldType ft = searcher.getSchema().getFieldType(field); StatsValues allstats = new StatsValues(); - int i = 0; - final FieldFacetStats[] finfo = new FieldFacetStats[facet.length]; - //Initialize facetstats, if facets have been passed in - FieldCache.StringIndex si; - for (String f : facet) { - ft = searcher.getSchema().getFieldType(f); - try { - si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f); - } - catch (IOException e) { - throw new RuntimeException("failed to open field cache for: " + f, e); - } - finfo[i++] = new FieldFacetStats(f, si, ft, numTermsInField); - } - DocSet docs = baseDocs; int baseSize = docs.size(); int maxDoc = searcher.maxDoc(); if (baseSize > 0) { + FieldType ft = searcher.getSchema().getFieldType(field); + + int i = 0; + final FieldFacetStats[] finfo = new FieldFacetStats[facet.length]; + //Initialize facetstats, if facets have been passed in + FieldCache.StringIndex si; + for (String f : facet) { + ft = searcher.getSchema().getFieldType(f); + try { + si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f); + } + catch (IOException e) { + throw new RuntimeException("failed to open field cache for: " + f, e); + } + finfo[i++] = new FieldFacetStats(f, si, ft, numTermsInField); + } final int[] index = this.index; - final int[] counts = new int[numTermsInField]; + final int[] counts = new int[numTermsInField];//keep track of the number of times we see each word in the field for all the documents in the docset NumberedTermEnum te = ti.getEnumerator(searcher.getReader()); @@ -769,11 +769,11 @@ public class UnInvertedField { if (c > 0) { allstats.addMissing(c); } - } - if (finfo.length > 0) { - allstats.facets = new HashMap>(); - for (FieldFacetStats f : finfo) { - allstats.facets.put(f.name, f.facetStatsValues); + if (finfo.length > 0) { + allstats.facets = new HashMap>(); + for (FieldFacetStats f : finfo) { + allstats.facets.put(f.name, f.facetStatsValues); + } } } return allstats; diff --git a/src/test/org/apache/solr/handler/component/StatsComponentTest.java b/src/test/org/apache/solr/handler/component/StatsComponentTest.java index 422e6bfa263..a8c274c6893 100644 --- a/src/test/org/apache/solr/handler/component/StatsComponentTest.java +++ b/src/test/org/apache/solr/handler/component/StatsComponentTest.java @@ -82,32 +82,27 @@ public class StatsComponentTest extends AbstractSolrTestCase { assertU(adoc("id", "1", "stats_ii", "-10", "stats_ii", "-100", "active_s", "true")); assertU(adoc("id", "2", "stats_ii", "-20", "stats_ii", "200", "active_s", "true")); + assertU(adoc("id", "3", "stats_ii", "-30", "stats_ii", "-1", "active_s", "false")); assertU(adoc("id", "4", "stats_ii", "-40", "stats_ii", "10", "active_s", "false")); + assertU(adoc("id", "5", "active_s", "false")); assertU(commit()); - - Map args = new HashMap(); args.put(CommonParams.Q, "*:*"); args.put(StatsParams.STATS, "true"); args.put(StatsParams.STATS_FIELD, "stats_ii"); args.put("indent", "true"); SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args)); - - assertQ("test statistics values", req , "//double[@name='min'][.='-100.0']" , "//double[@name='max'][.='200.0']" , "//double[@name='sum'][.='9.0']" , "//long[@name='count'][.='8']" - , "//long[@name='missing'][.='0']" + , "//long[@name='missing'][.='1']" , "//double[@name='sumOfSquares'][.='53101.0']" , "//double[@name='mean'][.='1.125']" , "//double[@name='stddev'][.='87.08852228787508']" ); - - - args.put(StatsParams.STATS_FACET, "active_s"); req = new LocalSolrQueryRequest(core, new MapSolrParams(args)); @@ -116,15 +111,11 @@ public class StatsComponentTest extends AbstractSolrTestCase { , "//double[@name='max'][.='200.0']" , "//double[@name='sum'][.='9.0']" , "//long[@name='count'][.='8']" - , "//long[@name='missing'][.='0']" + , "//long[@name='missing'][.='1']" , "//double[@name='sumOfSquares'][.='53101.0']" , "//double[@name='mean'][.='1.125']" , "//double[@name='stddev'][.='87.08852228787508']" ); - - - - assertQ("test value for active_s=true", req , "//lst[@name='true']/double[@name='min'][.='-100.0']" , "//lst[@name='true']/double[@name='max'][.='200.0']" @@ -135,6 +126,17 @@ public class StatsComponentTest extends AbstractSolrTestCase { , "//lst[@name='true']/double[@name='mean'][.='17.5']" , "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']" ); + //Test for fixing multivalued missing + /*assertQ("test value for active_s=false", req + , "//lst[@name='false']/double[@name='min'][.='-40.0']" + , "//lst[@name='false']/double[@name='max'][.='10.0']" + , "//lst[@name='false']/double[@name='sum'][.='-61.0']" + , "//lst[@name='false']/long[@name='count'][.='4']" + , "//lst[@name='false']/long[@name='missing'][.='1']" + , "//lst[@name='false']/double[@name='sumOfSquares'][.='2601.0']" + , "//lst[@name='false']/double[@name='mean'][.='-15.22']" + , "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']" + );*/ } @@ -205,4 +207,43 @@ public class StatsComponentTest extends AbstractSolrTestCase { , "//lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']" ); } + + public void testFacetStatisticsMissingResult() throws Exception { + SolrCore core = h.getCore(); + assertU(adoc("id", "1", "stats_i", "10", "active_s", "true")); + assertU(adoc("id", "2", "stats_i", "20", "active_s", "true")); + assertU(adoc("id", "3", "active_s", "false")); + assertU(adoc("id", "4", "stats_i", "40", "active_s", "false")); + assertU(commit()); + + Map args = new HashMap(); + args.put(CommonParams.Q, "*:*"); + args.put(StatsParams.STATS, "true"); + args.put(StatsParams.STATS_FIELD, "stats_i"); + args.put(StatsParams.STATS_FACET, "active_s"); + args.put("indent", "true"); + SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args)); + + assertQ("test value for active_s=true", req + , "//lst[@name='true']/double[@name='min'][.='10.0']" + , "//lst[@name='true']/double[@name='max'][.='20.0']" + , "//lst[@name='true']/double[@name='sum'][.='30.0']" + , "//lst[@name='true']/long[@name='count'][.='2']" + , "//lst[@name='true']/long[@name='missing'][.='0']" + , "//lst[@name='true']/double[@name='sumOfSquares'][.='500.0']" + , "//lst[@name='true']/double[@name='mean'][.='15.0']" + , "//lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']" + ); + + assertQ("test value for active_s=false", req + , "//lst[@name='false']/double[@name='min'][.='40.0']" + , "//lst[@name='false']/double[@name='max'][.='40.0']" + , "//lst[@name='false']/double[@name='sum'][.='40.0']" + , "//lst[@name='false']/long[@name='count'][.='1']" + , "//lst[@name='false']/long[@name='missing'][.='1']" + , "//lst[@name='false']/double[@name='sumOfSquares'][.='1600.0']" + , "//lst[@name='false']/double[@name='mean'][.='40.0']" + , "//lst[@name='false']/double[@name='stddev'][.='0.0']" + ); + } }