diff --git a/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java b/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java index a5e3f95ac02..4e6294b0353 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java +++ b/solr/core/src/java/org/apache/solr/handler/component/FieldFacetStats.java @@ -51,7 +51,7 @@ public class FieldFacetStats { final boolean calcDistinct; public final Map facetStatsValues; - + private final Map missingStats; List> facetStatsTerms; final AtomicReader topLevelReader; @@ -73,6 +73,7 @@ public class FieldFacetStats { facetStatsValues = new HashMap<>(); facetStatsTerms = new ArrayList<>(); + missingStats = new HashMap<>(); } private StatsValues getStatsValues(String key) throws IOException { @@ -100,8 +101,10 @@ public class FieldFacetStats { if (topLevelSortedValues == null) { topLevelSortedValues = DocValues.getSorted(topLevelReader, name); } + int term = topLevelSortedValues.getOrd(docID); + int arrIdx = term; if (arrIdx >= 0 && arrIdx < topLevelSortedValues.getValueCount()) { final String key; @@ -113,6 +116,8 @@ public class FieldFacetStats { while (facetStatsTerms.size() <= statsTermNum) { facetStatsTerms.add(new HashMap()); } + + final Map statsTermCounts = facetStatsTerms.get(statsTermNum); Integer statsTermCount = statsTermCounts.get(key); if (statsTermCount == null) { @@ -122,6 +127,7 @@ public class FieldFacetStats { } return true; } + return false; } @@ -132,8 +138,7 @@ public class FieldFacetStats { while (facetStatsTerms.size() <= statsTermNum) { facetStatsTerms.add(new HashMap()); } - for (Map.Entry stringIntegerEntry : facetStatsTerms.get(statsTermNum).entrySet()) { - Map.Entry pairs = (Map.Entry) stringIntegerEntry; + for (Map.Entry pairs : facetStatsTerms.get(statsTermNum).entrySet()) { String key = (String) pairs.getKey(); StatsValues facetStats = facetStatsValues.get(key); if (facetStats == null) { @@ -156,6 +161,35 @@ public class FieldFacetStats { } } + public void facetMissingNum(int docID) throws IOException { + if (topLevelSortedValues == null) { + topLevelSortedValues = DocValues.getSorted(topLevelReader, name); + } + + int ord = topLevelSortedValues.getOrd(docID); + if (ord != -1) { + Integer missingCount = missingStats.get(ord); + if (missingCount == null) { + missingStats.put(ord, 1); + } else { + missingStats.put(ord, missingCount + 1); + } + } + } + + public void accumulateMissing() throws IOException { + StatsValues statsValue; + + for (Map.Entry entry : missingStats.entrySet()) { + if (entry.getKey() >= 0) { + String key = topLevelSortedValues.lookupOrd(entry.getKey()).utf8ToString(); + if ((statsValue = facetStatsValues.get(key)) != null) { + statsValue.addMissing(entry.getValue()); + } + } + } + return; + } } diff --git a/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java index 1d6b5bcd49d..1877c5be090 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java @@ -39,7 +39,6 @@ import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; import org.apache.solr.request.DocValuesStats; import org.apache.solr.request.SolrQueryRequest; -import org.apache.solr.request.UnInvertedField; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesStats.java b/solr/core/src/java/org/apache/solr/request/DocValuesStats.java index 271eb32b83f..4b2dc6ce522 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesStats.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesStats.java @@ -23,7 +23,6 @@ import java.util.Map; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.Term; import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; import org.apache.lucene.index.MultiDocValues.OrdinalMap; @@ -32,10 +31,9 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LongValues; +import org.apache.solr.common.SolrException; import org.apache.solr.handler.component.FieldFacetStats; import org.apache.solr.handler.component.StatsValues; import org.apache.solr.handler.component.StatsValuesFactory; @@ -62,11 +60,17 @@ public class DocValuesStats { //Initialize facetstats, if facets have been passed in final FieldFacetStats[] facetStats = new FieldFacetStats[facet.length]; int upto = 0; + for (String facetField : facet) { + SchemaField fsf = searcher.getSchema().getField(facetField); + if ( fsf.multiValued()) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Stats can only facet on single-valued fields, not: " + facetField ); + } + SchemaField facetSchemaField = searcher.getSchema().getField(facetField); facetStats[upto++] = new FieldFacetStats(searcher, facetField, schemaField, facetSchemaField, calcDistinct); } - // TODO: remove multiValuedFieldCache(), check dv type / uninversion type? final boolean multiValued = schemaField.multiValued() || ft.multiValuedFieldCache(); @@ -74,6 +78,7 @@ public class DocValuesStats { OrdinalMap ordinalMap = null; // for mapping per-segment ords to global ones if (multiValued) { si = searcher.getAtomicReader().getSortedSetDocValues(fieldName); + if (si instanceof MultiSortedSetDocValues) { ordinalMap = ((MultiSortedSetDocValues)si).mapping; } @@ -90,26 +95,27 @@ public class DocValuesStats { if (si.getValueCount() >= Integer.MAX_VALUE) { throw new UnsupportedOperationException("Currently this stats method is limited to " + Integer.MAX_VALUE + " unique terms"); } - - DocSet missing = docs.andNot( searcher.getDocSet(new TermRangeQuery(fieldName, null, null, false, false))); - - final int nTerms = (int) si.getValueCount(); + int missingDocCountTotal = 0; + final int nTerms = (int) si.getValueCount(); // count collection array only needs to be as big as the number of terms we are // going to collect counts for. final int[] counts = new int[nTerms]; Filter filter = docs.getTopFilter(); List leaves = searcher.getTopReaderContext().leaves(); + for (int subIndex = 0; subIndex < leaves.size(); subIndex++) { AtomicReaderContext leaf = leaves.get(subIndex); DocIdSet dis = filter.getDocIdSet(leaf, null); // solr docsets already exclude any deleted docs DocIdSetIterator disi = null; + if (dis != null) { disi = dis.iterator(); } if (disi != null) { int docBase = leaf.docBase; + if (multiValued) { SortedSetDocValues sub = leaf.reader().getSortedSetDocValues(fieldName); if (sub == null) { @@ -118,23 +124,23 @@ public class DocValuesStats { final SortedDocValues singleton = DocValues.unwrapSingleton(sub); if (singleton != null) { // some codecs may optimize SORTED_SET storage for single-valued fields - accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap); + missingDocCountTotal += accumSingle(counts, docBase, facetStats, singleton, disi, subIndex, ordinalMap); } else { - accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap); + missingDocCountTotal += accumMulti(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap); } } else { SortedDocValues sub = leaf.reader().getSortedDocValues(fieldName); if (sub == null) { sub = DocValues.emptySorted(); } - accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap); + missingDocCountTotal += accumSingle(counts, docBase, facetStats, sub, disi, subIndex, ordinalMap); } } } - // add results in index order for (int ord = 0; ord < counts.length; ord++) { int count = counts[ord]; + if (count > 0) { final BytesRef value = si.lookupOrd(ord); res.accumulate(value, count); @@ -143,26 +149,23 @@ public class DocValuesStats { } } } - - res.addMissing(missing.size()); + res.addMissing(missingDocCountTotal); + if (facetStats.length > 0) { for (FieldFacetStats f : facetStats) { - Map facetStatsValues = f.facetStatsValues; - FieldType facetType = searcher.getSchema().getFieldType(f.name); - for (Map.Entry entry : facetStatsValues.entrySet()) { - String termLabel = entry.getKey(); - int missingCount = searcher.numDocs(new TermQuery(new Term(f.name, facetType.toInternal(termLabel))), missing); - entry.getValue().addMissing(missingCount); - } + Map facetStatsValues = f.facetStatsValues; + f.accumulateMissing(); res.addFacet(f.name, facetStatsValues); } } + return res; } /** accumulates per-segment single-valued stats */ - static void accumSingle(int counts[], int docBase, FieldFacetStats[] facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + static int accumSingle(int counts[], int docBase, FieldFacetStats[] facetStats, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex); + int missingDocCount = 0; int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int term = si.getOrd(doc); @@ -174,18 +177,29 @@ public class DocValuesStats { for (FieldFacetStats f : facetStats) { f.facetTermNum(docBase + doc, term); } + }else{ + for (FieldFacetStats f : facetStats) { + f.facetMissingNum(docBase + doc); + } + + missingDocCount++; } } + return missingDocCount; } /** accumulates per-segment multi-valued stats */ - static void accumMulti(int counts[], int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { + + static int accumMulti(int counts[], int docBase, FieldFacetStats[] facetStats, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { final LongValues ordMap = map == null ? null : map.getGlobalOrds(subIndex); + int missingDocCount = 0; int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { si.setDocument(doc); long ord; + boolean emptyTerm = true; while ((ord = si.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + emptyTerm = false; int term = (int) ord; if (map != null) { term = (int) ordMap.get(term); @@ -195,6 +209,15 @@ public class DocValuesStats { f.facetTermNum(docBase + doc, term); } } + if (emptyTerm){ + for (FieldFacetStats f : facetStats) { + f.facetMissingNum(docBase + doc); + } + + missingDocCount++; + } } + + return missingDocCount; } } diff --git a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java index a9940ac64ad..fd1382ef869 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java @@ -28,10 +28,9 @@ import java.util.Map; import java.util.TimeZone; import org.apache.lucene.util.LuceneTestCase; -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.MapSolrParams; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.StatsParams; import org.apache.solr.core.SolrCore; import org.apache.solr.request.LocalSolrQueryRequest; @@ -64,9 +63,8 @@ public class StatsComponentTest extends AbstractSolrTestCase { for (String f : new String[] { "stats_i","stats_l","stats_f","stats_d", "stats_ti","stats_tl","stats_tf","stats_td", - "stats_ti_dv","stats_tl_dv","stats_tf_dv","stats_td_dv" -// , TODO: enable this test after SOLR-6452 is fixed -// "stats_ti_ni_dv","stats_tl_ni_dv","stats_tf_ni_dv","stats_td_ni_dv" + "stats_ti_dv","stats_tl_dv","stats_tf_dv","stats_td_dv", + "stats_ti_ni_dv","stats_tl_ni_dv","stats_tf_ni_dv","stats_td_ni_dv" }) { // all of our checks should work with all of these params @@ -91,9 +89,8 @@ public class StatsComponentTest extends AbstractSolrTestCase { for (String f : new String[] {"stats_ii", "stats_tis","stats_tfs","stats_tls","stats_tds", // trie fields - "stats_tis_dv","stats_tfs_dv","stats_tls_dv","stats_tds_dv" // Doc Values -// , TODO: enable this test after SOLR-6452 is fixed - //"stats_tis_ni_dv","stats_tfs_ni_dv","stats_tls_ni_dv","stats_tds_ni_dv" // Doc Values Not indexed + "stats_tis_dv","stats_tfs_dv","stats_tls_dv","stats_tds_dv", // Doc Values + "stats_tis_ni_dv","stats_tfs_ni_dv","stats_tls_ni_dv","stats_tds_ni_dv" // Doc Values Not indexed }) { doTestMVFieldStatisticsResult(f); @@ -153,6 +150,9 @@ public class StatsComponentTest extends AbstractSolrTestCase { assertU(adoc("id", "3", f, "-30", f, "-1", "active_s", "false")); assertU(adoc("id", "4", f, "-40", f, "10", "active_s", "false")); assertU(adoc("id", "5", "active_s", "false")); + assertU(adoc("id", "6", "active_s", "false")); + assertU(adoc("id", "7", "active_s", "true")); + assertU(commit()); // with or w/o these excluded filters, results should be the same @@ -171,7 +171,7 @@ public class StatsComponentTest extends AbstractSolrTestCase { , "//double[@name='max'][.='200.0']" , "//double[@name='sum'][.='9.0']" , "//long[@name='count'][.='8']" - , "//long[@name='missing'][.='1']" + , "//long[@name='missing'][.='3']" , "//long[@name='countDistinct'][.='8']" , "count(//arr[@name='distinctValues']/*)=8" , "//double[@name='sumOfSquares'][.='53101.0']" @@ -186,7 +186,7 @@ public class StatsComponentTest extends AbstractSolrTestCase { , "//double[@name='max'][.='200.0']" , "//double[@name='sum'][.='119.0']" , "//long[@name='count'][.='6']" - , "//long[@name='missing'][.='1']" + , "//long[@name='missing'][.='3']" , "//long[@name='countDistinct'][.='6']" , "count(//arr[@name='distinctValues']/*)=6" , "//double[@name='sumOfSquares'][.='43001.0']" @@ -202,7 +202,7 @@ public class StatsComponentTest extends AbstractSolrTestCase { , "//double[@name='max'][.='200.0']" , "//double[@name='sum'][.='9.0']" , "//long[@name='count'][.='8']" - , "//long[@name='missing'][.='1']" + , "//long[@name='missing'][.='3']" , "//long[@name='countDistinct'][.='8']" , "count(//lst[@name='" + f + "']/arr[@name='distinctValues']/*)=8" , "//double[@name='sumOfSquares'][.='53101.0']" @@ -216,7 +216,7 @@ public class StatsComponentTest extends AbstractSolrTestCase { , "//lst[@name='true']/double[@name='max'][.='200.0']" , "//lst[@name='true']/double[@name='sum'][.='70.0']" , "//lst[@name='true']/long[@name='count'][.='4']" - , "//lst[@name='true']/long[@name='missing'][.='0']" + , "//lst[@name='true']/long[@name='missing'][.='1']" , "//lst[@name='true']//long[@name='countDistinct'][.='4']" , "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4" , "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']" @@ -230,7 +230,7 @@ public class StatsComponentTest extends AbstractSolrTestCase { , "//lst[@name='false']/double[@name='max'][.='10.0']" , "//lst[@name='false']/double[@name='sum'][.='-61.0']" , "//lst[@name='false']/long[@name='count'][.='4']" - , "//lst[@name='false']/long[@name='missing'][.='1']" + , "//lst[@name='false']/long[@name='missing'][.='2']" , "//lst[@name='true']//long[@name='countDistinct'][.='4']" , "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4" , "//lst[@name='false']/double[@name='sumOfSquares'][.='2601.0']" @@ -711,4 +711,31 @@ public class StatsComponentTest extends AbstractSolrTestCase { Collections.addAll(cat_docValues, comparables); return cat_docValues; } + + +// public void testOtherFacetStatsResult() throws Exception { +// +// assertU(adoc("id", "1", "stats_tls_dv", "10", "active_i", "1")); +// assertU(adoc("id", "2", "stats_tls_dv", "20", "active_i", "1")); +// assertU(commit()); +// assertU(adoc("id", "3", "stats_tls_dv", "30", "active_i", "2")); +// assertU(adoc("id", "4", "stats_tls_dv", "40", "active_i", "2")); +// assertU(commit()); +// +// final String pre = "//lst[@name='stats_fields']/lst[@name='stats_tls_dv']/lst[@name='facets']/lst[@name='active_i']"; +// +// assertQ("test value for active_s=true", req("q", "*:*", "stats", "true", "stats.field", "stats_tls_dv", "stats.facet", "active_i","indent", "true") +// , "*[count("+pre+")=1]" +// , pre+"/lst[@name='1']/double[@name='min'][.='10.0']" +// , pre+"/lst[@name='1']/double[@name='max'][.='20.0']" +// , pre+"/lst[@name='1']/double[@name='sum'][.='30.0']" +// , pre+"/lst[@name='1']/long[@name='count'][.='2']" +// , pre+"/lst[@name='1']/long[@name='missing'][.='0']" +// , pre + "/lst[@name='true']/long[@name='countDistinct'][.='2']" +// , "count(" + pre + "/lst[@name='true']/arr[@name='distinctValues']/*)=2" +// , pre+"/lst[@name='1']/double[@name='sumOfSquares'][.='500.0']" +// , pre+"/lst[@name='1']/double[@name='mean'][.='15.0']" +// , pre+"/lst[@name='1']/double[@name='stddev'][.='7.0710678118654755']" +// ); +// } }