From c56224895a265426265ea94a680699ec7e9a279d Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Fri, 11 Sep 2009 15:21:05 +0000 Subject: [PATCH] SOLR-1380: Added support for multi-valued fields to StatsComponent git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@813874 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + .../handler/component/FieldFacetStats.java | 144 +++++++++++++ .../handler/component/StatsComponent.java | 202 +++--------------- .../solr/handler/component/StatsValues.java | 159 ++++++++++++++ .../apache/solr/request/UnInvertedField.java | 180 +++++++++++++++- .../handler/component/StatsComponentTest.java | 64 ++++++ src/test/test-files/solr/conf/schema11.xml | 1 + 7 files changed, 572 insertions(+), 180 deletions(-) create mode 100644 src/java/org/apache/solr/handler/component/FieldFacetStats.java create mode 100644 src/java/org/apache/solr/handler/component/StatsValues.java diff --git a/CHANGES.txt b/CHANGES.txt index 0486929cade..778afd70a2e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -98,6 +98,8 @@ New Features 7. SOLR-680: Add StatsComponent. This gets simple statistics on matched numeric fields, including: min, max, mean, median, stddev. (koji, ryan) + 7.1 SOLR-1380: Added support for multi-valued fields (Harish Agarwal via gsingers) + 8. SOLR-561: Added Replication implemented in Java as a request handler. Supports index replication as well as configuration replication and exposes detailed statistics and progress information on the Admin page. Works on all platforms. (Noble Paul, yonik, Akshay Ukey, shalin) diff --git a/src/java/org/apache/solr/handler/component/FieldFacetStats.java b/src/java/org/apache/solr/handler/component/FieldFacetStats.java new file mode 100644 index 00000000000..f385a09b9ea --- /dev/null +++ b/src/java/org/apache/solr/handler/component/FieldFacetStats.java @@ -0,0 +1,144 @@ +package org.apache.solr.handler.component; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.search.FieldCache; +import org.apache.solr.schema.FieldType; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + + +/** + * 9/10/2009 - Moved out of StatsComponent to allow open access to UnInvertedField + * FieldFacetStats is a utility to accumulate statistics on a set of values in one field, + * for facet values present in another field. + *

+ * @see org.apache.solr.handler.component.StatsComponent + * + */ + +public class FieldFacetStats { + public final String name; + final FieldCache.StringIndex si; + final FieldType ft; + + final String[] terms; + final int[] termNum; + + final int startTermIndex; + final int endTermIndex; + final int nTerms; + + final int numStatsTerms; + + public final Map facetStatsValues; + + final List> facetStatsTerms; + + public FieldFacetStats(String name, FieldCache.StringIndex si, FieldType ft, int numStatsTerms) { + this.name = name; + this.si = si; + this.ft = ft; + this.numStatsTerms = numStatsTerms; + + terms = si.lookup; + termNum = si.order; + startTermIndex = 1; + endTermIndex = terms.length; + nTerms = endTermIndex - startTermIndex; + + facetStatsValues = new HashMap(); + + // for mv stats field, we'll want to keep track of terms + facetStatsTerms = new ArrayList>(); + if (numStatsTerms == 0) return; + int i = 0; + for (; i < numStatsTerms; i++) { + facetStatsTerms.add(new HashMap()); + } + } + + String getTermText(int docID) { + return terms[termNum[docID]]; + } + + + public boolean facet(int docID, Double v) { + if (v == null) return false; + + int term = termNum[docID]; + int arrIdx = term - startTermIndex; + if (arrIdx >= 0 && arrIdx < nTerms) { + String key = ft.indexedToReadable(terms[term]); + StatsValues stats = facetStatsValues.get(key); + if (stats == null) { + stats = new StatsValues(); + facetStatsValues.put(key, stats); + } + stats.accumulate(v); + return true; + } + return false; + } + + + //function to keep track of facet counts for term number + public boolean facetTermNum(int docID, int statsTermNum) { + + int term = termNum[docID]; + int arrIdx = term - startTermIndex; + if (arrIdx >= 0 && arrIdx < nTerms) { + String key = ft.indexedToReadable(terms[term]); + HashMap statsTermCounts = facetStatsTerms.get(statsTermNum); + Integer statsTermCount = statsTermCounts.get(key); + if (statsTermCount == null) { + statsTermCounts.put(key, 1); + } else { + statsTermCounts.put(key, statsTermCount + 1); + } + return true; + } + return false; + } + + + //function to accumulate counts for statsTermNum to specified value + public boolean accumulateTermNum(int statsTermNum, Double value) { + if (value == null) return false; + for (Map.Entry stringIntegerEntry : facetStatsTerms.get(statsTermNum).entrySet()) { + Map.Entry pairs = (Map.Entry) stringIntegerEntry; + String key = (String) pairs.getKey(); + StatsValues facetStats = facetStatsValues.get(key); + if (facetStats == null) { + facetStats = new StatsValues(); + facetStatsValues.put(key, facetStats); + } + Integer count = (Integer) pairs.getValue(); + if (count != null) { + facetStats.accumulate(value, count); + } + } + return true; + } + +} + + diff --git a/src/java/org/apache/solr/handler/component/StatsComponent.java b/src/java/org/apache/solr/handler/component/StatsComponent.java index 8d0c74a33a7..3ce47feb1d0 100644 --- a/src/java/org/apache/solr/handler/component/StatsComponent.java +++ b/src/java/org/apache/solr/handler/component/StatsComponent.java @@ -28,11 +28,16 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.StatsParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.handler.component.StatsValues; +import org.apache.solr.handler.component.FieldFacetStats; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocSet; import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.request.UnInvertedField; +import org.apache.solr.core.SolrCore; /** * Stats component calculates simple statistics on numeric field values @@ -173,166 +178,6 @@ class StatsInfo { } } -class StatsValues { - private static final String FACETS = "facets"; - double min; - double max; - double sum; - double sumOfSquares; - long count; - long missing; - - // facetField facetValue - Map> facets; - - public StatsValues() { - reset(); - } - - public void accumulate(NamedList stv){ - min = Math.min(min, (Double)stv.get("min")); - max = Math.max(max, (Double)stv.get("max")); - sum += (Double)stv.get("sum"); - count += (Long)stv.get("count"); - missing += (Long)stv.get("missing"); - sumOfSquares += (Double)stv.get("sumOfSquares"); - - NamedList f = (NamedList)stv.get( FACETS ); - if( f != null ) { - if( facets == null ) { - facets = new HashMap>(); - } - - for( int i=0; i< f.size(); i++ ) { - String field = f.getName(i); - NamedList vals = (NamedList)f.getVal( i ); - Map addTo = facets.get( field ); - if( addTo == null ) { - addTo = new HashMap(); - facets.put( field, addTo ); - } - for( int j=0; j< vals.size(); j++ ) { - String val = f.getName(i); - StatsValues vvals = addTo.get( val ); - if( vvals == null ) { - vvals = new StatsValues(); - addTo.put( val, vvals ); - } - vvals.accumulate( (NamedList)f.getVal( i ) ); - } - } - } - } - - public void accumulate(double v){ - sumOfSquares += (v*v); // for std deviation - min = Math.min(min, v); - max = Math.max(max, v); - sum += v; - count++; - } - - public double getAverage(){ - return sum / count; - } - - public double getStandardDeviation() - { - if( count <= 1.0D ) - return 0.0D; - - return Math.sqrt( ( ( count * sumOfSquares ) - ( sum * sum ) ) - / ( count * ( count - 1.0D ) ) ); - } - - public void reset(){ - min = Double.MAX_VALUE; - max = Double.NEGATIVE_INFINITY; - sum = count = missing = 0; - sumOfSquares = 0; - facets = null; - } - - public NamedList getStatsValues(){ - NamedList res = new SimpleOrderedMap(); - res.add("min", min); - res.add("max", max); - res.add("sum", sum); - res.add("count", count); - res.add("missing", missing); - res.add("sumOfSquares", sumOfSquares ); - res.add("mean", getAverage()); - res.add( "stddev", getStandardDeviation() ); - - // add the facet stats - if( facets != null && facets.size() > 0 ) { - NamedList> nl = new SimpleOrderedMap>(); - for( Map.Entry> entry : facets.entrySet() ) { - NamedList> nl2 = new SimpleOrderedMap>(); - nl.add( entry.getKey(), nl2 ); - for( Map.Entry e2 : entry.getValue().entrySet() ) { - nl2.add( e2.getKey(), e2.getValue().getStatsValues() ); - } - } - res.add( FACETS, nl ); - } - return res; - } -} - -class FieldFacetStats { - final String name; - final FieldCache.StringIndex si; - final FieldType ft; - - final String[] terms; - final int[] termNum; - - final int startTermIndex; - final int endTermIndex; - final int nTerms; - - final Map facetStatsValues; - - FieldFacetStats( String name, FieldCache.StringIndex si, FieldType ft ) - { - this.name = name; - this.si = si; - this.ft = ft; - - terms = si.lookup; - termNum = si.order; - startTermIndex = 1; - endTermIndex = terms.length; - nTerms = endTermIndex - startTermIndex; - - facetStatsValues = new HashMap(); - } - - String getTermText( int docID ) - { - return terms[termNum[docID]]; - } - - public boolean facet( int docID, Double v ) - { - if( v == null ) return false; - - int term = termNum[docID]; - int arrIdx = term-startTermIndex; - if (arrIdx>=0 && arrIdx getStatsCounts() { + public NamedList getStatsCounts() throws IOException { NamedList res = new SimpleOrderedMap(); res.add("stats_fields", getStatsFields()); return res; } - public NamedList getStatsFields() { + public NamedList getStatsFields() throws IOException { NamedList> res = new SimpleOrderedMap>(); String[] statsFs = params.getParams(StatsParams.STATS_FIELD); if (null != statsFs) { for (String f : statsFs) { - String[] facets = params.getFieldParams( f, StatsParams.STATS_FACET ); - if( facets == null ) { + String[] facets = params.getFieldParams(f, StatsParams.STATS_FACET); + if (facets == null) { facets = new String[0]; // make sure it is something... } - res.add(f, getFieldCacheStats(f, facets)); + SchemaField sf = searcher.getSchema().getField(f); + FieldType ft = sf.getType(); + if (ft.isTokenized() || sf.multiValued()) { + //use UnInvertedField for multivalued fields + UnInvertedField uif = UnInvertedField.getUnInvertedField(f, searcher); + StatsValues allstats = uif.getStats(searcher, docs, facets); + if (allstats != null) { + res.add(f, (NamedList) allstats.getStatsValues()); + } else { + res.add(f, null); + } + } else { + res.add(f, getFieldCacheStats(f, facets)); + } } } return res; @@ -376,10 +234,6 @@ class SimpleStats { public NamedList getFieldCacheStats(String fieldName, String[] facet ) { FieldType ft = searcher.getSchema().getFieldType(fieldName); - if( ft.isTokenized() || ft.isMultiValued() ) { - throw new SolrException( ErrorCode.BAD_REQUEST, - "Stats are valid for single valued numeric values. not: "+fieldName + "["+ft+"]" ); - } FieldCache.StringIndex si = null; try { @@ -388,7 +242,7 @@ class SimpleStats { catch (IOException e) { throw new RuntimeException( "failed to open field cache for: "+fieldName, e ); } - FieldFacetStats all = new FieldFacetStats( "all", si, ft ); + FieldFacetStats all = new FieldFacetStats( "all", si, ft, 0 ); if ( all.nTerms <= 0 || docs.size() <= 0 ) return null; StatsValues allstats = new StatsValues(); @@ -397,17 +251,13 @@ class SimpleStats { final FieldFacetStats[] finfo = new FieldFacetStats[facet.length]; for( String f : facet ) { ft = searcher.getSchema().getFieldType(f); - if( ft.isTokenized() || ft.isMultiValued() ) { - throw new SolrException( ErrorCode.BAD_REQUEST, - "Stats can only facet on single valued fields, not: "+f + "["+ft+"]" ); - } try { si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f); } catch (IOException e) { throw new RuntimeException( "failed to open field cache for: "+f, e ); } - finfo[i++] = new FieldFacetStats( f, si, ft ); + finfo[i++] = new FieldFacetStats( f, si, ft, 0 ); } @@ -438,4 +288,6 @@ class SimpleStats { } return allstats.getStatsValues(); } + + } diff --git a/src/java/org/apache/solr/handler/component/StatsValues.java b/src/java/org/apache/solr/handler/component/StatsValues.java new file mode 100644 index 00000000000..a05d28463cd --- /dev/null +++ b/src/java/org/apache/solr/handler/component/StatsValues.java @@ -0,0 +1,159 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.solr.handler.component; + + +import java.util.HashMap; +import java.util.Map; + +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; + + +/** 2/11/2009 - Moved out of StatsComponent to allow open access to UnInvertedField + * StatsValues is a utility to accumulate statistics on a set of values + * + *

+ * @see org.apache.solr.handler.component.StatsComponent + * +*/ + +public class StatsValues { + private static final String FACETS = "facets"; + double min; + double max; + double sum; + double sumOfSquares; + long count; + long missing; + + // facetField facetValue + public Map> facets; + + public StatsValues() { + reset(); + } + + public void accumulate(NamedList stv){ + min = Math.min(min, (Double)stv.get("min")); + max = Math.max(max, (Double)stv.get("max")); + sum += (Double)stv.get("sum"); + count += (Long)stv.get("count"); + missing += (Long)stv.get("missing"); + sumOfSquares += (Double)stv.get("sumOfSquares"); + + NamedList f = (NamedList)stv.get( FACETS ); + if( f != null ) { + if( facets == null ) { + facets = new HashMap>(); + } + + for( int i=0; i< f.size(); i++ ) { + String field = f.getName(i); + NamedList vals = (NamedList)f.getVal( i ); + Map addTo = facets.get( field ); + if( addTo == null ) { + addTo = new HashMap(); + facets.put( field, addTo ); + } + for( int j=0; j< vals.size(); j++ ) { + String val = f.getName(i); + StatsValues vvals = addTo.get( val ); + if( vvals == null ) { + vvals = new StatsValues(); + addTo.put( val, vvals ); + } + vvals.accumulate( (NamedList)f.getVal( i ) ); + } + } + } + } + + public void accumulate(double v){ + sumOfSquares += (v*v); // for std deviation + min = Math.min(min, v); + max = Math.max(max, v); + sum += v; + count++; + } + + public void accumulate(double v, int c){ + sumOfSquares += (v*v*c); // for std deviation + min = Math.min(min, v); + max = Math.max(max, v); + sum += v*c; + count+= c; + } + + public void addMissing(int c){ + missing += c; + } + + public double getAverage(){ + return sum / count; + } + + public double getStandardDeviation() + { + if( count <= 1.0D ) + return 0.0D; + + return Math.sqrt( ( ( count * sumOfSquares ) - ( sum * sum ) ) + / ( count * ( count - 1.0D ) ) ); + } + + public long getCount() + { + return count; + } + + public void reset(){ + min = Double.MAX_VALUE; + max = -1.0*Double.MAX_VALUE; + sum = count = missing = 0; + sumOfSquares = 0; + facets = null; + } + + public NamedList getStatsValues(){ + NamedList res = new SimpleOrderedMap(); + res.add("min", min); + res.add("max", max); + res.add("sum", sum); + res.add("count", count); + res.add("missing", missing); + res.add("sumOfSquares", sumOfSquares ); + res.add("mean", getAverage()); + res.add( "stddev", getStandardDeviation() ); + + // add the facet stats + if( facets != null && facets.size() > 0 ) { + NamedList> nl = new SimpleOrderedMap>(); + for( Map.Entry> entry : facets.entrySet() ) { + NamedList> nl2 = new SimpleOrderedMap>(); + nl.add( entry.getKey(), nl2 ); + for( Map.Entry e2 : entry.getValue().entrySet() ) { + nl2.add( e2.getKey(), e2.getValue().getStatsValues() ); + } + } + res.add( FACETS, nl ); + } + return res; + } +} diff --git a/src/java/org/apache/solr/request/UnInvertedField.java b/src/java/org/apache/solr/request/UnInvertedField.java index 2b203e4f304..77f14b8c384 100755 --- a/src/java/org/apache/solr/request/UnInvertedField.java +++ b/src/java/org/apache/solr/request/UnInvertedField.java @@ -17,6 +17,7 @@ package org.apache.solr.request; +import org.apache.lucene.search.FieldCache; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; @@ -26,10 +27,12 @@ import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.SolrException; import org.apache.solr.core.SolrCore; -import org.apache.solr.request.SimpleFacets; + import org.apache.solr.schema.FieldType; import org.apache.solr.search.*; import org.apache.solr.util.BoundedTreeSet; +import org.apache.solr.handler.component.StatsValues; +import org.apache.solr.handler.component.FieldFacetStats; import org.apache.lucene.util.OpenBitSet; import java.io.IOException; @@ -38,7 +41,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; -import java.util.WeakHashMap; + import java.util.concurrent.atomic.AtomicLong; /** @@ -593,9 +596,6 @@ public class UnInvertedField { for (; i=0) continue; if (--lim<0) break; @@ -616,6 +616,176 @@ public class UnInvertedField { return res; } + /** + * Collect statistics about the UninvertedField. Code is very similar to {@link #getCounts(org.apache.solr.search.SolrIndexSearcher, org.apache.solr.search.DocSet, int, int, Integer, boolean, String, String)} + * It can be used to calculate stats on multivalued fields. + *

+ * This method is mainly used by the {@link org.apache.solr.handler.component.StatsComponent}. + * + * @param searcher The Searcher to use to gather the statistics + * @param baseDocs The {@link org.apache.solr.search.DocSet} to gather the stats on + * @param facet One or more fields to facet on. + * @return The {@link org.apache.solr.handler.component.StatsValues} collected + * @throws IOException + */ + public StatsValues getStats(SolrIndexSearcher searcher, DocSet baseDocs, String[] facet) throws IOException { + //this function is ripped off nearly wholesale from the getCounts function to use + //for multiValued fields within the StatsComponent. may be useful to find common + //functionality between the two and refactor code somewhat + use.incrementAndGet(); + + FieldType ft = searcher.getSchema().getFieldType(field); + StatsValues allstats = new StatsValues(); + + int i = 0; + final FieldFacetStats[] finfo = new FieldFacetStats[facet.length]; + //Initialize facetstats, if facets have been passed in + FieldCache.StringIndex si; + for (String f : facet) { + ft = searcher.getSchema().getFieldType(f); + try { + si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f); + } + catch (IOException e) { + throw new RuntimeException("failed to open field cache for: " + f, e); + } + finfo[i++] = new FieldFacetStats(f, si, ft, numTermsInField); + } + + + DocSet docs = baseDocs; + int baseSize = docs.size(); + int maxDoc = searcher.maxDoc(); + + if (baseSize > 0) { + + final int[] index = this.index; + final int[] counts = new int[numTermsInField]; + + NumberedTermEnum te = ti.getEnumerator(searcher.getReader()); + + + boolean doNegative = false; + if (finfo.length == 0) { + //if we're collecting statistics with a facet field, can't do inverted counting + doNegative = baseSize > maxDoc >> 1 && termInstances > 0 + && docs instanceof BitDocSet; + } + + if (doNegative) { + OpenBitSet bs = (OpenBitSet) ((BitDocSet) docs).getBits().clone(); + bs.flip(0, maxDoc); + // TODO: when iterator across negative elements is available, use that + // instead of creating a new bitset and inverting. + docs = new BitDocSet(bs, maxDoc - baseSize); + // simply negating will mean that we have deleted docs in the set. + // that should be OK, as their entries in our table should be empty. + } + + // For the biggest terms, do straight set intersections + for (TopTerm tt : bigTerms.values()) { + // TODO: counts could be deferred if sorted==false + if (tt.termNum >= 0 && tt.termNum < numTermsInField) { + if (finfo.length == 0) { + counts[tt.termNum] = searcher.numDocs(new TermQuery(tt.term), docs); + } else { + //COULD BE VERY SLOW + //if we're collecting stats for facet fields, we need to iterate on all matching documents + DocSet bigTermDocSet = searcher.getDocSet(new TermQuery(tt.term)).intersection(docs); + DocIterator iter = bigTermDocSet.iterator(); + while (iter.hasNext()) { + int doc = iter.nextDoc(); + counts[tt.termNum]++; + for (FieldFacetStats f : finfo) { + f.facetTermNum(doc, tt.termNum); + } + } + } + } + } + + + if (termInstances > 0) { + DocIterator iter = docs.iterator(); + while (iter.hasNext()) { + int doc = iter.nextDoc(); + int code = index[doc]; + + if ((code & 0xff) == 1) { + int pos = code >>> 8; + int whichArray = (doc >>> 16) & 0xff; + byte[] arr = tnums[whichArray]; + int tnum = 0; + for (; ;) { + int delta = 0; + for (; ;) { + byte b = arr[pos++]; + delta = (delta << 7) | (b & 0x7f); + if ((b & 0x80) == 0) break; + } + if (delta == 0) break; + tnum += delta - TNUM_OFFSET; + counts[tnum]++; + for (FieldFacetStats f : finfo) { + f.facetTermNum(doc, tnum); + } + } + } else { + int tnum = 0; + int delta = 0; + for (; ;) { + delta = (delta << 7) | (code & 0x7f); + if ((code & 0x80) == 0) { + if (delta == 0) break; + tnum += delta - TNUM_OFFSET; + counts[tnum]++; + for (FieldFacetStats f : finfo) { + f.facetTermNum(doc, tnum); + } + delta = 0; + } + code >>>= 8; + } + } + } + } + + // add results in index order + + for (i = 0; i < numTermsInField; i++) { + int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i]; + if (c == 0) { + continue; + } + Double value = Double.parseDouble(ft.indexedToReadable(getTermText(te, i))); + allstats.accumulate(value, c); + //as we've parsed the termnum into a value, lets also accumulate fieldfacet statistics + for (FieldFacetStats f : finfo) { + f.accumulateTermNum(i, value); + } + } + te.close(); + int c = SimpleFacets.getFieldMissingCount(searcher, baseDocs, field); + if (c > 0) { + allstats.addMissing(c); + } + } + if (allstats.getCount() > 0) { + if (finfo.length > 0) { + allstats.facets = new HashMap>(); + for (FieldFacetStats f : finfo) { + allstats.facets.put(f.name, f.facetStatsValues); + } + } + return allstats; + } else { + return null; + } + + } + + + String getTermText(NumberedTermEnum te, int termNum) throws IOException { if (bigTerms.size() > 0) { diff --git a/src/test/org/apache/solr/handler/component/StatsComponentTest.java b/src/test/org/apache/solr/handler/component/StatsComponentTest.java index ab75fa395ed..422e6bfa263 100644 --- a/src/test/org/apache/solr/handler/component/StatsComponentTest.java +++ b/src/test/org/apache/solr/handler/component/StatsComponentTest.java @@ -19,6 +19,7 @@ package org.apache.solr.handler.component; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.StatsParams; +import org.apache.solr.common.SolrInputDocument; import org.apache.solr.core.SolrCore; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; @@ -76,6 +77,69 @@ public class StatsComponentTest extends AbstractSolrTestCase { } + public void testMVFieldStatisticsResult() throws Exception { + SolrCore core = h.getCore(); + + assertU(adoc("id", "1", "stats_ii", "-10", "stats_ii", "-100", "active_s", "true")); + assertU(adoc("id", "2", "stats_ii", "-20", "stats_ii", "200", "active_s", "true")); + assertU(adoc("id", "3", "stats_ii", "-30", "stats_ii", "-1", "active_s", "false")); + assertU(adoc("id", "4", "stats_ii", "-40", "stats_ii", "10", "active_s", "false")); + assertU(commit()); + + + Map args = new HashMap(); + args.put(CommonParams.Q, "*:*"); + args.put(StatsParams.STATS, "true"); + args.put(StatsParams.STATS_FIELD, "stats_ii"); + args.put("indent", "true"); + SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args)); + + + assertQ("test statistics values", req + , "//double[@name='min'][.='-100.0']" + , "//double[@name='max'][.='200.0']" + , "//double[@name='sum'][.='9.0']" + , "//long[@name='count'][.='8']" + , "//long[@name='missing'][.='0']" + , "//double[@name='sumOfSquares'][.='53101.0']" + , "//double[@name='mean'][.='1.125']" + , "//double[@name='stddev'][.='87.08852228787508']" + ); + + + + args.put(StatsParams.STATS_FACET, "active_s"); + req = new LocalSolrQueryRequest(core, new MapSolrParams(args)); + + assertQ("test statistics values", req + , "//double[@name='min'][.='-100.0']" + , "//double[@name='max'][.='200.0']" + , "//double[@name='sum'][.='9.0']" + , "//long[@name='count'][.='8']" + , "//long[@name='missing'][.='0']" + , "//double[@name='sumOfSquares'][.='53101.0']" + , "//double[@name='mean'][.='1.125']" + , "//double[@name='stddev'][.='87.08852228787508']" + ); + + + + + assertQ("test value for active_s=true", req + , "//lst[@name='true']/double[@name='min'][.='-100.0']" + , "//lst[@name='true']/double[@name='max'][.='200.0']" + , "//lst[@name='true']/double[@name='sum'][.='70.0']" + , "//lst[@name='true']/long[@name='count'][.='4']" + , "//lst[@name='true']/long[@name='missing'][.='0']" + , "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']" + , "//lst[@name='true']/double[@name='mean'][.='17.5']" + , "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']" + ); + + + } + + public void testFieldStatisticsMissingResult() throws Exception { SolrCore core = h.getCore(); assertU(adoc("id", "1", "stats_i", "-10")); diff --git a/src/test/test-files/solr/conf/schema11.xml b/src/test/test-files/solr/conf/schema11.xml index 526096e312a..74a9265e163 100755 --- a/src/test/test-files/solr/conf/schema11.xml +++ b/src/test/test-files/solr/conf/schema11.xml @@ -286,6 +286,7 @@ both match, the first appearing in the schema will be used. --> +