SOLR-680: Adding StatsComponent

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@705915 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan McKinley 2008-10-18 20:19:53 +00:00
parent f0adcb6bd6
commit d3d3251e5d
13 changed files with 852 additions and 5 deletions

View File

@ -21,6 +21,7 @@ import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.StatsParams;
/**
@ -321,6 +322,30 @@ public class SolrQuery extends ModifiableSolrParams
public String getSortField() {
return this.get(CommonParams.SORT);
}
public void setGetFieldStatistics( boolean v )
{
this.set( StatsParams.STATS, v );
}
public void setGetFieldStatistics( String field, boolean twopass )
{
this.set( StatsParams.STATS, true );
this.add( StatsParams.STATS_FIELD, field );
this.set( "f."+field+"."+StatsParams.STATS_TWOPASS, twopass+"" );
}
public void addStatsFieldFacets( String field, String ... facets )
{
if( field == null ) {
this.add( StatsParams.STATS_FACET, facets );
}
else {
for( String f : facets ) {
this.add( "f."+field+"."+StatsParams.STATS_FACET, f );
}
}
}
public SolrQuery setFilterQueries(String ... fq) {
this.set(CommonParams.FQ, fq);

View File

@ -0,0 +1,172 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.response;
import org.apache.solr.common.util.NamedList;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Holds stats info
*
* @version $Id: SpellCheckResponse.java 693622 2008-09-09 21:21:06Z gsingers $
* @since solr 1.4
*/
public class FieldStatsInfo implements Serializable {
final String name;
Double min;
Double max;
Double sum;
Long count;
Long missing;
Double mean = null;
Double sumOfSquares = null;
Double stddev = null;
Double median = null;
Map<String,List<FieldStatsInfo>> facets;
public FieldStatsInfo( NamedList<Object> nl, String fname )
{
name = fname;
for( Map.Entry<String, Object> entry : nl ) {
if( "min".equals( entry.getKey() ) ) {
min = (Double)entry.getValue();
}
else if( "max".equals( entry.getKey() ) ) {
max = (Double)entry.getValue();
}
else if( "sum".equals( entry.getKey() ) ) {
sum = (Double)entry.getValue();
}
else if( "count".equals( entry.getKey() ) ) {
count = (Long)entry.getValue();
}
else if( "missing".equals( entry.getKey() ) ) {
missing = (Long)entry.getValue();
}
else if( "mean".equals( entry.getKey() ) ) {
mean = (Double)entry.getValue();
}
else if( "sumOfSquares".equals( entry.getKey() ) ) {
sumOfSquares = (Double)entry.getValue();
}
else if( "stddev".equals( entry.getKey() ) ) {
stddev = (Double)entry.getValue();
}
else if( "median".equals( entry.getKey() ) ) {
median = (Double)entry.getValue();
}
else if( "facets".equals( entry.getKey() ) ) {
NamedList<Object> fields = (NamedList<Object>)entry.getValue();
facets = new HashMap<String, List<FieldStatsInfo>>();
for( Map.Entry<String, Object> ev : fields ) {
List<FieldStatsInfo> vals = new ArrayList<FieldStatsInfo>();
facets.put( ev.getKey(), vals );
NamedList<NamedList<Object>> vnl = (NamedList<NamedList<Object>>) ev.getValue();
for( int i=0; i<vnl.size(); i++ ) {
String n = vnl.getName(i);
vals.add( new FieldStatsInfo( vnl.getVal(i), n ) );
}
}
}
else {
throw new RuntimeException( "unknown key: "+entry.getKey() + " ["+entry.getValue()+"]" );
}
}
}
public String toString()
{
StringBuilder sb = new StringBuilder();
sb.append( name );
sb.append( ": {" );
if( min != null ) {
sb.append( " min:").append( min );
}
if( max != null ) {
sb.append( " max:").append( max );
}
if( sum != null ) {
sb.append( " sum:").append( sum );
}
if( count != null ) {
sb.append( " count:").append( count );
}
if( missing != null ) {
sb.append( " missing:").append( missing );
}
if( mean != null ) {
sb.append( " mean:").append( mean );
}
if( median != null ) {
sb.append( " median:").append(median);
}
if( stddev != null ) {
sb.append( " stddev:").append(stddev);
}
sb.append( " }" );
return sb.toString();
}
public String getName() {
return name;
}
public Double getMin() {
return min;
}
public Double getMax() {
return max;
}
public Double getSum() {
return sum;
}
public Long getCount() {
return count;
}
public Long getMissing() {
return missing;
}
public Double getMean() {
return mean;
}
public Double getStddev() {
return stddev;
}
public Double getMedian() {
return median;
}
public Map<String, List<FieldStatsInfo>> getFacets() {
return facets;
}
}

View File

@ -45,6 +45,7 @@ public class QueryResponse extends SolrResponseBase
private NamedList<Object> _debugInfo = null;
private NamedList<Object> _highlightingInfo = null;
private NamedList<Object> _spellInfo = null;
private NamedList<Object> _statsInfo = null;
// Facet stuff
private Map<String,Integer> _facetQuery = null;
@ -57,6 +58,9 @@ public class QueryResponse extends SolrResponseBase
// SpellCheck Response
private SpellCheckResponse _spellResponse = null;
// Field stats Response
private Map<String,FieldStatsInfo> _fieldStatsInfo = null;
// Debug Info
private Map<String,Object> _debugMap = null;
@ -110,6 +114,10 @@ public class QueryResponse extends SolrResponseBase
_spellInfo = (NamedList<Object>) res.getVal( i );
extractSpellCheckInfo( _spellInfo );
}
else if ( "stats".equals( n ) ) {
_statsInfo = (NamedList<Object>) res.getVal( i );
extractStatsInfo( _statsInfo );
}
}
}
@ -117,6 +125,19 @@ public class QueryResponse extends SolrResponseBase
_spellResponse = new SpellCheckResponse(spellInfo);
}
private void extractStatsInfo(NamedList<Object> info) {
if( info != null ) {
_fieldStatsInfo = new HashMap<String, FieldStatsInfo>();
NamedList<NamedList<Object>> ff = (NamedList<NamedList<Object>>) info.get( "stats_fields" );
if( ff != null ) {
for( Map.Entry<String,NamedList<Object>> entry : ff ) {
_fieldStatsInfo.put( entry.getKey(),
new FieldStatsInfo( entry.getValue(), entry.getKey() ) );
}
}
}
}
private void extractDebugInfo( NamedList<Object> debug )
{
_debugMap = new LinkedHashMap<String, Object>(); // keep the order
@ -289,6 +310,10 @@ public class QueryResponse extends SolrResponseBase
new DocumentObjectBinder().getBeans(type,_results):
solrServer.getBinder().getBeans(type, _results);
}
public Map<String, FieldStatsInfo> getFieldStatsInfo() {
return _fieldStatsInfo;
}
}

View File

@ -29,6 +29,7 @@ import junit.framework.Assert;
import org.apache.solr.client.solrj.request.DirectXmlRequest;
import org.apache.solr.client.solrj.request.LukeRequest;
import org.apache.solr.client.solrj.request.SolrPing;
import org.apache.solr.client.solrj.response.FieldStatsInfo;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.LukeResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
@ -318,7 +319,112 @@ abstract public class SolrExampleTests extends SolrExampleTestBase
rsp = luke.process( server );
assertNotNull( rsp.getFieldTypeInfo() );
}
public void testStatistics() throws Exception
{
SolrServer server = getSolrServer();
// Empty the database...
server.deleteByQuery( "*:*" );// delete everything!
server.commit();
assertNumFound( "*:*", 0 ); // make sure it got in
int i=0; // 0 1 2 3 4 5 6 7 8 9
int[] nums = new int[] { 23, 26, 38, 46, 55, 63, 77, 84, 92, 94 };
for( int num : nums ) {
SolrInputDocument doc = new SolrInputDocument();
doc.setField( "id", "doc"+i++ );
doc.setField( "name", "doc: "+num );
doc.setField( "popularity", num );
server.add( doc );
}
server.commit();
assertNumFound( "*:*", nums.length ); // make sure they all got in
SolrQuery query = new SolrQuery( "*:*" );
query.setRows( 0 );
query.setGetFieldStatistics( "popularity", true );
QueryResponse rsp = server.query( query );
FieldStatsInfo stats = rsp.getFieldStatsInfo().get( "popularity" );
assertNotNull( stats );
assertEquals( 23.0, stats.getMin() );
assertEquals( 94.0, stats.getMax() );
assertEquals( new Long(nums.length), stats.getCount() );
assertEquals( new Long(0), stats.getMissing() );
assertEquals( (nums[4]+nums[5])/2.0, stats.getMedian() );
assertEquals( "26.4", stats.getStddev().toString().substring(0,4) );
// now lets try again with a new set... (odd median)
//----------------------------------------------------
server.deleteByQuery( "*:*" );// delete everything!
server.commit();
assertNumFound( "*:*", 0 ); // make sure it got in
nums = new int[] { 5, 7, 10, 19, 20 };
for( int num : nums ) {
SolrInputDocument doc = new SolrInputDocument();
doc.setField( "id", "doc"+i++ );
doc.setField( "name", "doc: "+num );
doc.setField( "popularity", num );
server.add( doc );
}
server.commit();
assertNumFound( "*:*", nums.length ); // make sure they all got in
rsp = server.query( query );
stats = rsp.getFieldStatsInfo().get( "popularity" );
assertNotNull( stats );
assertEquals( 5.0, stats.getMin() );
assertEquals( 20.0, stats.getMax() );
assertEquals( new Long(nums.length), stats.getCount() );
assertEquals( new Long(0), stats.getMissing() );
assertEquals( 10.0, stats.getMedian() );
// Now try again with faceting
//---------------------------------
server.deleteByQuery( "*:*" );// delete everything!
server.commit();
assertNumFound( "*:*", 0 ); // make sure it got in
nums = new int[] { 1, 2, 3, 4, 5, 10, 11, 12, 13, 14 };
for( i=0; i<nums.length; i++ ) {
int num = nums[i];
SolrInputDocument doc = new SolrInputDocument();
doc.setField( "id", "doc"+i );
doc.setField( "name", "doc: "+num );
doc.setField( "popularity", num );
doc.setField( "inStock", i < 5 );
server.add( doc );
}
server.commit();
assertNumFound( "inStock:true", 5 ); // make sure they all got in
assertNumFound( "inStock:false", 5 ); // make sure they all got in
// facet on 'inStock'
query.addStatsFieldFacets( "popularity", "inStock" );
rsp = server.query( query );
stats = rsp.getFieldStatsInfo().get( "popularity" );
assertNotNull( stats );
List<FieldStatsInfo> facets = stats.getFacets().get( "inStock" );
assertNotNull( facets );
assertEquals( 2, facets.size() );
FieldStatsInfo inStockF = facets.get(0);
FieldStatsInfo inStockT = facets.get(1);
if( "true".equals( inStockF.getName() ) ) {
FieldStatsInfo tmp = inStockF;
inStockF = inStockT;
inStockT = tmp;
}
// make sure half went to each
assertEquals( inStockF.getCount(), inStockT.getCount() );
assertEquals( stats.getCount().longValue(), inStockF.getCount()+inStockT.getCount() );
assertTrue( "check that min max faceted ok", inStockF.getMin() > inStockT.getMax() );
assertEquals( "they have the same distribution", inStockF.getStddev(), inStockT.getStddev() );
}
public void testPingHandler() throws Exception
{

View File

@ -501,14 +501,16 @@
<searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
<searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
<searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
<searchComponent name="stats" class="org.apache.solr.handler.component.StatsComponent" />
<searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />
Default configuration in a requestHandler would look like:
<arr name="components">
<str>query</str>
<str>facet</str>
<str>mlt</str>
<str>highlight</str>
<str>stats</str>
<str>debug</str>
</arr>

View File

@ -17,10 +17,10 @@
package org.apache.solr.analysis;
import java.lang.reflect.Method;
import java.util.HashMap;
import java.util.Map;
import org.apache.solr.core.SolrConfig;
import org.apache.commons.codec.Encoder;
import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.commons.codec.language.Metaphone;
@ -80,6 +80,13 @@ public class PhoneticFilterFactory extends BaseTokenFilterFactory
try {
encoder = clazz.newInstance();
// Try to set the maxCodeLength
String v = args.get( "maxCodeLength" );
if( v != null ) {
Method setter = encoder.getClass().getMethod( "setMaxCodeLength", Integer.class );
setter.invoke( encoder, Integer.parseInt( v ) );
}
}
catch (Exception e) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Error initializing: "+name + "/"+clazz, e );

View File

@ -0,0 +1,28 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.common.params;
/**
* Stats Parameters
*/
public interface StatsParams {
public static final String STATS = "stats";
public static final String STATS_FIELD = STATS + ".field";
public static final String STATS_FACET = STATS + ".facet";
public static final String STATS_TWOPASS = STATS + ".twopass";
}

View File

@ -803,6 +803,7 @@ public final class SolrCore implements SolrInfoMBean {
standardcomponents.put( FacetComponent.COMPONENT_NAME, FacetComponent.class );
standardcomponents.put( MoreLikeThisComponent.COMPONENT_NAME, MoreLikeThisComponent.class );
standardcomponents.put( HighlightComponent.COMPONENT_NAME, HighlightComponent.class );
standardcomponents.put( StatsComponent.COMPONENT_NAME, StatsComponent.class );
standardcomponents.put( DebugComponent.COMPONENT_NAME, DebugComponent.class );
for( Map.Entry<String, Class<? extends SearchComponent>> entry : standardcomponents.entrySet() ) {
if( components.get( entry.getKey() ) == null ) {

View File

@ -29,10 +29,8 @@ import org.apache.solr.search.QParser;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SolrIndexSearcher;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* This class is experimental and will be changing in the future.
@ -46,6 +44,7 @@ public class ResponseBuilder
public SolrQueryResponse rsp;
public boolean doHighlights;
public boolean doFacets;
public boolean doStats;
private boolean needDocList = false;
private boolean needDocSet = false;
@ -130,6 +129,7 @@ public class ResponseBuilder
/* private... components that don't own these shouldn't use them */
SolrDocumentList _responseDocs;
FacetInfo _facetInfo;
StatsInfo _statsInfo;
/**
* Utility function to add debugging info. This will make sure a valid

View File

@ -66,6 +66,7 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware
names.add( FacetComponent.COMPONENT_NAME );
names.add( MoreLikeThisComponent.COMPONENT_NAME );
names.add( HighlightComponent.COMPONENT_NAME );
names.add( StatsComponent.COMPONENT_NAME );
names.add( DebugComponent.COMPONENT_NAME );
return names;
}

View File

@ -35,6 +35,7 @@ public class ShardRequest {
public final static int PURPOSE_GET_FIELDS = 0x40;
public final static int PURPOSE_GET_HIGHLIGHTS = 0x80;
public final static int PURPOSE_GET_DEBUG =0x100;
public final static int PURPOSE_GET_STATS =0x200;
public int purpose; // the purpose of this request

View File

@ -0,0 +1,474 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.search.FieldCache;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.StatsParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
/**
* Stats component calculates simple statistics on numeric field values
*
* @version $Id$
* @since solr 1.4
*/
public class StatsComponent extends SearchComponent {
public static final String COMPONENT_NAME = "stats";
@Override
public void prepare(ResponseBuilder rb) throws IOException {
if (rb.req.getParams().getBool(StatsParams.STATS,false)) {
rb.setNeedDocSet( true );
rb.doStats = true;
}
}
@Override
public void process(ResponseBuilder rb) throws IOException {
if (rb.doStats) {
SolrParams params = rb.req.getParams();
SimpleStats s = new SimpleStats(rb.req,
rb.getResults().docSet,
params );
// TODO ???? add this directly to the response, or to the builder?
rb.rsp.add( "stats", s.getStatsCounts() );
}
}
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
return ResponseBuilder.STAGE_DONE;
}
@Override
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
if (!rb.doStats) return;
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
sreq.purpose |= ShardRequest.PURPOSE_GET_STATS;
StatsInfo si = rb._statsInfo;
if (si == null) {
rb._statsInfo = si = new StatsInfo();
si.parse(rb.req.getParams(), rb);
// should already be true...
// sreq.params.set(StatsParams.STATS, "true");
}
} else {
// turn off stats on other requests
sreq.params.set(StatsParams.STATS, "false");
// we could optionally remove stats params
}
}
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
if (!rb.doStats || (sreq.purpose & ShardRequest.PURPOSE_GET_STATS)==0) return;
StatsInfo si = rb._statsInfo;
for (ShardResponse srsp: sreq.responses) {
NamedList stats = (NamedList)srsp.getSolrResponse().getResponse().get("stats");
NamedList stats_fields = (NamedList)stats.get("stats_fields");
if (stats_fields != null) {
for (int i=0; i<stats_fields.size(); i++) {
String field = stats_fields.getName(i);
StatsValues stv = si.statsFields.get(field);
stv.accumulate( (NamedList)stats_fields.get(field) );
}
}
}
}
@Override
public void finishStage(ResponseBuilder rb) {
if (!rb.doStats || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
// wait until STAGE_GET_FIELDS
// so that "result" is already stored in the response (for aesthetics)
StatsInfo si = rb._statsInfo;
NamedList stats = new SimpleOrderedMap();
NamedList stats_fields = new SimpleOrderedMap();
stats.add("stats_fields",stats_fields);
for(String field : si.statsFields.keySet()){
stats_fields.add(field, si.statsFields.get(field).getStatsValues());
}
rb.rsp.add("stats", stats);
rb._statsInfo = null;
}
/////////////////////////////////////////////
/// SolrInfoMBean
////////////////////////////////////////////
@Override
public String getDescription() {
return "Calculate Statistics";
}
@Override
public String getVersion() {
return "$Revision$";
}
@Override
public String getSourceId() {
return "$Id$";
}
@Override
public String getSource() {
return "$URL$";
}
}
class StatsInfo {
Map<String, StatsValues> statsFields;
void parse(SolrParams params, ResponseBuilder rb) {
statsFields = new HashMap<String, StatsValues>();
String[] statsFs = params.getParams(StatsParams.STATS_FIELD);
if (statsFs != null) {
for (String field : statsFs) {
statsFields.put(field,new StatsValues());
}
}
}
}
class StatsValues {
private static final String FACETS = "facets";
double min;
double max;
double sum;
double sumOfSquares;
long count;
long missing;
Double median = null;
// facetField facetValue
Map<String, Map<String,StatsValues>> facets;
public StatsValues() {
reset();
}
public void accumulate(NamedList stv){
min = Math.min(min, (Double)stv.get("min"));
max = Math.max(max, (Double)stv.get("max"));
sum += (Double)stv.get("sum");
count += (Long)stv.get("count");
missing += (Long)stv.get("missing");
sumOfSquares += (Double)stv.get("sumOfSquares");
NamedList f = (NamedList)stv.get( FACETS );
if( f != null ) {
if( facets == null ) {
facets = new HashMap<String, Map<String,StatsValues>>();
}
for( int i=0; i< f.size(); i++ ) {
String field = f.getName(i);
NamedList vals = (NamedList)f.getVal( i );
Map<String,StatsValues> addTo = facets.get( field );
if( addTo == null ) {
addTo = new HashMap<String,StatsValues>();
facets.put( field, addTo );
}
for( int j=0; j< vals.size(); j++ ) {
String val = f.getName(i);
StatsValues vvals = addTo.get( val );
if( vvals == null ) {
vvals = new StatsValues();
addTo.put( val, vvals );
}
vvals.accumulate( (NamedList)f.getVal( i ) );
}
}
}
}
public void accumulate(double v){
sumOfSquares += (v*v); // for std deviation
min = Math.min(min, v);
max = Math.max(max, v);
sum += v;
count++;
}
public double getAverage(){
return sum / count;
}
public double getStandardDeviation()
{
if( count <= 1.0D )
return 0.0D;
return Math.sqrt( ( ( count * sumOfSquares ) - ( sum * sum ) )
/ ( count * ( count - 1.0D ) ) );
}
public void reset(){
min = Double.MAX_VALUE;
max = Double.MIN_VALUE;
sum = count = missing = 0;
sumOfSquares = 0;
median = null;
facets = null;
}
public NamedList<?> getStatsValues(){
NamedList<Object> res = new SimpleOrderedMap<Object>();
res.add("min", min);
res.add("max", max);
res.add("sum", sum);
res.add("count", count);
res.add("missing", missing);
res.add("sumOfSquares", sumOfSquares );
res.add("mean", getAverage());
if( median != null ) {
res.add( "median", median );
}
res.add( "stddev", getStandardDeviation() );
// add the facet stats
if( facets != null && facets.size() > 0 ) {
NamedList<NamedList<?>> nl = new SimpleOrderedMap<NamedList<?>>();
for( Map.Entry<String, Map<String,StatsValues>> entry : facets.entrySet() ) {
NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<NamedList<?>>();
nl.add( entry.getKey(), nl2 );
for( Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet() ) {
nl2.add( e2.getKey(), e2.getValue().getStatsValues() );
}
}
res.add( FACETS, nl );
}
return res;
}
}
class FieldFacetStats {
final String name;
final FieldCache.StringIndex si;
final FieldType ft;
final String[] terms;
final int[] termNum;
final int startTermIndex;
final int endTermIndex;
final int nTerms;
final Map<String,StatsValues> facetStatsValues;
FieldFacetStats( String name, FieldCache.StringIndex si, FieldType ft )
{
this.name = name;
this.si = si;
this.ft = ft;
terms = si.lookup;
termNum = si.order;
startTermIndex = 1;
endTermIndex = terms.length;
nTerms = endTermIndex - startTermIndex;
facetStatsValues = new HashMap<String, StatsValues>();
}
String getTermText( int docID )
{
return terms[termNum[docID]];
}
public boolean facet( int docID, Double v )
{
if( v == null ) return false;
int term = termNum[docID];
int arrIdx = term-startTermIndex;
if (arrIdx>=0 && arrIdx<nTerms) {
String key = ft.indexedToReadable( terms[term] );
StatsValues stats = facetStatsValues.get( key );
if( stats == null ) {
stats = new StatsValues();
facetStatsValues.put(key, stats);
}
stats.accumulate( v );
return true;
}
return false;
}
}
class SimpleStats {
/** The main set of documents */
protected DocSet docs;
/** Configuration params behavior should be driven by */
protected SolrParams params;
/** Searcher to use for all calculations */
protected SolrIndexSearcher searcher;
protected SolrQueryRequest req;
public SimpleStats(SolrQueryRequest req,
DocSet docs,
SolrParams params) {
this.req = req;
this.searcher = req.getSearcher();
this.docs = docs;
this.params = params;
}
public NamedList<Object> getStatsCounts() {
NamedList<Object> res = new SimpleOrderedMap<Object>();
res.add("stats_fields", getStatsFields());
return res;
}
public NamedList getStatsFields() {
NamedList<NamedList<Number>> res = new SimpleOrderedMap<NamedList<Number>>();
String[] statsFs = params.getParams(StatsParams.STATS_FIELD);
if (null != statsFs) {
for (String f : statsFs) {
String[] facets = params.getFieldParams( f, StatsParams.STATS_FACET );
if( facets == null ) {
facets = new String[0]; // make sure it is something...
}
res.add(f, getFieldCacheStats(f, facets));
}
}
return res;
}
public NamedList getFieldCacheStats(String fieldName, String[] facet ) {
FieldType ft = searcher.getSchema().getFieldType(fieldName);
if( ft.isTokenized() || ft.isMultiValued() ) {
throw new SolrException( ErrorCode.BAD_REQUEST,
"Stats are valid for single valued numeric values. not: "+fieldName + "["+ft+"]" );
}
FieldCache.StringIndex si = null;
try {
si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), fieldName);
}
catch (IOException e) {
throw new RuntimeException( "failed to open field cache for: "+fieldName, e );
}
FieldFacetStats all = new FieldFacetStats( "all", si, ft );
if ( all.nTerms <= 0 || docs.size() <= 0 ) return null;
StatsValues allstats = new StatsValues();
// don't worry about faceting if the no documents match...
int i=0;
final FieldFacetStats[] finfo = new FieldFacetStats[facet.length];
for( String f : facet ) {
ft = searcher.getSchema().getFieldType(f);
if( ft.isTokenized() || ft.isMultiValued() ) {
throw new SolrException( ErrorCode.BAD_REQUEST,
"Stats can only facet on single valued fields, not: "+f + "["+ft+"]" );
}
try {
si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f);
}
catch (IOException e) {
throw new RuntimeException( "failed to open field cache for: "+f, e );
}
finfo[i++] = new FieldFacetStats( f, si, ft );
}
DocIterator iter = docs.iterator();
while (iter.hasNext()) {
int docID = iter.nextDoc();
String raw = all.getTermText(docID);
Double v = null;
if( raw != null ) {
v = Double.parseDouble( all.ft.indexedToReadable(raw) );
allstats.accumulate( v );
}
else {
allstats.missing++;
}
// now check the facets
for( FieldFacetStats f : finfo ) {
f.facet(docID, v);
}
}
// Find things that require a 2nd pass
if( params.getFieldBool(fieldName, StatsParams.STATS_TWOPASS, false) ) {
if( allstats.count > 1 ) { // must be 2 or more...
iter = docs.iterator();
boolean isEven = ( allstats.count % 2) == 0;
int medianIndex = (int) Math.ceil( allstats.count/2.0 );
for ( i=0; iter.hasNext(); ) {
String raw = all.getTermText(iter.nextDoc());
if( raw != null ) {
if( ++i == medianIndex ) {
double val0 = Double.parseDouble( all.ft.indexedToReadable(raw) );
if( isEven ) {
do {
raw = all.getTermText(iter.nextDoc());
} while( raw == null );
double val1 = Double.parseDouble( all.ft.indexedToReadable(raw) );
allstats.median = (val0+val1)/2.0;
}
else {
allstats.median = val0;
}
break;
}
}
}
} // get median
}
if( finfo.length > 0 ) {
allstats.facets = new HashMap<String, Map<String,StatsValues>>();
for( FieldFacetStats f : finfo ) {
allstats.facets.put( f.name, f.facetStatsValues );
}
}
return allstats.getStatsValues();
}
}

View File

@ -63,6 +63,11 @@ public abstract class FieldType extends FieldProperties {
return (properties & TOKENIZED) != 0;
}
/** Returns true if fields can have multiple values */
public boolean isMultiValued() {
return (properties & MULTIVALUED) != 0;
}
/** subclasses should initialize themselves with the args provided
* and remove valid arguments. leftover arguments will cause an exception.
* Common boolean properties have already been handled.