mirror of https://github.com/apache/lucene.git
SOLR-680: Adding StatsComponent
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@705915 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f0adcb6bd6
commit
d3d3251e5d
|
@ -21,6 +21,7 @@ import org.apache.solr.common.params.CommonParams;
|
|||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.params.HighlightParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.StatsParams;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -322,6 +323,30 @@ public class SolrQuery extends ModifiableSolrParams
|
|||
return this.get(CommonParams.SORT);
|
||||
}
|
||||
|
||||
public void setGetFieldStatistics( boolean v )
|
||||
{
|
||||
this.set( StatsParams.STATS, v );
|
||||
}
|
||||
|
||||
public void setGetFieldStatistics( String field, boolean twopass )
|
||||
{
|
||||
this.set( StatsParams.STATS, true );
|
||||
this.add( StatsParams.STATS_FIELD, field );
|
||||
this.set( "f."+field+"."+StatsParams.STATS_TWOPASS, twopass+"" );
|
||||
}
|
||||
|
||||
public void addStatsFieldFacets( String field, String ... facets )
|
||||
{
|
||||
if( field == null ) {
|
||||
this.add( StatsParams.STATS_FACET, facets );
|
||||
}
|
||||
else {
|
||||
for( String f : facets ) {
|
||||
this.add( "f."+field+"."+StatsParams.STATS_FACET, f );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public SolrQuery setFilterQueries(String ... fq) {
|
||||
this.set(CommonParams.FQ, fq);
|
||||
return this;
|
||||
|
|
|
@ -0,0 +1,172 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.client.solrj.response;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Holds stats info
|
||||
*
|
||||
* @version $Id: SpellCheckResponse.java 693622 2008-09-09 21:21:06Z gsingers $
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class FieldStatsInfo implements Serializable {
|
||||
final String name;
|
||||
|
||||
Double min;
|
||||
Double max;
|
||||
Double sum;
|
||||
Long count;
|
||||
Long missing;
|
||||
Double mean = null;
|
||||
Double sumOfSquares = null;
|
||||
Double stddev = null;
|
||||
Double median = null;
|
||||
|
||||
Map<String,List<FieldStatsInfo>> facets;
|
||||
|
||||
public FieldStatsInfo( NamedList<Object> nl, String fname )
|
||||
{
|
||||
name = fname;
|
||||
|
||||
for( Map.Entry<String, Object> entry : nl ) {
|
||||
if( "min".equals( entry.getKey() ) ) {
|
||||
min = (Double)entry.getValue();
|
||||
}
|
||||
else if( "max".equals( entry.getKey() ) ) {
|
||||
max = (Double)entry.getValue();
|
||||
}
|
||||
else if( "sum".equals( entry.getKey() ) ) {
|
||||
sum = (Double)entry.getValue();
|
||||
}
|
||||
else if( "count".equals( entry.getKey() ) ) {
|
||||
count = (Long)entry.getValue();
|
||||
}
|
||||
else if( "missing".equals( entry.getKey() ) ) {
|
||||
missing = (Long)entry.getValue();
|
||||
}
|
||||
else if( "mean".equals( entry.getKey() ) ) {
|
||||
mean = (Double)entry.getValue();
|
||||
}
|
||||
else if( "sumOfSquares".equals( entry.getKey() ) ) {
|
||||
sumOfSquares = (Double)entry.getValue();
|
||||
}
|
||||
else if( "stddev".equals( entry.getKey() ) ) {
|
||||
stddev = (Double)entry.getValue();
|
||||
}
|
||||
else if( "median".equals( entry.getKey() ) ) {
|
||||
median = (Double)entry.getValue();
|
||||
}
|
||||
else if( "facets".equals( entry.getKey() ) ) {
|
||||
NamedList<Object> fields = (NamedList<Object>)entry.getValue();
|
||||
facets = new HashMap<String, List<FieldStatsInfo>>();
|
||||
for( Map.Entry<String, Object> ev : fields ) {
|
||||
List<FieldStatsInfo> vals = new ArrayList<FieldStatsInfo>();
|
||||
facets.put( ev.getKey(), vals );
|
||||
NamedList<NamedList<Object>> vnl = (NamedList<NamedList<Object>>) ev.getValue();
|
||||
for( int i=0; i<vnl.size(); i++ ) {
|
||||
String n = vnl.getName(i);
|
||||
vals.add( new FieldStatsInfo( vnl.getVal(i), n ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
throw new RuntimeException( "unknown key: "+entry.getKey() + " ["+entry.getValue()+"]" );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String toString()
|
||||
{
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append( name );
|
||||
sb.append( ": {" );
|
||||
if( min != null ) {
|
||||
sb.append( " min:").append( min );
|
||||
}
|
||||
if( max != null ) {
|
||||
sb.append( " max:").append( max );
|
||||
}
|
||||
if( sum != null ) {
|
||||
sb.append( " sum:").append( sum );
|
||||
}
|
||||
if( count != null ) {
|
||||
sb.append( " count:").append( count );
|
||||
}
|
||||
if( missing != null ) {
|
||||
sb.append( " missing:").append( missing );
|
||||
}
|
||||
if( mean != null ) {
|
||||
sb.append( " mean:").append( mean );
|
||||
}
|
||||
if( median != null ) {
|
||||
sb.append( " median:").append(median);
|
||||
}
|
||||
if( stddev != null ) {
|
||||
sb.append( " stddev:").append(stddev);
|
||||
}
|
||||
sb.append( " }" );
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public Double getMin() {
|
||||
return min;
|
||||
}
|
||||
|
||||
public Double getMax() {
|
||||
return max;
|
||||
}
|
||||
|
||||
public Double getSum() {
|
||||
return sum;
|
||||
}
|
||||
|
||||
public Long getCount() {
|
||||
return count;
|
||||
}
|
||||
|
||||
public Long getMissing() {
|
||||
return missing;
|
||||
}
|
||||
|
||||
public Double getMean() {
|
||||
return mean;
|
||||
}
|
||||
|
||||
public Double getStddev() {
|
||||
return stddev;
|
||||
}
|
||||
|
||||
public Double getMedian() {
|
||||
return median;
|
||||
}
|
||||
|
||||
public Map<String, List<FieldStatsInfo>> getFacets() {
|
||||
return facets;
|
||||
}
|
||||
|
||||
}
|
|
@ -45,6 +45,7 @@ public class QueryResponse extends SolrResponseBase
|
|||
private NamedList<Object> _debugInfo = null;
|
||||
private NamedList<Object> _highlightingInfo = null;
|
||||
private NamedList<Object> _spellInfo = null;
|
||||
private NamedList<Object> _statsInfo = null;
|
||||
|
||||
// Facet stuff
|
||||
private Map<String,Integer> _facetQuery = null;
|
||||
|
@ -58,6 +59,9 @@ public class QueryResponse extends SolrResponseBase
|
|||
// SpellCheck Response
|
||||
private SpellCheckResponse _spellResponse = null;
|
||||
|
||||
// Field stats Response
|
||||
private Map<String,FieldStatsInfo> _fieldStatsInfo = null;
|
||||
|
||||
// Debug Info
|
||||
private Map<String,Object> _debugMap = null;
|
||||
private Map<String,String> _explainMap = null;
|
||||
|
@ -110,6 +114,10 @@ public class QueryResponse extends SolrResponseBase
|
|||
_spellInfo = (NamedList<Object>) res.getVal( i );
|
||||
extractSpellCheckInfo( _spellInfo );
|
||||
}
|
||||
else if ( "stats".equals( n ) ) {
|
||||
_statsInfo = (NamedList<Object>) res.getVal( i );
|
||||
extractStatsInfo( _statsInfo );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,6 +125,19 @@ public class QueryResponse extends SolrResponseBase
|
|||
_spellResponse = new SpellCheckResponse(spellInfo);
|
||||
}
|
||||
|
||||
private void extractStatsInfo(NamedList<Object> info) {
|
||||
if( info != null ) {
|
||||
_fieldStatsInfo = new HashMap<String, FieldStatsInfo>();
|
||||
NamedList<NamedList<Object>> ff = (NamedList<NamedList<Object>>) info.get( "stats_fields" );
|
||||
if( ff != null ) {
|
||||
for( Map.Entry<String,NamedList<Object>> entry : ff ) {
|
||||
_fieldStatsInfo.put( entry.getKey(),
|
||||
new FieldStatsInfo( entry.getValue(), entry.getKey() ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void extractDebugInfo( NamedList<Object> debug )
|
||||
{
|
||||
_debugMap = new LinkedHashMap<String, Object>(); // keep the order
|
||||
|
@ -289,6 +310,10 @@ public class QueryResponse extends SolrResponseBase
|
|||
new DocumentObjectBinder().getBeans(type,_results):
|
||||
solrServer.getBinder().getBeans(type, _results);
|
||||
}
|
||||
|
||||
public Map<String, FieldStatsInfo> getFieldStatsInfo() {
|
||||
return _fieldStatsInfo;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ import junit.framework.Assert;
|
|||
import org.apache.solr.client.solrj.request.DirectXmlRequest;
|
||||
import org.apache.solr.client.solrj.request.LukeRequest;
|
||||
import org.apache.solr.client.solrj.request.SolrPing;
|
||||
import org.apache.solr.client.solrj.response.FieldStatsInfo;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.client.solrj.response.LukeResponse;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
|
@ -319,6 +320,111 @@ abstract public class SolrExampleTests extends SolrExampleTestBase
|
|||
assertNotNull( rsp.getFieldTypeInfo() );
|
||||
}
|
||||
|
||||
public void testStatistics() throws Exception
|
||||
{
|
||||
SolrServer server = getSolrServer();
|
||||
|
||||
// Empty the database...
|
||||
server.deleteByQuery( "*:*" );// delete everything!
|
||||
server.commit();
|
||||
assertNumFound( "*:*", 0 ); // make sure it got in
|
||||
|
||||
int i=0; // 0 1 2 3 4 5 6 7 8 9
|
||||
int[] nums = new int[] { 23, 26, 38, 46, 55, 63, 77, 84, 92, 94 };
|
||||
for( int num : nums ) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.setField( "id", "doc"+i++ );
|
||||
doc.setField( "name", "doc: "+num );
|
||||
doc.setField( "popularity", num );
|
||||
server.add( doc );
|
||||
}
|
||||
server.commit();
|
||||
assertNumFound( "*:*", nums.length ); // make sure they all got in
|
||||
|
||||
SolrQuery query = new SolrQuery( "*:*" );
|
||||
query.setRows( 0 );
|
||||
query.setGetFieldStatistics( "popularity", true );
|
||||
|
||||
QueryResponse rsp = server.query( query );
|
||||
FieldStatsInfo stats = rsp.getFieldStatsInfo().get( "popularity" );
|
||||
assertNotNull( stats );
|
||||
|
||||
assertEquals( 23.0, stats.getMin() );
|
||||
assertEquals( 94.0, stats.getMax() );
|
||||
assertEquals( new Long(nums.length), stats.getCount() );
|
||||
assertEquals( new Long(0), stats.getMissing() );
|
||||
assertEquals( (nums[4]+nums[5])/2.0, stats.getMedian() );
|
||||
assertEquals( "26.4", stats.getStddev().toString().substring(0,4) );
|
||||
|
||||
// now lets try again with a new set... (odd median)
|
||||
//----------------------------------------------------
|
||||
server.deleteByQuery( "*:*" );// delete everything!
|
||||
server.commit();
|
||||
assertNumFound( "*:*", 0 ); // make sure it got in
|
||||
nums = new int[] { 5, 7, 10, 19, 20 };
|
||||
for( int num : nums ) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.setField( "id", "doc"+i++ );
|
||||
doc.setField( "name", "doc: "+num );
|
||||
doc.setField( "popularity", num );
|
||||
server.add( doc );
|
||||
}
|
||||
server.commit();
|
||||
assertNumFound( "*:*", nums.length ); // make sure they all got in
|
||||
|
||||
rsp = server.query( query );
|
||||
stats = rsp.getFieldStatsInfo().get( "popularity" );
|
||||
assertNotNull( stats );
|
||||
|
||||
assertEquals( 5.0, stats.getMin() );
|
||||
assertEquals( 20.0, stats.getMax() );
|
||||
assertEquals( new Long(nums.length), stats.getCount() );
|
||||
assertEquals( new Long(0), stats.getMissing() );
|
||||
assertEquals( 10.0, stats.getMedian() );
|
||||
|
||||
// Now try again with faceting
|
||||
//---------------------------------
|
||||
server.deleteByQuery( "*:*" );// delete everything!
|
||||
server.commit();
|
||||
assertNumFound( "*:*", 0 ); // make sure it got in
|
||||
nums = new int[] { 1, 2, 3, 4, 5, 10, 11, 12, 13, 14 };
|
||||
for( i=0; i<nums.length; i++ ) {
|
||||
int num = nums[i];
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.setField( "id", "doc"+i );
|
||||
doc.setField( "name", "doc: "+num );
|
||||
doc.setField( "popularity", num );
|
||||
doc.setField( "inStock", i < 5 );
|
||||
server.add( doc );
|
||||
}
|
||||
server.commit();
|
||||
assertNumFound( "inStock:true", 5 ); // make sure they all got in
|
||||
assertNumFound( "inStock:false", 5 ); // make sure they all got in
|
||||
|
||||
// facet on 'inStock'
|
||||
query.addStatsFieldFacets( "popularity", "inStock" );
|
||||
rsp = server.query( query );
|
||||
stats = rsp.getFieldStatsInfo().get( "popularity" );
|
||||
assertNotNull( stats );
|
||||
|
||||
List<FieldStatsInfo> facets = stats.getFacets().get( "inStock" );
|
||||
assertNotNull( facets );
|
||||
assertEquals( 2, facets.size() );
|
||||
FieldStatsInfo inStockF = facets.get(0);
|
||||
FieldStatsInfo inStockT = facets.get(1);
|
||||
if( "true".equals( inStockF.getName() ) ) {
|
||||
FieldStatsInfo tmp = inStockF;
|
||||
inStockF = inStockT;
|
||||
inStockT = tmp;
|
||||
}
|
||||
|
||||
// make sure half went to each
|
||||
assertEquals( inStockF.getCount(), inStockT.getCount() );
|
||||
assertEquals( stats.getCount().longValue(), inStockF.getCount()+inStockT.getCount() );
|
||||
|
||||
assertTrue( "check that min max faceted ok", inStockF.getMin() > inStockT.getMax() );
|
||||
assertEquals( "they have the same distribution", inStockF.getStddev(), inStockT.getStddev() );
|
||||
}
|
||||
|
||||
public void testPingHandler() throws Exception
|
||||
{
|
||||
|
|
|
@ -501,6 +501,7 @@
|
|||
<searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
|
||||
<searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
|
||||
<searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
|
||||
<searchComponent name="stats" class="org.apache.solr.handler.component.StatsComponent" />
|
||||
<searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" />
|
||||
|
||||
Default configuration in a requestHandler would look like:
|
||||
|
@ -509,6 +510,7 @@
|
|||
<str>facet</str>
|
||||
<str>mlt</str>
|
||||
<str>highlight</str>
|
||||
<str>stats</str>
|
||||
<str>debug</str>
|
||||
</arr>
|
||||
|
||||
|
|
|
@ -17,10 +17,10 @@
|
|||
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.commons.codec.Encoder;
|
||||
import org.apache.commons.codec.language.DoubleMetaphone;
|
||||
import org.apache.commons.codec.language.Metaphone;
|
||||
|
@ -80,6 +80,13 @@ public class PhoneticFilterFactory extends BaseTokenFilterFactory
|
|||
|
||||
try {
|
||||
encoder = clazz.newInstance();
|
||||
|
||||
// Try to set the maxCodeLength
|
||||
String v = args.get( "maxCodeLength" );
|
||||
if( v != null ) {
|
||||
Method setter = encoder.getClass().getMethod( "setMaxCodeLength", Integer.class );
|
||||
setter.invoke( encoder, Integer.parseInt( v ) );
|
||||
}
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, "Error initializing: "+name + "/"+clazz, e );
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.common.params;
|
||||
|
||||
/**
|
||||
* Stats Parameters
|
||||
*/
|
||||
public interface StatsParams {
|
||||
public static final String STATS = "stats";
|
||||
public static final String STATS_FIELD = STATS + ".field";
|
||||
public static final String STATS_FACET = STATS + ".facet";
|
||||
public static final String STATS_TWOPASS = STATS + ".twopass";
|
||||
}
|
|
@ -803,6 +803,7 @@ public final class SolrCore implements SolrInfoMBean {
|
|||
standardcomponents.put( FacetComponent.COMPONENT_NAME, FacetComponent.class );
|
||||
standardcomponents.put( MoreLikeThisComponent.COMPONENT_NAME, MoreLikeThisComponent.class );
|
||||
standardcomponents.put( HighlightComponent.COMPONENT_NAME, HighlightComponent.class );
|
||||
standardcomponents.put( StatsComponent.COMPONENT_NAME, StatsComponent.class );
|
||||
standardcomponents.put( DebugComponent.COMPONENT_NAME, DebugComponent.class );
|
||||
for( Map.Entry<String, Class<? extends SearchComponent>> entry : standardcomponents.entrySet() ) {
|
||||
if( components.get( entry.getKey() ) == null ) {
|
||||
|
|
|
@ -29,10 +29,8 @@ import org.apache.solr.search.QParser;
|
|||
import org.apache.solr.search.SortSpec;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* This class is experimental and will be changing in the future.
|
||||
|
@ -46,6 +44,7 @@ public class ResponseBuilder
|
|||
public SolrQueryResponse rsp;
|
||||
public boolean doHighlights;
|
||||
public boolean doFacets;
|
||||
public boolean doStats;
|
||||
|
||||
private boolean needDocList = false;
|
||||
private boolean needDocSet = false;
|
||||
|
@ -130,6 +129,7 @@ public class ResponseBuilder
|
|||
/* private... components that don't own these shouldn't use them */
|
||||
SolrDocumentList _responseDocs;
|
||||
FacetInfo _facetInfo;
|
||||
StatsInfo _statsInfo;
|
||||
|
||||
/**
|
||||
* Utility function to add debugging info. This will make sure a valid
|
||||
|
|
|
@ -66,6 +66,7 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware
|
|||
names.add( FacetComponent.COMPONENT_NAME );
|
||||
names.add( MoreLikeThisComponent.COMPONENT_NAME );
|
||||
names.add( HighlightComponent.COMPONENT_NAME );
|
||||
names.add( StatsComponent.COMPONENT_NAME );
|
||||
names.add( DebugComponent.COMPONENT_NAME );
|
||||
return names;
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@ public class ShardRequest {
|
|||
public final static int PURPOSE_GET_FIELDS = 0x40;
|
||||
public final static int PURPOSE_GET_HIGHLIGHTS = 0x80;
|
||||
public final static int PURPOSE_GET_DEBUG =0x100;
|
||||
public final static int PURPOSE_GET_STATS =0x200;
|
||||
|
||||
public int purpose; // the purpose of this request
|
||||
|
||||
|
|
|
@ -0,0 +1,474 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.StatsParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
||||
/**
|
||||
* Stats component calculates simple statistics on numeric field values
|
||||
*
|
||||
* @version $Id$
|
||||
* @since solr 1.4
|
||||
*/
|
||||
public class StatsComponent extends SearchComponent {
|
||||
|
||||
public static final String COMPONENT_NAME = "stats";
|
||||
|
||||
@Override
|
||||
public void prepare(ResponseBuilder rb) throws IOException {
|
||||
if (rb.req.getParams().getBool(StatsParams.STATS,false)) {
|
||||
rb.setNeedDocSet( true );
|
||||
rb.doStats = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void process(ResponseBuilder rb) throws IOException {
|
||||
if (rb.doStats) {
|
||||
SolrParams params = rb.req.getParams();
|
||||
SimpleStats s = new SimpleStats(rb.req,
|
||||
rb.getResults().docSet,
|
||||
params );
|
||||
|
||||
// TODO ???? add this directly to the response, or to the builder?
|
||||
rb.rsp.add( "stats", s.getStatsCounts() );
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int distributedProcess(ResponseBuilder rb) throws IOException {
|
||||
return ResponseBuilder.STAGE_DONE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
|
||||
if (!rb.doStats) return;
|
||||
|
||||
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
|
||||
sreq.purpose |= ShardRequest.PURPOSE_GET_STATS;
|
||||
|
||||
StatsInfo si = rb._statsInfo;
|
||||
if (si == null) {
|
||||
rb._statsInfo = si = new StatsInfo();
|
||||
si.parse(rb.req.getParams(), rb);
|
||||
// should already be true...
|
||||
// sreq.params.set(StatsParams.STATS, "true");
|
||||
}
|
||||
} else {
|
||||
// turn off stats on other requests
|
||||
sreq.params.set(StatsParams.STATS, "false");
|
||||
// we could optionally remove stats params
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
|
||||
if (!rb.doStats || (sreq.purpose & ShardRequest.PURPOSE_GET_STATS)==0) return;
|
||||
|
||||
StatsInfo si = rb._statsInfo;
|
||||
|
||||
for (ShardResponse srsp: sreq.responses) {
|
||||
NamedList stats = (NamedList)srsp.getSolrResponse().getResponse().get("stats");
|
||||
|
||||
NamedList stats_fields = (NamedList)stats.get("stats_fields");
|
||||
if (stats_fields != null) {
|
||||
for (int i=0; i<stats_fields.size(); i++) {
|
||||
String field = stats_fields.getName(i);
|
||||
StatsValues stv = si.statsFields.get(field);
|
||||
stv.accumulate( (NamedList)stats_fields.get(field) );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finishStage(ResponseBuilder rb) {
|
||||
if (!rb.doStats || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
|
||||
// wait until STAGE_GET_FIELDS
|
||||
// so that "result" is already stored in the response (for aesthetics)
|
||||
|
||||
StatsInfo si = rb._statsInfo;
|
||||
|
||||
NamedList stats = new SimpleOrderedMap();
|
||||
NamedList stats_fields = new SimpleOrderedMap();
|
||||
stats.add("stats_fields",stats_fields);
|
||||
for(String field : si.statsFields.keySet()){
|
||||
stats_fields.add(field, si.statsFields.get(field).getStatsValues());
|
||||
}
|
||||
|
||||
rb.rsp.add("stats", stats);
|
||||
|
||||
rb._statsInfo = null;
|
||||
}
|
||||
|
||||
|
||||
/////////////////////////////////////////////
|
||||
/// SolrInfoMBean
|
||||
////////////////////////////////////////////
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "Calculate Statistics";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getVersion() {
|
||||
return "$Revision$";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSourceId() {
|
||||
return "$Id$";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSource() {
|
||||
return "$URL$";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class StatsInfo {
|
||||
Map<String, StatsValues> statsFields;
|
||||
|
||||
void parse(SolrParams params, ResponseBuilder rb) {
|
||||
statsFields = new HashMap<String, StatsValues>();
|
||||
|
||||
String[] statsFs = params.getParams(StatsParams.STATS_FIELD);
|
||||
if (statsFs != null) {
|
||||
for (String field : statsFs) {
|
||||
statsFields.put(field,new StatsValues());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class StatsValues {
|
||||
private static final String FACETS = "facets";
|
||||
double min;
|
||||
double max;
|
||||
double sum;
|
||||
double sumOfSquares;
|
||||
long count;
|
||||
long missing;
|
||||
Double median = null;
|
||||
|
||||
// facetField facetValue
|
||||
Map<String, Map<String,StatsValues>> facets;
|
||||
|
||||
public StatsValues() {
|
||||
reset();
|
||||
}
|
||||
|
||||
public void accumulate(NamedList stv){
|
||||
min = Math.min(min, (Double)stv.get("min"));
|
||||
max = Math.max(max, (Double)stv.get("max"));
|
||||
sum += (Double)stv.get("sum");
|
||||
count += (Long)stv.get("count");
|
||||
missing += (Long)stv.get("missing");
|
||||
sumOfSquares += (Double)stv.get("sumOfSquares");
|
||||
|
||||
NamedList f = (NamedList)stv.get( FACETS );
|
||||
if( f != null ) {
|
||||
if( facets == null ) {
|
||||
facets = new HashMap<String, Map<String,StatsValues>>();
|
||||
}
|
||||
|
||||
for( int i=0; i< f.size(); i++ ) {
|
||||
String field = f.getName(i);
|
||||
NamedList vals = (NamedList)f.getVal( i );
|
||||
Map<String,StatsValues> addTo = facets.get( field );
|
||||
if( addTo == null ) {
|
||||
addTo = new HashMap<String,StatsValues>();
|
||||
facets.put( field, addTo );
|
||||
}
|
||||
for( int j=0; j< vals.size(); j++ ) {
|
||||
String val = f.getName(i);
|
||||
StatsValues vvals = addTo.get( val );
|
||||
if( vvals == null ) {
|
||||
vvals = new StatsValues();
|
||||
addTo.put( val, vvals );
|
||||
}
|
||||
vvals.accumulate( (NamedList)f.getVal( i ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void accumulate(double v){
|
||||
sumOfSquares += (v*v); // for std deviation
|
||||
min = Math.min(min, v);
|
||||
max = Math.max(max, v);
|
||||
sum += v;
|
||||
count++;
|
||||
}
|
||||
|
||||
public double getAverage(){
|
||||
return sum / count;
|
||||
}
|
||||
|
||||
public double getStandardDeviation()
|
||||
{
|
||||
if( count <= 1.0D )
|
||||
return 0.0D;
|
||||
|
||||
return Math.sqrt( ( ( count * sumOfSquares ) - ( sum * sum ) )
|
||||
/ ( count * ( count - 1.0D ) ) );
|
||||
}
|
||||
|
||||
public void reset(){
|
||||
min = Double.MAX_VALUE;
|
||||
max = Double.MIN_VALUE;
|
||||
sum = count = missing = 0;
|
||||
sumOfSquares = 0;
|
||||
median = null;
|
||||
facets = null;
|
||||
}
|
||||
|
||||
public NamedList<?> getStatsValues(){
|
||||
NamedList<Object> res = new SimpleOrderedMap<Object>();
|
||||
res.add("min", min);
|
||||
res.add("max", max);
|
||||
res.add("sum", sum);
|
||||
res.add("count", count);
|
||||
res.add("missing", missing);
|
||||
res.add("sumOfSquares", sumOfSquares );
|
||||
res.add("mean", getAverage());
|
||||
if( median != null ) {
|
||||
res.add( "median", median );
|
||||
}
|
||||
res.add( "stddev", getStandardDeviation() );
|
||||
|
||||
// add the facet stats
|
||||
if( facets != null && facets.size() > 0 ) {
|
||||
NamedList<NamedList<?>> nl = new SimpleOrderedMap<NamedList<?>>();
|
||||
for( Map.Entry<String, Map<String,StatsValues>> entry : facets.entrySet() ) {
|
||||
NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<NamedList<?>>();
|
||||
nl.add( entry.getKey(), nl2 );
|
||||
for( Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet() ) {
|
||||
nl2.add( e2.getKey(), e2.getValue().getStatsValues() );
|
||||
}
|
||||
}
|
||||
res.add( FACETS, nl );
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
class FieldFacetStats {
|
||||
final String name;
|
||||
final FieldCache.StringIndex si;
|
||||
final FieldType ft;
|
||||
|
||||
final String[] terms;
|
||||
final int[] termNum;
|
||||
|
||||
final int startTermIndex;
|
||||
final int endTermIndex;
|
||||
final int nTerms;
|
||||
|
||||
final Map<String,StatsValues> facetStatsValues;
|
||||
|
||||
FieldFacetStats( String name, FieldCache.StringIndex si, FieldType ft )
|
||||
{
|
||||
this.name = name;
|
||||
this.si = si;
|
||||
this.ft = ft;
|
||||
|
||||
terms = si.lookup;
|
||||
termNum = si.order;
|
||||
startTermIndex = 1;
|
||||
endTermIndex = terms.length;
|
||||
nTerms = endTermIndex - startTermIndex;
|
||||
|
||||
facetStatsValues = new HashMap<String, StatsValues>();
|
||||
}
|
||||
|
||||
String getTermText( int docID )
|
||||
{
|
||||
return terms[termNum[docID]];
|
||||
}
|
||||
|
||||
public boolean facet( int docID, Double v )
|
||||
{
|
||||
if( v == null ) return false;
|
||||
|
||||
int term = termNum[docID];
|
||||
int arrIdx = term-startTermIndex;
|
||||
if (arrIdx>=0 && arrIdx<nTerms) {
|
||||
String key = ft.indexedToReadable( terms[term] );
|
||||
StatsValues stats = facetStatsValues.get( key );
|
||||
if( stats == null ) {
|
||||
stats = new StatsValues();
|
||||
facetStatsValues.put(key, stats);
|
||||
}
|
||||
stats.accumulate( v );
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
class SimpleStats {
|
||||
|
||||
/** The main set of documents */
|
||||
protected DocSet docs;
|
||||
/** Configuration params behavior should be driven by */
|
||||
protected SolrParams params;
|
||||
/** Searcher to use for all calculations */
|
||||
protected SolrIndexSearcher searcher;
|
||||
protected SolrQueryRequest req;
|
||||
|
||||
public SimpleStats(SolrQueryRequest req,
|
||||
DocSet docs,
|
||||
SolrParams params) {
|
||||
this.req = req;
|
||||
this.searcher = req.getSearcher();
|
||||
this.docs = docs;
|
||||
this.params = params;
|
||||
}
|
||||
|
||||
public NamedList<Object> getStatsCounts() {
|
||||
NamedList<Object> res = new SimpleOrderedMap<Object>();
|
||||
res.add("stats_fields", getStatsFields());
|
||||
return res;
|
||||
}
|
||||
|
||||
public NamedList getStatsFields() {
|
||||
NamedList<NamedList<Number>> res = new SimpleOrderedMap<NamedList<Number>>();
|
||||
String[] statsFs = params.getParams(StatsParams.STATS_FIELD);
|
||||
if (null != statsFs) {
|
||||
for (String f : statsFs) {
|
||||
String[] facets = params.getFieldParams( f, StatsParams.STATS_FACET );
|
||||
if( facets == null ) {
|
||||
facets = new String[0]; // make sure it is something...
|
||||
}
|
||||
res.add(f, getFieldCacheStats(f, facets));
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public NamedList getFieldCacheStats(String fieldName, String[] facet ) {
|
||||
FieldType ft = searcher.getSchema().getFieldType(fieldName);
|
||||
if( ft.isTokenized() || ft.isMultiValued() ) {
|
||||
throw new SolrException( ErrorCode.BAD_REQUEST,
|
||||
"Stats are valid for single valued numeric values. not: "+fieldName + "["+ft+"]" );
|
||||
}
|
||||
|
||||
FieldCache.StringIndex si = null;
|
||||
try {
|
||||
si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), fieldName);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException( "failed to open field cache for: "+fieldName, e );
|
||||
}
|
||||
FieldFacetStats all = new FieldFacetStats( "all", si, ft );
|
||||
if ( all.nTerms <= 0 || docs.size() <= 0 ) return null;
|
||||
StatsValues allstats = new StatsValues();
|
||||
|
||||
// don't worry about faceting if the no documents match...
|
||||
int i=0;
|
||||
final FieldFacetStats[] finfo = new FieldFacetStats[facet.length];
|
||||
for( String f : facet ) {
|
||||
ft = searcher.getSchema().getFieldType(f);
|
||||
if( ft.isTokenized() || ft.isMultiValued() ) {
|
||||
throw new SolrException( ErrorCode.BAD_REQUEST,
|
||||
"Stats can only facet on single valued fields, not: "+f + "["+ft+"]" );
|
||||
}
|
||||
try {
|
||||
si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException( "failed to open field cache for: "+f, e );
|
||||
}
|
||||
finfo[i++] = new FieldFacetStats( f, si, ft );
|
||||
}
|
||||
|
||||
|
||||
DocIterator iter = docs.iterator();
|
||||
while (iter.hasNext()) {
|
||||
int docID = iter.nextDoc();
|
||||
String raw = all.getTermText(docID);
|
||||
Double v = null;
|
||||
if( raw != null ) {
|
||||
v = Double.parseDouble( all.ft.indexedToReadable(raw) );
|
||||
allstats.accumulate( v );
|
||||
}
|
||||
else {
|
||||
allstats.missing++;
|
||||
}
|
||||
|
||||
// now check the facets
|
||||
for( FieldFacetStats f : finfo ) {
|
||||
f.facet(docID, v);
|
||||
}
|
||||
}
|
||||
|
||||
// Find things that require a 2nd pass
|
||||
if( params.getFieldBool(fieldName, StatsParams.STATS_TWOPASS, false) ) {
|
||||
if( allstats.count > 1 ) { // must be 2 or more...
|
||||
iter = docs.iterator();
|
||||
boolean isEven = ( allstats.count % 2) == 0;
|
||||
int medianIndex = (int) Math.ceil( allstats.count/2.0 );
|
||||
for ( i=0; iter.hasNext(); ) {
|
||||
String raw = all.getTermText(iter.nextDoc());
|
||||
if( raw != null ) {
|
||||
if( ++i == medianIndex ) {
|
||||
double val0 = Double.parseDouble( all.ft.indexedToReadable(raw) );
|
||||
if( isEven ) {
|
||||
do {
|
||||
raw = all.getTermText(iter.nextDoc());
|
||||
} while( raw == null );
|
||||
double val1 = Double.parseDouble( all.ft.indexedToReadable(raw) );
|
||||
allstats.median = (val0+val1)/2.0;
|
||||
}
|
||||
else {
|
||||
allstats.median = val0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // get median
|
||||
}
|
||||
|
||||
if( finfo.length > 0 ) {
|
||||
allstats.facets = new HashMap<String, Map<String,StatsValues>>();
|
||||
for( FieldFacetStats f : finfo ) {
|
||||
allstats.facets.put( f.name, f.facetStatsValues );
|
||||
}
|
||||
}
|
||||
return allstats.getStatsValues();
|
||||
}
|
||||
}
|
|
@ -63,6 +63,11 @@ public abstract class FieldType extends FieldProperties {
|
|||
return (properties & TOKENIZED) != 0;
|
||||
}
|
||||
|
||||
/** Returns true if fields can have multiple values */
|
||||
public boolean isMultiValued() {
|
||||
return (properties & MULTIVALUED) != 0;
|
||||
}
|
||||
|
||||
/** subclasses should initialize themselves with the args provided
|
||||
* and remove valid arguments. leftover arguments will cause an exception.
|
||||
* Common boolean properties have already been handled.
|
||||
|
|
Loading…
Reference in New Issue