mirror of https://github.com/apache/lucene.git
SOLR-1380: Added support for multi-valued fields to StatsComponent
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@813874 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f536fe5121
commit
c56224895a
|
@ -98,6 +98,8 @@ New Features
|
|||
7. SOLR-680: Add StatsComponent. This gets simple statistics on matched numeric fields,
|
||||
including: min, max, mean, median, stddev. (koji, ryan)
|
||||
|
||||
7.1 SOLR-1380: Added support for multi-valued fields (Harish Agarwal via gsingers)
|
||||
|
||||
8. SOLR-561: Added Replication implemented in Java as a request handler. Supports index replication
|
||||
as well as configuration replication and exposes detailed statistics and progress information
|
||||
on the Admin page. Works on all platforms. (Noble Paul, yonik, Akshay Ukey, shalin)
|
||||
|
|
|
@ -0,0 +1,144 @@
|
|||
package org.apache.solr.handler.component;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* 9/10/2009 - Moved out of StatsComponent to allow open access to UnInvertedField
|
||||
* FieldFacetStats is a utility to accumulate statistics on a set of values in one field,
|
||||
* for facet values present in another field.
|
||||
* <p/>
|
||||
* @see org.apache.solr.handler.component.StatsComponent
|
||||
*
|
||||
*/
|
||||
|
||||
public class FieldFacetStats {
|
||||
public final String name;
|
||||
final FieldCache.StringIndex si;
|
||||
final FieldType ft;
|
||||
|
||||
final String[] terms;
|
||||
final int[] termNum;
|
||||
|
||||
final int startTermIndex;
|
||||
final int endTermIndex;
|
||||
final int nTerms;
|
||||
|
||||
final int numStatsTerms;
|
||||
|
||||
public final Map<String, StatsValues> facetStatsValues;
|
||||
|
||||
final List<HashMap<String, Integer>> facetStatsTerms;
|
||||
|
||||
public FieldFacetStats(String name, FieldCache.StringIndex si, FieldType ft, int numStatsTerms) {
|
||||
this.name = name;
|
||||
this.si = si;
|
||||
this.ft = ft;
|
||||
this.numStatsTerms = numStatsTerms;
|
||||
|
||||
terms = si.lookup;
|
||||
termNum = si.order;
|
||||
startTermIndex = 1;
|
||||
endTermIndex = terms.length;
|
||||
nTerms = endTermIndex - startTermIndex;
|
||||
|
||||
facetStatsValues = new HashMap<String, StatsValues>();
|
||||
|
||||
// for mv stats field, we'll want to keep track of terms
|
||||
facetStatsTerms = new ArrayList<HashMap<String, Integer>>();
|
||||
if (numStatsTerms == 0) return;
|
||||
int i = 0;
|
||||
for (; i < numStatsTerms; i++) {
|
||||
facetStatsTerms.add(new HashMap<String, Integer>());
|
||||
}
|
||||
}
|
||||
|
||||
String getTermText(int docID) {
|
||||
return terms[termNum[docID]];
|
||||
}
|
||||
|
||||
|
||||
public boolean facet(int docID, Double v) {
|
||||
if (v == null) return false;
|
||||
|
||||
int term = termNum[docID];
|
||||
int arrIdx = term - startTermIndex;
|
||||
if (arrIdx >= 0 && arrIdx < nTerms) {
|
||||
String key = ft.indexedToReadable(terms[term]);
|
||||
StatsValues stats = facetStatsValues.get(key);
|
||||
if (stats == null) {
|
||||
stats = new StatsValues();
|
||||
facetStatsValues.put(key, stats);
|
||||
}
|
||||
stats.accumulate(v);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
//function to keep track of facet counts for term number
|
||||
public boolean facetTermNum(int docID, int statsTermNum) {
|
||||
|
||||
int term = termNum[docID];
|
||||
int arrIdx = term - startTermIndex;
|
||||
if (arrIdx >= 0 && arrIdx < nTerms) {
|
||||
String key = ft.indexedToReadable(terms[term]);
|
||||
HashMap<String, Integer> statsTermCounts = facetStatsTerms.get(statsTermNum);
|
||||
Integer statsTermCount = statsTermCounts.get(key);
|
||||
if (statsTermCount == null) {
|
||||
statsTermCounts.put(key, 1);
|
||||
} else {
|
||||
statsTermCounts.put(key, statsTermCount + 1);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
//function to accumulate counts for statsTermNum to specified value
|
||||
public boolean accumulateTermNum(int statsTermNum, Double value) {
|
||||
if (value == null) return false;
|
||||
for (Map.Entry<String, Integer> stringIntegerEntry : facetStatsTerms.get(statsTermNum).entrySet()) {
|
||||
Map.Entry pairs = (Map.Entry) stringIntegerEntry;
|
||||
String key = (String) pairs.getKey();
|
||||
StatsValues facetStats = facetStatsValues.get(key);
|
||||
if (facetStats == null) {
|
||||
facetStats = new StatsValues();
|
||||
facetStatsValues.put(key, facetStats);
|
||||
}
|
||||
Integer count = (Integer) pairs.getValue();
|
||||
if (count != null) {
|
||||
facetStats.accumulate(value, count);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -28,11 +28,16 @@ import org.apache.solr.common.params.SolrParams;
|
|||
import org.apache.solr.common.params.StatsParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.handler.component.StatsValues;
|
||||
import org.apache.solr.handler.component.FieldFacetStats;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.request.UnInvertedField;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
||||
/**
|
||||
* Stats component calculates simple statistics on numeric field values
|
||||
|
@ -173,166 +178,6 @@ class StatsInfo {
|
|||
}
|
||||
}
|
||||
|
||||
class StatsValues {
|
||||
private static final String FACETS = "facets";
|
||||
double min;
|
||||
double max;
|
||||
double sum;
|
||||
double sumOfSquares;
|
||||
long count;
|
||||
long missing;
|
||||
|
||||
// facetField facetValue
|
||||
Map<String, Map<String,StatsValues>> facets;
|
||||
|
||||
public StatsValues() {
|
||||
reset();
|
||||
}
|
||||
|
||||
public void accumulate(NamedList stv){
|
||||
min = Math.min(min, (Double)stv.get("min"));
|
||||
max = Math.max(max, (Double)stv.get("max"));
|
||||
sum += (Double)stv.get("sum");
|
||||
count += (Long)stv.get("count");
|
||||
missing += (Long)stv.get("missing");
|
||||
sumOfSquares += (Double)stv.get("sumOfSquares");
|
||||
|
||||
NamedList f = (NamedList)stv.get( FACETS );
|
||||
if( f != null ) {
|
||||
if( facets == null ) {
|
||||
facets = new HashMap<String, Map<String,StatsValues>>();
|
||||
}
|
||||
|
||||
for( int i=0; i< f.size(); i++ ) {
|
||||
String field = f.getName(i);
|
||||
NamedList vals = (NamedList)f.getVal( i );
|
||||
Map<String,StatsValues> addTo = facets.get( field );
|
||||
if( addTo == null ) {
|
||||
addTo = new HashMap<String,StatsValues>();
|
||||
facets.put( field, addTo );
|
||||
}
|
||||
for( int j=0; j< vals.size(); j++ ) {
|
||||
String val = f.getName(i);
|
||||
StatsValues vvals = addTo.get( val );
|
||||
if( vvals == null ) {
|
||||
vvals = new StatsValues();
|
||||
addTo.put( val, vvals );
|
||||
}
|
||||
vvals.accumulate( (NamedList)f.getVal( i ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void accumulate(double v){
|
||||
sumOfSquares += (v*v); // for std deviation
|
||||
min = Math.min(min, v);
|
||||
max = Math.max(max, v);
|
||||
sum += v;
|
||||
count++;
|
||||
}
|
||||
|
||||
public double getAverage(){
|
||||
return sum / count;
|
||||
}
|
||||
|
||||
public double getStandardDeviation()
|
||||
{
|
||||
if( count <= 1.0D )
|
||||
return 0.0D;
|
||||
|
||||
return Math.sqrt( ( ( count * sumOfSquares ) - ( sum * sum ) )
|
||||
/ ( count * ( count - 1.0D ) ) );
|
||||
}
|
||||
|
||||
public void reset(){
|
||||
min = Double.MAX_VALUE;
|
||||
max = Double.NEGATIVE_INFINITY;
|
||||
sum = count = missing = 0;
|
||||
sumOfSquares = 0;
|
||||
facets = null;
|
||||
}
|
||||
|
||||
public NamedList<?> getStatsValues(){
|
||||
NamedList<Object> res = new SimpleOrderedMap<Object>();
|
||||
res.add("min", min);
|
||||
res.add("max", max);
|
||||
res.add("sum", sum);
|
||||
res.add("count", count);
|
||||
res.add("missing", missing);
|
||||
res.add("sumOfSquares", sumOfSquares );
|
||||
res.add("mean", getAverage());
|
||||
res.add( "stddev", getStandardDeviation() );
|
||||
|
||||
// add the facet stats
|
||||
if( facets != null && facets.size() > 0 ) {
|
||||
NamedList<NamedList<?>> nl = new SimpleOrderedMap<NamedList<?>>();
|
||||
for( Map.Entry<String, Map<String,StatsValues>> entry : facets.entrySet() ) {
|
||||
NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<NamedList<?>>();
|
||||
nl.add( entry.getKey(), nl2 );
|
||||
for( Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet() ) {
|
||||
nl2.add( e2.getKey(), e2.getValue().getStatsValues() );
|
||||
}
|
||||
}
|
||||
res.add( FACETS, nl );
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
class FieldFacetStats {
|
||||
final String name;
|
||||
final FieldCache.StringIndex si;
|
||||
final FieldType ft;
|
||||
|
||||
final String[] terms;
|
||||
final int[] termNum;
|
||||
|
||||
final int startTermIndex;
|
||||
final int endTermIndex;
|
||||
final int nTerms;
|
||||
|
||||
final Map<String,StatsValues> facetStatsValues;
|
||||
|
||||
FieldFacetStats( String name, FieldCache.StringIndex si, FieldType ft )
|
||||
{
|
||||
this.name = name;
|
||||
this.si = si;
|
||||
this.ft = ft;
|
||||
|
||||
terms = si.lookup;
|
||||
termNum = si.order;
|
||||
startTermIndex = 1;
|
||||
endTermIndex = terms.length;
|
||||
nTerms = endTermIndex - startTermIndex;
|
||||
|
||||
facetStatsValues = new HashMap<String, StatsValues>();
|
||||
}
|
||||
|
||||
String getTermText( int docID )
|
||||
{
|
||||
return terms[termNum[docID]];
|
||||
}
|
||||
|
||||
public boolean facet( int docID, Double v )
|
||||
{
|
||||
if( v == null ) return false;
|
||||
|
||||
int term = termNum[docID];
|
||||
int arrIdx = term-startTermIndex;
|
||||
if (arrIdx>=0 && arrIdx<nTerms) {
|
||||
String key = ft.indexedToReadable( terms[term] );
|
||||
StatsValues stats = facetStatsValues.get( key );
|
||||
if( stats == null ) {
|
||||
stats = new StatsValues();
|
||||
facetStatsValues.put(key, stats);
|
||||
}
|
||||
stats.accumulate( v );
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
class SimpleStats {
|
||||
|
||||
|
@ -353,13 +198,13 @@ class SimpleStats {
|
|||
this.params = params;
|
||||
}
|
||||
|
||||
public NamedList<Object> getStatsCounts() {
|
||||
public NamedList<Object> getStatsCounts() throws IOException {
|
||||
NamedList<Object> res = new SimpleOrderedMap<Object>();
|
||||
res.add("stats_fields", getStatsFields());
|
||||
return res;
|
||||
}
|
||||
|
||||
public NamedList getStatsFields() {
|
||||
public NamedList getStatsFields() throws IOException {
|
||||
NamedList<NamedList<Number>> res = new SimpleOrderedMap<NamedList<Number>>();
|
||||
String[] statsFs = params.getParams(StatsParams.STATS_FIELD);
|
||||
if (null != statsFs) {
|
||||
|
@ -368,18 +213,27 @@ class SimpleStats {
|
|||
if (facets == null) {
|
||||
facets = new String[0]; // make sure it is something...
|
||||
}
|
||||
SchemaField sf = searcher.getSchema().getField(f);
|
||||
FieldType ft = sf.getType();
|
||||
if (ft.isTokenized() || sf.multiValued()) {
|
||||
//use UnInvertedField for multivalued fields
|
||||
UnInvertedField uif = UnInvertedField.getUnInvertedField(f, searcher);
|
||||
StatsValues allstats = uif.getStats(searcher, docs, facets);
|
||||
if (allstats != null) {
|
||||
res.add(f, (NamedList) allstats.getStatsValues());
|
||||
} else {
|
||||
res.add(f, null);
|
||||
}
|
||||
} else {
|
||||
res.add(f, getFieldCacheStats(f, facets));
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public NamedList getFieldCacheStats(String fieldName, String[] facet ) {
|
||||
FieldType ft = searcher.getSchema().getFieldType(fieldName);
|
||||
if( ft.isTokenized() || ft.isMultiValued() ) {
|
||||
throw new SolrException( ErrorCode.BAD_REQUEST,
|
||||
"Stats are valid for single valued numeric values. not: "+fieldName + "["+ft+"]" );
|
||||
}
|
||||
|
||||
FieldCache.StringIndex si = null;
|
||||
try {
|
||||
|
@ -388,7 +242,7 @@ class SimpleStats {
|
|||
catch (IOException e) {
|
||||
throw new RuntimeException( "failed to open field cache for: "+fieldName, e );
|
||||
}
|
||||
FieldFacetStats all = new FieldFacetStats( "all", si, ft );
|
||||
FieldFacetStats all = new FieldFacetStats( "all", si, ft, 0 );
|
||||
if ( all.nTerms <= 0 || docs.size() <= 0 ) return null;
|
||||
StatsValues allstats = new StatsValues();
|
||||
|
||||
|
@ -397,17 +251,13 @@ class SimpleStats {
|
|||
final FieldFacetStats[] finfo = new FieldFacetStats[facet.length];
|
||||
for( String f : facet ) {
|
||||
ft = searcher.getSchema().getFieldType(f);
|
||||
if( ft.isTokenized() || ft.isMultiValued() ) {
|
||||
throw new SolrException( ErrorCode.BAD_REQUEST,
|
||||
"Stats can only facet on single valued fields, not: "+f + "["+ft+"]" );
|
||||
}
|
||||
try {
|
||||
si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException( "failed to open field cache for: "+f, e );
|
||||
}
|
||||
finfo[i++] = new FieldFacetStats( f, si, ft );
|
||||
finfo[i++] = new FieldFacetStats( f, si, ft, 0 );
|
||||
}
|
||||
|
||||
|
||||
|
@ -438,4 +288,6 @@ class SimpleStats {
|
|||
}
|
||||
return allstats.getStatsValues();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,159 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
|
||||
|
||||
/** 2/11/2009 - Moved out of StatsComponent to allow open access to UnInvertedField
|
||||
* StatsValues is a utility to accumulate statistics on a set of values
|
||||
*
|
||||
* </p>
|
||||
* @see org.apache.solr.handler.component.StatsComponent
|
||||
*
|
||||
*/
|
||||
|
||||
public class StatsValues {
|
||||
private static final String FACETS = "facets";
|
||||
double min;
|
||||
double max;
|
||||
double sum;
|
||||
double sumOfSquares;
|
||||
long count;
|
||||
long missing;
|
||||
|
||||
// facetField facetValue
|
||||
public Map<String, Map<String,StatsValues>> facets;
|
||||
|
||||
public StatsValues() {
|
||||
reset();
|
||||
}
|
||||
|
||||
public void accumulate(NamedList stv){
|
||||
min = Math.min(min, (Double)stv.get("min"));
|
||||
max = Math.max(max, (Double)stv.get("max"));
|
||||
sum += (Double)stv.get("sum");
|
||||
count += (Long)stv.get("count");
|
||||
missing += (Long)stv.get("missing");
|
||||
sumOfSquares += (Double)stv.get("sumOfSquares");
|
||||
|
||||
NamedList f = (NamedList)stv.get( FACETS );
|
||||
if( f != null ) {
|
||||
if( facets == null ) {
|
||||
facets = new HashMap<String, Map<String,StatsValues>>();
|
||||
}
|
||||
|
||||
for( int i=0; i< f.size(); i++ ) {
|
||||
String field = f.getName(i);
|
||||
NamedList vals = (NamedList)f.getVal( i );
|
||||
Map<String,StatsValues> addTo = facets.get( field );
|
||||
if( addTo == null ) {
|
||||
addTo = new HashMap<String,StatsValues>();
|
||||
facets.put( field, addTo );
|
||||
}
|
||||
for( int j=0; j< vals.size(); j++ ) {
|
||||
String val = f.getName(i);
|
||||
StatsValues vvals = addTo.get( val );
|
||||
if( vvals == null ) {
|
||||
vvals = new StatsValues();
|
||||
addTo.put( val, vvals );
|
||||
}
|
||||
vvals.accumulate( (NamedList)f.getVal( i ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void accumulate(double v){
|
||||
sumOfSquares += (v*v); // for std deviation
|
||||
min = Math.min(min, v);
|
||||
max = Math.max(max, v);
|
||||
sum += v;
|
||||
count++;
|
||||
}
|
||||
|
||||
public void accumulate(double v, int c){
|
||||
sumOfSquares += (v*v*c); // for std deviation
|
||||
min = Math.min(min, v);
|
||||
max = Math.max(max, v);
|
||||
sum += v*c;
|
||||
count+= c;
|
||||
}
|
||||
|
||||
public void addMissing(int c){
|
||||
missing += c;
|
||||
}
|
||||
|
||||
public double getAverage(){
|
||||
return sum / count;
|
||||
}
|
||||
|
||||
public double getStandardDeviation()
|
||||
{
|
||||
if( count <= 1.0D )
|
||||
return 0.0D;
|
||||
|
||||
return Math.sqrt( ( ( count * sumOfSquares ) - ( sum * sum ) )
|
||||
/ ( count * ( count - 1.0D ) ) );
|
||||
}
|
||||
|
||||
public long getCount()
|
||||
{
|
||||
return count;
|
||||
}
|
||||
|
||||
public void reset(){
|
||||
min = Double.MAX_VALUE;
|
||||
max = -1.0*Double.MAX_VALUE;
|
||||
sum = count = missing = 0;
|
||||
sumOfSquares = 0;
|
||||
facets = null;
|
||||
}
|
||||
|
||||
public NamedList<?> getStatsValues(){
|
||||
NamedList<Object> res = new SimpleOrderedMap<Object>();
|
||||
res.add("min", min);
|
||||
res.add("max", max);
|
||||
res.add("sum", sum);
|
||||
res.add("count", count);
|
||||
res.add("missing", missing);
|
||||
res.add("sumOfSquares", sumOfSquares );
|
||||
res.add("mean", getAverage());
|
||||
res.add( "stddev", getStandardDeviation() );
|
||||
|
||||
// add the facet stats
|
||||
if( facets != null && facets.size() > 0 ) {
|
||||
NamedList<NamedList<?>> nl = new SimpleOrderedMap<NamedList<?>>();
|
||||
for( Map.Entry<String, Map<String,StatsValues>> entry : facets.entrySet() ) {
|
||||
NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<NamedList<?>>();
|
||||
nl.add( entry.getKey(), nl2 );
|
||||
for( Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet() ) {
|
||||
nl2.add( e2.getKey(), e2.getValue().getStatsValues() );
|
||||
}
|
||||
}
|
||||
res.add( FACETS, nl );
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
package org.apache.solr.request;
|
||||
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
|
@ -26,10 +27,12 @@ import org.apache.solr.common.params.FacetParams;
|
|||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.SimpleFacets;
|
||||
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.search.*;
|
||||
import org.apache.solr.util.BoundedTreeSet;
|
||||
import org.apache.solr.handler.component.StatsValues;
|
||||
import org.apache.solr.handler.component.FieldFacetStats;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -38,7 +41,7 @@ import java.util.Arrays;
|
|||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.WeakHashMap;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
/**
|
||||
|
@ -593,9 +596,6 @@ public class UnInvertedField {
|
|||
|
||||
for (; i<endTerm; i++) {
|
||||
int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
|
||||
if (c==0) {
|
||||
|
||||
}
|
||||
if (c<mincount || --off>=0) continue;
|
||||
if (--lim<0) break;
|
||||
|
||||
|
@ -616,6 +616,176 @@ public class UnInvertedField {
|
|||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
* Collect statistics about the UninvertedField. Code is very similar to {@link #getCounts(org.apache.solr.search.SolrIndexSearcher, org.apache.solr.search.DocSet, int, int, Integer, boolean, String, String)}
|
||||
* It can be used to calculate stats on multivalued fields.
|
||||
* <p/>
|
||||
* This method is mainly used by the {@link org.apache.solr.handler.component.StatsComponent}.
|
||||
*
|
||||
* @param searcher The Searcher to use to gather the statistics
|
||||
* @param baseDocs The {@link org.apache.solr.search.DocSet} to gather the stats on
|
||||
* @param facet One or more fields to facet on.
|
||||
* @return The {@link org.apache.solr.handler.component.StatsValues} collected
|
||||
* @throws IOException
|
||||
*/
|
||||
public StatsValues getStats(SolrIndexSearcher searcher, DocSet baseDocs, String[] facet) throws IOException {
|
||||
//this function is ripped off nearly wholesale from the getCounts function to use
|
||||
//for multiValued fields within the StatsComponent. may be useful to find common
|
||||
//functionality between the two and refactor code somewhat
|
||||
use.incrementAndGet();
|
||||
|
||||
FieldType ft = searcher.getSchema().getFieldType(field);
|
||||
StatsValues allstats = new StatsValues();
|
||||
|
||||
int i = 0;
|
||||
final FieldFacetStats[] finfo = new FieldFacetStats[facet.length];
|
||||
//Initialize facetstats, if facets have been passed in
|
||||
FieldCache.StringIndex si;
|
||||
for (String f : facet) {
|
||||
ft = searcher.getSchema().getFieldType(f);
|
||||
try {
|
||||
si = FieldCache.DEFAULT.getStringIndex(searcher.getReader(), f);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException("failed to open field cache for: " + f, e);
|
||||
}
|
||||
finfo[i++] = new FieldFacetStats(f, si, ft, numTermsInField);
|
||||
}
|
||||
|
||||
|
||||
DocSet docs = baseDocs;
|
||||
int baseSize = docs.size();
|
||||
int maxDoc = searcher.maxDoc();
|
||||
|
||||
if (baseSize > 0) {
|
||||
|
||||
final int[] index = this.index;
|
||||
final int[] counts = new int[numTermsInField];
|
||||
|
||||
NumberedTermEnum te = ti.getEnumerator(searcher.getReader());
|
||||
|
||||
|
||||
boolean doNegative = false;
|
||||
if (finfo.length == 0) {
|
||||
//if we're collecting statistics with a facet field, can't do inverted counting
|
||||
doNegative = baseSize > maxDoc >> 1 && termInstances > 0
|
||||
&& docs instanceof BitDocSet;
|
||||
}
|
||||
|
||||
if (doNegative) {
|
||||
OpenBitSet bs = (OpenBitSet) ((BitDocSet) docs).getBits().clone();
|
||||
bs.flip(0, maxDoc);
|
||||
// TODO: when iterator across negative elements is available, use that
|
||||
// instead of creating a new bitset and inverting.
|
||||
docs = new BitDocSet(bs, maxDoc - baseSize);
|
||||
// simply negating will mean that we have deleted docs in the set.
|
||||
// that should be OK, as their entries in our table should be empty.
|
||||
}
|
||||
|
||||
// For the biggest terms, do straight set intersections
|
||||
for (TopTerm tt : bigTerms.values()) {
|
||||
// TODO: counts could be deferred if sorted==false
|
||||
if (tt.termNum >= 0 && tt.termNum < numTermsInField) {
|
||||
if (finfo.length == 0) {
|
||||
counts[tt.termNum] = searcher.numDocs(new TermQuery(tt.term), docs);
|
||||
} else {
|
||||
//COULD BE VERY SLOW
|
||||
//if we're collecting stats for facet fields, we need to iterate on all matching documents
|
||||
DocSet bigTermDocSet = searcher.getDocSet(new TermQuery(tt.term)).intersection(docs);
|
||||
DocIterator iter = bigTermDocSet.iterator();
|
||||
while (iter.hasNext()) {
|
||||
int doc = iter.nextDoc();
|
||||
counts[tt.termNum]++;
|
||||
for (FieldFacetStats f : finfo) {
|
||||
f.facetTermNum(doc, tt.termNum);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (termInstances > 0) {
|
||||
DocIterator iter = docs.iterator();
|
||||
while (iter.hasNext()) {
|
||||
int doc = iter.nextDoc();
|
||||
int code = index[doc];
|
||||
|
||||
if ((code & 0xff) == 1) {
|
||||
int pos = code >>> 8;
|
||||
int whichArray = (doc >>> 16) & 0xff;
|
||||
byte[] arr = tnums[whichArray];
|
||||
int tnum = 0;
|
||||
for (; ;) {
|
||||
int delta = 0;
|
||||
for (; ;) {
|
||||
byte b = arr[pos++];
|
||||
delta = (delta << 7) | (b & 0x7f);
|
||||
if ((b & 0x80) == 0) break;
|
||||
}
|
||||
if (delta == 0) break;
|
||||
tnum += delta - TNUM_OFFSET;
|
||||
counts[tnum]++;
|
||||
for (FieldFacetStats f : finfo) {
|
||||
f.facetTermNum(doc, tnum);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int tnum = 0;
|
||||
int delta = 0;
|
||||
for (; ;) {
|
||||
delta = (delta << 7) | (code & 0x7f);
|
||||
if ((code & 0x80) == 0) {
|
||||
if (delta == 0) break;
|
||||
tnum += delta - TNUM_OFFSET;
|
||||
counts[tnum]++;
|
||||
for (FieldFacetStats f : finfo) {
|
||||
f.facetTermNum(doc, tnum);
|
||||
}
|
||||
delta = 0;
|
||||
}
|
||||
code >>>= 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// add results in index order
|
||||
|
||||
for (i = 0; i < numTermsInField; i++) {
|
||||
int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
|
||||
if (c == 0) {
|
||||
continue;
|
||||
}
|
||||
Double value = Double.parseDouble(ft.indexedToReadable(getTermText(te, i)));
|
||||
allstats.accumulate(value, c);
|
||||
//as we've parsed the termnum into a value, lets also accumulate fieldfacet statistics
|
||||
for (FieldFacetStats f : finfo) {
|
||||
f.accumulateTermNum(i, value);
|
||||
}
|
||||
}
|
||||
te.close();
|
||||
int c = SimpleFacets.getFieldMissingCount(searcher, baseDocs, field);
|
||||
if (c > 0) {
|
||||
allstats.addMissing(c);
|
||||
}
|
||||
}
|
||||
if (allstats.getCount() > 0) {
|
||||
if (finfo.length > 0) {
|
||||
allstats.facets = new HashMap<String, Map<String, StatsValues>>();
|
||||
for (FieldFacetStats f : finfo) {
|
||||
allstats.facets.put(f.name, f.facetStatsValues);
|
||||
}
|
||||
}
|
||||
return allstats;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
String getTermText(NumberedTermEnum te, int termNum) throws IOException {
|
||||
if (bigTerms.size() > 0) {
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.handler.component;
|
|||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.params.StatsParams;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
|
@ -76,6 +77,69 @@ public class StatsComponentTest extends AbstractSolrTestCase {
|
|||
}
|
||||
|
||||
|
||||
public void testMVFieldStatisticsResult() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
|
||||
assertU(adoc("id", "1", "stats_ii", "-10", "stats_ii", "-100", "active_s", "true"));
|
||||
assertU(adoc("id", "2", "stats_ii", "-20", "stats_ii", "200", "active_s", "true"));
|
||||
assertU(adoc("id", "3", "stats_ii", "-30", "stats_ii", "-1", "active_s", "false"));
|
||||
assertU(adoc("id", "4", "stats_ii", "-40", "stats_ii", "10", "active_s", "false"));
|
||||
assertU(commit());
|
||||
|
||||
|
||||
Map<String, String> args = new HashMap<String, String>();
|
||||
args.put(CommonParams.Q, "*:*");
|
||||
args.put(StatsParams.STATS, "true");
|
||||
args.put(StatsParams.STATS_FIELD, "stats_ii");
|
||||
args.put("indent", "true");
|
||||
SolrQueryRequest req = new LocalSolrQueryRequest(core, new MapSolrParams(args));
|
||||
|
||||
|
||||
assertQ("test statistics values", req
|
||||
, "//double[@name='min'][.='-100.0']"
|
||||
, "//double[@name='max'][.='200.0']"
|
||||
, "//double[@name='sum'][.='9.0']"
|
||||
, "//long[@name='count'][.='8']"
|
||||
, "//long[@name='missing'][.='0']"
|
||||
, "//double[@name='sumOfSquares'][.='53101.0']"
|
||||
, "//double[@name='mean'][.='1.125']"
|
||||
, "//double[@name='stddev'][.='87.08852228787508']"
|
||||
);
|
||||
|
||||
|
||||
|
||||
args.put(StatsParams.STATS_FACET, "active_s");
|
||||
req = new LocalSolrQueryRequest(core, new MapSolrParams(args));
|
||||
|
||||
assertQ("test statistics values", req
|
||||
, "//double[@name='min'][.='-100.0']"
|
||||
, "//double[@name='max'][.='200.0']"
|
||||
, "//double[@name='sum'][.='9.0']"
|
||||
, "//long[@name='count'][.='8']"
|
||||
, "//long[@name='missing'][.='0']"
|
||||
, "//double[@name='sumOfSquares'][.='53101.0']"
|
||||
, "//double[@name='mean'][.='1.125']"
|
||||
, "//double[@name='stddev'][.='87.08852228787508']"
|
||||
);
|
||||
|
||||
|
||||
|
||||
|
||||
assertQ("test value for active_s=true", req
|
||||
, "//lst[@name='true']/double[@name='min'][.='-100.0']"
|
||||
, "//lst[@name='true']/double[@name='max'][.='200.0']"
|
||||
, "//lst[@name='true']/double[@name='sum'][.='70.0']"
|
||||
, "//lst[@name='true']/long[@name='count'][.='4']"
|
||||
, "//lst[@name='true']/long[@name='missing'][.='0']"
|
||||
, "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']"
|
||||
, "//lst[@name='true']/double[@name='mean'][.='17.5']"
|
||||
, "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']"
|
||||
);
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void testFieldStatisticsMissingResult() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
assertU(adoc("id", "1", "stats_i", "-10"));
|
||||
|
|
|
@ -286,6 +286,7 @@
|
|||
both match, the first appearing in the schema will be used. -->
|
||||
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<dynamicField name="*_ii" type="integer" indexed="true" stored="true" multiValued="true"/>
|
||||
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
|
||||
|
|
Loading…
Reference in New Issue