SOLR-792 -- adding pivot to the FacetComponent

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1024389 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan McKinley 2010-10-19 20:10:03 +00:00
parent 536823be22
commit 99b7ec6e2e
5 changed files with 283 additions and 289 deletions

View File

@ -498,9 +498,6 @@
<str name="version">2.1</str>
-->
</lst>
<arr name="last-components">
<str>pivot</str> <!-- not included by default -->
</arr>
</requestHandler>
<!--
@ -668,7 +665,6 @@
<searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" />
<searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" />
<searchComponent name="pivot" class="org.apache.solr.handler.component.PivotFacetComponent" />
<searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" />
<searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" />
<searchComponent name="stats" class="org.apache.solr.handler.component.StatsComponent" />

View File

@ -834,7 +834,6 @@ public final class SolrCore implements SolrInfoMBean {
addIfNotPresent(components,HighlightComponent.COMPONENT_NAME,HighlightComponent.class);
addIfNotPresent(components,QueryComponent.COMPONENT_NAME,QueryComponent.class);
addIfNotPresent(components,FacetComponent.COMPONENT_NAME,FacetComponent.class);
addIfNotPresent(components,PivotFacetComponent.COMPONENT_NAME,PivotFacetComponent.class);
addIfNotPresent(components,MoreLikeThisComponent.COMPONENT_NAME,MoreLikeThisComponent.class);
addIfNotPresent(components,StatsComponent.COMPONENT_NAME,StatsComponent.class);
addIfNotPresent(components,DebugComponent.COMPONENT_NAME,DebugComponent.class);
@ -843,6 +842,9 @@ public final class SolrCore implements SolrInfoMBean {
private <T> void addIfNotPresent(Map<String ,T> registry, String name, Class<? extends T> c){
if(!registry.containsKey(name)){
T searchComp = (T) resourceLoader.newInstance(c.getName());
if (searchComp instanceof NamedListInitializedPlugin){
((NamedListInitializedPlugin)searchComp).init( new NamedList() );
}
registry.put(name, searchComp);
if (searchComp instanceof SolrInfoMBean){
infoRegistry.put(((SolrInfoMBean)searchComp).getName(), (SolrInfoMBean)searchComp);

View File

@ -45,6 +45,16 @@ public class FacetComponent extends SearchComponent
{
public static final String COMPONENT_NAME = "facet";
static final String PIVOT_KEY = "facet_pivot";
PivotFacetHelper pivotHelper;
@Override
public void init( NamedList args )
{
pivotHelper = new PivotFacetHelper(); // Maybe this would configurable?
}
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
@ -68,8 +78,17 @@ public class FacetComponent extends SearchComponent
params,
rb );
NamedList counts = f.getFacetCounts();
String[] pivots = params.getParams( FacetParams.FACET_PIVOT );
if( pivots != null && pivots.length > 0 ) {
NamedList v = pivotHelper.process(rb, params, pivots);
if( v != null ) {
counts.add( PIVOT_KEY, v );
}
}
// TODO ???? add this directly to the response, or to the builder?
rb.rsp.add( "facet_counts", f.getFacetCounts() );
rb.rsp.add( "facet_counts", counts );
}
}

View File

@ -1,282 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.Term;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
*
* @since solr 4.0
*/
public class PivotFacetComponent extends SearchComponent
{
public static final String COMPONENT_NAME = "pivot";
static final String PIVOT_KEY = "facet_pivot";
/**
* Designed to be overridden by subclasses that provide different faceting implementations.
* TODO: Currently this is returning a SimpleFacets object, but those capabilities would
* be better as an extracted abstract class or interface.
*/
protected SimpleFacets getFacetImplementation(SolrQueryRequest req,
DocSet docs,
SolrParams params) {
return new SimpleFacets(req, docs, params);
}
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
if (rb.req.getParams().getBool(FacetParams.FACET,false)) {
rb.setNeedDocSet( true );
rb.doFacets = true;
}
}
public void process(ResponseBuilder rb) throws IOException {
if (!rb.doFacets) return;
SolrParams params = rb.req.getParams();
String[] pivots = params.getParams(FacetParams.FACET_PIVOT); // example: author,type (for types by author / types within author)
if (pivots == null) return;
int minMatch = params.getInt( FacetParams.FACET_PIVOT_MINCOUNT, 1 );
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<List<NamedList<Object>>>();
for (String pivot : pivots) {
String[] fields = pivot.split(","); // only support two levels for now
if( fields.length < 2 ) {
throw new SolrException( ErrorCode.BAD_REQUEST,
"Pivot Facet needs at least two fields: "+pivot );
}
DocSet docs = rb.getResults().docSet;
String field = fields[0];
String subField = fields[1];
Deque<String> fnames = new LinkedList<String>();
for( int i=fields.length-1; i>1; i-- ) {
fnames.push( fields[i] );
}
SimpleFacets sf = getFacetImplementation(rb.req, rb.getResults().docSet, rb.req.getParams());
NamedList<Integer> superFacets = sf.getTermCounts(field);
pivotResponse.add(pivot, doPivots(superFacets, field, subField, fnames, rb, docs, minMatch));
}
NamedList facetCounts = (NamedList) rb.rsp.getValues().get("facet_counts");
if (facetCounts == null) {
facetCounts = new NamedList();
rb.rsp.add("facet_counts", facetCounts);
}
facetCounts.add( PIVOT_KEY, pivotResponse);
}
/**
* Recursive function to do all the pivots
*/
protected List<NamedList<Object>> doPivots( NamedList<Integer> superFacets, String field, String subField, Deque<String> fnames, ResponseBuilder rb, DocSet docs, int minMatch ) throws IOException
{
SolrIndexSearcher searcher = rb.req.getSearcher();
// TODO: optimize to avoid converting to an external string and then having to convert back to internal below
FieldType ftype = searcher.getSchema().getField(field).getType();
// Required to translate back to an object
Field f = new Field( field, "X", Store.YES, Index.ANALYZED );
String nextField = fnames.poll();
List<NamedList<Object>> values = new ArrayList<NamedList<Object>>( superFacets.size() );
for (Map.Entry<String, Integer> kv : superFacets) {
// Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though
if (kv.getValue() > minMatch ) {
String internal = ftype.toInternal( kv.getKey() );
f.setValue( internal );
SimpleOrderedMap<Object> pivot = new SimpleOrderedMap<Object>();
pivot.add( "field", field );
pivot.add( "value", ftype.toObject( f ) );
pivot.add( "count", kv.getValue() );
if( subField == null ) {
values.add( pivot );
}
else {
Query query = new TermQuery(new Term(field, internal));
DocSet subset = searcher.getDocSet(query, docs);
SimpleFacets sf = getFacetImplementation(rb.req, subset, rb.req.getParams());
NamedList<Integer> nl = sf.getTermCounts(subField);
if (nl.size() > minMatch ) {
pivot.add( "pivot", doPivots( nl, subField, nextField, fnames, rb, subset, minMatch ) );
values.add( pivot ); // only add response if there are some counts
}
}
}
}
// put the field back on the list
fnames.push( nextField );
return values;
}
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (!rb.doFacets) {
return ResponseBuilder.STAGE_DONE;
}
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
SolrParams params = rb.req.getParams();
String[] pivots = params.getParams(FacetParams.FACET_PIVOT);
for ( ShardRequest sreq : rb.outgoing ) {
if (( sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS ) != 0
&& sreq.shards != null && sreq.shards.length == 1 ) {
sreq.params.set( FacetParams.FACET, "true" );
sreq.params.set( FacetParams.FACET_PIVOT, pivots );
sreq.params.set( FacetParams.FACET_PIVOT_MINCOUNT, 1 ); // keep this at 1 regardless so that it accumulates everything
}
}
}
return ResponseBuilder.STAGE_DONE;
}
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
if (!rb.doFacets) return;
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) {
SimpleOrderedMap<List<NamedList<Object>>> tf = rb._pivots;
if ( null == tf ) {
tf = new SimpleOrderedMap<List<NamedList<Object>>>();
rb._pivots = tf;
}
for (ShardResponse srsp: sreq.responses) {
int shardNum = rb.getShardNum(srsp.getShard());
NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
// handle facet trees from shards
SimpleOrderedMap<List<NamedList<Object>>> shard_pivots =
(SimpleOrderedMap<List<NamedList<Object>>>)facet_counts.get( PIVOT_KEY );
if ( shard_pivots != null ) {
for (int j=0; j< shard_pivots.size(); j++) {
// TODO -- accumulate the results from each shard
// The following code worked to accumulate facets for an previous
// two level patch... it is here for reference till someone can upgrade
/**
String shard_tree_name = (String) shard_pivots.getName( j );
SimpleOrderedMap<NamedList> shard_tree = (SimpleOrderedMap<NamedList>)shard_pivots.getVal( j );
SimpleOrderedMap<NamedList> facet_tree = tf.get( shard_tree_name );
if ( null == facet_tree) {
facet_tree = new SimpleOrderedMap<NamedList>();
tf.add( shard_tree_name, facet_tree );
}
for( int o = 0; o < shard_tree.size() ; o++ ) {
String shard_outer = (String) shard_tree.getName( o );
NamedList shard_innerList = (NamedList) shard_tree.getVal( o );
NamedList tree_innerList = (NamedList) facet_tree.get( shard_outer );
if ( null == tree_innerList ) {
tree_innerList = new NamedList();
facet_tree.add( shard_outer, tree_innerList );
}
for ( int i = 0 ; i < shard_innerList.size() ; i++ ) {
String shard_term = (String) shard_innerList.getName( i );
long shard_count = ((Number) shard_innerList.getVal(i)).longValue();
int tree_idx = tree_innerList.indexOf( shard_term, 0 );
if ( -1 == tree_idx ) {
tree_innerList.add( shard_term, shard_count );
} else {
long tree_count = ((Number) tree_innerList.getVal( tree_idx )).longValue();
tree_innerList.setVal( tree_idx, shard_count + tree_count );
}
} // innerList loop
} // outer loop
**/
} // each tree loop
}
}
}
return ;
}
@Override
public void finishStage(ResponseBuilder rb) {
if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
// wait until STAGE_GET_FIELDS
// so that "result" is already stored in the response (for aesthetics)
SimpleOrderedMap<List<NamedList<Object>>> tf = rb._pivots;
// get 'facet_counts' from the response
NamedList facetCounts = (NamedList) rb.rsp.getValues().get("facet_counts");
if (facetCounts == null) {
facetCounts = new NamedList();
rb.rsp.add("facet_counts", facetCounts);
}
facetCounts.add( PIVOT_KEY, tf );
rb._pivots = null;
}
public String getDescription() {
return "Handle Pivot (multi-level) Faceting";
}
public String getSourceId() {
return "$Id$";
}
public String getSource() {
return "$URL$";
}
public String getVersion() {
return "$Revision$";
}
}

View File

@ -0,0 +1,259 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.Term;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
/**
* This is thread safe
* @since solr 4.0
*/
public class PivotFacetHelper
{
/**
* Designed to be overridden by subclasses that provide different faceting implementations.
* TODO: Currently this is returning a SimpleFacets object, but those capabilities would
* be better as an extracted abstract class or interface.
*/
protected SimpleFacets getFacetImplementation(SolrQueryRequest req, DocSet docs, SolrParams params) {
return new SimpleFacets(req, docs, params);
}
public SimpleOrderedMap<List<NamedList<Object>>> process(ResponseBuilder rb, SolrParams params, String[] pivots) throws IOException {
if (!rb.doFacets || pivots == null)
return null;
int minMatch = params.getInt( FacetParams.FACET_PIVOT_MINCOUNT, 1 );
SimpleOrderedMap<List<NamedList<Object>>> pivotResponse = new SimpleOrderedMap<List<NamedList<Object>>>();
for (String pivot : pivots) {
String[] fields = pivot.split(","); // only support two levels for now
if( fields.length < 2 ) {
throw new SolrException( ErrorCode.BAD_REQUEST,
"Pivot Facet needs at least two fields: "+pivot );
}
DocSet docs = rb.getResults().docSet;
String field = fields[0];
String subField = fields[1];
Deque<String> fnames = new LinkedList<String>();
for( int i=fields.length-1; i>1; i-- ) {
fnames.push( fields[i] );
}
SimpleFacets sf = getFacetImplementation(rb.req, rb.getResults().docSet, rb.req.getParams());
NamedList<Integer> superFacets = sf.getTermCounts(field);
pivotResponse.add(pivot, doPivots(superFacets, field, subField, fnames, rb, docs, minMatch));
}
return pivotResponse;
}
/**
* Recursive function to do all the pivots
*/
protected List<NamedList<Object>> doPivots( NamedList<Integer> superFacets, String field, String subField, Deque<String> fnames, ResponseBuilder rb, DocSet docs, int minMatch ) throws IOException
{
SolrIndexSearcher searcher = rb.req.getSearcher();
// TODO: optimize to avoid converting to an external string and then having to convert back to internal below
FieldType ftype = searcher.getSchema().getField(field).getType();
// Required to translate back to an object
Field f = new Field( field, "X", Store.YES, Index.ANALYZED );
String nextField = fnames.poll();
List<NamedList<Object>> values = new ArrayList<NamedList<Object>>( superFacets.size() );
for (Map.Entry<String, Integer> kv : superFacets) {
// Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though
if (kv.getValue() > minMatch ) {
String internal = ftype.toInternal( kv.getKey() );
f.setValue( internal );
SimpleOrderedMap<Object> pivot = new SimpleOrderedMap<Object>();
pivot.add( "field", field );
pivot.add( "value", ftype.toObject( f ) );
pivot.add( "count", kv.getValue() );
if( subField == null ) {
values.add( pivot );
}
else {
Query query = new TermQuery(new Term(field, internal));
DocSet subset = searcher.getDocSet(query, docs);
SimpleFacets sf = getFacetImplementation(rb.req, subset, rb.req.getParams());
NamedList<Integer> nl = sf.getTermCounts(subField);
if (nl.size() > minMatch ) {
pivot.add( "pivot", doPivots( nl, subField, nextField, fnames, rb, subset, minMatch ) );
values.add( pivot ); // only add response if there are some counts
}
}
}
}
// put the field back on the list
fnames.push( nextField );
return values;
}
// TODO: This is code from various patches to support distributed search.
// Some parts may be helpful for whoever implements distributed search.
//
// @Override
// public int distributedProcess(ResponseBuilder rb) throws IOException {
// if (!rb.doFacets) {
// return ResponseBuilder.STAGE_DONE;
// }
//
// if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
// SolrParams params = rb.req.getParams();
// String[] pivots = params.getParams(FacetParams.FACET_PIVOT);
// for ( ShardRequest sreq : rb.outgoing ) {
// if (( sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS ) != 0
// && sreq.shards != null && sreq.shards.length == 1 ) {
// sreq.params.set( FacetParams.FACET, "true" );
// sreq.params.set( FacetParams.FACET_PIVOT, pivots );
// sreq.params.set( FacetParams.FACET_PIVOT_MINCOUNT, 1 ); // keep this at 1 regardless so that it accumulates everything
// }
// }
// }
// return ResponseBuilder.STAGE_DONE;
// }
//
// @Override
// public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
// if (!rb.doFacets) return;
//
//
// if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) {
// SimpleOrderedMap<List<NamedList<Object>>> tf = rb._pivots;
// if ( null == tf ) {
// tf = new SimpleOrderedMap<List<NamedList<Object>>>();
// rb._pivots = tf;
// }
// for (ShardResponse srsp: sreq.responses) {
// int shardNum = rb.getShardNum(srsp.getShard());
//
// NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
//
// // handle facet trees from shards
// SimpleOrderedMap<List<NamedList<Object>>> shard_pivots =
// (SimpleOrderedMap<List<NamedList<Object>>>)facet_counts.get( PIVOT_KEY );
//
// if ( shard_pivots != null ) {
// for (int j=0; j< shard_pivots.size(); j++) {
// // TODO -- accumulate the results from each shard
// // The following code worked to accumulate facets for an previous
// // two level patch... it is here for reference till someone can upgrade
// /**
// String shard_tree_name = (String) shard_pivots.getName( j );
// SimpleOrderedMap<NamedList> shard_tree = (SimpleOrderedMap<NamedList>)shard_pivots.getVal( j );
// SimpleOrderedMap<NamedList> facet_tree = tf.get( shard_tree_name );
// if ( null == facet_tree) {
// facet_tree = new SimpleOrderedMap<NamedList>();
// tf.add( shard_tree_name, facet_tree );
// }
//
// for( int o = 0; o < shard_tree.size() ; o++ ) {
// String shard_outer = (String) shard_tree.getName( o );
// NamedList shard_innerList = (NamedList) shard_tree.getVal( o );
// NamedList tree_innerList = (NamedList) facet_tree.get( shard_outer );
// if ( null == tree_innerList ) {
// tree_innerList = new NamedList();
// facet_tree.add( shard_outer, tree_innerList );
// }
//
// for ( int i = 0 ; i < shard_innerList.size() ; i++ ) {
// String shard_term = (String) shard_innerList.getName( i );
// long shard_count = ((Number) shard_innerList.getVal(i)).longValue();
// int tree_idx = tree_innerList.indexOf( shard_term, 0 );
//
// if ( -1 == tree_idx ) {
// tree_innerList.add( shard_term, shard_count );
// } else {
// long tree_count = ((Number) tree_innerList.getVal( tree_idx )).longValue();
// tree_innerList.setVal( tree_idx, shard_count + tree_count );
// }
// } // innerList loop
// } // outer loop
// **/
// } // each tree loop
// }
// }
// }
// return ;
// }
//
// @Override
// public void finishStage(ResponseBuilder rb) {
// if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
// // wait until STAGE_GET_FIELDS
// // so that "result" is already stored in the response (for aesthetics)
//
// SimpleOrderedMap<List<NamedList<Object>>> tf = rb._pivots;
//
// // get 'facet_counts' from the response
// NamedList facetCounts = (NamedList) rb.rsp.getValues().get("facet_counts");
// if (facetCounts == null) {
// facetCounts = new NamedList();
// rb.rsp.add("facet_counts", facetCounts);
// }
// facetCounts.add( PIVOT_KEY, tf );
// rb._pivots = null;
// }
//
// public String getDescription() {
// return "Handle Pivot (multi-level) Faceting";
// }
//
// public String getSourceId() {
// return "$Id$";
// }
//
// public String getSource() {
// return "$URL$";
// }
//
// public String getVersion() {
// return "$Revision$";
// }
}