From e88daca52320f7dd1350a0aeda24e472cce89281 Mon Sep 17 00:00:00 2001 From: Ryan McKinley Date: Mon, 13 Sep 2010 22:13:42 +0000 Subject: [PATCH] SOLR-792: Adding PivotFacetComponent for Hierarchical faceting git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@996707 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 2 + solr/example/solr/conf/solrconfig.xml | 6 +- .../solr/common/params/FacetParams.java | 16 + .../java/org/apache/solr/core/SolrCore.java | 1 + .../component/PivotFacetComponent.java | 278 ++++++++++++++++++ .../handler/component/ResponseBuilder.java | 1 + .../apache/solr/client/solrj/SolrQuery.java | 13 + .../client/solrj/response/PivotField.java | 74 +++++ .../client/solrj/response/QueryResponse.java | 28 ++ .../solr/client/solrj/SolrExampleTests.java | 100 +++++++ 10 files changed, 518 insertions(+), 1 deletion(-) create mode 100644 solr/src/java/org/apache/solr/handler/component/PivotFacetComponent.java create mode 100644 solr/src/solrj/org/apache/solr/client/solrj/response/PivotField.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 814a85fda2a..dc45c6dacd4 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -265,6 +265,8 @@ New Features * SOLR-2112: Solrj API now supports streaming results. (ryan) +* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting + (erik, Jeremy Hinegardner, Thibaut Lassalle, ryan) Optimizations ---------------------- diff --git a/solr/example/solr/conf/solrconfig.xml b/solr/example/solr/conf/solrconfig.xml index 6e11651d0d0..318d0b52a26 100755 --- a/solr/example/solr/conf/solrconfig.xml +++ b/solr/example/solr/conf/solrconfig.xml @@ -498,6 +498,9 @@ 2.1 --> + + pivot + - + diff --git a/solr/src/common/org/apache/solr/common/params/FacetParams.java b/solr/src/common/org/apache/solr/common/params/FacetParams.java index d119c0ea256..d7a20cd11ea 100644 --- a/solr/src/common/org/apache/solr/common/params/FacetParams.java +++ b/solr/src/common/org/apache/solr/common/params/FacetParams.java @@ -93,6 +93,22 @@ public interface FacetParams { */ public static final String FACET_MISSING = FACET + ".missing"; + + /** + * Comma separated list of fields to pivot + * + * example: author,type (for types by author / types within author) + */ + public static final String FACET_PIVOT = FACET + ".pivot"; + + /** + * Minimum number of docs that need to match to be included in the sublist + * + * default value is 1 + */ + public static final String FACET_PIVOT_MINCOUNT = FACET_PIVOT + ".mincount"; + + /** * String option: "count" causes facets to be sorted * by the count, "index" results in index order. diff --git a/solr/src/java/org/apache/solr/core/SolrCore.java b/solr/src/java/org/apache/solr/core/SolrCore.java index 378a59aa3d7..2cb1b44bcb8 100644 --- a/solr/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/src/java/org/apache/solr/core/SolrCore.java @@ -833,6 +833,7 @@ public final class SolrCore implements SolrInfoMBean { addIfNotPresent(components,HighlightComponent.COMPONENT_NAME,HighlightComponent.class); addIfNotPresent(components,QueryComponent.COMPONENT_NAME,QueryComponent.class); addIfNotPresent(components,FacetComponent.COMPONENT_NAME,FacetComponent.class); + addIfNotPresent(components,PivotFacetComponent.COMPONENT_NAME,PivotFacetComponent.class); addIfNotPresent(components,MoreLikeThisComponent.COMPONENT_NAME,MoreLikeThisComponent.class); addIfNotPresent(components,StatsComponent.COMPONENT_NAME,StatsComponent.class); addIfNotPresent(components,DebugComponent.COMPONENT_NAME,DebugComponent.class); diff --git a/solr/src/java/org/apache/solr/handler/component/PivotFacetComponent.java b/solr/src/java/org/apache/solr/handler/component/PivotFacetComponent.java new file mode 100644 index 00000000000..9f8a714678b --- /dev/null +++ b/solr/src/java/org/apache/solr/handler/component/PivotFacetComponent.java @@ -0,0 +1,278 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.handler.component; + +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.search.DocSet; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.request.SimpleFacets; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.FieldType; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.index.Term; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Deque; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +/** + * + * @since solr 4.0 + */ +public class PivotFacetComponent extends SearchComponent +{ + public static final String COMPONENT_NAME = "pivot"; + + static final String PIVOT_KEY = "facet_pivot"; + + + /** + * Designed to be overridden by subclasses that provide different faceting implementations. + * TODO: Currently this is returning a SimpleFacets object, but those capabilities would + * be better as an extracted abstract class or interface. + */ + protected SimpleFacets getFacetImplementation(SolrQueryRequest req, + DocSet docs, + SolrParams params) { + return new SimpleFacets(req, docs, params); + } + + @Override + public void prepare(ResponseBuilder rb) throws IOException + { + if (rb.req.getParams().getBool(FacetParams.FACET,false)) { + rb.setNeedDocSet( true ); + rb.doFacets = true; + } + } + + public void process(ResponseBuilder rb) throws IOException { + if (!rb.doFacets) return; + + SolrParams params = rb.req.getParams(); + String[] pivots = params.getParams(FacetParams.FACET_PIVOT); // example: author,type (for types by author / types within author) + if (pivots == null) return; + + int minMatch = params.getInt( FacetParams.FACET_PIVOT_MINCOUNT, 1 ); + + SimpleOrderedMap>> pivotResponse = new SimpleOrderedMap>>(); + for (String pivot : pivots) { + String[] fields = pivot.split(","); // only support two levels for now + + if( fields.length < 2 ) { + throw new SolrException( ErrorCode.BAD_REQUEST, + "Pivot Facet needs at least two fields: "+pivot ); + } + + DocSet docs = rb.getResults().docSet; + String field = fields[0]; + String subField = fields[1]; + Deque fnames = new LinkedList(); + for( int i=fields.length-1; i>1; i-- ) { + fnames.push( fields[i] ); + } + + SimpleFacets sf = getFacetImplementation(rb.req, rb.getResults().docSet, rb.req.getParams()); + NamedList superFacets = sf.getTermCounts(field); + + pivotResponse.add(pivot, doPivots(superFacets, field, subField, fnames, rb, docs, minMatch)); + } + NamedList facetCounts = (NamedList) rb.rsp.getValues().get("facet_counts"); + if (facetCounts == null) { + facetCounts = new NamedList(); + rb.rsp.add("facet_counts", facetCounts); + } + facetCounts.add( PIVOT_KEY, pivotResponse); + } + + /** + * Recursive function to do all the pivots + */ + protected List> doPivots( NamedList superFacets, String field, String subField, Deque fnames, ResponseBuilder rb, DocSet docs, int minMatch ) throws IOException + { + SolrIndexSearcher searcher = rb.req.getSearcher(); + // TODO: optimize to avoid converting to an external string and then having to convert back to internal below + FieldType ftype = null; + + // SimpleFacets sf = getFacetImplementation(rb.req, docs, rb.req.getParams()); + String nextField = fnames.poll(); + + List> values = new ArrayList>( superFacets.size() ); + for (Map.Entry kv : superFacets) { + // Only sub-facet if parent facet has positive count - still may not be any values for the sub-field though + if (kv.getValue() > minMatch ) { + SimpleOrderedMap pivot = new SimpleOrderedMap(); + pivot.add( "field", field ); + pivot.add( "value", kv.getKey() ); + pivot.add( "count", kv.getValue() ); + + if( subField == null ) { + values.add( pivot ); + } + else { + String s = kv.getKey(); + if( ftype == null ) { + ftype = searcher.getSchema().getField(field).getType(); + } + + Query query = new TermQuery(new Term(field, ftype.toInternal(s))); + DocSet subset = searcher.getDocSet(query, docs); + SimpleFacets sf = getFacetImplementation(rb.req, subset, rb.req.getParams()); + + NamedList nl = sf.getTermCounts(subField); + if (nl.size() > minMatch ) { + pivot.add( "pivot", doPivots( nl, subField, nextField, fnames, rb, subset, minMatch ) ); + values.add( pivot ); // only add response if there are some counts + } + } + } + } + + // put the field back on the list + fnames.push( nextField ); + return values; + } + + @Override + public int distributedProcess(ResponseBuilder rb) throws IOException { + if (!rb.doFacets) { + return ResponseBuilder.STAGE_DONE; + } + + if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) { + SolrParams params = rb.req.getParams(); + String[] pivots = params.getParams(FacetParams.FACET_PIVOT); + for ( ShardRequest sreq : rb.outgoing ) { + if (( sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS ) != 0 + && sreq.shards != null && sreq.shards.length == 1 ) { + sreq.params.set( FacetParams.FACET, "true" ); + sreq.params.set( FacetParams.FACET_PIVOT, pivots ); + sreq.params.set( FacetParams.FACET_PIVOT_MINCOUNT, 1 ); // keep this at 1 regardless so that it accumulates everything + } + } + } + return ResponseBuilder.STAGE_DONE; + } + + @Override + public void handleResponses(ResponseBuilder rb, ShardRequest sreq) { + if (!rb.doFacets) return; + + + if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) { + SimpleOrderedMap>> tf = rb._pivots; + if ( null == tf ) { + tf = new SimpleOrderedMap>>(); + rb._pivots = tf; + } + for (ShardResponse srsp: sreq.responses) { + int shardNum = rb.getShardNum(srsp.getShard()); + + NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts"); + + // handle facet trees from shards + SimpleOrderedMap>> shard_pivots = + (SimpleOrderedMap>>)facet_counts.get( PIVOT_KEY ); + + if ( shard_pivots != null ) { + for (int j=0; j< shard_pivots.size(); j++) { + // TODO -- accumulate the results from each shard + // The following code worked to accumulate facets for an previous + // two level patch... it is here for reference till someone can upgrade + /** + String shard_tree_name = (String) shard_pivots.getName( j ); + SimpleOrderedMap shard_tree = (SimpleOrderedMap)shard_pivots.getVal( j ); + SimpleOrderedMap facet_tree = tf.get( shard_tree_name ); + if ( null == facet_tree) { + facet_tree = new SimpleOrderedMap(); + tf.add( shard_tree_name, facet_tree ); + } + + for( int o = 0; o < shard_tree.size() ; o++ ) { + String shard_outer = (String) shard_tree.getName( o ); + NamedList shard_innerList = (NamedList) shard_tree.getVal( o ); + NamedList tree_innerList = (NamedList) facet_tree.get( shard_outer ); + if ( null == tree_innerList ) { + tree_innerList = new NamedList(); + facet_tree.add( shard_outer, tree_innerList ); + } + + for ( int i = 0 ; i < shard_innerList.size() ; i++ ) { + String shard_term = (String) shard_innerList.getName( i ); + long shard_count = ((Number) shard_innerList.getVal(i)).longValue(); + int tree_idx = tree_innerList.indexOf( shard_term, 0 ); + + if ( -1 == tree_idx ) { + tree_innerList.add( shard_term, shard_count ); + } else { + long tree_count = ((Number) tree_innerList.getVal( tree_idx )).longValue(); + tree_innerList.setVal( tree_idx, shard_count + tree_count ); + } + } // innerList loop + } // outer loop + **/ + } // each tree loop + } + } + } + return ; + } + + @Override + public void finishStage(ResponseBuilder rb) { + if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return; + // wait until STAGE_GET_FIELDS + // so that "result" is already stored in the response (for aesthetics) + + SimpleOrderedMap>> tf = rb._pivots; + + // get 'facet_counts' from the response + NamedList facetCounts = (NamedList) rb.rsp.getValues().get("facet_counts"); + if (facetCounts == null) { + facetCounts = new NamedList(); + rb.rsp.add("facet_counts", facetCounts); + } + facetCounts.add( PIVOT_KEY, tf ); + rb._pivots = null; + } + + public String getDescription() { + return "Handle Pivot (multi-level) Faceting"; + } + + public String getSourceId() { + return "$Id: $"; + } + + public String getSource() { + return "$URL: $"; + } + + public String getVersion() { + return "$Revision: $"; + } +} diff --git a/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java b/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java index c69e73c7009..0f54504395b 100644 --- a/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java +++ b/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java @@ -141,6 +141,7 @@ public class ResponseBuilder { SolrDocumentList _responseDocs; StatsInfo _statsInfo; TermsComponent.TermsHelper _termsHelper; + SimpleOrderedMap>> _pivots; /** * Utility function to add debugging info. This will make sure a valid diff --git a/solr/src/solrj/org/apache/solr/client/solrj/SolrQuery.java b/solr/src/solrj/org/apache/solr/client/solrj/SolrQuery.java index a3f29d5cc17..930cdbd0cbb 100644 --- a/solr/src/solrj/org/apache/solr/client/solrj/SolrQuery.java +++ b/solr/src/solrj/org/apache/solr/client/solrj/SolrQuery.java @@ -214,6 +214,19 @@ public class SolrQuery extends ModifiableSolrParams return this; } + /** Add field(s) for pivot computation. + * + * pivot fields are comma separated + * + * @param fields Array of field names from the IndexSchema + * @return this + */ + public SolrQuery addFacetPivotField(String ... fields) { + add(FacetParams.FACET_PIVOT, fields); + this.set(FacetParams.FACET, true); + return this; + } + /** get the facet fields * * @return string array of facet fields or null if not set/empty diff --git a/solr/src/solrj/org/apache/solr/client/solrj/response/PivotField.java b/solr/src/solrj/org/apache/solr/client/solrj/response/PivotField.java new file mode 100644 index 00000000000..b6babe7e259 --- /dev/null +++ b/solr/src/solrj/org/apache/solr/client/solrj/response/PivotField.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.client.solrj.response; + +import java.io.PrintStream; +import java.io.PrintWriter; +import java.io.Serializable; +import java.util.List; + +public class PivotField implements Serializable +{ + final String _field; + final Object _value; + final int _count; + final List _pivot; + + public PivotField( String f, Object v, int count, List pivot ) + { + _field = f; + _value = v; + _count = count; + _pivot = pivot; + } + + public String getField() { + return _field; + } + + public Object getValue() { + return _value; + } + + public int getCount() { + return _count; + } + + public List getPivot() { + return _pivot; + } + + @Override + public String toString() + { + return _field + ":" + _value + " ["+_count+"] "+_pivot; + } + + public void write( PrintStream out, int indent ) + { + for( int i=0; i _facetFields = null; private List _limitingFacets = null; private List _facetDates = null; + private NamedList> _facetPivot = null; // Highlight Info private Map>> _highlighting = null; @@ -241,6 +242,29 @@ public class QueryResponse extends SolrResponseBase _facetDates.add(f); } } + + //Parse pivot facets + NamedList pf = (NamedList) info.get("facet_pivot"); + if (pf != null) { + _facetPivot = new NamedList>(); + for( int i=0; i)pf.getVal(i) ) ); + } + } + } + + protected List readPivots( List list ) + { + ArrayList values = new ArrayList( list.size() ); + for( NamedList nl : list ) { + // NOTE, this is cheating, but we know the order they are written in, so no need to check + String f = (String)nl.getVal( 0 ); + Object v = (Object)nl.getVal( 1 ); + int cnt = ((Integer)nl.getVal( 2 )).intValue(); + List p = (nl.size()<4)?null:readPivots((List)nl.getVal(3) ); + values.add( new PivotField( f, v, cnt, p ) ); + } + return values; } //------------------------------------------------------ @@ -302,6 +326,10 @@ public class QueryResponse extends SolrResponseBase public List getFacetDates() { return _facetDates; } + + public NamedList> getFacetPivot() { + return _facetPivot; + } /** get * diff --git a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java index 4c75a86dc7f..0d23f8bf474 100644 --- a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java +++ b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java @@ -36,6 +36,7 @@ import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest; import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.AbstractUpdateRequest.ACTION; import org.apache.solr.client.solrj.response.LukeResponse; +import org.apache.solr.client.solrj.response.PivotField; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.FacetField; import org.apache.solr.client.solrj.response.UpdateResponse; @@ -565,6 +566,105 @@ abstract public class SolrExampleTests extends SolrJettyTestBase // System.out.println( rsp.getResults().getNumFound() + " :::: 444: "+ff.getValues() ); } + @Test + public void testPivotFacet() throws Exception + { + SolrServer server = getSolrServer(); + + // Empty the database... + server.deleteByQuery( "*:*" );// delete everything! + server.commit(); + assertNumFound( "*:*", 0 ); // make sure it got in + + int id = 1; + ArrayList docs = new ArrayList(); + docs.add( makeTestDoc( "id", id++, "features", "AAA", "cat", "a", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "AAA", "cat", "a", "inStock", false ) ); + docs.add( makeTestDoc( "id", id++, "features", "AAA", "cat", "a", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "AAA", "cat", "b", "inStock", false ) ); + docs.add( makeTestDoc( "id", id++, "features", "AAA", "cat", "b", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "a", "inStock", false ) ); + docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "a", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "b", "inStock", false ) ); + docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "b", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "b", "inStock", false ) ); + docs.add( makeTestDoc( "id", id++, "features", "BBB", "cat", "b", "inStock", true ) ); + docs.add( makeTestDoc( "id", id++ ) ); // something not matching + server.add( docs ); + server.commit(); + + SolrQuery query = new SolrQuery( "*:*" ); + query.addFacetPivotField("features,cat", "cat,features", "features,cat,inStock" ); + query.setFacetMinCount( 0 ); + query.setRows( 0 ); + + QueryResponse rsp = server.query( query ); + assertEquals( docs.size(), rsp.getResults().getNumFound() ); + + NamedList> pivots = rsp.getFacetPivot(); + assertEquals( 3, pivots.size() ); + +// for(Map.Entry> entry : pivots ) { +// System.out.println( "PIVOT: "+entry.getKey() ); +// for( PivotField p : entry.getValue() ) { +// p.write(System.out, 0 ); +// } +// System.out.println(); +// } + + // Now make sure they have reasonable stuff + List pivot = pivots.getVal( 0 ); + assertEquals( "features,cat", pivots.getName( 0 ) ); + assertEquals( 2, pivot.size() ); + + PivotField ff = pivot.get( 0 ); + assertEquals( "features", ff.getField() ); + assertEquals( "bbb", ff.getValue() ); + assertEquals( 6, ff.getCount() ); + List counts = ff.getPivot(); + assertEquals( 2, counts.size() ); + assertEquals( "cat", counts.get(0).getField() ); + assertEquals( "b", counts.get(0).getValue() ); + assertEquals( 4, counts.get(0).getCount() ); + assertEquals( "a", counts.get(1).getValue() ); + assertEquals( 2, counts.get(1).getCount() ); + + ff = pivot.get( 1 ); + assertEquals( "features", ff.getField() ); + assertEquals( "aaa", ff.getValue() ); + assertEquals( 5, ff.getCount() ); + counts = ff.getPivot(); + assertEquals( 2, counts.size() ); + assertEquals( "a", counts.get(0).getValue() ); + assertEquals( 3, counts.get(0).getCount() ); + assertEquals( "b", counts.get(1).getValue() ); + assertEquals( 2, counts.get(1).getCount() ); + + // 3 deep + pivot = pivots.getVal( 2 ); + assertEquals( "features,cat,inStock", pivots.getName( 2 ) ); + assertEquals( 2, pivot.size() ); + PivotField p = pivot.get( 1 ).getPivot().get(0); + assertEquals( "cat", p.getField() ); + assertEquals( "a", p.getValue() ); + counts = p.getPivot(); + // p.write(System.out, 5 ); + assertEquals( 1, counts.size() ); + assertEquals( "inStock", counts.get(0).getField() ); + assertEquals( "true", counts.get(0).getValue() ); + assertEquals( 2, counts.get(0).getCount() ); + } + + public static SolrInputDocument makeTestDoc( Object ... kvp ) + { + SolrInputDocument doc = new SolrInputDocument(); + for( int i=0; i