From 849e40a560341bb813565199552c69f66f77a606 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Fri, 18 May 2007 17:34:27 +0000 Subject: [PATCH] SOLR-221 compare minimum count currently needed to the term df and avoid unnecessary intersection count, allow minimum term df in order to use the filterCache, otherwise iterate over TermDocs git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@539531 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/solr/request/SimpleFacets.java | 31 +++++++++- .../org/apache/solr/request/SolrParams.java | 7 +++ .../solr/util/AbstractSolrTestCase.java | 18 +++++- .../apache/solr/BasicFunctionalityTest.java | 58 ++++++++++--------- 4 files changed, 83 insertions(+), 31 deletions(-) diff --git a/src/java/org/apache/solr/request/SimpleFacets.java b/src/java/org/apache/solr/request/SimpleFacets.java index 346196aea2a..0ca28eca668 100644 --- a/src/java/org/apache/solr/request/SimpleFacets.java +++ b/src/java/org/apache/solr/request/SimpleFacets.java @@ -20,10 +20,12 @@ package org.apache.solr.request; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.TermDocs; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.*; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrException; +import org.apache.solr.core.SolrConfig; import org.apache.solr.request.SolrParams; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.FieldType; @@ -303,6 +305,7 @@ public class SimpleFacets { return res; } + /** * Returns a list of terms in the specified field along with the * corresponding count of documents in the set that match that constraint. @@ -321,6 +324,9 @@ public class SimpleFacets { * don't enum if we get our max from them */ + // Minimum term docFreq in order to use the filterCache for that term. + int minDfFilterCache = params.getFieldInt(field, SolrParams.FACET_ENUM_CACHE_MINDF, 0); + IndexSchema schema = searcher.getSchema(); IndexReader r = searcher.getReader(); FieldType ft = schema.getFieldType(field); @@ -335,6 +341,7 @@ public class SimpleFacets { String startTerm = prefix==null ? "" : ft.toInternal(prefix); TermEnum te = r.terms(new Term(field,startTerm)); + TermDocs td = r.termDocs(); do { Term t = te.term(); @@ -345,8 +352,23 @@ public class SimpleFacets { int df = te.docFreq(); - if (df>0) { /* check df since all docs may be deleted */ - int c = searcher.numDocs(new TermQuery(t), docs); + // If we are sorting, we can use df>min (rather than >=) since we + // are going in index order. For certain term distributions this can + // make a large difference (for example, many terms with df=1). + if (df>0 && df>min) { + int c; + + if (df >= minDfFilterCache) { + // use the filter cache + c = searcher.numDocs(new TermQuery(t), docs); + } else { + // iterate over TermDocs to calculate the intersection + td.seek(te); + c=0; + while (td.next()) { + if (docs.exists(td.doc())) c++; + } + } if (sort) { if (c>min) { @@ -373,7 +395,10 @@ public class SimpleFacets { if (missing) { res.add(null, getFieldMissingCount(searcher,docs,field)); } - + + te.close(); + td.close(); + return res; } diff --git a/src/java/org/apache/solr/request/SolrParams.java b/src/java/org/apache/solr/request/SolrParams.java index 8d45506aa5d..2ec80bc7128 100644 --- a/src/java/org/apache/solr/request/SolrParams.java +++ b/src/java/org/apache/solr/request/SolrParams.java @@ -117,6 +117,13 @@ public abstract class SolrParams { */ public static final String FACET_PREFIX = "facet.prefix"; + /** + * When faceting by enumerating the terms in a field, + * only use the filterCache for terms with a df >= to this parameter. + */ + public static final String FACET_ENUM_CACHE_MINDF = "facet.enum.cache.minDf"; + + /** If the content stream should come from a URL (using URLConnection) */ public static final String STREAM_URL = "stream.url"; diff --git a/src/java/org/apache/solr/util/AbstractSolrTestCase.java b/src/java/org/apache/solr/util/AbstractSolrTestCase.java index 5a4cc27505b..30c99b0969d 100644 --- a/src/java/org/apache/solr/util/AbstractSolrTestCase.java +++ b/src/java/org/apache/solr/util/AbstractSolrTestCase.java @@ -287,7 +287,23 @@ public abstract class AbstractSolrTestCase extends TestCase { public SolrQueryRequest req(String... q) { return lrf.makeRequest(q); } - + + /** + * Generates a SolrQueryRequest using the LocalRequestFactory + * @see #lrf + */ + public SolrQueryRequest req(String[] params, String... moreParams) { + String[] allParams = moreParams; + if (params.length!=0) { + int len = params.length + moreParams.length; + allParams = new String[len]; + System.arraycopy(params,0,allParams,0,params.length); + System.arraycopy(moreParams,0,allParams,params.length,moreParams.length); + } + + return lrf.makeRequest(allParams); + } + /** Neccessary to make method signatures un-ambiguous */ public static class Doc { public String xml; diff --git a/src/test/org/apache/solr/BasicFunctionalityTest.java b/src/test/org/apache/solr/BasicFunctionalityTest.java index ce6223606ba..962ef963e4e 100644 --- a/src/test/org/apache/solr/BasicFunctionalityTest.java +++ b/src/test/org/apache/solr/BasicFunctionalityTest.java @@ -610,13 +610,15 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { public void testFacetMultiValued() { doFacets("t_s"); + doFacets("t_s", "facet.enum.cache.minDf", "2"); + doFacets("t_s", "facet.enum.cache.minDf", "100"); } public void testFacetSingleValued() { doFacets("t_s1"); } - public void doFacets(String f) { + public void doFacets(String f, String... params) { String pre = "//lst[@name='"+f+"']"; String notc = "id:[* TO *] -"+f+":C"; @@ -637,7 +639,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { assertU(commit()); assertQ("check counts for unlimited facet", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"facet", "true" ,"facet.field", f ) @@ -654,7 +656,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("check counts for facet with generous limit", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"facet", "true" ,"facet.limit", "100" ,"facet.field", f @@ -672,7 +674,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("check counts for limited facet", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"facet", "true" ,"facet.limit", "2" ,"facet.field", f @@ -684,7 +686,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("check offset", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"facet", "true" ,"facet.offset", "1" ,"facet.limit", "1" @@ -696,7 +698,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test sorted facet paging with zero (don't count in limit)", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"fq",notc ,"facet", "true" ,"facet.field", f @@ -714,7 +716,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test sorted facet paging with zero (test offset correctness)", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"fq",notc ,"facet", "true" ,"facet.field", f @@ -729,7 +731,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet unsorted paging", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"fq",notc ,"facet", "true" ,"facet.field", f @@ -748,7 +750,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet unsorted paging", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"fq",notc ,"facet", "true" ,"facet.field", f @@ -763,7 +765,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet unsorted paging, mincount=2", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"fq",notc ,"facet", "true" ,"facet.field", f @@ -780,14 +782,16 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { public void testFacetPrefixMultiValued() { - doFacetPrefix("t_s"); + doFacetPrefix("t_s"); + doFacetPrefix("t_s", "facet.enum.cache.minDf", "3"); + doFacetPrefix("t_s", "facet.enum.cache.minDf", "100"); } public void testFacetPrefixSingleValued() { doFacetPrefix("t_s1"); } - public void doFacetPrefix(String f) { + public void doFacetPrefix(String f, String... params) { String indent="on"; String pre = "//lst[@name='"+f+"']"; String notc = "id:[* TO *] -"+f+":C"; @@ -807,7 +811,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { assertU(commit()); assertQ("test facet.prefix middle, exact match first term", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -824,7 +828,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet.prefix middle, exact match first term, unsorted", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -842,7 +846,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { assertQ("test facet.prefix middle, exact match first term, unsorted", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -860,7 +864,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { assertQ("test facet.prefix middle, paging", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -876,7 +880,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet.prefix middle, paging", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -891,7 +895,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet.prefix middle, paging", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -906,7 +910,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet.prefix end, not exact match", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -922,7 +926,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet.prefix end, exact match", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -938,7 +942,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet.prefix past end", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -952,7 +956,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet.prefix past end", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -966,7 +970,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet.prefix at start, exact match", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -980,7 +984,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ,pre+"/int[1][@name='AAA'][.='1']" ); assertQ("test facet.prefix at Start, not exact match", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -994,7 +998,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ,pre+"/int[1][@name='AAA'][.='1']" ); assertQ("test facet.prefix at Start, not exact match", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -1008,7 +1012,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ,pre+"/int[1][@name='AAA'][.='1']" ); assertQ("test facet.prefix before start", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f @@ -1022,7 +1026,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase { ); assertQ("test facet.prefix before start", - req("q", "id:[* TO *]" + req(params, "q", "id:[* TO *]" ,"indent",indent ,"facet","true" ,"facet.field", f