SOLR-221 compare minimum count currently needed to the term df and avoid unnecessary intersection count, allow minimum term df in order to use the filterCache, otherwise iterate over TermDocs

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@539531 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2007-05-18 17:34:27 +00:00
parent d272f3e473
commit 849e40a560
4 changed files with 83 additions and 31 deletions

View File

@ -20,10 +20,12 @@ package org.apache.solr.request;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.*; import org.apache.lucene.search.*;
import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException; import org.apache.solr.core.SolrException;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.request.SolrParams; import org.apache.solr.request.SolrParams;
import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
@ -303,6 +305,7 @@ public class SimpleFacets {
return res; return res;
} }
/** /**
* Returns a list of terms in the specified field along with the * Returns a list of terms in the specified field along with the
* corresponding count of documents in the set that match that constraint. * corresponding count of documents in the set that match that constraint.
@ -321,6 +324,9 @@ public class SimpleFacets {
* don't enum if we get our max from them * don't enum if we get our max from them
*/ */
// Minimum term docFreq in order to use the filterCache for that term.
int minDfFilterCache = params.getFieldInt(field, SolrParams.FACET_ENUM_CACHE_MINDF, 0);
IndexSchema schema = searcher.getSchema(); IndexSchema schema = searcher.getSchema();
IndexReader r = searcher.getReader(); IndexReader r = searcher.getReader();
FieldType ft = schema.getFieldType(field); FieldType ft = schema.getFieldType(field);
@ -335,6 +341,7 @@ public class SimpleFacets {
String startTerm = prefix==null ? "" : ft.toInternal(prefix); String startTerm = prefix==null ? "" : ft.toInternal(prefix);
TermEnum te = r.terms(new Term(field,startTerm)); TermEnum te = r.terms(new Term(field,startTerm));
TermDocs td = r.termDocs();
do { do {
Term t = te.term(); Term t = te.term();
@ -345,8 +352,23 @@ public class SimpleFacets {
int df = te.docFreq(); int df = te.docFreq();
if (df>0) { /* check df since all docs may be deleted */ // If we are sorting, we can use df>min (rather than >=) since we
int c = searcher.numDocs(new TermQuery(t), docs); // are going in index order. For certain term distributions this can
// make a large difference (for example, many terms with df=1).
if (df>0 && df>min) {
int c;
if (df >= minDfFilterCache) {
// use the filter cache
c = searcher.numDocs(new TermQuery(t), docs);
} else {
// iterate over TermDocs to calculate the intersection
td.seek(te);
c=0;
while (td.next()) {
if (docs.exists(td.doc())) c++;
}
}
if (sort) { if (sort) {
if (c>min) { if (c>min) {
@ -373,7 +395,10 @@ public class SimpleFacets {
if (missing) { if (missing) {
res.add(null, getFieldMissingCount(searcher,docs,field)); res.add(null, getFieldMissingCount(searcher,docs,field));
} }
te.close();
td.close();
return res; return res;
} }

View File

@ -117,6 +117,13 @@ public abstract class SolrParams {
*/ */
public static final String FACET_PREFIX = "facet.prefix"; public static final String FACET_PREFIX = "facet.prefix";
/**
* When faceting by enumerating the terms in a field,
* only use the filterCache for terms whose document frequency (df) is greater than or equal to this parameter.
*/
public static final String FACET_ENUM_CACHE_MINDF = "facet.enum.cache.minDf";
/** If the content stream should come from a URL (using URLConnection) */ /** If the content stream should come from a URL (using URLConnection) */
public static final String STREAM_URL = "stream.url"; public static final String STREAM_URL = "stream.url";

View File

@ -287,7 +287,23 @@ public abstract class AbstractSolrTestCase extends TestCase {
public SolrQueryRequest req(String... q) { public SolrQueryRequest req(String... q) {
return lrf.makeRequest(q); return lrf.makeRequest(q);
} }
/**
 * Builds a SolrQueryRequest through the LocalRequestFactory from two
 * parameter arrays. The entries of {@code params} are placed first,
 * followed by the entries of {@code moreParams}; when {@code params} is
 * empty, {@code moreParams} is handed to the factory unchanged.
 *
 * @param params     leading request parameters (presumably alternating
 *                   name/value strings, as in the test callers -- confirm)
 * @param moreParams request parameters appended after {@code params}
 * @return the request produced by {@code lrf.makeRequest}
 * @see #lrf
 */
public SolrQueryRequest req(String[] params, String... moreParams) {
  if (params.length == 0) {
    // Nothing to prepend, so no copy is needed.
    return lrf.makeRequest(moreParams);
  }

  // Concatenate: params first, then moreParams.
  String[] merged = new String[params.length + moreParams.length];
  System.arraycopy(params, 0, merged, 0, params.length);
  System.arraycopy(moreParams, 0, merged, params.length, moreParams.length);
  return lrf.makeRequest(merged);
}
/** Necessary to make method signatures unambiguous */ /** Necessary to make method signatures unambiguous */
public static class Doc { public static class Doc {
public String xml; public String xml;

View File

@ -610,13 +610,15 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
public void testFacetMultiValued() { public void testFacetMultiValued() {
doFacets("t_s"); doFacets("t_s");
doFacets("t_s", "facet.enum.cache.minDf", "2");
doFacets("t_s", "facet.enum.cache.minDf", "100");
} }
public void testFacetSingleValued() { public void testFacetSingleValued() {
doFacets("t_s1"); doFacets("t_s1");
} }
public void doFacets(String f) { public void doFacets(String f, String... params) {
String pre = "//lst[@name='"+f+"']"; String pre = "//lst[@name='"+f+"']";
String notc = "id:[* TO *] -"+f+":C"; String notc = "id:[* TO *] -"+f+":C";
@ -637,7 +639,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
assertU(commit()); assertU(commit());
assertQ("check counts for unlimited facet", assertQ("check counts for unlimited facet",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"facet", "true" ,"facet", "true"
,"facet.field", f ,"facet.field", f
) )
@ -654,7 +656,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("check counts for facet with generous limit", assertQ("check counts for facet with generous limit",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"facet", "true" ,"facet", "true"
,"facet.limit", "100" ,"facet.limit", "100"
,"facet.field", f ,"facet.field", f
@ -672,7 +674,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("check counts for limited facet", assertQ("check counts for limited facet",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"facet", "true" ,"facet", "true"
,"facet.limit", "2" ,"facet.limit", "2"
,"facet.field", f ,"facet.field", f
@ -684,7 +686,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("check offset", assertQ("check offset",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"facet", "true" ,"facet", "true"
,"facet.offset", "1" ,"facet.offset", "1"
,"facet.limit", "1" ,"facet.limit", "1"
@ -696,7 +698,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test sorted facet paging with zero (don't count in limit)", assertQ("test sorted facet paging with zero (don't count in limit)",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"fq",notc ,"fq",notc
,"facet", "true" ,"facet", "true"
,"facet.field", f ,"facet.field", f
@ -714,7 +716,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test sorted facet paging with zero (test offset correctness)", assertQ("test sorted facet paging with zero (test offset correctness)",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"fq",notc ,"fq",notc
,"facet", "true" ,"facet", "true"
,"facet.field", f ,"facet.field", f
@ -729,7 +731,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet unsorted paging", assertQ("test facet unsorted paging",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"fq",notc ,"fq",notc
,"facet", "true" ,"facet", "true"
,"facet.field", f ,"facet.field", f
@ -748,7 +750,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet unsorted paging", assertQ("test facet unsorted paging",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"fq",notc ,"fq",notc
,"facet", "true" ,"facet", "true"
,"facet.field", f ,"facet.field", f
@ -763,7 +765,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet unsorted paging, mincount=2", assertQ("test facet unsorted paging, mincount=2",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"fq",notc ,"fq",notc
,"facet", "true" ,"facet", "true"
,"facet.field", f ,"facet.field", f
@ -780,14 +782,16 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
public void testFacetPrefixMultiValued() { public void testFacetPrefixMultiValued() {
doFacetPrefix("t_s"); doFacetPrefix("t_s");
doFacetPrefix("t_s", "facet.enum.cache.minDf", "3");
doFacetPrefix("t_s", "facet.enum.cache.minDf", "100");
} }
public void testFacetPrefixSingleValued() { public void testFacetPrefixSingleValued() {
doFacetPrefix("t_s1"); doFacetPrefix("t_s1");
} }
public void doFacetPrefix(String f) { public void doFacetPrefix(String f, String... params) {
String indent="on"; String indent="on";
String pre = "//lst[@name='"+f+"']"; String pre = "//lst[@name='"+f+"']";
String notc = "id:[* TO *] -"+f+":C"; String notc = "id:[* TO *] -"+f+":C";
@ -807,7 +811,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
assertU(commit()); assertU(commit());
assertQ("test facet.prefix middle, exact match first term", assertQ("test facet.prefix middle, exact match first term",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -824,7 +828,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet.prefix middle, exact match first term, unsorted", assertQ("test facet.prefix middle, exact match first term, unsorted",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -842,7 +846,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
assertQ("test facet.prefix middle, exact match first term, unsorted", assertQ("test facet.prefix middle, exact match first term, unsorted",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -860,7 +864,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
assertQ("test facet.prefix middle, paging", assertQ("test facet.prefix middle, paging",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -876,7 +880,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet.prefix middle, paging", assertQ("test facet.prefix middle, paging",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -891,7 +895,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet.prefix middle, paging", assertQ("test facet.prefix middle, paging",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -906,7 +910,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet.prefix end, not exact match", assertQ("test facet.prefix end, not exact match",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -922,7 +926,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet.prefix end, exact match", assertQ("test facet.prefix end, exact match",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -938,7 +942,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet.prefix past end", assertQ("test facet.prefix past end",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -952,7 +956,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet.prefix past end", assertQ("test facet.prefix past end",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -966,7 +970,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet.prefix at start, exact match", assertQ("test facet.prefix at start, exact match",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -980,7 +984,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
,pre+"/int[1][@name='AAA'][.='1']" ,pre+"/int[1][@name='AAA'][.='1']"
); );
assertQ("test facet.prefix at Start, not exact match", assertQ("test facet.prefix at Start, not exact match",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -994,7 +998,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
,pre+"/int[1][@name='AAA'][.='1']" ,pre+"/int[1][@name='AAA'][.='1']"
); );
assertQ("test facet.prefix at Start, not exact match", assertQ("test facet.prefix at Start, not exact match",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -1008,7 +1012,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
,pre+"/int[1][@name='AAA'][.='1']" ,pre+"/int[1][@name='AAA'][.='1']"
); );
assertQ("test facet.prefix before start", assertQ("test facet.prefix before start",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f
@ -1022,7 +1026,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
); );
assertQ("test facet.prefix before start", assertQ("test facet.prefix before start",
req("q", "id:[* TO *]" req(params, "q", "id:[* TO *]"
,"indent",indent ,"indent",indent
,"facet","true" ,"facet","true"
,"facet.field", f ,"facet.field", f