mirror of https://github.com/apache/lucene.git
SOLR-221 compare minimum count currently needed to the term df and avoid unnecessary intersection count, allow minimum term df in order to use the filterCache, otherwise iterate over TermDocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@539531 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d272f3e473
commit
849e40a560
|
@ -20,10 +20,12 @@ package org.apache.solr.request;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermEnum;
|
import org.apache.lucene.index.TermEnum;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.TermDocs;
|
||||||
import org.apache.lucene.queryParser.ParseException;
|
import org.apache.lucene.queryParser.ParseException;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.solr.core.SolrCore;
|
import org.apache.solr.core.SolrCore;
|
||||||
import org.apache.solr.core.SolrException;
|
import org.apache.solr.core.SolrException;
|
||||||
|
import org.apache.solr.core.SolrConfig;
|
||||||
import org.apache.solr.request.SolrParams;
|
import org.apache.solr.request.SolrParams;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
import org.apache.solr.schema.FieldType;
|
import org.apache.solr.schema.FieldType;
|
||||||
|
@ -303,6 +305,7 @@ public class SimpleFacets {
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a list of terms in the specified field along with the
|
* Returns a list of terms in the specified field along with the
|
||||||
* corresponding count of documents in the set that match that constraint.
|
* corresponding count of documents in the set that match that constraint.
|
||||||
|
@ -321,6 +324,9 @@ public class SimpleFacets {
|
||||||
* don't enum if we get our max from them
|
* don't enum if we get our max from them
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// Minimum term docFreq in order to use the filterCache for that term.
|
||||||
|
int minDfFilterCache = params.getFieldInt(field, SolrParams.FACET_ENUM_CACHE_MINDF, 0);
|
||||||
|
|
||||||
IndexSchema schema = searcher.getSchema();
|
IndexSchema schema = searcher.getSchema();
|
||||||
IndexReader r = searcher.getReader();
|
IndexReader r = searcher.getReader();
|
||||||
FieldType ft = schema.getFieldType(field);
|
FieldType ft = schema.getFieldType(field);
|
||||||
|
@ -335,6 +341,7 @@ public class SimpleFacets {
|
||||||
|
|
||||||
String startTerm = prefix==null ? "" : ft.toInternal(prefix);
|
String startTerm = prefix==null ? "" : ft.toInternal(prefix);
|
||||||
TermEnum te = r.terms(new Term(field,startTerm));
|
TermEnum te = r.terms(new Term(field,startTerm));
|
||||||
|
TermDocs td = r.termDocs();
|
||||||
do {
|
do {
|
||||||
Term t = te.term();
|
Term t = te.term();
|
||||||
|
|
||||||
|
@ -345,8 +352,23 @@ public class SimpleFacets {
|
||||||
|
|
||||||
int df = te.docFreq();
|
int df = te.docFreq();
|
||||||
|
|
||||||
if (df>0) { /* check df since all docs may be deleted */
|
// If we are sorting, we can use df>min (rather than >=) since we
|
||||||
int c = searcher.numDocs(new TermQuery(t), docs);
|
// are going in index order. For certain term distributions this can
|
||||||
|
// make a large difference (for example, many terms with df=1).
|
||||||
|
if (df>0 && df>min) {
|
||||||
|
int c;
|
||||||
|
|
||||||
|
if (df >= minDfFilterCache) {
|
||||||
|
// use the filter cache
|
||||||
|
c = searcher.numDocs(new TermQuery(t), docs);
|
||||||
|
} else {
|
||||||
|
// iterate over TermDocs to calculate the intersection
|
||||||
|
td.seek(te);
|
||||||
|
c=0;
|
||||||
|
while (td.next()) {
|
||||||
|
if (docs.exists(td.doc())) c++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (sort) {
|
if (sort) {
|
||||||
if (c>min) {
|
if (c>min) {
|
||||||
|
@ -374,6 +396,9 @@ public class SimpleFacets {
|
||||||
res.add(null, getFieldMissingCount(searcher,docs,field));
|
res.add(null, getFieldMissingCount(searcher,docs,field));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
te.close();
|
||||||
|
td.close();
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -117,6 +117,13 @@ public abstract class SolrParams {
|
||||||
*/
|
*/
|
||||||
public static final String FACET_PREFIX = "facet.prefix";
|
public static final String FACET_PREFIX = "facet.prefix";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When faceting by enumerating the terms in a field,
|
||||||
|
* only use the filterCache for terms with a df >= to this parameter.
|
||||||
|
*/
|
||||||
|
public static final String FACET_ENUM_CACHE_MINDF = "facet.enum.cache.minDf";
|
||||||
|
|
||||||
|
|
||||||
/** If the content stream should come from a URL (using URLConnection) */
|
/** If the content stream should come from a URL (using URLConnection) */
|
||||||
public static final String STREAM_URL = "stream.url";
|
public static final String STREAM_URL = "stream.url";
|
||||||
|
|
||||||
|
|
|
@ -288,6 +288,22 @@ public abstract class AbstractSolrTestCase extends TestCase {
|
||||||
return lrf.makeRequest(q);
|
return lrf.makeRequest(q);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates a SolrQueryRequest using the LocalRequestFactory
|
||||||
|
* @see #lrf
|
||||||
|
*/
|
||||||
|
public SolrQueryRequest req(String[] params, String... moreParams) {
|
||||||
|
String[] allParams = moreParams;
|
||||||
|
if (params.length!=0) {
|
||||||
|
int len = params.length + moreParams.length;
|
||||||
|
allParams = new String[len];
|
||||||
|
System.arraycopy(params,0,allParams,0,params.length);
|
||||||
|
System.arraycopy(moreParams,0,allParams,params.length,moreParams.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
return lrf.makeRequest(allParams);
|
||||||
|
}
|
||||||
|
|
||||||
/** Neccessary to make method signatures un-ambiguous */
|
/** Neccessary to make method signatures un-ambiguous */
|
||||||
public static class Doc {
|
public static class Doc {
|
||||||
public String xml;
|
public String xml;
|
||||||
|
|
|
@ -610,13 +610,15 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
|
|
||||||
public void testFacetMultiValued() {
|
public void testFacetMultiValued() {
|
||||||
doFacets("t_s");
|
doFacets("t_s");
|
||||||
|
doFacets("t_s", "facet.enum.cache.minDf", "2");
|
||||||
|
doFacets("t_s", "facet.enum.cache.minDf", "100");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFacetSingleValued() {
|
public void testFacetSingleValued() {
|
||||||
doFacets("t_s1");
|
doFacets("t_s1");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void doFacets(String f) {
|
public void doFacets(String f, String... params) {
|
||||||
String pre = "//lst[@name='"+f+"']";
|
String pre = "//lst[@name='"+f+"']";
|
||||||
String notc = "id:[* TO *] -"+f+":C";
|
String notc = "id:[* TO *] -"+f+":C";
|
||||||
|
|
||||||
|
@ -637,7 +639,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
assertU(commit());
|
assertU(commit());
|
||||||
|
|
||||||
assertQ("check counts for unlimited facet",
|
assertQ("check counts for unlimited facet",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"facet", "true"
|
,"facet", "true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
)
|
)
|
||||||
|
@ -654,7 +656,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("check counts for facet with generous limit",
|
assertQ("check counts for facet with generous limit",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"facet", "true"
|
,"facet", "true"
|
||||||
,"facet.limit", "100"
|
,"facet.limit", "100"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -672,7 +674,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("check counts for limited facet",
|
assertQ("check counts for limited facet",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"facet", "true"
|
,"facet", "true"
|
||||||
,"facet.limit", "2"
|
,"facet.limit", "2"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -684,7 +686,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("check offset",
|
assertQ("check offset",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"facet", "true"
|
,"facet", "true"
|
||||||
,"facet.offset", "1"
|
,"facet.offset", "1"
|
||||||
,"facet.limit", "1"
|
,"facet.limit", "1"
|
||||||
|
@ -696,7 +698,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test sorted facet paging with zero (don't count in limit)",
|
assertQ("test sorted facet paging with zero (don't count in limit)",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"fq",notc
|
,"fq",notc
|
||||||
,"facet", "true"
|
,"facet", "true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -714,7 +716,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test sorted facet paging with zero (test offset correctness)",
|
assertQ("test sorted facet paging with zero (test offset correctness)",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"fq",notc
|
,"fq",notc
|
||||||
,"facet", "true"
|
,"facet", "true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -729,7 +731,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet unsorted paging",
|
assertQ("test facet unsorted paging",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"fq",notc
|
,"fq",notc
|
||||||
,"facet", "true"
|
,"facet", "true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -748,7 +750,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet unsorted paging",
|
assertQ("test facet unsorted paging",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"fq",notc
|
,"fq",notc
|
||||||
,"facet", "true"
|
,"facet", "true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -763,7 +765,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet unsorted paging, mincount=2",
|
assertQ("test facet unsorted paging, mincount=2",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"fq",notc
|
,"fq",notc
|
||||||
,"facet", "true"
|
,"facet", "true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -781,13 +783,15 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
|
|
||||||
public void testFacetPrefixMultiValued() {
|
public void testFacetPrefixMultiValued() {
|
||||||
doFacetPrefix("t_s");
|
doFacetPrefix("t_s");
|
||||||
|
doFacetPrefix("t_s", "facet.enum.cache.minDf", "3");
|
||||||
|
doFacetPrefix("t_s", "facet.enum.cache.minDf", "100");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFacetPrefixSingleValued() {
|
public void testFacetPrefixSingleValued() {
|
||||||
doFacetPrefix("t_s1");
|
doFacetPrefix("t_s1");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void doFacetPrefix(String f) {
|
public void doFacetPrefix(String f, String... params) {
|
||||||
String indent="on";
|
String indent="on";
|
||||||
String pre = "//lst[@name='"+f+"']";
|
String pre = "//lst[@name='"+f+"']";
|
||||||
String notc = "id:[* TO *] -"+f+":C";
|
String notc = "id:[* TO *] -"+f+":C";
|
||||||
|
@ -807,7 +811,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
assertU(commit());
|
assertU(commit());
|
||||||
|
|
||||||
assertQ("test facet.prefix middle, exact match first term",
|
assertQ("test facet.prefix middle, exact match first term",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -824,7 +828,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet.prefix middle, exact match first term, unsorted",
|
assertQ("test facet.prefix middle, exact match first term, unsorted",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -842,7 +846,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
|
|
||||||
|
|
||||||
assertQ("test facet.prefix middle, exact match first term, unsorted",
|
assertQ("test facet.prefix middle, exact match first term, unsorted",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -860,7 +864,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
|
|
||||||
|
|
||||||
assertQ("test facet.prefix middle, paging",
|
assertQ("test facet.prefix middle, paging",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -876,7 +880,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet.prefix middle, paging",
|
assertQ("test facet.prefix middle, paging",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -891,7 +895,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet.prefix middle, paging",
|
assertQ("test facet.prefix middle, paging",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -906,7 +910,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet.prefix end, not exact match",
|
assertQ("test facet.prefix end, not exact match",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -922,7 +926,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet.prefix end, exact match",
|
assertQ("test facet.prefix end, exact match",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -938,7 +942,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet.prefix past end",
|
assertQ("test facet.prefix past end",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -952,7 +956,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet.prefix past end",
|
assertQ("test facet.prefix past end",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -966,7 +970,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet.prefix at start, exact match",
|
assertQ("test facet.prefix at start, exact match",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -980,7 +984,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
,pre+"/int[1][@name='AAA'][.='1']"
|
,pre+"/int[1][@name='AAA'][.='1']"
|
||||||
);
|
);
|
||||||
assertQ("test facet.prefix at Start, not exact match",
|
assertQ("test facet.prefix at Start, not exact match",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -994,7 +998,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
,pre+"/int[1][@name='AAA'][.='1']"
|
,pre+"/int[1][@name='AAA'][.='1']"
|
||||||
);
|
);
|
||||||
assertQ("test facet.prefix at Start, not exact match",
|
assertQ("test facet.prefix at Start, not exact match",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -1008,7 +1012,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
,pre+"/int[1][@name='AAA'][.='1']"
|
,pre+"/int[1][@name='AAA'][.='1']"
|
||||||
);
|
);
|
||||||
assertQ("test facet.prefix before start",
|
assertQ("test facet.prefix before start",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
@ -1022,7 +1026,7 @@ public class BasicFunctionalityTest extends AbstractSolrTestCase {
|
||||||
);
|
);
|
||||||
|
|
||||||
assertQ("test facet.prefix before start",
|
assertQ("test facet.prefix before start",
|
||||||
req("q", "id:[* TO *]"
|
req(params, "q", "id:[* TO *]"
|
||||||
,"indent",indent
|
,"indent",indent
|
||||||
,"facet","true"
|
,"facet","true"
|
||||||
,"facet.field", f
|
,"facet.field", f
|
||||||
|
|
Loading…
Reference in New Issue