mirror of https://github.com/apache/lucene.git
SOLR-11641: Change `frange` to default to `cost=100` so default behavior is to PostFilter if user specifies `cache=false`
This commit is contained in:
parent
79283834bc
commit
d77d54b1a6
|
@ -120,6 +120,9 @@ Optimizations
|
||||||
* SOLR-11320: Lock autoscaling triggers when changes they requested are being made. This helps to
|
* SOLR-11320: Lock autoscaling triggers when changes they requested are being made. This helps to
|
||||||
ensure that cluster is in a stable state before processing any new trigger events. (ab)
|
ensure that cluster is in a stable state before processing any new trigger events. (ab)
|
||||||
|
|
||||||
|
* SOLR-11641: Change `frange` to default to `cost=100` so default behavior is to PostFilter if user specifies
|
||||||
|
`cache=false` (hossman)
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-11478: Solr should remove itself from live_nodes in zk immediately on shutdown. (Cao Manh Dat)
|
* SOLR-11478: Solr should remove itself from live_nodes in zk immediately on shutdown. (Cao Manh Dat)
|
||||||
|
|
|
@ -33,6 +33,7 @@ public class FunctionRangeQuery extends SolrConstantScoreQuery implements PostFi
|
||||||
public FunctionRangeQuery(ValueSourceRangeFilter filter) {
|
public FunctionRangeQuery(ValueSourceRangeFilter filter) {
|
||||||
super(filter);
|
super(filter);
|
||||||
this.rangeFilt = filter;
|
this.rangeFilt = filter;
|
||||||
|
this.cost = 100; // default behavior should be PostFiltering
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -83,6 +83,25 @@ public class SOLR749Test extends SolrTestCaseJ4 {
|
||||||
int count = CountUsageValueSourceParser.getAndClearCount("frange_in_bq");
|
int count = CountUsageValueSourceParser.getAndClearCount("frange_in_bq");
|
||||||
assertTrue("frange_in_bq: " + count, (19 <= count && count <= 20));
|
assertTrue("frange_in_bq: " + count, (19 <= count && count <= 20));
|
||||||
|
|
||||||
|
// non-cached frange queries should default to post-filtering
|
||||||
|
// (ie: only be computed on candidates of other q/fq restrictions)
|
||||||
|
// regardless of how few/many docs match the frange
|
||||||
|
assertQ("query matching 1 doc w/ implicitly post-filtered frange matching all docs",
|
||||||
|
req("q","{!notfoo cache=false}*:*", // match all...
|
||||||
|
"fq","{!frange cache=false l=30 u=30}abs(id_i1)", // ...restrict to 1 match
|
||||||
|
// post filter will happily match all docs, but should only be asked about 1...
|
||||||
|
"fq","{!frange cache=false l=4.5 u=4.5 v='countUsage(postfilt_match_all,4.5)'})"),
|
||||||
|
"//result[@numFound=1]");
|
||||||
|
assertEquals(1, CountUsageValueSourceParser.getAndClearCount("postfilt_match_all"));
|
||||||
|
//
|
||||||
|
assertQ("query matching all docs w/ implicitly post-filtered frange matching no docs",
|
||||||
|
req("q","{!notfoo cache=false}id_i1:[20 TO 39]", // match some...
|
||||||
|
"fq","{!frange cache=false cost=0 l=50}abs(id_i1)", // ...regular conjunction filter rules out all
|
||||||
|
// post filter will happily match all docs, but should never be asked...
|
||||||
|
"fq","{!frange cache=false l=4.5 u=4.5 v='countUsage(postfilt_match_all,4.5)'})"),
|
||||||
|
"//result[@numFound=0]");
|
||||||
|
assertEquals(0, CountUsageValueSourceParser.getAndClearCount("postfilt_match_all"));
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
CountUsageValueSourceParser.clearCounters();
|
CountUsageValueSourceParser.clearCounters();
|
||||||
}
|
}
|
||||||
|
|
|
@ -370,8 +370,10 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
|
||||||
"myField","foo_i",
|
"myField","foo_i",
|
||||||
"myInner","product(4,foo_i)");
|
"myInner","product(4,foo_i)");
|
||||||
try {
|
try {
|
||||||
|
// NOTE: unlike most queries, frange defaults to cost==100
|
||||||
assertQueryEquals("frange", req,
|
assertQueryEquals("frange", req,
|
||||||
"{!frange l=0.2 h=20.4}sum(4,5)",
|
"{!frange l=0.2 h=20.4}sum(4,5)",
|
||||||
|
"{!frange l=0.2 h=20.4 cost=100}sum(4,5)",
|
||||||
"{!frange l=$low h=$high}sum(4,$myVar)");
|
"{!frange l=$low h=$high}sum(4,$myVar)");
|
||||||
} finally {
|
} finally {
|
||||||
req.close();
|
req.close();
|
||||||
|
|
|
@ -109,29 +109,38 @@ public class TestFiltering extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
int prevCount;
|
int prevCount;
|
||||||
|
|
||||||
|
// default cost uses post filtering (for frange)
|
||||||
prevCount = DelegatingCollector.setLastDelegateCount;
|
prevCount = DelegatingCollector.setLastDelegateCount;
|
||||||
assertJQ(req("q","*:*", "fq","{!frange l=2 u=3 cache=false cost=100}val_i")
|
assertJQ(req("q","*:*", "fq","{!frange l=2 u=3 cache=false}val_i")
|
||||||
,"/response/numFound==2"
|
,"/response/numFound==2"
|
||||||
);
|
);
|
||||||
assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount);
|
assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
|
||||||
// The exact same query the second time will be cached by the queryCache
|
// The exact same query the second time will be cached by the queryCache
|
||||||
prevCount = DelegatingCollector.setLastDelegateCount;
|
prevCount = DelegatingCollector.setLastDelegateCount;
|
||||||
assertJQ(req("q","*:*", "fq","{!frange l=2 u=3 cache=false cost=100}val_i")
|
assertJQ(req("q","*:*", "fq","{!frange l=2 u=3 cache=false}val_i")
|
||||||
,"/response/numFound==2"
|
,"/response/numFound==2"
|
||||||
);
|
);
|
||||||
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
|
||||||
// cache is true by default
|
// cache is true by default, even w/explicit low/high costs
|
||||||
prevCount = DelegatingCollector.setLastDelegateCount;
|
prevCount = DelegatingCollector.setLastDelegateCount;
|
||||||
assertJQ(req("q","*:*", "fq","{!frange l=2 u=4}val_i")
|
assertJQ(req("q","*:*", "fq","{!frange l=2 u=4}val_i")
|
||||||
,"/response/numFound==3"
|
,"/response/numFound==3"
|
||||||
);
|
);
|
||||||
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
assertJQ(req("q","*:*", "fq","{!frange l=2 u=4 cost=0}val_i")
|
||||||
|
,"/response/numFound==3"
|
||||||
|
);
|
||||||
|
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
assertJQ(req("q","*:*", "fq","{!frange l=2 u=4 cost=999}val_i")
|
||||||
|
,"/response/numFound==3"
|
||||||
|
);
|
||||||
|
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
|
||||||
// default cost avoids post filtering
|
// no caching and explicitly low cost avoids post filtering
|
||||||
prevCount = DelegatingCollector.setLastDelegateCount;
|
prevCount = DelegatingCollector.setLastDelegateCount;
|
||||||
assertJQ(req("q","*:*", "fq","{!frange l=2 u=5 cache=false}val_i")
|
assertJQ(req("q","*:*", "fq","{!frange l=2 u=5 cache=false cost=0}val_i")
|
||||||
,"/response/numFound==3"
|
,"/response/numFound==3"
|
||||||
);
|
);
|
||||||
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
@ -139,29 +148,38 @@ public class TestFiltering extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
// now re-do the same tests w/ faceting on to get the full docset
|
// now re-do the same tests w/ faceting on to get the full docset
|
||||||
|
|
||||||
|
// default cost uses post filtering (for frange)
|
||||||
prevCount = DelegatingCollector.setLastDelegateCount;
|
prevCount = DelegatingCollector.setLastDelegateCount;
|
||||||
assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=6 cache=false cost=100}val_i")
|
assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=6 cache=false}val_i")
|
||||||
,"/response/numFound==3"
|
,"/response/numFound==3"
|
||||||
);
|
);
|
||||||
assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount);
|
assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
|
||||||
// since we need the docset and the filter was not cached, the collector will need to be used again
|
// since we need the docset and the filter was not cached, the collector will need to be used again
|
||||||
prevCount = DelegatingCollector.setLastDelegateCount;
|
prevCount = DelegatingCollector.setLastDelegateCount;
|
||||||
assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=6 cache=false cost=100}val_i")
|
assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=6 cache=false}val_i")
|
||||||
,"/response/numFound==3"
|
,"/response/numFound==3"
|
||||||
);
|
);
|
||||||
assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount);
|
assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
|
||||||
// cache is true by default
|
// cache is true by default, even w/explicit low/high costs
|
||||||
prevCount = DelegatingCollector.setLastDelegateCount;
|
prevCount = DelegatingCollector.setLastDelegateCount;
|
||||||
assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=7}val_i")
|
assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=7}val_i")
|
||||||
,"/response/numFound==3"
|
,"/response/numFound==3"
|
||||||
);
|
);
|
||||||
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=7 cost=0}val_i")
|
||||||
|
,"/response/numFound==3"
|
||||||
|
);
|
||||||
|
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=7 cost=999}val_i")
|
||||||
|
,"/response/numFound==3"
|
||||||
|
);
|
||||||
|
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
|
||||||
// default cost avoids post filtering
|
// no caching and explicitly low cost avoids post filtering
|
||||||
prevCount = DelegatingCollector.setLastDelegateCount;
|
prevCount = DelegatingCollector.setLastDelegateCount;
|
||||||
assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=8 cache=false}val_i")
|
assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=8 cache=false cost=0}val_i")
|
||||||
,"/response/numFound==3"
|
,"/response/numFound==3"
|
||||||
);
|
);
|
||||||
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount);
|
||||||
|
|
|
@ -239,21 +239,25 @@ You can also use the `cost` option to control the order in which non-cached filt
|
||||||
|
|
||||||
For very high cost filters, if `cache=false` and `cost>=100` and the query implements the `PostFilter` interface, a Collector will be requested from that query and used to filter documents after they have matched the main query and all other filter queries. There can be multiple post filters; they are also ordered by cost.
|
For very high cost filters, if `cache=false` and `cost>=100` and the query implements the `PostFilter` interface, a Collector will be requested from that query and used to filter documents after they have matched the main query and all other filter queries. There can be multiple post filters; they are also ordered by cost.
|
||||||
|
|
||||||
|
For most queries the default behavior is `cost=0` -- but some types of queries such as `{!frange}` default to `cost=100`, because they are most efficient when used as a `PostFilter`.
|
||||||
|
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
This is a normal function range query used as a filter, all matching documents generated up front and cached:
|
This is an example of 3 regular filters, where the documents matching each filter are computed up front and cached independently:
|
||||||
|
|
||||||
[source,text]
|
[source,text]
|
||||||
|
q=some keywords
|
||||||
|
fq=quantity_in_stock:[5 TO *]
|
||||||
fq={!frange l=10 u=100}mul(popularity,price)
|
fq={!frange l=10 u=100}mul(popularity,price)
|
||||||
|
fq={!frange cost=200 l=0}pow(mul(sum(1, query('tag:smartphone')), div(1,avg_rating)), 2.3)
|
||||||
|
|
||||||
This is a function range query run in parallel with the main query like a traditional lucene filter:
|
These are the same filters run w/o caching. The simple range query on the `quantity_in_stock` field will be run in parallel with the main query like a traditional lucene filter, while the 2 `frange` filters will only be checked against each document that has already matched the main query and the `quantity_in_stock` range query -- first the simpler `mul(popularity,price)` will be checked (because of its implicit `cost=100`) and only if it matches will the final very complex filter (with its higher `cost=200`) be checked.
|
||||||
|
|
||||||
[source,text]
|
[source,text]
|
||||||
fq={!frange l=10 u=100 cache=false}mul(popularity,price)
|
q=some keywords
|
||||||
|
fq={!cache=false}quantity_in_stock:[5 TO *]
|
||||||
This is a function range query checked after each document that already matches the query and all other filters. This is good for really expensive function queries:
|
fq={!frange cache=false l=10 u=100}mul(popularity,price)
|
||||||
|
fq={!frange cache=false cost=200 l=0}pow(mul(sum(1, query('tag:smartphone')), div(1,avg_rating)), 2.3)
|
||||||
[source,text]
|
|
||||||
fq={!frange l=10 u=100 cache=false cost=100}mul(popularity,price)
|
|
||||||
|
|
||||||
== logParamsList Parameter
|
== logParamsList Parameter
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue