diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 9fd0f2e798b..fe85f18cbdc 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -120,6 +120,9 @@ Optimizations * SOLR-11320: Lock autoscaling triggers when changes they requested are being made. This helps to ensure that cluster is in a stable state before processing any new trigger events. (ab) +* SOLR-11641: Change `frange` to default to `cost=100` so default behavior is to PostFilter if user specifies + `cache=false` (hossman) + Other Changes ---------------------- * SOLR-11478: Solr should remove itself from live_nodes in zk immediately on shutdown. (Cao Manh Dat) diff --git a/solr/core/src/java/org/apache/solr/search/FunctionRangeQuery.java b/solr/core/src/java/org/apache/solr/search/FunctionRangeQuery.java index d64060fe8b5..c5c6205395d 100644 --- a/solr/core/src/java/org/apache/solr/search/FunctionRangeQuery.java +++ b/solr/core/src/java/org/apache/solr/search/FunctionRangeQuery.java @@ -33,6 +33,7 @@ public class FunctionRangeQuery extends SolrConstantScoreQuery implements PostFi public FunctionRangeQuery(ValueSourceRangeFilter filter) { super(filter); this.rangeFilt = filter; + this.cost = 100; // default behavior should be PostFiltering } @Override diff --git a/solr/core/src/test/org/apache/solr/core/SOLR749Test.java b/solr/core/src/test/org/apache/solr/core/SOLR749Test.java index 2eda77d4f85..d3b5e32194f 100644 --- a/solr/core/src/test/org/apache/solr/core/SOLR749Test.java +++ b/solr/core/src/test/org/apache/solr/core/SOLR749Test.java @@ -83,6 +83,25 @@ public class SOLR749Test extends SolrTestCaseJ4 { int count = CountUsageValueSourceParser.getAndClearCount("frange_in_bq"); assertTrue("frange_in_bq: " + count, (19 <= count && count <= 20)); + // non-cached frange queries should default to post-filtering + // (ie: only be computed on candidates of other q/fq restrictions) + // regardless of how few/many docs match the frange + assertQ("query matching 1 doc w/ implicitly post-filtered frange matching 
all docs", + req("q","{!notfoo cache=false}*:*", // match all... + "fq","{!frange cache=false l=30 u=30}abs(id_i1)", // ...restrict to 1 match + // post filter will happily match all docs, but should only be asked about 1... + "fq","{!frange cache=false l=4.5 u=4.5 v='countUsage(postfilt_match_all,4.5)'})"), + "//result[@numFound=1]"); + assertEquals(1, CountUsageValueSourceParser.getAndClearCount("postfilt_match_all")); + // + assertQ("query matching all docs w/ implicitly post-filtered frange matching no docs", + req("q","{!notfoo cache=false}id_i1:[20 TO 39]", // match some... + "fq","{!frange cache=false cost=0 l=50}abs(id_i1)", // ...regular conjunction filter rules out all + // post filter will happily match all docs, but should never be asked... + "fq","{!frange cache=false l=4.5 u=4.5 v='countUsage(postfilt_match_all,4.5)'})"), + "//result[@numFound=0]"); + assertEquals(0, CountUsageValueSourceParser.getAndClearCount("postfilt_match_all")); + } finally { CountUsageValueSourceParser.clearCounters(); } diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index 4177d81be60..ae85089f898 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -370,8 +370,10 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "myField","foo_i", "myInner","product(4,foo_i)"); try { + // NOTE: unlike most queries, frange defaultsto cost==100 assertQueryEquals("frange", req, "{!frange l=0.2 h=20.4}sum(4,5)", + "{!frange l=0.2 h=20.4 cost=100}sum(4,5)", "{!frange l=$low h=$high}sum(4,$myVar)"); } finally { req.close(); diff --git a/solr/core/src/test/org/apache/solr/search/TestFiltering.java b/solr/core/src/test/org/apache/solr/search/TestFiltering.java index c7182832b38..16d91920a48 100644 --- a/solr/core/src/test/org/apache/solr/search/TestFiltering.java +++ 
b/solr/core/src/test/org/apache/solr/search/TestFiltering.java @@ -109,29 +109,38 @@ public class TestFiltering extends SolrTestCaseJ4 { int prevCount; + // default cost uses post filtering (for frange) prevCount = DelegatingCollector.setLastDelegateCount; - assertJQ(req("q","*:*", "fq","{!frange l=2 u=3 cache=false cost=100}val_i") + assertJQ(req("q","*:*", "fq","{!frange l=2 u=3 cache=false}val_i") ,"/response/numFound==2" ); assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount); // The exact same query the second time will be cached by the queryCache prevCount = DelegatingCollector.setLastDelegateCount; - assertJQ(req("q","*:*", "fq","{!frange l=2 u=3 cache=false cost=100}val_i") + assertJQ(req("q","*:*", "fq","{!frange l=2 u=3 cache=false}val_i") ,"/response/numFound==2" ); assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount); - // cache is true by default + // cache is true by default, even w/explicit low/high costs prevCount = DelegatingCollector.setLastDelegateCount; assertJQ(req("q","*:*", "fq","{!frange l=2 u=4}val_i") ,"/response/numFound==3" ); assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount); + assertJQ(req("q","*:*", "fq","{!frange l=2 u=4 cost=0}val_i") + ,"/response/numFound==3" + ); + assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount); + assertJQ(req("q","*:*", "fq","{!frange l=2 u=4 cost=999}val_i") + ,"/response/numFound==3" + ); + assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount); - // default cost avoids post filtering + // no caching and explicitly low cost avoids post filtering prevCount = DelegatingCollector.setLastDelegateCount; - assertJQ(req("q","*:*", "fq","{!frange l=2 u=5 cache=false}val_i") + assertJQ(req("q","*:*", "fq","{!frange l=2 u=5 cache=false cost=0}val_i") ,"/response/numFound==3" ); assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount); @@ -139,29 +148,38 @@ public class TestFiltering extends SolrTestCaseJ4 { // now 
re-do the same tests w/ faceting on to get the full docset + // default cost uses post filtering (for frange) prevCount = DelegatingCollector.setLastDelegateCount; - assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=6 cache=false cost=100}val_i") + assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=6 cache=false}val_i") ,"/response/numFound==3" ); assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount); // since we need the docset and the filter was not cached, the collector will need to be used again prevCount = DelegatingCollector.setLastDelegateCount; - assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=6 cache=false cost=100}val_i") + assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=6 cache=false}val_i") ,"/response/numFound==3" ); assertEquals(1, DelegatingCollector.setLastDelegateCount - prevCount); - // cache is true by default + // cache is true by default, even w/explicit low/high costs prevCount = DelegatingCollector.setLastDelegateCount; assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=7}val_i") ,"/response/numFound==3" ); assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount); + assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=7 cost=0}val_i") + ,"/response/numFound==3" + ); + assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount); + assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=7 cost=999}val_i") + ,"/response/numFound==3" + ); + assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount); - // default cost avoids post filtering + // no caching and explicitly low cost avoids post filtering prevCount = DelegatingCollector.setLastDelegateCount; - assertJQ(req("facet","true", "facet.field","id", "q","*:*", "fq","{!frange l=2 u=8 cache=false}val_i") + assertJQ(req("facet","true", 
"facet.field","id", "q","*:*", "fq","{!frange l=2 u=8 cache=false cost=0}val_i") ,"/response/numFound==3" ); assertEquals(0, DelegatingCollector.setLastDelegateCount - prevCount); diff --git a/solr/solr-ref-guide/src/common-query-parameters.adoc b/solr/solr-ref-guide/src/common-query-parameters.adoc index f59db5fc666..5986f9771ac 100644 --- a/solr/solr-ref-guide/src/common-query-parameters.adoc +++ b/solr/solr-ref-guide/src/common-query-parameters.adoc @@ -239,21 +239,25 @@ You can also use the `cost` option to control the order in which non-cached filt For very high cost filters, if `cache=false` and `cost>=100` and the query implements the `PostFilter` interface, a Collector will be requested from that query and used to filter documents after they have matched the main query and all other filter queries. There can be multiple post filters; they are also ordered by cost. +For most queries the default behavior is `cost=0` -- but some types of queries such as `{!frange}` default to `cost=100`, because they are most efficient when used as a `PostFilter`. + For example: -This is a normal function range query used as a filter, all matching documents generated up front and cached: +This is an example of 3 regular filters, where all matching documents generated by each are computed up front and cached independently: + [source,text] +q=some keywords +fq=quantity_in_stock:[5 TO *] fq={!frange l=10 u=100}mul(popularity,price) +fq={!frange cost=200 l=0}pow(mul(sum(1, query('tag:smartphone')), div(1,avg_rating)), 2.3) -This is a function range query run in parallel with the main query like a traditional lucene filter: +These are the same filters run w/o caching. 
The simple range query on the `quantity_in_stock` field will be run in parallel with the main query like a traditional lucene filter, while the 2 `frange` filters will only be checked against each document that has already matched the main query and the `quantity_in_stock` range query -- first the simpler `mul(popularity,price)` will be checked (because of its implicit `cost=100`) and only if it matches will the final very complex filter (with its higher `cost=200`) be checked. [source,text] -fq={!frange l=10 u=100 cache=false}mul(popularity,price) - -This is a function range query checked after each document that already matches the query and all other filters. This is good for really expensive function queries: - -[source,text] -fq={!frange l=10 u=100 cache=false cost=100}mul(popularity,price) +q=some keywords +fq={!cache=false}quantity_in_stock:[5 TO *] +fq={!frange cache=false l=10 u=100}mul(popularity,price) +fq={!frange cache=false cost=200 l=0}pow(mul(sum(1, query('tag:smartphone')), div(1,avg_rating)), 2.3) == logParamsList Parameter