From b298d7fb160a49f552dc3987b83aa53601c7b29a Mon Sep 17 00:00:00 2001 From: Kevin Watters Date: Sun, 21 Feb 2021 11:49:36 -0500 Subject: [PATCH] SOLR-14787 - Adding support to use inequalities to the payload check query parser. (#1954) --- .../payloads/TestPayloadCheckQuery.java | 0 .../search/PayloadCheckQParserPlugin.java | 17 ++++++- .../search/TestPayloadCheckQParserPlugin.java | 29 ++++++++++-- solr/solr-ref-guide/src/other-parsers.adoc | 45 ++++++++++++------- 4 files changed, 68 insertions(+), 23 deletions(-) mode change 100755 => 100644 lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java diff --git a/lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java b/lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java old mode 100755 new mode 100644 diff --git a/solr/core/src/java/org/apache/solr/search/PayloadCheckQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/PayloadCheckQParserPlugin.java index e5702bf659e..a4a3db518fa 100644 --- a/solr/core/src/java/org/apache/solr/search/PayloadCheckQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/PayloadCheckQParserPlugin.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.payloads.FloatEncoder; @@ -28,6 +29,8 @@ import org.apache.lucene.analysis.payloads.IdentityEncoder; import org.apache.lucene.analysis.payloads.IntegerEncoder; import org.apache.lucene.analysis.payloads.PayloadEncoder; import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery; +import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.MatchOperation; +import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.PayloadType; import org.apache.lucene.search.Query; import org.apache.lucene.search.spans.SpanQuery; import 
org.apache.lucene.util.BytesRef; @@ -53,6 +56,13 @@ public class PayloadCheckQParserPlugin extends QParserPlugin { String field = localParams.get(QueryParsing.F); String value = localParams.get(QueryParsing.V); String p = localParams.get("payloads"); + // payloads and op parameter are probably mutually exclusive. we could consider making a different query + // not a span payload check query, but something that just operates on payloads without the span? + String strOp = localParams.get("op"); + MatchOperation op = MatchOperation.EQ; + if (strOp != null) { + op = MatchOperation.valueOf(strOp.toUpperCase(Locale.ROOT)); + } if (field == null) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'f' not specified"); @@ -81,12 +91,16 @@ public class PayloadCheckQParserPlugin extends QParserPlugin { PayloadEncoder encoder = null; String e = PayloadUtils.getPayloadEncoder(ft); + PayloadType payloadType = null; if ("float".equals(e)) { // TODO: centralize this string->PayloadEncoder logic (see DelimitedPayloadTokenFilterFactory) encoder = new FloatEncoder(); + payloadType = PayloadType.FLOAT; } else if ("integer".equals(e)) { encoder = new IntegerEncoder(); + payloadType = PayloadType.INT; } else if ("identity".equals(e)) { encoder = new IdentityEncoder(); + payloadType = PayloadType.STRING; } if (encoder == null) { @@ -99,8 +113,7 @@ public class PayloadCheckQParserPlugin extends QParserPlugin { if (rawPayload.length() > 0) payloads.add(encoder.encode(rawPayload.toCharArray())); } - - return new SpanPayloadCheckQuery(query, payloads); + return new SpanPayloadCheckQuery(query, payloads, payloadType, op); } }; diff --git a/solr/core/src/test/org/apache/solr/search/TestPayloadCheckQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestPayloadCheckQParserPlugin.java index 14bd833e804..f619320387f 100644 --- a/solr/core/src/test/org/apache/solr/search/TestPayloadCheckQParserPlugin.java +++ 
b/solr/core/src/test/org/apache/solr/search/TestPayloadCheckQParserPlugin.java @@ -46,20 +46,41 @@ public class TestPayloadCheckQParserPlugin extends SolrTestCaseJ4 { "{!payload_check f=vals_dpi payloads='1 2'}A B", // "{!payload_check f=vals_dpi payloads='1 2.0'}A B", // ideally this should pass, but IntegerEncoder can't handle "2.0" "{!payload_check f=vals_dpi payloads='1 2 3'}A B C", - "{!payload_check f=vals_dpf payloads='1 2'}one two", + "{!payload_check f=vals_dpf payloads='1 2' op='eq'}one two", "{!payload_check f=vals_dpf payloads='1 2.0'}one two", // shows that FloatEncoder can handle "1" - - "{!payload_check f=vals_dps payloads='NOUN VERB'}cat jumped" + "{!payload_check f=vals_dps payloads='NOUN VERB'}cat jumped", + "{!payload_check f=vals_dpf payloads='0.75' op='gt'}one", + "{!payload_check f=vals_dpf payloads='0.75 1.5' op='gt'}one two", + "{!payload_check f=vals_dpf payloads='1.25' op='lt'}one", // inequality on float lt + "{!payload_check f=vals_dpf payloads='1.0' op='lte'}one", // inequality on float lte + "{!payload_check f=vals_dpf payloads='0.75' op='gt'}one", // inequality on float gt + "{!payload_check f=vals_dpf payloads='1.0' op='gte'}one", // inequality on float gte + "{!payload_check f=vals_dpi payloads='2' op='lt'}A", // inequality on int lt + "{!payload_check f=vals_dpi payloads='1' op='lte'}A", // inequality on int lte + "{!payload_check f=vals_dpi payloads='0' op='gt'}A", // inequality on int gt + "{!payload_check f=vals_dpi payloads='1' op='gte'}A" // inequality on int gte }; String[] should_not_matches = new String[] { + "{!payload_check f=vals_dpf payloads='0.75' op='gt'}one two", // too few payloads + "{!payload_check f=vals_dpf payloads='0.75 1.5 2.0' op='gt'}one two", // too many payloads "{!payload_check f=vals_dpi v=A payloads=2}", "{!payload_check f=vals_dpi payloads='1 2'}B C", "{!payload_check f=vals_dpi payloads='1 2 3'}A B", "{!payload_check f=vals_dpi payloads='1 2'}A B C", "{!payload_check f=vals_dpf payloads='1 2.0'}two 
three", - "{!payload_check f=vals_dps payloads='VERB NOUN'}cat jumped" + "{!payload_check f=vals_dps payloads='VERB NOUN'}cat jumped", + "{!payload_check f=vals_dpf payloads='1.25' op='gt'}one", + "{!payload_check f=vals_dpf payloads='0.75 3' op='gt'}one two", + "{!payload_check f=vals_dpf payloads='1.0' op='lt'}one", // inequality on float lt + "{!payload_check f=vals_dpf payloads='0.75' op='lte'}one", // inequality on float lte + "{!payload_check f=vals_dpf payloads='1.0' op='gt'}one", // inequality on float gt + "{!payload_check f=vals_dpf payloads='1.25' op='gte'}one", // inequality on float gte + "{!payload_check f=vals_dpi payloads='1' op='lt'}A", // inequality on int lt + "{!payload_check f=vals_dpi payloads='0' op='lte'}A", // inequality on int lte + "{!payload_check f=vals_dpi payloads='1' op='gt'}A", // inequality on int gt + "{!payload_check f=vals_dpi payloads='2' op='gte'}A" // inequality on int gte }; for(String should_match : should_matches) { diff --git a/solr/solr-ref-guide/src/other-parsers.adoc b/solr/solr-ref-guide/src/other-parsers.adoc index e714f12aea0..5c96dbf0723 100644 --- a/solr/solr-ref-guide/src/other-parsers.adoc +++ b/solr/solr-ref-guide/src/other-parsers.adoc @@ -977,16 +977,11 @@ For more information about the possibilities of nested queries, see Yonik Seeley == Payload Query Parsers -These query parsers utilize payloads encoded on terms during indexing. - -The main query, for both of these parsers, is parsed straightforwardly from the field type's query analysis into a `SpanQuery`. The generated `SpanQuery` will be either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted. Payloads can be encoded on terms using either the `DelimitedPayloadTokenFilter` or the `NumericPayloadTokenFilter`. The payload using parsers are: - -* `PayloadScoreQParser` -* `PayloadCheckQParser` +These query parsers utilize payloads encoded on terms during indexing. 
Payloads can be encoded on terms using either the `DelimitedPayloadTokenFilter` or the `NumericPayloadTokenFilter`. === Payload Score Parser -`PayloadScoreQParser` incorporates each matching term's numeric (integer or float) payloads into the scores. +`PayloadScoreQParser` incorporates each matching term's numeric (integer or float) payloads into the scores. The main query is parsed from the field type's query analysis into a `SpanQuery` based on the value of the `operator` parameter below. This parser accepts the following parameters: @@ -997,7 +992,9 @@ The field to use. This parameter is required. The payload function. The options are: `min`, `max`, `average`, or `sum`. This parameter is required. `operator`:: -A search operator. The options are `or` and `phrase`, which is the default. This defines if the search query should be an OR query or a phrase query. +A search operator. The options are: + * `or` will generate either a `SpanTermQuery` or a `SpanOrQuery` depending on the number of tokens emitted. + * `phrase` (default) will generate either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted. `includeSpanScore`:: If `true`, multiples the computed payload factor by the score of the original query. If `false`, the default, the computed payload factor is the score. @@ -1012,7 +1009,9 @@ If `true`, multiples the computed payload factor by the score of the original qu === Payload Check Parser -`PayloadCheckQParser` only matches when the matching terms also have the specified payloads. +`PayloadCheckQParser` only matches when the matching terms also have the specified relationship to the payloads. The default relationship is equals; however, inequality matching can also be performed. The main query is parsed straightforwardly from the field type's query analysis into a `SpanQuery`.
The generated `SpanQuery` will be either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted. The net effect is that the main query always operates in a manner similar to a phrase query in the standard lucene parser (thus ignoring any value for `q.op`). + +NOTE: If field analysis of the query alters the number of tokens, the final number of tokens must match the number of payloads supplied in the `payloads` parameter. If the number of query tokens differs from the number of payload values supplied with this query, the query will not match. This parser accepts the following parameters: @@ -1020,18 +1019,30 @@ This parser accepts the following parameters: The field to use (required). `payloads`:: -A space-separated list of payloads that must match the query terms (required) -+ -Each specified payload will be encoded using the encoder determined from the field type and encoded accordingly for matching. -+ -`DelimitedPayloadTokenFilter` 'identity' encoded payloads also work here, as well as float and integer encoded ones. +A space-separated list of payloads to be compared with payloads in the matching tokens from the document (required). Each specified payload will be encoded using the encoder determined from the field type prior to matching. Integer, float and identity (string) encodings are supported with the same meanings as for `DelimitedPayloadTokenFilter`. -*Example* +`op`:: +The comparison operation to apply to the payload check.
All operations require that consecutive tokens derived from the analysis of the query match consecutive tokens in the document, and additionally the payloads on the document tokens must be: + * `eq` - equal to the specified payloads (default) + * `gt` - greater than the specified payloads + * `lt` - less than the specified payloads + * `gte` - greater than or equal to the specified payloads + * `lte` - less than or equal to the specified payloads +*Examples* + +Find all documents with the phrase "searching stuff" where "searching" has a payload of "VERB" and "stuff" has a payload of "NOUN" [source,text] ----- {!payload_check f=words_dps payloads="VERB NOUN"}searching stuff ----- + +Find all documents with "foo" where "foo" has a payload with a value greater than or equal to 0.75 +[source,text] +{!payload_check f=words_dpf payloads="0.75" op="gte"}foo + +Find all documents with the phrase "foo bar" where term "foo" has a payload greater than 9 and "bar" has a payload greater than 5 +[source,text] +{!payload_check f=words_dpi payloads="9 5" op="gt"}foo bar + == Prefix Query Parser