mirror of https://github.com/apache/lucene.git
SOLR-14787 - Adding support to use inequalities to the payload check query parser. (#1954)
This commit is contained in:
parent
107926e486
commit
b298d7fb16
0
lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java
Executable file → Normal file
0
lucene/queries/src/test/org/apache/lucene/queries/payloads/TestPayloadCheckQuery.java
Executable file → Normal file
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
||||||
import java.lang.invoke.MethodHandles;
|
import java.lang.invoke.MethodHandles;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.payloads.FloatEncoder;
|
import org.apache.lucene.analysis.payloads.FloatEncoder;
|
||||||
|
@ -28,6 +29,8 @@ import org.apache.lucene.analysis.payloads.IdentityEncoder;
|
||||||
import org.apache.lucene.analysis.payloads.IntegerEncoder;
|
import org.apache.lucene.analysis.payloads.IntegerEncoder;
|
||||||
import org.apache.lucene.analysis.payloads.PayloadEncoder;
|
import org.apache.lucene.analysis.payloads.PayloadEncoder;
|
||||||
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery;
|
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery;
|
||||||
|
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.MatchOperation;
|
||||||
|
import org.apache.lucene.queries.payloads.SpanPayloadCheckQuery.PayloadType;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.spans.SpanQuery;
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
@ -53,6 +56,13 @@ public class PayloadCheckQParserPlugin extends QParserPlugin {
|
||||||
String field = localParams.get(QueryParsing.F);
|
String field = localParams.get(QueryParsing.F);
|
||||||
String value = localParams.get(QueryParsing.V);
|
String value = localParams.get(QueryParsing.V);
|
||||||
String p = localParams.get("payloads");
|
String p = localParams.get("payloads");
|
||||||
|
// payloads and op parameter are probably mutually exclusive. we could consider making a different query
|
||||||
|
// not a span payload check query, but something that just operates on payloads without the span?
|
||||||
|
String strOp = localParams.get("op");
|
||||||
|
MatchOperation op = MatchOperation.EQ;
|
||||||
|
if (strOp != null) {
|
||||||
|
op = MatchOperation.valueOf(strOp.toUpperCase(Locale.ROOT));
|
||||||
|
}
|
||||||
|
|
||||||
if (field == null) {
|
if (field == null) {
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'f' not specified");
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'f' not specified");
|
||||||
|
@ -81,12 +91,16 @@ public class PayloadCheckQParserPlugin extends QParserPlugin {
|
||||||
|
|
||||||
PayloadEncoder encoder = null;
|
PayloadEncoder encoder = null;
|
||||||
String e = PayloadUtils.getPayloadEncoder(ft);
|
String e = PayloadUtils.getPayloadEncoder(ft);
|
||||||
|
PayloadType payloadType = null;
|
||||||
if ("float".equals(e)) { // TODO: centralize this string->PayloadEncoder logic (see DelimitedPayloadTokenFilterFactory)
|
if ("float".equals(e)) { // TODO: centralize this string->PayloadEncoder logic (see DelimitedPayloadTokenFilterFactory)
|
||||||
encoder = new FloatEncoder();
|
encoder = new FloatEncoder();
|
||||||
|
payloadType = PayloadType.FLOAT;
|
||||||
} else if ("integer".equals(e)) {
|
} else if ("integer".equals(e)) {
|
||||||
encoder = new IntegerEncoder();
|
encoder = new IntegerEncoder();
|
||||||
|
payloadType = PayloadType.INT;
|
||||||
} else if ("identity".equals(e)) {
|
} else if ("identity".equals(e)) {
|
||||||
encoder = new IdentityEncoder();
|
encoder = new IdentityEncoder();
|
||||||
|
payloadType = PayloadType.STRING;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (encoder == null) {
|
if (encoder == null) {
|
||||||
|
@ -99,8 +113,7 @@ public class PayloadCheckQParserPlugin extends QParserPlugin {
|
||||||
if (rawPayload.length() > 0)
|
if (rawPayload.length() > 0)
|
||||||
payloads.add(encoder.encode(rawPayload.toCharArray()));
|
payloads.add(encoder.encode(rawPayload.toCharArray()));
|
||||||
}
|
}
|
||||||
|
return new SpanPayloadCheckQuery(query, payloads, payloadType, op);
|
||||||
return new SpanPayloadCheckQuery(query, payloads);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -46,20 +46,41 @@ public class TestPayloadCheckQParserPlugin extends SolrTestCaseJ4 {
|
||||||
"{!payload_check f=vals_dpi payloads='1 2'}A B",
|
"{!payload_check f=vals_dpi payloads='1 2'}A B",
|
||||||
// "{!payload_check f=vals_dpi payloads='1 2.0'}A B", // ideally this should pass, but IntegerEncoder can't handle "2.0"
|
// "{!payload_check f=vals_dpi payloads='1 2.0'}A B", // ideally this should pass, but IntegerEncoder can't handle "2.0"
|
||||||
"{!payload_check f=vals_dpi payloads='1 2 3'}A B C",
|
"{!payload_check f=vals_dpi payloads='1 2 3'}A B C",
|
||||||
|
|
||||||
"{!payload_check f=vals_dpf payloads='1 2'}one two",
|
"{!payload_check f=vals_dpf payloads='1 2'}one two",
|
||||||
|
"{!payload_check f=vals_dpf payloads='1 2' op='eq'}one two",
|
||||||
"{!payload_check f=vals_dpf payloads='1 2.0'}one two", // shows that FloatEncoder can handle "1"
|
"{!payload_check f=vals_dpf payloads='1 2.0'}one two", // shows that FloatEncoder can handle "1"
|
||||||
|
"{!payload_check f=vals_dps payloads='NOUN VERB'}cat jumped",
|
||||||
"{!payload_check f=vals_dps payloads='NOUN VERB'}cat jumped"
|
"{!payload_check f=vals_dpf payloads='0.75' op='gt'}one",
|
||||||
|
"{!payload_check f=vals_dpf payloads='0.75 1.5' op='gt'}one two",
|
||||||
|
"{!payload_check f=vals_dpf payloads='1.25' op='lt'}one", // inequality on float lt
|
||||||
|
"{!payload_check f=vals_dpf payloads='1.0' op='lte'}one", // inequality on float lte
|
||||||
|
"{!payload_check f=vals_dpf payloads='0.75' op='gt'}one", // inequality on float gt
|
||||||
|
"{!payload_check f=vals_dpf payloads='1.0' op='gte'}one", // inequality on float gte
|
||||||
|
"{!payload_check f=vals_dpi payloads='2' op='lt'}A", // inequality on int lt
|
||||||
|
"{!payload_check f=vals_dpi payloads='1' op='lte'}A", // inequality on int lte
|
||||||
|
"{!payload_check f=vals_dpi payloads='0' op='gt'}A", // inequality on int gt
|
||||||
|
"{!payload_check f=vals_dpi payloads='1' op='gte'}A" // inequality on int gte
|
||||||
};
|
};
|
||||||
|
|
||||||
String[] should_not_matches = new String[] {
|
String[] should_not_matches = new String[] {
|
||||||
|
"{!payload_check f=vals_dpf payloads='0.75' op='gt'}one two", // too few payloads
|
||||||
|
"{!payload_check f=vals_dpf payloads='0.75 1.5 2.0' op='gt'}one two", // too many payloads
|
||||||
"{!payload_check f=vals_dpi v=A payloads=2}",
|
"{!payload_check f=vals_dpi v=A payloads=2}",
|
||||||
"{!payload_check f=vals_dpi payloads='1 2'}B C",
|
"{!payload_check f=vals_dpi payloads='1 2'}B C",
|
||||||
"{!payload_check f=vals_dpi payloads='1 2 3'}A B",
|
"{!payload_check f=vals_dpi payloads='1 2 3'}A B",
|
||||||
"{!payload_check f=vals_dpi payloads='1 2'}A B C",
|
"{!payload_check f=vals_dpi payloads='1 2'}A B C",
|
||||||
"{!payload_check f=vals_dpf payloads='1 2.0'}two three",
|
"{!payload_check f=vals_dpf payloads='1 2.0'}two three",
|
||||||
"{!payload_check f=vals_dps payloads='VERB NOUN'}cat jumped"
|
"{!payload_check f=vals_dps payloads='VERB NOUN'}cat jumped",
|
||||||
|
"{!payload_check f=vals_dpf payloads='1.25' op='gt'}one",
|
||||||
|
"{!payload_check f=vals_dpf payloads='0.75 3' op='gt'}one two",
|
||||||
|
"{!payload_check f=vals_dpf payloads='1.0' op='lt'}one", // inequality on float lt
|
||||||
|
"{!payload_check f=vals_dpf payloads='0.75' op='lte'}one", // inequality on float lte
|
||||||
|
"{!payload_check f=vals_dpf payloads='1.0' op='gt'}one", // inequality on float gt
|
||||||
|
"{!payload_check f=vals_dpf payloads='1.25' op='gte'}one", // inequality on float gte
|
||||||
|
"{!payload_check f=vals_dpi payloads='1' op='lt'}A", // inequality on int lt
|
||||||
|
"{!payload_check f=vals_dpi payloads='0' op='lte'}A", // inequality on int lte
|
||||||
|
"{!payload_check f=vals_dpi payloads='1' op='gt'}A", // inequality on int gt
|
||||||
|
"{!payload_check f=vals_dpi payloads='2' op='gte'}A" // inequality on int gte
|
||||||
};
|
};
|
||||||
|
|
||||||
for(String should_match : should_matches) {
|
for(String should_match : should_matches) {
|
||||||
|
|
|
@ -977,16 +977,11 @@ For more information about the possibilities of nested queries, see Yonik Seeley
|
||||||
|
|
||||||
== Payload Query Parsers
|
== Payload Query Parsers
|
||||||
|
|
||||||
These query parsers utilize payloads encoded on terms during indexing.
|
These query parsers utilize payloads encoded on terms during indexing. Payloads can be encoded on terms using either the `DelimitedPayloadTokenFilter` or the `NumericPayloadTokenFilter`.
|
||||||
|
|
||||||
The main query, for both of these parsers, is parsed straightforwardly from the field type's query analysis into a `SpanQuery`. The generated `SpanQuery` will be either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted. Payloads can be encoded on terms using either the `DelimitedPayloadTokenFilter` or the `NumericPayloadTokenFilter`. The payload using parsers are:
|
|
||||||
|
|
||||||
* `PayloadScoreQParser`
|
|
||||||
* `PayloadCheckQParser`
|
|
||||||
|
|
||||||
=== Payload Score Parser
|
=== Payload Score Parser
|
||||||
|
|
||||||
`PayloadScoreQParser` incorporates each matching term's numeric (integer or float) payloads into the scores.
|
`PayloadScoreQParser` incorporates each matching term's numeric (integer or float) payloads into the scores. The main query is parsed from the field type's query analysis into a `SpanQuery` based on the value of the `operator` parameter below.
|
||||||
|
|
||||||
This parser accepts the following parameters:
|
This parser accepts the following parameters:
|
||||||
|
|
||||||
|
@ -997,7 +992,9 @@ The field to use. This parameter is required.
|
||||||
The payload function. The options are: `min`, `max`, `average`, or `sum`. This parameter is required.
|
The payload function. The options are: `min`, `max`, `average`, or `sum`. This parameter is required.
|
||||||
|
|
||||||
`operator`::
|
`operator`::
|
||||||
A search operator. The options are `or` and `phrase`, which is the default. This defines if the search query should be an OR query or a phrase query.
|
A search operator. The options are:
|
||||||
|
* `or` will generate either a `SpanTermQuery` or a `SpanOrQuery` depending on the number of tokens emitted.
|
||||||
|
* `phrase` will generate either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted.
|
||||||
|
|
||||||
`includeSpanScore`::
|
`includeSpanScore`::
|
||||||
If `true`, multiplies the computed payload factor by the score of the original query. If `false`, the default, the computed payload factor is the score.
|
If `true`, multiplies the computed payload factor by the score of the original query. If `false`, the default, the computed payload factor is the score.
|
||||||
|
@ -1012,7 +1009,9 @@ If `true`, multiples the computed payload factor by the score of the original qu
|
||||||
|
|
||||||
=== Payload Check Parser
|
=== Payload Check Parser
|
||||||
|
|
||||||
`PayloadCheckQParser` only matches when the matching terms also have the specified payloads.
|
`PayloadCheckQParser` only matches when the matching terms also have the specified relationship to the payloads. The default relationship is equality; however, inequality matching can also be performed. The main query, for this parser, is parsed straightforwardly from the field type's query analysis into a `SpanQuery`. The generated `SpanQuery` will be either a `SpanTermQuery` or an ordered, zero slop `SpanNearQuery`, depending on how many tokens are emitted. The net effect is that the main query always operates in a manner similar to a phrase query in the standard Lucene parser (thus ignoring any value for `q.op`).
|
||||||
|
|
||||||
|
NOTE: If the field analysis applied to the query alters the number of tokens, the final number of tokens must match the number of payloads supplied in the `payloads` parameter. If there is a mismatch between the number of query tokens and the number of payload values supplied with this query, the query will not match.
|
||||||
|
|
||||||
This parser accepts the following parameters:
|
This parser accepts the following parameters:
|
||||||
|
|
||||||
|
@ -1020,18 +1019,30 @@ This parser accepts the following parameters:
|
||||||
The field to use (required).
|
The field to use (required).
|
||||||
|
|
||||||
`payloads`::
|
`payloads`::
|
||||||
A space-separated list of payloads that must match the query terms (required)
|
A space-separated list of payloads to be compared with payloads in the matching tokens from the document (required). Each specified payload will be encoded using the encoder determined from the field type prior to matching. Integer, float and identity (string) encodings are supported with the same meanings as for DelimitedPayloadTokenFilter.
|
||||||
+
|
|
||||||
Each specified payload will be encoded using the encoder determined from the field type and encoded accordingly for matching.
|
|
||||||
+
|
|
||||||
`DelimitedPayloadTokenFilter` 'identity' encoded payloads also work here, as well as float and integer encoded ones.
|
|
||||||
|
|
||||||
*Example*
|
`op`::
|
||||||
|
The comparison operation to apply to the payload check. All operations require that consecutive tokens derived from the analysis of the query match consecutive tokens in the document, and additionally the payloads on the document tokens must be:
|
||||||
|
* `eq` - equal to the specified payloads (default)
|
||||||
|
* `gt` - greater than the specified payloads
|
||||||
|
* `lt` - less than the specified payloads
|
||||||
|
* `gte` - greater than or equal to the specified payloads
|
||||||
|
* `lte` - less than or equal to the specified payloads
|
||||||
|
|
||||||
|
*Examples*
|
||||||
|
|
||||||
|
Find all documents with the phrase "searching stuff" where "searching" has a payload of "VERB" and "stuff" has a payload of "NOUN"
|
||||||
[source,text]
|
[source,text]
|
||||||
----
|
|
||||||
{!payload_check f=words_dps payloads="VERB NOUN"}searching stuff
|
{!payload_check f=words_dps payloads="VERB NOUN"}searching stuff
|
||||||
----
|
|
||||||
|
Find all documents with "foo" where "foo" has a payload with a value of greater than or equal to 0.75
|
||||||
|
[source,text]
|
||||||
|
{!payload_check f=words_dpf payloads="0.75" op="gte"}foo
|
||||||
|
|
||||||
|
Find all documents with the phrase "foo bar" where term "foo" has a payload greater than 9 and "bar" has a payload greater than 5
|
||||||
|
[source,text]
|
||||||
|
{!payload_check f=words_dpi payloads="9 5" op="gt"}foo bar
|
||||||
|
|
||||||
|
|
||||||
== Prefix Query Parser
|
== Prefix Query Parser
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue