Support unordered non-overlapping intervals (#2103)

* Support unordered non-overlapping intervals

This commit exposes Intervals.unorderedNoOverlaps (LUCENE-8828).

Signed-off-by: Matt Weber <matt@mattweber.org>

* Replace ordered and overlap flags with enum.

- add IntervalMode enum
- deprecate ordered
- bwc between IntervalMode and ordered flag
- update tests
- update rest tests

Signed-off-by: Matt Weber <matt@mattweber.org>

* fix annotated text plugin test

Signed-off-by: Matt Weber <matt@mattweber.org>

* bwc to 2x only

Signed-off-by: Matt Weber <matt@mattweber.org>

* BWC rest tests for ordered flag

Signed-off-by: Matt Weber <matt@mattweber.org>
This commit is contained in:
Matt Weber 2022-02-17 14:10:22 -08:00 committed by GitHub
parent b9ff91d591
commit cfd96e3e4b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 655 additions and 91 deletions

View File

@ -42,6 +42,7 @@ import org.opensearch.index.mapper.ContentPath;
import org.opensearch.index.mapper.FieldTypeTestCase;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.Mapper;
import org.opensearch.index.query.IntervalMode;
import java.io.IOException;
import java.util.Collections;
@ -51,7 +52,7 @@ public class AnnotatedTextFieldTypeTests extends FieldTypeTestCase {
public void testIntervals() throws IOException {
MappedFieldType ft = new AnnotatedTextFieldMapper.AnnotatedTextFieldType("field", Collections.emptyMap());
NamedAnalyzer a = new NamedAnalyzer("name", AnalyzerScope.INDEX, new StandardAnalyzer());
IntervalsSource source = ft.intervals("Donald Trump", 0, true, a, false);
IntervalsSource source = ft.intervals("Donald Trump", 0, IntervalMode.ORDERED, a, false);
assertEquals(Intervals.phrase(Intervals.term("donald"), Intervals.term("trump")), source);
}

View File

@ -17,16 +17,23 @@ setup:
refresh: true
body:
- '{"index": {"_index": "test", "_id": "1"}}'
- '{"text" : "Some like it hot, some like it cold"}'
- '{"text" : "Some like hot and dry, some like it cold and wet"}'
- '{"index": {"_index": "test", "_id": "2"}}'
- '{"text" : "Its cold outside, theres no kind of atmosphere"}'
- '{"index": {"_index": "test", "_id": "3"}}'
- '{"text" : "Baby its cold there outside"}'
- '{"index": {"_index": "test", "_id": "4"}}'
- '{"text" : "Outside it is cold and wet"}'
- '{"index": {"_index": "test", "_id": "5"}}'
- '{"text" : "cold rain makes it wet"}'
- '{"index": {"_index": "test", "_id": "6"}}'
- '{"text" : "that is some cold cold rain"}'
---
"Test ordered matching":
"Test ordered matching with via mode":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -36,7 +43,25 @@ setup:
text:
match:
query: "cold outside"
ordered: true
mode: "ordered"
- match: { hits.total.value: 2 }
---
"Test ordered matching":
- skip:
features: allowed_warnings
- do:
allowed_warnings:
- "Deprecated field [ordered] used, this field is unused and will be removed entirely"
search:
index: test
body:
query:
intervals:
text:
match:
query: "cold outside"
ordered: true
- match: { hits.total.value: 2 }
---
@ -53,8 +78,29 @@ setup:
- match: { hits.total.value: 3 }
---
"Test explicit unordered matching":
"Test explicit unordered matching via mode":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
body:
query:
intervals:
text:
match:
query: "cold outside"
mode: "unordered"
- match: { hits.total.value: 3 }
---
"Test explicit unordered matching":
- skip:
features: allowed_warnings
- do:
allowed_warnings:
- "Deprecated field [ordered] used, this field is unused and will be removed entirely"
search:
index: test
body:
@ -66,8 +112,45 @@ setup:
ordered: false
- match: { hits.total.value: 3 }
---
"Test unordered with overlap in match":
- skip:
version: " - 1.99.99"
reason: "Implemented in 2.0"
- do:
search:
index: test
body:
query:
intervals:
text:
match:
query: "cold wet it"
mode: "unordered"
- match: { hits.total.value: 3 }
---
"Test unordered with no overlap in match":
- skip:
version: " - 1.99.99"
reason: "Implemented in 2.0"
- do:
search:
index: test
body:
query:
intervals:
text:
match:
query: "cold wet it"
mode: "unordered_no_overlap"
- match: { hits.total.value: 2 }
---
"Test phrase matching":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -77,7 +160,7 @@ setup:
text:
match:
query: "cold outside"
ordered: true
mode: "ordered"
max_gaps: 0
- match: { hits.total.value: 1 }
@ -97,6 +180,9 @@ setup:
---
"Test ordered max_gaps matching":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -107,12 +193,41 @@ setup:
match:
query: "cold outside"
max_gaps: 0
ordered: true
mode: "ordered"
- match: { hits.total.value: 1 }
---
"Test ordered combination with disjunction via mode":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- any_of:
intervals:
- match:
query: "cold"
- match:
query: "outside"
- match:
query: "atmosphere"
mode: "ordered"
- match: { hits.total.value: 1 }
---
"Test ordered combination with disjunction":
- skip:
features: allowed_warnings
- do:
allowed_warnings:
- "Deprecated field [ordered] used, this field is unused and will be removed entirely"
search:
index: test
body:
@ -134,6 +249,9 @@ setup:
---
"Test ordered combination with max_gaps":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -148,11 +266,14 @@ setup:
- match:
query: "outside"
max_gaps: 0
ordered: true
mode: "ordered"
- match: { hits.total.value: 1 }
---
"Test ordered combination":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -166,12 +287,38 @@ setup:
query: "cold"
- match:
query: "outside"
ordered: true
mode: "ordered"
- match: { hits.total.value: 2 }
---
"Test unordered combination via mode":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- match:
query: "cold"
- match:
query: "outside"
max_gaps: 1
mode: "unordered"
- match: { hits.total.value: 2 }
---
"Test unordered combination":
- skip:
features: allowed_warnings
- do:
allowed_warnings:
- "Deprecated field [ordered] used, this field is unused and will be removed entirely"
search:
index: test
body:
@ -188,8 +335,107 @@ setup:
ordered: false
- match: { hits.total.value: 2 }
---
"Test unordered combination with overlap":
- skip:
version: " - 1.99.99"
reason: "Implemented in 2.0"
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- match:
query: "cold"
- match:
query: "wet"
- match:
query: "it"
mode: "unordered"
- match: { hits.total.value: 3 }
---
"Test unordered combination no overlap":
- skip:
version: " - 1.99.99"
reason: "Implemented in 2.0"
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- match:
query: "cold"
- match:
query: "wet"
- match:
query: "it"
mode: "unordered_no_overlap"
- match: { hits.total.value: 2 }
---
"Test nested unordered combination with overlap":
- skip:
version: " - 1.99.99"
reason: "Implemented in 2.0"
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- any_of:
intervals:
- match:
query: "cold"
- match:
query: "hot"
- match:
query: "cold"
mode: "unordered"
- match: { hits.total.value: 6 }
---
"Test nested unordered combination no overlap":
- skip:
version: " - 1.99.99"
reason: "Implemented in 2.0"
- do:
search:
index: test
body:
query:
intervals:
text:
all_of:
intervals:
- any_of:
intervals:
- match:
query: "cold"
- match:
query: "hot"
- match:
query: "cold"
mode: "unordered_no_overlap"
- match: { hits.total.value: 2 }
---
"Test block combination":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -203,13 +449,16 @@ setup:
query: "cold"
- match:
query: "outside"
ordered: true
mode: "ordered"
max_gaps: 0
- match: { hits.total.value: 1 }
---
"Test containing":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -223,7 +472,7 @@ setup:
query: "cold"
- match:
query: "outside"
ordered: false
mode: "unordered"
filter:
containing:
match:
@ -233,6 +482,9 @@ setup:
---
"Test not containing":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -246,7 +498,7 @@ setup:
query: "cold"
- match:
query: "outside"
ordered: false
mode: "unordered"
filter:
not_containing:
match:
@ -255,6 +507,9 @@ setup:
---
"Test contained_by":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -272,7 +527,7 @@ setup:
query: "cold"
- match:
query: "outside"
ordered: false
mode: "unordered"
- match: { hits.total.value: 1 }
---
@ -294,10 +549,13 @@ setup:
query: "cold"
- match:
query: "outside"
- match: { hits.total.value: 1 }
- match: { hits.total.value: 2 }
---
"Test not_overlapping":
- skip:
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -311,7 +569,7 @@ setup:
query: "cold"
- match:
query: "outside"
ordered: true
mode: "ordered"
filter:
not_overlapping:
all_of:
@ -320,14 +578,14 @@ setup:
query: "baby"
- match:
query: "there"
ordered: false
mode: "unordered"
- match: { hits.total.value: 1 }
---
"Test overlapping":
- skip:
version: " - 7.1.99"
reason: "Implemented in 7.2"
version: " - 1.99.99"
reason: "mode introduced in 2.0"
- do:
search:
index: test
@ -337,12 +595,12 @@ setup:
text:
match:
query: "cold outside"
ordered: true
mode: "ordered"
filter:
overlapping:
match:
query: "baby there"
ordered: false
mode: "unordered"
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "3" }

View File

@ -59,6 +59,7 @@ import org.opensearch.common.unit.Fuzziness;
import org.opensearch.index.analysis.NamedAnalyzer;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.query.DistanceFeatureQueryBuilder;
import org.opensearch.index.query.IntervalMode;
import org.opensearch.index.query.QueryRewriteContext;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.query.QueryShardException;
@ -365,7 +366,7 @@ public abstract class MappedFieldType {
/**
* Create an {@link IntervalsSource} to be used for proximity queries
*/
public IntervalsSource intervals(String query, int max_gaps, boolean ordered, NamedAnalyzer analyzer, boolean prefix)
public IntervalsSource intervals(String query, int max_gaps, IntervalMode mode, NamedAnalyzer analyzer, boolean prefix)
throws IOException {
throw new IllegalArgumentException(
"Can only use interval queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"

View File

@ -85,6 +85,7 @@ import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.index.fielddata.plain.PagedBytesIndexFieldData;
import org.opensearch.index.mapper.Mapper.TypeParser.ParserContext;
import org.opensearch.index.query.IntervalBuilder;
import org.opensearch.index.query.IntervalMode;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.similarity.SimilarityProvider;
import org.opensearch.search.aggregations.support.CoreValuesSourceType;
@ -789,7 +790,7 @@ public class TextFieldMapper extends ParametrizedFieldMapper {
}
@Override
public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer, boolean prefix)
public IntervalsSource intervals(String text, int maxGaps, IntervalMode mode, NamedAnalyzer analyzer, boolean prefix)
throws IOException {
if (getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
@ -805,7 +806,7 @@ public class TextFieldMapper extends ParametrizedFieldMapper {
return Intervals.prefix(normalizedTerm);
}
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? getTextSearchInfo().getSearchAnalyzer() : analyzer);
return builder.analyzeText(text, maxGaps, ordered);
return builder.analyzeText(text, maxGaps, mode);
}
@Override

View File

@ -69,13 +69,20 @@ public class IntervalBuilder {
}
/**
 * Overload that accepts the legacy boolean flag instead of an {@link IntervalMode}.
 *
 * @param query   the text to analyze
 * @param maxGaps passed through unchanged to the {@link IntervalMode} overload
 * @param ordered {@code true} selects {@link IntervalMode#ORDERED}, {@code false} selects {@link IntervalMode#UNORDERED}
 * @return the source produced by the {@link IntervalMode} overload
 * @throws IOException if the delegate throws it
 */
public IntervalsSource analyzeText(String query, int maxGaps, boolean ordered) throws IOException {
    final IntervalMode mode;
    if (ordered) {
        mode = IntervalMode.ORDERED;
    } else {
        mode = IntervalMode.UNORDERED;
    }
    return analyzeText(query, maxGaps, mode);
}
public IntervalsSource analyzeText(String query, int maxGaps, IntervalMode mode) throws IOException {
try (TokenStream ts = analyzer.tokenStream(field, query); CachingTokenFilter stream = new CachingTokenFilter(ts)) {
return analyzeText(stream, maxGaps, ordered);
return analyzeText(stream, maxGaps, mode);
}
}
/**
 * Overload that accepts the legacy boolean flag instead of an {@link IntervalMode}.
 *
 * @param stream  the cached token stream to analyze
 * @param maxGaps passed through unchanged to the {@link IntervalMode} overload
 * @param ordered {@code true} selects {@link IntervalMode#ORDERED}, {@code false} selects {@link IntervalMode#UNORDERED}
 * @return the source produced by the {@link IntervalMode} overload
 * @throws IOException if the delegate throws it
 */
protected IntervalsSource analyzeText(CachingTokenFilter stream, int maxGaps, boolean ordered) throws IOException {
    final IntervalMode mode;
    if (ordered) {
        mode = IntervalMode.ORDERED;
    } else {
        mode = IntervalMode.UNORDERED;
    }
    return analyzeText(stream, maxGaps, mode);
}
protected IntervalsSource analyzeText(CachingTokenFilter stream, int maxGaps, IntervalMode mode) throws IOException {
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class);
@ -114,15 +121,15 @@ public class IntervalBuilder {
return analyzeTerm(stream);
} else if (isGraph) {
// graph
return combineSources(analyzeGraph(stream), maxGaps, ordered);
return combineSources(analyzeGraph(stream), maxGaps, mode);
} else {
// phrase
if (hasSynonyms) {
// phrase with single-term synonyms
return analyzeSynonyms(stream, maxGaps, ordered);
return analyzeSynonyms(stream, maxGaps, mode);
} else {
// simple phrase
return combineSources(analyzeTerms(stream), maxGaps, ordered);
return combineSources(analyzeTerms(stream), maxGaps, mode);
}
}
@ -135,7 +142,7 @@ public class IntervalBuilder {
return Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
}
protected static IntervalsSource combineSources(List<IntervalsSource> sources, int maxGaps, boolean ordered) {
protected static IntervalsSource combineSources(List<IntervalsSource> sources, int maxGaps, IntervalMode mode) {
if (sources.size() == 0) {
return NO_INTERVALS;
}
@ -143,10 +150,21 @@ public class IntervalBuilder {
return sources.get(0);
}
IntervalsSource[] sourcesArray = sources.toArray(new IntervalsSource[0]);
if (maxGaps == 0 && ordered) {
if (maxGaps == 0 && mode == IntervalMode.ORDERED) {
return Intervals.phrase(sourcesArray);
}
IntervalsSource inner = ordered ? Intervals.ordered(sourcesArray) : Intervals.unordered(sourcesArray);
IntervalsSource inner;
if (mode == IntervalMode.ORDERED) {
inner = Intervals.ordered(sourcesArray);
} else if (mode == IntervalMode.UNORDERED) {
inner = Intervals.unordered(sourcesArray);
} else {
inner = Intervals.unorderedNoOverlaps(sourcesArray[0], sourcesArray[1]);
for (int sourceIdx = 2; sourceIdx < sourcesArray.length; sourceIdx++) {
inner = Intervals.unorderedNoOverlaps(maxGaps == -1 ? inner : Intervals.maxgaps(maxGaps, inner), sourcesArray[sourceIdx]);
}
}
if (maxGaps == -1) {
return inner;
}
@ -174,7 +192,7 @@ public class IntervalBuilder {
return Intervals.extend(source, precedingSpaces, 0);
}
protected IntervalsSource analyzeSynonyms(TokenStream ts, int maxGaps, boolean ordered) throws IOException {
protected IntervalsSource analyzeSynonyms(TokenStream ts, int maxGaps, IntervalMode mode) throws IOException {
List<IntervalsSource> terms = new ArrayList<>();
List<IntervalsSource> synonyms = new ArrayList<>();
TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
@ -199,7 +217,7 @@ public class IntervalBuilder {
} else {
terms.add(extend(Intervals.or(synonyms.toArray(new IntervalsSource[0])), spaces));
}
return combineSources(terms, maxGaps, ordered);
return combineSources(terms, maxGaps, mode);
}
protected List<IntervalsSource> analyzeGraph(TokenStream source) throws IOException {
@ -222,7 +240,7 @@ public class IntervalBuilder {
Iterator<TokenStream> it = graph.getFiniteStrings(start, end);
while (it.hasNext()) {
TokenStream ts = it.next();
IntervalsSource phrase = combineSources(analyzeTerms(ts), 0, true);
IntervalsSource phrase = combineSources(analyzeTerms(ts), 0, IntervalMode.ORDERED);
if (paths.size() >= maxClauseCount) {
throw new BooleanQuery.TooManyClauses();
}

View File

@ -0,0 +1,59 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
package org.opensearch.index.query;
import org.opensearch.OpenSearchException;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.common.io.stream.StreamOutput;
import org.opensearch.common.io.stream.Writeable;
import java.io.IOException;
/**
 * Matching mode for interval sources: how the sub-intervals of a {@code match} or
 * {@code all_of} rule must be positioned relative to each other.
 *
 * <p>Each constant carries an explicit integer id that is written to and read from the
 * transport stream. The id is deliberately independent of {@link Enum#ordinal()} so that
 * reordering the constants does not change the serialized form.
 */
public enum IntervalMode implements Writeable {
    /** Sub-intervals must appear in the order given. */
    ORDERED(0),
    /** Sub-intervals may appear in any order. */
    UNORDERED(1),
    /** Sub-intervals may appear in any order, combined with the no-overlaps variant. */
    UNORDERED_NO_OVERLAP(2);

    /** Stable wire id of this mode; see {@link #writeTo(StreamOutput)}. */
    private final int id;

    IntervalMode(int id) {
        this.id = id;
    }

    /**
     * Reads a mode previously written by {@link #writeTo(StreamOutput)}.
     *
     * @param in the stream positioned at a serialized mode id
     * @return the mode whose id was read
     * @throws IOException if reading from the stream fails
     * @throws OpenSearchException if the id read does not belong to any mode
     */
    public static IntervalMode readFromStream(StreamInput in) throws IOException {
        int serializedId = in.readVInt();
        for (IntervalMode mode : values()) {
            if (mode.id == serializedId) {
                return mode;
            }
        }
        throw new OpenSearchException("unknown serialized type [" + serializedId + "]");
    }

    /**
     * Writes this mode's id as a variable-length int.
     *
     * @param out the stream to write to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        out.writeVInt(this.id);
    }

    /**
     * Resolves a mode from its name, ignoring case (for example {@code "ordered"} or
     * {@code "unordered_no_overlap"}).
     *
     * @param intervalMode the mode name to parse
     * @return the matching mode
     * @throws IllegalArgumentException if {@code intervalMode} is {@code null} or matches no mode
     */
    public static IntervalMode fromString(String intervalMode) {
        if (intervalMode == null) {
            throw new IllegalArgumentException("cannot parse mode from null string");
        }
        for (IntervalMode mode : IntervalMode.values()) {
            if (mode.name().equalsIgnoreCase(intervalMode)) {
                return mode;
            }
        }
        // The input is a name supplied by the caller, not an ordinal; report it as such.
        throw new IllegalArgumentException("no mode can be parsed from string [" + intervalMode + "]");
    }
}

View File

@ -73,7 +73,7 @@ import static org.opensearch.common.xcontent.ConstructingObjectParser.optionalCo
* Factory class for {@link IntervalsSource}
*
* Built-in sources include {@link Match}, which analyzes a text string and converts it
* to a proximity source (phrase, ordered or unordered depending on how
* to a proximity source (phrase, ordered, unordered, or unordered without overlaps, depending on how
* strict the matching should be); {@link Combine}, which allows proximity queries
* between different sub-sources; and {@link Disjunction}.
*/
@ -130,15 +130,15 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
private final String query;
private final int maxGaps;
private final boolean ordered;
private final IntervalMode mode;
private final String analyzer;
private final IntervalFilter filter;
private final String useField;
public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter, String useField) {
public Match(String query, int maxGaps, IntervalMode mode, String analyzer, IntervalFilter filter, String useField) {
this.query = query;
this.maxGaps = maxGaps;
this.ordered = ordered;
this.mode = mode;
this.analyzer = analyzer;
this.filter = filter;
this.useField = useField;
@ -147,7 +147,15 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
public Match(StreamInput in) throws IOException {
this.query = in.readString();
this.maxGaps = in.readVInt();
this.ordered = in.readBoolean();
if (in.getVersion().onOrAfter(Version.V_2_0_0)) {
this.mode = IntervalMode.readFromStream(in);
} else {
if (in.readBoolean()) {
this.mode = IntervalMode.ORDERED;
} else {
this.mode = IntervalMode.UNORDERED;
}
}
this.analyzer = in.readOptionalString();
this.filter = in.readOptionalWriteable(IntervalFilter::new);
if (in.getVersion().onOrAfter(LegacyESVersion.V_7_2_0)) {
@ -167,9 +175,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
if (useField != null) {
fieldType = context.fieldMapper(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false));
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, mode, analyzer, false));
} else {
source = fieldType.intervals(query, maxGaps, ordered, analyzer, false);
source = fieldType.intervals(query, maxGaps, mode, analyzer, false);
}
if (filter != null) {
return filter.filter(source, context, fieldType);
@ -190,7 +198,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
if (o == null || getClass() != o.getClass()) return false;
Match match = (Match) o;
return maxGaps == match.maxGaps
&& ordered == match.ordered
&& mode == match.mode
&& Objects.equals(query, match.query)
&& Objects.equals(filter, match.filter)
&& Objects.equals(useField, match.useField)
@ -199,7 +207,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
@Override
public int hashCode() {
return Objects.hash(query, maxGaps, ordered, analyzer, filter, useField);
return Objects.hash(query, maxGaps, mode, analyzer, filter, useField);
}
@Override
@ -211,7 +219,11 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
public void writeTo(StreamOutput out) throws IOException {
out.writeString(query);
out.writeVInt(maxGaps);
out.writeBoolean(ordered);
if (out.getVersion().onOrAfter(Version.V_2_0_0)) {
mode.writeTo(out);
} else {
out.writeBoolean(mode == IntervalMode.ORDERED);
}
out.writeOptionalString(analyzer);
out.writeOptionalWriteable(filter);
if (out.getVersion().onOrAfter(LegacyESVersion.V_7_2_0)) {
@ -225,7 +237,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
builder.startObject();
builder.field("query", query);
builder.field("max_gaps", maxGaps);
builder.field("ordered", ordered);
builder.field("mode", mode);
if (analyzer != null) {
builder.field("analyzer", analyzer);
}
@ -241,16 +253,28 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
private static final ConstructingObjectParser<Match, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
String query = (String) args[0];
int max_gaps = (args[1] == null ? -1 : (Integer) args[1]);
boolean ordered = (args[2] != null && (boolean) args[2]);
String analyzer = (String) args[3];
IntervalFilter filter = (IntervalFilter) args[4];
String useField = (String) args[5];
return new Match(query, max_gaps, ordered, analyzer, filter, useField);
Boolean ordered = (Boolean) args[2];
String mode = (String) args[3];
String analyzer = (String) args[4];
IntervalFilter filter = (IntervalFilter) args[5];
String useField = (String) args[6];
IntervalMode intervalMode;
if (ordered != null) {
intervalMode = ordered ? IntervalMode.ORDERED : IntervalMode.UNORDERED;
} else if (mode != null) {
intervalMode = IntervalMode.fromString(mode);
} else {
intervalMode = IntervalMode.UNORDERED;
}
return new Match(query, max_gaps, intervalMode, analyzer, filter, useField);
});
static {
PARSER.declareString(constructorArg(), new ParseField("query"));
PARSER.declareInt(optionalConstructorArg(), new ParseField("max_gaps"));
PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered"));
PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered").withAllDeprecated());
PARSER.declareString(optionalConstructorArg(), new ParseField("mode"));
PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer"));
PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), new ParseField("filter"));
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
@ -268,8 +292,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
return maxGaps;
}
boolean isOrdered() {
return ordered;
IntervalMode getMode() {
return mode;
}
String getAnalyzer() {
@ -395,19 +419,23 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
public static final String NAME = "all_of";
private final List<IntervalsSourceProvider> subSources;
private final boolean ordered;
private final IntervalMode mode;
private final int maxGaps;
private final IntervalFilter filter;
public Combine(List<IntervalsSourceProvider> subSources, boolean ordered, int maxGaps, IntervalFilter filter) {
public Combine(List<IntervalsSourceProvider> subSources, IntervalMode mode, int maxGaps, IntervalFilter filter) {
this.subSources = subSources;
this.ordered = ordered;
this.mode = mode;
this.maxGaps = maxGaps;
this.filter = filter;
}
public Combine(StreamInput in) throws IOException {
this.ordered = in.readBoolean();
if (in.getVersion().onOrAfter(Version.V_2_0_0)) {
this.mode = IntervalMode.readFromStream(in);
} else {
this.mode = in.readBoolean() ? IntervalMode.ORDERED : IntervalMode.UNORDERED;
}
this.subSources = in.readNamedWriteableList(IntervalsSourceProvider.class);
this.maxGaps = in.readInt();
this.filter = in.readOptionalWriteable(IntervalFilter::new);
@ -419,7 +447,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
for (IntervalsSourceProvider provider : subSources) {
ss.add(provider.getSource(ctx, fieldType));
}
IntervalsSource source = IntervalBuilder.combineSources(ss, maxGaps, ordered);
IntervalsSource source = IntervalBuilder.combineSources(ss, maxGaps, mode);
if (filter != null) {
return filter.filter(source, ctx, fieldType);
}
@ -439,14 +467,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
if (o == null || getClass() != o.getClass()) return false;
Combine combine = (Combine) o;
return Objects.equals(subSources, combine.subSources)
&& ordered == combine.ordered
&& mode == combine.mode
&& maxGaps == combine.maxGaps
&& Objects.equals(filter, combine.filter);
}
@Override
public int hashCode() {
return Objects.hash(subSources, ordered, maxGaps, filter);
return Objects.hash(subSources, mode, maxGaps, filter);
}
@Override
@ -456,7 +484,11 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeBoolean(ordered);
if (out.getVersion().onOrAfter(Version.V_2_0_0)) {
mode.writeTo(out);
} else {
out.writeBoolean(mode == IntervalMode.ORDERED);
}
out.writeNamedWriteableList(subSources);
out.writeInt(maxGaps);
out.writeOptionalWriteable(filter);
@ -465,7 +497,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(NAME);
builder.field("ordered", ordered);
builder.field("mode", mode);
builder.field("max_gaps", maxGaps);
builder.startArray("intervals");
for (IntervalsSourceProvider provider : subSources) {
@ -482,14 +514,26 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
@SuppressWarnings("unchecked")
static final ConstructingObjectParser<Combine, Void> PARSER = new ConstructingObjectParser<>(NAME, args -> {
boolean ordered = (args[0] != null && (boolean) args[0]);
List<IntervalsSourceProvider> subSources = (List<IntervalsSourceProvider>) args[1];
Integer maxGaps = (args[2] == null ? -1 : (Integer) args[2]);
IntervalFilter filter = (IntervalFilter) args[3];
return new Combine(subSources, ordered, maxGaps, filter);
Boolean ordered = (Boolean) args[0];
String mode = (String) args[1];
List<IntervalsSourceProvider> subSources = (List<IntervalsSourceProvider>) args[2];
Integer maxGaps = (args[3] == null ? -1 : (Integer) args[3]);
IntervalFilter filter = (IntervalFilter) args[4];
IntervalMode intervalMode;
if (ordered != null) {
intervalMode = ordered ? IntervalMode.ORDERED : IntervalMode.UNORDERED;
} else if (mode != null) {
intervalMode = IntervalMode.fromString(mode);
} else {
intervalMode = IntervalMode.UNORDERED;
}
return new Combine(subSources, intervalMode, maxGaps, filter);
});
static {
PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered"));
PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered").withAllDeprecated());
PARSER.declareString(optionalConstructorArg(), new ParseField("mode"));
PARSER.declareObjectArray(
constructorArg(),
(p, c) -> IntervalsSourceProvider.parseInnerIntervals(p),
@ -507,8 +551,8 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
return subSources;
}
boolean isOrdered() {
return ordered;
IntervalMode getMode() {
return mode;
}
int getMaxGaps() {
@ -550,9 +594,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
if (useField != null) {
fieldType = context.fieldMapper(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(prefix, 0, false, analyzer, true));
source = Intervals.fixField(useField, fieldType.intervals(prefix, 0, IntervalMode.UNORDERED, analyzer, true));
} else {
source = fieldType.intervals(prefix, 0, false, analyzer, true);
source = fieldType.intervals(prefix, 0, IntervalMode.UNORDERED, analyzer, true);
}
return source;
}

View File

@ -53,7 +53,7 @@ public class CombineIntervalsSourceProviderTests extends AbstractSerializingTest
@Override
protected Combine mutateInstance(Combine instance) throws IOException {
List<IntervalsSourceProvider> subSources = instance.getSubSources();
boolean ordered = instance.isOrdered();
IntervalMode mode = instance.getMode();
int maxGaps = instance.getMaxGaps();
IntervalsSourceProvider.IntervalFilter filter = instance.getFilter();
switch (between(0, 3)) {
@ -63,7 +63,13 @@ public class CombineIntervalsSourceProviderTests extends AbstractSerializingTest
: null;
break;
case 1:
ordered = !ordered;
if (mode == IntervalMode.ORDERED) {
mode = randomBoolean() ? IntervalMode.UNORDERED : IntervalMode.UNORDERED_NO_OVERLAP;
} else if (mode == IntervalMode.UNORDERED) {
mode = randomBoolean() ? IntervalMode.ORDERED : IntervalMode.UNORDERED_NO_OVERLAP;
} else {
mode = randomBoolean() ? IntervalMode.UNORDERED : IntervalMode.ORDERED;
}
break;
case 2:
maxGaps++;
@ -76,7 +82,7 @@ public class CombineIntervalsSourceProviderTests extends AbstractSerializingTest
default:
throw new AssertionError("Illegal randomisation branch");
}
return new Combine(subSources, ordered, maxGaps, filter);
return new Combine(subSources, mode, maxGaps, filter);
}
@Override

View File

@ -78,6 +78,19 @@ public class IntervalBuilderTests extends OpenSearchTestCase {
}
/**
 * Three single-position terms analyzed with {@link IntervalMode#UNORDERED_NO_OVERLAP} and
 * {@code maxGaps == -1} should fold left to right into nested
 * {@code Intervals.unorderedNoOverlaps} sources.
 */
public void testUnorderedNoOverlap() throws IOException {
    Token[] tokens = { new Token("term1", 1, 2), new Token("term2", 3, 4), new Token("term3", 5, 6) };
    CachingTokenFilter stream = new CachingTokenFilter(new CannedTokenStream(tokens));

    // Expected shape: ((term1, term2), term3)
    IntervalsSource firstPair = Intervals.unorderedNoOverlaps(Intervals.term("term1"), Intervals.term("term2"));
    IntervalsSource expected = Intervals.unorderedNoOverlaps(firstPair, Intervals.term("term3"));

    IntervalsSource actual = BUILDER.analyzeText(stream, -1, IntervalMode.UNORDERED_NO_OVERLAP);
    assertEquals(expected, actual);
}
public void testPhrase() throws IOException {
CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2), new Token("term2", 3, 4), new Token("term3", 5, 6));

View File

@ -137,10 +137,24 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
static IntervalsSourceProvider.Combine createRandomCombine(int depth, boolean useScripts) {
int count = randomInt(5) + 1;
List<IntervalsSourceProvider> subSources = createRandomSourceList(depth, useScripts, count);
boolean ordered = randomBoolean();
IntervalMode mode;
switch (randomIntBetween(0, 2)) {
case 0:
mode = IntervalMode.ORDERED;
break;
case 1:
mode = IntervalMode.UNORDERED;
break;
case 2:
mode = IntervalMode.UNORDERED_NO_OVERLAP;
break;
default:
throw new AssertionError("Illegal randomisation branch");
}
int maxGaps = randomInt(5) - 1;
IntervalsSourceProvider.IntervalFilter filter = createRandomFilter(depth + 1, useScripts);
return new IntervalsSourceProvider.Combine(subSources, ordered, maxGaps, filter);
return new IntervalsSourceProvider.Combine(subSources, mode, maxGaps, filter);
}
static List<IntervalsSourceProvider> createRandomSourceList(int depth, boolean useScripts, int count) {
@ -173,10 +187,23 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
words.add(randomRealisticUnicodeOfLengthBetween(4, 20));
}
String text = String.join(" ", words);
boolean mOrdered = randomBoolean();
IntervalMode mMode;
switch (randomIntBetween(0, 2)) {
case 0:
mMode = IntervalMode.ORDERED;
break;
case 1:
mMode = IntervalMode.UNORDERED;
break;
case 2:
mMode = IntervalMode.UNORDERED_NO_OVERLAP;
break;
default:
throw new AssertionError("Illegal randomisation branch");
}
int maxMGaps = randomInt(5) - 1;
String analyzer = randomFrom("simple", "keyword", "whitespace");
return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter(depth + 1, useScripts), useField);
return new IntervalsSourceProvider.Match(text, maxMGaps, mMode, analyzer, createRandomFilter(depth + 1, useScripts), useField);
}
@Override
@ -190,7 +217,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
IntervalsSourceProvider.IntervalFilter scriptFilter = new IntervalsSourceProvider.IntervalFilter(
new Script(ScriptType.INLINE, "mockscript", "1", Collections.emptyMap())
);
IntervalsSourceProvider source = new IntervalsSourceProvider.Match("text", 0, true, "simple", scriptFilter, null);
IntervalsSourceProvider source = new IntervalsSourceProvider.Match("text", 0, IntervalMode.ORDERED, "simple", scriptFilter, null);
queryBuilder = new IntervalQueryBuilder(TEXT_FIELD_NAME, source);
rewriteQuery = rewriteQuery(queryBuilder, new QueryShardContext(context));
assertNotNull(rewriteQuery.toQuery(context));
@ -243,7 +270,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
+ "\" : { "
+ " \"match\" : { "
+ " \"query\" : \"Hello world\","
+ " \"ordered\" : true },"
+ " \"mode\" : \"ordered\" },"
+ " \"boost\" : 2 } } }";
builder = (IntervalQueryBuilder) parseQuery(json);
@ -253,6 +280,90 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
);
assertEquals(expected, builder.toQuery(createShardContext()));
json = "{ \"intervals\" : "
+ "{ \""
+ TEXT_FIELD_NAME
+ "\" : { "
+ " \"match\" : { "
+ " \"query\" : \"Hello world\","
+ " \"mode\" : \"unordered_no_overlap\" },"
+ " \"boost\" : 2 } } }";
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new BoostQuery(
new IntervalQuery(TEXT_FIELD_NAME, Intervals.unorderedNoOverlaps(Intervals.term("hello"), Intervals.term("world"))),
2
);
assertEquals(expected, builder.toQuery(createShardContext()));
json = "{ \"intervals\" : "
+ "{ \""
+ TEXT_FIELD_NAME
+ "\" : { "
+ " \"match\" : { "
+ " \"query\" : \"Hello world\","
+ " \"mode\" : \"unordered_no_overlap\","
+ " \"max_gaps\" : 11 },"
+ " \"boost\" : 2 } } }";
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new BoostQuery(
new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.maxgaps(11, Intervals.unorderedNoOverlaps(Intervals.term("hello"), Intervals.term("world")))
),
2
);
assertEquals(expected, builder.toQuery(createShardContext()));
json = "{ \"intervals\" : "
+ "{ \""
+ TEXT_FIELD_NAME
+ "\" : { "
+ " \"match\" : { "
+ " \"query\" : \"Hello Open Search\","
+ " \"mode\" : \"unordered_no_overlap\" },"
+ " \"boost\" : 3 } } }";
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new BoostQuery(
new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.unorderedNoOverlaps(
Intervals.unorderedNoOverlaps(Intervals.term("hello"), Intervals.term("open")),
Intervals.term("search")
)
),
3
);
assertEquals(expected, builder.toQuery(createShardContext()));
json = "{ \"intervals\" : "
+ "{ \""
+ TEXT_FIELD_NAME
+ "\" : { "
+ " \"match\" : { "
+ " \"query\" : \"Hello Open Search\","
+ " \"mode\" : \"unordered_no_overlap\","
+ " \"max_gaps\": 12 },"
+ " \"boost\" : 3 } } }";
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new BoostQuery(
new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.maxgaps(
12,
Intervals.unorderedNoOverlaps(
Intervals.maxgaps(12, Intervals.unorderedNoOverlaps(Intervals.term("hello"), Intervals.term("open"))),
Intervals.term("search")
)
)
),
3
);
assertEquals(expected, builder.toQuery(createShardContext()));
json = "{ \"intervals\" : "
+ "{ \""
+ TEXT_FIELD_NAME
@ -261,7 +372,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
+ " \"query\" : \"Hello world\","
+ " \"max_gaps\" : 10,"
+ " \"analyzer\" : \"whitespace\","
+ " \"ordered\" : true } } } }";
+ " \"mode\" : \"ordered\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new IntervalQuery(
@ -281,7 +392,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
+ " \"use_field\" : \""
+ MASKED_FIELD
+ "\","
+ " \"ordered\" : true } } } }";
+ " \"mode\" : \"ordered\" } } } }";
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new IntervalQuery(
@ -298,7 +409,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
+ " \"query\" : \"Hello world\","
+ " \"max_gaps\" : 10,"
+ " \"analyzer\" : \"whitespace\","
+ " \"ordered\" : true,"
+ " \"mode\" : \"ordered\","
+ " \"filter\" : {"
+ " \"containing\" : {"
+ " \"match\" : { \"query\" : \"blah\" } } } } } } }";
@ -350,11 +461,11 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
+ TEXT_FIELD_NAME
+ "\": {"
+ " \"all_of\" : {"
+ " \"ordered\" : true,"
+ " \"mode\" : \"ordered\","
+ " \"intervals\" : ["
+ " { \"match\" : { \"query\" : \"one\" } },"
+ " { \"all_of\" : { "
+ " \"ordered\" : false,"
+ " \"mode\" : \"unordered\","
+ " \"intervals\" : ["
+ " { \"match\" : { \"query\" : \"two\" } },"
+ " { \"match\" : { \"query\" : \"three\" } } ] } } ],"
@ -381,6 +492,52 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
);
assertEquals(expected, builder.toQuery(createShardContext()));
json = "{ \"intervals\" : { \""
+ TEXT_FIELD_NAME
+ "\": {"
+ " \"all_of\" : {"
+ " \"mode\" : \"unordered_no_overlap\","
+ " \"intervals\" : ["
+ " { \"match\" : { \"query\" : \"one\" } },"
+ " { \"match\" : { \"query\" : \"two\" } } ],"
+ " \"max_gaps\" : 30 },"
+ " \"boost\" : 1.5 } } }";
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new BoostQuery(
new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.maxgaps(30, Intervals.unorderedNoOverlaps(Intervals.term("one"), Intervals.term("two")))
),
1.5f
);
assertEquals(expected, builder.toQuery(createShardContext()));
json = "{ \"intervals\" : { \""
+ TEXT_FIELD_NAME
+ "\": {"
+ " \"all_of\" : {"
+ " \"mode\" : \"unordered_no_overlap\","
+ " \"intervals\" : ["
+ " { \"match\" : { \"query\" : \"one\" } },"
+ " { \"match\" : { \"query\" : \"two\" } },"
+ " { \"match\" : { \"query\" : \"three\" } } ],"
+ " \"max_gaps\" : 3 },"
+ " \"boost\" : 3.5 } } }";
builder = (IntervalQueryBuilder) parseQuery(json);
expected = new BoostQuery(
new IntervalQuery(
TEXT_FIELD_NAME,
Intervals.maxgaps(
3,
Intervals.unorderedNoOverlaps(
Intervals.maxgaps(3, Intervals.unorderedNoOverlaps(Intervals.term("one"), Intervals.term("two"))),
Intervals.term("three")
)
)
),
3.5f
);
assertEquals(expected, builder.toQuery(createShardContext()));
}
public void testCombineDisjunctionInterval() throws IOException {
@ -389,7 +546,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
+ TEXT_FIELD_NAME
+ "\": { "
+ " \"all_of\" : {"
+ " \"ordered\" : true,"
+ " \"mode\" : \"ordered\","
+ " \"intervals\" : ["
+ " { \"match\" : { \"query\" : \"atmosphere\" } },"
+ " { \"any_of\" : {"
@ -416,7 +573,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
}
public void testNonIndexedFields() throws IOException {
IntervalsSourceProvider provider = new IntervalsSourceProvider.Match("test", 0, true, null, null, null);
IntervalsSourceProvider provider = new IntervalsSourceProvider.Match("test", 0, IntervalMode.ORDERED, null, null, null);
IntervalQueryBuilder b = new IntervalQueryBuilder("no_such_field", provider);
assertThat(b.toQuery(createShardContext()), equalTo(new MatchNoDocsQuery()));
@ -446,7 +603,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
+ " \"use_field\" : \""
+ NO_POSITIONS_FIELD
+ "\","
+ " \"ordered\" : true } } } }";
+ " \"mode\" : \"ordered\" } } } }";
e = expectThrows(IllegalArgumentException.class, () -> {
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);

View File

@ -53,7 +53,7 @@ public class MatchIntervalsSourceProviderTests extends AbstractSerializingTestCa
protected Match mutateInstance(Match instance) throws IOException {
String query = instance.getQuery();
int maxGaps = instance.getMaxGaps();
boolean isOrdered = instance.isOrdered();
IntervalMode mode = instance.getMode();
String analyzer = instance.getAnalyzer();
IntervalsSourceProvider.IntervalFilter filter = instance.getFilter();
String useField = instance.getUseField();
@ -65,7 +65,13 @@ public class MatchIntervalsSourceProviderTests extends AbstractSerializingTestCa
maxGaps++;
break;
case 2:
isOrdered = !isOrdered;
if (mode == IntervalMode.ORDERED) {
mode = randomBoolean() ? IntervalMode.UNORDERED : IntervalMode.UNORDERED_NO_OVERLAP;
} else if (mode == IntervalMode.UNORDERED) {
mode = randomBoolean() ? IntervalMode.ORDERED : IntervalMode.UNORDERED_NO_OVERLAP;
} else {
mode = randomBoolean() ? IntervalMode.UNORDERED : IntervalMode.ORDERED;
}
break;
case 3:
analyzer = analyzer == null ? randomAlphaOfLength(5) : null;
@ -81,7 +87,7 @@ public class MatchIntervalsSourceProviderTests extends AbstractSerializingTestCa
default:
throw new AssertionError("Illegal randomisation branch");
}
return new Match(query, maxGaps, isOrdered, analyzer, filter, useField);
return new Match(query, maxGaps, mode, analyzer, filter, useField);
}
@Override