From 37235fafd952b0cd311cdbb9a2c3a07193093bcc Mon Sep 17 00:00:00 2001 From: Matt Weber Date: Thu, 24 Feb 2022 12:54:13 -0800 Subject: [PATCH] Case Insensitive Support in Regexp Interval (#2237) Add a `case_insensitive` flag to regexp interval source. Signed-off-by: Matt Weber --- .../test/search/230_interval_query.yml | 50 +++++++++++++++++++ .../index/query/IntervalsSourceProvider.java | 45 ++++++++++++++--- .../query/IntervalQueryBuilderTests.java | 15 +++++- .../RegexpIntervalsSourceProviderTests.java | 12 +++-- 4 files changed, 111 insertions(+), 11 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml index dc376a16a91..93e73764d8e 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -25,6 +25,56 @@ setup: - '{"index": {"_index": "test", "_id": "6"}}' - '{"text" : "that is some cold cold rain"}' +--- +"Test regexp": + - skip: + version: " - 1.2.99" + reason: "regexp introduced in 1.3" + - do: + search: + index: test + body: + query: + intervals: + text: + regexp: + pattern: "at[a-z]{2,}here" + - match: { hits.total.value: 1 } + +--- +"Test regexp, explicit case sensitive": + - skip: + version: " - 1.99.99" + reason: "case_insensitive introduced in 2.0" + - do: + search: + index: test + body: + query: + intervals: + text: + regexp: + pattern: "AT[a-z]{2,}HERE" + case_insensitive: false + - match: { hits.total.value: 0 } + +--- +"Test regexp, explicit case insensitive": + - skip: + version: " - 1.99.99" + reason: "case_insensitive introduced in 2.0" + - do: + search: + index: test + body: + query: + intervals: + text: + regexp: + pattern: "AT[a-z]{2,}HERE" + case_insensitive: true + - match: { hits.total.value: 1 } + --- "Test ordered matching with via mode": - skip: diff --git a/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java index b610883e777..ab2e65b3b65 100644 --- a/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/opensearch/index/query/IntervalsSourceProvider.java @@ -40,6 +40,7 @@ import org.apache.lucene.queries.intervals.IntervalsSource; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.CompiledAutomaton; +import org.apache.lucene.util.automaton.RegExp; import org.opensearch.LegacyESVersion; import org.opensearch.Version; import org.opensearch.common.ParseField; @@ -687,12 +688,20 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont private final int flags; private final String useField; private final Integer maxExpansions; + private final boolean caseInsensitive; - public Regexp(String pattern, int flags, String useField, Integer maxExpansions) { + /** + * Constructor + * + * {@code flags} is Lucene's syntax flags + * and {@code caseInsensitive} enables Lucene's only matching flag. + */ + public Regexp(String pattern, int flags, String useField, Integer maxExpansions, boolean caseInsensitive) { this.pattern = pattern; this.flags = flags; this.useField = useField; this.maxExpansions = (maxExpansions != null && maxExpansions > 0) ? maxExpansions : null; + this.caseInsensitive = caseInsensitive; } public Regexp(StreamInput in) throws IOException { @@ -700,11 +709,20 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont this.flags = in.readVInt(); this.useField = in.readOptionalString(); this.maxExpansions = in.readOptionalVInt(); + if (in.getVersion().onOrAfter(Version.V_2_0_0)) { + this.caseInsensitive = in.readBoolean(); + } else { + this.caseInsensitive = false; + } } @Override public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) { - final org.apache.lucene.util.automaton.RegExp regexp = new org.apache.lucene.util.automaton.RegExp(pattern, flags); + final org.apache.lucene.util.automaton.RegExp regexp = new org.apache.lucene.util.automaton.RegExp( + pattern, + flags, + caseInsensitive ? RegExp.ASCII_CASE_INSENSITIVE : 0 + ); final CompiledAutomaton automaton = new CompiledAutomaton(regexp.toAutomaton()); if (useField != null) { @@ -745,12 +763,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont return Objects.equals(pattern, regexp.pattern) && Objects.equals(flags, regexp.flags) && Objects.equals(useField, regexp.useField) - && Objects.equals(maxExpansions, regexp.maxExpansions); + && Objects.equals(maxExpansions, regexp.maxExpansions) + && Objects.equals(caseInsensitive, regexp.caseInsensitive); } @Override public int hashCode() { - return Objects.hash(pattern, flags, useField, maxExpansions); + return Objects.hash(pattern, flags, useField, maxExpansions, caseInsensitive); } @Override @@ -764,6 +783,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont out.writeVInt(flags); out.writeOptionalString(useField); out.writeOptionalVInt(maxExpansions); + if (out.getVersion().onOrAfter(Version.V_2_0_0)) { + out.writeBoolean(caseInsensitive); + } } @Override @@ -779,6 +801,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont if (maxExpansions != null) { builder.field("max_expansions", maxExpansions); } + if (caseInsensitive) { + builder.field("case_insensitive", caseInsensitive); + } builder.endObject(); return builder; } @@ -789,13 +814,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont Integer flagsValue = (Integer) args[2]; String useField = (String) args[3]; Integer maxExpansions = (Integer) args[4]; + boolean caseInsensitive = args[5] != null && (boolean) args[5]; if (flagsValue != null) { - return new Regexp(pattern, flagsValue, useField, maxExpansions); + return new Regexp(pattern, flagsValue, useField, maxExpansions, caseInsensitive); } else if (flags != null) { - return new Regexp(pattern, RegexpFlag.resolveValue(flags), useField, maxExpansions); + return new Regexp(pattern, RegexpFlag.resolveValue(flags), useField, maxExpansions, caseInsensitive); } else { - return new Regexp(pattern, DEFAULT_FLAGS_VALUE, useField, maxExpansions); + return new Regexp(pattern, DEFAULT_FLAGS_VALUE, useField, maxExpansions, caseInsensitive); } }); static { @@ -804,6 +830,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont PARSER.declareInt(optionalConstructorArg(), new ParseField("flags_value")); PARSER.declareString(optionalConstructorArg(), new ParseField("use_field")); PARSER.declareInt(optionalConstructorArg(), new ParseField("max_expansions")); + PARSER.declareBoolean(optionalConstructorArg(), new ParseField("case_insensitive")); } public static Regexp fromXContent(XContentParser parser) throws IOException { @@ -825,6 +852,10 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont Integer getMaxExpansions() { return maxExpansions; } + + boolean isCaseInsensitive() { + return caseInsensitive; + } } public static class Wildcard extends IntervalsSourceProvider { diff --git a/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java index 66035c2fbd1..d7f57eef5e0 100644 --- a/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/IntervalQueryBuilderTests.java @@ -846,7 +846,11 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase