Case Insensitive Support in Regexp Interval (#2237)
Add a `case_insensitive` flag to the regexp interval source.

Signed-off-by: Matt Weber <matt@mattweber.org>
This commit is contained in:
parent 788ba99915
commit 37235fafd9
|
@ -25,6 +25,56 @@ setup:
|
||||||
- '{"index": {"_index": "test", "_id": "6"}}'
|
- '{"index": {"_index": "test", "_id": "6"}}'
|
||||||
- '{"text" : "that is some cold cold rain"}'
|
- '{"text" : "that is some cold cold rain"}'
|
||||||
|
|
||||||
|
---
|
||||||
|
"Test regexp":
|
||||||
|
- skip:
|
||||||
|
version: " - 1.2.99"
|
||||||
|
reason: "regexp introduced in 1.3"
|
||||||
|
- do:
|
||||||
|
search:
|
||||||
|
index: test
|
||||||
|
body:
|
||||||
|
query:
|
||||||
|
intervals:
|
||||||
|
text:
|
||||||
|
regexp:
|
||||||
|
pattern: "at[a-z]{2,}here"
|
||||||
|
- match: { hits.total.value: 1 }
|
||||||
|
|
||||||
|
---
|
||||||
|
"Test regexp, explicit case sensitive":
|
||||||
|
- skip:
|
||||||
|
version: " - 1.99.99"
|
||||||
|
reason: "case_insensitive introduced in 2.0"
|
||||||
|
- do:
|
||||||
|
search:
|
||||||
|
index: test
|
||||||
|
body:
|
||||||
|
query:
|
||||||
|
intervals:
|
||||||
|
text:
|
||||||
|
regexp:
|
||||||
|
pattern: "AT[a-z]{2,}HERE"
|
||||||
|
case_insensitive: false
|
||||||
|
- match: { hits.total.value: 0 }
|
||||||
|
|
||||||
|
---
|
||||||
|
"Test regexp, explicit case insensitive":
|
||||||
|
- skip:
|
||||||
|
version: " - 1.99.99"
|
||||||
|
reason: "case_insensitive introduced in 2.0"
|
||||||
|
- do:
|
||||||
|
search:
|
||||||
|
index: test
|
||||||
|
body:
|
||||||
|
query:
|
||||||
|
intervals:
|
||||||
|
text:
|
||||||
|
regexp:
|
||||||
|
pattern: "AT[a-z]{2,}HERE"
|
||||||
|
case_insensitive: true
|
||||||
|
- match: { hits.total.value: 1 }
|
||||||
|
|
||||||
---
|
---
|
||||||
"Test ordered matching with via mode":
|
"Test ordered matching with via mode":
|
||||||
- skip:
|
- skip:
|
||||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||||
import org.apache.lucene.search.FuzzyQuery;
|
import org.apache.lucene.search.FuzzyQuery;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||||
|
import org.apache.lucene.util.automaton.RegExp;
|
||||||
import org.opensearch.LegacyESVersion;
|
import org.opensearch.LegacyESVersion;
|
||||||
import org.opensearch.Version;
|
import org.opensearch.Version;
|
||||||
import org.opensearch.common.ParseField;
|
import org.opensearch.common.ParseField;
|
||||||
|
@ -687,12 +688,20 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||||
private final int flags;
|
private final int flags;
|
||||||
private final String useField;
|
private final String useField;
|
||||||
private final Integer maxExpansions;
|
private final Integer maxExpansions;
|
||||||
|
private final boolean caseInsensitive;
|
||||||
|
|
||||||
public Regexp(String pattern, int flags, String useField, Integer maxExpansions) {
|
/**
|
||||||
|
* Constructor
|
||||||
|
*
|
||||||
|
* {@code flags} is Lucene's <a href="https://github.com/apache/lucene/blob/main/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java#L391-L411">syntax flags</a>
|
||||||
|
* and {@code caseInsensitive} enables Lucene's only <a href="https://github.com/apache/lucene/blob/main/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java#L416">matching flag</a>.
|
||||||
|
*/
|
||||||
|
public Regexp(String pattern, int flags, String useField, Integer maxExpansions, boolean caseInsensitive) {
|
||||||
this.pattern = pattern;
|
this.pattern = pattern;
|
||||||
this.flags = flags;
|
this.flags = flags;
|
||||||
this.useField = useField;
|
this.useField = useField;
|
||||||
this.maxExpansions = (maxExpansions != null && maxExpansions > 0) ? maxExpansions : null;
|
this.maxExpansions = (maxExpansions != null && maxExpansions > 0) ? maxExpansions : null;
|
||||||
|
this.caseInsensitive = caseInsensitive;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Regexp(StreamInput in) throws IOException {
|
public Regexp(StreamInput in) throws IOException {
|
||||||
|
@ -700,11 +709,20 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||||
this.flags = in.readVInt();
|
this.flags = in.readVInt();
|
||||||
this.useField = in.readOptionalString();
|
this.useField = in.readOptionalString();
|
||||||
this.maxExpansions = in.readOptionalVInt();
|
this.maxExpansions = in.readOptionalVInt();
|
||||||
|
if (in.getVersion().onOrAfter(Version.V_2_0_0)) {
|
||||||
|
this.caseInsensitive = in.readBoolean();
|
||||||
|
} else {
|
||||||
|
this.caseInsensitive = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) {
|
public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) {
|
||||||
final org.apache.lucene.util.automaton.RegExp regexp = new org.apache.lucene.util.automaton.RegExp(pattern, flags);
|
final org.apache.lucene.util.automaton.RegExp regexp = new org.apache.lucene.util.automaton.RegExp(
|
||||||
|
pattern,
|
||||||
|
flags,
|
||||||
|
caseInsensitive ? RegExp.ASCII_CASE_INSENSITIVE : 0
|
||||||
|
);
|
||||||
final CompiledAutomaton automaton = new CompiledAutomaton(regexp.toAutomaton());
|
final CompiledAutomaton automaton = new CompiledAutomaton(regexp.toAutomaton());
|
||||||
|
|
||||||
if (useField != null) {
|
if (useField != null) {
|
||||||
|
@ -745,12 +763,13 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||||
return Objects.equals(pattern, regexp.pattern)
|
return Objects.equals(pattern, regexp.pattern)
|
||||||
&& Objects.equals(flags, regexp.flags)
|
&& Objects.equals(flags, regexp.flags)
|
||||||
&& Objects.equals(useField, regexp.useField)
|
&& Objects.equals(useField, regexp.useField)
|
||||||
&& Objects.equals(maxExpansions, regexp.maxExpansions);
|
&& Objects.equals(maxExpansions, regexp.maxExpansions)
|
||||||
|
&& Objects.equals(caseInsensitive, regexp.caseInsensitive);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Objects.hash(pattern, flags, useField, maxExpansions);
|
return Objects.hash(pattern, flags, useField, maxExpansions, caseInsensitive);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -764,6 +783,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||||
out.writeVInt(flags);
|
out.writeVInt(flags);
|
||||||
out.writeOptionalString(useField);
|
out.writeOptionalString(useField);
|
||||||
out.writeOptionalVInt(maxExpansions);
|
out.writeOptionalVInt(maxExpansions);
|
||||||
|
if (out.getVersion().onOrAfter(Version.V_2_0_0)) {
|
||||||
|
out.writeBoolean(caseInsensitive);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -779,6 +801,9 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||||
if (maxExpansions != null) {
|
if (maxExpansions != null) {
|
||||||
builder.field("max_expansions", maxExpansions);
|
builder.field("max_expansions", maxExpansions);
|
||||||
}
|
}
|
||||||
|
if (caseInsensitive) {
|
||||||
|
builder.field("case_insensitive", caseInsensitive);
|
||||||
|
}
|
||||||
builder.endObject();
|
builder.endObject();
|
||||||
return builder;
|
return builder;
|
||||||
}
|
}
|
||||||
|
@ -789,13 +814,14 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||||
Integer flagsValue = (Integer) args[2];
|
Integer flagsValue = (Integer) args[2];
|
||||||
String useField = (String) args[3];
|
String useField = (String) args[3];
|
||||||
Integer maxExpansions = (Integer) args[4];
|
Integer maxExpansions = (Integer) args[4];
|
||||||
|
boolean caseInsensitive = args[5] != null && (boolean) args[5];
|
||||||
|
|
||||||
if (flagsValue != null) {
|
if (flagsValue != null) {
|
||||||
return new Regexp(pattern, flagsValue, useField, maxExpansions);
|
return new Regexp(pattern, flagsValue, useField, maxExpansions, caseInsensitive);
|
||||||
} else if (flags != null) {
|
} else if (flags != null) {
|
||||||
return new Regexp(pattern, RegexpFlag.resolveValue(flags), useField, maxExpansions);
|
return new Regexp(pattern, RegexpFlag.resolveValue(flags), useField, maxExpansions, caseInsensitive);
|
||||||
} else {
|
} else {
|
||||||
return new Regexp(pattern, DEFAULT_FLAGS_VALUE, useField, maxExpansions);
|
return new Regexp(pattern, DEFAULT_FLAGS_VALUE, useField, maxExpansions, caseInsensitive);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
static {
|
static {
|
||||||
|
@ -804,6 +830,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||||
PARSER.declareInt(optionalConstructorArg(), new ParseField("flags_value"));
|
PARSER.declareInt(optionalConstructorArg(), new ParseField("flags_value"));
|
||||||
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
|
PARSER.declareString(optionalConstructorArg(), new ParseField("use_field"));
|
||||||
PARSER.declareInt(optionalConstructorArg(), new ParseField("max_expansions"));
|
PARSER.declareInt(optionalConstructorArg(), new ParseField("max_expansions"));
|
||||||
|
PARSER.declareBoolean(optionalConstructorArg(), new ParseField("case_insensitive"));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Regexp fromXContent(XContentParser parser) throws IOException {
|
public static Regexp fromXContent(XContentParser parser) throws IOException {
|
||||||
|
@ -825,6 +852,10 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||||
Integer getMaxExpansions() {
|
Integer getMaxExpansions() {
|
||||||
return maxExpansions;
|
return maxExpansions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean isCaseInsensitive() {
|
||||||
|
return caseInsensitive;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class Wildcard extends IntervalsSourceProvider {
|
public static class Wildcard extends IntervalsSourceProvider {
|
||||||
|
|
|
@ -846,7 +846,11 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
||||||
}
|
}
|
||||||
|
|
||||||
private static IntervalsSource buildRegexpSource(String pattern, int flags, Integer maxExpansions) {
|
private static IntervalsSource buildRegexpSource(String pattern, int flags, Integer maxExpansions) {
|
||||||
final RegExp regexp = new RegExp(pattern, flags);
|
return buildRegexpSource(pattern, flags, 0, maxExpansions);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static IntervalsSource buildRegexpSource(String pattern, int flags, int matchFlags, Integer maxExpansions) {
|
||||||
|
final RegExp regexp = new RegExp(pattern, flags, matchFlags);
|
||||||
CompiledAutomaton automaton = new CompiledAutomaton(regexp.toAutomaton());
|
CompiledAutomaton automaton = new CompiledAutomaton(regexp.toAutomaton());
|
||||||
|
|
||||||
if (maxExpansions != null) {
|
if (maxExpansions != null) {
|
||||||
|
@ -922,6 +926,15 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
||||||
expected = new IntervalQuery(TEXT_FIELD_NAME, buildRegexpSource("te.m", DEFAULT_FLAGS, 500));
|
expected = new IntervalQuery(TEXT_FIELD_NAME, buildRegexpSource("te.m", DEFAULT_FLAGS, 500));
|
||||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||||
|
|
||||||
|
String regexp_case_insensitive_json = "{ \"intervals\" : { \""
|
||||||
|
+ TEXT_FIELD_NAME
|
||||||
|
+ "\": { "
|
||||||
|
+ "\"regexp\" : { \"pattern\" : \"TE.M\", \"case_insensitive\" : true } } } }";
|
||||||
|
|
||||||
|
builder = (IntervalQueryBuilder) parseQuery(regexp_case_insensitive_json);
|
||||||
|
expected = new IntervalQuery(TEXT_FIELD_NAME, buildRegexpSource("TE.M", DEFAULT_FLAGS, RegExp.ASCII_CASE_INSENSITIVE, null));
|
||||||
|
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||||
|
|
||||||
String regexp_neg_max_expand_json = "{ \"intervals\" : { \""
|
String regexp_neg_max_expand_json = "{ \"intervals\" : { \""
|
||||||
+ TEXT_FIELD_NAME
|
+ TEXT_FIELD_NAME
|
||||||
+ "\": { "
|
+ "\": { "
|
||||||
|
|
|
@ -32,7 +32,8 @@ public class RegexpIntervalsSourceProviderTests extends AbstractSerializingTestC
|
||||||
randomAlphaOfLengthBetween(0, 3) + (randomBoolean() ? ".*?" : "." + randomAlphaOfLength(4)) + randomAlphaOfLengthBetween(0, 5),
|
randomAlphaOfLengthBetween(0, 3) + (randomBoolean() ? ".*?" : "." + randomAlphaOfLength(4)) + randomAlphaOfLengthBetween(0, 5),
|
||||||
randomBoolean() ? RegexpFlag.resolveValue(randomFrom(FLAGS)) : RegexpFlag.ALL.value(),
|
randomBoolean() ? RegexpFlag.resolveValue(randomFrom(FLAGS)) : RegexpFlag.ALL.value(),
|
||||||
randomBoolean() ? randomAlphaOfLength(10) : null,
|
randomBoolean() ? randomAlphaOfLength(10) : null,
|
||||||
randomBoolean() ? randomIntBetween(-1, Integer.MAX_VALUE) : null
|
randomBoolean() ? randomIntBetween(-1, Integer.MAX_VALUE) : null,
|
||||||
|
randomBoolean()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,7 +43,9 @@ public class RegexpIntervalsSourceProviderTests extends AbstractSerializingTestC
|
||||||
int flags = instance.getFlags();
|
int flags = instance.getFlags();
|
||||||
String useField = instance.getUseField();
|
String useField = instance.getUseField();
|
||||||
Integer maxExpansions = instance.getMaxExpansions();
|
Integer maxExpansions = instance.getMaxExpansions();
|
||||||
int ran = between(0, 3);
|
boolean caseInsensitive = instance.isCaseInsensitive();
|
||||||
|
|
||||||
|
int ran = between(0, 4);
|
||||||
switch (ran) {
|
switch (ran) {
|
||||||
case 0:
|
case 0:
|
||||||
pattern += randomBoolean() ? ".*?" : randomAlphaOfLength(5);
|
pattern += randomBoolean() ? ".*?" : randomAlphaOfLength(5);
|
||||||
|
@ -56,10 +59,13 @@ public class RegexpIntervalsSourceProviderTests extends AbstractSerializingTestC
|
||||||
case 3:
|
case 3:
|
||||||
maxExpansions = maxExpansions == null ? randomIntBetween(1, Integer.MAX_VALUE) : null;
|
maxExpansions = maxExpansions == null ? randomIntBetween(1, Integer.MAX_VALUE) : null;
|
||||||
break;
|
break;
|
||||||
|
case 4:
|
||||||
|
caseInsensitive = !caseInsensitive;
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
throw new AssertionError("Illegal randomisation branch");
|
throw new AssertionError("Illegal randomisation branch");
|
||||||
}
|
}
|
||||||
return new Regexp(pattern, flags, useField, maxExpansions);
|
return new Regexp(pattern, flags, useField, maxExpansions, caseInsensitive);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue