Search - add case insensitive support for regex queries. (#59441) (#61532)

Backport that adds case-insensitive support for regexp queries.
Forks copies of Lucene's RegexpQuery and RegExp classes from Lucene master.
These forked copies can be removed once Lucene 8.7 is released.

Closes #59235
This commit is contained in:
markharwood 2020-08-25 17:18:59 +01:00 committed by GitHub
parent e3d23c34ab
commit 8b56441d2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 1411 additions and 96 deletions

View File

@ -28,6 +28,7 @@ GET /_search
"user.id": { "user.id": {
"value": "k.*y", "value": "k.*y",
"flags": "ALL", "flags": "ALL",
"case_insensitive": true,
"max_determinized_states": 10000, "max_determinized_states": 10000,
"rewrite": "constant_score" "rewrite": "constant_score"
} }
@ -67,6 +68,10 @@ provided. To improve performance, avoid using wildcard patterns, such as `.*` or
valid values and more information, see <<regexp-optional-operators, Regular valid values and more information, see <<regexp-optional-operators, Regular
expression syntax>>. expression syntax>>.
`case_insensitive`::
(Optional, Boolean) Allows case-insensitive matching of the regular expression
value with the indexed field values when set to `true`. Setting it to `false` is
disallowed; omit the parameter to keep the default behavior.
`max_determinized_states`:: `max_determinized_states`::
+ +
-- --

View File

@ -146,7 +146,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
} }
@Override @Override
public Query regexpQuery(String value, int flags, int maxDeterminizedStates, public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
MultiTermQuery.RewriteMethod method, QueryShardContext context) { MultiTermQuery.RewriteMethod method, QueryShardContext context) {
throw new UnsupportedOperationException("[regexp] queries are not supported on [" + CONTENT_TYPE + "] fields."); throw new UnsupportedOperationException("[regexp] queries are not supported on [" + CONTENT_TYPE + "] fields.");
} }

View File

@ -91,7 +91,7 @@ public class CollationFieldTypeTests extends FieldTypeTestCase{
public void testRegexpQuery() { public void testRegexpQuery() {
MappedFieldType ft = new CollationFieldType("field", DEFAULT_COLLATOR); MappedFieldType ft = new CollationFieldType("field", DEFAULT_COLLATOR);
UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class, UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
() -> ft.regexpQuery("foo.*", 0, 10, null, randomMockShardContext())); () -> ft.regexpQuery("foo.*", 0, 0, 10, null, randomMockShardContext()));
assertEquals("[regexp] queries are not supported on [icu_collation_keyword] fields.", e.getMessage()); assertEquals("[regexp] queries are not supported on [icu_collation_keyword] fields.", e.getMessage());
} }

View File

@ -333,5 +333,8 @@ tasks.named("dependencyLicenses").configure {
tasks.named("licenseHeaders").configure { tasks.named("licenseHeaders").configure {
// Ignore our vendored version of Google Guice // Ignore our vendored version of Google Guice
excludes << 'org/elasticsearch/common/inject/**/*' excludes << 'org/elasticsearch/common/inject/**/*'
// Ignore temporary copies of impending 8.7 Lucene classes
excludes << 'org/apache/lucene/search/RegExp87*'
excludes << 'org/apache/lucene/search/RegexpQuery87*'
excludes << 'org/elasticsearch/client/documentation/placeholder.txt' excludes << 'org/elasticsearch/client/documentation/placeholder.txt'
} }

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,143 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonProvider;
import org.apache.lucene.util.automaton.Operations;
/**
 * Copy of Lucene's RegexpQuery class coming in 8.7, which adds a case
 * insensitive search option via match flags (see {@link RegExp87}).
 *
 * @deprecated temporary fork; switch back to Lucene's own RegexpQuery once
 *             the Lucene 8.7 version is available.
 */
@Deprecated
public class RegexpQuery87 extends AutomatonQuery {

    /**
     * A provider that provides no named automata. Shared by every constructor
     * that does not take an explicit provider, hence immutable.
     */
    private static final AutomatonProvider DEFAULT_PROVIDER = new AutomatonProvider() {
        @Override
        public Automaton getAutomaton(String name) {
            return null;
        }
    };

    /**
     * Constructs a query for terms matching <code>term</code>.
     * <p>
     * By default, all regular expression features are enabled.
     * </p>
     *
     * @param term regular expression.
     */
    public RegexpQuery87(Term term) {
        this(term, RegExp87.ALL);
    }

    /**
     * Constructs a query for terms matching <code>term</code>, using
     * {@link Operations#DEFAULT_MAX_DETERMINIZED_STATES} as the state limit.
     *
     * @param term regular expression.
     * @param flags optional RegExp syntax features from {@link RegExp87}
     */
    public RegexpQuery87(Term term, int flags) {
        this(term, flags, DEFAULT_PROVIDER, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    }

    /**
     * Constructs a query for terms matching <code>term</code>.
     *
     * @param term regular expression.
     * @param flags optional RegExp syntax features from {@link RegExp87}
     * @param maxDeterminizedStates maximum number of states that compiling the
     *  automaton for the regexp can result in. Set higher to allow more complex
     *  queries and lower to prevent memory exhaustion.
     */
    public RegexpQuery87(Term term, int flags, int maxDeterminizedStates) {
        this(term, flags, DEFAULT_PROVIDER, maxDeterminizedStates);
    }

    /**
     * Constructs a query for terms matching <code>term</code>.
     *
     * @param term regular expression.
     * @param syntax_flags optional RegExp syntax features from {@link RegExp87}
     * @param match_flags boolean 'or' of match behavior options such as case insensitivity
     * @param maxDeterminizedStates maximum number of states that compiling the
     *  automaton for the regexp can result in. Set higher to allow more complex
     *  queries and lower to prevent memory exhaustion.
     */
    public RegexpQuery87(Term term, int syntax_flags, int match_flags, int maxDeterminizedStates) {
        this(term, syntax_flags, match_flags, DEFAULT_PROVIDER, maxDeterminizedStates);
    }

    /**
     * Constructs a query for terms matching <code>term</code>, with no match
     * flags set (a match flags value of 0 is passed on).
     *
     * @param term regular expression.
     * @param syntax_flags optional RegExp syntax features from {@link RegExp87}
     * @param provider custom AutomatonProvider for named automata
     * @param maxDeterminizedStates maximum number of states that compiling the
     *  automaton for the regexp can result in. Set higher to allow more complex
     *  queries and lower to prevent memory exhaustion.
     */
    public RegexpQuery87(Term term, int syntax_flags, AutomatonProvider provider,
                         int maxDeterminizedStates) {
        this(term, syntax_flags, 0, provider, maxDeterminizedStates);
    }

    /**
     * Constructs a query for terms matching <code>term</code>. All other
     * constructors delegate here.
     *
     * @param term regular expression.
     * @param syntax_flags optional RegExp syntax features from {@link RegExp87}
     * @param match_flags boolean 'or' of match behavior options such as case insensitivity
     * @param provider custom AutomatonProvider for named automata
     * @param maxDeterminizedStates maximum number of states that compiling the
     *  automaton for the regexp can result in. Set higher to allow more complex
     *  queries and lower to prevent memory exhaustion.
     */
    public RegexpQuery87(Term term, int syntax_flags, int match_flags, AutomatonProvider provider,
                         int maxDeterminizedStates) {
        // The term text is parsed as the regular expression and compiled to an
        // automaton up front; the same state limit is handed to the superclass.
        super(term,
            new RegExp87(term.text(), syntax_flags, match_flags).toAutomaton(provider, maxDeterminizedStates),
            maxDeterminizedStates);
    }

    /** Returns the regexp of this query wrapped in a Term. */
    public Term getRegexp() {
        return term;
    }

    /**
     * Prints a user-readable version of this query: the regexp text wrapped in
     * slashes, prefixed with {@code fieldName:} only when the query's field
     * differs from {@code field}.
     *
     * @param field the field name that may be omitted from the output
     */
    @Override
    public String toString(String field) {
        StringBuilder buffer = new StringBuilder();
        if (!term.field().equals(field)) {
            buffer.append(term.field());
            buffer.append(":");
        }
        buffer.append('/');
        buffer.append(term.text());
        buffer.append('/');
        return buffer.toString();
    }
}

View File

@ -214,8 +214,8 @@ public abstract class MappedFieldType {
+ "] which is of type [" + typeName() + "]"); + "] which is of type [" + typeName() + "]");
} }
public Query regexpQuery(String value, int flags, int maxDeterminizedStates, @Nullable MultiTermQuery.RewriteMethod method, public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
QueryShardContext context) { @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) {
throw new QueryShardException(context, "Can only use regexp queries on keyword and text fields - not on [" + name throw new QueryShardException(context, "Can only use regexp queries on keyword and text fields - not on [" + name
+ "] which is of type [" + typeName() + "]"); + "] which is of type [" + typeName() + "]");
} }

View File

@ -25,7 +25,7 @@ import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegexpQuery87;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -134,14 +134,15 @@ public abstract class StringFieldType extends TermBasedFieldType {
} }
@Override @Override
public Query regexpQuery(String value, int flags, int maxDeterminizedStates, public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
MultiTermQuery.RewriteMethod method, QueryShardContext context) { MultiTermQuery.RewriteMethod method, QueryShardContext context) {
if (context.allowExpensiveQueries() == false) { if (context.allowExpensiveQueries() == false) {
throw new ElasticsearchException("[regexp] queries cannot be executed when '" + throw new ElasticsearchException("[regexp] queries cannot be executed when '" +
ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false."); ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
} }
failIfNotIndexed(); failIfNotIndexed();
RegexpQuery query = new RegexpQuery(new Term(name(), indexedValueForSearch(value)), flags, maxDeterminizedStates); RegexpQuery87 query = new RegexpQuery87(new Term(name(), indexedValueForSearch(value)), syntaxFlags,
matchFlags, maxDeterminizedStates);
if (method != null) { if (method != null) {
query.setRewriteMethod(method); query.setRewriteMethod(method);
} }

View File

@ -18,7 +18,7 @@
*/ */
package org.elasticsearch.index.query; package org.elasticsearch.index.query;
import org.apache.lucene.util.automaton.RegExp; import org.apache.lucene.search.RegExp87;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
import java.util.Locale; import java.util.Locale;
@ -43,37 +43,37 @@ public enum RegexpFlag {
/** /**
* Enables intersection of the form: {@code &lt;expression&gt; &amp; &lt;expression&gt;} * Enables intersection of the form: {@code &lt;expression&gt; &amp; &lt;expression&gt;}
*/ */
INTERSECTION(RegExp.INTERSECTION), INTERSECTION(RegExp87.INTERSECTION),
/** /**
* Enables complement expression of the form: {@code ~&lt;expression&gt;} * Enables complement expression of the form: {@code ~&lt;expression&gt;}
*/ */
COMPLEMENT(RegExp.COMPLEMENT), COMPLEMENT(RegExp87.COMPLEMENT),
/** /**
* Enables empty language expression: {@code #} * Enables empty language expression: {@code #}
*/ */
EMPTY(RegExp.EMPTY), EMPTY(RegExp87.EMPTY),
/** /**
* Enables any string expression: {@code @} * Enables any string expression: {@code @}
*/ */
ANYSTRING(RegExp.ANYSTRING), ANYSTRING(RegExp87.ANYSTRING),
/** /**
* Enables numerical interval expression: {@code &lt;n-m&gt;} * Enables numerical interval expression: {@code &lt;n-m&gt;}
*/ */
INTERVAL(RegExp.INTERVAL), INTERVAL(RegExp87.INTERVAL),
/** /**
* Disables all available option flags * Disables all available option flags
*/ */
NONE(RegExp.NONE), NONE(RegExp87.NONE),
/** /**
* Enables all available option flags * Enables all available option flags
*/ */
ALL(RegExp.ALL); ALL(RegExp87.ALL);
final int value; final int value;
@ -110,9 +110,9 @@ public enum RegexpFlag {
*/ */
public static int resolveValue(String flags) { public static int resolveValue(String flags) {
if (flags == null || flags.isEmpty()) { if (flags == null || flags.isEmpty()) {
return RegExp.ALL; return RegExp87.ALL;
} }
int magic = RegExp.NONE; int magic = RegExp87.NONE;
for (String s : Strings.delimitedListToStringArray(flags, "|")) { for (String s : Strings.delimitedListToStringArray(flags, "|")) {
if (s.isEmpty()) { if (s.isEmpty()) {
continue; continue;

View File

@ -22,8 +22,10 @@ package org.elasticsearch.index.query;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegExp87;
import org.apache.lucene.search.RegexpQuery87;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
@ -48,10 +50,12 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
public static final int DEFAULT_FLAGS_VALUE = RegexpFlag.ALL.value(); public static final int DEFAULT_FLAGS_VALUE = RegexpFlag.ALL.value();
public static final int DEFAULT_MAX_DETERMINIZED_STATES = Operations.DEFAULT_MAX_DETERMINIZED_STATES; public static final int DEFAULT_MAX_DETERMINIZED_STATES = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
public static final boolean DEFAULT_CASE_INSENSITIVITY = false;
private static final ParseField FLAGS_VALUE_FIELD = new ParseField("flags_value"); private static final ParseField FLAGS_VALUE_FIELD = new ParseField("flags_value");
private static final ParseField MAX_DETERMINIZED_STATES_FIELD = new ParseField("max_determinized_states"); private static final ParseField MAX_DETERMINIZED_STATES_FIELD = new ParseField("max_determinized_states");
private static final ParseField FLAGS_FIELD = new ParseField("flags"); private static final ParseField FLAGS_FIELD = new ParseField("flags");
private static final ParseField CASE_INSENSITIVE_FIELD = new ParseField("case_insensitive");
private static final ParseField REWRITE_FIELD = new ParseField("rewrite"); private static final ParseField REWRITE_FIELD = new ParseField("rewrite");
private static final ParseField VALUE_FIELD = new ParseField("value"); private static final ParseField VALUE_FIELD = new ParseField("value");
@ -59,7 +63,8 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
private final String value; private final String value;
private int flagsValue = DEFAULT_FLAGS_VALUE; private int syntaxFlagsValue = DEFAULT_FLAGS_VALUE;
private boolean caseInsensitive = DEFAULT_CASE_INSENSITIVITY;
private int maxDeterminizedStates = DEFAULT_MAX_DETERMINIZED_STATES; private int maxDeterminizedStates = DEFAULT_MAX_DETERMINIZED_STATES;
@ -89,18 +94,24 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
super(in); super(in);
fieldName = in.readString(); fieldName = in.readString();
value = in.readString(); value = in.readString();
flagsValue = in.readVInt(); syntaxFlagsValue = in.readVInt();
maxDeterminizedStates = in.readVInt(); maxDeterminizedStates = in.readVInt();
rewrite = in.readOptionalString(); rewrite = in.readOptionalString();
if (in.getVersion().onOrAfter(Version.V_7_10_0)) {
caseInsensitive = in.readBoolean();
}
} }
@Override @Override
protected void doWriteTo(StreamOutput out) throws IOException { protected void doWriteTo(StreamOutput out) throws IOException {
out.writeString(fieldName); out.writeString(fieldName);
out.writeString(value); out.writeString(value);
out.writeVInt(flagsValue); out.writeVInt(syntaxFlagsValue);
out.writeVInt(maxDeterminizedStates); out.writeVInt(maxDeterminizedStates);
out.writeOptionalString(rewrite); out.writeOptionalString(rewrite);
if (out.getVersion().onOrAfter(Version.V_7_10_0)) {
out.writeBoolean(caseInsensitive);
}
} }
/** Returns the field name used in this query. */ /** Returns the field name used in this query. */
@ -118,7 +129,7 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
public RegexpQueryBuilder flags(RegexpFlag... flags) { public RegexpQueryBuilder flags(RegexpFlag... flags) {
if (flags == null) { if (flags == null) {
this.flagsValue = DEFAULT_FLAGS_VALUE; this.syntaxFlagsValue = DEFAULT_FLAGS_VALUE;
return this; return this;
} }
int value = 0; int value = 0;
@ -129,17 +140,29 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
value |= flag.value; value |= flag.value;
} }
} }
this.flagsValue = value; this.syntaxFlagsValue = value;
return this; return this;
} }
public RegexpQueryBuilder flags(int flags) { public RegexpQueryBuilder flags(int flags) {
this.flagsValue = flags; this.syntaxFlagsValue = flags;
return this; return this;
} }
public int flags() { public int flags() {
return this.flagsValue; return this.syntaxFlagsValue;
}
/**
 * Turns on case insensitive matching for this query.
 *
 * @param caseInsensitive must be {@code true}; the value is stored on this builder
 * @return this builder
 * @throws IllegalArgumentException if {@code caseInsensitive} is {@code false}
 */
public RegexpQueryBuilder caseInsensitive(boolean caseInsensitive) {
    if (caseInsensitive) {
        // Only "true" ever reaches the field; "false" is rejected below.
        this.caseInsensitive = true;
        return this;
    }
    throw new IllegalArgumentException("The case insensitive setting cannot be set to false.");
}
public boolean caseInsensitive() {
return this.caseInsensitive;
} }
/** /**
@ -168,7 +191,10 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
builder.startObject(NAME); builder.startObject(NAME);
builder.startObject(fieldName); builder.startObject(fieldName);
builder.field(VALUE_FIELD.getPreferredName(), this.value); builder.field(VALUE_FIELD.getPreferredName(), this.value);
builder.field(FLAGS_VALUE_FIELD.getPreferredName(), flagsValue); builder.field(FLAGS_VALUE_FIELD.getPreferredName(), syntaxFlagsValue);
if (caseInsensitive != DEFAULT_CASE_INSENSITIVITY) {
builder.field(CASE_INSENSITIVE_FIELD.getPreferredName(), caseInsensitive);
}
builder.field(MAX_DETERMINIZED_STATES_FIELD.getPreferredName(), maxDeterminizedStates); builder.field(MAX_DETERMINIZED_STATES_FIELD.getPreferredName(), maxDeterminizedStates);
if (rewrite != null) { if (rewrite != null) {
builder.field(REWRITE_FIELD.getPreferredName(), rewrite); builder.field(REWRITE_FIELD.getPreferredName(), rewrite);
@ -184,6 +210,7 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
String value = null; String value = null;
float boost = AbstractQueryBuilder.DEFAULT_BOOST; float boost = AbstractQueryBuilder.DEFAULT_BOOST;
int flagsValue = RegexpQueryBuilder.DEFAULT_FLAGS_VALUE; int flagsValue = RegexpQueryBuilder.DEFAULT_FLAGS_VALUE;
boolean caseInsensitive = DEFAULT_CASE_INSENSITIVITY;
int maxDeterminizedStates = RegexpQueryBuilder.DEFAULT_MAX_DETERMINIZED_STATES; int maxDeterminizedStates = RegexpQueryBuilder.DEFAULT_MAX_DETERMINIZED_STATES;
String queryName = null; String queryName = null;
String currentFieldName = null; String currentFieldName = null;
@ -211,6 +238,12 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
maxDeterminizedStates = parser.intValue(); maxDeterminizedStates = parser.intValue();
} else if (FLAGS_VALUE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { } else if (FLAGS_VALUE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
flagsValue = parser.intValue(); flagsValue = parser.intValue();
} else if (CASE_INSENSITIVE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
caseInsensitive = parser.booleanValue();
if (caseInsensitive == false) {
throw new ParsingException(parser.getTokenLocation(),
"[regexp] query does not support [" + currentFieldName + "] = false");
}
} else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
queryName = parser.text(); queryName = parser.text();
} else { } else {
@ -226,12 +259,16 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
} }
} }
return new RegexpQueryBuilder(fieldName, value) RegexpQueryBuilder result = new RegexpQueryBuilder(fieldName, value)
.flags(flagsValue) .flags(flagsValue)
.maxDeterminizedStates(maxDeterminizedStates) .maxDeterminizedStates(maxDeterminizedStates)
.rewrite(rewrite) .rewrite(rewrite)
.boost(boost) .boost(boost)
.queryName(queryName); .queryName(queryName);
if (caseInsensitive) {
result.caseInsensitive(caseInsensitive);
}
return result;
} }
@Override @Override
@ -251,13 +288,18 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
} }
MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewrite, null, LoggingDeprecationHandler.INSTANCE); MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewrite, null, LoggingDeprecationHandler.INSTANCE);
int matchFlagsValue = caseInsensitive ? RegExp87.ASCII_CASE_INSENSITIVE : 0;
Query query = null; Query query = null;
// For BWC we mask irrelevant bits (RegExp changed ALL from 0xffff to 0xff)
int sanitisedSyntaxFlag = syntaxFlagsValue & RegExp87.ALL;
MappedFieldType fieldType = context.fieldMapper(fieldName); MappedFieldType fieldType = context.fieldMapper(fieldName);
if (fieldType != null) { if (fieldType != null) {
query = fieldType.regexpQuery(value, flagsValue, maxDeterminizedStates, method, context); query = fieldType.regexpQuery(value, sanitisedSyntaxFlag, matchFlagsValue, maxDeterminizedStates, method, context);
} }
if (query == null) { if (query == null) {
RegexpQuery regexpQuery = new RegexpQuery(new Term(fieldName, BytesRefs.toBytesRef(value)), flagsValue, maxDeterminizedStates); RegexpQuery87 regexpQuery = new RegexpQuery87(new Term(fieldName, BytesRefs.toBytesRef(value)), sanitisedSyntaxFlag,
matchFlagsValue, maxDeterminizedStates);
if (method != null) { if (method != null) {
regexpQuery.setRewriteMethod(method); regexpQuery.setRewriteMethod(method);
} }
@ -268,14 +310,15 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
@Override @Override
protected int doHashCode() { protected int doHashCode() {
return Objects.hash(fieldName, value, flagsValue, maxDeterminizedStates, rewrite); return Objects.hash(fieldName, value, syntaxFlagsValue, caseInsensitive, maxDeterminizedStates, rewrite);
} }
@Override @Override
protected boolean doEquals(RegexpQueryBuilder other) { protected boolean doEquals(RegexpQueryBuilder other) {
return Objects.equals(fieldName, other.fieldName) && return Objects.equals(fieldName, other.fieldName) &&
Objects.equals(value, other.value) && Objects.equals(value, other.value) &&
Objects.equals(flagsValue, other.flagsValue) && Objects.equals(syntaxFlagsValue, other.syntaxFlagsValue) &&
Objects.equals(caseInsensitive, other.caseInsensitive) &&
Objects.equals(maxDeterminizedStates, other.maxDeterminizedStates) && Objects.equals(maxDeterminizedStates, other.maxDeterminizedStates) &&
Objects.equals(rewrite, other.rewrite); Objects.equals(rewrite, other.rewrite);
} }

View File

@ -37,13 +37,13 @@ import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegExp87;
import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.unit.Fuzziness;
@ -731,7 +731,7 @@ public class QueryStringQueryParser extends XQueryParser {
setAnalyzer(forceAnalyzer); setAnalyzer(forceAnalyzer);
return super.getRegexpQuery(field, termStr); return super.getRegexpQuery(field, termStr);
} }
return currentFieldType.regexpQuery(termStr, RegExp.ALL, getMaxDeterminizedStates(), return currentFieldType.regexpQuery(termStr, RegExp87.ALL, 0, getMaxDeterminizedStates(),
getMultiTermRewriteMethod(), context); getMultiTermRewriteMethod(), context);
} catch (RuntimeException e) { } catch (RuntimeException e) {
if (lenient) { if (lenient) {

View File

@ -19,8 +19,8 @@
package org.elasticsearch.search.suggest.completion; package org.elasticsearch.search.suggest.completion;
import org.apache.lucene.search.RegExp87;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamInput;
@ -143,7 +143,7 @@ public class RegexOptions implements ToXContentFragment, Writeable {
* Options for regular expression queries * Options for regular expression queries
*/ */
public static class Builder { public static class Builder {
private int flagsValue = RegExp.ALL; private int flagsValue = RegExp87.ALL;
private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES; private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
public Builder() { public Builder() {

View File

@ -22,6 +22,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegExp87;
import org.apache.lucene.search.suggest.document.CompletionAnalyzer; import org.apache.lucene.search.suggest.document.CompletionAnalyzer;
import org.apache.lucene.search.suggest.document.ContextSuggestField; import org.apache.lucene.search.suggest.document.ContextSuggestField;
import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery; import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery;
@ -31,7 +32,6 @@ import org.apache.lucene.search.suggest.document.SuggestField;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.Version; import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.Strings; import org.elasticsearch.common.Strings;
@ -889,7 +889,7 @@ public class CompletionFieldMapperTests extends ESSingleNodeTestCase {
Mapper fieldMapper = defaultMapper.mappers().getMapper("completion"); Mapper fieldMapper = defaultMapper.mappers().getMapper("completion");
CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper; CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper;
Query prefixQuery = completionFieldMapper.fieldType() Query prefixQuery = completionFieldMapper.fieldType()
.regexpQuery(new BytesRef("co"), RegExp.ALL, Operations.DEFAULT_MAX_DETERMINIZED_STATES); .regexpQuery(new BytesRef("co"), RegExp87.ALL, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
assertThat(prefixQuery, instanceOf(RegexCompletionQuery.class)); assertThat(prefixQuery, instanceOf(RegexCompletionQuery.class));
} }

View File

@ -22,7 +22,7 @@ package org.elasticsearch.index.mapper;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegexpQuery87;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchException;
@ -44,11 +44,11 @@ public class IgnoredFieldTypeTests extends FieldTypeTestCase {
public void testRegexpQuery() { public void testRegexpQuery() {
MappedFieldType ft = IgnoredFieldMapper.IgnoredFieldType.INSTANCE; MappedFieldType ft = IgnoredFieldMapper.IgnoredFieldType.INSTANCE;
Query expected = new RegexpQuery(new Term("_ignored", new BytesRef("foo?"))); Query expected = new RegexpQuery87(new Term("_ignored", new BytesRef("foo?")));
assertEquals(expected, ft.regexpQuery("foo?", 0, 10, null, MOCK_QSC)); assertEquals(expected, ft.regexpQuery("foo?", 0, 0, 10, null, MOCK_QSC));
ElasticsearchException ee = expectThrows(ElasticsearchException.class, ElasticsearchException ee = expectThrows(ElasticsearchException.class,
() -> ft.regexpQuery("foo?", randomInt(10), randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE)); () -> ft.regexpQuery("foo?", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
ee.getMessage()); ee.getMessage());
} }

View File

@ -53,7 +53,7 @@ public class IndexFieldTypeTests extends ESTestCase {
MappedFieldType ft = IndexFieldMapper.IndexFieldType.INSTANCE; MappedFieldType ft = IndexFieldMapper.IndexFieldType.INSTANCE;
QueryShardException e = expectThrows(QueryShardException.class, () -> QueryShardException e = expectThrows(QueryShardException.class, () ->
assertEquals(new MatchAllDocsQuery(), ft.regexpQuery("ind.x", 0, 10, null, createContext()))); assertEquals(new MatchAllDocsQuery(), ft.regexpQuery("ind.x", 0, 0, 10, null, createContext())));
assertThat(e.getMessage(), containsString("Can only use regexp queries on keyword and text fields")); assertThat(e.getMessage(), containsString("Can only use regexp queries on keyword and text fields"));
} }

View File

@ -29,7 +29,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.NormsFieldExistsQuery; import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegexpQuery87;
import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
@ -128,16 +128,16 @@ public class KeywordFieldTypeTests extends FieldTypeTestCase {
public void testRegexpQuery() { public void testRegexpQuery() {
MappedFieldType ft = new KeywordFieldType("field"); MappedFieldType ft = new KeywordFieldType("field");
assertEquals(new RegexpQuery(new Term("field","foo.*")), assertEquals(new RegexpQuery87(new Term("field","foo.*")),
ft.regexpQuery("foo.*", 0, 10, null, MOCK_QSC)); ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC));
MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap()); MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap());
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> unsearchable.regexpQuery("foo.*", 0, 10, null, MOCK_QSC)); () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC));
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
ElasticsearchException ee = expectThrows(ElasticsearchException.class, ElasticsearchException ee = expectThrows(ElasticsearchException.class,
() -> ft.regexpQuery("foo.*", randomInt(10), randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE)); () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
ee.getMessage()); ee.getMessage());
} }

View File

@ -21,7 +21,7 @@ package org.elasticsearch.index.mapper;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegexpQuery87;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchException;
@ -43,11 +43,11 @@ public class RoutingFieldTypeTests extends FieldTypeTestCase {
public void testRegexpQuery() { public void testRegexpQuery() {
MappedFieldType ft = RoutingFieldMapper.RoutingFieldType.INSTANCE; MappedFieldType ft = RoutingFieldMapper.RoutingFieldType.INSTANCE;
Query expected = new RegexpQuery(new Term("_routing", new BytesRef("foo?"))); Query expected = new RegexpQuery87(new Term("_routing", new BytesRef("foo?")));
assertEquals(expected, ft.regexpQuery("foo?", 0, 10, null, MOCK_QSC)); assertEquals(expected, ft.regexpQuery("foo?", 0, 0, 10, null, MOCK_QSC));
ElasticsearchException ee = expectThrows(ElasticsearchException.class, ElasticsearchException ee = expectThrows(ElasticsearchException.class,
() -> ft.regexpQuery("foo?", randomInt(10), randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE)); () -> ft.regexpQuery("foo?", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
ee.getMessage()); ee.getMessage());
} }

View File

@ -26,7 +26,7 @@ import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegexpQuery87;
import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
@ -86,16 +86,16 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
public void testRegexpQuery() { public void testRegexpQuery() {
MappedFieldType ft = new TextFieldType("field"); MappedFieldType ft = new TextFieldType("field");
assertEquals(new RegexpQuery(new Term("field","foo.*")), assertEquals(new RegexpQuery87(new Term("field","foo.*")),
ft.regexpQuery("foo.*", 0, 10, null, MOCK_QSC)); ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC));
MappedFieldType unsearchable = new TextFieldType("field", false, Collections.emptyMap()); MappedFieldType unsearchable = new TextFieldType("field", false, Collections.emptyMap());
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> unsearchable.regexpQuery("foo.*", 0, 10, null, MOCK_QSC)); () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC));
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
ElasticsearchException ee = expectThrows(ElasticsearchException.class, ElasticsearchException ee = expectThrows(ElasticsearchException.class,
() -> ft.regexpQuery("foo.*", randomInt(10), randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE)); () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
ee.getMessage()); ee.getMessage());
} }

View File

@ -40,7 +40,7 @@ import org.apache.lucene.search.NormsFieldExistsQuery;
import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegexpQuery87;
import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
@ -733,8 +733,8 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
Query query = queryStringQuery("/foo*bar/").defaultField(TEXT_FIELD_NAME) Query query = queryStringQuery("/foo*bar/").defaultField(TEXT_FIELD_NAME)
.maxDeterminizedStates(5000) .maxDeterminizedStates(5000)
.toQuery(createShardContext()); .toQuery(createShardContext());
assertThat(query, instanceOf(RegexpQuery.class)); assertThat(query, instanceOf(RegexpQuery87.class));
RegexpQuery regexpQuery = (RegexpQuery) query; RegexpQuery87 regexpQuery = (RegexpQuery87) query;
assertTrue(regexpQuery.toString().contains("/foo*bar/")); assertTrue(regexpQuery.toString().contains("/foo*bar/"));
} }

View File

@ -20,7 +20,9 @@
package org.elasticsearch.index.query; package org.elasticsearch.index.query;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegExp87;
import org.apache.lucene.search.RegexpQuery87;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.ParsingException;
import org.elasticsearch.test.AbstractQueryTestCase; import org.elasticsearch.test.AbstractQueryTestCase;
@ -46,6 +48,9 @@ public class RegexpQueryBuilderTests extends AbstractQueryTestCase<RegexpQueryBu
} }
query.flags(flags.toArray(new RegexpFlag[flags.size()])); query.flags(flags.toArray(new RegexpFlag[flags.size()]));
} }
if (randomBoolean()) {
query.caseInsensitive(true);
}
if (randomBoolean()) { if (randomBoolean()) {
query.maxDeterminizedStates(randomInt(50000)); query.maxDeterminizedStates(randomInt(50000));
} }
@ -77,8 +82,8 @@ public class RegexpQueryBuilderTests extends AbstractQueryTestCase<RegexpQueryBu
@Override @Override
protected void doAssertLuceneQuery(RegexpQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException { protected void doAssertLuceneQuery(RegexpQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {
assertThat(query, instanceOf(RegexpQuery.class)); assertThat(query, instanceOf(RegexpQuery87.class));
RegexpQuery regexpQuery = (RegexpQuery) query; RegexpQuery87 regexpQuery = (RegexpQuery87) query;
String expectedFieldName = expectedFieldName( queryBuilder.fieldName()); String expectedFieldName = expectedFieldName( queryBuilder.fieldName());
assertThat(regexpQuery.getField(), equalTo(expectedFieldName)); assertThat(regexpQuery.getField(), equalTo(expectedFieldName));
@ -101,6 +106,7 @@ public class RegexpQueryBuilderTests extends AbstractQueryTestCase<RegexpQueryBu
" \"name.first\" : {\n" + " \"name.first\" : {\n" +
" \"value\" : \"s.*y\",\n" + " \"value\" : \"s.*y\",\n" +
" \"flags_value\" : 7,\n" + " \"flags_value\" : 7,\n" +
" \"case_insensitive\" : true,\n" +
" \"max_determinized_states\" : 20000,\n" + " \"max_determinized_states\" : 20000,\n" +
" \"boost\" : 1.0\n" + " \"boost\" : 1.0\n" +
" }\n" + " }\n" +
@ -147,4 +153,25 @@ public class RegexpQueryBuilderTests extends AbstractQueryTestCase<RegexpQueryBu
e = expectThrows(ParsingException.class, () -> parseQuery(shortJson)); e = expectThrows(ParsingException.class, () -> parseQuery(shortJson));
assertEquals("[regexp] query doesn't support multiple fields, found [user1] and [user2]", e.getMessage()); assertEquals("[regexp] query doesn't support multiple fields, found [user1] and [user2]", e.getMessage());
} }
/**
 * Checks that a {@code regexp} query which explicitly sets {@code case_insensitive} to
 * {@code false} is rejected with a {@link ParsingException} carrying the expected message.
 * <p>
 * The JSON fixture is kept well-formed (no trailing comma after the field object) so that the
 * disallowed flag value is the only thing in the document that can make parsing fail.
 *
 * @throws IOException declared for consistency with the other parsing tests in this class
 */
public void testParseFailsWithCaseSensitive() throws IOException {
    String json =
        "{\n" +
        " \"regexp\": {\n" +
        " \"user1\": {\n" +
        " \"value\": \"k.*y\",\n" +
        " \"case_insensitive\": false\n" +
        " }\n" +
        " }\n" +
        "}";
    ParsingException e = expectThrows(ParsingException.class, () -> parseQuery(json));
    assertEquals("[regexp] query does not support [case_insensitive] = false", e.getMessage());
}
/**
 * Reminder check for the forked {@code RegExp87} class: the assertion holds only while
 * Lucene's {@code Version.LATEST} is an 8.x release with a minor version below 7. When it
 * stops holding, the failure message says the fork should be replaced with Lucene's own RegExp.
 */
public void testDeadCode() {
    // Fully qualified on purpose, exactly as in the original assertion.
    final org.apache.lucene.util.Version luceneLatest = org.apache.lucene.util.Version.LATEST;
    final String reminder = RegExp87.class + " should be replaced with 8.7's "+RegExp.class;
    final boolean forkStillRequired = luceneLatest.major == 8 && luceneLatest.minor < 7;
    assertTrue(reminder, forkStillRequired);
}
} }

View File

@ -13,12 +13,12 @@ import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegExp87;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata; import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.regex.Regex;
@ -208,13 +208,13 @@ public class ConstantKeywordFieldMapper extends FieldMapper {
} }
@Override @Override
public Query regexpQuery(String value, int flags, int maxDeterminizedStates, public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
MultiTermQuery.RewriteMethod method, QueryShardContext context) { MultiTermQuery.RewriteMethod method, QueryShardContext context) {
if (this.value == null) { if (this.value == null) {
return new MatchNoDocsQuery(); return new MatchNoDocsQuery();
} }
final Automaton automaton = new RegExp(value, flags).toAutomaton(maxDeterminizedStates); final Automaton automaton = new RegExp87(value, syntaxFlags, matchFlags).toAutomaton(maxDeterminizedStates);
final CharacterRunAutomaton runAutomaton = new CharacterRunAutomaton(automaton); final CharacterRunAutomaton runAutomaton = new CharacterRunAutomaton(automaton);
if (runAutomaton.run(this.value)) { if (runAutomaton.run(this.value)) {
return new MatchAllDocsQuery(); return new MatchAllDocsQuery();

View File

@ -8,7 +8,7 @@ package org.elasticsearch.xpack.constantkeyword.mapper;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.util.automaton.RegExp; import org.apache.lucene.search.RegExp87;
import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.xpack.constantkeyword.mapper.ConstantKeywordFieldMapper.ConstantKeywordFieldType; import org.elasticsearch.xpack.constantkeyword.mapper.ConstantKeywordFieldMapper.ConstantKeywordFieldType;
@ -86,9 +86,9 @@ public class ConstantKeywordFieldTypeTests extends FieldTypeTestCase {
public void testRegexpQuery() { public void testRegexpQuery() {
ConstantKeywordFieldType none = new ConstantKeywordFieldType("f", null); ConstantKeywordFieldType none = new ConstantKeywordFieldType("f", null);
assertEquals(new MatchNoDocsQuery(), none.regexpQuery("f..o", RegExp.ALL, 10, null, null)); assertEquals(new MatchNoDocsQuery(), none.regexpQuery("f..o", RegExp87.ALL, 0, 10, null, null));
ConstantKeywordFieldType ft = new ConstantKeywordFieldType("f", "foo"); ConstantKeywordFieldType ft = new ConstantKeywordFieldType("f", "foo");
assertEquals(new MatchAllDocsQuery(), ft.regexpQuery("f.o", RegExp.ALL, 10, null, null)); assertEquals(new MatchAllDocsQuery(), ft.regexpQuery("f.o", RegExp87.ALL, 0, 10, null, null));
assertEquals(new MatchNoDocsQuery(), ft.regexpQuery("f..o", RegExp.ALL, 10, null, null)); assertEquals(new MatchNoDocsQuery(), ft.regexpQuery("f..o", RegExp87.ALL, 0, 10, null, null));
} }
} }

View File

@ -291,7 +291,7 @@ public final class FlatObjectFieldMapper extends DynamicKeyFieldMapper {
} }
@Override @Override
public Query regexpQuery(String value, int flags, int maxDeterminizedStates, public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
MultiTermQuery.RewriteMethod method, QueryShardContext context) { MultiTermQuery.RewriteMethod method, QueryShardContext context) {
throw new UnsupportedOperationException("[regexp] queries are not currently supported on keyed " + throw new UnsupportedOperationException("[regexp] queries are not currently supported on keyed " +
"[" + CONTENT_TYPE + "] fields."); "[" + CONTENT_TYPE + "] fields.");

View File

@ -130,7 +130,7 @@ public class KeyedFlatObjectFieldTypeTests extends FieldTypeTestCase {
KeyedFlatObjectFieldType ft = createFieldType(); KeyedFlatObjectFieldType ft = createFieldType();
UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class, UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
() -> ft.regexpQuery("valu*", 0, 10, null, randomMockShardContext())); () -> ft.regexpQuery("valu*", 0, 0, 10, null, randomMockShardContext()));
assertEquals("[regexp] queries are not currently supported on keyed [flattened] fields.", e.getMessage()); assertEquals("[regexp] queries are not currently supported on keyed [flattened] fields.", e.getMessage());
} }

View File

@ -10,7 +10,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegexpQuery87;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
@ -96,12 +96,12 @@ public class RootFlatObjectFieldTypeTests extends FieldTypeTestCase {
public void testRegexpQuery() { public void testRegexpQuery() {
RootFlatObjectFieldType ft = createDefaultFieldType(); RootFlatObjectFieldType ft = createDefaultFieldType();
Query expected = new RegexpQuery(new Term("field", "val.*")); Query expected = new RegexpQuery87(new Term("field", "val.*"));
Query actual = ft.regexpQuery("val.*", 0, 10, null, MOCK_QSC); Query actual = ft.regexpQuery("val.*", 0, 0, 10, null, MOCK_QSC);
assertEquals(expected, actual); assertEquals(expected, actual);
ElasticsearchException ee = expectThrows(ElasticsearchException.class, ElasticsearchException ee = expectThrows(ElasticsearchException.class,
() -> ft.regexpQuery("val.*", randomInt(10), randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE)); () -> ft.regexpQuery("val.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
ee.getMessage()); ee.getMessage());
} }

View File

@ -106,6 +106,23 @@ setup:
- match: {hits.total.value: 1} - match: {hits.total.value: 1}
---
"Case insensitive query":
- skip:
features: headers
version: " - 7.9.99"
reason: "Case insensitive flag added in 7.10"
- do:
search:
body:
track_total_hits: true
query:
regexp:
my_wildcard: {value: ".*Worl.*", case_insensitive: true}
- match: {hits.total.value: 3}
--- ---
"null query": "null query":
- do: - do:

View File

@ -29,13 +29,13 @@ import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiTermQuery.RewriteMethod; import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegExp87;
import org.apache.lucene.search.RegExp87.Kind;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.RegExp;
import org.apache.lucene.util.automaton.RegExp.Kind;
import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.lucene.BytesRefs;
@ -294,12 +294,13 @@ public class WildcardFieldMapper extends FieldMapper {
} }
@Override @Override
public Query regexpQuery(String value, int flags, int maxDeterminizedStates, RewriteMethod method, QueryShardContext context) { public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
RewriteMethod method, QueryShardContext context) {
if (value.length() == 0) { if (value.length() == 0) {
return new MatchNoDocsQuery(); return new MatchNoDocsQuery();
} }
RegExp ngramRegex = new RegExp(addLineEndChars(toLowerCase(value)), flags); RegExp87 ngramRegex = new RegExp87(addLineEndChars(toLowerCase(value)), syntaxFlags, matchFlags);
Query approxBooleanQuery = toApproximationQuery(ngramRegex); Query approxBooleanQuery = toApproximationQuery(ngramRegex);
Query approxNgramQuery = rewriteBoolToNgramQuery(approxBooleanQuery); Query approxNgramQuery = rewriteBoolToNgramQuery(approxBooleanQuery);
@ -310,7 +311,7 @@ public class WildcardFieldMapper extends FieldMapper {
return existsQuery(context); return existsQuery(context);
} }
Supplier<Automaton> deferredAutomatonSupplier = ()-> { Supplier<Automaton> deferredAutomatonSupplier = ()-> {
RegExp regex = new RegExp(value, flags); RegExp87 regex = new RegExp87(value, syntaxFlags, matchFlags);
return regex.toAutomaton(maxDeterminizedStates); return regex.toAutomaton(maxDeterminizedStates);
}; };
@ -339,7 +340,7 @@ public class WildcardFieldMapper extends FieldMapper {
// * If an expression resolves to a RegExpQuery eg ?? then only the verification // * If an expression resolves to a RegExpQuery eg ?? then only the verification
// query is run. // query is run.
// * Anything else is a concrete query that should be run on the ngram index. // * Anything else is a concrete query that should be run on the ngram index.
public static Query toApproximationQuery(RegExp r) throws IllegalArgumentException { public static Query toApproximationQuery(RegExp87 r) throws IllegalArgumentException {
Query result = null; Query result = null;
switch (r.kind) { switch (r.kind) {
case REGEXP_UNION: case REGEXP_UNION:
@ -400,7 +401,7 @@ public class WildcardFieldMapper extends FieldMapper {
return result; return result;
} }
private static Query createConcatenationQuery(RegExp r) { private static Query createConcatenationQuery(RegExp87 r) {
// Create ANDs of expressions plus collapse consecutive TermQuerys into single longer ones // Create ANDs of expressions plus collapse consecutive TermQuerys into single longer ones
ArrayList<Query> queries = new ArrayList<>(); ArrayList<Query> queries = new ArrayList<>();
findLeaves(r.exp1, Kind.REGEXP_CONCATENATION, queries); findLeaves(r.exp1, Kind.REGEXP_CONCATENATION, queries);
@ -431,7 +432,7 @@ public class WildcardFieldMapper extends FieldMapper {
} }
private static Query createUnionQuery(RegExp r) { private static Query createUnionQuery(RegExp87 r) {
// Create an OR of clauses // Create an OR of clauses
ArrayList<Query> queries = new ArrayList<>(); ArrayList<Query> queries = new ArrayList<>();
findLeaves(r.exp1, Kind.REGEXP_UNION, queries); findLeaves(r.exp1, Kind.REGEXP_UNION, queries);
@ -458,7 +459,7 @@ public class WildcardFieldMapper extends FieldMapper {
return new MatchAllButRequireVerificationQuery(); return new MatchAllButRequireVerificationQuery();
} }
private static void findLeaves(RegExp exp, Kind kind, List<Query> queries) { private static void findLeaves(RegExp87 exp, Kind kind, List<Query> queries) {
if (exp.kind == kind) { if (exp.kind == kind) {
findLeaves(exp.exp1, kind, queries); findLeaves(exp.exp1, kind, queries);
findLeaves( exp.exp2, kind, queries); findLeaves( exp.exp2, kind, queries);

View File

@ -26,6 +26,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegExp87;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
@ -37,7 +38,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton; import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.RegExp; //import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.Version; import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.collect.List; import org.elasticsearch.common.collect.List;
@ -168,7 +169,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
assertThat(wildcardFieldTopDocs.totalHits.value, equalTo(0L)); assertThat(wildcardFieldTopDocs.totalHits.value, equalTo(0L));
// Test regexp query // Test regexp query
wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(queryString, RegExp.ALL, 20000, null, MOCK_QSC); wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(queryString, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
wildcardFieldTopDocs = searcher.search(wildcardFieldQuery, 10, Sort.INDEXORDER); wildcardFieldTopDocs = searcher.search(wildcardFieldQuery, 10, Sort.INDEXORDER);
assertThat(wildcardFieldTopDocs.totalHits.value, equalTo(0L)); assertThat(wildcardFieldTopDocs.totalHits.value, equalTo(0L));
@ -225,8 +226,8 @@ public class WildcardFieldMapperTests extends ESTestCase {
break; break;
case 1: case 1:
pattern = getRandomRegexPattern(values); pattern = getRandomRegexPattern(values);
wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(pattern, RegExp.ALL, 20000, null, MOCK_QSC); wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(pattern, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
keywordFieldQuery = keywordFieldType.fieldType().regexpQuery(pattern, RegExp.ALL, 20000, null, MOCK_QSC); keywordFieldQuery = keywordFieldType.fieldType().regexpQuery(pattern, RegExp87.ALL, 0,20000, null, MOCK_QSC);
break; break;
case 2: case 2:
pattern = randomABString(5); pattern = randomABString(5);
@ -379,12 +380,12 @@ public class WildcardFieldMapperTests extends ESTestCase {
// All these expressions should rewrite to a match all with no verification step required at all // All these expressions should rewrite to a match all with no verification step required at all
String superfastRegexes[]= { ".*", "...*..", "(foo|bar|.*)", "@"}; String superfastRegexes[]= { ".*", "...*..", "(foo|bar|.*)", "@"};
for (String regex : superfastRegexes) { for (String regex : superfastRegexes) {
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
assertTrue(wildcardFieldQuery instanceof DocValuesFieldExistsQuery); assertTrue(wildcardFieldQuery instanceof DocValuesFieldExistsQuery);
} }
String matchNoDocsRegexes[]= { ""}; String matchNoDocsRegexes[]= { ""};
for (String regex : matchNoDocsRegexes) { for (String regex : matchNoDocsRegexes) {
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
assertTrue(wildcardFieldQuery instanceof MatchNoDocsQuery); assertTrue(wildcardFieldQuery instanceof MatchNoDocsQuery);
} }
@ -404,7 +405,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
for (String[] test : acceleratedTests) { for (String[] test : acceleratedTests) {
String regex = test[0]; String regex = test[0];
String expectedAccelerationQueryString = test[1].replaceAll("_", ""+WildcardFieldMapper.TOKEN_START_OR_END_CHAR); String expectedAccelerationQueryString = test[1].replaceAll("_", ""+WildcardFieldMapper.TOKEN_START_OR_END_CHAR);
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
testExpectedAccelerationQuery(regex, wildcardFieldQuery, expectedAccelerationQueryString); testExpectedAccelerationQuery(regex, wildcardFieldQuery, expectedAccelerationQueryString);
} }
@ -412,7 +413,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
// TODO we can possibly improve on some of these // TODO we can possibly improve on some of these
String matchAllButVerifyTests[]= { "..", "(a)?","(a|b){0,3}", "((foo)?|(foo|bar)?)", "@&~(abc.+)", "aaa.+&.+bbb"}; String matchAllButVerifyTests[]= { "..", "(a)?","(a|b){0,3}", "((foo)?|(foo|bar)?)", "@&~(abc.+)", "aaa.+&.+bbb"};
for (String regex : matchAllButVerifyTests) { for (String regex : matchAllButVerifyTests) {
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
assertTrue(regex +" was not a pure verify query " +formatQuery(wildcardFieldQuery), assertTrue(regex +" was not a pure verify query " +formatQuery(wildcardFieldQuery),
wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv); wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv);
} }
@ -428,7 +429,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
for (String[] test : suboptimalTests) { for (String[] test : suboptimalTests) {
String regex = test[0]; String regex = test[0];
String expectedAccelerationQueryString = test[1].replaceAll("_", ""+WildcardFieldMapper.TOKEN_START_OR_END_CHAR); String expectedAccelerationQueryString = test[1].replaceAll("_", ""+WildcardFieldMapper.TOKEN_START_OR_END_CHAR);
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC); Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
testExpectedAccelerationQuery(regex, wildcardFieldQuery, expectedAccelerationQueryString); testExpectedAccelerationQuery(regex, wildcardFieldQuery, expectedAccelerationQueryString);
} }
@ -767,7 +768,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
} }
//Assert our randomly generated regex actually matches the provided raw input. //Assert our randomly generated regex actually matches the provided raw input.
RegExp regex = new RegExp(result.toString()); RegExp87 regex = new RegExp87(result.toString());
Automaton automaton = regex.toAutomaton(); Automaton automaton = regex.toAutomaton();
ByteRunAutomaton bytesMatcher = new ByteRunAutomaton(automaton); ByteRunAutomaton bytesMatcher = new ByteRunAutomaton(automaton);
BytesRef br = new BytesRef(randomValue); BytesRef br = new BytesRef(randomValue);