Backport to add case-insensitive support for regexp queries. Forks a copy of Lucene’s RegexpQuery and RegExp from Lucene master. This can be removed when Lucene 8.7 is released. Closes #59235
This commit is contained in:
parent
e3d23c34ab
commit
8b56441d2b
|
@ -28,6 +28,7 @@ GET /_search
|
|||
"user.id": {
|
||||
"value": "k.*y",
|
||||
"flags": "ALL",
|
||||
"case_insensitive": true,
|
||||
"max_determinized_states": 10000,
|
||||
"rewrite": "constant_score"
|
||||
}
|
||||
|
@ -67,6 +68,10 @@ provided. To improve performance, avoid using wildcard patterns, such as `.*` or
|
|||
valid values and more information, see <<regexp-optional-operators, Regular
|
||||
expression syntax>>.
|
||||
|
||||
`case_insensitive`::
|
||||
(Optional, Boolean) Allows case-insensitive matching of the regular expression
|
||||
value with the indexed field values when set to `true`. Setting this to `false` is disallowed.
|
||||
|
||||
`max_determinized_states`::
|
||||
+
|
||||
--
|
||||
|
|
|
@ -146,7 +146,7 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Query regexpQuery(String value, int flags, int maxDeterminizedStates,
|
||||
public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
|
||||
MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
throw new UnsupportedOperationException("[regexp] queries are not supported on [" + CONTENT_TYPE + "] fields.");
|
||||
}
|
||||
|
|
|
@ -91,7 +91,7 @@ public class CollationFieldTypeTests extends FieldTypeTestCase{
|
|||
public void testRegexpQuery() {
|
||||
MappedFieldType ft = new CollationFieldType("field", DEFAULT_COLLATOR);
|
||||
UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
|
||||
() -> ft.regexpQuery("foo.*", 0, 10, null, randomMockShardContext()));
|
||||
() -> ft.regexpQuery("foo.*", 0, 0, 10, null, randomMockShardContext()));
|
||||
assertEquals("[regexp] queries are not supported on [icu_collation_keyword] fields.", e.getMessage());
|
||||
}
|
||||
|
||||
|
|
|
@ -333,5 +333,8 @@ tasks.named("dependencyLicenses").configure {
|
|||
tasks.named("licenseHeaders").configure {
|
||||
// Ignore our vendored version of Google Guice
|
||||
excludes << 'org/elasticsearch/common/inject/**/*'
|
||||
// Ignore temporary copies of impending 8.7 Lucene classes
|
||||
excludes << 'org/apache/lucene/search/RegExp87*'
|
||||
excludes << 'org/apache/lucene/search/RegexpQuery87*'
|
||||
excludes << 'org/elasticsearch/client/documentation/placeholder.txt'
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,143 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search;
|
||||
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.AutomatonProvider;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
|
||||
/**
|
||||
* Copy of Lucene's RegexpQuery class coming in 8.7 with a
|
||||
* case-insensitive search option.
|
||||
* @deprecated temporary fork; to be removed in favour of Lucene's own
|
||||
* RegexpQuery once Lucene 8.7 is available
|
||||
*/
|
||||
@Deprecated
|
||||
public class RegexpQuery87 extends AutomatonQuery {
|
||||
/**
|
||||
* A provider that provides no named automata
|
||||
* (every lookup returns <code>null</code>).
|
||||
*/
|
||||
private static AutomatonProvider defaultProvider = new AutomatonProvider() {
|
||||
@Override
|
||||
public Automaton getAutomaton(String name) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Constructs a query for terms matching <code>term</code>.
|
||||
* <p>
|
||||
* By default, all regular expression features are enabled.
|
||||
* </p>
|
||||
*
|
||||
* @param term regular expression.
|
||||
*/
|
||||
public RegexpQuery87(Term term) {
|
||||
this(term, RegExp87.ALL);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a query for terms matching <code>term</code>, using the
|
||||
* default provider (no named automata) and
|
||||
* {@link Operations#DEFAULT_MAX_DETERMINIZED_STATES}.
|
||||
*
|
||||
* @param term regular expression.
|
||||
* @param flags optional RegExp syntax features from {@link RegExp87}
|
||||
*/
|
||||
public RegexpQuery87(Term term, int flags) {
|
||||
this(term, flags, defaultProvider,
|
||||
Operations.DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a query for terms matching <code>term</code>.
|
||||
*
|
||||
* @param term regular expression.
|
||||
* @param flags optional RegExp syntax features from {@link RegExp87}
|
||||
* @param maxDeterminizedStates maximum number of states that compiling the
|
||||
* automaton for the regexp can result in. Set higher to allow more complex
|
||||
* queries and lower to prevent memory exhaustion.
|
||||
*/
|
||||
public RegexpQuery87(Term term, int flags, int maxDeterminizedStates) {
|
||||
this(term, flags, defaultProvider, maxDeterminizedStates);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a query for terms matching <code>term</code>.
|
||||
*
|
||||
* @param term regular expression.
|
||||
* @param syntax_flags optional RegExp syntax features from {@link RegExp87}
|
||||
* @param match_flags boolean 'or' of match behavior options such as case insensitivity
|
||||
* @param maxDeterminizedStates maximum number of states that compiling the
|
||||
* automaton for the regexp can result in. Set higher to allow more complex
|
||||
* queries and lower to prevent memory exhaustion.
|
||||
*/
|
||||
public RegexpQuery87(Term term, int syntax_flags, int match_flags, int maxDeterminizedStates) {
|
||||
this(term, syntax_flags, match_flags, defaultProvider, maxDeterminizedStates);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a query for terms matching <code>term</code>.
|
||||
* No match flags are applied (the match flags value passed on is 0).
|
||||
*
|
||||
* @param term regular expression.
|
||||
* @param syntax_flags optional RegExp syntax features from {@link RegExp87}
|
||||
* @param provider custom AutomatonProvider for named automata
|
||||
* @param maxDeterminizedStates maximum number of states that compiling the
|
||||
* automaton for the regexp can result in. Set higher to allow more complex
|
||||
* queries and lower to prevent memory exhaustion.
|
||||
*/
|
||||
public RegexpQuery87(Term term, int syntax_flags, AutomatonProvider provider,
|
||||
int maxDeterminizedStates) {
|
||||
this(term, syntax_flags, 0, provider, maxDeterminizedStates);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a query for terms matching <code>term</code>.
|
||||
* All other constructors delegate to this one.
|
||||
*
|
||||
* @param term regular expression.
|
||||
* @param syntax_flags optional RegExp syntax features from {@link RegExp87}
|
||||
* @param match_flags boolean 'or' of match behavior options such as case insensitivity
|
||||
* @param provider custom AutomatonProvider for named automata
|
||||
* @param maxDeterminizedStates maximum number of states that compiling the
|
||||
* automaton for the regexp can result in. Set higher to allow more complex
|
||||
* queries and lower to prevent memory exhaustion.
|
||||
*/
|
||||
public RegexpQuery87(Term term, int syntax_flags, int match_flags, AutomatonProvider provider,
|
||||
int maxDeterminizedStates) {
|
||||
// The term text is parsed as a RegExp87 and converted to an automaton here;
|
||||
// the same maxDeterminizedStates limit is also handed to the superclass.
|
||||
super(term,
|
||||
new RegExp87(term.text(), syntax_flags, match_flags).toAutomaton(
|
||||
provider, maxDeterminizedStates), maxDeterminizedStates);
|
||||
}
|
||||
|
||||
/** Returns the regexp of this query wrapped in a Term. */
|
||||
public Term getRegexp() {
|
||||
return term;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prints a user-readable version of this query: the regexp text wrapped in
|
||||
* slashes, prefixed with <code>fieldname:</code> when the term's field
|
||||
* differs from <code>field</code>.
|
||||
*/
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
// Only spell out the field name when it is not the one the caller passed in
|
||||
if (!term.field().equals(field)) {
|
||||
buffer.append(term.field());
|
||||
buffer.append(":");
|
||||
}
|
||||
buffer.append('/');
|
||||
buffer.append(term.text());
|
||||
buffer.append('/');
|
||||
return buffer.toString();
|
||||
}
|
||||
}
|
|
@ -214,8 +214,8 @@ public abstract class MappedFieldType {
|
|||
+ "] which is of type [" + typeName() + "]");
|
||||
}
|
||||
|
||||
public Query regexpQuery(String value, int flags, int maxDeterminizedStates, @Nullable MultiTermQuery.RewriteMethod method,
|
||||
QueryShardContext context) {
|
||||
public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
|
||||
@Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
throw new QueryShardException(context, "Can only use regexp queries on keyword and text fields - not on [" + name
|
||||
+ "] which is of type [" + typeName() + "]");
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.search.FuzzyQuery;
|
|||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegexpQuery;
|
||||
import org.apache.lucene.search.RegexpQuery87;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -134,14 +134,15 @@ public abstract class StringFieldType extends TermBasedFieldType {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Query regexpQuery(String value, int flags, int maxDeterminizedStates,
|
||||
public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
|
||||
MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
if (context.allowExpensiveQueries() == false) {
|
||||
throw new ElasticsearchException("[regexp] queries cannot be executed when '" +
|
||||
ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to false.");
|
||||
}
|
||||
failIfNotIndexed();
|
||||
RegexpQuery query = new RegexpQuery(new Term(name(), indexedValueForSearch(value)), flags, maxDeterminizedStates);
|
||||
RegexpQuery87 query = new RegexpQuery87(new Term(name(), indexedValueForSearch(value)), syntaxFlags,
|
||||
matchFlags, maxDeterminizedStates);
|
||||
if (method != null) {
|
||||
query.setRewriteMethod(method);
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
*/
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.apache.lucene.search.RegExp87;
|
||||
import org.elasticsearch.common.Strings;
|
||||
|
||||
import java.util.Locale;
|
||||
|
@ -43,37 +43,37 @@ public enum RegexpFlag {
|
|||
/**
|
||||
* Enables intersection of the form: {@code <expression> & <expression>}
|
||||
*/
|
||||
INTERSECTION(RegExp.INTERSECTION),
|
||||
INTERSECTION(RegExp87.INTERSECTION),
|
||||
|
||||
/**
|
||||
* Enables complement expression of the form: {@code ~<expression>}
|
||||
*/
|
||||
COMPLEMENT(RegExp.COMPLEMENT),
|
||||
COMPLEMENT(RegExp87.COMPLEMENT),
|
||||
|
||||
/**
|
||||
* Enables empty language expression: {@code #}
|
||||
*/
|
||||
EMPTY(RegExp.EMPTY),
|
||||
EMPTY(RegExp87.EMPTY),
|
||||
|
||||
/**
|
||||
* Enables any string expression: {@code @}
|
||||
*/
|
||||
ANYSTRING(RegExp.ANYSTRING),
|
||||
ANYSTRING(RegExp87.ANYSTRING),
|
||||
|
||||
/**
|
||||
* Enables numerical interval expression: {@code <n-m>}
|
||||
*/
|
||||
INTERVAL(RegExp.INTERVAL),
|
||||
INTERVAL(RegExp87.INTERVAL),
|
||||
|
||||
/**
|
||||
* Disables all available option flags
|
||||
*/
|
||||
NONE(RegExp.NONE),
|
||||
NONE(RegExp87.NONE),
|
||||
|
||||
/**
|
||||
* Enables all available option flags
|
||||
*/
|
||||
ALL(RegExp.ALL);
|
||||
ALL(RegExp87.ALL);
|
||||
|
||||
|
||||
final int value;
|
||||
|
@ -110,9 +110,9 @@ public enum RegexpFlag {
|
|||
*/
|
||||
public static int resolveValue(String flags) {
|
||||
if (flags == null || flags.isEmpty()) {
|
||||
return RegExp.ALL;
|
||||
return RegExp87.ALL;
|
||||
}
|
||||
int magic = RegExp.NONE;
|
||||
int magic = RegExp87.NONE;
|
||||
for (String s : Strings.delimitedListToStringArray(flags, "|")) {
|
||||
if (s.isEmpty()) {
|
||||
continue;
|
||||
|
|
|
@ -22,8 +22,10 @@ package org.elasticsearch.index.query;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegexpQuery;
|
||||
import org.apache.lucene.search.RegExp87;
|
||||
import org.apache.lucene.search.RegexpQuery87;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.common.Strings;
|
||||
|
@ -48,10 +50,12 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
|
||||
public static final int DEFAULT_FLAGS_VALUE = RegexpFlag.ALL.value();
|
||||
public static final int DEFAULT_MAX_DETERMINIZED_STATES = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
|
||||
public static final boolean DEFAULT_CASE_INSENSITIVITY = false;
|
||||
|
||||
private static final ParseField FLAGS_VALUE_FIELD = new ParseField("flags_value");
|
||||
private static final ParseField MAX_DETERMINIZED_STATES_FIELD = new ParseField("max_determinized_states");
|
||||
private static final ParseField FLAGS_FIELD = new ParseField("flags");
|
||||
private static final ParseField CASE_INSENSITIVE_FIELD = new ParseField("case_insensitive");
|
||||
private static final ParseField REWRITE_FIELD = new ParseField("rewrite");
|
||||
private static final ParseField VALUE_FIELD = new ParseField("value");
|
||||
|
||||
|
@ -59,7 +63,8 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
|
||||
private final String value;
|
||||
|
||||
private int flagsValue = DEFAULT_FLAGS_VALUE;
|
||||
private int syntaxFlagsValue = DEFAULT_FLAGS_VALUE;
|
||||
private boolean caseInsensitive = DEFAULT_CASE_INSENSITIVITY;
|
||||
|
||||
private int maxDeterminizedStates = DEFAULT_MAX_DETERMINIZED_STATES;
|
||||
|
||||
|
@ -89,18 +94,24 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
super(in);
|
||||
fieldName = in.readString();
|
||||
value = in.readString();
|
||||
flagsValue = in.readVInt();
|
||||
syntaxFlagsValue = in.readVInt();
|
||||
maxDeterminizedStates = in.readVInt();
|
||||
rewrite = in.readOptionalString();
|
||||
if (in.getVersion().onOrAfter(Version.V_7_10_0)) {
|
||||
caseInsensitive = in.readBoolean();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doWriteTo(StreamOutput out) throws IOException {
|
||||
out.writeString(fieldName);
|
||||
out.writeString(value);
|
||||
out.writeVInt(flagsValue);
|
||||
out.writeVInt(syntaxFlagsValue);
|
||||
out.writeVInt(maxDeterminizedStates);
|
||||
out.writeOptionalString(rewrite);
|
||||
if (out.getVersion().onOrAfter(Version.V_7_10_0)) {
|
||||
out.writeBoolean(caseInsensitive);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the field name used in this query. */
|
||||
|
@ -118,7 +129,7 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
|
||||
public RegexpQueryBuilder flags(RegexpFlag... flags) {
|
||||
if (flags == null) {
|
||||
this.flagsValue = DEFAULT_FLAGS_VALUE;
|
||||
this.syntaxFlagsValue = DEFAULT_FLAGS_VALUE;
|
||||
return this;
|
||||
}
|
||||
int value = 0;
|
||||
|
@ -129,19 +140,31 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
value |= flag.value;
|
||||
}
|
||||
}
|
||||
this.flagsValue = value;
|
||||
this.syntaxFlagsValue = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public RegexpQueryBuilder flags(int flags) {
|
||||
this.flagsValue = flags;
|
||||
this.syntaxFlagsValue = flags;
|
||||
return this;
|
||||
}
|
||||
|
||||
public int flags() {
|
||||
return this.flagsValue;
|
||||
return this.syntaxFlagsValue;
|
||||
}
|
||||
|
||||
public RegexpQueryBuilder caseInsensitive(boolean caseInsensitive) {
|
||||
if (caseInsensitive == false) {
|
||||
throw new IllegalArgumentException("The case insensitive setting cannot be set to false.");
|
||||
}
|
||||
this.caseInsensitive = caseInsensitive;
|
||||
return this;
|
||||
}
|
||||
|
||||
public boolean caseInsensitive() {
|
||||
return this.caseInsensitive;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the regexp maxDeterminizedStates.
|
||||
*/
|
||||
|
@ -168,7 +191,10 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
builder.startObject(NAME);
|
||||
builder.startObject(fieldName);
|
||||
builder.field(VALUE_FIELD.getPreferredName(), this.value);
|
||||
builder.field(FLAGS_VALUE_FIELD.getPreferredName(), flagsValue);
|
||||
builder.field(FLAGS_VALUE_FIELD.getPreferredName(), syntaxFlagsValue);
|
||||
if (caseInsensitive != DEFAULT_CASE_INSENSITIVITY) {
|
||||
builder.field(CASE_INSENSITIVE_FIELD.getPreferredName(), caseInsensitive);
|
||||
}
|
||||
builder.field(MAX_DETERMINIZED_STATES_FIELD.getPreferredName(), maxDeterminizedStates);
|
||||
if (rewrite != null) {
|
||||
builder.field(REWRITE_FIELD.getPreferredName(), rewrite);
|
||||
|
@ -184,6 +210,7 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
String value = null;
|
||||
float boost = AbstractQueryBuilder.DEFAULT_BOOST;
|
||||
int flagsValue = RegexpQueryBuilder.DEFAULT_FLAGS_VALUE;
|
||||
boolean caseInsensitive = DEFAULT_CASE_INSENSITIVITY;
|
||||
int maxDeterminizedStates = RegexpQueryBuilder.DEFAULT_MAX_DETERMINIZED_STATES;
|
||||
String queryName = null;
|
||||
String currentFieldName = null;
|
||||
|
@ -211,6 +238,12 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
maxDeterminizedStates = parser.intValue();
|
||||
} else if (FLAGS_VALUE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
|
||||
flagsValue = parser.intValue();
|
||||
} else if (CASE_INSENSITIVE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
|
||||
caseInsensitive = parser.booleanValue();
|
||||
if (caseInsensitive == false) {
|
||||
throw new ParsingException(parser.getTokenLocation(),
|
||||
"[regexp] query does not support [" + currentFieldName + "] = false");
|
||||
}
|
||||
} else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
|
||||
queryName = parser.text();
|
||||
} else {
|
||||
|
@ -226,12 +259,16 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
}
|
||||
}
|
||||
|
||||
return new RegexpQueryBuilder(fieldName, value)
|
||||
RegexpQueryBuilder result = new RegexpQueryBuilder(fieldName, value)
|
||||
.flags(flagsValue)
|
||||
.maxDeterminizedStates(maxDeterminizedStates)
|
||||
.rewrite(rewrite)
|
||||
.boost(boost)
|
||||
.queryName(queryName);
|
||||
if (caseInsensitive) {
|
||||
result.caseInsensitive(caseInsensitive);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -251,13 +288,18 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
}
|
||||
MultiTermQuery.RewriteMethod method = QueryParsers.parseRewriteMethod(rewrite, null, LoggingDeprecationHandler.INSTANCE);
|
||||
|
||||
int matchFlagsValue = caseInsensitive ? RegExp87.ASCII_CASE_INSENSITIVE : 0;
|
||||
Query query = null;
|
||||
// For BWC we mask irrelevant bits (RegExp changed ALL from 0xffff to 0xff)
|
||||
int sanitisedSyntaxFlag = syntaxFlagsValue & RegExp87.ALL;
|
||||
|
||||
MappedFieldType fieldType = context.fieldMapper(fieldName);
|
||||
if (fieldType != null) {
|
||||
query = fieldType.regexpQuery(value, flagsValue, maxDeterminizedStates, method, context);
|
||||
query = fieldType.regexpQuery(value, sanitisedSyntaxFlag, matchFlagsValue, maxDeterminizedStates, method, context);
|
||||
}
|
||||
if (query == null) {
|
||||
RegexpQuery regexpQuery = new RegexpQuery(new Term(fieldName, BytesRefs.toBytesRef(value)), flagsValue, maxDeterminizedStates);
|
||||
RegexpQuery87 regexpQuery = new RegexpQuery87(new Term(fieldName, BytesRefs.toBytesRef(value)), sanitisedSyntaxFlag,
|
||||
matchFlagsValue, maxDeterminizedStates);
|
||||
if (method != null) {
|
||||
regexpQuery.setRewriteMethod(method);
|
||||
}
|
||||
|
@ -268,14 +310,15 @@ public class RegexpQueryBuilder extends AbstractQueryBuilder<RegexpQueryBuilder>
|
|||
|
||||
@Override
|
||||
protected int doHashCode() {
|
||||
return Objects.hash(fieldName, value, flagsValue, maxDeterminizedStates, rewrite);
|
||||
return Objects.hash(fieldName, value, syntaxFlagsValue, caseInsensitive, maxDeterminizedStates, rewrite);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean doEquals(RegexpQueryBuilder other) {
|
||||
return Objects.equals(fieldName, other.fieldName) &&
|
||||
Objects.equals(value, other.value) &&
|
||||
Objects.equals(flagsValue, other.flagsValue) &&
|
||||
Objects.equals(syntaxFlagsValue, other.syntaxFlagsValue) &&
|
||||
Objects.equals(caseInsensitive, other.caseInsensitive) &&
|
||||
Objects.equals(maxDeterminizedStates, other.maxDeterminizedStates) &&
|
||||
Objects.equals(rewrite, other.rewrite);
|
||||
}
|
||||
|
|
|
@ -37,13 +37,13 @@ import org.apache.lucene.search.MultiPhraseQuery;
|
|||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegExp87;
|
||||
import org.apache.lucene.search.SynonymQuery;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.elasticsearch.common.lucene.search.Queries;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
|
@ -731,7 +731,7 @@ public class QueryStringQueryParser extends XQueryParser {
|
|||
setAnalyzer(forceAnalyzer);
|
||||
return super.getRegexpQuery(field, termStr);
|
||||
}
|
||||
return currentFieldType.regexpQuery(termStr, RegExp.ALL, getMaxDeterminizedStates(),
|
||||
return currentFieldType.regexpQuery(termStr, RegExp87.ALL, 0, getMaxDeterminizedStates(),
|
||||
getMultiTermRewriteMethod(), context);
|
||||
} catch (RuntimeException e) {
|
||||
if (lenient) {
|
||||
|
|
|
@ -19,8 +19,8 @@
|
|||
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.apache.lucene.search.RegExp87;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.elasticsearch.ElasticsearchParseException;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
|
@ -143,7 +143,7 @@ public class RegexOptions implements ToXContentFragment, Writeable {
|
|||
* Options for regular expression queries
|
||||
*/
|
||||
public static class Builder {
|
||||
private int flagsValue = RegExp.ALL;
|
||||
private int flagsValue = RegExp87.ALL;
|
||||
private int maxDeterminizedStates = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
|
||||
|
||||
public Builder() {
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|||
import org.apache.lucene.document.SortedSetDocValuesField;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegExp87;
|
||||
import org.apache.lucene.search.suggest.document.CompletionAnalyzer;
|
||||
import org.apache.lucene.search.suggest.document.ContextSuggestField;
|
||||
import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery;
|
||||
|
@ -31,7 +32,6 @@ import org.apache.lucene.search.suggest.document.SuggestField;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetadata;
|
||||
import org.elasticsearch.common.Strings;
|
||||
|
@ -889,7 +889,7 @@ public class CompletionFieldMapperTests extends ESSingleNodeTestCase {
|
|||
Mapper fieldMapper = defaultMapper.mappers().getMapper("completion");
|
||||
CompletionFieldMapper completionFieldMapper = (CompletionFieldMapper) fieldMapper;
|
||||
Query prefixQuery = completionFieldMapper.fieldType()
|
||||
.regexpQuery(new BytesRef("co"), RegExp.ALL, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
.regexpQuery(new BytesRef("co"), RegExp87.ALL, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
assertThat(prefixQuery, instanceOf(RegexCompletionQuery.class));
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ package org.elasticsearch.index.mapper;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegexpQuery;
|
||||
import org.apache.lucene.search.RegexpQuery87;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
|
@ -44,11 +44,11 @@ public class IgnoredFieldTypeTests extends FieldTypeTestCase {
|
|||
public void testRegexpQuery() {
|
||||
MappedFieldType ft = IgnoredFieldMapper.IgnoredFieldType.INSTANCE;
|
||||
|
||||
Query expected = new RegexpQuery(new Term("_ignored", new BytesRef("foo?")));
|
||||
assertEquals(expected, ft.regexpQuery("foo?", 0, 10, null, MOCK_QSC));
|
||||
Query expected = new RegexpQuery87(new Term("_ignored", new BytesRef("foo?")));
|
||||
assertEquals(expected, ft.regexpQuery("foo?", 0, 0, 10, null, MOCK_QSC));
|
||||
|
||||
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
|
||||
() -> ft.regexpQuery("foo?", randomInt(10), randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
|
||||
() -> ft.regexpQuery("foo?", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
|
||||
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
|
||||
ee.getMessage());
|
||||
}
|
||||
|
|
|
@ -53,7 +53,7 @@ public class IndexFieldTypeTests extends ESTestCase {
|
|||
MappedFieldType ft = IndexFieldMapper.IndexFieldType.INSTANCE;
|
||||
|
||||
QueryShardException e = expectThrows(QueryShardException.class, () ->
|
||||
assertEquals(new MatchAllDocsQuery(), ft.regexpQuery("ind.x", 0, 10, null, createContext())));
|
||||
assertEquals(new MatchAllDocsQuery(), ft.regexpQuery("ind.x", 0, 0, 10, null, createContext())));
|
||||
assertThat(e.getMessage(), containsString("Can only use regexp queries on keyword and text fields"));
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.DocValuesFieldExistsQuery;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.NormsFieldExistsQuery;
|
||||
import org.apache.lucene.search.RegexpQuery;
|
||||
import org.apache.lucene.search.RegexpQuery87;
|
||||
import org.apache.lucene.search.TermInSetQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
|
@ -128,16 +128,16 @@ public class KeywordFieldTypeTests extends FieldTypeTestCase {
|
|||
|
||||
public void testRegexpQuery() {
|
||||
MappedFieldType ft = new KeywordFieldType("field");
|
||||
assertEquals(new RegexpQuery(new Term("field","foo.*")),
|
||||
ft.regexpQuery("foo.*", 0, 10, null, MOCK_QSC));
|
||||
assertEquals(new RegexpQuery87(new Term("field","foo.*")),
|
||||
ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC));
|
||||
|
||||
MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap());
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> unsearchable.regexpQuery("foo.*", 0, 10, null, MOCK_QSC));
|
||||
() -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC));
|
||||
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||
|
||||
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
|
||||
() -> ft.regexpQuery("foo.*", randomInt(10), randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
|
||||
() -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
|
||||
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
|
||||
ee.getMessage());
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ package org.elasticsearch.index.mapper;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegexpQuery;
|
||||
import org.apache.lucene.search.RegexpQuery87;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
|
@ -43,11 +43,11 @@ public class RoutingFieldTypeTests extends FieldTypeTestCase {
|
|||
public void testRegexpQuery() {
|
||||
MappedFieldType ft = RoutingFieldMapper.RoutingFieldType.INSTANCE;
|
||||
|
||||
Query expected = new RegexpQuery(new Term("_routing", new BytesRef("foo?")));
|
||||
assertEquals(expected, ft.regexpQuery("foo?", 0, 10, null, MOCK_QSC));
|
||||
Query expected = new RegexpQuery87(new Term("_routing", new BytesRef("foo?")));
|
||||
assertEquals(expected, ft.regexpQuery("foo?", 0, 0, 10, null, MOCK_QSC));
|
||||
|
||||
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
|
||||
() -> ft.regexpQuery("foo?", randomInt(10), randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
|
||||
() -> ft.regexpQuery("foo?", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
|
||||
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
|
||||
ee.getMessage());
|
||||
}
|
||||
|
|
|
@ -26,7 +26,7 @@ import org.apache.lucene.search.ConstantScoreQuery;
|
|||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegexpQuery;
|
||||
import org.apache.lucene.search.RegexpQuery87;
|
||||
import org.apache.lucene.search.TermInSetQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
|
@ -86,16 +86,16 @@ public class TextFieldTypeTests extends FieldTypeTestCase {
|
|||
|
||||
public void testRegexpQuery() {
|
||||
MappedFieldType ft = new TextFieldType("field");
|
||||
assertEquals(new RegexpQuery(new Term("field","foo.*")),
|
||||
ft.regexpQuery("foo.*", 0, 10, null, MOCK_QSC));
|
||||
assertEquals(new RegexpQuery87(new Term("field","foo.*")),
|
||||
ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC));
|
||||
|
||||
MappedFieldType unsearchable = new TextFieldType("field", false, Collections.emptyMap());
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> unsearchable.regexpQuery("foo.*", 0, 10, null, MOCK_QSC));
|
||||
() -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC));
|
||||
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
|
||||
|
||||
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
|
||||
() -> ft.regexpQuery("foo.*", randomInt(10), randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
|
||||
() -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
|
||||
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
|
||||
ee.getMessage());
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ import org.apache.lucene.search.NormsFieldExistsQuery;
|
|||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegexpQuery;
|
||||
import org.apache.lucene.search.RegexpQuery87;
|
||||
import org.apache.lucene.search.SynonymQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
|
@ -733,8 +733,8 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
|
|||
Query query = queryStringQuery("/foo*bar/").defaultField(TEXT_FIELD_NAME)
|
||||
.maxDeterminizedStates(5000)
|
||||
.toQuery(createShardContext());
|
||||
assertThat(query, instanceOf(RegexpQuery.class));
|
||||
RegexpQuery regexpQuery = (RegexpQuery) query;
|
||||
assertThat(query, instanceOf(RegexpQuery87.class));
|
||||
RegexpQuery87 regexpQuery = (RegexpQuery87) query;
|
||||
assertTrue(regexpQuery.toString().contains("/foo*bar/"));
|
||||
}
|
||||
|
||||
|
|
|
@ -20,7 +20,9 @@
|
|||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegexpQuery;
|
||||
import org.apache.lucene.search.RegExp87;
|
||||
import org.apache.lucene.search.RegexpQuery87;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.elasticsearch.common.ParsingException;
|
||||
import org.elasticsearch.test.AbstractQueryTestCase;
|
||||
|
||||
|
@ -46,6 +48,9 @@ public class RegexpQueryBuilderTests extends AbstractQueryTestCase<RegexpQueryBu
|
|||
}
|
||||
query.flags(flags.toArray(new RegexpFlag[flags.size()]));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
query.caseInsensitive(true);
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
query.maxDeterminizedStates(randomInt(50000));
|
||||
}
|
||||
|
@ -77,8 +82,8 @@ public class RegexpQueryBuilderTests extends AbstractQueryTestCase<RegexpQueryBu
|
|||
|
||||
@Override
|
||||
protected void doAssertLuceneQuery(RegexpQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {
|
||||
assertThat(query, instanceOf(RegexpQuery.class));
|
||||
RegexpQuery regexpQuery = (RegexpQuery) query;
|
||||
assertThat(query, instanceOf(RegexpQuery87.class));
|
||||
RegexpQuery87 regexpQuery = (RegexpQuery87) query;
|
||||
|
||||
String expectedFieldName = expectedFieldName( queryBuilder.fieldName());
|
||||
assertThat(regexpQuery.getField(), equalTo(expectedFieldName));
|
||||
|
@ -101,6 +106,7 @@ public class RegexpQueryBuilderTests extends AbstractQueryTestCase<RegexpQueryBu
|
|||
" \"name.first\" : {\n" +
|
||||
" \"value\" : \"s.*y\",\n" +
|
||||
" \"flags_value\" : 7,\n" +
|
||||
" \"case_insensitive\" : true,\n" +
|
||||
" \"max_determinized_states\" : 20000,\n" +
|
||||
" \"boost\" : 1.0\n" +
|
||||
" }\n" +
|
||||
|
@ -146,5 +152,26 @@ public class RegexpQueryBuilderTests extends AbstractQueryTestCase<RegexpQueryBu
|
|||
"}";
|
||||
e = expectThrows(ParsingException.class, () -> parseQuery(shortJson));
|
||||
assertEquals("[regexp] query doesn't support multiple fields, found [user1] and [user2]", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public void testParseFailsWithCaseSensitive() throws IOException {
|
||||
String json =
|
||||
"{\n" +
|
||||
" \"regexp\": {\n" +
|
||||
" \"user1\": {\n" +
|
||||
" \"value\": \"k.*y\",\n" +
|
||||
" \"case_insensitive\": false\n" +
|
||||
" },\n" +
|
||||
" }\n" +
|
||||
"}";
|
||||
ParsingException e = expectThrows(ParsingException.class, () -> parseQuery(json));
|
||||
assertEquals("[regexp] query does not support [case_insensitive] = false", e.getMessage());
|
||||
}
|
||||
|
||||
public void testDeadCode() {
|
||||
assertTrue(RegExp87.class + " should be replaced with 8.7's "+RegExp.class,
|
||||
org.apache.lucene.util.Version.LATEST.major == 8 && org.apache.lucene.util.Version.LATEST.minor < 7);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -13,12 +13,12 @@ import org.apache.lucene.search.MatchAllDocsQuery;
|
|||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegExp87;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.elasticsearch.common.geo.ShapeRelation;
|
||||
import org.elasticsearch.common.lucene.BytesRefs;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
|
@ -208,13 +208,13 @@ public class ConstantKeywordFieldMapper extends FieldMapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Query regexpQuery(String value, int flags, int maxDeterminizedStates,
|
||||
public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
|
||||
MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
if (this.value == null) {
|
||||
return new MatchNoDocsQuery();
|
||||
}
|
||||
|
||||
final Automaton automaton = new RegExp(value, flags).toAutomaton(maxDeterminizedStates);
|
||||
final Automaton automaton = new RegExp87(value, syntaxFlags, matchFlags).toAutomaton(maxDeterminizedStates);
|
||||
final CharacterRunAutomaton runAutomaton = new CharacterRunAutomaton(automaton);
|
||||
if (runAutomaton.run(this.value)) {
|
||||
return new MatchAllDocsQuery();
|
||||
|
|
|
@ -8,7 +8,7 @@ package org.elasticsearch.xpack.constantkeyword.mapper;
|
|||
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.apache.lucene.search.RegExp87;
|
||||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
import org.elasticsearch.index.mapper.FieldTypeTestCase;
|
||||
import org.elasticsearch.xpack.constantkeyword.mapper.ConstantKeywordFieldMapper.ConstantKeywordFieldType;
|
||||
|
@ -86,9 +86,9 @@ public class ConstantKeywordFieldTypeTests extends FieldTypeTestCase {
|
|||
|
||||
/**
 * Checks how a constant keyword field answers regexp queries: a field with a null
 * value yields {@code MatchNoDocsQuery}; a field whose value is "foo" yields
 * {@code MatchAllDocsQuery} for a pattern that matches "foo" and
 * {@code MatchNoDocsQuery} for one that does not.
 *
 * NOTE(review): this hunk shows both sides of the change. The {@code RegExp.ALL}
 * calls are the pre-change lines; the {@code RegExp87.ALL} calls with the extra
 * {@code 0} argument replace them.
 */
public void testRegexpQuery() {
|
||||
// No constant value configured: nothing can match.
ConstantKeywordFieldType none = new ConstantKeywordFieldType("f", null);
|
||||
assertEquals(new MatchNoDocsQuery(), none.regexpQuery("f..o", RegExp.ALL, 10, null, null));
|
||||
assertEquals(new MatchNoDocsQuery(), none.regexpQuery("f..o", RegExp87.ALL, 0, 10, null, null));
|
||||
// Constant value "foo": "f.o" matches it, "f..o" does not.
ConstantKeywordFieldType ft = new ConstantKeywordFieldType("f", "foo");
|
||||
assertEquals(new MatchAllDocsQuery(), ft.regexpQuery("f.o", RegExp.ALL, 10, null, null));
|
||||
assertEquals(new MatchNoDocsQuery(), ft.regexpQuery("f..o", RegExp.ALL, 10, null, null));
|
||||
assertEquals(new MatchAllDocsQuery(), ft.regexpQuery("f.o", RegExp87.ALL, 0, 10, null, null));
|
||||
assertEquals(new MatchNoDocsQuery(), ft.regexpQuery("f..o", RegExp87.ALL, 0, 10, null, null));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -291,7 +291,7 @@ public final class FlatObjectFieldMapper extends DynamicKeyFieldMapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Query regexpQuery(String value, int flags, int maxDeterminizedStates,
|
||||
public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
|
||||
MultiTermQuery.RewriteMethod method, QueryShardContext context) {
|
||||
throw new UnsupportedOperationException("[regexp] queries are not currently supported on keyed " +
|
||||
"[" + CONTENT_TYPE + "] fields.");
|
||||
|
|
|
@ -130,7 +130,7 @@ public class KeyedFlatObjectFieldTypeTests extends FieldTypeTestCase {
|
|||
KeyedFlatObjectFieldType ft = createFieldType();
|
||||
|
||||
UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class,
|
||||
() -> ft.regexpQuery("valu*", 0, 10, null, randomMockShardContext()));
|
||||
() -> ft.regexpQuery("valu*", 0, 0, 10, null, randomMockShardContext()));
|
||||
assertEquals("[regexp] queries are not currently supported on keyed [flattened] fields.", e.getMessage());
|
||||
}
|
||||
|
||||
|
|
|
@ -10,7 +10,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.DocValuesFieldExistsQuery;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegexpQuery;
|
||||
import org.apache.lucene.search.RegexpQuery87;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
|
@ -96,12 +96,12 @@ public class RootFlatObjectFieldTypeTests extends FieldTypeTestCase {
|
|||
/**
 * Exercises {@code regexpQuery} on a root flat object field: the query built for the
 * default field type, and the rejection of regexp queries when the shard context
 * disallows expensive queries.
 *
 * NOTE(review): this hunk shows both sides of the change. The {@code RegexpQuery}
 * expectation and the calls without the extra {@code 0} argument are the pre-change
 * lines; the {@code RegexpQuery87} expectation and the calls with the extra
 * {@code 0} argument replace them.
 */
public void testRegexpQuery() {
|
||||
RootFlatObjectFieldType ft = createDefaultFieldType();
|
||||
|
||||
// The produced query must equal a regexp query on the term ("field", "val.*").
Query expected = new RegexpQuery(new Term("field", "val.*"));
|
||||
Query actual = ft.regexpQuery("val.*", 0, 10, null, MOCK_QSC);
|
||||
Query expected = new RegexpQuery87(new Term("field", "val.*"));
|
||||
Query actual = ft.regexpQuery("val.*", 0, 0, 10, null, MOCK_QSC);
|
||||
assertEquals(expected, actual);
|
||||
|
||||
// With MOCK_QSC_DISALLOW_EXPENSIVE the call must fail with the allow_expensive_queries message.
ElasticsearchException ee = expectThrows(ElasticsearchException.class,
|
||||
() -> ft.regexpQuery("val.*", randomInt(10), randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
|
||||
() -> ft.regexpQuery("val.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_QSC_DISALLOW_EXPENSIVE));
|
||||
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.",
|
||||
ee.getMessage());
|
||||
}
|
||||
|
|
|
@ -106,6 +106,23 @@ setup:
|
|||
|
||||
- match: {hits.total.value: 1}
|
||||
|
||||
---
|
||||
# Runs a regexp query with case_insensitive: true against the my_wildcard field
# and checks the total hit count.
"Case insensitive query":
|
||||
# Skipped for versions up to and including 7.9.99 (see the reason below).
- skip:
|
||||
features: headers
|
||||
version: " - 7.9.99"
|
||||
reason: "Case insensitive flag added in 7.10"
|
||||
- do:
|
||||
search:
|
||||
body:
|
||||
track_total_hits: true
|
||||
query:
|
||||
regexp:
|
||||
my_wildcard: {value: ".*Worl.*", case_insensitive: true}
|
||||
|
||||
|
||||
# NOTE(review): the expected count of 3 depends on the documents indexed by the
# setup section, which is not visible in this hunk — confirm against the setup.
- match: {hits.total.value: 3}
|
||||
|
||||
---
|
||||
"null query":
|
||||
- do:
|
||||
|
|
|
@ -29,13 +29,13 @@ import org.apache.lucene.search.MultiTermQuery;
|
|||
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegExp87;
|
||||
import org.apache.lucene.search.RegExp87.Kind;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.apache.lucene.util.automaton.RegExp.Kind;
|
||||
import org.elasticsearch.ElasticsearchParseException;
|
||||
import org.elasticsearch.common.geo.ShapeRelation;
|
||||
import org.elasticsearch.common.lucene.BytesRefs;
|
||||
|
@ -294,12 +294,13 @@ public class WildcardFieldMapper extends FieldMapper {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Query regexpQuery(String value, int flags, int maxDeterminizedStates, RewriteMethod method, QueryShardContext context) {
|
||||
public Query regexpQuery(String value, int syntaxFlags, int matchFlags, int maxDeterminizedStates,
|
||||
RewriteMethod method, QueryShardContext context) {
|
||||
if (value.length() == 0) {
|
||||
return new MatchNoDocsQuery();
|
||||
}
|
||||
|
||||
RegExp ngramRegex = new RegExp(addLineEndChars(toLowerCase(value)), flags);
|
||||
RegExp87 ngramRegex = new RegExp87(addLineEndChars(toLowerCase(value)), syntaxFlags, matchFlags);
|
||||
|
||||
Query approxBooleanQuery = toApproximationQuery(ngramRegex);
|
||||
Query approxNgramQuery = rewriteBoolToNgramQuery(approxBooleanQuery);
|
||||
|
@ -310,7 +311,7 @@ public class WildcardFieldMapper extends FieldMapper {
|
|||
return existsQuery(context);
|
||||
}
|
||||
Supplier<Automaton> deferredAutomatonSupplier = ()-> {
|
||||
RegExp regex = new RegExp(value, flags);
|
||||
RegExp87 regex = new RegExp87(value, syntaxFlags, matchFlags);
|
||||
return regex.toAutomaton(maxDeterminizedStates);
|
||||
};
|
||||
|
||||
|
@ -339,7 +340,7 @@ public class WildcardFieldMapper extends FieldMapper {
|
|||
// * If an expression resolves to a RegExpQuery eg ?? then only the verification
|
||||
// query is run.
|
||||
// * Anything else is a concrete query that should be run on the ngram index.
|
||||
public static Query toApproximationQuery(RegExp r) throws IllegalArgumentException {
|
||||
public static Query toApproximationQuery(RegExp87 r) throws IllegalArgumentException {
|
||||
Query result = null;
|
||||
switch (r.kind) {
|
||||
case REGEXP_UNION:
|
||||
|
@ -400,7 +401,7 @@ public class WildcardFieldMapper extends FieldMapper {
|
|||
return result;
|
||||
}
|
||||
|
||||
private static Query createConcatenationQuery(RegExp r) {
|
||||
private static Query createConcatenationQuery(RegExp87 r) {
|
||||
// Create ANDs of expressions plus collapse consecutive TermQuerys into single longer ones
|
||||
ArrayList<Query> queries = new ArrayList<>();
|
||||
findLeaves(r.exp1, Kind.REGEXP_CONCATENATION, queries);
|
||||
|
@ -431,7 +432,7 @@ public class WildcardFieldMapper extends FieldMapper {
|
|||
|
||||
}
|
||||
|
||||
private static Query createUnionQuery(RegExp r) {
|
||||
private static Query createUnionQuery(RegExp87 r) {
|
||||
// Create an OR of clauses
|
||||
ArrayList<Query> queries = new ArrayList<>();
|
||||
findLeaves(r.exp1, Kind.REGEXP_UNION, queries);
|
||||
|
@ -458,7 +459,7 @@ public class WildcardFieldMapper extends FieldMapper {
|
|||
return new MatchAllButRequireVerificationQuery();
|
||||
}
|
||||
|
||||
private static void findLeaves(RegExp exp, Kind kind, List<Query> queries) {
|
||||
private static void findLeaves(RegExp87 exp, Kind kind, List<Query> queries) {
|
||||
if (exp.kind == kind) {
|
||||
findLeaves(exp.exp1, kind, queries);
|
||||
findLeaves( exp.exp2, kind, queries);
|
||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.RegExp87;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
|
@ -37,7 +38,7 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
//import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetadata;
|
||||
import org.elasticsearch.common.collect.List;
|
||||
|
@ -168,7 +169,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
|
|||
assertThat(wildcardFieldTopDocs.totalHits.value, equalTo(0L));
|
||||
|
||||
// Test regexp query
|
||||
wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(queryString, RegExp.ALL, 20000, null, MOCK_QSC);
|
||||
wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(queryString, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
|
||||
wildcardFieldTopDocs = searcher.search(wildcardFieldQuery, 10, Sort.INDEXORDER);
|
||||
assertThat(wildcardFieldTopDocs.totalHits.value, equalTo(0L));
|
||||
|
||||
|
@ -225,8 +226,8 @@ public class WildcardFieldMapperTests extends ESTestCase {
|
|||
break;
|
||||
case 1:
|
||||
pattern = getRandomRegexPattern(values);
|
||||
wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(pattern, RegExp.ALL, 20000, null, MOCK_QSC);
|
||||
keywordFieldQuery = keywordFieldType.fieldType().regexpQuery(pattern, RegExp.ALL, 20000, null, MOCK_QSC);
|
||||
wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(pattern, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
|
||||
keywordFieldQuery = keywordFieldType.fieldType().regexpQuery(pattern, RegExp87.ALL, 0,20000, null, MOCK_QSC);
|
||||
break;
|
||||
case 2:
|
||||
pattern = randomABString(5);
|
||||
|
@ -379,12 +380,12 @@ public class WildcardFieldMapperTests extends ESTestCase {
|
|||
// All these expressions should rewrite to a match all with no verification step required at all
|
||||
String superfastRegexes[]= { ".*", "...*..", "(foo|bar|.*)", "@"};
|
||||
for (String regex : superfastRegexes) {
|
||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC);
|
||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
|
||||
assertTrue(wildcardFieldQuery instanceof DocValuesFieldExistsQuery);
|
||||
}
|
||||
String matchNoDocsRegexes[]= { ""};
|
||||
for (String regex : matchNoDocsRegexes) {
|
||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC);
|
||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
|
||||
assertTrue(wildcardFieldQuery instanceof MatchNoDocsQuery);
|
||||
}
|
||||
|
||||
|
@ -404,7 +405,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
|
|||
for (String[] test : acceleratedTests) {
|
||||
String regex = test[0];
|
||||
String expectedAccelerationQueryString = test[1].replaceAll("_", ""+WildcardFieldMapper.TOKEN_START_OR_END_CHAR);
|
||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC);
|
||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
|
||||
testExpectedAccelerationQuery(regex, wildcardFieldQuery, expectedAccelerationQueryString);
|
||||
}
|
||||
|
||||
|
@ -412,7 +413,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
|
|||
// TODO we can possibly improve on some of these
|
||||
String matchAllButVerifyTests[]= { "..", "(a)?","(a|b){0,3}", "((foo)?|(foo|bar)?)", "@&~(abc.+)", "aaa.+&.+bbb"};
|
||||
for (String regex : matchAllButVerifyTests) {
|
||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC);
|
||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
|
||||
assertTrue(regex +" was not a pure verify query " +formatQuery(wildcardFieldQuery),
|
||||
wildcardFieldQuery instanceof AutomatonQueryOnBinaryDv);
|
||||
}
|
||||
|
@ -428,7 +429,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
|
|||
for (String[] test : suboptimalTests) {
|
||||
String regex = test[0];
|
||||
String expectedAccelerationQueryString = test[1].replaceAll("_", ""+WildcardFieldMapper.TOKEN_START_OR_END_CHAR);
|
||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp.ALL, 20000, null, MOCK_QSC);
|
||||
Query wildcardFieldQuery = wildcardFieldType.fieldType().regexpQuery(regex, RegExp87.ALL, 0, 20000, null, MOCK_QSC);
|
||||
|
||||
testExpectedAccelerationQuery(regex, wildcardFieldQuery, expectedAccelerationQueryString);
|
||||
}
|
||||
|
@ -767,7 +768,7 @@ public class WildcardFieldMapperTests extends ESTestCase {
|
|||
}
|
||||
|
||||
//Assert our randomly generated regex actually matches the provided raw input.
|
||||
RegExp regex = new RegExp(result.toString());
|
||||
RegExp87 regex = new RegExp87(result.toString());
|
||||
Automaton automaton = regex.toAutomaton();
|
||||
ByteRunAutomaton bytesMatcher = new ByteRunAutomaton(automaton);
|
||||
BytesRef br = new BytesRef(randomValue);
|
||||
|
|
Loading…
Reference in New Issue