From 29074e7055300fec4ae7c27021fb8c1a2e897348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 23 Sep 2020 11:48:34 +0200 Subject: [PATCH] Add case insensitive prefix and wildcard to 'version' field (#62754) (#62782) This change adds support for the recently introduced case insensitivity flag for wildcard and prefix queries. Since version field values are encoded differently, we need to adapt our own AutomatonQuery variation so that it accepts both cases of each character when case insensitivity is turned on. --- .../lucene/search/AutomatonQueries.java | 42 +++++++++---------- .../VersionFieldWildcardQuery.java | 13 ++++-- .../VersionStringFieldMapper.java | 2 +- .../versionfield/VersionStringFieldTests.java | 20 ++++++++- 4 files changed, 50 insertions(+), 27 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/AutomatonQueries.java b/server/src/main/java/org/elasticsearch/common/lucene/search/AutomatonQueries.java index 180cd09a74c..7fa6b16b500 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/search/AutomatonQueries.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/AutomatonQueries.java @@ -37,7 +37,7 @@ import java.util.List; public class AutomatonQueries { - + /** Build an automaton query accepting all terms with the specified prefix, ASCII case insensitive. */ public static Automaton caseInsensitivePrefix(String s) { List list = new ArrayList<>(); @@ -50,27 +50,27 @@ public class AutomatonQueries { Automaton a = Operations.concatenate(list); a = MinimizationOperations.minimize(a, Integer.MAX_VALUE); return a; - } - - + } + + /** Build an automaton query accepting all terms with the specified prefix, ASCII case insensitive. */ public static AutomatonQuery caseInsensitivePrefixQuery(Term prefix) { return new AutomatonQuery(prefix, caseInsensitivePrefix(prefix.text())); - } - + } + /** Build an automaton accepting all terms ASCII case insensitive. 
*/ public static AutomatonQuery caseInsensitiveTermQuery(Term term) { BytesRef prefix = term.bytes(); return new AutomatonQuery(term, toCaseInsensitiveString(prefix,Integer.MAX_VALUE)); - } + } + - /** Build an automaton matching a wildcard pattern, ASCII case insensitive. */ public static AutomatonQuery caseInsensitiveWildcardQuery(Term wildcardquery) { return new AutomatonQuery(wildcardquery, toCaseInsensitiveWildcardAutomaton(wildcardquery,Integer.MAX_VALUE)); - } - - + } + + /** String equality with support for wildcards */ public static final char WILDCARD_STRING = '*'; @@ -78,21 +78,21 @@ public class AutomatonQueries { public static final char WILDCARD_CHAR = '?'; /** Escape character */ - public static final char WILDCARD_ESCAPE = '\\'; + public static final char WILDCARD_ESCAPE = '\\'; /** * Convert Lucene wildcard syntax into an automaton. */ @SuppressWarnings("fallthrough") public static Automaton toCaseInsensitiveWildcardAutomaton(Term wildcardquery, int maxDeterminizedStates) { List automata = new ArrayList<>(); - + String wildcardText = wildcardquery.text(); - + for (int i = 0; i < wildcardText.length();) { final int c = wildcardText.codePointAt(i); int length = Character.charCount(c); switch(c) { - case WILDCARD_STRING: + case WILDCARD_STRING: automata.add(Automata.makeAnyString()); break; case WILDCARD_CHAR: @@ -111,14 +111,14 @@ public class AutomatonQueries { } i += length; } - + return Operations.concatenate(automata); - } + } protected static Automaton toCaseInsensitiveString(BytesRef br, int maxDeterminizedStates) { return toCaseInsensitiveString(br.utf8ToString(), maxDeterminizedStates); } - + public static Automaton toCaseInsensitiveString(String s, int maxDeterminizedStates) { List list = new ArrayList<>(); Iterator iter = s.codePoints().iterator(); @@ -129,11 +129,11 @@ public class AutomatonQueries { Automaton a = Operations.concatenate(list); a = MinimizationOperations.minimize(a, maxDeterminizedStates); return a; - - + + } - protected 
static Automaton toCaseInsensitiveChar(int codepoint, int maxDeterminizedStates) { + public static Automaton toCaseInsensitiveChar(int codepoint, int maxDeterminizedStates) { Automaton case1 = Automata.makeChar(codepoint); // For now we only work with ASCII characters if (codepoint > 128) { diff --git a/x-pack/plugin/versionfield/src/main/java/org/elasticsearch/xpack/versionfield/VersionFieldWildcardQuery.java b/x-pack/plugin/versionfield/src/main/java/org/elasticsearch/xpack/versionfield/VersionFieldWildcardQuery.java index 07ab92877c9..557eccac8ff 100644 --- a/x-pack/plugin/versionfield/src/main/java/org/elasticsearch/xpack/versionfield/VersionFieldWildcardQuery.java +++ b/x-pack/plugin/versionfield/src/main/java/org/elasticsearch/xpack/versionfield/VersionFieldWildcardQuery.java @@ -13,6 +13,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; +import org.elasticsearch.common.lucene.search.AutomatonQueries; import java.util.ArrayList; import java.util.List; @@ -37,11 +38,11 @@ class VersionFieldWildcardQuery extends AutomatonQuery { private static final byte WILDCARD_CHAR = '?'; - VersionFieldWildcardQuery(Term term) { - super(term, toAutomaton(term), Integer.MAX_VALUE, true); + VersionFieldWildcardQuery(Term term, boolean caseInsensitive) { + super(term, toAutomaton(term, caseInsensitive), Integer.MAX_VALUE, true); } - private static Automaton toAutomaton(Term wildcardquery) { + private static Automaton toAutomaton(Term wildcardquery, boolean caseInsensitive) { List automata = new ArrayList<>(); BytesRef wildcardText = wildcardquery.bytes(); @@ -96,7 +97,11 @@ class VersionFieldWildcardQuery extends AutomatonQuery { automata.add(Automata.makeChar(c)); break; default: - automata.add(Automata.makeChar(c)); + if (caseInsensitive == false) { + automata.add(Automata.makeChar(c)); + } else { + 
automata.add(AutomatonQueries.toCaseInsensitiveChar(c, Integer.MAX_VALUE)); + } } i += length; } diff --git a/x-pack/plugin/versionfield/src/main/java/org/elasticsearch/xpack/versionfield/VersionStringFieldMapper.java b/x-pack/plugin/versionfield/src/main/java/org/elasticsearch/xpack/versionfield/VersionStringFieldMapper.java index a1eedcf94cb..87eca47582a 100644 --- a/x-pack/plugin/versionfield/src/main/java/org/elasticsearch/xpack/versionfield/VersionStringFieldMapper.java +++ b/x-pack/plugin/versionfield/src/main/java/org/elasticsearch/xpack/versionfield/VersionStringFieldMapper.java @@ -244,7 +244,7 @@ public class VersionStringFieldMapper extends ParametrizedFieldMapper { ); } - VersionFieldWildcardQuery query = new VersionFieldWildcardQuery(new Term(name(), value)); + VersionFieldWildcardQuery query = new VersionFieldWildcardQuery(new Term(name(), value), caseInsensitive); QueryParsers.setRewriteMethod(query, method); return query; } diff --git a/x-pack/plugin/versionfield/src/test/java/org/elasticsearch/xpack/versionfield/VersionStringFieldTests.java b/x-pack/plugin/versionfield/src/test/java/org/elasticsearch/xpack/versionfield/VersionStringFieldTests.java index bce53ce0239..86acb954db1 100644 --- a/x-pack/plugin/versionfield/src/test/java/org/elasticsearch/xpack/versionfield/VersionStringFieldTests.java +++ b/x-pack/plugin/versionfield/src/test/java/org/elasticsearch/xpack/versionfield/VersionStringFieldTests.java @@ -166,6 +166,14 @@ public class VersionStringFieldTests extends ESSingleNodeTestCase { response = client().prepareSearch(indexName).setQuery(QueryBuilders.prefixQuery("version", "21.11")).get(); assertEquals(1, response.getHits().getTotalHits().value); + + // test case sensitivity / insensitivity + response = client().prepareSearch(indexName).setQuery(QueryBuilders.prefixQuery("version", "2.1.0-A")).get(); + assertEquals(0, response.getHits().getTotalHits().value); + + response = 
client().prepareSearch(indexName).setQuery(QueryBuilders.prefixQuery("version", "2.1.0-A").caseInsensitive(true)).get(); + assertEquals(1, response.getHits().getTotalHits().value); + assertEquals("2.1.0-alpha.beta", response.getHits().getHits()[0].getSourceAsMap().get("version")); } public void testSort() throws IOException { @@ -261,7 +269,6 @@ public class VersionStringFieldTests extends ESSingleNodeTestCase { assertEquals(2, response.getHits().getTotalHits().value); assertEquals("1.0.0alpha2.1.0-rc.1", response.getHits().getHits()[0].getSourceAsMap().get("version")); assertEquals("2.1.0-alpha.beta", response.getHits().getHits()[1].getSourceAsMap().get("version")); - } public void testFuzzyQuery() throws Exception { @@ -342,6 +349,17 @@ public class VersionStringFieldTests extends ESSingleNodeTestCase { checkWildcardQuery(indexName, "*-*", new String[] { "1.0.0-alpha.2.1.0-rc.1", "2.1.0-alpha.beta", "3.1.1-a" }); checkWildcardQuery(indexName, "1.3.0+b*", new String[] { "1.3.0+build.1234567" }); checkWildcardQuery(indexName, "3.1.1??", new String[] { "3.1.1-a", "3.1.1+b", "3.1.123" }); + + // test case sensitivity / insensitivity + SearchResponse response = client().prepareSearch(indexName).setQuery(QueryBuilders.wildcardQuery("version", "*Alpha*")).get(); + assertEquals(0, response.getHits().getTotalHits().value); + + response = client().prepareSearch(indexName) + .setQuery(QueryBuilders.wildcardQuery("version", "*Alpha*").caseInsensitive(true)) + .get(); + assertEquals(2, response.getHits().getTotalHits().value); + assertEquals("1.0.0-alpha.2.1.0-rc.1", response.getHits().getHits()[0].getSourceAsMap().get("version")); + assertEquals("2.1.0-alpha.beta", response.getHits().getHits()[1].getSourceAsMap().get("version")); } private void checkWildcardQuery(String indexName, String query, String... expectedResults) {