Add case-insensitive prefix and wildcard queries to 'version' field (#62754) (#62782)

This change adds support for the recently introduced case-insensitivity flag for
wildcard and prefix queries. Since version field values are encoded differently, we
need to adapt our own AutomatonQuery variation so that it accepts both letter cases
of each character when case insensitivity is turned on.
This commit is contained in:
Christoph Büscher 2020-09-23 11:48:34 +02:00 committed by GitHub
parent 81645ec2cc
commit 29074e7055
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 27 deletions

View File

@ -37,7 +37,7 @@ import java.util.List;
public class AutomatonQueries { public class AutomatonQueries {
/** Build an automaton query accepting all terms with the specified prefix, ASCII case insensitive. */ /** Build an automaton query accepting all terms with the specified prefix, ASCII case insensitive. */
public static Automaton caseInsensitivePrefix(String s) { public static Automaton caseInsensitivePrefix(String s) {
List<Automaton> list = new ArrayList<>(); List<Automaton> list = new ArrayList<>();
@ -50,27 +50,27 @@ public class AutomatonQueries {
Automaton a = Operations.concatenate(list); Automaton a = Operations.concatenate(list);
a = MinimizationOperations.minimize(a, Integer.MAX_VALUE); a = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
return a; return a;
} }
/** Build an automaton query accepting all terms with the specified prefix, ASCII case insensitive. */ /** Build an automaton query accepting all terms with the specified prefix, ASCII case insensitive. */
public static AutomatonQuery caseInsensitivePrefixQuery(Term prefix) { public static AutomatonQuery caseInsensitivePrefixQuery(Term prefix) {
return new AutomatonQuery(prefix, caseInsensitivePrefix(prefix.text())); return new AutomatonQuery(prefix, caseInsensitivePrefix(prefix.text()));
} }
/** Build an automaton accepting all terms ASCII case insensitive. */ /** Build an automaton accepting all terms ASCII case insensitive. */
public static AutomatonQuery caseInsensitiveTermQuery(Term term) { public static AutomatonQuery caseInsensitiveTermQuery(Term term) {
BytesRef prefix = term.bytes(); BytesRef prefix = term.bytes();
return new AutomatonQuery(term, toCaseInsensitiveString(prefix,Integer.MAX_VALUE)); return new AutomatonQuery(term, toCaseInsensitiveString(prefix,Integer.MAX_VALUE));
} }
/** Build an automaton matching a wildcard pattern, ASCII case insensitive. */ /** Build an automaton matching a wildcard pattern, ASCII case insensitive. */
public static AutomatonQuery caseInsensitiveWildcardQuery(Term wildcardquery) { public static AutomatonQuery caseInsensitiveWildcardQuery(Term wildcardquery) {
return new AutomatonQuery(wildcardquery, toCaseInsensitiveWildcardAutomaton(wildcardquery,Integer.MAX_VALUE)); return new AutomatonQuery(wildcardquery, toCaseInsensitiveWildcardAutomaton(wildcardquery,Integer.MAX_VALUE));
} }
/** String equality with support for wildcards */ /** String equality with support for wildcards */
public static final char WILDCARD_STRING = '*'; public static final char WILDCARD_STRING = '*';
@ -78,21 +78,21 @@ public class AutomatonQueries {
public static final char WILDCARD_CHAR = '?'; public static final char WILDCARD_CHAR = '?';
/** Escape character */ /** Escape character */
public static final char WILDCARD_ESCAPE = '\\'; public static final char WILDCARD_ESCAPE = '\\';
/** /**
* Convert Lucene wildcard syntax into an automaton. * Convert Lucene wildcard syntax into an automaton.
*/ */
@SuppressWarnings("fallthrough") @SuppressWarnings("fallthrough")
public static Automaton toCaseInsensitiveWildcardAutomaton(Term wildcardquery, int maxDeterminizedStates) { public static Automaton toCaseInsensitiveWildcardAutomaton(Term wildcardquery, int maxDeterminizedStates) {
List<Automaton> automata = new ArrayList<>(); List<Automaton> automata = new ArrayList<>();
String wildcardText = wildcardquery.text(); String wildcardText = wildcardquery.text();
for (int i = 0; i < wildcardText.length();) { for (int i = 0; i < wildcardText.length();) {
final int c = wildcardText.codePointAt(i); final int c = wildcardText.codePointAt(i);
int length = Character.charCount(c); int length = Character.charCount(c);
switch(c) { switch(c) {
case WILDCARD_STRING: case WILDCARD_STRING:
automata.add(Automata.makeAnyString()); automata.add(Automata.makeAnyString());
break; break;
case WILDCARD_CHAR: case WILDCARD_CHAR:
@ -111,14 +111,14 @@ public class AutomatonQueries {
} }
i += length; i += length;
} }
return Operations.concatenate(automata); return Operations.concatenate(automata);
} }
protected static Automaton toCaseInsensitiveString(BytesRef br, int maxDeterminizedStates) { protected static Automaton toCaseInsensitiveString(BytesRef br, int maxDeterminizedStates) {
return toCaseInsensitiveString(br.utf8ToString(), maxDeterminizedStates); return toCaseInsensitiveString(br.utf8ToString(), maxDeterminizedStates);
} }
public static Automaton toCaseInsensitiveString(String s, int maxDeterminizedStates) { public static Automaton toCaseInsensitiveString(String s, int maxDeterminizedStates) {
List<Automaton> list = new ArrayList<>(); List<Automaton> list = new ArrayList<>();
Iterator<Integer> iter = s.codePoints().iterator(); Iterator<Integer> iter = s.codePoints().iterator();
@ -129,11 +129,11 @@ public class AutomatonQueries {
Automaton a = Operations.concatenate(list); Automaton a = Operations.concatenate(list);
a = MinimizationOperations.minimize(a, maxDeterminizedStates); a = MinimizationOperations.minimize(a, maxDeterminizedStates);
return a; return a;
} }
protected static Automaton toCaseInsensitiveChar(int codepoint, int maxDeterminizedStates) { public static Automaton toCaseInsensitiveChar(int codepoint, int maxDeterminizedStates) {
Automaton case1 = Automata.makeChar(codepoint); Automaton case1 = Automata.makeChar(codepoint);
// For now we only work with ASCII characters // For now we only work with ASCII characters
if (codepoint > 128) { if (codepoint > 128) {

View File

@ -13,6 +13,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -37,11 +38,11 @@ class VersionFieldWildcardQuery extends AutomatonQuery {
private static final byte WILDCARD_CHAR = '?'; private static final byte WILDCARD_CHAR = '?';
VersionFieldWildcardQuery(Term term) { VersionFieldWildcardQuery(Term term, boolean caseInsensitive) {
super(term, toAutomaton(term), Integer.MAX_VALUE, true); super(term, toAutomaton(term, caseInsensitive), Integer.MAX_VALUE, true);
} }
private static Automaton toAutomaton(Term wildcardquery) { private static Automaton toAutomaton(Term wildcardquery, boolean caseInsensitive) {
List<Automaton> automata = new ArrayList<>(); List<Automaton> automata = new ArrayList<>();
BytesRef wildcardText = wildcardquery.bytes(); BytesRef wildcardText = wildcardquery.bytes();
@ -96,7 +97,11 @@ class VersionFieldWildcardQuery extends AutomatonQuery {
automata.add(Automata.makeChar(c)); automata.add(Automata.makeChar(c));
break; break;
default: default:
automata.add(Automata.makeChar(c)); if (caseInsensitive == false) {
automata.add(Automata.makeChar(c));
} else {
automata.add(AutomatonQueries.toCaseInsensitiveChar(c, Integer.MAX_VALUE));
}
} }
i += length; i += length;
} }

View File

@ -244,7 +244,7 @@ public class VersionStringFieldMapper extends ParametrizedFieldMapper {
); );
} }
VersionFieldWildcardQuery query = new VersionFieldWildcardQuery(new Term(name(), value)); VersionFieldWildcardQuery query = new VersionFieldWildcardQuery(new Term(name(), value), caseInsensitive);
QueryParsers.setRewriteMethod(query, method); QueryParsers.setRewriteMethod(query, method);
return query; return query;
} }

View File

@ -166,6 +166,14 @@ public class VersionStringFieldTests extends ESSingleNodeTestCase {
response = client().prepareSearch(indexName).setQuery(QueryBuilders.prefixQuery("version", "21.11")).get(); response = client().prepareSearch(indexName).setQuery(QueryBuilders.prefixQuery("version", "21.11")).get();
assertEquals(1, response.getHits().getTotalHits().value); assertEquals(1, response.getHits().getTotalHits().value);
// test case sensitivity / insensitivity
response = client().prepareSearch(indexName).setQuery(QueryBuilders.prefixQuery("version", "2.1.0-A")).get();
assertEquals(0, response.getHits().getTotalHits().value);
response = client().prepareSearch(indexName).setQuery(QueryBuilders.prefixQuery("version", "2.1.0-A").caseInsensitive(true)).get();
assertEquals(1, response.getHits().getTotalHits().value);
assertEquals("2.1.0-alpha.beta", response.getHits().getHits()[0].getSourceAsMap().get("version"));
} }
public void testSort() throws IOException { public void testSort() throws IOException {
@ -261,7 +269,6 @@ public class VersionStringFieldTests extends ESSingleNodeTestCase {
assertEquals(2, response.getHits().getTotalHits().value); assertEquals(2, response.getHits().getTotalHits().value);
assertEquals("1.0.0alpha2.1.0-rc.1", response.getHits().getHits()[0].getSourceAsMap().get("version")); assertEquals("1.0.0alpha2.1.0-rc.1", response.getHits().getHits()[0].getSourceAsMap().get("version"));
assertEquals("2.1.0-alpha.beta", response.getHits().getHits()[1].getSourceAsMap().get("version")); assertEquals("2.1.0-alpha.beta", response.getHits().getHits()[1].getSourceAsMap().get("version"));
} }
public void testFuzzyQuery() throws Exception { public void testFuzzyQuery() throws Exception {
@ -342,6 +349,17 @@ public class VersionStringFieldTests extends ESSingleNodeTestCase {
checkWildcardQuery(indexName, "*-*", new String[] { "1.0.0-alpha.2.1.0-rc.1", "2.1.0-alpha.beta", "3.1.1-a" }); checkWildcardQuery(indexName, "*-*", new String[] { "1.0.0-alpha.2.1.0-rc.1", "2.1.0-alpha.beta", "3.1.1-a" });
checkWildcardQuery(indexName, "1.3.0+b*", new String[] { "1.3.0+build.1234567" }); checkWildcardQuery(indexName, "1.3.0+b*", new String[] { "1.3.0+build.1234567" });
checkWildcardQuery(indexName, "3.1.1??", new String[] { "3.1.1-a", "3.1.1+b", "3.1.123" }); checkWildcardQuery(indexName, "3.1.1??", new String[] { "3.1.1-a", "3.1.1+b", "3.1.123" });
// test case sensitivity / insensitivity
SearchResponse response = client().prepareSearch(indexName).setQuery(QueryBuilders.wildcardQuery("version", "*Alpha*")).get();
assertEquals(0, response.getHits().getTotalHits().value);
response = client().prepareSearch(indexName)
.setQuery(QueryBuilders.wildcardQuery("version", "*Alpha*").caseInsensitive(true))
.get();
assertEquals(2, response.getHits().getTotalHits().value);
assertEquals("1.0.0-alpha.2.1.0-rc.1", response.getHits().getHits()[0].getSourceAsMap().get("version"));
assertEquals("2.1.0-alpha.beta", response.getHits().getHits()[1].getSourceAsMap().get("version"));
} }
private void checkWildcardQuery(String indexName, String query, String... expectedResults) { private void checkWildcardQuery(String indexName, String query, String... expectedResults) {