Add case insensitive prefix and wildcard to 'version' field (#62754) (#62782)

This change adds support for the recently introduced case insensitivity flag for
wildcard and prefix queries. Since version field values are encoded differently, we
need to adapt our own AutomatonQuery variation so that it matches both cases of a
character if case insensitivity is turned on.
This commit is contained in:
Christoph Büscher 2020-09-23 11:48:34 +02:00 committed by GitHub
parent 81645ec2cc
commit 29074e7055
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 50 additions and 27 deletions

View File

@ -37,7 +37,7 @@ import java.util.List;
public class AutomatonQueries {
/** Build an automaton accepting all terms with the specified prefix, ASCII case insensitive. */
public static Automaton caseInsensitivePrefix(String s) {
List<Automaton> list = new ArrayList<>();
@ -50,27 +50,27 @@ public class AutomatonQueries {
Automaton a = Operations.concatenate(list);
a = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
return a;
}
}
/**
 * Wraps the ASCII case-insensitive prefix automaton built from the text of {@code prefix}
 * in an {@link AutomatonQuery} on that same term.
 */
public static AutomatonQuery caseInsensitivePrefixQuery(Term prefix) {
    final Automaton prefixAutomaton = caseInsensitivePrefix(prefix.text());
    return new AutomatonQuery(prefix, prefixAutomaton);
}
}
/**
 * Build an automaton query for the given term, ASCII case insensitive. The automaton is
 * derived from the term's bytes via {@code toCaseInsensitiveString}.
 */
public static AutomatonQuery caseInsensitiveTermQuery(Term term) {
    final Automaton termAutomaton = toCaseInsensitiveString(term.bytes(), Integer.MAX_VALUE);
    return new AutomatonQuery(term, termAutomaton);
}
}
/**
 * Build an automaton query for a wildcard pattern, ASCII case insensitive. The pattern
 * term is converted with {@code toCaseInsensitiveWildcardAutomaton}.
 */
public static AutomatonQuery caseInsensitiveWildcardQuery(Term wildcardquery) {
    final Automaton wildcardAutomaton = toCaseInsensitiveWildcardAutomaton(wildcardquery, Integer.MAX_VALUE);
    return new AutomatonQuery(wildcardquery, wildcardAutomaton);
}
}
/** String equality with support for wildcards */
public static final char WILDCARD_STRING = '*';
@ -78,21 +78,21 @@ public class AutomatonQueries {
public static final char WILDCARD_CHAR = '?';
/** Escape character */
public static final char WILDCARD_ESCAPE = '\\';
public static final char WILDCARD_ESCAPE = '\\';
/**
* Convert Lucene wildcard syntax into an automaton.
*/
@SuppressWarnings("fallthrough")
public static Automaton toCaseInsensitiveWildcardAutomaton(Term wildcardquery, int maxDeterminizedStates) {
List<Automaton> automata = new ArrayList<>();
String wildcardText = wildcardquery.text();
for (int i = 0; i < wildcardText.length();) {
final int c = wildcardText.codePointAt(i);
int length = Character.charCount(c);
switch(c) {
case WILDCARD_STRING:
case WILDCARD_STRING:
automata.add(Automata.makeAnyString());
break;
case WILDCARD_CHAR:
@ -111,14 +111,14 @@ public class AutomatonQueries {
}
i += length;
}
return Operations.concatenate(automata);
}
}
/**
 * Overload taking the raw term bytes: decodes them as UTF-8 and delegates to the
 * {@code String} variant with the same {@code maxDeterminizedStates}.
 */
protected static Automaton toCaseInsensitiveString(BytesRef br, int maxDeterminizedStates) {
    final String decoded = br.utf8ToString();
    return toCaseInsensitiveString(decoded, maxDeterminizedStates);
}
public static Automaton toCaseInsensitiveString(String s, int maxDeterminizedStates) {
List<Automaton> list = new ArrayList<>();
Iterator<Integer> iter = s.codePoints().iterator();
@ -129,11 +129,11 @@ public class AutomatonQueries {
Automaton a = Operations.concatenate(list);
a = MinimizationOperations.minimize(a, maxDeterminizedStates);
return a;
}
protected static Automaton toCaseInsensitiveChar(int codepoint, int maxDeterminizedStates) {
public static Automaton toCaseInsensitiveChar(int codepoint, int maxDeterminizedStates) {
Automaton case1 = Automata.makeChar(codepoint);
// For now we only work with ASCII characters
if (codepoint > 128) {

View File

@ -13,6 +13,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import java.util.ArrayList;
import java.util.List;
@ -37,11 +38,11 @@ class VersionFieldWildcardQuery extends AutomatonQuery {
private static final byte WILDCARD_CHAR = '?';
VersionFieldWildcardQuery(Term term) {
super(term, toAutomaton(term), Integer.MAX_VALUE, true);
VersionFieldWildcardQuery(Term term, boolean caseInsensitive) {
super(term, toAutomaton(term, caseInsensitive), Integer.MAX_VALUE, true);
}
private static Automaton toAutomaton(Term wildcardquery) {
private static Automaton toAutomaton(Term wildcardquery, boolean caseInsensitive) {
List<Automaton> automata = new ArrayList<>();
BytesRef wildcardText = wildcardquery.bytes();
@ -96,7 +97,11 @@ class VersionFieldWildcardQuery extends AutomatonQuery {
automata.add(Automata.makeChar(c));
break;
default:
automata.add(Automata.makeChar(c));
if (caseInsensitive == false) {
automata.add(Automata.makeChar(c));
} else {
automata.add(AutomatonQueries.toCaseInsensitiveChar(c, Integer.MAX_VALUE));
}
}
i += length;
}

View File

@ -244,7 +244,7 @@ public class VersionStringFieldMapper extends ParametrizedFieldMapper {
);
}
VersionFieldWildcardQuery query = new VersionFieldWildcardQuery(new Term(name(), value));
VersionFieldWildcardQuery query = new VersionFieldWildcardQuery(new Term(name(), value), caseInsensitive);
QueryParsers.setRewriteMethod(query, method);
return query;
}

View File

@ -166,6 +166,14 @@ public class VersionStringFieldTests extends ESSingleNodeTestCase {
response = client().prepareSearch(indexName).setQuery(QueryBuilders.prefixQuery("version", "21.11")).get();
assertEquals(1, response.getHits().getTotalHits().value);
// test case sensitivity / insensitivity
response = client().prepareSearch(indexName).setQuery(QueryBuilders.prefixQuery("version", "2.1.0-A")).get();
assertEquals(0, response.getHits().getTotalHits().value);
response = client().prepareSearch(indexName).setQuery(QueryBuilders.prefixQuery("version", "2.1.0-A").caseInsensitive(true)).get();
assertEquals(1, response.getHits().getTotalHits().value);
assertEquals("2.1.0-alpha.beta", response.getHits().getHits()[0].getSourceAsMap().get("version"));
}
public void testSort() throws IOException {
@ -261,7 +269,6 @@ public class VersionStringFieldTests extends ESSingleNodeTestCase {
assertEquals(2, response.getHits().getTotalHits().value);
assertEquals("1.0.0alpha2.1.0-rc.1", response.getHits().getHits()[0].getSourceAsMap().get("version"));
assertEquals("2.1.0-alpha.beta", response.getHits().getHits()[1].getSourceAsMap().get("version"));
}
public void testFuzzyQuery() throws Exception {
@ -342,6 +349,17 @@ public class VersionStringFieldTests extends ESSingleNodeTestCase {
checkWildcardQuery(indexName, "*-*", new String[] { "1.0.0-alpha.2.1.0-rc.1", "2.1.0-alpha.beta", "3.1.1-a" });
checkWildcardQuery(indexName, "1.3.0+b*", new String[] { "1.3.0+build.1234567" });
checkWildcardQuery(indexName, "3.1.1??", new String[] { "3.1.1-a", "3.1.1+b", "3.1.123" });
// test case sensitivity / insensitivity
SearchResponse response = client().prepareSearch(indexName).setQuery(QueryBuilders.wildcardQuery("version", "*Alpha*")).get();
assertEquals(0, response.getHits().getTotalHits().value);
response = client().prepareSearch(indexName)
.setQuery(QueryBuilders.wildcardQuery("version", "*Alpha*").caseInsensitive(true))
.get();
assertEquals(2, response.getHits().getTotalHits().value);
assertEquals("1.0.0-alpha.2.1.0-rc.1", response.getHits().getHits()[0].getSourceAsMap().get("version"));
assertEquals("2.1.0-alpha.beta", response.getHits().getHits()[1].getSourceAsMap().get("version"));
}
private void checkWildcardQuery(String indexName, String query, String... expectedResults) {