Speed up parsing of large `terms` queries. (#24210)

The addition of the normalization feature on keywords slowed down the parsing
of large `terms` queries since all terms now have to go through normalization.
However this can be avoided in the default case that the analyzer is a
`keyword` analyzer since all that normalization will do is a UTF8 conversion.
Using `Analyzer.normalize` for that is a bit overkill and could be skipped.
This commit is contained in:
Adrien Grand 2017-04-21 10:32:33 +02:00 committed by GitHub
parent a4365971a0
commit f322f537e4
2 changed files with 18 additions and 0 deletions

View File

@@ -236,6 +236,15 @@ public final class KeywordFieldMapper extends FieldMapper {
@Override
protected BytesRef indexedValueForSearch(Object value) {
if (searchAnalyzer() == Lucene.KEYWORD_ANALYZER) {
// Fast path: the default `keyword` analyzer performs no real normalization —
// its output is just the UTF8 encoding of the input term. Skipping
// Analyzer.normalize here avoids its per-term overhead (it builds attribute
// sources through reflection), which matters when a query carries many terms
// to parse (e.g. a large `terms` query).
// This branch is taken whenever a normalizer is NOT configured on the field.
return super.indexedValueForSearch(value);
}
if (value == null) {
return null;
}

View File

@@ -150,4 +150,13 @@ public class KeywordFieldTypeTests extends FieldTypeTestCase {
() -> ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
}
public void testNormalizeQueries() {
MappedFieldType fieldType = createDefaultFieldType();
fieldType.setName("field");
// With the default keyword analyzer, query terms are passed through verbatim
// (no normalization), so the upper-case input is preserved in the term query.
fieldType.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
TermQuery verbatim = new TermQuery(new Term("field", new BytesRef("FOO")));
assertEquals(verbatim, fieldType.termQuery("FOO", null));
// With the standard analyzer, the query term goes through normalization and
// is lowercased before being used to build the term query.
fieldType.setSearchAnalyzer(Lucene.STANDARD_ANALYZER);
TermQuery normalized = new TermQuery(new Term("field", new BytesRef("foo")));
assertEquals(normalized, fieldType.termQuery("FOO", null));
}
}