Speed up parsing of large `terms` queries. (#24210)

The addition of the normalization feature on keywords slowed down the parsing
of large `terms` queries since all terms now have to go through normalization.
However this can be avoided in the default case that the analyzer is a
`keyword` analyzer since all that normalization will do is a UTF8 conversion.
Using `Analyzer.normalize` for that is a bit overkill and could be skipped.
This commit is contained in:
Adrien Grand 2017-04-21 10:32:33 +02:00 committed by GitHub
parent a4365971a0
commit f322f537e4
2 changed files with 18 additions and 0 deletions

View File

@@ -236,6 +236,15 @@ public final class KeywordFieldMapper extends FieldMapper {
@Override
protected BytesRef indexedValueForSearch(Object value) {
if (searchAnalyzer() == Lucene.KEYWORD_ANALYZER) {
// Fast path: the default `keyword` analyzer performs no real normalization —
// its output is just the UTF8 encoding of the input term. Skipping
// Analyzer.normalize here avoids its per-term overhead (it builds attribute
// sources through reflection), which matters when a query carries many terms
// to parse (e.g. a large `terms` query).
// This branch is taken whenever a normalizer is NOT configured on the field.
return super.indexedValueForSearch(value);
}
if (value == null) {
return null;
}

View File

@@ -150,4 +150,13 @@ public class KeywordFieldTypeTests extends FieldTypeTestCase {
() -> ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true));
assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage());
}
public void testNormalizeQueries() {
MappedFieldType fieldType = createDefaultFieldType();
fieldType.setName("field");
// With the default keyword analyzer, query terms are passed through verbatim
// (no normalization), so the upper-case input is preserved in the term query.
fieldType.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER);
TermQuery verbatim = new TermQuery(new Term("field", new BytesRef("FOO")));
assertEquals(verbatim, fieldType.termQuery("FOO", null));
// With the standard analyzer, the query term goes through normalization and
// is lowercased before being used to build the term query.
fieldType.setSearchAnalyzer(Lucene.STANDARD_ANALYZER);
TermQuery normalized = new TermQuery(new Term("field", new BytesRef("foo")));
assertEquals(normalized, fieldType.termQuery("FOO", null));
}
}