Fix normalization in TeluguAnalyzer (#13059)

DecimalDigitFilter and IndicNormalizationFilter were mistakenly omitted.
This commit is contained in:
Dmitry Cherniachenko 2024-02-01 14:45:27 +01:00 committed by GitHub
parent d4c0eaf9db
commit 9caeb9395d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 16 additions and 2 deletions

View File

@ -250,6 +250,8 @@ Bug Fixes
* GITHUB#13031: ScorerSupplier created by QueryProfilerWeight will propagate topLevelScoringClause to the sub ScorerSupplier. (Shintaro Murakami) * GITHUB#13031: ScorerSupplier created by QueryProfilerWeight will propagate topLevelScoringClause to the sub ScorerSupplier. (Shintaro Murakami)
* GITHUB#13059: Fixed missing IndicNormalization and DecimalDigit filters in TeluguAnalyzer normalization (Dmitry Cherniachenko)
Build Build
--------------------- ---------------------

View File

@ -127,8 +127,8 @@ public final class TeluguAnalyzer extends StopwordAnalyzerBase {
@Override @Override
protected TokenStream normalize(String fieldName, TokenStream in) { protected TokenStream normalize(String fieldName, TokenStream in) {
TokenStream result = new DecimalDigitFilter(in); TokenStream result = new DecimalDigitFilter(in);
result = new IndicNormalizationFilter(in); result = new IndicNormalizationFilter(result);
result = new TeluguNormalizationFilter(in); result = new TeluguNormalizationFilter(result);
return result; return result;
} }
} }

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.te;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.BytesRef;
public class TestTeluguAnalyzer extends BaseTokenStreamTestCase { public class TestTeluguAnalyzer extends BaseTokenStreamTestCase {
@ -48,6 +49,17 @@ public class TestTeluguAnalyzer extends BaseTokenStreamTestCase {
a.close(); a.close();
} }
/**
 * Checks the output of {@code TeluguAnalyzer.normalize} for one input per filter in the
 * normalization chain (decimal digits, Indic normalization, Telugu normalization).
 */
public void testNormalization() {
  // try-with-resources: the analyzer is closed even when one of the assertions fails
  try (TeluguAnalyzer a = new TeluguAnalyzer()) {
    // DecimalDigitFilter: Telugu digits are expected to come back as ASCII digits
    assertEquals(new BytesRef("1234"), a.normalize("dummy", "౧౨౩౪"));
    // IndicNormalizationFilter: decomposed Devanagari sequences are expected to be composed
    assertEquals(new BytesRef("ऑऑ"), a.normalize("dummy", "अाॅअाॅ"));
    // TeluguNormalizationFilter: Telugu-specific letter + sign sequence is expected to be folded
    assertEquals(new BytesRef("ఓనమాల"), a.normalize("dummy", "ఒౕనమాల"));
  }
}
/** Send some random strings to the analyzer */ /** Send some random strings to the analyzer */
public void testRandomStrings() throws Exception { public void testRandomStrings() throws Exception {
TeluguAnalyzer analyzer = new TeluguAnalyzer(); TeluguAnalyzer analyzer = new TeluguAnalyzer();