Fix normalization in TeluguAnalyzer (#13059)

DecimalDigitFilter and IndicNormalizationFilter were mistakenly omitted.
2024-02-01 14:45:27 +01:00 · 2024-02-01 14:45:27 +01:00 · 9caeb9395d
parent d4c0eaf9db
commit 9caeb9395d
3 changed files with 16 additions and 2 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -250,6 +250,8 @@ Bug Fixes
 * GITHUB#13031: ScorerSupplier created by QueryProfilerWeight will propagate topLevelScoringClause to the sub ScorerSupplier. (Shintaro Murakami)
 * GITHUB#13059: Fixed missing IndicNormalization and DecimalDigit filters in TeluguAnalyzer normalization (Dmitry Cherniachenko)
 Build
 ---------------------
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java
@ -127,8 +127,8 @@ public final class TeluguAnalyzer extends StopwordAnalyzerBase {
  /**
   * Builds the normalization chain for {@code in}: a DecimalDigitFilter, wrapped by an
   * IndicNormalizationFilter, wrapped by a TeluguNormalizationFilter. The outermost filter is
   * returned.
   */
  @Override
  protected TokenStream normalize(String fieldName, TokenStream in) {
    TokenStream result = new DecimalDigitFilter(in);
    // Each following filter has to wrap the previous `result`. Wrapping `in` again (the removed
    // lines) reassigns `result` to a fresh filter over the raw stream, dropping the filters
    // that were constructed before it.
-    result = new IndicNormalizationFilter(in);
+    result = new IndicNormalizationFilter(result);
-    result = new TeluguNormalizationFilter(in);
+    result = new TeluguNormalizationFilter(result);
    return result;
  }
 }
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguAnalyzer.java
@ -19,6 +19,7 @@ package org.apache.lucene.analysis.te;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.util.BytesRef;
 public class TestTeluguAnalyzer extends BaseTokenStreamTestCase {
@ -48,6 +49,17 @@ public class TestTeluguAnalyzer extends BaseTokenStreamTestCase {
    a.close();
  }
  /**
   * Checks that {@code TeluguAnalyzer.normalize} applies every filter in its normalization chain,
   * using one input per filter.
   */
  public void testNormalization() {
    // try-with-resources: the analyzer is closed even when one of the assertions fails
    try (TeluguAnalyzer a = new TeluguAnalyzer()) {
      // DecimalDigitFilter
      assertEquals(new BytesRef("1234"), a.normalize("dummy", "౧౨౩౪"));
      // IndicNormalizationFilter
      assertEquals(new BytesRef("ऑऑ"), a.normalize("dummy", "अाॅअाॅ"));
      // TeluguNormalizationFilter
      assertEquals(new BytesRef("ఓనమాల"), a.normalize("dummy", "ఒౕనమాల"));
    }
  }
  /** Send some random strings to the analyzer */
  public void testRandomStrings() throws Exception {
    TeluguAnalyzer analyzer = new TeluguAnalyzer();