From 9caeb9395dad6a535882888335915e4ea78e8b43 Mon Sep 17 00:00:00 2001 From: Dmitry Cherniachenko <2sabio@gmail.com> Date: Thu, 1 Feb 2024 14:45:27 +0100 Subject: [PATCH] Fix normalization in TeluguAnalyzer (#13059) DecimalDigitFilter and IndicNormalizationFilter were mistakenly omitted. --- lucene/CHANGES.txt | 2 ++ .../apache/lucene/analysis/te/TeluguAnalyzer.java | 4 ++-- .../lucene/analysis/te/TestTeluguAnalyzer.java | 12 ++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 71defbfbaf7..8489b3c4c1a 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -250,6 +250,8 @@ Bug Fixes * GITHUB#13031: ScorerSupplier created by QueryProfilerWeight will propagate topLevelScoringClause to the sub ScorerSupplier. (Shintaro Murakami) +* GITHUB#13059: Fixed missing IndicNormalization and DecimalDigit filters in TeluguAnalyzer normalization (Dmitry Cherniachenko) + Build --------------------- diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java index c7d39a72cce..55bc9a15955 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/te/TeluguAnalyzer.java @@ -127,8 +127,8 @@ public final class TeluguAnalyzer extends StopwordAnalyzerBase { @Override protected TokenStream normalize(String fieldName, TokenStream in) { TokenStream result = new DecimalDigitFilter(in); - result = new IndicNormalizationFilter(in); - result = new TeluguNormalizationFilter(in); + result = new IndicNormalizationFilter(result); + result = new TeluguNormalizationFilter(result); return result; } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguAnalyzer.java index 
5c3145a5677..af076ee3318 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/te/TestTeluguAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.lucene.analysis.te; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.util.BytesRef; public class TestTeluguAnalyzer extends BaseTokenStreamTestCase { @@ -48,6 +49,17 @@ public class TestTeluguAnalyzer extends BaseTokenStreamTestCase { a.close(); } + public void testNormalization() { + TeluguAnalyzer a = new TeluguAnalyzer(); + // DecimalDigitFilter + assertEquals(new BytesRef("1234"), a.normalize("dummy", "౧౨౩౪")); + // IndicNormalizationFilter + assertEquals(new BytesRef("ऑऑ"), a.normalize("dummy", "अाॅअाॅ")); + // TeluguNormalizationFilter + assertEquals(new BytesRef("ఓనమాల"), a.normalize("dummy", "ఒౕనమాల")); + a.close(); + } + /** Send some random strings to the analyzer */ public void testRandomStrings() throws Exception { TeluguAnalyzer analyzer = new TeluguAnalyzer();