From 71a9acb2e2aa55257021eefce1e5d8d390bc7048 Mon Sep 17 00:00:00 2001 From: Tomoko Uchida Date: Sat, 21 May 2022 12:43:02 +0900 Subject: [PATCH] LUCENE-10312: MIGRATE entry and small follow-ups (#908) --- lucene/MIGRATE.md | 5 +++++ .../org/apache/lucene/analysis/fa/PersianAnalyzer.java | 4 ++-- .../apache/lucene/analysis/fa/TestPersianStemFilter.java | 9 ++++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md index a6836166046..8a1c5c535bb 100644 --- a/lucene/MIGRATE.md +++ b/lucene/MIGRATE.md @@ -19,6 +19,11 @@ ## Migration from Lucene 9.x to Lucene 10.0 +### PersianStemFilter is added to PersianAnalyzer (LUCENE-10312) + +PersianAnalyzer now includes PersianStemFilter, which changes analysis results. If you need exactly the same analysis +behaviour as 9.x, clone `PersianAnalyzer` from 9.x or build a custom analyzer with `CustomAnalyzer`. + ### AutomatonQuery/CompiledAutomaton/RunAutomaton/RegExp no longer determinize (LUCENE-10010) These classes no longer take a `determinizeWorkLimit` and no longer determinize diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java index afc41233906..2da9df1f3cc 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java @@ -121,8 +121,8 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase { * * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} built from a {@link * StandardTokenizer} filtered with {@link LowerCaseFilter}, {@link DecimalDigitFilter}, - * {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} and Persian Stop - * words + * {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter}, Persian Stop words, + * and {@link 
PersianStemFilter}. */ @Override protected TokenStreamComponents createComponents(String fieldName) { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java index 983dd077347..5b7716cf124 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianStemFilter.java @@ -32,7 +32,14 @@ public class TestPersianStemFilter extends BaseTokenStreamTestCase { @Override public void setUp() throws Exception { super.setUp(); - a = new PersianAnalyzer(); + a = + new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + final Tokenizer source = new MockTokenizer(); + return new TokenStreamComponents(source, new PersianStemFilter(source)); + } + }; } @Override