mirror of https://github.com/apache/lucene.git
LUCENE-8937: Avoid aggressive stemming on numbers in the FrenchMinimalStemmer
This commit is contained in:
parent
cb94eeb491
commit
d9d16eec95
|
@ -40,6 +40,9 @@ Improvements
|
|||
docs on equal scores. Also, remove the ability of TopDocs.merge to set shard
|
||||
indices (Atri Sharma, Adrien Grand, Simon Willnauer)
|
||||
|
||||
* LUCENE-8937: Avoid aggressive stemming on numbers in the FrenchMinimalStemmer.
|
||||
(Adrien Gallou via Tomoko Uchida)
|
||||
|
||||
Bug fixes
|
||||
|
||||
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while
|
||||
|
|
|
@ -74,7 +74,7 @@ public class FrenchMinimalStemmer {
|
|||
if (s[len-1] == 'r') len--;
|
||||
if (s[len-1] == 'e') len--;
|
||||
if (s[len-1] == 'é') len--;
|
||||
if (s[len-1] == s[len-2]) len--;
|
||||
if (s[len-1] == s[len-2] && Character.isLetter(s[len-1])) len--;
|
||||
return len;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,6 +68,23 @@ public class TestFrenchMinimalStemFilter extends BaseTokenStreamTestCase {
|
|||
checkOneTerm(analyzer, "baron", "baron");
|
||||
}
|
||||
|
||||
// Regression test for LUCENE-8937: each call passes an input term and the single
// term expected from the analyzer; digit-only inputs are expected back unchanged.
public void testIntergerWithLastCharactersEqual() throws IOException {
|
||||
// A number ending in repeated digits is expected to come back unchanged:
|
||||
checkOneTerm(analyzer, "1234555", "1234555");
|
||||
// Repeated digits inside a number longer than 6 characters are also left alone:
|
||||
checkOneTerm(analyzer, "12333345", "12333345");
|
||||
// Short numbers were already unaffected:
|
||||
checkOneTerm(analyzer, "1234", "1234");
|
||||
// Ensure the existing behaviour is preserved for words!
|
||||
// A trailing repeated letter is still collapsed to a single letter:
|
||||
checkOneTerm(analyzer, "abcdeff", "abcdef");
|
||||
// Repeated letters inside a word longer than 6 characters are left alone:
|
||||
checkOneTerm(analyzer, "abcccddeef", "abcccddeef");
|
||||
checkOneTerm(analyzer, "créées", "cré");
|
||||
// Combined letter and digit repetition; the term ends in repeated digits and is expected unchanged
|
||||
checkOneTerm(analyzer, "22hh00", "22hh00"); // 10:00pm
|
||||
}
|
||||
|
||||
public void testKeyword() throws IOException {
|
||||
final CharArraySet exclusionSet = new CharArraySet( asSet("chevaux"), false);
|
||||
Analyzer a = new Analyzer() {
|
||||
|
|
Loading…
Reference in New Issue