mirror of https://github.com/apache/lucene.git
LUCENE-1003: Don't let RussianAnalyzer drop numbers.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@656111 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b182881092
commit
aa0074f5db
|
@ -103,7 +103,10 @@ Bug fixes
|
|||
This is needed when you want to update an index as part of a
|
||||
transaction involving external resources (e.g. a database). Also
|
||||
deprecated abort(), renaming it to rollback(). (Mike McCandless)
|
||||
|
||||
|
||||
10. LUCENE-1003: Stop RussianAnalyzer from removing numbers.
|
||||
(TUSUR OpenTeam, Dmitry Lihachev via Otis Gospodnetic)
|
||||
|
||||
New features
|
||||
|
||||
1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis
|
||||
|
|
|
@ -94,7 +94,18 @@ public class RussianCharsets
|
|||
'\u042C',
|
||||
'\u042D',
|
||||
'\u042E',
|
||||
'\u042F'
|
||||
'\u042F',
|
||||
// numbers
|
||||
'0',
|
||||
'1',
|
||||
'2',
|
||||
'3',
|
||||
'4',
|
||||
'5',
|
||||
'6',
|
||||
'7',
|
||||
'8',
|
||||
'9'
|
||||
};
|
||||
|
||||
// KOI8 charset
|
||||
|
@ -163,7 +174,18 @@ public class RussianCharsets
|
|||
0xf8,
|
||||
0xfc,
|
||||
0xe0,
|
||||
0xf1
|
||||
0xf1,
|
||||
// numbers
|
||||
'0',
|
||||
'1',
|
||||
'2',
|
||||
'3',
|
||||
'4',
|
||||
'5',
|
||||
'6',
|
||||
'7',
|
||||
'8',
|
||||
'9'
|
||||
};
|
||||
|
||||
// CP1251 charset
|
||||
|
@ -232,7 +254,18 @@ public class RussianCharsets
|
|||
0xDC,
|
||||
0xDD,
|
||||
0xDE,
|
||||
0xDF
|
||||
0xDF,
|
||||
// numbers
|
||||
'0',
|
||||
'1',
|
||||
'2',
|
||||
'3',
|
||||
'4',
|
||||
'5',
|
||||
'6',
|
||||
'7',
|
||||
'8',
|
||||
'9'
|
||||
};
|
||||
|
||||
public static char toLowerCase(char letter, char[] charset)
|
||||
|
|
|
@ -168,4 +168,21 @@ public class TestRussianAnalyzer extends TestCase
|
|||
inWords1251.close();
|
||||
sample1251.close();
|
||||
}
|
||||
|
||||
public void testDigitsInRussianCharset()
|
||||
{
|
||||
Reader reader = new StringReader("text 1000");
|
||||
RussianAnalyzer ra = new RussianAnalyzer();
|
||||
TokenStream stream = ra.tokenStream("", reader);
|
||||
|
||||
try {
|
||||
assertEquals("text", stream.next().termText());
|
||||
assertNotNull("RussianAnalyzer's tokenizer skips numbers from input text", stream.next());
|
||||
}
|
||||
catch (IOException e)
|
||||
{
|
||||
fail("unexpected IOException");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue