mirror of https://github.com/apache/lucene.git
Fix broken regex from r1225920, which allowed most of Unicode to match as a letter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1450410 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
241436ab4c
commit
2fb6ee3652
|
@@ -53,7 +53,7 @@ public class MockTokenizer extends Tokenizer {
|
|||
/** Acts like LetterTokenizer. */
|
||||
// the ugly regex below is incomplete Unicode 5.2 [:Letter:]
|
||||
public static final CharacterRunAutomaton SIMPLE =
|
||||
new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-Z]+").toAutomaton());
|
||||
new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁ]+").toAutomaton());
|
||||
|
||||
private final CharacterRunAutomaton runAutomaton;
|
||||
private final boolean lowerCase;
|
||||
|
|
Loading…
Reference in New Issue