fix broken regex from r1225920 allowing most of Unicode as a letter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1450410 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-02-26 20:56:29 +00:00
parent 241436ab4c
commit 2fb6ee3652
1 changed file with 1 addition and 1 deletion

View File

@@ -53,7 +53,7 @@ public class MockTokenizer extends Tokenizer {
/** Acts like LetterTokenizer. */
// the ugly regex below is incomplete Unicode 5.2 [:Letter:]
public static final CharacterRunAutomaton SIMPLE =
new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-]+").toAutomaton());
new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁ]+").toAutomaton());
private final CharacterRunAutomaton runAutomaton;
private final boolean lowerCase;