mirror of https://github.com/apache/lucene.git
Add back the CJK range to this regex
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1450441 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2fb6ee3652
commit
dd4b910c02
|
@ -53,7 +53,7 @@ public class MockTokenizer extends Tokenizer {
|
|||
/** Acts like LetterTokenizer. */
|
||||
// the ugly regex below is an incomplete version of Unicode 5.2 [:Letter:]
|
||||
public static final CharacterRunAutomaton SIMPLE =
|
||||
new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁ]+").toAutomaton());
|
||||
new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁ一-鿌]+").toAutomaton());
|
||||
|
||||
private final CharacterRunAutomaton runAutomaton;
|
||||
private final boolean lowerCase;
|
||||
|
|
Loading…
Reference in New Issue