From f00afeee7aba8e1024c6f792c6bb469b7a16def9 Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Sat, 12 Nov 2005 08:33:21 +0000 Subject: [PATCH] LUCENE-461 - Fix for "StandardTokenizer splitting all of Korean words into separate characters", contributed by Cheolgoo Kang git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@332745 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 7 +- .../analysis/standard/StandardTokenizer.java | 10 +- .../analysis/standard/StandardTokenizer.jj | 14 +- .../standard/StandardTokenizerConstants.java | 10 +- .../StandardTokenizerTokenManager.java | 959 ++++++++++-------- .../lucene/analysis/TestStandardAnalyzer.java | 3 + 6 files changed, 536 insertions(+), 467 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 20ab4d089b5..e7507e40195 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -234,8 +234,11 @@ Bug fixes the original token. (Yonik Seeley via Erik Hatcher, LUCENE-437) -12. Added Unicode range to fix tokenization of Korean. - (Otis, http://issues.apache.org/jira/browse/LUCENE-444) +12. Added Unicode range of Korean characters to StandardTokenizer, + grouping contiguous characters into a token rather than one token + per character. This change also changes the token type to "<CJ>" + for Chinese and Japanese character tokens (previously it was "<CJK>"). + (Otis and Erik, via Cheolgoo Kang LUCENE-444 and LUCENE-461) 13. 
FieldsReader now looks at FieldInfo.storeOffsetWithTermVector and FieldInfo.storePositionWithTermVector and creates the Field with diff --git a/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java index 4276f317ae5..857b5ff035f 100644 --- a/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java +++ b/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java @@ -55,8 +55,8 @@ public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer impl case NUM: token = jj_consume_token(NUM); break; - case CJK: - token = jj_consume_token(CJK); + case CJ: + token = jj_consume_token(CJ); break; case 0: token = jj_consume_token(0); @@ -166,8 +166,8 @@ public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer impl public ParseException generateParseException() { jj_expentries.removeAllElements(); - boolean[] la1tokens = new boolean[15]; - for (int i = 0; i < 15; i++) { + boolean[] la1tokens = new boolean[16]; + for (int i = 0; i < 16; i++) { la1tokens[i] = false; } if (jj_kind >= 0) { @@ -183,7 +183,7 @@ public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer impl } } } - for (int i = 0; i < 15; i++) { + for (int i = 0; i < 16; i++) { if (la1tokens[i]) { jj_expentry = new int[1]; jj_expentry[0] = i; diff --git a/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj b/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj index f2d1414c0db..8409ba3f0a3 100644 --- a/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj +++ b/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj @@ -1,4 +1,4 @@ -/** +/**f * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -59,7 +59,7 @@ PARSER_END(StandardTokenizer) TOKEN : { // token patterns // basic word: a sequence of digits & letters - <ALPHANUM: (<LETTER>|<DIGIT>)+ > + <ALPHANUM: (<LETTER>|<DIGIT>|<KOREAN>)+ > // internal apostrophes: 
O'Reilly, you're, O'Reilly's // use a post-filter to remove possesives @@ -106,16 +106,20 @@ TOKEN : { // token patterns "\u0100"-"\u1fff" ] > -| < CJK: // non-alphabets +| < CJ: // Chinese, Japanese [ "\u3040"-"\u318f", "\u3300"-"\u337f", "\u3400"-"\u3d2d", "\u4e00"-"\u9fff", - "\uac00"-"\ud7af", "\uf900"-"\ufaff" ] > +| < KOREAN: // Korean + [ + "\uac00"-"\ud7af" + ] + > | < #DIGIT: // unicode digits [ "\u0030"-"\u0039", @@ -157,7 +161,7 @@ org.apache.lucene.analysis.Token next() throws IOException : token = <EMAIL> | token = <HOST> | token = <NUM> | - token = <CJK> | + token = <CJ> | token = <EOF> ) { diff --git a/src/java/org/apache/lucene/analysis/standard/StandardTokenizerConstants.java b/src/java/org/apache/lucene/analysis/standard/StandardTokenizerConstants.java index f117c625587..623a9b426c9 100644 --- a/src/java/org/apache/lucene/analysis/standard/StandardTokenizerConstants.java +++ b/src/java/org/apache/lucene/analysis/standard/StandardTokenizerConstants.java @@ -15,9 +15,10 @@ public interface StandardTokenizerConstants { int HAS_DIGIT = 9; int ALPHA = 10; int LETTER = 11; - int CJK = 12; - int DIGIT = 13; - int NOISE = 14; + int CJ = 12; + int KOREAN = 13; + int DIGIT = 14; + int NOISE = 15; int DEFAULT = 0; @@ -34,7 +35,8 @@ public interface StandardTokenizerConstants { "<HAS_DIGIT>", "<ALPHA>", "<LETTER>", - "<CJK>", + "<CJ>", + "<KOREAN>", "<DIGIT>", "<NOISE>", }; diff --git a/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java b/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java index 2ee22bb2c49..d8d6ca160de 100644 --- a/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java +++ b/src/java/org/apache/lucene/analysis/standard/StandardTokenizerTokenManager.java @@ -41,7 +41,7 @@ private final void jjCheckNAddStates(int start) jjCheckNAdd(jjnextStates[start + 1]); } static final long[] jjbitVec0 = { - 0x1ff0000000000000L, 0xffffffffffffc000L, 0xfffff000ffffffffL, 0x6000000007fffffL + 0x1ff0000000000000L, 0xffffffffffffc000L, 0xffffffffL, 0x600000000000000L }; 
static final long[] jjbitVec2 = { 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL @@ -56,40 +56,46 @@ static final long[] jjbitVec5 = { 0x3fffffffffffL, 0x0L, 0x0L, 0x0L }; static final long[] jjbitVec6 = { - 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffL, 0x0L + 0x0L, 0x0L, 0xfffff00000000000L, 0x7fffffL }; static final long[] jjbitVec7 = { - 0x1600L, 0x0L, 0x0L, 0x0L + 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffL, 0x0L }; static final long[] jjbitVec8 = { - 0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L -}; -static final long[] jjbitVec9 = { - 0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L -}; -static final long[] jjbitVec10 = { - 0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L -}; -static final long[] jjbitVec11 = { - 0x0L, 0xffc000000000L, 0x0L, 0x0L -}; -static final long[] jjbitVec12 = { - 0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L -}; -static final long[] jjbitVec13 = { - 0x0L, 0x3ffL, 0x0L, 0x0L -}; -static final long[] jjbitVec14 = { 0xfffffffeL, 0x0L, 0x0L, 0x0L }; -static final long[] jjbitVec15 = { +static final long[] jjbitVec9 = { 0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL }; +static final long[] jjbitVec10 = { + 0x1600L, 0x0L, 0x0L, 0x0L +}; +static final long[] jjbitVec11 = { + 0x0L, 0xffc000000000L, 0x0L, 0xffc000000000L +}; +static final long[] jjbitVec12 = { + 0x0L, 0x3ff00000000L, 0x0L, 0x3ff000000000000L +}; +static final long[] jjbitVec13 = { + 0x0L, 0xffc000000000L, 0x0L, 0xff8000000000L +}; +static final long[] jjbitVec14 = { + 0x0L, 0xffc000000000L, 0x0L, 0x0L +}; +static final long[] jjbitVec15 = { + 0x0L, 0x3ff0000L, 0x0L, 0x3ff0000L +}; +static final long[] jjbitVec16 = { + 0x0L, 0x3ffL, 0x0L, 0x0L +}; +static final long[] jjbitVec17 = { + 0xfffffffeL, 0x0L, 0xfffff00000000000L, 0x7fffffL +}; private final int jjMoveNfa_0(int startState, int curPos) { int[] nextStates; int startsAt = 0; - jjnewStateCnt = 73; + jjnewStateCnt = 75; int i = 1; jjstateSet[0] = startState; int j, kind = 0x7fffffff; @@ -109,33 +115,23 
@@ private final int jjMoveNfa_0(int startState, int curPos) { if (kind > 1) kind = 1; - jjCheckNAddStates(0, 17); + jjCheckNAddStates(0, 11); } if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddStates(18, 23); - break; - case 1: + jjCheckNAddStates(12, 17); if ((0x3ff000000000000L & l) != 0L) jjCheckNAddStates(18, 23); break; case 2: - case 39: if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(2, 3); + jjCheckNAddStates(18, 23); break; case 3: - if ((0xf00000000000L & l) != 0L) - jjCheckNAdd(4); + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(3, 4); break; case 4: - if ((0x3ff000000000000L & l) == 0L) - break; - if (kind > 7) - kind = 7; - jjCheckNAdd(4); - break; case 5: - case 48: if ((0x3ff000000000000L & l) != 0L) jjCheckNAddTwoStates(5, 6); break; @@ -144,237 +140,253 @@ private final int jjMoveNfa_0(int startState, int curPos) jjCheckNAdd(7); break; case 7: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(7, 8); - break; - case 8: - if ((0xf00000000000L & l) != 0L) - jjCheckNAddTwoStates(9, 10); - break; - case 9: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(9, 10); - break; - case 10: - case 11: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(6, 11); + jjCheckNAdd(7); + break; + case 8: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(8, 9); + break; + case 9: + case 10: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(10, 11); + break; + case 11: + if ((0xf00000000000L & l) != 0L) + jjCheckNAdd(12); break; case 12: - case 61: if ((0x3ff000000000000L & l) != 0L) jjCheckNAddTwoStates(12, 13); break; case 13: if ((0xf00000000000L & l) != 0L) - jjCheckNAdd(14); + jjCheckNAddTwoStates(14, 15); break; case 14: if ((0x3ff000000000000L & l) != 0L) jjCheckNAddTwoStates(14, 15); break; case 15: - if ((0xf00000000000L & l) != 0L) - jjCheckNAddTwoStates(16, 17); - break; case 16: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(16, 17); - 
break; - case 17: - case 18: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(18, 19); - break; - case 19: - if ((0xf00000000000L & l) != 0L) - jjCheckNAdd(20); - break; - case 20: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(15, 20); + jjCheckNAddTwoStates(11, 16); + break; + case 17: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(17, 18); + break; + case 18: + case 19: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(19, 20); + break; + case 20: + if ((0xf00000000000L & l) != 0L) + jjCheckNAdd(21); break; case 21: - if ((0x3ff000000000000L & l) == 0L) - break; - if (kind > 1) - kind = 1; - jjCheckNAddStates(0, 17); + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(21, 22); break; case 22: - if ((0x3ff000000000000L & l) == 0L) - break; - if (kind > 1) - kind = 1; - jjCheckNAdd(22); + if ((0xf00000000000L & l) != 0L) + jjCheckNAddTwoStates(23, 24); break; case 23: if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddStates(24, 26); + jjCheckNAddTwoStates(23, 24); break; case 24: - if ((0x600000000000L & l) != 0L) - jjCheckNAdd(25); - break; case 25: if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddStates(27, 29); + jjCheckNAddTwoStates(25, 26); + break; + case 26: + if ((0xf00000000000L & l) != 0L) + jjCheckNAdd(27); break; case 27: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(27, 28); + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 7) + kind = 7; + jjCheckNAddTwoStates(22, 27); break; case 28: - if ((0x600000000000L & l) != 0L) - jjCheckNAdd(29); + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddStates(12, 17); break; case 29: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 5) - kind = 5; - jjCheckNAddTwoStates(28, 29); + if (kind > 1) + kind = 1; + jjCheckNAddStates(0, 11); break; case 30: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(30, 31); - break; - case 31: - if (curChar == 46) - jjCheckNAdd(32); - break; - case 32: 
if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 6) - kind = 6; - jjCheckNAddTwoStates(31, 32); + if (kind > 1) + kind = 1; + jjCheckNAdd(30); + break; + case 31: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddStates(24, 26); + break; + case 32: + if ((0x600000000000L & l) != 0L) + jjCheckNAdd(33); break; case 33: if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(33, 34); - break; - case 34: - if ((0xf00000000000L & l) != 0L) - jjCheckNAddTwoStates(35, 36); + jjCheckNAddStates(27, 29); break; case 35: if ((0x3ff000000000000L & l) != 0L) jjCheckNAddTwoStates(35, 36); break; case 36: + if ((0x600000000000L & l) != 0L) + jjCheckNAdd(37); + break; case 37: if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 7) - kind = 7; - jjCheckNAdd(37); + if (kind > 5) + kind = 5; + jjCheckNAddTwoStates(36, 37); break; case 38: if ((0x3ff000000000000L & l) != 0L) jjCheckNAddTwoStates(38, 39); break; + case 39: + if (curChar == 46) + jjCheckNAdd(40); + break; case 40: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(40, 41); + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 6) + kind = 6; + jjCheckNAddTwoStates(39, 40); break; case 41: - if ((0xf00000000000L & l) != 0L) - jjCheckNAddTwoStates(42, 43); + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(41, 42); break; case 42: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(42, 43); + if ((0xf00000000000L & l) != 0L) + jjCheckNAddTwoStates(43, 44); break; case 43: - case 44: if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(44, 45); + jjCheckNAddTwoStates(43, 44); break; + case 44: case 45: - if ((0xf00000000000L & l) != 0L) - jjCheckNAdd(46); - break; - case 46: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(41, 46); + jjCheckNAdd(45); + break; + case 46: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(46, 47); break; case 47: + if ((0xf00000000000L & l) != 0L) + jjCheckNAddTwoStates(48, 49); + break; 
+ case 48: if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(47, 48); + jjCheckNAddTwoStates(48, 49); break; case 49: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(49, 50); - break; case 50: - if ((0xf00000000000L & l) != 0L) - jjCheckNAddTwoStates(51, 52); + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(50, 51); break; case 51: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(51, 52); + if ((0xf00000000000L & l) != 0L) + jjCheckNAdd(52); break; case 52: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 7) + kind = 7; + jjCheckNAddTwoStates(47, 52); + break; case 53: if ((0x3ff000000000000L & l) != 0L) jjCheckNAddTwoStates(53, 54); break; case 54: if ((0xf00000000000L & l) != 0L) - jjCheckNAdd(55); + jjCheckNAddTwoStates(55, 56); break; case 55: if ((0x3ff000000000000L & l) != 0L) jjCheckNAddTwoStates(55, 56); break; case 56: - if ((0xf00000000000L & l) != 0L) - jjCheckNAddTwoStates(57, 58); - break; case 57: if ((0x3ff000000000000L & l) != 0L) jjCheckNAddTwoStates(57, 58); break; case 58: + if ((0xf00000000000L & l) != 0L) + jjCheckNAdd(59); + break; case 59: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(59, 60); + break; + case 60: + if ((0xf00000000000L & l) != 0L) + jjCheckNAddTwoStates(61, 62); + break; + case 61: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(61, 62); + break; + case 62: + case 63: if ((0x3ff000000000000L & l) == 0L) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(54, 59); + jjCheckNAddTwoStates(58, 63); break; - case 60: - if ((0x3ff000000000000L & l) != 0L) - jjCheckNAddTwoStates(60, 61); - break; - case 64: + case 66: if (curChar == 39) - jjstateSet[jjnewStateCnt++] = 65; - break; - case 67: - if (curChar == 46) - jjCheckNAdd(68); + jjstateSet[jjnewStateCnt++] = 67; break; case 69: + if (curChar == 46) + jjCheckNAdd(70); + break; + case 71: if (curChar != 46) break; if (kind > 3) kind = 3; - jjCheckNAdd(68); + jjCheckNAdd(70); break; - case 71: + 
case 73: if (curChar == 38) - jjstateSet[jjnewStateCnt++] = 72; + jjstateSet[jjnewStateCnt++] = 74; break; default : break; } @@ -394,145 +406,133 @@ private final int jjMoveNfa_0(int startState, int curPos) { if (kind > 1) kind = 1; - jjCheckNAddStates(0, 17); + jjCheckNAddStates(0, 11); } + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddStates(18, 23); break; case 2: if ((0x7fffffe07fffffeL & l) != 0L) - jjAddStates(36, 37); + jjCheckNAddStates(18, 23); break; case 3: - if (curChar == 95) - jjCheckNAdd(4); - break; - case 4: - if ((0x7fffffe07fffffeL & l) == 0L) - break; - if (kind > 7) - kind = 7; - jjCheckNAdd(4); + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(3, 4); break; case 5: if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(5, 6); + jjAddStates(36, 37); break; case 6: if (curChar == 95) jjCheckNAdd(7); break; case 7: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(7, 8); - break; - case 8: - if (curChar == 95) - jjCheckNAddTwoStates(9, 10); - break; - case 9: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(9, 10); - break; - case 11: if ((0x7fffffe07fffffeL & l) == 0L) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(6, 11); + jjCheckNAdd(7); + break; + case 8: + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(8, 9); + break; + case 10: + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(10, 11); + break; + case 11: + if (curChar == 95) + jjCheckNAdd(12); break; case 12: if ((0x7fffffe07fffffeL & l) != 0L) - jjAddStates(38, 39); + jjCheckNAddTwoStates(12, 13); break; case 13: if (curChar == 95) - jjCheckNAdd(14); + jjCheckNAddTwoStates(14, 15); break; case 14: if ((0x7fffffe07fffffeL & l) != 0L) jjCheckNAddTwoStates(14, 15); break; - case 15: - if (curChar == 95) - jjCheckNAddTwoStates(16, 17); - break; case 16: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(16, 17); - break; - case 18: - if ((0x7fffffe07fffffeL & l) != 0L) - jjAddStates(40, 41); - break; - case 19: 
- if (curChar == 95) - jjCheckNAdd(20); - break; - case 20: if ((0x7fffffe07fffffeL & l) == 0L) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(15, 20); + jjCheckNAddTwoStates(11, 16); + break; + case 17: + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(17, 18); + break; + case 19: + if ((0x7fffffe07fffffeL & l) != 0L) + jjAddStates(38, 39); + break; + case 20: + if (curChar == 95) + jjCheckNAdd(21); break; case 21: - if ((0x7fffffe07fffffeL & l) == 0L) - break; - if (kind > 1) - kind = 1; - jjCheckNAddStates(0, 17); + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(21, 22); break; case 22: - if ((0x7fffffe07fffffeL & l) == 0L) - break; - if (kind > 1) - kind = 1; - jjCheckNAdd(22); + if (curChar == 95) + jjCheckNAddTwoStates(23, 24); break; case 23: if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddStates(24, 26); - break; - case 24: - if (curChar == 95) - jjCheckNAdd(25); + jjCheckNAddTwoStates(23, 24); break; case 25: if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddStates(27, 29); + jjAddStates(40, 41); break; case 26: - if (curChar == 64) + if (curChar == 95) jjCheckNAdd(27); break; case 27: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(27, 28); + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 7) + kind = 7; + jjCheckNAddTwoStates(22, 27); break; case 29: if ((0x7fffffe07fffffeL & l) == 0L) break; - if (kind > 5) - kind = 5; - jjCheckNAddTwoStates(28, 29); + if (kind > 1) + kind = 1; + jjCheckNAddStates(0, 11); break; case 30: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(30, 31); - break; - case 32: if ((0x7fffffe07fffffeL & l) == 0L) break; - if (kind > 6) - kind = 6; - jjCheckNAddTwoStates(31, 32); + if (kind > 1) + kind = 1; + jjCheckNAdd(30); + break; + case 31: + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddStates(24, 26); + break; + case 32: + if (curChar == 95) + jjCheckNAdd(33); break; case 33: if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(33, 34); + 
jjCheckNAddStates(27, 29); break; case 34: - if (curChar == 95) - jjCheckNAddTwoStates(35, 36); + if (curChar == 64) + jjCheckNAdd(35); break; case 35: if ((0x7fffffe07fffffeL & l) != 0L) @@ -541,56 +541,66 @@ private final int jjMoveNfa_0(int startState, int curPos) case 37: if ((0x7fffffe07fffffeL & l) == 0L) break; - if (kind > 7) - kind = 7; - jjstateSet[jjnewStateCnt++] = 37; + if (kind > 5) + kind = 5; + jjCheckNAddTwoStates(36, 37); break; case 38: if ((0x7fffffe07fffffeL & l) != 0L) jjCheckNAddTwoStates(38, 39); break; case 40: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(40, 41); + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 6) + kind = 6; + jjCheckNAddTwoStates(39, 40); break; case 41: - if (curChar == 95) - jjCheckNAddTwoStates(42, 43); + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(41, 42); break; case 42: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(42, 43); + if (curChar == 95) + jjCheckNAddTwoStates(43, 44); break; - case 44: + case 43: if ((0x7fffffe07fffffeL & l) != 0L) - jjAddStates(42, 43); + jjCheckNAddTwoStates(43, 44); break; case 45: - if (curChar == 95) - jjCheckNAdd(46); - break; - case 46: if ((0x7fffffe07fffffeL & l) == 0L) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(41, 46); + jjstateSet[jjnewStateCnt++] = 45; + break; + case 46: + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(46, 47); break; case 47: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(47, 48); + if (curChar == 95) + jjCheckNAddTwoStates(48, 49); break; - case 49: + case 48: if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(49, 50); + jjCheckNAddTwoStates(48, 49); break; case 50: - if (curChar == 95) - jjCheckNAddTwoStates(51, 52); + if ((0x7fffffe07fffffeL & l) != 0L) + jjAddStates(42, 43); break; case 51: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(51, 52); + if (curChar == 95) + jjCheckNAdd(52); + break; + case 52: + if ((0x7fffffe07fffffeL & l) == 0L) 
+ break; + if (kind > 7) + kind = 7; + jjCheckNAddTwoStates(47, 52); break; case 53: if ((0x7fffffe07fffffeL & l) != 0L) @@ -598,68 +608,76 @@ private final int jjMoveNfa_0(int startState, int curPos) break; case 54: if (curChar == 95) - jjCheckNAdd(55); + jjCheckNAddTwoStates(55, 56); break; case 55: if ((0x7fffffe07fffffeL & l) != 0L) jjCheckNAddTwoStates(55, 56); break; - case 56: - if (curChar == 95) - jjCheckNAddTwoStates(57, 58); - break; case 57: if ((0x7fffffe07fffffeL & l) != 0L) jjCheckNAddTwoStates(57, 58); break; + case 58: + if (curChar == 95) + jjCheckNAdd(59); + break; case 59: + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(59, 60); + break; + case 60: + if (curChar == 95) + jjCheckNAddTwoStates(61, 62); + break; + case 61: + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(61, 62); + break; + case 63: if ((0x7fffffe07fffffeL & l) == 0L) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(54, 59); + jjCheckNAddTwoStates(58, 63); break; - case 60: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(60, 61); - break; - case 62: + case 64: if ((0x7fffffe07fffffeL & l) != 0L) jjCheckNAddStates(30, 35); break; - case 63: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(63, 64); - break; case 65: + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(65, 66); + break; + case 67: if ((0x7fffffe07fffffeL & l) == 0L) break; if (kind > 2) kind = 2; - jjCheckNAddTwoStates(64, 65); - break; - case 66: - if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(66, 67); + jjCheckNAddTwoStates(66, 67); break; case 68: if ((0x7fffffe07fffffeL & l) != 0L) - jjAddStates(44, 45); + jjCheckNAddTwoStates(68, 69); break; case 70: if ((0x7fffffe07fffffeL & l) != 0L) - jjCheckNAddTwoStates(70, 71); - break; - case 71: - if (curChar == 64) - jjCheckNAdd(72); + jjAddStates(44, 45); break; case 72: + if ((0x7fffffe07fffffeL & l) != 0L) + jjCheckNAddTwoStates(72, 73); + break; + case 73: + if (curChar == 64) + 
jjCheckNAdd(74); + break; + case 74: if ((0x7fffffe07fffffeL & l) == 0L) break; if (kind > 4) kind = 4; - jjCheckNAdd(72); + jjCheckNAdd(74); break; default : break; } @@ -683,268 +701,283 @@ private final int jjMoveNfa_0(int startState, int curPos) kind = 12; } if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(18, 23); + { + if (kind > 13) + kind = 13; + } if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjCheckNAddStates(18, 23); + if (jjCanMove_3(hiByte, i1, i2, l1, l2)) + jjCheckNAddStates(12, 17); + if (jjCanMove_4(hiByte, i1, i2, l1, l2)) { if (kind > 1) kind = 1; - jjCheckNAddStates(0, 17); + jjCheckNAddStates(0, 11); } if (jjCanMove_2(hiByte, i1, i2, l1, l2)) jjCheckNAddStates(30, 35); break; case 1: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(18, 23); + if (jjCanMove_1(hiByte, i1, i2, l1, l2) && kind > 13) + kind = 13; break; case 2: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(2, 3); + jjCheckNAddStates(18, 23); + break; + case 3: + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(3, 4); break; case 4: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) - break; - if (kind > 7) - kind = 7; - jjstateSet[jjnewStateCnt++] = 4; + if (jjCanMove_3(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(5, 6); break; case 5: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) jjCheckNAddTwoStates(5, 6); break; case 7: + if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) + break; + if (kind > 7) + kind = 7; + jjstateSet[jjnewStateCnt++] = 7; + break; + case 8: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(46, 47); + jjCheckNAddTwoStates(8, 9); break; case 9: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(48, 49); + if (jjCanMove_3(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(10, 11); break; case 10: - if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) - break; - if (kind > 7) - kind = 7; - jjCheckNAddTwoStates(6, 11); - break; - case 11: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) - break; - if (kind > 7) - kind = 7; - 
jjCheckNAddTwoStates(6, 11); + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(10, 11); break; case 12: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(12, 13); + if (jjCanMove_4(hiByte, i1, i2, l1, l2)) + jjAddStates(46, 47); break; case 14: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(14, 15); + jjAddStates(48, 49); + break; + case 15: + if (!jjCanMove_3(hiByte, i1, i2, l1, l2)) + break; + if (kind > 7) + kind = 7; + jjCheckNAddTwoStates(11, 16); break; case 16: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(50, 51); - break; - case 17: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(18, 19); - break; - case 18: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(18, 19); - break; - case 20: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(15, 20); + jjCheckNAddTwoStates(11, 16); + break; + case 17: + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(17, 18); + break; + case 18: + if (jjCanMove_3(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(19, 20); + break; + case 19: + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(19, 20); break; case 21: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) - break; - if (kind > 1) - kind = 1; - jjCheckNAddStates(0, 17); - break; - case 22: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) - break; - if (kind > 1) - kind = 1; - jjCheckNAdd(22); + if (jjCanMove_4(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(21, 22); break; case 23: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(24, 26); + jjAddStates(50, 51); + break; + case 24: + if (jjCanMove_3(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(25, 26); break; case 25: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(27, 29); + jjCheckNAddTwoStates(25, 26); break; case 27: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(27, 28); + if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) + 
break; + if (kind > 7) + kind = 7; + jjCheckNAddTwoStates(22, 27); + break; + case 28: + if (jjCanMove_3(hiByte, i1, i2, l1, l2)) + jjCheckNAddStates(12, 17); break; case 29: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) + if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) + break; + if (kind > 1) + kind = 1; + jjCheckNAddStates(0, 11); + break; + case 30: + if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) + break; + if (kind > 1) + kind = 1; + jjCheckNAdd(30); + break; + case 31: + if (jjCanMove_4(hiByte, i1, i2, l1, l2)) + jjCheckNAddStates(24, 26); + break; + case 33: + if (jjCanMove_4(hiByte, i1, i2, l1, l2)) + jjCheckNAddStates(27, 29); + break; + case 35: + if (jjCanMove_4(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(35, 36); + break; + case 37: + if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) break; if (kind > 5) kind = 5; - jjCheckNAddTwoStates(28, 29); + jjCheckNAddTwoStates(36, 37); break; - case 30: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(30, 31); + case 38: + if (jjCanMove_4(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(38, 39); break; - case 32: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) + case 40: + if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) break; if (kind > 6) kind = 6; - jjCheckNAddTwoStates(31, 32); + jjCheckNAddTwoStates(39, 40); break; - case 33: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(33, 34); + case 41: + if (jjCanMove_4(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(41, 42); break; - case 35: + case 43: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) jjAddStates(52, 53); break; - case 36: - if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) + case 44: + if (!jjCanMove_3(hiByte, i1, i2, l1, l2)) break; if (kind > 7) kind = 7; - jjCheckNAdd(37); + jjCheckNAdd(45); break; - case 37: + case 45: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 7) kind = 7; - jjCheckNAdd(37); + jjCheckNAdd(45); break; - case 38: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(38, 39); + case 46: + if 
(jjCanMove_4(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(46, 47); break; - case 39: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(2, 3); - break; - case 40: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(40, 41); - break; - case 42: + case 48: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) jjAddStates(54, 55); break; - case 43: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(44, 45); + case 49: + if (jjCanMove_3(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(50, 51); break; - case 44: + case 50: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(44, 45); + jjCheckNAddTwoStates(50, 51); break; - case 46: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) + case 52: + if (!jjCanMove_4(hiByte, i1, i2, l1, l2)) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(41, 46); - break; - case 47: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(47, 48); - break; - case 48: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(5, 6); - break; - case 49: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(49, 50); - break; - case 51: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(56, 57); - break; - case 52: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(53, 54); + jjCheckNAddTwoStates(47, 52); break; case 53: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + if (jjCanMove_4(hiByte, i1, i2, l1, l2)) jjCheckNAddTwoStates(53, 54); break; case 55: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(58, 59); + jjAddStates(56, 57); + break; + case 56: + if (jjCanMove_3(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(57, 58); break; case 57: + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(57, 58); + break; + case 59: + if (jjCanMove_4(hiByte, i1, i2, l1, l2)) + jjAddStates(58, 59); + break; + case 61: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) jjAddStates(60, 61); break; - case 58: - if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) + case 62: 
+ if (!jjCanMove_3(hiByte, i1, i2, l1, l2)) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(54, 59); + jjCheckNAddTwoStates(58, 63); break; - case 59: + case 63: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 7) kind = 7; - jjCheckNAddTwoStates(54, 59); + jjCheckNAddTwoStates(58, 63); break; - case 60: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(60, 61); - break; - case 61: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(12, 13); - break; - case 62: + case 64: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) jjCheckNAddStates(30, 35); break; - case 63: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(63, 64); - break; case 65: + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(65, 66); + break; + case 67: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 2) kind = 2; - jjCheckNAddTwoStates(64, 65); - break; - case 66: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(66, 67); + jjCheckNAddTwoStates(66, 67); break; case 68: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjAddStates(44, 45); + jjCheckNAddTwoStates(68, 69); break; case 70: if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddTwoStates(70, 71); + jjAddStates(44, 45); break; case 72: + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(72, 73); + break; + case 74: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; if (kind > 4) kind = 4; - jjstateSet[jjnewStateCnt++] = 72; + jjstateSet[jjnewStateCnt++] = 74; break; default : break; } @@ -957,17 +990,17 @@ private final int jjMoveNfa_0(int startState, int curPos) kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 73 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 75 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } } } static final int[] jjnextStates = { - 22, 23, 24, 26, 30, 31, 33, 34, 38, 39, 40, 41, 47, 
48, 49, 50, - 60, 61, 2, 3, 5, 6, 12, 13, 23, 24, 26, 24, 25, 26, 63, 64, - 66, 67, 70, 71, 2, 3, 12, 13, 18, 19, 44, 45, 68, 69, 7, 8, - 9, 10, 16, 17, 35, 36, 42, 43, 51, 52, 55, 56, 57, 58, + 30, 31, 32, 34, 38, 39, 41, 42, 46, 47, 53, 54, 5, 6, 10, 11, + 19, 20, 3, 4, 8, 9, 17, 18, 31, 32, 34, 32, 33, 34, 65, 66, + 68, 69, 72, 73, 5, 6, 19, 20, 25, 26, 50, 51, 70, 71, 12, 13, + 14, 15, 23, 24, 43, 44, 48, 49, 55, 56, 59, 60, 61, 62, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { @@ -981,8 +1014,6 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo return ((jjbitVec4[i2] & l2) != 0L); case 61: return ((jjbitVec5[i2] & l2) != 0L); - case 215: - return ((jjbitVec6[i2] & l2) != 0L); default : if ((jjbitVec0[i1] & l1) != 0L) return true; @@ -993,21 +1024,10 @@ private static final boolean jjCanMove_1(int hiByte, int i1, int i2, long l1, lo { switch(hiByte) { - case 6: - return ((jjbitVec9[i2] & l2) != 0L); - case 11: - return ((jjbitVec10[i2] & l2) != 0L); - case 13: - return ((jjbitVec11[i2] & l2) != 0L); - case 14: - return ((jjbitVec12[i2] & l2) != 0L); - case 16: - return ((jjbitVec13[i2] & l2) != 0L); + case 215: + return ((jjbitVec7[i2] & l2) != 0L); default : - if ((jjbitVec7[i1] & l1) != 0L) - if ((jjbitVec8[i2] & l2) == 0L) - return false; - else + if ((jjbitVec6[i1] & l1) != 0L) return true; return false; } @@ -1017,28 +1037,65 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo switch(hiByte) { case 0: - return ((jjbitVec15[i2] & l2) != 0L); + return ((jjbitVec9[i2] & l2) != 0L); default : - if ((jjbitVec14[i1] & l1) != 0L) + if ((jjbitVec8[i1] & l1) != 0L) + return true; + return false; + } +} +private static final boolean jjCanMove_3(int hiByte, int i1, int i2, long l1, long l2) +{ + switch(hiByte) + { + case 6: + return ((jjbitVec12[i2] & l2) != 0L); + case 11: + return ((jjbitVec13[i2] & l2) != 0L); + case 13: + return ((jjbitVec14[i2] & l2) != 
0L); + case 14: + return ((jjbitVec15[i2] & l2) != 0L); + case 16: + return ((jjbitVec16[i2] & l2) != 0L); + default : + if ((jjbitVec10[i1] & l1) != 0L) + if ((jjbitVec11[i2] & l2) == 0L) + return false; + else + return true; + return false; + } +} +private static final boolean jjCanMove_4(int hiByte, int i1, int i2, long l1, long l2) +{ + switch(hiByte) + { + case 0: + return ((jjbitVec9[i2] & l2) != 0L); + case 215: + return ((jjbitVec7[i2] & l2) != 0L); + default : + if ((jjbitVec17[i1] & l1) != 0L) return true; return false; } } public static final String[] jjstrLiteralImages = { "", null, null, null, null, null, null, null, null, null, null, null, null, -null, null, }; +null, null, null, }; public static final String[] lexStateNames = { "DEFAULT", }; static final long[] jjtoToken = { - 0x10ffL, + 0x30ffL, }; static final long[] jjtoSkip = { - 0x4000L, + 0x8000L, }; protected CharStream input_stream; -private final int[] jjrounds = new int[73]; -private final int[] jjstateSet = new int[146]; +private final int[] jjrounds = new int[75]; +private final int[] jjstateSet = new int[150]; protected char curChar; public StandardTokenizerTokenManager(CharStream stream) { @@ -1060,7 +1117,7 @@ private final void ReInitRounds() { int i; jjround = 0x80000001; - for (i = 73; i-- > 0;) + for (i = 75; i-- > 0;) jjrounds[i] = 0x80000000; } public void ReInit(CharStream stream, int lexState) @@ -1120,9 +1177,9 @@ public Token getNextToken() jjmatchedKind = 0x7fffffff; jjmatchedPos = 0; curPos = jjMoveStringLiteralDfa0_0(); - if (jjmatchedPos == 0 && jjmatchedKind > 14) + if (jjmatchedPos == 0 && jjmatchedKind > 15) { - jjmatchedKind = 14; + jjmatchedKind = 15; } if (jjmatchedKind != 0x7fffffff) { diff --git a/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java b/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java index c4e8dee1799..1184c093948 100644 --- a/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java +++ 
b/src/test/org/apache/lucene/analysis/TestStandardAnalyzer.java @@ -94,5 +94,8 @@ public class TestStandardAnalyzer extends TestCase { assertAnalyzesTo(a, "C++", new String[]{"c"}); assertAnalyzesTo(a, "C#", new String[]{"c"}); + // Korean words + assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"}); + } }