mirror of https://github.com/apache/lucene.git
LUCENE-2911: synchronize grammar/token types across StandardTokenizer, UAX29URLEmailTokenizer, ICUTokenizer; add CJK types
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1068979 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
762272e48a
commit
6386f77138
|
@ -849,6 +849,13 @@ New features
|
|||
IndexReader, to allow apps that maintain external per-segment caches
|
||||
to evict entries when a segment is finished. (Shay Banon, Yonik
|
||||
Seeley, Mike McCandless)
|
||||
|
||||
* LUCENE-2911: The new StandardTokenizer, UAX29URLEmailTokenizer, and
|
||||
the ICUTokenizer in contrib now all tag tokens with a consistent set
|
||||
of token types (defined in StandardTokenizer). Tokens in the major
|
||||
CJK types are explicitly marked to allow for custom downstream handling:
|
||||
<IDEOGRAPHIC>, <HANGUL>, <KATAKANA>, and <HIRAGANA>.
|
||||
(Robert Muir, Steven Rowe)
|
||||
|
||||
Optimizations
|
||||
|
||||
|
|
|
@ -15,8 +15,8 @@
|
|||
*/
|
||||
|
||||
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
|
||||
// file version from Wednesday, January 5, 2011 12:34:09 PM UTC
|
||||
// generated on Thursday, January 6, 2011 5:09:41 AM UTC
|
||||
// file version from Wednesday, February 9, 2011 12:34:10 PM UTC
|
||||
// generated on Wednesday, February 9, 2011 4:45:18 PM UTC
|
||||
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
|
||||
|
||||
ASCIITLD = "." (
|
||||
|
@ -285,13 +285,19 @@ ASCIITLD = "." (
|
|||
| [wW][sS]
|
||||
| [xX][nN]--0[zZ][wW][mM]56[dD]
|
||||
| [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
|
||||
| [xX][nN]--3[eE]0[bB]707[eE]
|
||||
| [xX][nN]--45[bB][rR][jJ]9[cC]
|
||||
| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
|
||||
| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
|
||||
| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
|
||||
| [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
|
||||
| [xX][nN]--[fF][iI][qQ][sS]8[sS]
|
||||
| [xX][nN]--[fF][iI][qQ][zZ]9[sS]
|
||||
| [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD]
|
||||
| [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
|
||||
| [xX][nN]--[gG]6[wW]251[dD]
|
||||
| [xX][nN]--[gG][eE][cC][rR][jJ]9[cC]
|
||||
| [xX][nN]--[hH]2[bB][rR][jJ]9[cC]
|
||||
| [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]
|
||||
| [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
|
||||
| [xX][nN]--[jJ]6[wW]193[gG]
|
||||
|
@ -301,13 +307,18 @@ ASCIITLD = "." (
|
|||
| [xX][nN]--[kK][pP][rR][yY]57[dD]
|
||||
| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
|
||||
| [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
|
||||
| [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
|
||||
| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
|
||||
| [xX][nN]--[oO]3[cC][wW]4[hH]
|
||||
| [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
|
||||
| [xX][nN]--[pP]1[aA][iI]
|
||||
| [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
|
||||
| [xX][nN]--[sS]9[bB][rR][jJ]9[cC]
|
||||
| [xX][nN]--[wW][gG][bB][hH]1[cC]
|
||||
| [xX][nN]--[wW][gG][bB][lL]6[aA]
|
||||
| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
|
||||
| [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH]
|
||||
| [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
|
||||
| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
|
||||
| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
|
||||
| [yY][eE]
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 12/4/10 7:24 PM */
|
||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 2/9/11 11:45 AM */
|
||||
|
||||
package org.apache.lucene.analysis.standard;
|
||||
|
||||
|
@ -26,14 +26,15 @@ WARNING: if you change ClassicTokenizerImpl.jflex and need to regenerate
|
|||
|
||||
*/
|
||||
|
||||
import java.io.Reader;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
|
||||
/**
|
||||
* This class is a scanner generated by
|
||||
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
||||
* on 12/4/10 7:24 PM from the specification file
|
||||
* <tt>C:/cygwin/home/us/svn/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
|
||||
* on 2/9/11 11:45 AM from the specification file
|
||||
* <tt>C:/Users/rmuir/workspace/lucene-2911/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
|
||||
*/
|
||||
class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
||||
|
||||
|
@ -681,45 +682,45 @@ public final void getText(CharTermAttribute t) {
|
|||
zzMarkedPos = zzMarkedPosL;
|
||||
|
||||
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
|
||||
case 5:
|
||||
{ return NUM;
|
||||
}
|
||||
case 11: break;
|
||||
case 9:
|
||||
{ return ACRONYM;
|
||||
}
|
||||
case 12: break;
|
||||
case 7:
|
||||
{ return COMPANY;
|
||||
}
|
||||
case 13: break;
|
||||
case 10:
|
||||
{ return EMAIL;
|
||||
}
|
||||
case 14: break;
|
||||
case 1:
|
||||
{ /* ignore */
|
||||
}
|
||||
case 15: break;
|
||||
case 6:
|
||||
{ return APOSTROPHE;
|
||||
}
|
||||
case 16: break;
|
||||
case 3:
|
||||
{ return CJ;
|
||||
}
|
||||
case 17: break;
|
||||
case 8:
|
||||
{ return ACRONYM_DEP;
|
||||
}
|
||||
case 18: break;
|
||||
case 11: break;
|
||||
case 2:
|
||||
{ return ALPHANUM;
|
||||
}
|
||||
case 19: break;
|
||||
case 12: break;
|
||||
case 4:
|
||||
{ return HOST;
|
||||
}
|
||||
case 13: break;
|
||||
case 1:
|
||||
{ /* ignore */
|
||||
}
|
||||
case 14: break;
|
||||
case 8:
|
||||
{ return ACRONYM_DEP;
|
||||
}
|
||||
case 15: break;
|
||||
case 5:
|
||||
{ return NUM;
|
||||
}
|
||||
case 16: break;
|
||||
case 9:
|
||||
{ return ACRONYM;
|
||||
}
|
||||
case 17: break;
|
||||
case 7:
|
||||
{ return COMPANY;
|
||||
}
|
||||
case 18: break;
|
||||
case 6:
|
||||
{ return APOSTROPHE;
|
||||
}
|
||||
case 19: break;
|
||||
case 3:
|
||||
{ return CJ;
|
||||
}
|
||||
case 20: break;
|
||||
default:
|
||||
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Generated using ICU4J 4.6.0.0 on Thursday, January 6, 2011 7:02:52 PM UTC
|
||||
// Generated using ICU4J 4.6.0.0 on Wednesday, February 9, 2011 4:45:11 PM UTC
|
||||
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
|
||||
|
||||
|
||||
|
|
|
@ -78,6 +78,8 @@ public final class StandardTokenizer extends Tokenizer {
|
|||
public static final int SOUTHEAST_ASIAN = 9;
|
||||
public static final int IDEOGRAPHIC = 10;
|
||||
public static final int HIRAGANA = 11;
|
||||
public static final int KATAKANA = 12;
|
||||
public static final int HANGUL = 13;
|
||||
|
||||
/** String token types that correspond to token type int constants */
|
||||
public static final String [] TOKEN_TYPES = new String [] {
|
||||
|
@ -92,7 +94,9 @@ public final class StandardTokenizer extends Tokenizer {
|
|||
"<ACRONYM_DEP>",
|
||||
"<SOUTHEAST_ASIAN>",
|
||||
"<IDEOGRAPHIC>",
|
||||
"<HIRAGANA>"
|
||||
"<HIRAGANA>",
|
||||
"<KATAKANA>",
|
||||
"<HANGUL>"
|
||||
};
|
||||
|
||||
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 1/6/11 12:09 AM */
|
||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 2/9/11 11:45 AM */
|
||||
|
||||
package org.apache.lucene.analysis.standard;
|
||||
|
||||
|
@ -116,84 +116,85 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
|||
"\1\133\71\0\53\142\24\143\1\142\12\134\6\0\6\142\4\143\4\142"+
|
||||
"\3\143\1\142\3\143\2\142\7\143\3\142\4\143\15\142\14\143\1\142"+
|
||||
"\1\143\12\134\4\143\2\142\46\132\12\0\53\132\1\0\1\132\3\0"+
|
||||
"\u0149\132\1\0\4\132\2\0\7\132\1\0\1\132\1\0\4\132\2\0"+
|
||||
"\51\132\1\0\4\132\2\0\41\132\1\0\4\132\2\0\7\132\1\0"+
|
||||
"\1\132\1\0\4\132\2\0\17\132\1\0\71\132\1\0\4\132\2\0"+
|
||||
"\103\132\2\0\3\133\40\0\20\132\20\0\125\132\14\0\u026c\132\2\0"+
|
||||
"\21\132\1\0\32\132\5\0\113\132\3\0\3\132\17\0\15\132\1\0"+
|
||||
"\4\132\3\133\13\0\22\132\3\133\13\0\22\132\2\133\14\0\15\132"+
|
||||
"\1\0\3\132\1\0\2\133\14\0\64\142\2\143\36\143\3\0\1\142"+
|
||||
"\4\0\1\142\1\143\2\0\12\134\41\0\3\133\2\0\12\134\6\0"+
|
||||
"\130\132\10\0\51\132\1\133\1\132\5\0\106\132\12\0\35\132\3\0"+
|
||||
"\14\133\4\0\14\133\12\0\12\134\36\142\2\0\5\142\13\0\54\142"+
|
||||
"\4\0\21\143\7\142\2\143\6\0\12\134\1\142\3\0\2\142\40\0"+
|
||||
"\27\132\5\133\4\0\65\142\12\143\1\0\35\143\2\0\1\133\12\134"+
|
||||
"\6\0\12\134\6\0\16\142\122\0\5\133\57\132\21\133\7\132\4\0"+
|
||||
"\12\134\21\0\11\133\14\0\3\133\36\132\12\133\3\0\2\132\12\134"+
|
||||
"\6\0\46\132\16\133\14\0\44\132\24\133\10\0\12\134\3\0\3\132"+
|
||||
"\12\134\44\132\122\0\3\133\1\0\25\133\4\132\1\133\4\132\1\133"+
|
||||
"\15\0\300\132\47\133\25\0\4\133\u0116\132\2\0\6\132\2\0\46\132"+
|
||||
"\2\0\6\132\2\0\10\132\1\0\1\132\1\0\1\132\1\0\1\132"+
|
||||
"\1\0\37\132\2\0\65\132\1\0\7\132\1\0\1\132\3\0\3\132"+
|
||||
"\1\0\7\132\3\0\4\132\2\0\6\132\4\0\15\132\5\0\3\132"+
|
||||
"\1\0\7\132\17\0\2\133\2\133\10\0\2\140\12\0\1\140\2\0"+
|
||||
"\1\136\2\0\5\133\20\0\2\141\3\0\1\137\17\0\1\141\13\0"+
|
||||
"\5\133\5\0\6\133\1\0\1\132\15\0\1\132\20\0\15\132\63\0"+
|
||||
"\41\133\21\0\1\132\4\0\1\132\2\0\12\132\1\0\1\132\3\0"+
|
||||
"\5\132\6\0\1\132\1\0\1\132\1\0\1\132\1\0\4\132\1\0"+
|
||||
"\13\132\2\0\4\132\5\0\5\132\4\0\1\132\21\0\51\132\u032d\0"+
|
||||
"\64\132\u0716\0\57\132\1\0\57\132\1\0\205\132\6\0\4\132\3\133"+
|
||||
"\16\0\46\132\12\0\66\132\11\0\1\132\17\0\1\133\27\132\11\0"+
|
||||
"\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0"+
|
||||
"\7\132\1\0\7\132\1\0\7\132\1\0\40\133\57\0\1\132\120\0"+
|
||||
"\32\144\1\0\131\144\14\0\326\144\57\0\1\132\1\0\1\144\31\0"+
|
||||
"\11\144\6\133\1\0\5\135\2\0\3\144\1\132\1\132\4\0\126\145"+
|
||||
"\2\0\2\133\2\135\3\145\133\135\1\0\4\135\5\0\51\132\3\0"+
|
||||
"\136\132\21\0\33\132\65\0\20\135\320\0\57\135\1\0\130\135\250\0"+
|
||||
"\u19b6\144\112\0\u51cc\144\64\0\u048d\132\103\0\56\132\2\0\u010d\132\3\0"+
|
||||
"\20\132\12\134\2\132\24\0\57\132\4\133\11\0\2\133\1\0\31\132"+
|
||||
"\10\0\120\132\2\133\45\0\11\132\2\0\147\132\2\0\4\132\1\0"+
|
||||
"\2\132\16\0\12\132\120\0\10\132\1\133\3\132\1\133\4\132\1\133"+
|
||||
"\27\132\5\133\30\0\64\132\14\0\2\133\62\132\21\133\13\0\12\134"+
|
||||
"\6\0\22\133\6\132\3\0\1\132\4\0\12\134\34\132\10\133\2\0"+
|
||||
"\27\132\15\133\14\0\35\132\3\0\4\133\57\132\16\133\16\0\1\132"+
|
||||
"\12\134\46\0\51\132\16\133\11\0\3\132\1\133\10\132\2\133\2\0"+
|
||||
"\12\134\6\0\33\142\1\143\4\0\60\142\1\143\1\142\3\143\2\142"+
|
||||
"\2\143\5\142\2\143\1\142\1\143\1\142\30\0\5\142\41\0\6\132"+
|
||||
"\2\0\6\132\2\0\6\132\11\0\7\132\1\0\7\132\221\0\43\132"+
|
||||
"\10\133\1\0\2\133\2\0\12\134\6\0\u2ba4\132\14\0\27\132\4\0"+
|
||||
"\61\132\4\0\1\31\1\25\1\46\1\43\1\13\3\0\1\7\1\5"+
|
||||
"\2\0\1\3\1\1\14\0\1\11\21\0\1\112\7\0\1\65\1\17"+
|
||||
"\6\0\1\130\3\0\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
|
||||
"\u0100\146\111\132\1\0\4\132\2\0\7\132\1\0\1\132\1\0\4\132"+
|
||||
"\2\0\51\132\1\0\4\132\2\0\41\132\1\0\4\132\2\0\7\132"+
|
||||
"\1\0\1\132\1\0\4\132\2\0\17\132\1\0\71\132\1\0\4\132"+
|
||||
"\2\0\103\132\2\0\3\133\40\0\20\132\20\0\125\132\14\0\u026c\132"+
|
||||
"\2\0\21\132\1\0\32\132\5\0\113\132\3\0\3\132\17\0\15\132"+
|
||||
"\1\0\4\132\3\133\13\0\22\132\3\133\13\0\22\132\2\133\14\0"+
|
||||
"\15\132\1\0\3\132\1\0\2\133\14\0\64\142\2\143\36\143\3\0"+
|
||||
"\1\142\4\0\1\142\1\143\2\0\12\134\41\0\3\133\2\0\12\134"+
|
||||
"\6\0\130\132\10\0\51\132\1\133\1\132\5\0\106\132\12\0\35\132"+
|
||||
"\3\0\14\133\4\0\14\133\12\0\12\134\36\142\2\0\5\142\13\0"+
|
||||
"\54\142\4\0\21\143\7\142\2\143\6\0\12\134\1\142\3\0\2\142"+
|
||||
"\40\0\27\132\5\133\4\0\65\142\12\143\1\0\35\143\2\0\1\133"+
|
||||
"\12\134\6\0\12\134\6\0\16\142\122\0\5\133\57\132\21\133\7\132"+
|
||||
"\4\0\12\134\21\0\11\133\14\0\3\133\36\132\12\133\3\0\2\132"+
|
||||
"\12\134\6\0\46\132\16\133\14\0\44\132\24\133\10\0\12\134\3\0"+
|
||||
"\3\132\12\134\44\132\122\0\3\133\1\0\25\133\4\132\1\133\4\132"+
|
||||
"\1\133\15\0\300\132\47\133\25\0\4\133\u0116\132\2\0\6\132\2\0"+
|
||||
"\46\132\2\0\6\132\2\0\10\132\1\0\1\132\1\0\1\132\1\0"+
|
||||
"\1\132\1\0\37\132\2\0\65\132\1\0\7\132\1\0\1\132\3\0"+
|
||||
"\3\132\1\0\7\132\3\0\4\132\2\0\6\132\4\0\15\132\5\0"+
|
||||
"\3\132\1\0\7\132\17\0\2\133\2\133\10\0\2\140\12\0\1\140"+
|
||||
"\2\0\1\136\2\0\5\133\20\0\2\141\3\0\1\137\17\0\1\141"+
|
||||
"\13\0\5\133\5\0\6\133\1\0\1\132\15\0\1\132\20\0\15\132"+
|
||||
"\63\0\41\133\21\0\1\132\4\0\1\132\2\0\12\132\1\0\1\132"+
|
||||
"\3\0\5\132\6\0\1\132\1\0\1\132\1\0\1\132\1\0\4\132"+
|
||||
"\1\0\13\132\2\0\4\132\5\0\5\132\4\0\1\132\21\0\51\132"+
|
||||
"\u032d\0\64\132\u0716\0\57\132\1\0\57\132\1\0\205\132\6\0\4\132"+
|
||||
"\3\133\16\0\46\132\12\0\66\132\11\0\1\132\17\0\1\133\27\132"+
|
||||
"\11\0\7\132\1\0\7\132\1\0\7\132\1\0\7\132\1\0\7\132"+
|
||||
"\1\0\7\132\1\0\7\132\1\0\7\132\1\0\40\133\57\0\1\132"+
|
||||
"\120\0\32\144\1\0\131\144\14\0\326\144\57\0\1\132\1\0\1\144"+
|
||||
"\31\0\11\144\4\133\2\133\1\0\5\135\2\0\3\144\1\132\1\132"+
|
||||
"\4\0\126\145\2\0\2\133\2\135\3\145\133\135\1\0\4\135\5\0"+
|
||||
"\51\132\3\0\136\146\21\0\33\132\65\0\20\135\37\0\101\0\37\0"+
|
||||
"\121\0\57\135\1\0\130\135\250\0\u19b6\144\112\0\u51cc\144\64\0\u048d\132"+
|
||||
"\103\0\56\132\2\0\u010d\132\3\0\20\132\12\134\2\132\24\0\57\132"+
|
||||
"\4\133\11\0\2\133\1\0\31\132\10\0\120\132\2\133\45\0\11\132"+
|
||||
"\2\0\147\132\2\0\4\132\1\0\2\132\16\0\12\132\120\0\10\132"+
|
||||
"\1\133\3\132\1\133\4\132\1\133\27\132\5\133\30\0\64\132\14\0"+
|
||||
"\2\133\62\132\21\133\13\0\12\134\6\0\22\133\6\132\3\0\1\132"+
|
||||
"\4\0\12\134\34\132\10\133\2\0\27\132\15\133\14\0\35\146\3\0"+
|
||||
"\4\133\57\132\16\133\16\0\1\132\12\134\46\0\51\132\16\133\11\0"+
|
||||
"\3\132\1\133\10\132\2\133\2\0\12\134\6\0\33\142\1\143\4\0"+
|
||||
"\60\142\1\143\1\142\3\143\2\142\2\143\5\142\2\143\1\142\1\143"+
|
||||
"\1\142\30\0\5\142\41\0\6\132\2\0\6\132\2\0\6\132\11\0"+
|
||||
"\7\132\1\0\7\132\221\0\43\132\10\133\1\0\2\133\2\0\12\134"+
|
||||
"\6\0\u2ba4\146\14\0\27\146\4\0\61\146\4\0\1\31\1\25\1\46"+
|
||||
"\1\43\1\13\3\0\1\7\1\5\2\0\1\3\1\1\14\0\1\11"+
|
||||
"\21\0\1\112\7\0\1\65\1\17\6\0\1\130\3\0\1\120\1\120"+
|
||||
"\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
|
||||
"\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
|
||||
"\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120"+
|
||||
"\1\120\1\120\1\120\1\120\1\121\1\120\1\120\1\120\1\125\1\123"+
|
||||
"\17\0\1\114\u02c1\0\1\70\277\0\1\113\1\71\1\2\3\124\2\35"+
|
||||
"\1\124\1\35\2\124\1\14\21\124\2\60\7\73\1\72\7\73\7\52"+
|
||||
"\1\15\1\52\1\75\2\45\1\44\1\75\1\45\1\44\10\75\2\63"+
|
||||
"\5\61\2\54\5\61\1\6\10\37\5\21\3\27\12\106\20\27\3\42"+
|
||||
"\32\30\1\26\2\24\2\110\1\111\2\110\2\111\2\110\1\111\3\24"+
|
||||
"\1\16\2\24\12\64\1\74\1\41\1\34\1\64\6\41\1\34\66\41"+
|
||||
"\5\115\6\103\1\51\4\103\2\51\10\103\1\51\7\100\1\12\2\100"+
|
||||
"\32\103\1\12\4\100\1\12\5\102\1\101\1\102\3\101\7\102\1\101"+
|
||||
"\23\102\5\67\3\102\6\67\2\67\6\66\10\66\2\100\7\66\36\100"+
|
||||
"\4\66\102\100\15\115\1\77\2\115\1\131\3\117\1\115\2\117\5\115"+
|
||||
"\4\117\4\116\1\115\3\116\1\115\5\116\26\56\4\23\1\105\2\104"+
|
||||
"\4\122\1\104\2\122\3\76\33\122\35\55\3\122\35\126\3\122\6\126"+
|
||||
"\2\33\31\126\1\33\17\126\6\122\4\22\1\10\37\22\1\10\4\22"+
|
||||
"\25\62\1\127\11\62\21\55\5\62\1\57\12\40\13\62\4\55\1\50"+
|
||||
"\6\55\12\122\17\55\1\47\3\53\15\20\11\36\1\32\24\36\2\20"+
|
||||
"\11\36\1\32\31\36\1\32\4\20\4\36\2\32\2\107\1\4\5\107"+
|
||||
"\52\4\u1900\0\u012e\144\2\0\76\144\2\0\152\144\46\0\7\132\14\0"+
|
||||
"\5\132\5\0\1\132\1\133\12\132\1\0\15\132\1\0\5\132\1\0"+
|
||||
"\1\132\1\0\2\132\1\0\2\132\1\0\154\132\41\0\u016b\132\22\0"+
|
||||
"\100\132\2\0\66\132\50\0\14\132\4\0\20\133\1\137\2\0\1\136"+
|
||||
"\1\137\13\0\7\133\14\0\2\141\30\0\3\141\1\137\1\0\1\140"+
|
||||
"\1\0\1\137\1\136\32\0\5\132\1\0\207\132\2\0\1\133\7\0"+
|
||||
"\1\140\4\0\1\137\1\0\1\140\1\0\12\134\1\136\1\137\5\0"+
|
||||
"\32\132\4\0\1\141\1\0\32\132\13\0\70\135\2\133\37\132\3\0"+
|
||||
"\6\132\2\0\6\132\2\0\6\132\2\0\3\132\34\0\3\133\4\0";
|
||||
"\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\120\1\121"+
|
||||
"\1\120\1\120\1\120\1\125\1\123\17\0\1\114\u02c1\0\1\70\277\0"+
|
||||
"\1\113\1\71\1\2\3\124\2\35\1\124\1\35\2\124\1\14\21\124"+
|
||||
"\2\60\7\73\1\72\7\73\7\52\1\15\1\52\1\75\2\45\1\44"+
|
||||
"\1\75\1\45\1\44\10\75\2\63\5\61\2\54\5\61\1\6\10\37"+
|
||||
"\5\21\3\27\12\106\20\27\3\42\32\30\1\26\2\24\2\110\1\111"+
|
||||
"\2\110\2\111\2\110\1\111\3\24\1\16\2\24\12\64\1\74\1\41"+
|
||||
"\1\34\1\64\6\41\1\34\66\41\5\115\6\103\1\51\4\103\2\51"+
|
||||
"\10\103\1\51\7\100\1\12\2\100\32\103\1\12\4\100\1\12\5\102"+
|
||||
"\1\101\1\102\3\101\7\102\1\101\23\102\5\67\3\102\6\67\2\67"+
|
||||
"\6\66\10\66\2\100\7\66\36\100\4\66\102\100\15\115\1\77\2\115"+
|
||||
"\1\131\3\117\1\115\2\117\5\115\4\117\4\116\1\115\3\116\1\115"+
|
||||
"\5\116\26\56\4\23\1\105\2\104\4\122\1\104\2\122\3\76\33\122"+
|
||||
"\35\55\3\122\35\126\3\122\6\126\2\33\31\126\1\33\17\126\6\122"+
|
||||
"\4\22\1\10\37\22\1\10\4\22\25\62\1\127\11\62\21\55\5\62"+
|
||||
"\1\57\12\40\13\62\4\55\1\50\6\55\12\122\17\55\1\47\3\53"+
|
||||
"\15\20\11\36\1\32\24\36\2\20\11\36\1\32\31\36\1\32\4\20"+
|
||||
"\4\36\2\32\2\107\1\4\5\107\52\4\u1900\0\u012e\144\2\0\76\144"+
|
||||
"\2\0\152\144\46\0\7\132\14\0\5\132\5\0\1\132\1\133\12\132"+
|
||||
"\1\0\15\132\1\0\5\132\1\0\1\132\1\0\2\132\1\0\2\132"+
|
||||
"\1\0\154\132\41\0\u016b\132\22\0\100\132\2\0\66\132\50\0\14\132"+
|
||||
"\4\0\20\133\1\137\2\0\1\136\1\137\13\0\7\133\14\0\2\141"+
|
||||
"\30\0\3\141\1\137\1\0\1\140\1\0\1\137\1\136\32\0\5\132"+
|
||||
"\1\0\207\132\2\0\1\133\7\0\1\140\4\0\1\137\1\0\1\140"+
|
||||
"\1\0\12\134\1\136\1\137\5\0\32\132\4\0\1\141\1\0\32\132"+
|
||||
"\13\0\70\135\2\133\37\146\3\0\6\146\2\0\6\146\2\0\6\146"+
|
||||
"\2\0\3\146\34\0\3\133\4\0";
|
||||
|
||||
/**
|
||||
* Translates characters to character classes
|
||||
|
@ -206,11 +207,12 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
|||
private static final int [] ZZ_ACTION = zzUnpackAction();
|
||||
|
||||
private static final String ZZ_ACTION_PACKED_0 =
|
||||
"\1\0\23\1\1\2\1\3\1\2\1\1\1\4\1\5"+
|
||||
"\1\6\15\0\1\2\1\0\1\2\10\0\1\3\61\0";
|
||||
"\1\0\23\1\1\2\1\3\1\4\1\1\1\5\1\6"+
|
||||
"\1\7\1\10\15\0\1\2\1\0\1\2\10\0\1\3"+
|
||||
"\15\0\1\2\57\0";
|
||||
|
||||
private static int [] zzUnpackAction() {
|
||||
int [] result = new int[101];
|
||||
int [] result = new int[114];
|
||||
int offset = 0;
|
||||
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
|
||||
return result;
|
||||
|
@ -235,22 +237,24 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
|||
private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
|
||||
|
||||
private static final String ZZ_ROWMAP_PACKED_0 =
|
||||
"\0\0\0\146\0\314\0\u0132\0\u0198\0\u01fe\0\u0264\0\u02ca"+
|
||||
"\0\u0330\0\u0396\0\u03fc\0\u0462\0\u04c8\0\u052e\0\u0594\0\u05fa"+
|
||||
"\0\u0660\0\u06c6\0\u072c\0\u0792\0\u07f8\0\u085e\0\u08c4\0\u092a"+
|
||||
"\0\u0990\0\146\0\146\0\314\0\u0132\0\u0198\0\u01fe\0\u0264"+
|
||||
"\0\u09f6\0\u0a5c\0\u0ac2\0\u0b28\0\u0462\0\u0b8e\0\u0bf4\0\u0c5a"+
|
||||
"\0\u0cc0\0\u0d26\0\u0d8c\0\u0df2\0\u0330\0\u0396\0\u0e58\0\u0ebe"+
|
||||
"\0\u0f24\0\u0f8a\0\u0ff0\0\u1056\0\u10bc\0\u1122\0\u1188\0\u11ee"+
|
||||
"\0\u1254\0\u12ba\0\u1320\0\u1386\0\u13ec\0\u1452\0\u14b8\0\u092a"+
|
||||
"\0\u151e\0\u1584\0\u15ea\0\u1650\0\u16b6\0\u171c\0\u1782\0\u17e8"+
|
||||
"\0\u184e\0\u18b4\0\u191a\0\u1980\0\u19e6\0\u1a4c\0\u1ab2\0\u1b18"+
|
||||
"\0\u1b7e\0\u1be4\0\u1c4a\0\u1cb0\0\u1d16\0\u1d7c\0\u1de2\0\u1e48"+
|
||||
"\0\u1eae\0\u1f14\0\u1f7a\0\u1fe0\0\u2046\0\u20ac\0\u2112\0\u2178"+
|
||||
"\0\u21de\0\u2244\0\u22aa\0\u2310\0\u2376";
|
||||
"\0\0\0\147\0\316\0\u0135\0\u019c\0\u0203\0\u026a\0\u02d1"+
|
||||
"\0\u0338\0\u039f\0\u0406\0\u046d\0\u04d4\0\u053b\0\u05a2\0\u0609"+
|
||||
"\0\u0670\0\u06d7\0\u073e\0\u07a5\0\u080c\0\u0873\0\u08da\0\u0941"+
|
||||
"\0\u09a8\0\147\0\147\0\u0a0f\0\316\0\u0135\0\u019c\0\u0203"+
|
||||
"\0\u026a\0\u0a76\0\u0add\0\u0b44\0\u0bab\0\u046d\0\u0c12\0\u0c79"+
|
||||
"\0\u0ce0\0\u0d47\0\u0dae\0\u0e15\0\u0e7c\0\u0338\0\u039f\0\u0ee3"+
|
||||
"\0\u0f4a\0\u0fb1\0\u1018\0\u107f\0\u10e6\0\u114d\0\u11b4\0\u121b"+
|
||||
"\0\u1282\0\u12e9\0\u1350\0\u13b7\0\u141e\0\u1485\0\u14ec\0\u1553"+
|
||||
"\0\u15ba\0\u0941\0\u1621\0\u1688\0\u16ef\0\u1756\0\u17bd\0\u1824"+
|
||||
"\0\u188b\0\u18f2\0\u1959\0\u19c0\0\u1a27\0\u1a8e\0\u1af5\0\u1b5c"+
|
||||
"\0\u1bc3\0\u1c2a\0\u1c91\0\u1cf8\0\u1d5f\0\u1dc6\0\u1e2d\0\u1e94"+
|
||||
"\0\u1efb\0\u1f62\0\u1fc9\0\u2030\0\u2097\0\u20fe\0\u2165\0\u21cc"+
|
||||
"\0\u2233\0\u229a\0\u2301\0\u2368\0\u23cf\0\u2436\0\u249d\0\u2504"+
|
||||
"\0\u256b\0\u25d2\0\u2639\0\u26a0\0\u2707\0\u276e\0\u27d5\0\u283c"+
|
||||
"\0\u28a3\0\u290a";
|
||||
|
||||
private static int [] zzUnpackRowMap() {
|
||||
int [] result = new int[101];
|
||||
int [] result = new int[114];
|
||||
int offset = 0;
|
||||
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
|
||||
return result;
|
||||
|
@ -278,275 +282,308 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
|||
"\3\2\1\13\11\2\1\14\2\2\1\15\43\2\1\16"+
|
||||
"\1\2\1\17\3\2\1\20\1\21\1\2\1\22\1\2"+
|
||||
"\1\23\2\2\1\24\1\2\1\25\1\2\1\26\1\27"+
|
||||
"\3\2\1\30\2\31\1\32\1\33\150\0\1\25\11\0"+
|
||||
"\1\25\20\0\1\25\22\0\1\25\10\0\3\25\17\0"+
|
||||
"\1\25\10\0\1\25\23\0\1\25\1\0\1\25\1\0"+
|
||||
"\1\25\1\0\1\25\1\0\1\25\1\0\3\25\1\0"+
|
||||
"\5\25\1\0\3\25\1\0\11\25\1\0\2\25\1\0"+
|
||||
"\16\25\1\0\2\25\1\0\21\25\1\0\1\25\1\0"+
|
||||
"\3\25\2\0\1\25\1\0\1\25\1\0\2\25\1\0"+
|
||||
"\1\25\16\0\1\25\3\0\1\25\5\0\2\25\3\0"+
|
||||
"\1\25\13\0\1\25\1\0\1\25\4\0\2\25\4\0"+
|
||||
"\1\25\1\0\1\25\3\0\2\25\1\0\1\25\5\0"+
|
||||
"\3\25\1\0\1\25\15\0\1\25\10\0\1\25\23\0"+
|
||||
"\1\25\3\0\1\25\1\0\1\25\1\0\1\25\1\0"+
|
||||
"\3\25\2\0\4\25\1\0\3\25\2\0\3\25\1\0"+
|
||||
"\4\25\1\0\2\25\2\0\3\25\1\0\11\25\1\0"+
|
||||
"\2\25\1\0\16\25\1\0\2\25\1\0\1\25\1\0"+
|
||||
"\3\25\2\0\1\25\1\0\1\25\1\0\2\25\1\0"+
|
||||
"\1\25\16\0\1\25\3\0\1\25\3\0\1\25\1\0"+
|
||||
"\3\25\2\0\1\25\1\0\2\25\1\0\3\25\3\0"+
|
||||
"\2\25\1\0\1\25\1\0\2\25\1\0\2\25\3\0"+
|
||||
"\2\25\1\0\1\25\1\0\1\25\1\0\2\25\1\0"+
|
||||
"\2\25\1\0\2\25\1\0\5\25\1\0\5\25\1\0"+
|
||||
"\2\25\1\0\2\25\1\0\1\25\1\0\3\25\4\0"+
|
||||
"\1\25\4\0\1\25\30\0\3\25\5\0\1\25\1\0"+
|
||||
"\1\25\1\0\1\25\4\0\1\25\14\0\1\25\5\0"+
|
||||
"\1\25\11\0\2\25\12\0\1\26\1\0\2\25\12\0"+
|
||||
"\1\25\23\0\1\25\1\0\1\26\7\0\2\25\2\0"+
|
||||
"\3\2\1\30\2\31\1\32\1\33\1\34\151\0\1\25"+
|
||||
"\11\0\1\25\20\0\1\25\22\0\1\25\10\0\3\25"+
|
||||
"\17\0\1\25\10\0\1\25\24\0\1\25\1\0\1\25"+
|
||||
"\1\0\1\25\1\0\1\25\1\0\1\25\1\0\3\25"+
|
||||
"\1\0\5\25\1\0\3\25\1\0\11\25\1\0\2\25"+
|
||||
"\1\0\16\25\1\0\2\25\1\0\21\25\1\0\1\25"+
|
||||
"\1\0\3\25\2\0\1\25\1\0\1\25\1\0\2\25"+
|
||||
"\1\0\1\25\17\0\1\25\3\0\1\25\5\0\2\25"+
|
||||
"\3\0\1\25\13\0\1\25\1\0\1\25\4\0\2\25"+
|
||||
"\4\0\1\25\1\0\1\25\3\0\2\25\1\0\1\25"+
|
||||
"\5\0\3\25\1\0\1\25\15\0\1\25\10\0\1\25"+
|
||||
"\24\0\1\25\3\0\1\25\1\0\1\25\1\0\1\25"+
|
||||
"\1\0\3\25\2\0\4\25\1\0\3\25\2\0\3\25"+
|
||||
"\1\0\4\25\1\0\2\25\2\0\3\25\1\0\11\25"+
|
||||
"\1\0\2\25\1\0\16\25\1\0\2\25\1\0\1\25"+
|
||||
"\1\0\3\25\2\0\1\25\1\0\1\25\1\0\2\25"+
|
||||
"\1\0\1\25\17\0\1\25\3\0\1\25\3\0\1\25"+
|
||||
"\1\0\3\25\2\0\1\25\1\0\2\25\1\0\3\25"+
|
||||
"\3\0\2\25\1\0\1\25\1\0\2\25\1\0\2\25"+
|
||||
"\3\0\2\25\1\0\1\25\1\0\1\25\1\0\2\25"+
|
||||
"\1\0\2\25\1\0\2\25\1\0\5\25\1\0\5\25"+
|
||||
"\1\0\2\25\1\0\2\25\1\0\1\25\1\0\3\25"+
|
||||
"\4\0\1\25\4\0\1\25\31\0\3\25\5\0\1\25"+
|
||||
"\1\0\1\25\1\0\1\25\4\0\1\25\14\0\1\25"+
|
||||
"\5\0\1\25\11\0\2\25\12\0\1\26\1\0\2\25"+
|
||||
"\12\0\1\25\24\0\1\25\1\0\1\26\7\0\2\25"+
|
||||
"\2\0\5\25\2\0\2\25\4\0\6\25\1\0\2\25"+
|
||||
"\4\0\5\25\1\0\5\25\1\0\2\25\1\0\3\25"+
|
||||
"\1\0\4\25\1\0\5\25\1\26\1\0\1\25\1\0"+
|
||||
"\1\25\1\0\3\25\2\0\1\25\1\0\1\25\1\0"+
|
||||
"\1\25\2\0\1\25\17\0\1\25\3\0\1\25\5\0"+
|
||||
"\2\25\3\0\1\25\4\0\3\25\4\0\1\25\1\0"+
|
||||
"\1\25\2\0\1\25\1\0\2\25\4\0\1\25\1\0"+
|
||||
"\1\25\3\0\2\25\1\0\1\25\5\0\3\25\1\0"+
|
||||
"\1\25\10\0\1\25\1\0\2\26\1\0\1\25\10\0"+
|
||||
"\1\25\24\0\1\25\3\0\1\25\6\0\2\25\5\0"+
|
||||
"\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+
|
||||
"\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+
|
||||
"\1\25\1\0\3\25\3\0\2\25\4\0\3\25\1\0"+
|
||||
"\1\25\10\0\1\25\1\0\2\25\21\0\1\25\11\0"+
|
||||
"\2\25\17\0\1\25\6\0\2\25\4\0\1\25\5\0"+
|
||||
"\1\25\2\0\1\25\5\0\3\25\1\0\1\25\15\0"+
|
||||
"\1\25\10\0\1\25\24\0\1\25\3\0\1\25\5\0"+
|
||||
"\1\25\32\0\15\25\5\0\3\25\1\0\1\25\5\0"+
|
||||
"\1\25\7\0\1\25\2\0\1\25\5\0\1\25\2\0"+
|
||||
"\1\25\1\0\1\25\106\0\1\33\21\0\1\27\35\0"+
|
||||
"\1\32\3\0\1\32\3\0\1\32\1\0\3\32\2\0"+
|
||||
"\1\32\2\0\1\32\1\0\3\32\3\0\2\32\1\0"+
|
||||
"\1\32\1\0\2\32\1\0\2\32\3\0\2\32\1\0"+
|
||||
"\1\32\3\0\2\32\1\0\2\32\1\0\2\32\1\0"+
|
||||
"\5\32\1\0\5\32\2\0\1\32\1\0\2\32\1\0"+
|
||||
"\1\32\1\0\3\32\4\0\1\32\4\0\1\32\17\0"+
|
||||
"\1\32\1\0\1\32\1\0\1\32\1\0\1\32\1\0"+
|
||||
"\1\32\1\0\3\32\1\0\5\32\1\0\3\32\1\0"+
|
||||
"\11\32\1\0\2\32\1\0\16\32\1\0\2\32\1\0"+
|
||||
"\21\32\1\0\1\32\1\0\3\32\2\0\1\32\1\0"+
|
||||
"\1\32\1\0\2\32\1\0\1\32\17\0\1\32\1\0"+
|
||||
"\1\32\1\0\1\32\3\0\1\32\1\0\3\32\1\0"+
|
||||
"\2\32\1\0\2\32\1\0\3\32\1\0\11\32\1\0"+
|
||||
"\2\32\1\0\16\32\1\0\2\32\1\0\21\32\1\0"+
|
||||
"\1\32\1\0\3\32\2\0\1\32\1\0\1\32\1\0"+
|
||||
"\2\32\1\0\1\32\17\0\1\32\11\0\1\32\20\0"+
|
||||
"\1\32\33\0\1\32\21\0\1\32\10\0\1\32\24\0"+
|
||||
"\1\32\1\0\1\32\1\0\1\32\1\0\1\32\1\0"+
|
||||
"\1\32\1\0\3\32\1\0\5\32\1\0\3\32\1\0"+
|
||||
"\6\32\1\0\2\32\1\0\2\32\1\0\10\32\1\0"+
|
||||
"\5\32\1\0\2\32\1\0\21\32\1\0\1\32\1\0"+
|
||||
"\3\32\2\0\1\32\1\0\1\32\1\0\2\32\1\0"+
|
||||
"\1\32\146\0\1\33\16\0\1\35\1\0\1\36\1\0"+
|
||||
"\1\37\1\0\1\40\1\0\1\41\1\0\1\42\3\0"+
|
||||
"\1\43\5\0\1\44\3\0\1\45\11\0\1\46\2\0"+
|
||||
"\1\47\16\0\1\50\2\0\1\51\41\0\2\25\1\52"+
|
||||
"\1\0\1\53\1\0\1\53\1\54\1\0\1\25\2\0"+
|
||||
"\1\25\1\0\1\35\1\0\1\36\1\0\1\37\1\0"+
|
||||
"\1\40\1\0\1\41\1\0\1\55\3\0\1\56\5\0"+
|
||||
"\1\57\3\0\1\60\11\0\1\46\2\0\1\61\16\0"+
|
||||
"\1\62\2\0\1\63\41\0\1\25\2\26\2\0\2\64"+
|
||||
"\1\65\1\0\1\26\2\0\1\25\13\0\1\66\15\0"+
|
||||
"\1\67\14\0\1\70\16\0\1\71\2\0\1\72\21\0"+
|
||||
"\1\73\20\0\1\27\1\0\1\27\3\0\1\54\1\0"+
|
||||
"\1\27\4\0\1\35\1\0\1\36\1\0\1\37\1\0"+
|
||||
"\1\40\1\0\1\41\1\0\1\74\3\0\1\56\5\0"+
|
||||
"\1\57\3\0\1\75\11\0\1\46\2\0\1\76\16\0"+
|
||||
"\1\77\2\0\1\100\21\0\1\101\17\0\1\25\1\102"+
|
||||
"\1\26\1\103\3\0\1\102\1\0\1\102\2\0\1\25"+
|
||||
"\142\0\2\31\4\0\1\35\1\0\1\36\1\0\1\37"+
|
||||
"\1\0\1\40\1\0\1\41\1\0\1\104\3\0\1\43"+
|
||||
"\5\0\1\44\3\0\1\105\11\0\1\46\2\0\1\106"+
|
||||
"\16\0\1\107\2\0\1\110\41\0\1\25\1\34\1\52"+
|
||||
"\1\0\1\53\1\0\1\53\1\54\1\0\1\34\2\0"+
|
||||
"\1\34\2\0\1\25\11\0\3\25\5\0\1\25\1\0"+
|
||||
"\1\25\1\0\1\25\4\0\1\25\4\0\1\25\1\0"+
|
||||
"\2\25\4\0\1\25\5\0\1\25\3\0\1\25\4\0"+
|
||||
"\5\25\10\0\1\52\1\0\2\25\1\0\1\25\10\0"+
|
||||
"\1\25\24\0\1\25\1\0\1\52\7\0\2\25\2\0"+
|
||||
"\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+
|
||||
"\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+
|
||||
"\4\25\1\0\5\25\1\26\1\0\1\25\1\0\1\25"+
|
||||
"\4\25\1\0\5\25\1\52\1\0\1\25\1\0\1\25"+
|
||||
"\1\0\3\25\2\0\1\25\1\0\1\25\1\0\1\25"+
|
||||
"\2\0\1\25\16\0\1\25\3\0\1\25\5\0\2\25"+
|
||||
"\2\0\1\25\17\0\1\25\3\0\1\25\5\0\2\25"+
|
||||
"\3\0\1\25\4\0\3\25\4\0\1\25\1\0\1\25"+
|
||||
"\2\0\1\25\1\0\2\25\4\0\1\25\1\0\1\25"+
|
||||
"\3\0\2\25\1\0\1\25\5\0\3\25\1\0\1\25"+
|
||||
"\10\0\1\25\1\0\2\26\1\0\1\25\10\0\1\25"+
|
||||
"\23\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
|
||||
"\10\0\1\25\1\0\2\52\1\0\1\25\10\0\1\25"+
|
||||
"\24\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
|
||||
"\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
|
||||
"\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
|
||||
"\1\0\3\25\3\0\2\25\4\0\3\25\1\0\1\25"+
|
||||
"\10\0\1\25\1\0\2\25\20\0\1\25\11\0\2\25"+
|
||||
"\17\0\1\25\6\0\2\25\4\0\1\25\5\0\1\25"+
|
||||
"\2\0\1\25\5\0\3\25\1\0\1\25\15\0\1\25"+
|
||||
"\10\0\1\25\23\0\1\25\3\0\1\25\5\0\1\25"+
|
||||
"\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
|
||||
"\7\0\1\25\2\0\1\25\5\0\1\25\2\0\1\25"+
|
||||
"\1\0\1\25\105\0\1\33\21\0\1\27\34\0\1\32"+
|
||||
"\3\0\1\32\3\0\1\32\1\0\3\32\2\0\1\32"+
|
||||
"\2\0\1\32\1\0\3\32\3\0\2\32\1\0\1\32"+
|
||||
"\1\0\2\32\1\0\2\32\3\0\2\32\1\0\1\32"+
|
||||
"\3\0\2\32\1\0\2\32\1\0\2\32\1\0\5\32"+
|
||||
"\1\0\5\32\2\0\1\32\1\0\2\32\1\0\1\32"+
|
||||
"\1\0\3\32\4\0\1\32\4\0\1\32\16\0\1\32"+
|
||||
"\1\0\1\32\1\0\1\32\1\0\1\32\1\0\1\32"+
|
||||
"\1\0\3\32\1\0\5\32\1\0\3\32\1\0\11\32"+
|
||||
"\1\0\2\32\1\0\16\32\1\0\2\32\1\0\21\32"+
|
||||
"\1\0\1\32\1\0\3\32\2\0\1\32\1\0\1\32"+
|
||||
"\1\0\2\32\1\0\1\32\16\0\1\32\1\0\1\32"+
|
||||
"\1\0\1\32\3\0\1\32\1\0\3\32\1\0\2\32"+
|
||||
"\1\0\2\32\1\0\3\32\1\0\11\32\1\0\2\32"+
|
||||
"\1\0\16\32\1\0\2\32\1\0\21\32\1\0\1\32"+
|
||||
"\1\0\3\32\2\0\1\32\1\0\1\32\1\0\2\32"+
|
||||
"\1\0\1\32\16\0\1\32\11\0\1\32\20\0\1\32"+
|
||||
"\33\0\1\32\21\0\1\32\10\0\1\32\23\0\1\32"+
|
||||
"\1\0\1\32\1\0\1\32\1\0\1\32\1\0\1\32"+
|
||||
"\1\0\3\32\1\0\5\32\1\0\3\32\1\0\6\32"+
|
||||
"\1\0\2\32\1\0\2\32\1\0\10\32\1\0\5\32"+
|
||||
"\1\0\2\32\1\0\21\32\1\0\1\32\1\0\3\32"+
|
||||
"\2\0\1\32\1\0\1\32\1\0\2\32\1\0\1\32"+
|
||||
"\145\0\1\33\15\0\1\34\1\0\1\35\1\0\1\36"+
|
||||
"\1\0\1\37\1\0\1\40\1\0\1\41\3\0\1\42"+
|
||||
"\5\0\1\43\3\0\1\44\11\0\1\45\2\0\1\46"+
|
||||
"\16\0\1\47\2\0\1\50\41\0\2\25\1\51\1\0"+
|
||||
"\1\52\1\0\1\52\1\53\1\0\1\25\3\0\1\34"+
|
||||
"\1\0\1\35\1\0\1\36\1\0\1\37\1\0\1\40"+
|
||||
"\1\0\1\54\3\0\1\55\5\0\1\56\3\0\1\57"+
|
||||
"\11\0\1\45\2\0\1\60\16\0\1\61\2\0\1\62"+
|
||||
"\41\0\1\25\2\26\2\0\2\63\1\64\1\0\1\26"+
|
||||
"\15\0\1\65\15\0\1\66\14\0\1\67\16\0\1\70"+
|
||||
"\2\0\1\71\21\0\1\72\20\0\1\27\1\0\1\27"+
|
||||
"\3\0\1\53\1\0\1\27\3\0\1\34\1\0\1\35"+
|
||||
"\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\73"+
|
||||
"\3\0\1\55\5\0\1\56\3\0\1\74\11\0\1\45"+
|
||||
"\2\0\1\75\16\0\1\76\2\0\1\77\21\0\1\72"+
|
||||
"\17\0\1\25\1\100\1\26\1\27\3\0\1\100\1\0"+
|
||||
"\1\100\144\0\2\31\4\0\1\25\11\0\3\25\5\0"+
|
||||
"\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+
|
||||
"\1\25\1\0\2\25\4\0\1\25\5\0\1\25\3\0"+
|
||||
"\1\25\4\0\5\25\10\0\1\51\1\0\2\25\1\0"+
|
||||
"\1\25\10\0\1\25\23\0\1\25\1\0\1\51\7\0"+
|
||||
"\2\25\2\0\5\25\2\0\2\25\4\0\6\25\1\0"+
|
||||
"\2\25\4\0\5\25\1\0\5\25\1\0\2\25\1\0"+
|
||||
"\3\25\1\0\4\25\1\0\5\25\1\51\1\0\1\25"+
|
||||
"\1\0\1\25\1\0\3\25\2\0\1\25\1\0\1\25"+
|
||||
"\1\0\1\25\2\0\1\25\16\0\1\25\3\0\1\25"+
|
||||
"\5\0\2\25\3\0\1\25\4\0\3\25\4\0\1\25"+
|
||||
"\1\0\1\25\2\0\1\25\1\0\2\25\4\0\1\25"+
|
||||
"\1\0\1\25\3\0\2\25\1\0\1\25\5\0\3\25"+
|
||||
"\1\0\1\25\10\0\1\25\1\0\2\51\1\0\1\25"+
|
||||
"\10\0\1\25\23\0\1\25\3\0\1\25\6\0\2\25"+
|
||||
"\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+
|
||||
"\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+
|
||||
"\1\0\1\25\1\0\3\25\1\0\1\25\1\0\2\25"+
|
||||
"\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+
|
||||
"\20\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+
|
||||
"\5\0\3\25\1\0\1\25\5\0\3\25\5\0\1\25"+
|
||||
"\2\0\2\25\4\0\1\25\2\0\1\25\1\0\1\25"+
|
||||
"\102\0\2\25\6\0\1\25\55\0\1\25\3\0\1\25"+
|
||||
"\2\0\1\25\3\0\1\25\5\0\1\25\7\0\1\25"+
|
||||
"\4\0\2\25\3\0\2\25\1\0\1\25\4\0\1\25"+
|
||||
"\1\0\1\25\2\0\2\25\1\0\3\25\1\0\1\25"+
|
||||
"\2\0\4\25\2\0\1\25\40\0\1\34\1\0\1\35"+
|
||||
"\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\101"+
|
||||
"\3\0\1\42\5\0\1\43\3\0\1\102\11\0\1\45"+
|
||||
"\2\0\1\103\16\0\1\104\2\0\1\105\41\0\1\25"+
|
||||
"\2\51\2\0\2\106\1\53\1\0\1\51\3\0\1\34"+
|
||||
"\1\0\1\35\1\0\1\36\1\0\1\37\1\0\1\40"+
|
||||
"\1\0\1\107\3\0\1\110\5\0\1\111\3\0\1\112"+
|
||||
"\11\0\1\45\2\0\1\113\16\0\1\114\2\0\1\115"+
|
||||
"\41\0\1\25\1\52\7\0\1\52\3\0\1\34\1\0"+
|
||||
"\1\0\3\25\1\0\1\25\1\0\2\25\4\0\3\25"+
|
||||
"\1\0\1\25\10\0\1\25\1\0\2\25\21\0\1\25"+
|
||||
"\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+
|
||||
"\1\0\1\25\5\0\3\25\5\0\1\25\2\0\2\25"+
|
||||
"\4\0\1\25\2\0\1\25\1\0\1\25\103\0\2\25"+
|
||||
"\6\0\1\25\56\0\1\25\3\0\1\25\2\0\1\25"+
|
||||
"\3\0\1\25\5\0\1\25\7\0\1\25\4\0\2\25"+
|
||||
"\3\0\2\25\1\0\1\25\4\0\1\25\1\0\1\25"+
|
||||
"\2\0\2\25\1\0\3\25\1\0\1\25\2\0\4\25"+
|
||||
"\2\0\1\25\41\0\1\35\1\0\1\36\1\0\1\37"+
|
||||
"\1\0\1\40\1\0\1\41\1\0\1\111\3\0\1\43"+
|
||||
"\5\0\1\44\3\0\1\112\11\0\1\46\2\0\1\113"+
|
||||
"\16\0\1\114\2\0\1\115\41\0\1\25\2\52\2\0"+
|
||||
"\2\116\1\54\1\0\1\52\2\0\1\25\1\0\1\35"+
|
||||
"\1\0\1\36\1\0\1\37\1\0\1\40\1\0\1\41"+
|
||||
"\1\0\1\117\3\0\1\120\5\0\1\121\3\0\1\122"+
|
||||
"\11\0\1\46\2\0\1\123\16\0\1\124\2\0\1\125"+
|
||||
"\41\0\1\25\1\53\7\0\1\53\2\0\1\25\1\0"+
|
||||
"\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+
|
||||
"\1\116\3\0\1\42\5\0\1\43\3\0\1\117\11\0"+
|
||||
"\1\45\2\0\1\120\16\0\1\121\2\0\1\122\21\0"+
|
||||
"\1\72\17\0\1\25\1\53\1\51\1\27\3\0\1\53"+
|
||||
"\1\0\1\53\4\0\1\26\11\0\3\25\5\0\1\25"+
|
||||
"\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\26"+
|
||||
"\1\0\2\26\4\0\1\25\5\0\1\25\3\0\1\26"+
|
||||
"\4\0\1\26\2\25\2\26\10\0\1\26\1\0\2\25"+
|
||||
"\1\0\1\26\10\0\1\25\23\0\1\25\3\0\1\25"+
|
||||
"\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
|
||||
"\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
|
||||
"\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\26"+
|
||||
"\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
|
||||
"\1\0\2\25\20\0\1\25\3\0\1\25\5\0\1\25"+
|
||||
"\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
|
||||
"\2\26\5\0\1\25\2\0\1\25\1\26\4\0\1\25"+
|
||||
"\2\0\1\25\1\0\1\25\102\0\2\26\6\0\1\26"+
|
||||
"\55\0\1\26\3\0\1\26\2\0\1\26\3\0\1\26"+
|
||||
"\5\0\1\26\7\0\1\26\4\0\2\26\3\0\2\26"+
|
||||
"\1\0\1\26\4\0\1\26\1\0\1\26\2\0\2\26"+
|
||||
"\1\0\3\26\1\0\1\26\2\0\4\26\2\0\1\26"+
|
||||
"\52\0\1\123\3\0\1\124\5\0\1\125\3\0\1\126"+
|
||||
"\14\0\1\127\16\0\1\130\2\0\1\131\42\0\1\63"+
|
||||
"\1\26\6\0\1\63\3\0\1\34\1\0\1\35\1\0"+
|
||||
"\1\36\1\0\1\37\1\0\1\40\1\0\1\132\3\0"+
|
||||
"\1\55\5\0\1\56\3\0\1\133\11\0\1\45\2\0"+
|
||||
"\1\134\16\0\1\135\2\0\1\136\21\0\1\72\17\0"+
|
||||
"\1\25\1\64\1\26\1\27\3\0\1\64\1\0\1\64"+
|
||||
"\4\0\1\27\37\0\1\27\1\0\2\27\16\0\1\27"+
|
||||
"\4\0\1\27\2\0\2\27\15\0\1\27\131\0\1\27"+
|
||||
"\152\0\2\27\11\0\1\27\114\0\2\27\6\0\1\27"+
|
||||
"\55\0\1\27\3\0\1\27\2\0\1\27\3\0\1\27"+
|
||||
"\5\0\1\27\7\0\1\27\4\0\2\27\3\0\2\27"+
|
||||
"\1\0\1\27\4\0\1\27\1\0\1\27\2\0\2\27"+
|
||||
"\1\0\3\27\1\0\1\27\2\0\4\27\2\0\1\27"+
|
||||
"\152\0\1\27\34\0\1\100\11\0\3\25\5\0\1\25"+
|
||||
"\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\100"+
|
||||
"\1\0\2\100\4\0\1\25\5\0\1\25\3\0\1\100"+
|
||||
"\4\0\1\100\2\25\2\100\10\0\1\26\1\0\2\25"+
|
||||
"\1\0\1\100\10\0\1\25\23\0\1\25\3\0\1\25"+
|
||||
"\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
|
||||
"\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
|
||||
"\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\100"+
|
||||
"\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
|
||||
"\1\0\2\25\20\0\1\25\3\0\1\25\5\0\1\25"+
|
||||
"\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
|
||||
"\2\100\5\0\1\25\2\0\1\25\1\100\4\0\1\25"+
|
||||
"\2\0\1\25\1\0\1\25\102\0\2\100\6\0\1\100"+
|
||||
"\55\0\1\100\3\0\1\100\2\0\1\100\3\0\1\100"+
|
||||
"\5\0\1\100\7\0\1\100\4\0\2\100\3\0\2\100"+
|
||||
"\1\0\1\100\4\0\1\100\1\0\1\100\2\0\2\100"+
|
||||
"\1\0\3\100\1\0\1\100\2\0\4\100\2\0\1\100"+
|
||||
"\41\0\1\51\11\0\3\25\5\0\1\25\1\0\1\25"+
|
||||
"\1\0\1\25\4\0\1\25\4\0\1\51\1\0\2\51"+
|
||||
"\4\0\1\25\5\0\1\25\3\0\1\51\4\0\1\51"+
|
||||
"\2\25\2\51\10\0\1\51\1\0\2\25\1\0\1\51"+
|
||||
"\10\0\1\25\23\0\1\25\3\0\1\25\6\0\2\25"+
|
||||
"\1\41\1\0\1\126\3\0\1\43\5\0\1\44\3\0"+
|
||||
"\1\127\11\0\1\46\2\0\1\130\16\0\1\131\2\0"+
|
||||
"\1\132\21\0\1\101\17\0\1\25\1\54\1\52\1\103"+
|
||||
"\3\0\1\54\1\0\1\54\2\0\1\25\2\0\1\26"+
|
||||
"\11\0\3\25\5\0\1\25\1\0\1\25\1\0\1\25"+
|
||||
"\4\0\1\25\4\0\1\26\1\0\2\26\4\0\1\25"+
|
||||
"\5\0\1\25\3\0\1\26\4\0\1\26\2\25\2\26"+
|
||||
"\10\0\1\26\1\0\2\25\1\0\1\26\10\0\1\25"+
|
||||
"\24\0\1\25\3\0\1\25\6\0\2\25\5\0\1\25"+
|
||||
"\1\0\1\25\1\0\1\25\1\0\11\25\2\0\1\25"+
|
||||
"\4\0\1\25\4\0\6\25\2\0\1\25\1\0\1\25"+
|
||||
"\1\0\3\25\1\0\1\26\1\0\2\25\4\0\3\25"+
|
||||
"\1\0\1\25\10\0\1\25\1\0\2\25\21\0\1\25"+
|
||||
"\3\0\1\25\5\0\1\25\32\0\15\25\5\0\3\25"+
|
||||
"\1\0\1\25\5\0\1\25\2\26\5\0\1\25\2\0"+
|
||||
"\1\25\1\26\4\0\1\25\2\0\1\25\1\0\1\25"+
|
||||
"\103\0\2\26\6\0\1\26\56\0\1\26\3\0\1\26"+
|
||||
"\2\0\1\26\3\0\1\26\5\0\1\26\7\0\1\26"+
|
||||
"\4\0\2\26\3\0\2\26\1\0\1\26\4\0\1\26"+
|
||||
"\1\0\1\26\2\0\2\26\1\0\3\26\1\0\1\26"+
|
||||
"\2\0\4\26\2\0\1\26\53\0\1\133\3\0\1\134"+
|
||||
"\5\0\1\135\3\0\1\136\14\0\1\137\16\0\1\140"+
|
||||
"\2\0\1\141\42\0\1\64\1\26\6\0\1\64\4\0"+
|
||||
"\1\35\1\0\1\36\1\0\1\37\1\0\1\40\1\0"+
|
||||
"\1\41\1\0\1\142\3\0\1\56\5\0\1\57\3\0"+
|
||||
"\1\143\11\0\1\46\2\0\1\144\16\0\1\145\2\0"+
|
||||
"\1\146\21\0\1\101\17\0\1\25\1\65\1\26\1\103"+
|
||||
"\3\0\1\65\1\0\1\65\2\0\1\25\2\0\1\27"+
|
||||
"\37\0\1\27\1\0\2\27\16\0\1\27\4\0\1\27"+
|
||||
"\2\0\2\27\15\0\1\27\132\0\1\27\153\0\2\27"+
|
||||
"\11\0\1\27\115\0\2\27\6\0\1\27\56\0\1\27"+
|
||||
"\3\0\1\27\2\0\1\27\3\0\1\27\5\0\1\27"+
|
||||
"\7\0\1\27\4\0\2\27\3\0\2\27\1\0\1\27"+
|
||||
"\4\0\1\27\1\0\1\27\2\0\2\27\1\0\3\27"+
|
||||
"\1\0\1\27\2\0\4\27\2\0\1\27\153\0\1\27"+
|
||||
"\35\0\1\102\11\0\3\25\5\0\1\25\1\0\1\25"+
|
||||
"\1\0\1\25\4\0\1\25\4\0\1\102\1\0\2\102"+
|
||||
"\4\0\1\25\5\0\1\25\3\0\1\102\4\0\1\102"+
|
||||
"\2\25\2\102\10\0\1\26\1\0\2\25\1\0\1\102"+
|
||||
"\10\0\1\25\24\0\1\25\3\0\1\25\6\0\2\25"+
|
||||
"\5\0\1\25\1\0\1\25\1\0\1\25\1\0\11\25"+
|
||||
"\2\0\1\25\4\0\1\25\4\0\6\25\2\0\1\25"+
|
||||
"\1\0\1\25\1\0\3\25\1\0\1\51\1\0\2\25"+
|
||||
"\1\0\1\25\1\0\3\25\1\0\1\102\1\0\2\25"+
|
||||
"\4\0\3\25\1\0\1\25\10\0\1\25\1\0\2\25"+
|
||||
"\20\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+
|
||||
"\5\0\3\25\1\0\1\25\5\0\1\25\2\51\5\0"+
|
||||
"\1\25\2\0\1\25\1\51\4\0\1\25\2\0\1\25"+
|
||||
"\1\0\1\25\102\0\2\51\6\0\1\51\55\0\1\51"+
|
||||
"\3\0\1\51\2\0\1\51\3\0\1\51\5\0\1\51"+
|
||||
"\7\0\1\51\4\0\2\51\3\0\2\51\1\0\1\51"+
|
||||
"\4\0\1\51\1\0\1\51\2\0\2\51\1\0\3\51"+
|
||||
"\1\0\1\51\2\0\4\51\2\0\1\51\52\0\1\137"+
|
||||
"\3\0\1\140\5\0\1\141\3\0\1\142\14\0\1\143"+
|
||||
"\16\0\1\144\2\0\1\145\42\0\1\106\1\51\6\0"+
|
||||
"\1\106\4\0\1\52\11\0\3\25\5\0\1\25\1\0"+
|
||||
"\1\25\1\0\1\25\4\0\1\25\4\0\1\52\1\0"+
|
||||
"\2\52\4\0\1\25\5\0\1\25\3\0\1\52\4\0"+
|
||||
"\1\52\2\25\2\52\12\0\2\25\1\0\1\52\10\0"+
|
||||
"\1\25\23\0\1\25\11\0\2\25\2\0\5\25\2\0"+
|
||||
"\2\25\4\0\6\25\1\0\2\25\4\0\5\25\1\0"+
|
||||
"\5\25\1\0\2\25\1\0\3\25\1\0\4\25\1\0"+
|
||||
"\5\25\2\0\1\25\1\0\1\25\1\0\3\25\2\0"+
|
||||
"\1\25\1\0\1\25\1\0\1\25\2\0\1\25\16\0"+
|
||||
"\1\25\3\0\1\25\5\0\2\25\3\0\1\25\4\0"+
|
||||
"\3\25\4\0\1\25\1\0\1\25\2\0\1\25\1\0"+
|
||||
"\2\25\4\0\1\25\1\0\1\25\3\0\2\25\1\0"+
|
||||
"\1\25\5\0\3\25\1\0\1\25\10\0\1\25\4\0"+
|
||||
"\1\25\10\0\1\25\23\0\1\25\3\0\1\25\6\0"+
|
||||
"\21\0\1\25\3\0\1\25\5\0\1\25\32\0\15\25"+
|
||||
"\5\0\3\25\1\0\1\25\5\0\1\25\2\102\5\0"+
|
||||
"\1\25\2\0\1\25\1\102\4\0\1\25\2\0\1\25"+
|
||||
"\1\0\1\25\103\0\2\102\6\0\1\102\56\0\1\102"+
|
||||
"\3\0\1\102\2\0\1\102\3\0\1\102\5\0\1\102"+
|
||||
"\7\0\1\102\4\0\2\102\3\0\2\102\1\0\1\102"+
|
||||
"\4\0\1\102\1\0\1\102\2\0\2\102\1\0\3\102"+
|
||||
"\1\0\1\102\2\0\4\102\2\0\1\102\153\0\1\103"+
|
||||
"\46\0\1\147\15\0\1\150\14\0\1\151\16\0\1\152"+
|
||||
"\2\0\1\153\21\0\1\101\20\0\1\103\1\0\1\103"+
|
||||
"\3\0\1\54\1\0\1\103\5\0\1\34\11\0\3\25"+
|
||||
"\5\0\1\25\1\0\1\25\1\0\1\25\4\0\1\25"+
|
||||
"\4\0\1\34\1\0\2\34\4\0\1\25\5\0\1\25"+
|
||||
"\3\0\1\34\4\0\1\34\2\25\2\34\10\0\1\52"+
|
||||
"\1\0\2\25\1\0\1\34\10\0\1\25\24\0\1\25"+
|
||||
"\3\0\1\25\6\0\2\25\5\0\1\25\1\0\1\25"+
|
||||
"\1\0\1\25\1\0\11\25\2\0\1\25\4\0\1\25"+
|
||||
"\4\0\6\25\2\0\1\25\1\0\1\25\1\0\3\25"+
|
||||
"\1\0\1\34\1\0\2\25\4\0\3\25\1\0\1\25"+
|
||||
"\10\0\1\25\1\0\2\25\21\0\1\25\3\0\1\25"+
|
||||
"\5\0\1\25\32\0\15\25\5\0\3\25\1\0\1\25"+
|
||||
"\5\0\1\25\2\34\5\0\1\25\2\0\1\25\1\34"+
|
||||
"\4\0\1\25\2\0\1\25\1\0\1\25\103\0\2\34"+
|
||||
"\6\0\1\34\56\0\1\34\3\0\1\34\2\0\1\34"+
|
||||
"\3\0\1\34\5\0\1\34\7\0\1\34\4\0\2\34"+
|
||||
"\3\0\2\34\1\0\1\34\4\0\1\34\1\0\1\34"+
|
||||
"\2\0\2\34\1\0\3\34\1\0\1\34\2\0\4\34"+
|
||||
"\2\0\1\34\42\0\1\52\11\0\3\25\5\0\1\25"+
|
||||
"\1\0\1\25\1\0\1\25\4\0\1\25\4\0\1\52"+
|
||||
"\1\0\2\52\4\0\1\25\5\0\1\25\3\0\1\52"+
|
||||
"\4\0\1\52\2\25\2\52\10\0\1\52\1\0\2\25"+
|
||||
"\1\0\1\52\10\0\1\25\24\0\1\25\3\0\1\25"+
|
||||
"\6\0\2\25\5\0\1\25\1\0\1\25\1\0\1\25"+
|
||||
"\1\0\11\25\2\0\1\25\4\0\1\25\4\0\6\25"+
|
||||
"\2\0\1\25\1\0\1\25\1\0\3\25\1\0\1\52"+
|
||||
"\1\0\2\25\4\0\3\25\1\0\1\25\10\0\1\25"+
|
||||
"\1\0\2\25\21\0\1\25\3\0\1\25\5\0\1\25"+
|
||||
"\32\0\15\25\5\0\3\25\1\0\1\25\5\0\1\25"+
|
||||
"\2\52\5\0\1\25\2\0\1\25\1\52\4\0\1\25"+
|
||||
"\2\0\1\25\1\0\1\25\103\0\2\52\6\0\1\52"+
|
||||
"\56\0\1\52\3\0\1\52\2\0\1\52\3\0\1\52"+
|
||||
"\5\0\1\52\7\0\1\52\4\0\2\52\3\0\2\52"+
|
||||
"\1\0\1\52\4\0\1\52\1\0\1\52\2\0\2\52"+
|
||||
"\1\0\3\52\1\0\1\52\2\0\4\52\2\0\1\52"+
|
||||
"\53\0\1\154\3\0\1\155\5\0\1\156\3\0\1\157"+
|
||||
"\14\0\1\160\16\0\1\161\2\0\1\162\42\0\1\116"+
|
||||
"\1\52\6\0\1\116\5\0\1\53\11\0\3\25\5\0"+
|
||||
"\1\25\1\0\1\25\1\0\1\25\4\0\1\25\4\0"+
|
||||
"\1\53\1\0\2\53\4\0\1\25\5\0\1\25\3\0"+
|
||||
"\1\53\4\0\1\53\2\25\2\53\12\0\2\25\1\0"+
|
||||
"\1\53\10\0\1\25\24\0\1\25\11\0\2\25\2\0"+
|
||||
"\5\25\2\0\2\25\4\0\6\25\1\0\2\25\4\0"+
|
||||
"\5\25\1\0\5\25\1\0\2\25\1\0\3\25\1\0"+
|
||||
"\4\25\1\0\5\25\2\0\1\25\1\0\1\25\1\0"+
|
||||
"\3\25\2\0\1\25\1\0\1\25\1\0\1\25\2\0"+
|
||||
"\1\25\17\0\1\25\3\0\1\25\5\0\2\25\3\0"+
|
||||
"\1\25\4\0\3\25\4\0\1\25\1\0\1\25\2\0"+
|
||||
"\1\25\1\0\2\25\4\0\1\25\1\0\1\25\3\0"+
|
||||
"\2\25\1\0\1\25\5\0\3\25\1\0\1\25\10\0"+
|
||||
"\1\25\4\0\1\25\10\0\1\25\24\0\1\25\3\0"+
|
||||
"\1\25\6\0\2\25\5\0\1\25\1\0\1\25\1\0"+
|
||||
"\1\25\1\0\11\25\2\0\1\25\4\0\1\25\4\0"+
|
||||
"\6\25\2\0\1\25\1\0\1\25\1\0\3\25\1\0"+
|
||||
"\1\53\1\0\2\25\4\0\3\25\1\0\1\25\10\0"+
|
||||
"\1\25\1\0\2\25\21\0\1\25\3\0\1\25\5\0"+
|
||||
"\1\25\32\0\15\25\5\0\3\25\1\0\1\25\5\0"+
|
||||
"\1\25\2\53\5\0\1\25\2\0\1\25\1\53\4\0"+
|
||||
"\1\25\2\0\1\25\1\0\1\25\103\0\2\53\6\0"+
|
||||
"\1\53\56\0\1\53\3\0\1\53\2\0\1\53\3\0"+
|
||||
"\1\53\5\0\1\53\7\0\1\53\4\0\2\53\3\0"+
|
||||
"\2\53\1\0\1\53\4\0\1\53\1\0\1\53\2\0"+
|
||||
"\2\53\1\0\3\53\1\0\1\53\2\0\4\53\2\0"+
|
||||
"\1\53\42\0\1\54\11\0\3\25\5\0\1\25\1\0"+
|
||||
"\1\25\1\0\1\25\4\0\1\25\4\0\1\54\1\0"+
|
||||
"\2\54\4\0\1\25\5\0\1\25\3\0\1\54\4\0"+
|
||||
"\1\54\2\25\2\54\10\0\1\52\1\0\2\25\1\0"+
|
||||
"\1\54\10\0\1\25\24\0\1\25\3\0\1\25\6\0"+
|
||||
"\2\25\5\0\1\25\1\0\1\25\1\0\1\25\1\0"+
|
||||
"\11\25\2\0\1\25\4\0\1\25\4\0\6\25\2\0"+
|
||||
"\1\25\1\0\1\25\1\0\3\25\1\0\1\52\1\0"+
|
||||
"\1\25\1\0\1\25\1\0\3\25\1\0\1\54\1\0"+
|
||||
"\2\25\4\0\3\25\1\0\1\25\10\0\1\25\1\0"+
|
||||
"\2\25\20\0\1\25\3\0\1\25\5\0\1\25\32\0"+
|
||||
"\15\25\5\0\3\25\1\0\1\25\5\0\1\25\2\52"+
|
||||
"\5\0\1\25\2\0\1\25\1\52\4\0\1\25\2\0"+
|
||||
"\1\25\1\0\1\25\102\0\2\52\6\0\1\52\55\0"+
|
||||
"\1\52\3\0\1\52\2\0\1\52\3\0\1\52\5\0"+
|
||||
"\1\52\7\0\1\52\4\0\2\52\3\0\2\52\1\0"+
|
||||
"\1\52\4\0\1\52\1\0\1\52\2\0\2\52\1\0"+
|
||||
"\3\52\1\0\1\52\2\0\4\52\2\0\1\52\41\0"+
|
||||
"\1\53\11\0\3\25\5\0\1\25\1\0\1\25\1\0"+
|
||||
"\1\25\4\0\1\25\4\0\1\53\1\0\2\53\4\0"+
|
||||
"\1\25\5\0\1\25\3\0\1\53\4\0\1\53\2\25"+
|
||||
"\2\53\10\0\1\51\1\0\2\25\1\0\1\53\10\0"+
|
||||
"\1\25\23\0\1\25\3\0\1\25\6\0\2\25\5\0"+
|
||||
"\2\25\21\0\1\25\3\0\1\25\5\0\1\25\32\0"+
|
||||
"\15\25\5\0\3\25\1\0\1\25\5\0\1\25\2\54"+
|
||||
"\5\0\1\25\2\0\1\25\1\54\4\0\1\25\2\0"+
|
||||
"\1\25\1\0\1\25\103\0\2\54\6\0\1\54\56\0"+
|
||||
"\1\54\3\0\1\54\2\0\1\54\3\0\1\54\5\0"+
|
||||
"\1\54\7\0\1\54\4\0\2\54\3\0\2\54\1\0"+
|
||||
"\1\54\4\0\1\54\1\0\1\54\2\0\2\54\1\0"+
|
||||
"\3\54\1\0\1\54\2\0\4\54\2\0\1\54\42\0"+
|
||||
"\1\64\37\0\1\64\1\0\2\64\16\0\1\64\4\0"+
|
||||
"\1\64\2\0\2\64\10\0\1\26\4\0\1\64\37\0"+
|
||||
"\1\26\102\0\1\26\147\0\2\26\134\0\1\64\153\0"+
|
||||
"\2\64\11\0\1\64\115\0\2\64\6\0\1\64\56\0"+
|
||||
"\1\64\3\0\1\64\2\0\1\64\3\0\1\64\5\0"+
|
||||
"\1\64\7\0\1\64\4\0\2\64\3\0\2\64\1\0"+
|
||||
"\1\64\4\0\1\64\1\0\1\64\2\0\2\64\1\0"+
|
||||
"\3\64\1\0\1\64\2\0\4\64\2\0\1\64\42\0"+
|
||||
"\1\65\11\0\3\25\5\0\1\25\1\0\1\25\1\0"+
|
||||
"\1\25\4\0\1\25\4\0\1\65\1\0\2\65\4\0"+
|
||||
"\1\25\5\0\1\25\3\0\1\65\4\0\1\65\2\25"+
|
||||
"\2\65\10\0\1\26\1\0\2\25\1\0\1\65\10\0"+
|
||||
"\1\25\24\0\1\25\3\0\1\25\6\0\2\25\5\0"+
|
||||
"\1\25\1\0\1\25\1\0\1\25\1\0\11\25\2\0"+
|
||||
"\1\25\4\0\1\25\4\0\6\25\2\0\1\25\1\0"+
|
||||
"\1\25\1\0\3\25\1\0\1\53\1\0\2\25\4\0"+
|
||||
"\3\25\1\0\1\25\10\0\1\25\1\0\2\25\20\0"+
|
||||
"\1\25\1\0\3\25\1\0\1\65\1\0\2\25\4\0"+
|
||||
"\3\25\1\0\1\25\10\0\1\25\1\0\2\25\21\0"+
|
||||
"\1\25\3\0\1\25\5\0\1\25\32\0\15\25\5\0"+
|
||||
"\3\25\1\0\1\25\5\0\1\25\2\53\5\0\1\25"+
|
||||
"\2\0\1\25\1\53\4\0\1\25\2\0\1\25\1\0"+
|
||||
"\1\25\102\0\2\53\6\0\1\53\55\0\1\53\3\0"+
|
||||
"\1\53\2\0\1\53\3\0\1\53\5\0\1\53\7\0"+
|
||||
"\1\53\4\0\2\53\3\0\2\53\1\0\1\53\4\0"+
|
||||
"\1\53\1\0\1\53\2\0\2\53\1\0\3\53\1\0"+
|
||||
"\1\53\2\0\4\53\2\0\1\53\41\0\1\63\37\0"+
|
||||
"\1\63\1\0\2\63\16\0\1\63\4\0\1\63\2\0"+
|
||||
"\2\63\10\0\1\26\4\0\1\63\36\0\1\26\102\0"+
|
||||
"\1\26\146\0\2\26\133\0\1\63\152\0\2\63\11\0"+
|
||||
"\1\63\114\0\2\63\6\0\1\63\55\0\1\63\3\0"+
|
||||
"\1\63\2\0\1\63\3\0\1\63\5\0\1\63\7\0"+
|
||||
"\1\63\4\0\2\63\3\0\2\63\1\0\1\63\4\0"+
|
||||
"\1\63\1\0\1\63\2\0\2\63\1\0\3\63\1\0"+
|
||||
"\1\63\2\0\4\63\2\0\1\63\41\0\1\64\11\0"+
|
||||
"\3\25\5\0\1\25\1\0\1\25\1\0\1\25\4\0"+
|
||||
"\1\25\4\0\1\64\1\0\2\64\4\0\1\25\5\0"+
|
||||
"\1\25\3\0\1\64\4\0\1\64\2\25\2\64\10\0"+
|
||||
"\1\26\1\0\2\25\1\0\1\64\10\0\1\25\23\0"+
|
||||
"\1\25\3\0\1\25\6\0\2\25\5\0\1\25\1\0"+
|
||||
"\1\25\1\0\1\25\1\0\11\25\2\0\1\25\4\0"+
|
||||
"\1\25\4\0\6\25\2\0\1\25\1\0\1\25\1\0"+
|
||||
"\3\25\1\0\1\64\1\0\2\25\4\0\3\25\1\0"+
|
||||
"\1\25\10\0\1\25\1\0\2\25\20\0\1\25\3\0"+
|
||||
"\1\25\5\0\1\25\32\0\15\25\5\0\3\25\1\0"+
|
||||
"\1\25\5\0\1\25\2\64\5\0\1\25\2\0\1\25"+
|
||||
"\1\64\4\0\1\25\2\0\1\25\1\0\1\25\102\0"+
|
||||
"\2\64\6\0\1\64\55\0\1\64\3\0\1\64\2\0"+
|
||||
"\1\64\3\0\1\64\5\0\1\64\7\0\1\64\4\0"+
|
||||
"\2\64\3\0\2\64\1\0\1\64\4\0\1\64\1\0"+
|
||||
"\1\64\2\0\2\64\1\0\3\64\1\0\1\64\2\0"+
|
||||
"\4\64\2\0\1\64\41\0\1\106\37\0\1\106\1\0"+
|
||||
"\2\106\16\0\1\106\4\0\1\106\2\0\2\106\10\0"+
|
||||
"\1\51\4\0\1\106\36\0\1\51\102\0\1\51\146\0"+
|
||||
"\2\51\133\0\1\106\152\0\2\106\11\0\1\106\114\0"+
|
||||
"\2\106\6\0\1\106\55\0\1\106\3\0\1\106\2\0"+
|
||||
"\1\106\3\0\1\106\5\0\1\106\7\0\1\106\4\0"+
|
||||
"\2\106\3\0\2\106\1\0\1\106\4\0\1\106\1\0"+
|
||||
"\1\106\2\0\2\106\1\0\3\106\1\0\1\106\2\0"+
|
||||
"\4\106\2\0\1\106\37\0";
|
||||
"\3\25\1\0\1\25\5\0\1\25\2\65\5\0\1\25"+
|
||||
"\2\0\1\25\1\65\4\0\1\25\2\0\1\25\1\0"+
|
||||
"\1\25\103\0\2\65\6\0\1\65\56\0\1\65\3\0"+
|
||||
"\1\65\2\0\1\65\3\0\1\65\5\0\1\65\7\0"+
|
||||
"\1\65\4\0\2\65\3\0\2\65\1\0\1\65\4\0"+
|
||||
"\1\65\1\0\1\65\2\0\2\65\1\0\3\65\1\0"+
|
||||
"\1\65\2\0\4\65\2\0\1\65\42\0\1\103\37\0"+
|
||||
"\1\103\1\0\2\103\16\0\1\103\4\0\1\103\2\0"+
|
||||
"\2\103\15\0\1\103\132\0\1\103\153\0\2\103\11\0"+
|
||||
"\1\103\115\0\2\103\6\0\1\103\56\0\1\103\3\0"+
|
||||
"\1\103\2\0\1\103\3\0\1\103\5\0\1\103\7\0"+
|
||||
"\1\103\4\0\2\103\3\0\2\103\1\0\1\103\4\0"+
|
||||
"\1\103\1\0\1\103\2\0\2\103\1\0\3\103\1\0"+
|
||||
"\1\103\2\0\4\103\2\0\1\103\42\0\1\116\37\0"+
|
||||
"\1\116\1\0\2\116\16\0\1\116\4\0\1\116\2\0"+
|
||||
"\2\116\10\0\1\52\4\0\1\116\37\0\1\52\102\0"+
|
||||
"\1\52\147\0\2\52\134\0\1\116\153\0\2\116\11\0"+
|
||||
"\1\116\115\0\2\116\6\0\1\116\56\0\1\116\3\0"+
|
||||
"\1\116\2\0\1\116\3\0\1\116\5\0\1\116\7\0"+
|
||||
"\1\116\4\0\2\116\3\0\2\116\1\0\1\116\4\0"+
|
||||
"\1\116\1\0\1\116\2\0\2\116\1\0\3\116\1\0"+
|
||||
"\1\116\2\0\4\116\2\0\1\116\40\0";
|
||||
|
||||
private static int [] zzUnpackTrans() {
|
||||
int [] result = new int[9180];
|
||||
int [] result = new int[10609];
|
||||
int offset = 0;
|
||||
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
|
||||
return result;
|
||||
|
@ -584,11 +621,11 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
|||
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
|
||||
|
||||
private static final String ZZ_ATTRIBUTE_PACKED_0 =
|
||||
"\1\0\1\11\27\1\2\11\15\0\1\1\1\0\1\1"+
|
||||
"\10\0\1\1\61\0";
|
||||
"\1\0\1\11\27\1\2\11\1\1\15\0\1\1\1\0"+
|
||||
"\1\1\10\0\1\1\15\0\1\1\57\0";
|
||||
|
||||
private static int [] zzUnpackAttribute() {
|
||||
int [] result = new int[101];
|
||||
int [] result = new int[114];
|
||||
int offset = 0;
|
||||
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
|
||||
return result;
|
||||
|
@ -675,6 +712,10 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
|||
public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
|
||||
|
||||
public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
|
||||
|
||||
public static final int KATAKANA_TYPE = StandardTokenizer.KATAKANA;
|
||||
|
||||
public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
|
||||
|
||||
public final int yychar()
|
||||
{
|
||||
|
@ -719,7 +760,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
|||
char [] map = new char[0x10000];
|
||||
int i = 0; /* index in packed string */
|
||||
int j = 0; /* index in unpacked array */
|
||||
while (i < 2640) {
|
||||
while (i < 2650) {
|
||||
int count = packed.charAt(i++);
|
||||
char value = packed.charAt(i++);
|
||||
do map[j++] = value; while (--count > 0);
|
||||
|
@ -1001,27 +1042,35 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
|||
case 2:
|
||||
{ return WORD_TYPE;
|
||||
}
|
||||
case 7: break;
|
||||
case 4:
|
||||
case 9: break;
|
||||
case 5:
|
||||
{ return SOUTH_EAST_ASIAN_TYPE;
|
||||
}
|
||||
case 8: break;
|
||||
case 5:
|
||||
{ return IDEOGRAPHIC_TYPE;
|
||||
}
|
||||
case 9: break;
|
||||
case 1:
|
||||
{ /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
|
||||
}
|
||||
case 10: break;
|
||||
case 3:
|
||||
{ return NUMERIC_TYPE;
|
||||
case 4:
|
||||
{ return KATAKANA_TYPE;
|
||||
}
|
||||
case 11: break;
|
||||
case 6:
|
||||
{ return HIRAGANA_TYPE;
|
||||
{ return IDEOGRAPHIC_TYPE;
|
||||
}
|
||||
case 12: break;
|
||||
case 1:
|
||||
{ /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
|
||||
}
|
||||
case 13: break;
|
||||
case 8:
|
||||
{ return HANGUL_TYPE;
|
||||
}
|
||||
case 14: break;
|
||||
case 3:
|
||||
{ return NUMERIC_TYPE;
|
||||
}
|
||||
case 15: break;
|
||||
case 7:
|
||||
{ return HIRAGANA_TYPE;
|
||||
}
|
||||
case 16: break;
|
||||
default:
|
||||
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
|
||||
zzAtEOF = true;
|
||||
|
|
|
@ -59,6 +59,8 @@ ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
|
|||
Han = ([\p{Script:Han}] | {HanSupp})
|
||||
Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})
|
||||
|
||||
// Script=Hangul & ALetter
|
||||
HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
|
||||
// UAX#29 WB4. X (Extend | Format)* --> X
|
||||
//
|
||||
ALetterEx = {ALetter} ({Format} | {Extend})*
|
||||
|
@ -90,6 +92,10 @@ ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
|
|||
public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
|
||||
|
||||
public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
|
||||
|
||||
public static final int KATAKANA_TYPE = StandardTokenizer.KATAKANA;
|
||||
|
||||
public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
|
||||
|
||||
public final int yychar()
|
||||
{
|
||||
|
@ -123,6 +129,12 @@ ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
|
|||
{ExtendNumLetEx}*
|
||||
{ return NUMERIC_TYPE; }
|
||||
|
||||
// subset of the below for typing purposes only!
|
||||
{HangulEx}+
|
||||
{ return HANGUL_TYPE; }
|
||||
|
||||
{KatakanaEx}+
|
||||
{ return KATAKANA_TYPE; }
|
||||
|
||||
// UAX#29 WB5. ALetter × ALetter
|
||||
// WB6. ALetter × (MidLetter | MidNumLet) ALetter
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -77,6 +77,8 @@ ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
|
|||
Han = ([\p{Script:Han}] | {HanSupp})
|
||||
Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})
|
||||
|
||||
// Script=Hangul & ALetter
|
||||
HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
|
||||
// UAX#29 WB4. X (Extend | Format)* --> X
|
||||
//
|
||||
ALetterEx = {ALetter} ({Format} | {Extend})*
|
||||
|
@ -168,16 +170,16 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
|
|||
|
||||
%{
|
||||
/** Alphanumeric sequences */
|
||||
public static final String WORD_TYPE = "<ALPHANUM>";
|
||||
public static final String WORD_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM];
|
||||
|
||||
/** Numbers */
|
||||
public static final String NUMERIC_TYPE = "<NUM>";
|
||||
public static final String NUMERIC_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM];
|
||||
|
||||
/** URLs with scheme: HTTP(S), FTP, or FILE; no-scheme URLs match HTTP syntax */
|
||||
public static final String URL_TYPE = "<URL>";
|
||||
|
||||
/** E-mail addresses */
|
||||
public static final String EMAIL_TYPE = "<EMAIL";
|
||||
public static final String EMAIL_TYPE = "<EMAIL>";
|
||||
|
||||
/**
|
||||
* Chars in class \p{Line_Break = Complex_Context} are from South East Asian
|
||||
|
@ -187,12 +189,16 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
|
|||
* <p>
|
||||
* See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
|
||||
*/
|
||||
public static final String SOUTH_EAST_ASIAN_TYPE = "<SOUTHEAST_ASIAN>";
|
||||
public static final String SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.SOUTHEAST_ASIAN];
|
||||
|
||||
public static final String IDEOGRAPHIC_TYPE = "<IDEOGRAPHIC>";
|
||||
public static final String IDEOGRAPHIC_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC];
|
||||
|
||||
public static final String HIRAGANA_TYPE = "<HIRAGANA>";
|
||||
public static final String HIRAGANA_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA];
|
||||
|
||||
public static final String KATAKANA_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA];
|
||||
|
||||
public static final String HANGUL_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL];
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
private final PositionIncrementAttribute posIncrAtt
|
||||
|
@ -316,6 +322,12 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
|
|||
{ExtendNumLetEx}*
|
||||
{ if (populateAttributes(NUMERIC_TYPE)) return true; }
|
||||
|
||||
// subset of the below for typing purposes only!
|
||||
{HangulEx}+
|
||||
{ if (populateAttributes(HANGUL_TYPE)) return true; }
|
||||
|
||||
{KatakanaEx}+
|
||||
{ if (populateAttributes(KATAKANA_TYPE)) return true; }
|
||||
|
||||
// UAX#29 WB5. ALetter × ALetter
|
||||
// WB6. ALetter × (MidLetter | MidNumLet) ALetter
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 10/3/10 9:07 AM */
|
||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 2/9/11 11:45 AM */
|
||||
|
||||
package org.apache.lucene.analysis.wikipedia;
|
||||
|
||||
|
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
/**
|
||||
* This class is a scanner generated by
|
||||
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
||||
* on 10/3/10 9:07 AM from the specification file
|
||||
* <tt>C:/Users/rmuir/workspace/lucene-clean/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
|
||||
* on 2/9/11 11:45 AM from the specification file
|
||||
* <tt>C:/Users/rmuir/workspace/lucene-2911/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
|
||||
*/
|
||||
class WikipediaTokenizerImpl {
|
||||
|
||||
|
@ -757,6 +757,12 @@ final int setText(StringBuilder buffer){
|
|||
|
||||
zzState = ZZ_LEXSTATE[zzLexicalState];
|
||||
|
||||
// set up zzAction for empty match case:
|
||||
int zzAttributes = zzAttrL[zzState];
|
||||
if ( (zzAttributes & 1) == 1 ) {
|
||||
zzAction = zzState;
|
||||
}
|
||||
|
||||
|
||||
zzForAction: {
|
||||
while (true) {
|
||||
|
@ -789,7 +795,7 @@ final int setText(StringBuilder buffer){
|
|||
if (zzNext == -1) break zzForAction;
|
||||
zzState = zzNext;
|
||||
|
||||
int zzAttributes = zzAttrL[zzState];
|
||||
zzAttributes = zzAttrL[zzState];
|
||||
if ( (zzAttributes & 1) == 1 ) {
|
||||
zzAction = zzState;
|
||||
zzMarkedPosL = zzCurrentPosL;
|
||||
|
|
|
@ -207,4 +207,16 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
|
|||
new String[] {"𩬅", "艱", "鍟", "䇹", "愯", "瀛"},
|
||||
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>" });
|
||||
}
|
||||
|
||||
public void testKorean() throws Exception {
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(a, "훈민정음",
|
||||
new String[] { "훈민정음" },
|
||||
new String[] { "<HANGUL>" });
|
||||
}
|
||||
|
||||
public void testJapanese() throws Exception {
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(a, "仮名遣い カタカナ",
|
||||
new String[] { "仮", "名", "遣", "い", "カタカナ" },
|
||||
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
|
||||
}
|
||||
}
|
||||
|
|
|
@ -406,4 +406,16 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
|
|||
new String[] {"𩬅", "艱", "鍟", "䇹", "愯", "瀛"},
|
||||
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>" });
|
||||
}
|
||||
|
||||
public void testKorean() throws Exception {
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(a, "훈민정음",
|
||||
new String[] { "훈민정음" },
|
||||
new String[] { "<HANGUL>" });
|
||||
}
|
||||
|
||||
public void testJapanese() throws Exception {
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(a, "仮名遣い カタカナ",
|
||||
new String[] { "仮", "名", "遣", "い", "カタカナ" },
|
||||
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,127 @@
|
|||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Default RBBI rules, based on UAX#29.
|
||||
#
|
||||
|
||||
!!chain;
|
||||
|
||||
#
|
||||
# Character Class Definitions.
|
||||
#
|
||||
|
||||
$CR = [\p{Word_Break = CR}];
|
||||
$LF = [\p{Word_Break = LF}];
|
||||
$Newline = [\p{Word_Break = Newline}];
|
||||
$Extend = [\p{Word_Break = Extend}];
|
||||
$Format = [\p{Word_Break = Format}];
|
||||
$Katakana = [\p{Word_Break = Katakana}];
|
||||
$ALetter = [\p{Word_Break = ALetter}];
|
||||
$MidNumLet = [\p{Word_Break = MidNumLet}];
|
||||
$MidLetter = [\p{Word_Break = MidLetter}];
|
||||
$MidNum = [\p{Word_Break = MidNum}];
|
||||
$Numeric = [\p{Word_Break = Numeric}[[:Decomposition_Type=Wide:]&[:General_Category=Decimal_Number:]]];
|
||||
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
|
||||
|
||||
|
||||
# Dictionary character set, for triggering language-based break engines. Currently
|
||||
# limited to LineBreak=Complex_Context. Note that this set only works in Unicode
|
||||
# 5.0 or later as the definition of Complex_Context was corrected to include all
|
||||
# characters requiring dictionary break.
|
||||
|
||||
$dictionary = [:LineBreak = Complex_Context:];
|
||||
$Control = [\p{Grapheme_Cluster_Break = Control}];
|
||||
$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]]; # Note: default ALetter does not
|
||||
# include the dictionary characters.
|
||||
|
||||
#
|
||||
# Rule 4: Ignore Format and Extend characters,
|
||||
# except when they appear at the beginning of a region of text.
|
||||
#
|
||||
$KatakanaEx = $Katakana ($Extend | $Format)*;
|
||||
$ALetterEx = $ALetterPlus ($Extend | $Format)*;
|
||||
$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
|
||||
$MidLetterEx = $MidLetter ($Extend | $Format)*;
|
||||
$MidNumEx = $MidNum ($Extend | $Format)*;
|
||||
$NumericEx = $Numeric ($Extend | $Format)*;
|
||||
$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
|
||||
|
||||
$Hiragana = [\p{script=Hiragana}];
|
||||
$Ideographic = [\p{Ideographic}];
|
||||
$HiraganaEx = $Hiragana ($Extend | $Format)*;
|
||||
$IdeographicEx = $Ideographic ($Extend | $Format)*;
|
||||
|
||||
## -------------------------------------------------
|
||||
|
||||
!!forward;
|
||||
|
||||
|
||||
# Rule 3 - CR x LF
|
||||
#
|
||||
$CR $LF;
|
||||
|
||||
# Rule 4 - ignore Format and Extend characters, except when they appear at the beginning
|
||||
# of a region of Text. The rule here comes into play when the start of text
|
||||
# begins with a group of Format chars, or with a "word" consisting of a single
|
||||
# char that is not in any of the listed word break categories followed by
|
||||
# format char(s).
|
||||
[^$CR $LF $Newline]? ($Extend | $Format)+;
|
||||
|
||||
$NumericEx {100};
|
||||
$ALetterEx {200};
|
||||
$KatakanaEx {300}; # note: these status values override those from rule 5
|
||||
$HiraganaEx {300}; # by virtue of being numerically larger.
|
||||
$IdeographicEx {400}; #
|
||||
|
||||
#
|
||||
# rule 5
|
||||
# Do not break between most letters.
|
||||
#
|
||||
$ALetterEx $ALetterEx {200};
|
||||
|
||||
# rule 6 and 7
|
||||
$ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200};
|
||||
|
||||
# rule 8
|
||||
|
||||
$NumericEx $NumericEx {100};
|
||||
|
||||
# rule 9
|
||||
|
||||
$ALetterEx $NumericEx {200};
|
||||
|
||||
# rule 10
|
||||
|
||||
$NumericEx $ALetterEx {200};
|
||||
|
||||
# rule 11 and 12
|
||||
|
||||
$NumericEx ($MidNumEx | $MidNumLetEx) $NumericEx {100};
|
||||
|
||||
# rule 13
|
||||
|
||||
$KatakanaEx $KatakanaEx {300};
|
||||
|
||||
# rule 13a/b
|
||||
|
||||
$ALetterEx $ExtendNumLetEx {200}; # (13a)
|
||||
$NumericEx $ExtendNumLetEx {100}; # (13a)
|
||||
$KatakanaEx $ExtendNumLetEx {300}; # (13a)
|
||||
$ExtendNumLetEx $ExtendNumLetEx {200}; # (13a)
|
||||
|
||||
$ExtendNumLetEx $ALetterEx {200}; # (13b)
|
||||
$ExtendNumLetEx $NumericEx {100}; # (13b)
|
||||
$ExtendNumLetEx $KatakanaEx {300}; # (13b)
|
|
@ -20,6 +20,8 @@ package org.apache.lucene.analysis.icu.segmentation;
|
|||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
|
||||
import com.ibm.icu.lang.UScript;
|
||||
import com.ibm.icu.text.BreakIterator;
|
||||
import com.ibm.icu.text.RuleBasedBreakIterator;
|
||||
|
@ -44,20 +46,24 @@ import com.ibm.icu.util.ULocale;
|
|||
*/
|
||||
public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
|
||||
/** Token type for words containing ideographic characters */
|
||||
public static final String WORD_IDEO = "<IDEOGRAPHIC>";
|
||||
/** Token type for words containing Japanese kana */
|
||||
public static final String WORD_KANA = "<KANA>";
|
||||
public static final String WORD_IDEO = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.IDEOGRAPHIC];
|
||||
/** Token type for words containing Japanese hiragana */
|
||||
public static final String WORD_HIRAGANA = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HIRAGANA];
|
||||
/** Token type for words containing Japanese katakana */
|
||||
public static final String WORD_KATAKANA = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.KATAKANA];
|
||||
/** Token type for words containing Korean hangul */
|
||||
public static final String WORD_HANGUL = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HANGUL];
|
||||
/** Token type for words that contain letters */
|
||||
public static final String WORD_LETTER = "<ALPHANUM>";
|
||||
public static final String WORD_LETTER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ALPHANUM];
|
||||
/** Token type for words that appear to be numbers */
|
||||
public static final String WORD_NUMBER = "<NUM>";
|
||||
public static final String WORD_NUMBER = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.NUM];
|
||||
|
||||
/*
|
||||
* the default breakiterators in use. these can be expensive to
|
||||
* instantiate, cheap to clone.
|
||||
*/
|
||||
private static final BreakIterator rootBreakIterator =
|
||||
BreakIterator.getWordInstance(ULocale.ROOT);
|
||||
readBreakIterator("Default.brk");
|
||||
private static final BreakIterator thaiBreakIterator =
|
||||
BreakIterator.getWordInstance(new ULocale("th_TH"));
|
||||
private static final BreakIterator hebrewBreakIterator =
|
||||
|
@ -87,9 +93,9 @@ public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
|
|||
case RuleBasedBreakIterator.WORD_IDEO:
|
||||
return WORD_IDEO;
|
||||
case RuleBasedBreakIterator.WORD_KANA:
|
||||
return WORD_KANA;
|
||||
return script == UScript.HIRAGANA ? WORD_HIRAGANA : WORD_KATAKANA;
|
||||
case RuleBasedBreakIterator.WORD_LETTER:
|
||||
return WORD_LETTER;
|
||||
return script == UScript.HANGUL ? WORD_HANGUL : WORD_LETTER;
|
||||
case RuleBasedBreakIterator.WORD_NUMBER:
|
||||
return WORD_NUMBER;
|
||||
default: /* some other custom code */
|
||||
|
|
Binary file not shown.
|
@ -128,11 +128,10 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
|
|||
|
||||
/*
|
||||
* For chinese, tokenize as char (these can later form bigrams or whatever)
|
||||
* TODO: why do full-width numerics have no word-break prop?
|
||||
*/
|
||||
public void testChinese() throws Exception {
|
||||
assertAnalyzesTo(a, "我是中国人。 1234 Tests ",
|
||||
new String[] { "我", "是", "中", "国", "人", "tests"});
|
||||
new String[] { "我", "是", "中", "国", "人", "1234", "tests"});
|
||||
}
|
||||
|
||||
public void testEmpty() throws Exception {
|
||||
|
@ -221,4 +220,16 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
|
|||
new String[] {"david", "has", "5000", "bones"},
|
||||
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" });
|
||||
}
|
||||
|
||||
public void testKorean() throws Exception {
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(a, "훈민정음",
|
||||
new String[] { "훈민정음" },
|
||||
new String[] { "<HANGUL>" });
|
||||
}
|
||||
|
||||
public void testJapanese() throws Exception {
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(a, "仮名遣い カタカナ",
|
||||
new String[] { "仮", "名", "遣", "い", "カタカナ" },
|
||||
new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue