mirror of https://github.com/apache/lucene.git
LUCENE-5357: Upgrade StandardTokenizer and UAX29URLEmailTokenizer to Unicode 6.3; update UAX29URLEmailTokenizer's recognized top level domains in URLs and Emails from the IANA Root Zone Database.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1548595 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
618f6b876d
commit
d516948bbd
|
@ -91,6 +91,11 @@ Build
|
||||||
|
|
||||||
* LUCENE-4381: Upgrade analysis/icu to 52.1. (Robert Muir)
|
* LUCENE-4381: Upgrade analysis/icu to 52.1. (Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-5357: Upgrade StandardTokenizer and UAX29URLEmailTokenizer to
|
||||||
|
Unicode 6.3; update UAX29URLEmailTokenizer's recognized top level
|
||||||
|
domains in URLs and Emails from the IANA Root Zone Database.
|
||||||
|
(Steve Rowe)
|
||||||
|
|
||||||
Bug fixes
|
Bug fixes
|
||||||
|
|
||||||
* LUCENE-5285: Improved highlighting of multi-valued fields with
|
* LUCENE-5285: Improved highlighting of multi-valued fields with
|
||||||
|
|
|
@ -45,17 +45,13 @@
|
||||||
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
|
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
|
||||||
<classpath refid="jflex.classpath"/>
|
<classpath refid="jflex.classpath"/>
|
||||||
</taskdef>
|
</taskdef>
|
||||||
<!-- this logic below looks duplicated with run-jflex, but its not, the regexp is different! -->
|
|
||||||
<jflex file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex"
|
<jflex file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex"
|
||||||
outdir="src/java/org/apache/lucene/analysis/charfilter"
|
outdir="src/java/org/apache/lucene/analysis/charfilter"
|
||||||
nobak="on"/>
|
nobak="on" inputstreamctor="false"/>
|
||||||
<!-- Remove the inappropriate JFlex-generated constructors -->
|
<!-- Remove the inappropriate JFlex-generated constructor -->
|
||||||
<replaceregexp file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java"
|
<replaceregexp file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java"
|
||||||
match="/\*\*\s*\*\s*Creates a new scanner.*this\(new java\.io\.InputStreamReader\(in\)\);\s*\}"
|
match="/\*\*\s*\*\s*Creates a new scanner\s*\*\s*\*\s*@param\s*in\s*the java.io.Reader to read input from\.\s*\*/\s*public HTMLStripCharFilter\(java\.io\.Reader in\)\s*\{\s*this.zzReader = in;\s*\}"
|
||||||
replace="" flags="sg"/>
|
replace="" flags="s"/>
|
||||||
<replaceregexp file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java"
|
|
||||||
match="\/\*\s*The following code was generated by JFlex.*"
|
|
||||||
replace="\/\* The following code was generated by JFlex. \*\/" flags=""/>
|
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="generate-jflex-html-char-entities">
|
<target name="generate-jflex-html-char-entities">
|
||||||
|
@ -96,15 +92,7 @@
|
||||||
<attribute name="dir"/>
|
<attribute name="dir"/>
|
||||||
<attribute name="name"/>
|
<attribute name="name"/>
|
||||||
<sequential>
|
<sequential>
|
||||||
<jflex file="@{dir}/@{name}.jflex"
|
<jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
|
||||||
outdir="@{dir}"
|
|
||||||
nobak="on" />
|
|
||||||
<replaceregexp file="@{dir}/@{name}.java"
|
|
||||||
match="/\*\*\s*\*\s*Creates a new scanner\..*this\(new java\.io\.InputStreamReader\(in\)\);\s*\}"
|
|
||||||
replace="" flags="sg"/>
|
|
||||||
<replaceregexp file="@{dir}/@{name}.java"
|
|
||||||
match="\/\*\s*The following code was generated by JFlex.*"
|
|
||||||
replace="\/\* The following code was generated by JFlex. \*\/" flags=""/>
|
|
||||||
</sequential>
|
</sequential>
|
||||||
</macrodef>
|
</macrodef>
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* The following code was generated by JFlex. */
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT */
|
||||||
|
|
||||||
package org.apache.lucene.analysis.charfilter;
|
package org.apache.lucene.analysis.charfilter;
|
||||||
|
|
||||||
|
@ -152,77 +152,77 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||||
"\21\1\1\41\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0"+
|
"\21\1\1\41\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0"+
|
||||||
"\4\1\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1"+
|
"\4\1\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1"+
|
||||||
"\1\0\3\1\1\0\2\2\14\0\64\1\40\2\3\0\1\1\4\0"+
|
"\1\0\3\1\1\0\2\2\14\0\64\1\40\2\3\0\1\1\4\0"+
|
||||||
"\1\1\1\2\2\0\12\274\41\0\3\2\1\41\1\0\12\274\6\0"+
|
"\1\1\1\2\2\0\12\274\41\0\3\2\2\0\12\274\6\0\130\1"+
|
||||||
"\130\1\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0"+
|
"\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2"+
|
||||||
"\14\2\4\0\14\2\12\0\12\274\36\1\2\0\5\1\13\0\54\1"+
|
"\4\0\14\2\12\0\12\274\36\1\2\0\5\1\13\0\54\1\4\0"+
|
||||||
"\4\0\21\2\7\1\2\2\6\0\12\274\1\2\45\0\27\1\5\2"+
|
"\21\2\7\1\2\2\6\0\12\274\1\2\45\0\27\1\5\2\4\0"+
|
||||||
"\4\0\65\1\12\2\1\0\35\2\2\0\1\2\12\274\6\0\12\274"+
|
"\65\1\12\2\1\0\35\2\2\0\1\2\12\274\6\0\12\274\15\0"+
|
||||||
"\15\0\1\1\130\0\5\2\57\1\21\2\7\1\4\0\12\274\21\0"+
|
"\1\1\130\0\5\2\57\1\21\2\7\1\4\0\12\274\21\0\11\2"+
|
||||||
"\11\2\14\0\3\2\36\1\15\2\2\1\12\274\54\1\16\2\14\0"+
|
"\14\0\3\2\36\1\15\2\2\1\12\274\54\1\16\2\14\0\44\1"+
|
||||||
"\44\1\24\2\10\0\12\274\3\0\3\1\12\274\44\1\122\0\3\2"+
|
"\24\2\10\0\12\274\3\0\3\1\12\274\44\1\122\0\3\2\1\0"+
|
||||||
"\1\0\25\2\4\1\1\2\4\1\3\2\2\1\11\0\300\1\47\2"+
|
"\25\2\4\1\1\2\4\1\3\2\2\1\11\0\300\1\47\2\25\0"+
|
||||||
"\25\0\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0"+
|
"\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1"+
|
||||||
"\10\1\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0"+
|
"\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1"+
|
||||||
"\65\1\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0"+
|
"\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1"+
|
||||||
"\4\1\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0"+
|
"\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0\13\41"+
|
||||||
"\13\41\35\0\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0"+
|
"\35\0\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0\1\41"+
|
||||||
"\1\41\21\0\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0"+
|
"\21\0\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0\1\2"+
|
||||||
"\1\2\3\0\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0"+
|
"\3\0\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0\1\1"+
|
||||||
"\1\1\2\0\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0"+
|
"\2\0\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0\20\1"+
|
||||||
"\20\1\2\0\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0"+
|
"\2\0\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0\57\1"+
|
||||||
"\57\1\1\0\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0"+
|
"\1\0\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0\46\1"+
|
||||||
"\46\1\1\0\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0"+
|
"\1\0\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0\1\2"+
|
||||||
"\1\2\27\1\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+
|
"\27\1\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
|
||||||
"\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2"+
|
"\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\u0200\0"+
|
||||||
"\u0200\0\1\41\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0"+
|
"\1\41\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0\5\1"+
|
||||||
"\5\1\4\0\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1"+
|
"\4\0\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1\5\0"+
|
||||||
"\5\0\51\1\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1"+
|
"\51\1\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1\112\0"+
|
||||||
"\112\0\u51cd\1\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1"+
|
"\u51cd\1\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\274"+
|
||||||
"\12\274\2\1\24\0\57\1\1\2\4\0\12\2\1\0\31\1\7\0"+
|
"\2\1\24\0\57\1\1\2\4\0\12\2\1\0\31\1\7\0\1\2"+
|
||||||
"\1\2\120\1\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0"+
|
"\120\1\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
|
||||||
"\4\1\14\0\13\1\115\0\12\1\1\2\3\1\1\2\4\1\1\2"+
|
"\14\0\13\1\115\0\12\1\1\2\3\1\1\2\4\1\1\2\27\1"+
|
||||||
"\27\1\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\274"+
|
"\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\274\6\0"+
|
||||||
"\6\0\22\2\6\1\3\0\1\1\4\0\12\274\34\1\10\2\2\0"+
|
"\22\2\6\1\3\0\1\1\4\0\12\274\34\1\10\2\2\0\27\1"+
|
||||||
"\27\1\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1"+
|
"\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\274"+
|
||||||
"\12\274\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0"+
|
"\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\274"+
|
||||||
"\12\274\6\0\27\1\3\0\1\1\1\2\4\0\60\1\1\2\1\1"+
|
"\6\0\27\1\3\0\1\1\1\2\4\0\60\1\1\2\1\1\3\2"+
|
||||||
"\3\2\2\1\2\2\5\1\2\2\1\1\1\2\1\1\30\0\3\1"+
|
"\2\1\2\2\5\1\2\2\1\1\1\2\1\1\30\0\3\1\2\0"+
|
||||||
"\2\0\13\1\5\2\2\0\3\1\2\2\12\0\6\1\2\0\6\1"+
|
"\13\1\5\2\2\0\3\1\2\2\12\0\6\1\2\0\6\1\2\0"+
|
||||||
"\2\0\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0"+
|
"\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0\2\2"+
|
||||||
"\2\2\2\0\12\274\6\0\u2ba4\1\14\0\27\1\4\0\61\1\4\0"+
|
"\2\0\12\274\6\0\u2ba4\1\14\0\27\1\4\0\61\1\4\0\1\170"+
|
||||||
"\1\170\1\223\1\103\1\165\1\136\1\214\2\0\1\160\1\153\2\0"+
|
"\1\223\1\103\1\165\1\136\1\214\2\0\1\160\1\153\2\0\1\120"+
|
||||||
"\1\120\1\210\14\0\1\105\1\127\20\0\1\122\7\0\1\256\1\112"+
|
"\1\210\14\0\1\105\1\127\20\0\1\122\7\0\1\256\1\112\5\0"+
|
||||||
"\5\0\1\143\4\0\51\120\1\110\3\120\1\124\1\220\17\0\1\133"+
|
"\1\143\4\0\51\120\1\110\3\120\1\124\1\220\17\0\1\133\u02c1\0"+
|
||||||
"\u02c1\0\1\252\277\0\2\123\1\212\3\222\2\211\1\222\1\211\2\222"+
|
"\1\252\277\0\2\123\1\212\3\222\2\211\1\222\1\211\2\222\1\221"+
|
||||||
"\1\221\21\222\11\213\1\157\7\213\7\204\1\156\1\204\1\246\2\207"+
|
"\21\222\11\213\1\157\7\213\7\204\1\156\1\204\1\246\2\207\1\166"+
|
||||||
"\1\166\1\246\1\207\1\166\10\246\2\167\5\203\2\155\5\203\1\107"+
|
"\1\246\1\207\1\166\10\246\2\167\5\203\2\155\5\203\1\107\10\202"+
|
||||||
"\10\202\5\154\3\224\12\251\20\224\3\225\32\227\1\226\2\200\2\234"+
|
"\5\154\3\224\12\251\20\224\3\225\32\227\1\226\2\200\2\234\1\235"+
|
||||||
"\1\235\2\234\2\235\2\234\1\235\3\200\1\177\2\200\12\250\1\247"+
|
"\2\234\2\235\2\234\1\235\3\200\1\177\2\200\12\250\1\247\1\176"+
|
||||||
"\1\176\1\171\7\176\1\171\13\176\31\200\7\176\12\250\1\176\5\134"+
|
"\1\171\7\176\1\171\13\176\31\200\7\176\12\250\1\176\5\134\3\245"+
|
||||||
"\3\245\3\142\1\140\4\142\2\140\10\142\1\140\7\141\1\137\2\141"+
|
"\3\142\1\140\4\142\2\140\10\142\1\140\7\141\1\137\2\141\7\142"+
|
||||||
"\7\142\16\245\1\135\4\245\1\106\4\244\1\106\5\255\1\254\1\255"+
|
"\16\245\1\135\4\245\1\106\4\244\1\106\5\255\1\254\1\255\3\254"+
|
||||||
"\3\254\7\255\1\254\23\255\5\264\3\255\6\264\2\255\6\253\5\263"+
|
"\7\255\1\254\23\255\5\264\3\255\6\264\2\255\6\253\5\263\3\262"+
|
||||||
"\3\262\2\142\7\257\36\142\4\257\5\142\5\245\6\244\2\245\1\244"+
|
"\2\142\7\257\36\142\4\257\5\142\5\245\6\244\2\245\1\244\4\141"+
|
||||||
"\4\141\13\253\12\244\26\253\15\134\1\243\2\134\1\152\3\237\1\134"+
|
"\13\253\12\244\26\253\15\134\1\243\2\134\1\152\3\237\1\134\2\237"+
|
||||||
"\2\237\5\151\4\237\4\152\1\151\3\152\1\151\5\152\2\147\1\116"+
|
"\5\151\4\237\4\152\1\151\3\152\1\151\5\152\2\147\1\116\2\147"+
|
||||||
"\2\147\1\116\1\147\2\116\1\147\1\116\12\147\1\116\4\146\1\115"+
|
"\1\116\1\147\2\116\1\147\1\116\12\147\1\116\4\146\1\115\1\236"+
|
||||||
"\1\236\1\240\1\150\3\164\1\240\2\164\1\260\2\261\2\164\1\150"+
|
"\1\240\1\150\3\164\1\240\2\164\1\260\2\261\2\164\1\150\1\164"+
|
||||||
"\1\164\1\150\1\164\1\150\1\164\3\150\1\164\2\150\1\164\1\150"+
|
"\1\150\1\164\1\150\1\164\3\150\1\164\2\150\1\164\1\150\2\164"+
|
||||||
"\2\164\1\150\1\164\1\150\1\164\1\150\1\164\1\150\1\164\1\150"+
|
"\1\150\1\164\1\150\1\164\1\150\1\164\1\150\1\164\1\150\1\162"+
|
||||||
"\1\162\2\145\1\162\1\145\2\162\4\145\1\162\7\145\1\162\4\145"+
|
"\2\145\1\162\1\145\2\162\4\145\1\162\7\145\1\162\4\145\1\162"+
|
||||||
"\1\162\4\145\1\164\1\150\1\164\12\216\1\217\21\216\1\217\3\215"+
|
"\4\145\1\164\1\150\1\164\12\216\1\217\21\216\1\217\3\215\1\217"+
|
||||||
"\1\217\3\216\1\217\1\216\2\144\2\216\1\217\15\241\4\201\4\206"+
|
"\3\216\1\217\1\216\2\144\2\216\1\217\15\241\4\201\4\206\1\242"+
|
||||||
"\1\242\1\161\10\242\7\206\6\164\4\113\1\121\37\113\1\121\4\113"+
|
"\1\161\10\242\7\206\6\164\4\113\1\121\37\113\1\121\4\113\25\174"+
|
||||||
"\25\174\1\131\11\174\21\130\5\174\1\104\12\117\5\174\6\205\4\162"+
|
"\1\131\11\174\21\130\5\174\1\104\12\117\5\174\6\205\4\162\1\163"+
|
||||||
"\1\163\1\130\5\231\12\232\17\231\1\125\3\114\14\230\1\126\11\173"+
|
"\1\130\5\231\12\232\17\231\1\125\3\114\14\230\1\126\11\173\1\172"+
|
||||||
"\1\172\5\173\4\233\13\175\2\132\11\173\1\172\31\173\1\172\4\126"+
|
"\5\173\4\233\13\175\2\132\11\173\1\172\31\173\1\172\4\126\4\173"+
|
||||||
"\4\173\2\172\2\265\1\111\5\265\52\111\u1900\0\u016e\1\2\0\152\1"+
|
"\2\172\2\265\1\111\5\265\52\111\u1900\0\u016e\1\2\0\152\1\46\0"+
|
||||||
"\46\0\7\1\14\0\5\1\5\0\1\1\1\2\12\1\1\0\15\1"+
|
"\7\1\14\0\5\1\5\0\1\1\1\2\12\1\1\0\15\1\1\0"+
|
||||||
"\1\0\5\1\1\0\1\1\1\0\2\1\1\0\2\1\1\0\154\1"+
|
"\5\1\1\0\1\1\1\0\2\1\1\0\2\1\1\0\154\1\41\0"+
|
||||||
"\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\2"+
|
"\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\2\20\0"+
|
||||||
"\20\0\7\2\14\0\2\2\30\0\3\2\40\0\5\1\1\0\207\1"+
|
"\7\2\14\0\2\2\30\0\3\2\40\0\5\1\1\0\207\1\23\0"+
|
||||||
"\23\0\12\274\7\0\32\1\4\0\1\2\1\0\32\1\13\0\131\1"+
|
"\12\274\7\0\32\1\4\0\1\2\1\0\32\1\13\0\131\1\3\0"+
|
||||||
"\3\0\6\1\2\0\6\1\2\0\6\1\2\0\3\1\43\0";
|
"\6\1\2\0\6\1\2\0\6\1\2\0\3\1\43\0";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Translates characters to character classes
|
* Translates characters to character classes
|
||||||
|
@ -30895,6 +30895,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unpacks the compressed character translation table.
|
* Unpacks the compressed character translation table.
|
||||||
*
|
*
|
||||||
|
@ -30905,7 +30906,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||||
char [] map = new char[0x10000];
|
char [] map = new char[0x10000];
|
||||||
int i = 0; /* index in packed string */
|
int i = 0; /* index in packed string */
|
||||||
int j = 0; /* index in unpacked array */
|
int j = 0; /* index in unpacked array */
|
||||||
while (i < 2778) {
|
while (i < 2776) {
|
||||||
int count = packed.charAt(i++);
|
int count = packed.charAt(i++);
|
||||||
char value = packed.charAt(i++);
|
char value = packed.charAt(i++);
|
||||||
do map[j++] = value; while (--count > 0);
|
do map[j++] = value; while (--count > 0);
|
||||||
|
|
|
@ -34,7 +34,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
|
||||||
*/
|
*/
|
||||||
%%
|
%%
|
||||||
|
|
||||||
%unicode 6.1
|
%unicode 6.3
|
||||||
%apiprivate
|
%apiprivate
|
||||||
%type int
|
%type int
|
||||||
%final
|
%final
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
/*
|
/*
|
||||||
* Copyright 2001-2005 The Apache Software Foundation.
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
@ -13,10 +14,9 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
|
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
|
||||||
// file version from Saturday, July 14, 2012 4:34:14 AM UTC
|
// file version from Friday, December 6, 2013 4:34:10 AM UTC
|
||||||
// generated on Sunday, July 15, 2012 12:59:44 AM UTC
|
// generated on Friday, December 6, 2013 3:21:59 PM UTC
|
||||||
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
|
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
|
||||||
|
|
||||||
ASCIITLD = "." (
|
ASCIITLD = "." (
|
||||||
|
@ -49,6 +49,7 @@ ASCIITLD = "." (
|
||||||
| [bB][gG]
|
| [bB][gG]
|
||||||
| [bB][hH]
|
| [bB][hH]
|
||||||
| [bB][iI]
|
| [bB][iI]
|
||||||
|
| [bB][iI][kK][eE]
|
||||||
| [bB][iI][zZ]
|
| [bB][iI][zZ]
|
||||||
| [bB][jJ]
|
| [bB][jJ]
|
||||||
| [bB][mM]
|
| [bB][mM]
|
||||||
|
@ -62,6 +63,7 @@ ASCIITLD = "." (
|
||||||
| [bB][yY]
|
| [bB][yY]
|
||||||
| [bB][zZ]
|
| [bB][zZ]
|
||||||
| [cC][aA]
|
| [cC][aA]
|
||||||
|
| [cC][aA][mM][eE][rR][aA]
|
||||||
| [cC][aA][tT]
|
| [cC][aA][tT]
|
||||||
| [cC][cC]
|
| [cC][cC]
|
||||||
| [cC][dD]
|
| [cC][dD]
|
||||||
|
@ -71,10 +73,13 @@ ASCIITLD = "." (
|
||||||
| [cC][iI]
|
| [cC][iI]
|
||||||
| [cC][kK]
|
| [cC][kK]
|
||||||
| [cC][lL]
|
| [cC][lL]
|
||||||
|
| [cC][lL][oO][tT][hH][iI][nN][gG]
|
||||||
| [cC][mM]
|
| [cC][mM]
|
||||||
| [cC][nN]
|
| [cC][nN]
|
||||||
| [cC][oO]
|
| [cC][oO]
|
||||||
| [cC][oO][mM]
|
| [cC][oO][mM]
|
||||||
|
| [cC][oO][nN][sS][tT][rR][uU][cC][tT][iI][oO][nN]
|
||||||
|
| [cC][oO][nN][tT][rR][aA][cC][tT][oO][rR][sS]
|
||||||
| [cC][oO][oO][pP]
|
| [cC][oO][oO][pP]
|
||||||
| [cC][rR]
|
| [cC][rR]
|
||||||
| [cC][uU]
|
| [cC][uU]
|
||||||
|
@ -84,6 +89,8 @@ ASCIITLD = "." (
|
||||||
| [cC][yY]
|
| [cC][yY]
|
||||||
| [cC][zZ]
|
| [cC][zZ]
|
||||||
| [dD][eE]
|
| [dD][eE]
|
||||||
|
| [dD][iI][aA][mM][oO][nN][dD][sS]
|
||||||
|
| [dD][iI][rR][eE][cC][tT][oO][rR][yY]
|
||||||
| [dD][jJ]
|
| [dD][jJ]
|
||||||
| [dD][kK]
|
| [dD][kK]
|
||||||
| [dD][mM]
|
| [dD][mM]
|
||||||
|
@ -93,8 +100,11 @@ ASCIITLD = "." (
|
||||||
| [eE][dD][uU]
|
| [eE][dD][uU]
|
||||||
| [eE][eE]
|
| [eE][eE]
|
||||||
| [eE][gG]
|
| [eE][gG]
|
||||||
|
| [eE][nN][tT][eE][rR][pP][rR][iI][sS][eE][sS]
|
||||||
|
| [eE][qQ][uU][iI][pP][mM][eE][nN][tT]
|
||||||
| [eE][rR]
|
| [eE][rR]
|
||||||
| [eE][sS]
|
| [eE][sS]
|
||||||
|
| [eE][sS][tT][aA][tT][eE]
|
||||||
| [eE][tT]
|
| [eE][tT]
|
||||||
| [eE][uU]
|
| [eE][uU]
|
||||||
| [fF][iI]
|
| [fF][iI]
|
||||||
|
@ -104,6 +114,7 @@ ASCIITLD = "." (
|
||||||
| [fF][oO]
|
| [fF][oO]
|
||||||
| [fF][rR]
|
| [fF][rR]
|
||||||
| [gG][aA]
|
| [gG][aA]
|
||||||
|
| [gG][aA][lL][lL][eE][rR][yY]
|
||||||
| [gG][bB]
|
| [gG][bB]
|
||||||
| [gG][dD]
|
| [gG][dD]
|
||||||
| [gG][eE]
|
| [gG][eE]
|
||||||
|
@ -118,14 +129,17 @@ ASCIITLD = "." (
|
||||||
| [gG][pP]
|
| [gG][pP]
|
||||||
| [gG][qQ]
|
| [gG][qQ]
|
||||||
| [gG][rR]
|
| [gG][rR]
|
||||||
|
| [gG][rR][aA][pP][hH][iI][cC][sS]
|
||||||
| [gG][sS]
|
| [gG][sS]
|
||||||
| [gG][tT]
|
| [gG][tT]
|
||||||
| [gG][uU]
|
| [gG][uU]
|
||||||
|
| [gG][uU][rR][uU]
|
||||||
| [gG][wW]
|
| [gG][wW]
|
||||||
| [gG][yY]
|
| [gG][yY]
|
||||||
| [hH][kK]
|
| [hH][kK]
|
||||||
| [hH][mM]
|
| [hH][mM]
|
||||||
| [hH][nN]
|
| [hH][nN]
|
||||||
|
| [hH][oO][lL][dD][iI][nN][gG][sS]
|
||||||
| [hH][rR]
|
| [hH][rR]
|
||||||
| [hH][tT]
|
| [hH][tT]
|
||||||
| [hH][uU]
|
| [hH][uU]
|
||||||
|
@ -150,6 +164,7 @@ ASCIITLD = "." (
|
||||||
| [kK][gG]
|
| [kK][gG]
|
||||||
| [kK][hH]
|
| [kK][hH]
|
||||||
| [kK][iI]
|
| [kK][iI]
|
||||||
|
| [kK][iI][tT][cC][hH][eE][nN]
|
||||||
| [kK][mM]
|
| [kK][mM]
|
||||||
| [kK][nN]
|
| [kK][nN]
|
||||||
| [kK][pP]
|
| [kK][pP]
|
||||||
|
@ -158,9 +173,11 @@ ASCIITLD = "." (
|
||||||
| [kK][yY]
|
| [kK][yY]
|
||||||
| [kK][zZ]
|
| [kK][zZ]
|
||||||
| [lL][aA]
|
| [lL][aA]
|
||||||
|
| [lL][aA][nN][dD]
|
||||||
| [lL][bB]
|
| [lL][bB]
|
||||||
| [lL][cC]
|
| [lL][cC]
|
||||||
| [lL][iI]
|
| [lL][iI]
|
||||||
|
| [lL][iI][gG][hH][tT][iI][nN][gG]
|
||||||
| [lL][kK]
|
| [lL][kK]
|
||||||
| [lL][rR]
|
| [lL][rR]
|
||||||
| [lL][sS]
|
| [lL][sS]
|
||||||
|
@ -172,6 +189,7 @@ ASCIITLD = "." (
|
||||||
| [mM][cC]
|
| [mM][cC]
|
||||||
| [mM][dD]
|
| [mM][dD]
|
||||||
| [mM][eE]
|
| [mM][eE]
|
||||||
|
| [mM][eE][nN][uU]
|
||||||
| [mM][gG]
|
| [mM][gG]
|
||||||
| [mM][hH]
|
| [mM][hH]
|
||||||
| [mM][iI][lL]
|
| [mM][iI][lL]
|
||||||
|
@ -214,10 +232,13 @@ ASCIITLD = "." (
|
||||||
| [pP][fF]
|
| [pP][fF]
|
||||||
| [pP][gG]
|
| [pP][gG]
|
||||||
| [pP][hH]
|
| [pP][hH]
|
||||||
|
| [pP][hH][oO][tT][oO][gG][rR][aA][pP][hH][yY]
|
||||||
| [pP][kK]
|
| [pP][kK]
|
||||||
| [pP][lL]
|
| [pP][lL]
|
||||||
|
| [pP][lL][uU][mM][bB][iI][nN][gG]
|
||||||
| [pP][mM]
|
| [pP][mM]
|
||||||
| [pP][nN]
|
| [pP][nN]
|
||||||
|
| [pP][oO][sS][tT]
|
||||||
| [pP][rR]
|
| [pP][rR]
|
||||||
| [pP][rR][oO]
|
| [pP][rR][oO]
|
||||||
| [pP][sS]
|
| [pP][sS]
|
||||||
|
@ -235,9 +256,11 @@ ASCIITLD = "." (
|
||||||
| [sS][cC]
|
| [sS][cC]
|
||||||
| [sS][dD]
|
| [sS][dD]
|
||||||
| [sS][eE]
|
| [sS][eE]
|
||||||
|
| [sS][eE][xX][yY]
|
||||||
| [sS][gG]
|
| [sS][gG]
|
||||||
| [sS][hH]
|
| [sS][hH]
|
||||||
| [sS][iI]
|
| [sS][iI]
|
||||||
|
| [sS][iI][nN][gG][lL][eE][sS]
|
||||||
| [sS][jJ]
|
| [sS][jJ]
|
||||||
| [sS][kK]
|
| [sS][kK]
|
||||||
| [sS][lL]
|
| [sS][lL]
|
||||||
|
@ -251,18 +274,22 @@ ASCIITLD = "." (
|
||||||
| [sS][xX]
|
| [sS][xX]
|
||||||
| [sS][yY]
|
| [sS][yY]
|
||||||
| [sS][zZ]
|
| [sS][zZ]
|
||||||
|
| [tT][aA][tT][tT][oO][oO]
|
||||||
| [tT][cC]
|
| [tT][cC]
|
||||||
| [tT][dD]
|
| [tT][dD]
|
||||||
|
| [tT][eE][cC][hH][nN][oO][lL][oO][gG][yY]
|
||||||
| [tT][eE][lL]
|
| [tT][eE][lL]
|
||||||
| [tT][fF]
|
| [tT][fF]
|
||||||
| [tT][gG]
|
| [tT][gG]
|
||||||
| [tT][hH]
|
| [tT][hH]
|
||||||
|
| [tT][iI][pP][sS]
|
||||||
| [tT][jJ]
|
| [tT][jJ]
|
||||||
| [tT][kK]
|
| [tT][kK]
|
||||||
| [tT][lL]
|
| [tT][lL]
|
||||||
| [tT][mM]
|
| [tT][mM]
|
||||||
| [tT][nN]
|
| [tT][nN]
|
||||||
| [tT][oO]
|
| [tT][oO]
|
||||||
|
| [tT][oO][dD][aA][yY]
|
||||||
| [tT][pP]
|
| [tT][pP]
|
||||||
| [tT][rR]
|
| [tT][rR]
|
||||||
| [tT][rR][aA][vV][eE][lL]
|
| [tT][rR][aA][vV][eE][lL]
|
||||||
|
@ -273,61 +300,62 @@ ASCIITLD = "." (
|
||||||
| [uU][aA]
|
| [uU][aA]
|
||||||
| [uU][gG]
|
| [uU][gG]
|
||||||
| [uU][kK]
|
| [uU][kK]
|
||||||
|
| [uU][nN][oO]
|
||||||
| [uU][sS]
|
| [uU][sS]
|
||||||
| [uU][yY]
|
| [uU][yY]
|
||||||
| [uU][zZ]
|
| [uU][zZ]
|
||||||
| [vV][aA]
|
| [vV][aA]
|
||||||
| [vV][cC]
|
| [vV][cC]
|
||||||
| [vV][eE]
|
| [vV][eE]
|
||||||
|
| [vV][eE][nN][tT][uU][rR][eE][sS]
|
||||||
| [vV][gG]
|
| [vV][gG]
|
||||||
| [vV][iI]
|
| [vV][iI]
|
||||||
| [vV][nN]
|
| [vV][nN]
|
||||||
|
| [vV][oO][yY][aA][gG][eE]
|
||||||
| [vV][uU]
|
| [vV][uU]
|
||||||
| [wW][fF]
|
| [wW][fF]
|
||||||
| [wW][sS]
|
| [wW][sS]
|
||||||
| [xX][nN]--0[zZ][wW][mM]56[dD]
|
|
||||||
| [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
|
|
||||||
| [xX][nN]--3[eE]0[bB]707[eE]
|
| [xX][nN]--3[eE]0[bB]707[eE]
|
||||||
| [xX][nN]--45[bB][rR][jJ]9[cC]
|
| [xX][nN]--45[bB][rR][jJ]9[cC]
|
||||||
| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
|
|
||||||
| [xX][nN]--80[aA][oO]21[aA]
|
| [xX][nN]--80[aA][oO]21[aA]
|
||||||
|
| [xX][nN]--80[aA][sS][eE][hH][dD][bB]
|
||||||
|
| [xX][nN]--80[aA][sS][wW][gG]
|
||||||
| [xX][nN]--90[aA]3[aA][cC]
|
| [xX][nN]--90[aA]3[aA][cC]
|
||||||
| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
|
|
||||||
| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
|
| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
|
||||||
| [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
|
|
||||||
| [xX][nN]--[fF][iI][qQ][sS]8[sS]
|
| [xX][nN]--[fF][iI][qQ][sS]8[sS]
|
||||||
| [xX][nN]--[fF][iI][qQ][zZ]9[sS]
|
| [xX][nN]--[fF][iI][qQ][zZ]9[sS]
|
||||||
| [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD]
|
| [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD]
|
||||||
| [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
|
| [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
|
||||||
| [xX][nN]--[gG]6[wW]251[dD]
|
|
||||||
| [xX][nN]--[gG][eE][cC][rR][jJ]9[cC]
|
| [xX][nN]--[gG][eE][cC][rR][jJ]9[cC]
|
||||||
| [xX][nN]--[hH]2[bB][rR][jJ]9[cC]
|
| [xX][nN]--[hH]2[bB][rR][jJ]9[cC]
|
||||||
| [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]
|
| [xX][nN]--[jJ]1[aA][mM][hH]
|
||||||
| [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
|
|
||||||
| [xX][nN]--[jJ]6[wW]193[gG]
|
| [xX][nN]--[jJ]6[wW]193[gG]
|
||||||
| [xX][nN]--[jJ][xX][aA][lL][pP][dD][lL][pP]
|
|
||||||
| [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]
|
|
||||||
| [xX][nN]--[kK][pP][rR][wW]13[dD]
|
| [xX][nN]--[kK][pP][rR][wW]13[dD]
|
||||||
| [xX][nN]--[kK][pP][rR][yY]57[dD]
|
| [xX][nN]--[kK][pP][rR][yY]57[dD]
|
||||||
|
| [xX][nN]--[lL]1[aA][cC][cC]
|
||||||
| [xX][nN]--[lL][gG][bB][bB][aA][tT]1[aA][dD]8[jJ]
|
| [xX][nN]--[lL][gG][bB][bB][aA][tT]1[aA][dD]8[jJ]
|
||||||
| [xX][nN]--[mM][gG][bB]9[aA][wW][bB][fF]
|
| [xX][nN]--[mM][gG][bB]9[aA][wW][bB][fF]
|
||||||
|
| [xX][nN]--[mM][gG][bB][aA]3[aA]4[fF]16[aA]
|
||||||
| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
|
| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
|
||||||
| [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
|
| [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
|
||||||
| [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
|
| [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
|
||||||
| [xX][nN]--[mM][gG][bB][cC]0[aA]9[aA][zZ][cC][gG]
|
| [xX][nN]--[mM][gG][bB][cC]0[aA]9[aA][zZ][cC][gG]
|
||||||
| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
|
| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
|
||||||
|
| [xX][nN]--[mM][gG][bB][xX]4[cC][dD]0[aA][bB]
|
||||||
|
| [xX][nN]--[nN][gG][bB][cC]5[aA][zZ][dD]
|
||||||
| [xX][nN]--[oO]3[cC][wW]4[hH]
|
| [xX][nN]--[oO]3[cC][wW]4[hH]
|
||||||
| [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
|
| [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
|
||||||
| [xX][nN]--[pP]1[aA][iI]
|
| [xX][nN]--[pP]1[aA][iI]
|
||||||
| [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
|
| [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
|
||||||
|
| [xX][nN]--[qQ]9[jJ][yY][bB]4[cC]
|
||||||
| [xX][nN]--[sS]9[bB][rR][jJ]9[cC]
|
| [xX][nN]--[sS]9[bB][rR][jJ]9[cC]
|
||||||
|
| [xX][nN]--[uU][nN][uU][pP]4[yY]
|
||||||
| [xX][nN]--[wW][gG][bB][hH]1[cC]
|
| [xX][nN]--[wW][gG][bB][hH]1[cC]
|
||||||
| [xX][nN]--[wW][gG][bB][lL]6[aA]
|
| [xX][nN]--[wW][gG][bB][lL]6[aA]
|
||||||
| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
|
| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
|
||||||
| [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH]
|
| [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH]
|
||||||
| [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
|
| [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
|
||||||
| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
|
| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
|
||||||
| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
|
|
||||||
| [xX][xX][xX]
|
| [xX][xX][xX]
|
||||||
| [yY][eE]
|
| [yY][eE]
|
||||||
| [yY][tT]
|
| [yY][tT]
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* The following code was generated by JFlex. */
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT */
|
||||||
|
|
||||||
package org.apache.lucene.analysis.standard;
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
|
@ -58,64 +58,63 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
||||||
* Translates characters to character classes
|
* Translates characters to character classes
|
||||||
*/
|
*/
|
||||||
private static final String ZZ_CMAP_PACKED =
|
private static final String ZZ_CMAP_PACKED =
|
||||||
"\11\0\1\0\1\15\1\0\1\0\1\14\22\0\1\0\5\0\1\5"+
|
"\46\0\1\5\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0"+
|
||||||
"\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0\1\6\32\12"+
|
"\1\6\32\12\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12"+
|
||||||
"\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12\4\0\1\12"+
|
"\4\0\1\12\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12"+
|
||||||
"\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12\34\0\136\12"+
|
"\34\0\136\12\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12"+
|
||||||
"\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12\11\0\1\12"+
|
"\11\0\1\12\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12"+
|
||||||
"\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12\1\0\24\12"+
|
"\1\0\24\12\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12"+
|
||||||
"\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12\12\0\71\12"+
|
"\12\0\71\12\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12"+
|
||||||
"\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12\67\0\46\12"+
|
"\67\0\46\12\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12"+
|
||||||
"\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12\56\0\32\12"+
|
"\56\0\32\12\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12"+
|
||||||
"\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12\17\0\2\12"+
|
"\17\0\2\12\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0"+
|
||||||
"\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0\46\12\u015f\0"+
|
"\46\12\u015f\0\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0"+
|
||||||
"\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0\12\2\25\0"+
|
"\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0"+
|
||||||
"\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0\1\12\3\0"+
|
"\1\12\3\0\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12"+
|
||||||
"\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12\23\0\6\12"+
|
"\23\0\6\12\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12"+
|
||||||
"\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12\1\0\2\12"+
|
"\1\0\2\12\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2"+
|
||||||
"\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2\2\0\3\12"+
|
"\2\0\3\12\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12"+
|
||||||
"\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12\1\0\7\12"+
|
"\1\0\7\12\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12"+
|
||||||
"\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12\17\0\1\12"+
|
"\17\0\1\12\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12"+
|
||||||
"\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12"+
|
"\1\0\7\12\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12"+
|
||||||
"\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12\1\0\3\12"+
|
"\1\0\3\12\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12"+
|
||||||
"\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12\3\0\2\12"+
|
"\3\0\2\12\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12"+
|
||||||
"\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12\3\0\10\12"+
|
"\3\0\10\12\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12"+
|
||||||
"\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12\1\0\27\12"+
|
"\1\0\27\12\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2"+
|
||||||
"\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2\25\0\10\12"+
|
"\25\0\10\12\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12"+
|
||||||
"\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12\44\0\1\12"+
|
"\44\0\1\12\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12"+
|
||||||
"\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12\1\0\27\12"+
|
"\1\0\27\12\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12"+
|
||||||
"\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12\3\0\30\12"+
|
"\3\0\30\12\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1"+
|
||||||
"\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1\60\12\1\1"+
|
"\60\12\1\1\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0"+
|
||||||
"\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0\1\12\2\0"+
|
"\1\12\2\0\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0"+
|
||||||
"\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0\7\12\1\0"+
|
"\7\12\1\0\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0"+
|
||||||
"\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0\4\12\1\0"+
|
"\4\12\1\0\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0"+
|
||||||
"\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0\12\2\2\0"+
|
"\12\2\2\0\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0"+
|
||||||
"\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0\42\12\35\0"+
|
"\42\12\35\0\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0"+
|
||||||
"\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0\12\2\6\0"+
|
"\12\2\6\0\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0"+
|
||||||
"\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0\104\12\5\0"+
|
"\104\12\5\0\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0"+
|
||||||
"\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0\4\12\2\0"+
|
"\4\12\2\0\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0"+
|
||||||
"\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0\1\12\1\0"+
|
"\1\12\1\0\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0"+
|
||||||
"\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0"+
|
"\7\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0"+
|
||||||
"\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0\27\12\1\0"+
|
"\27\12\1\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0"+
|
||||||
"\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\47\12\1\0"+
|
"\47\12\1\0\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0"+
|
||||||
"\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0\10\12\12\0"+
|
"\10\12\12\0\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0"+
|
||||||
"\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0\12\2\6\0"+
|
"\12\2\6\0\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0"+
|
||||||
"\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0\26\12\2\0"+
|
"\26\12\2\0\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0"+
|
||||||
"\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0\1\12\1\0"+
|
"\1\12\1\0\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0"+
|
||||||
"\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0\7\12\1\0"+
|
"\7\12\1\0\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0"+
|
||||||
"\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0\6\12\4\0"+
|
"\6\12\4\0\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0"+
|
||||||
"\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0\1\12\4\0"+
|
"\1\12\4\0\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0"+
|
||||||
"\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0\1\12\1\0"+
|
"\1\12\1\0\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0"+
|
||||||
"\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0\7\12\u0ecb\0"+
|
"\7\12\u0ecb\0\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13"+
|
||||||
"\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13\2\13\132\13"+
|
"\2\13\132\13\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0"+
|
||||||
"\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0\30\12\70\0"+
|
"\30\12\70\0\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13"+
|
||||||
"\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13\132\13\u048d\12"+
|
"\132\13\u048d\12\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12"+
|
||||||
"\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12\5\0\1\12"+
|
"\5\0\1\12\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12"+
|
||||||
"\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12\1\0\2\12"+
|
"\1\0\2\12\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12"+
|
||||||
"\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12\2\0\66\12"+
|
"\2\0\66\12\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12"+
|
||||||
"\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12\23\0\12\2"+
|
"\23\0\12\2\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12"+
|
||||||
"\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12\3\0\6\12"+
|
"\3\0\6\12\2\0\6\12\2\0\6\12\2\0\3\12\43\0";
|
||||||
"\2\0\6\12\2\0\6\12\2\0\3\12\43\0";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Translates characters to character classes
|
* Translates characters to character classes
|
||||||
|
@ -128,13 +127,12 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
||||||
private static final int [] ZZ_ACTION = zzUnpackAction();
|
private static final int [] ZZ_ACTION = zzUnpackAction();
|
||||||
|
|
||||||
private static final String ZZ_ACTION_PACKED_0 =
|
private static final String ZZ_ACTION_PACKED_0 =
|
||||||
"\1\0\1\1\3\2\1\3\1\1\13\0\1\2\3\4"+
|
"\1\0\1\1\3\2\1\3\13\0\1\2\3\4\2\0"+
|
||||||
"\2\0\1\5\1\0\1\5\3\4\6\5\1\6\1\4"+
|
"\1\5\1\0\1\5\3\4\6\5\1\6\1\4\2\7"+
|
||||||
"\2\7\1\10\1\0\1\10\3\0\2\10\1\11\1\12"+
|
"\1\10\1\0\1\10\3\0\2\10\1\11\1\12\1\4";
|
||||||
"\1\4";
|
|
||||||
|
|
||||||
private static int [] zzUnpackAction() {
|
private static int [] zzUnpackAction() {
|
||||||
int [] result = new int[51];
|
int [] result = new int[50];
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
|
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
|
||||||
return result;
|
return result;
|
||||||
|
@ -159,16 +157,16 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
||||||
private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
|
private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
|
||||||
|
|
||||||
private static final String ZZ_ROWMAP_PACKED_0 =
|
private static final String ZZ_ROWMAP_PACKED_0 =
|
||||||
"\0\0\0\16\0\34\0\52\0\70\0\16\0\106\0\124"+
|
"\0\0\0\14\0\30\0\44\0\60\0\14\0\74\0\110"+
|
||||||
"\0\142\0\160\0\176\0\214\0\232\0\250\0\266\0\304"+
|
"\0\124\0\140\0\154\0\170\0\204\0\220\0\234\0\250"+
|
||||||
"\0\322\0\340\0\356\0\374\0\u010a\0\u0118\0\u0126\0\u0134"+
|
"\0\264\0\300\0\314\0\330\0\344\0\360\0\374\0\u0108"+
|
||||||
"\0\u0142\0\u0150\0\u015e\0\u016c\0\u017a\0\u0188\0\u0196\0\u01a4"+
|
"\0\u0114\0\u0120\0\u012c\0\u0138\0\u0144\0\u0150\0\u015c\0\u0168"+
|
||||||
"\0\u01b2\0\u01c0\0\u01ce\0\u01dc\0\u01ea\0\u01f8\0\322\0\u0206"+
|
"\0\u0174\0\u0180\0\u018c\0\u0198\0\u01a4\0\250\0\u01b0\0\u01bc"+
|
||||||
"\0\u0214\0\u0222\0\u0230\0\u023e\0\u024c\0\u025a\0\124\0\214"+
|
"\0\u01c8\0\u01d4\0\u01e0\0\u01ec\0\u01f8\0\74\0\154\0\u0204"+
|
||||||
"\0\u0268\0\u0276\0\u0284";
|
"\0\u0210\0\u021c";
|
||||||
|
|
||||||
private static int [] zzUnpackRowMap() {
|
private static int [] zzUnpackRowMap() {
|
||||||
int [] result = new int[51];
|
int [] result = new int[50];
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
|
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
|
||||||
return result;
|
return result;
|
||||||
|
@ -191,49 +189,49 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
||||||
private static final int [] ZZ_TRANS = zzUnpackTrans();
|
private static final int [] ZZ_TRANS = zzUnpackTrans();
|
||||||
|
|
||||||
private static final String ZZ_TRANS_PACKED_0 =
|
private static final String ZZ_TRANS_PACKED_0 =
|
||||||
"\1\2\1\3\1\4\7\2\1\5\1\6\1\7\1\2"+
|
"\1\2\1\3\1\4\7\2\1\5\1\6\15\0\2\3"+
|
||||||
"\17\0\2\3\1\0\1\10\1\0\1\11\2\12\1\13"+
|
"\1\0\1\7\1\0\1\10\2\11\1\12\1\3\2\0"+
|
||||||
"\1\3\4\0\1\3\1\4\1\0\1\14\1\0\1\11"+
|
"\1\3\1\4\1\0\1\13\1\0\1\10\2\14\1\15"+
|
||||||
"\2\15\1\16\1\4\4\0\1\3\1\4\1\17\1\20"+
|
"\1\4\2\0\1\3\1\4\1\16\1\17\1\20\1\21"+
|
||||||
"\1\21\1\22\2\12\1\13\1\23\20\0\1\2\1\0"+
|
"\2\11\1\12\1\22\2\0\1\23\1\24\7\0\1\25"+
|
||||||
"\1\24\1\25\7\0\1\26\4\0\2\27\7\0\1\27"+
|
"\2\0\2\26\7\0\1\26\2\0\1\27\1\30\7\0"+
|
||||||
"\4\0\1\30\1\31\7\0\1\32\5\0\1\33\7\0"+
|
"\1\31\3\0\1\32\7\0\1\12\2\0\1\33\1\34"+
|
||||||
"\1\13\4\0\1\34\1\35\7\0\1\36\4\0\1\37"+
|
"\7\0\1\35\2\0\1\36\1\37\7\0\1\40\2\0"+
|
||||||
"\1\40\7\0\1\41\4\0\1\42\1\43\7\0\1\44"+
|
"\1\41\1\42\7\0\1\43\13\0\1\44\2\0\1\23"+
|
||||||
"\15\0\1\45\4\0\1\24\1\25\7\0\1\46\15\0"+
|
"\1\24\7\0\1\45\13\0\1\46\2\0\2\26\7\0"+
|
||||||
"\1\47\4\0\2\27\7\0\1\50\4\0\1\3\1\4"+
|
"\1\47\2\0\1\3\1\4\1\16\1\7\1\20\1\21"+
|
||||||
"\1\17\1\10\1\21\1\22\2\12\1\13\1\23\4\0"+
|
"\2\11\1\12\1\22\2\0\2\23\1\0\1\50\1\0"+
|
||||||
"\2\24\1\0\1\51\1\0\1\11\2\52\1\0\1\24"+
|
"\1\10\2\51\1\0\1\23\2\0\1\23\1\24\1\0"+
|
||||||
"\4\0\1\24\1\25\1\0\1\53\1\0\1\11\2\54"+
|
"\1\52\1\0\1\10\2\53\1\54\1\24\2\0\1\23"+
|
||||||
"\1\55\1\25\4\0\1\24\1\25\1\0\1\51\1\0"+
|
"\1\24\1\0\1\50\1\0\1\10\2\51\1\0\1\25"+
|
||||||
"\1\11\2\52\1\0\1\26\4\0\2\27\1\0\1\56"+
|
"\2\0\2\26\1\0\1\55\2\0\1\55\2\0\1\26"+
|
||||||
"\2\0\1\56\2\0\1\27\4\0\2\30\1\0\1\52"+
|
"\2\0\2\27\1\0\1\51\1\0\1\10\2\51\1\0"+
|
||||||
"\1\0\1\11\2\52\1\0\1\30\4\0\1\30\1\31"+
|
"\1\27\2\0\1\27\1\30\1\0\1\53\1\0\1\10"+
|
||||||
"\1\0\1\54\1\0\1\11\2\54\1\55\1\31\4\0"+
|
"\2\53\1\54\1\30\2\0\1\27\1\30\1\0\1\51"+
|
||||||
"\1\30\1\31\1\0\1\52\1\0\1\11\2\52\1\0"+
|
"\1\0\1\10\2\51\1\0\1\31\3\0\1\32\1\0"+
|
||||||
"\1\32\5\0\1\33\1\0\1\55\2\0\3\55\1\33"+
|
"\1\54\2\0\3\54\1\32\2\0\2\33\1\0\1\56"+
|
||||||
"\4\0\2\34\1\0\1\57\1\0\1\11\2\12\1\13"+
|
"\1\0\1\10\2\11\1\12\1\33\2\0\1\33\1\34"+
|
||||||
"\1\34\4\0\1\34\1\35\1\0\1\60\1\0\1\11"+
|
"\1\0\1\57\1\0\1\10\2\14\1\15\1\34\2\0"+
|
||||||
"\2\15\1\16\1\35\4\0\1\34\1\35\1\0\1\57"+
|
"\1\33\1\34\1\0\1\56\1\0\1\10\2\11\1\12"+
|
||||||
"\1\0\1\11\2\12\1\13\1\36\4\0\2\37\1\0"+
|
"\1\35\2\0\2\36\1\0\1\11\1\0\1\10\2\11"+
|
||||||
"\1\12\1\0\1\11\2\12\1\13\1\37\4\0\1\37"+
|
"\1\12\1\36\2\0\1\36\1\37\1\0\1\14\1\0"+
|
||||||
"\1\40\1\0\1\15\1\0\1\11\2\15\1\16\1\40"+
|
"\1\10\2\14\1\15\1\37\2\0\1\36\1\37\1\0"+
|
||||||
"\4\0\1\37\1\40\1\0\1\12\1\0\1\11\2\12"+
|
"\1\11\1\0\1\10\2\11\1\12\1\40\2\0\2\41"+
|
||||||
"\1\13\1\41\4\0\2\42\1\0\1\13\2\0\3\13"+
|
"\1\0\1\12\2\0\3\12\1\41\2\0\1\41\1\42"+
|
||||||
"\1\42\4\0\1\42\1\43\1\0\1\16\2\0\3\16"+
|
"\1\0\1\15\2\0\3\15\1\42\2\0\1\41\1\42"+
|
||||||
"\1\43\4\0\1\42\1\43\1\0\1\13\2\0\3\13"+
|
"\1\0\1\12\2\0\3\12\1\43\4\0\1\16\6\0"+
|
||||||
"\1\44\6\0\1\17\6\0\1\45\4\0\1\24\1\25"+
|
"\1\44\2\0\1\23\1\24\1\0\1\60\1\0\1\10"+
|
||||||
"\1\0\1\61\1\0\1\11\2\52\1\0\1\26\4\0"+
|
"\2\51\1\0\1\25\2\0\2\26\1\0\1\55\2\0"+
|
||||||
"\2\27\1\0\1\56\2\0\1\56\2\0\1\50\4\0"+
|
"\1\55\2\0\1\47\2\0\2\23\7\0\1\23\2\0"+
|
||||||
"\2\24\7\0\1\24\4\0\2\30\7\0\1\30\4\0"+
|
"\2\27\7\0\1\27\2\0\2\33\7\0\1\33\2\0"+
|
||||||
"\2\34\7\0\1\34\4\0\2\37\7\0\1\37\4\0"+
|
"\2\36\7\0\1\36\2\0\2\41\7\0\1\41\2\0"+
|
||||||
"\2\42\7\0\1\42\4\0\2\62\7\0\1\62\4\0"+
|
"\2\61\7\0\1\61\2\0\2\23\7\0\1\62\2\0"+
|
||||||
"\2\24\7\0\1\63\4\0\2\62\1\0\1\56\2\0"+
|
"\2\61\1\0\1\55\2\0\1\55\2\0\1\61\2\0"+
|
||||||
"\1\56\2\0\1\62\4\0\2\24\1\0\1\61\1\0"+
|
"\2\23\1\0\1\60\1\0\1\10\2\51\1\0\1\23"+
|
||||||
"\1\11\2\52\1\0\1\24\3\0";
|
"\1\0";
|
||||||
|
|
||||||
private static int [] zzUnpackTrans() {
|
private static int [] zzUnpackTrans() {
|
||||||
int [] result = new int[658];
|
int [] result = new int[552];
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
|
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
|
||||||
return result;
|
return result;
|
||||||
|
@ -271,11 +269,11 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
||||||
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
|
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
|
||||||
|
|
||||||
private static final String ZZ_ATTRIBUTE_PACKED_0 =
|
private static final String ZZ_ATTRIBUTE_PACKED_0 =
|
||||||
"\1\0\1\11\3\1\1\11\1\1\13\0\4\1\2\0"+
|
"\1\0\1\11\3\1\1\11\13\0\4\1\2\0\1\1"+
|
||||||
"\1\1\1\0\17\1\1\0\1\1\3\0\5\1";
|
"\1\0\17\1\1\0\1\1\3\0\5\1";
|
||||||
|
|
||||||
private static int [] zzUnpackAttribute() {
|
private static int [] zzUnpackAttribute() {
|
||||||
int [] result = new int[51];
|
int [] result = new int[50];
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
|
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
|
||||||
return result;
|
return result;
|
||||||
|
@ -372,7 +370,6 @@ public final void getText(CharTermAttribute t) {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new scanner
|
* Creates a new scanner
|
||||||
* There is also a java.io.InputStream version of this constructor.
|
|
||||||
*
|
*
|
||||||
* @param in the java.io.Reader to read input from.
|
* @param in the java.io.Reader to read input from.
|
||||||
*/
|
*/
|
||||||
|
@ -380,7 +377,6 @@ public final void getText(CharTermAttribute t) {
|
||||||
this.zzReader = in;
|
this.zzReader = in;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unpacks the compressed character translation table.
|
* Unpacks the compressed character translation table.
|
||||||
|
@ -392,7 +388,7 @@ public final void getText(CharTermAttribute t) {
|
||||||
char [] map = new char[0x10000];
|
char [] map = new char[0x10000];
|
||||||
int i = 0; /* index in packed string */
|
int i = 0; /* index in packed string */
|
||||||
int j = 0; /* index in unpacked array */
|
int j = 0; /* index in unpacked array */
|
||||||
while (i < 1154) {
|
while (i < 1138) {
|
||||||
int count = packed.charAt(i++);
|
int count = packed.charAt(i++);
|
||||||
char value = packed.charAt(i++);
|
char value = packed.charAt(i++);
|
||||||
do map[j++] = value; while (--count > 0);
|
do map[j++] = value; while (--count > 0);
|
||||||
|
|
|
@ -116,8 +116,6 @@ LETTER = !(![:letter:]|{CJ})
|
||||||
// Chinese and Japanese (but NOT Korean, which is included in [:letter:])
|
// Chinese and Japanese (but NOT Korean, which is included in [:letter:])
|
||||||
CJ = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
|
CJ = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
|
||||||
|
|
||||||
WHITESPACE = \r\n | [ \r\n\t\f]
|
|
||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
{ALPHANUM} { return ALPHANUM; }
|
{ALPHANUM} { return ALPHANUM; }
|
||||||
|
@ -131,4 +129,4 @@ WHITESPACE = \r\n | [ \r\n\t\f]
|
||||||
{ACRONYM_DEP} { return ACRONYM_DEP; }
|
{ACRONYM_DEP} { return ACRONYM_DEP; }
|
||||||
|
|
||||||
/** Ignore the rest */
|
/** Ignore the rest */
|
||||||
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
[^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
||||||
|
|
|
@ -18,4 +18,4 @@
|
||||||
|
|
||||||
WARNING: if you change StandardTokenizerImpl*.jflex or UAX29URLEmailTokenizer
|
WARNING: if you change StandardTokenizerImpl*.jflex or UAX29URLEmailTokenizer
|
||||||
and need to regenerate the tokenizer, only use the trunk version
|
and need to regenerate the tokenizer, only use the trunk version
|
||||||
of JFlex 1.5 (with a minimum SVN revision 607) at the moment!
|
of JFlex 1.5 (with a minimum SVN revision 722) at the moment!
|
||||||
|
|
|
@ -1,11 +1,12 @@
|
||||||
/*
|
/*
|
||||||
* Copyright 2010 The Apache Software Foundation.
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
@ -13,7 +14,6 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// Generated using ICU4J 52.1.0.0
|
// Generated using ICU4J 52.1.0.0
|
||||||
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
|
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
|
||||||
|
|
||||||
|
@ -39,6 +39,12 @@ FormatSupp = (
|
||||||
| ([\ud834][\uDD73-\uDD7A])
|
| ([\ud834][\uDD73-\uDD7A])
|
||||||
| ([\udb40][\uDC01\uDC20-\uDC7F])
|
| ([\udb40][\uDC01\uDC20-\uDC7F])
|
||||||
)
|
)
|
||||||
|
NumericSupp = (
|
||||||
|
([\ud805][\uDEC0-\uDEC9])
|
||||||
|
| ([\ud804][\uDC66-\uDC6F\uDCF0-\uDCF9\uDD36-\uDD3F\uDDD0-\uDDD9])
|
||||||
|
| ([\ud835][\uDFCE-\uDFFF])
|
||||||
|
| ([\ud801][\uDCA0-\uDCA9])
|
||||||
|
)
|
||||||
ExtendSupp = (
|
ExtendSupp = (
|
||||||
([\ud81b][\uDF51-\uDF7E\uDF8F-\uDF92])
|
([\ud81b][\uDF51-\uDF7E\uDF8F-\uDF92])
|
||||||
| ([\ud805][\uDEAB-\uDEB7])
|
| ([\ud805][\uDEAB-\uDEB7])
|
||||||
|
@ -48,12 +54,6 @@ ExtendSupp = (
|
||||||
| ([\udb40][\uDD00-\uDDEF])
|
| ([\udb40][\uDD00-\uDDEF])
|
||||||
| ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
|
| ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
|
||||||
)
|
)
|
||||||
NumericSupp = (
|
|
||||||
([\ud805][\uDEC0-\uDEC9])
|
|
||||||
| ([\ud804][\uDC66-\uDC6F\uDCF0-\uDCF9\uDD36-\uDD3F\uDDD0-\uDDD9])
|
|
||||||
| ([\ud835][\uDFCE-\uDFFF])
|
|
||||||
| ([\ud801][\uDCA0-\uDCA9])
|
|
||||||
)
|
|
||||||
KatakanaSupp = (
|
KatakanaSupp = (
|
||||||
([\ud82c][\uDC00])
|
([\ud82c][\uDC00])
|
||||||
)
|
)
|
||||||
|
@ -129,3 +129,15 @@ HiraganaSupp = (
|
||||||
([\ud83c][\uDE00])
|
([\ud83c][\uDE00])
|
||||||
| ([\ud82c][\uDC01])
|
| ([\ud82c][\uDC01])
|
||||||
)
|
)
|
||||||
|
SingleQuoteSupp = (
|
||||||
|
[]
|
||||||
|
)
|
||||||
|
DoubleQuoteSupp = (
|
||||||
|
[]
|
||||||
|
)
|
||||||
|
HebrewLetterSupp = (
|
||||||
|
[]
|
||||||
|
)
|
||||||
|
RegionalIndicatorSupp = (
|
||||||
|
([\ud83c][\uDDE6-\uDDFF])
|
||||||
|
)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -32,11 +32,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
|
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
|
||||||
* <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
|
* <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
|
||||||
* <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
|
* <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
|
||||||
|
* <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
|
||||||
|
* <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
%%
|
%%
|
||||||
|
|
||||||
%unicode 6.1
|
%unicode 6.3
|
||||||
%integer
|
%integer
|
||||||
%final
|
%final
|
||||||
%public
|
%public
|
||||||
|
@ -47,33 +49,40 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%buffer 4096
|
%buffer 4096
|
||||||
|
|
||||||
%include SUPPLEMENTARY.jflex-macro
|
%include SUPPLEMENTARY.jflex-macro
|
||||||
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
|
ALetter = (\p{WB:ALetter} | {ALetterSupp})
|
||||||
Format = ([\p{WB:Format}] | {FormatSupp})
|
Format = (\p{WB:Format} | {FormatSupp})
|
||||||
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
|
Numeric = ([\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] | {NumericSupp})
|
||||||
Extend = ([\p{WB:Extend}] | {ExtendSupp})
|
Extend = (\p{WB:Extend} | {ExtendSupp})
|
||||||
Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
|
Katakana = (\p{WB:Katakana} | {KatakanaSupp})
|
||||||
MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
|
MidLetter = (\p{WB:MidLetter} | {MidLetterSupp})
|
||||||
MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
|
MidNum = (\p{WB:MidNum} | {MidNumSupp})
|
||||||
MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
|
MidNumLet = (\p{WB:MidNumLet} | {MidNumLetSupp})
|
||||||
ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
|
ExtendNumLet = (\p{WB:ExtendNumLet} | {ExtendNumLetSupp})
|
||||||
ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
|
ComplexContext = (\p{LB:Complex_Context} | {ComplexContextSupp})
|
||||||
Han = ([\p{Script:Han}] | {HanSupp})
|
Han = (\p{Script:Han} | {HanSupp})
|
||||||
Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})
|
Hiragana = (\p{Script:Hiragana} | {HiraganaSupp})
|
||||||
|
SingleQuote = (\p{WB:Single_Quote} | {SingleQuoteSupp})
|
||||||
|
DoubleQuote = (\p{WB:Double_Quote} | {DoubleQuoteSupp})
|
||||||
|
HebrewLetter = (\p{WB:Hebrew_Letter} | {HebrewLetterSupp})
|
||||||
|
RegionalIndicator = (\p{WB:Regional_Indicator} | {RegionalIndicatorSupp})
|
||||||
|
HebrewOrALetter = ({HebrewLetter} | {ALetter})
|
||||||
|
|
||||||
// Script=Hangul & Aletter
|
|
||||||
HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
|
|
||||||
// UAX#29 WB4. X (Extend | Format)* --> X
|
// UAX#29 WB4. X (Extend | Format)* --> X
|
||||||
//
|
//
|
||||||
ALetterEx = {ALetter} ({Format} | {Extend})*
|
HangulEx = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] ({Format} | {Extend})*
|
||||||
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
|
HebrewOrALetterEx = {HebrewOrALetter} ({Format} | {Extend})*
|
||||||
NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
|
NumericEx = {Numeric} ({Format} | {Extend})*
|
||||||
KatakanaEx = {Katakana} ({Format} | {Extend})*
|
KatakanaEx = {Katakana} ({Format} | {Extend})*
|
||||||
MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})*
|
MidLetterEx = ({MidLetter} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})*
|
||||||
MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})*
|
MidNumericEx = ({MidNum} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})*
|
||||||
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
|
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
|
||||||
|
HanEx = {Han} ({Format} | {Extend})*
|
||||||
|
HiraganaEx = {Hiragana} ({Format} | {Extend})*
|
||||||
|
SingleQuoteEx = {SingleQuote} ({Format} | {Extend})*
|
||||||
|
DoubleQuoteEx = {DoubleQuote} ({Format} | {Extend})*
|
||||||
|
HebrewLetterEx = {HebrewLetter} ({Format} | {Extend})*
|
||||||
|
RegionalIndicatorEx = {RegionalIndicator} ({Format} | {Extend})*
|
||||||
|
|
||||||
HanEx = {Han} ({Format} | {Extend})*
|
|
||||||
HiraganaEx = {Hiragana} ({Format} | {Extend})*
|
|
||||||
|
|
||||||
%{
|
%{
|
||||||
/** Alphanumeric sequences */
|
/** Alphanumeric sequences */
|
||||||
|
@ -121,15 +130,12 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
|
||||||
<<EOF>> { return StandardTokenizerInterface.YYEOF; }
|
<<EOF>> { return StandardTokenizerInterface.YYEOF; }
|
||||||
|
|
||||||
// UAX#29 WB8. Numeric × Numeric
|
// UAX#29 WB8. Numeric × Numeric
|
||||||
// WB11. Numeric (MidNum | MidNumLet) × Numeric
|
// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
|
||||||
// WB12. Numeric × (MidNum | MidNumLet) Numeric
|
// WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
|
||||||
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
||||||
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
|
// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
|
||||||
//
|
//
|
||||||
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
|
{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
|
||||||
| {MidNumericEx} {NumericEx}
|
|
||||||
| {NumericEx})*
|
|
||||||
{ExtendNumLetEx}*
|
|
||||||
{ return NUMERIC_TYPE; }
|
{ return NUMERIC_TYPE; }
|
||||||
|
|
||||||
// subset of the below for typing purposes only!
|
// subset of the below for typing purposes only!
|
||||||
|
@ -139,22 +145,32 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
|
||||||
{KatakanaEx}+
|
{KatakanaEx}+
|
||||||
{ return KATAKANA_TYPE; }
|
{ return KATAKANA_TYPE; }
|
||||||
|
|
||||||
// UAX#29 WB5. ALetter × ALetter
|
// UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
|
||||||
// WB6. ALetter × (MidLetter | MidNumLet) ALetter
|
// WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
|
||||||
// WB7. ALetter (MidLetter | MidNumLet) × ALetter
|
// WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
|
||||||
// WB9. ALetter × Numeric
|
// WB7a. Hebrew_Letter × Single_Quote
|
||||||
// WB10. Numeric × ALetter
|
// WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter
|
||||||
|
// WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter
|
||||||
|
// WB9. (ALetter | Hebrew_Letter) × Numeric
|
||||||
|
// WB10. Numeric × (ALetter | Hebrew_Letter)
|
||||||
// WB13. Katakana × Katakana
|
// WB13. Katakana × Katakana
|
||||||
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
||||||
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
|
// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
|
||||||
//
|
//
|
||||||
{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
|
{ExtendNumLetEx}* ( {KatakanaEx}
|
||||||
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
|
| ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
|
||||||
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
|
| {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )*
|
||||||
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
|
| {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )*
|
||||||
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
|
)+
|
||||||
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
|
)
|
||||||
{ExtendNumLetEx}*
|
({ExtendNumLetEx}+ ( {KatakanaEx}
|
||||||
|
| ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
|
||||||
|
| {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )*
|
||||||
|
| {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )*
|
||||||
|
)+
|
||||||
|
)
|
||||||
|
)*
|
||||||
|
{ExtendNumLetEx}*
|
||||||
{ return WORD_TYPE; }
|
{ return WORD_TYPE; }
|
||||||
|
|
||||||
|
|
||||||
|
@ -166,7 +182,7 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
|
||||||
// annex. That means that satisfactory treatment of languages like Chinese
|
// annex. That means that satisfactory treatment of languages like Chinese
|
||||||
// or Thai requires special handling.
|
// or Thai requires special handling.
|
||||||
//
|
//
|
||||||
// In Unicode 6.1, only one character has the \p{Line_Break = Contingent_Break}
|
// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
|
||||||
// property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
|
// property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
|
||||||
//
|
//
|
||||||
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
|
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
|
||||||
|
@ -188,6 +204,8 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
|
||||||
// UAX#29 WB3. CR × LF
|
// UAX#29 WB3. CR × LF
|
||||||
// WB3a. (Newline | CR | LF) ÷
|
// WB3a. (Newline | CR | LF) ÷
|
||||||
// WB3b. ÷ (Newline | CR | LF)
|
// WB3b. ÷ (Newline | CR | LF)
|
||||||
|
// WB13c. Regional_Indicator × Regional_Indicator
|
||||||
// WB14. Any ÷ Any
|
// WB14. Any ÷ Any
|
||||||
//
|
//
|
||||||
[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
|
{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
|
||||||
|
{ /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -35,11 +35,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
|
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
|
||||||
* <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
|
* <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
|
||||||
* <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
|
* <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
|
||||||
|
* <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
|
||||||
|
* <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
%%
|
%%
|
||||||
|
|
||||||
%unicode 6.1
|
%unicode 6.3
|
||||||
%integer
|
%integer
|
||||||
%final
|
%final
|
||||||
%public
|
%public
|
||||||
|
@ -50,33 +52,39 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%buffer 4096
|
%buffer 4096
|
||||||
|
|
||||||
%include SUPPLEMENTARY.jflex-macro
|
%include SUPPLEMENTARY.jflex-macro
|
||||||
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
|
ALetter = (\p{WB:ALetter} | {ALetterSupp})
|
||||||
Format = ([\p{WB:Format}] | {FormatSupp})
|
Format = (\p{WB:Format} | {FormatSupp})
|
||||||
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
|
Numeric = ([\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] | {NumericSupp})
|
||||||
Extend = ([\p{WB:Extend}] | {ExtendSupp})
|
Extend = (\p{WB:Extend} | {ExtendSupp})
|
||||||
Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
|
Katakana = (\p{WB:Katakana} | {KatakanaSupp})
|
||||||
MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
|
MidLetter = (\p{WB:MidLetter} | {MidLetterSupp})
|
||||||
MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
|
MidNum = (\p{WB:MidNum} | {MidNumSupp})
|
||||||
MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
|
MidNumLet = (\p{WB:MidNumLet} | {MidNumLetSupp})
|
||||||
ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
|
ExtendNumLet = (\p{WB:ExtendNumLet} | {ExtendNumLetSupp})
|
||||||
ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
|
ComplexContext = (\p{LB:Complex_Context} | {ComplexContextSupp})
|
||||||
Han = ([\p{Script:Han}] | {HanSupp})
|
Han = (\p{Script:Han} | {HanSupp})
|
||||||
Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})
|
Hiragana = (\p{Script:Hiragana} | {HiraganaSupp})
|
||||||
|
SingleQuote = (\p{WB:Single_Quote} | {SingleQuoteSupp})
|
||||||
|
DoubleQuote = (\p{WB:Double_Quote} | {DoubleQuoteSupp})
|
||||||
|
HebrewLetter = (\p{WB:Hebrew_Letter} | {HebrewLetterSupp})
|
||||||
|
RegionalIndicator = (\p{WB:Regional_Indicator} | {RegionalIndicatorSupp})
|
||||||
|
HebrewOrALetter = ({HebrewLetter} | {ALetter})
|
||||||
|
|
||||||
// Script=Hangul & Aletter
|
|
||||||
HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
|
|
||||||
// UAX#29 WB4. X (Extend | Format)* --> X
|
// UAX#29 WB4. X (Extend | Format)* --> X
|
||||||
//
|
//
|
||||||
ALetterEx = {ALetter} ({Format} | {Extend})*
|
HangulEx = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] ({Format} | {Extend})*
|
||||||
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
|
HebrewOrALetterEx = {HebrewOrALetter} ({Format} | {Extend})*
|
||||||
NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
|
NumericEx = {Numeric} ({Format} | {Extend})*
|
||||||
KatakanaEx = {Katakana} ({Format} | {Extend})*
|
KatakanaEx = {Katakana} ({Format} | {Extend})*
|
||||||
MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})*
|
MidLetterEx = ({MidLetter} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})*
|
||||||
MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})*
|
MidNumericEx = ({MidNum} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})*
|
||||||
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
|
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
|
||||||
|
HanEx = {Han} ({Format} | {Extend})*
|
||||||
HanEx = {Han} ({Format} | {Extend})*
|
HiraganaEx = {Hiragana} ({Format} | {Extend})*
|
||||||
HiraganaEx = {Hiragana} ({Format} | {Extend})*
|
SingleQuoteEx = {SingleQuote} ({Format} | {Extend})*
|
||||||
|
DoubleQuoteEx = {DoubleQuote} ({Format} | {Extend})*
|
||||||
|
HebrewLetterEx = {HebrewLetter} ({Format} | {Extend})*
|
||||||
|
RegionalIndicatorEx = {RegionalIndicator} ({Format} | {Extend})*
|
||||||
|
|
||||||
// URL and E-mail syntax specifications:
|
// URL and E-mail syntax specifications:
|
||||||
//
|
//
|
||||||
|
@ -213,40 +221,47 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
|
||||||
{EMAIL} { return EMAIL_TYPE; }
|
{EMAIL} { return EMAIL_TYPE; }
|
||||||
|
|
||||||
// UAX#29 WB8. Numeric × Numeric
|
// UAX#29 WB8. Numeric × Numeric
|
||||||
// WB11. Numeric (MidNum | MidNumLet) × Numeric
|
// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
|
||||||
// WB12. Numeric × (MidNum | MidNumLet) Numeric
|
// WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
|
||||||
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
||||||
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
|
// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
|
||||||
//
|
//
|
||||||
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
|
{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
|
||||||
| {MidNumericEx} {NumericEx}
|
|
||||||
| {NumericEx})*
|
|
||||||
{ExtendNumLetEx}*
|
|
||||||
{ return NUMERIC_TYPE; }
|
{ return NUMERIC_TYPE; }
|
||||||
|
|
||||||
// subset of the below for typing purposes only!
|
// subset of the below for typing purposes only!
|
||||||
{HangulEx}+
|
{HangulEx}+
|
||||||
{ return HANGUL_TYPE; }
|
{ return HANGUL_TYPE; }
|
||||||
|
|
||||||
{KatakanaEx}+
|
{KatakanaEx}+
|
||||||
{ return KATAKANA_TYPE; }
|
{ return KATAKANA_TYPE; }
|
||||||
|
|
||||||
// UAX#29 WB5. ALetter × ALetter
|
// UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
|
||||||
// WB6. ALetter × (MidLetter | MidNumLet) ALetter
|
// WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
|
||||||
// WB7. ALetter (MidLetter | MidNumLet) × ALetter
|
// WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
|
||||||
// WB9. ALetter × Numeric
|
// WB7a. Hebrew_Letter × Single_Quote
|
||||||
// WB10. Numeric × ALetter
|
// WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter
|
||||||
|
// WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter
|
||||||
|
// WB9. (ALetter | Hebrew_Letter) × Numeric
|
||||||
|
// WB10. Numeric × (ALetter | Hebrew_Letter)
|
||||||
// WB13. Katakana × Katakana
|
// WB13. Katakana × Katakana
|
||||||
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
||||||
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
|
// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
|
||||||
//
|
//
|
||||||
{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
|
{ExtendNumLetEx}* ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
|
||||||
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
|
| ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
|
||||||
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
|
| {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )*
|
||||||
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
|
| {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )*
|
||||||
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
|
)+
|
||||||
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
|
)
|
||||||
{ExtendNumLetEx}*
|
({ExtendNumLetEx}+ ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
|
||||||
|
| ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
|
||||||
|
| {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )*
|
||||||
|
| {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )*
|
||||||
|
)+
|
||||||
|
)
|
||||||
|
)*
|
||||||
|
{ExtendNumLetEx}*
|
||||||
{ return WORD_TYPE; }
|
{ return WORD_TYPE; }
|
||||||
|
|
||||||
|
|
||||||
|
@ -258,7 +273,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
|
||||||
// annex. That means that satisfactory treatment of languages like Chinese
|
// annex. That means that satisfactory treatment of languages like Chinese
|
||||||
// or Thai requires special handling.
|
// or Thai requires special handling.
|
||||||
//
|
//
|
||||||
// In Unicode 6.1, only one character has the \p{Line_Break = Contingent_Break}
|
// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
|
||||||
// property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
|
// property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
|
||||||
//
|
//
|
||||||
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
|
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
|
||||||
|
@ -280,6 +295,8 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
|
||||||
// UAX#29 WB3. CR × LF
|
// UAX#29 WB3. CR × LF
|
||||||
// WB3a. (Newline | CR | LF) ÷
|
// WB3a. (Newline | CR | LF) ÷
|
||||||
// WB3b. ÷ (Newline | CR | LF)
|
// WB3b. ÷ (Newline | CR | LF)
|
||||||
|
// WB13c. Regional_Indicator × Regional_Indicator
|
||||||
// WB14. Any ÷ Any
|
// WB14. Any ÷ Any
|
||||||
//
|
//
|
||||||
[^] { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
|
{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
|
||||||
|
{ /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/* The following code was generated by JFlex. */
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT */
|
||||||
|
|
||||||
package org.apache.lucene.analysis.wikipedia;
|
package org.apache.lucene.analysis.wikipedia;
|
||||||
|
|
||||||
|
@ -84,21 +84,20 @@ class WikipediaTokenizerImpl {
|
||||||
private static final int [] ZZ_ACTION = zzUnpackAction();
|
private static final int [] ZZ_ACTION = zzUnpackAction();
|
||||||
|
|
||||||
private static final String ZZ_ACTION_PACKED_0 =
|
private static final String ZZ_ACTION_PACKED_0 =
|
||||||
"\12\0\4\1\4\2\1\3\1\1\1\4\1\1\2\5"+
|
"\12\0\4\1\4\2\1\3\1\4\1\1\2\5\1\6"+
|
||||||
"\1\6\2\5\1\7\1\5\2\10\1\11\1\12\1\11"+
|
"\1\5\1\7\1\5\2\10\1\11\1\5\1\12\1\11"+
|
||||||
"\1\13\1\14\1\10\1\15\1\16\1\15\1\17\1\20"+
|
"\1\13\1\14\1\15\1\16\1\15\1\17\1\20\1\10"+
|
||||||
"\1\10\1\21\1\10\4\22\1\23\1\22\1\24\1\25"+
|
"\1\21\1\10\4\22\1\23\1\24\1\25\1\26\3\0"+
|
||||||
"\1\26\3\0\1\27\14\0\1\30\1\31\1\32\1\33"+
|
"\1\27\14\0\1\30\1\31\1\32\1\33\1\11\1\0"+
|
||||||
"\1\11\1\0\1\34\1\35\1\36\1\0\1\37\1\0"+
|
"\1\34\1\35\1\36\1\0\1\37\1\0\1\40\3\0"+
|
||||||
"\1\40\3\0\1\41\1\42\2\43\1\42\2\44\2\0"+
|
"\1\41\1\42\2\43\1\42\2\44\2\0\1\43\1\0"+
|
||||||
"\1\43\1\0\14\43\1\42\3\0\1\11\1\45\3\0"+
|
"\14\43\1\42\3\0\1\11\1\45\3\0\1\46\1\47"+
|
||||||
"\1\46\1\47\5\0\1\50\4\0\1\50\2\0\2\50"+
|
"\5\0\1\50\4\0\1\50\2\0\2\50\2\0\1\11"+
|
||||||
"\2\0\1\11\5\0\1\31\1\42\1\43\1\51\3\0"+
|
"\5\0\1\31\1\42\1\43\1\51\3\0\1\11\2\0"+
|
||||||
"\1\11\2\0\1\52\30\0\1\53\2\0\1\54\1\55"+
|
"\1\52\30\0\1\53\2\0\1\54\1\55\1\56";
|
||||||
"\1\56";
|
|
||||||
|
|
||||||
private static int [] zzUnpackAction() {
|
private static int [] zzUnpackAction() {
|
||||||
int [] result = new int[184];
|
int [] result = new int[181];
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
|
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
|
||||||
return result;
|
return result;
|
||||||
|
@ -125,30 +124,30 @@ class WikipediaTokenizerImpl {
|
||||||
private static final String ZZ_ROWMAP_PACKED_0 =
|
private static final String ZZ_ROWMAP_PACKED_0 =
|
||||||
"\0\0\0\54\0\130\0\204\0\260\0\334\0\u0108\0\u0134"+
|
"\0\0\0\54\0\130\0\204\0\260\0\334\0\u0108\0\u0134"+
|
||||||
"\0\u0160\0\u018c\0\u01b8\0\u01e4\0\u0210\0\u023c\0\u0268\0\u0294"+
|
"\0\u0160\0\u018c\0\u01b8\0\u01e4\0\u0210\0\u023c\0\u0268\0\u0294"+
|
||||||
"\0\u02c0\0\u02ec\0\u01b8\0\u0318\0\u0344\0\u0370\0\u01b8\0\u039c"+
|
"\0\u02c0\0\u02ec\0\u01b8\0\u0318\0\u0344\0\u01b8\0\u0370\0\u039c"+
|
||||||
"\0\u03c8\0\u03f4\0\u0420\0\u044c\0\u0478\0\u01b8\0\u039c\0\u04a4"+
|
"\0\u03c8\0\u03f4\0\u0420\0\u01b8\0\u0370\0\u044c\0\u0478\0\u01b8"+
|
||||||
"\0\u01b8\0\u04d0\0\u04fc\0\u0528\0\u0554\0\u0580\0\u05ac\0\u05d8"+
|
"\0\u04a4\0\u04d0\0\u04fc\0\u0528\0\u0554\0\u0580\0\u05ac\0\u05d8"+
|
||||||
"\0\u0604\0\u0630\0\u065c\0\u0688\0\u06b4\0\u01b8\0\u06e0\0\u039c"+
|
"\0\u0604\0\u0630\0\u065c\0\u01b8\0\u0688\0\u0370\0\u06b4\0\u06e0"+
|
||||||
"\0\u070c\0\u0738\0\u0764\0\u0790\0\u01b8\0\u01b8\0\u07bc\0\u07e8"+
|
"\0\u070c\0\u01b8\0\u01b8\0\u0738\0\u0764\0\u0790\0\u01b8\0\u07bc"+
|
||||||
"\0\u0814\0\u01b8\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+
|
"\0\u07e8\0\u0814\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+
|
||||||
"\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u0a24\0\u0a50\0\u0a7c"+
|
"\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u01b8\0\u01b8\0\u0a24"+
|
||||||
"\0\u01b8\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b00\0\u01b8\0\u0b2c"+
|
"\0\u0a50\0\u0a7c\0\u0a7c\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b2c"+
|
||||||
"\0\u0b58\0\u0b84\0\u0bb0\0\u0bdc\0\u0c08\0\u0c34\0\u0c60\0\u0c8c"+
|
"\0\u0b58\0\u0b84\0\u0bb0\0\u0bdc\0\u0c08\0\u0c34\0\u0c60\0\u0c8c"+
|
||||||
"\0\u0cb8\0\u0ce4\0\u0d10\0\u0898\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+
|
"\0\u0814\0\u0cb8\0\u0ce4\0\u0d10\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+
|
||||||
"\0\u0dec\0\u0e18\0\u0e44\0\u0e70\0\u0e9c\0\u0ec8\0\u0ef4\0\u0f20"+
|
"\0\u0dec\0\u0e18\0\u0e44\0\u0e70\0\u0e9c\0\u0ec8\0\u0ef4\0\u0f20"+
|
||||||
"\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u1080"+
|
"\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u01b8"+
|
||||||
"\0\u10ac\0\u10d8\0\u01b8\0\u1104\0\u1130\0\u115c\0\u1188\0\u01b8"+
|
"\0\u1080\0\u10ac\0\u10d8\0\u1104\0\u01b8\0\u1130\0\u115c\0\u1188"+
|
||||||
"\0\u11b4\0\u11e0\0\u120c\0\u1238\0\u1264\0\u1290\0\u12bc\0\u12e8"+
|
"\0\u11b4\0\u11e0\0\u120c\0\u1238\0\u1264\0\u1290\0\u12bc\0\u12e8"+
|
||||||
"\0\u1314\0\u1340\0\u136c\0\u1398\0\u13c4\0\u086c\0\u09f8\0\u13f0"+
|
"\0\u1314\0\u1340\0\u07e8\0\u0974\0\u136c\0\u1398\0\u13c4\0\u13f0"+
|
||||||
"\0\u141c\0\u1448\0\u1474\0\u14a0\0\u14cc\0\u14f8\0\u1524\0\u01b8"+
|
"\0\u141c\0\u1448\0\u1474\0\u14a0\0\u01b8\0\u14cc\0\u14f8\0\u1524"+
|
||||||
"\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u1658\0\u1684"+
|
"\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u01b8\0\u1658"+
|
||||||
"\0\u16b0\0\u01b8\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+
|
"\0\u1684\0\u16b0\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+
|
||||||
"\0\u17e4\0\u1810\0\u183c\0\u1868\0\u1894\0\u18c0\0\u18ec\0\u1918"+
|
"\0\u17e4\0\u1810\0\u183c\0\u1868\0\u1894\0\u18c0\0\u18ec\0\u1918"+
|
||||||
"\0\u1944\0\u1970\0\u199c\0\u19c8\0\u19f4\0\u1a20\0\u1a4c\0\u1a78"+
|
"\0\u1944\0\u1970\0\u199c\0\u19c8\0\u19f4\0\u1a20\0\u1a4c\0\u1a78"+
|
||||||
"\0\u1aa4\0\u1ad0\0\u1afc\0\u1b28\0\u1b54\0\u01b8\0\u01b8\0\u01b8";
|
"\0\u1aa4\0\u1ad0\0\u01b8\0\u01b8\0\u01b8";
|
||||||
|
|
||||||
private static int [] zzUnpackRowMap() {
|
private static int [] zzUnpackRowMap() {
|
||||||
int [] result = new int[184];
|
int [] result = new int[181];
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
|
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
|
||||||
return result;
|
return result;
|
||||||
|
@ -172,152 +171,149 @@ class WikipediaTokenizerImpl {
|
||||||
|
|
||||||
private static final String ZZ_TRANS_PACKED_0 =
|
private static final String ZZ_TRANS_PACKED_0 =
|
||||||
"\1\13\1\14\5\13\1\15\1\13\1\16\3\13\1\17"+
|
"\1\13\1\14\5\13\1\15\1\13\1\16\3\13\1\17"+
|
||||||
"\1\20\1\21\1\22\1\23\1\24\2\13\1\25\2\13"+
|
"\1\20\1\21\1\22\1\23\3\13\1\24\2\13\15\17"+
|
||||||
"\15\17\1\26\2\13\3\17\1\13\7\27\1\30\5\27"+
|
"\1\25\2\13\3\17\1\13\7\26\1\27\5\26\4\30"+
|
||||||
"\4\31\1\27\1\32\3\27\1\33\1\27\15\31\3\27"+
|
"\5\26\1\31\1\26\15\30\3\26\3\30\10\26\1\27"+
|
||||||
"\3\31\10\27\1\30\5\27\4\34\1\27\1\32\3\27"+
|
"\5\26\4\32\5\26\1\33\1\26\15\32\3\26\3\32"+
|
||||||
"\1\35\1\27\15\34\3\27\3\34\1\27\7\36\1\37"+
|
"\1\26\7\34\1\35\5\34\4\36\1\34\1\37\2\26"+
|
||||||
"\5\36\4\40\1\36\1\32\2\27\1\36\1\41\1\36"+
|
"\1\34\1\40\1\34\15\36\3\34\1\41\2\36\2\34"+
|
||||||
"\15\40\3\36\1\42\2\40\2\36\1\43\5\36\1\37"+
|
"\1\42\5\34\1\35\5\34\4\43\4\34\1\44\2\34"+
|
||||||
"\5\36\4\44\1\36\1\45\2\36\1\46\2\36\15\44"+
|
"\15\43\3\34\3\43\10\34\1\35\5\34\4\45\4\34"+
|
||||||
"\3\36\3\44\10\36\1\37\5\36\4\47\1\36\1\45"+
|
"\1\44\2\34\15\45\3\34\3\45\10\34\1\35\5\34"+
|
||||||
"\2\36\1\46\2\36\15\47\3\36\3\47\10\36\1\37"+
|
"\4\45\4\34\1\46\2\34\15\45\3\34\3\45\10\34"+
|
||||||
"\5\36\4\47\1\36\1\45\2\36\1\50\2\36\15\47"+
|
"\1\35\1\34\1\47\3\34\4\50\7\34\15\50\3\34"+
|
||||||
"\3\36\3\47\10\36\1\37\1\36\1\51\3\36\4\52"+
|
"\3\50\10\34\1\51\5\34\4\52\7\34\15\52\1\34"+
|
||||||
"\1\36\1\45\5\36\15\52\3\36\3\52\10\36\1\53"+
|
"\1\53\1\34\3\52\1\34\1\54\1\55\5\54\1\56"+
|
||||||
"\5\36\4\54\1\36\1\45\5\36\15\54\1\36\1\55"+
|
"\1\54\1\57\3\54\4\60\4\54\1\61\2\54\15\60"+
|
||||||
"\1\36\3\54\1\36\1\56\1\57\5\56\1\60\1\56"+
|
"\2\54\1\62\3\60\1\54\55\0\1\63\62\0\1\64"+
|
||||||
"\1\61\3\56\4\62\1\56\1\63\2\56\1\64\2\56"+
|
"\4\0\4\65\7\0\6\65\1\66\6\65\3\0\3\65"+
|
||||||
"\15\62\2\56\1\65\3\62\1\56\55\0\1\66\62\0"+
|
"\12\0\1\67\43\0\1\70\1\71\1\72\1\73\2\74"+
|
||||||
"\1\67\4\0\4\70\7\0\6\70\1\71\6\70\3\0"+
|
"\1\0\1\75\3\0\1\75\1\17\1\20\1\21\1\22"+
|
||||||
"\3\70\12\0\1\72\43\0\1\73\1\74\1\75\1\76"+
|
"\7\0\15\17\3\0\3\17\3\0\1\76\1\0\1\77"+
|
||||||
"\2\77\1\0\1\100\3\0\1\100\1\17\1\20\1\21"+
|
"\2\100\1\0\1\101\3\0\1\101\3\20\1\22\7\0"+
|
||||||
"\1\22\7\0\15\17\3\0\3\17\3\0\1\101\1\0"+
|
"\15\20\3\0\3\20\2\0\1\70\1\102\1\72\1\73"+
|
||||||
"\1\102\2\103\1\0\1\104\3\0\1\104\3\20\1\22"+
|
"\2\100\1\0\1\101\3\0\1\101\1\21\1\20\1\21"+
|
||||||
"\7\0\15\20\3\0\3\20\2\0\1\73\1\105\1\75"+
|
"\1\22\7\0\15\21\3\0\3\21\3\0\1\103\1\0"+
|
||||||
"\1\76\2\103\1\0\1\104\3\0\1\104\1\21\1\20"+
|
"\1\77\2\74\1\0\1\75\3\0\1\75\4\22\7\0"+
|
||||||
"\1\21\1\22\7\0\15\21\3\0\3\21\3\0\1\106"+
|
"\15\22\3\0\3\22\26\0\1\104\73\0\1\105\16\0"+
|
||||||
"\1\0\1\102\2\77\1\0\1\100\3\0\1\100\4\22"+
|
"\1\64\4\0\4\65\7\0\15\65\3\0\3\65\16\0"+
|
||||||
"\7\0\15\22\3\0\3\22\24\0\1\13\55\0\1\107"+
|
"\4\30\7\0\15\30\3\0\3\30\27\0\1\106\42\0"+
|
||||||
"\73\0\1\110\16\0\1\67\4\0\4\70\7\0\15\70"+
|
"\4\32\7\0\15\32\3\0\3\32\27\0\1\107\42\0"+
|
||||||
"\3\0\3\70\16\0\4\31\7\0\15\31\3\0\3\31"+
|
"\4\36\7\0\15\36\3\0\3\36\24\0\1\26\45\0"+
|
||||||
"\24\0\1\27\56\0\1\111\42\0\4\34\7\0\15\34"+
|
"\4\36\7\0\2\36\1\110\12\36\3\0\3\36\2\0"+
|
||||||
"\3\0\3\34\27\0\1\112\42\0\4\40\7\0\15\40"+
|
"\1\111\67\0\4\43\7\0\15\43\3\0\3\43\26\0"+
|
||||||
"\3\0\3\40\16\0\4\40\7\0\2\40\1\113\12\40"+
|
"\1\112\43\0\4\45\7\0\15\45\3\0\3\45\26\0"+
|
||||||
"\3\0\3\40\2\0\1\114\67\0\4\44\7\0\15\44"+
|
"\1\113\37\0\1\114\57\0\4\50\7\0\15\50\3\0"+
|
||||||
"\3\0\3\44\24\0\1\36\55\0\1\115\43\0\4\47"+
|
"\3\50\11\0\1\115\4\0\4\65\7\0\15\65\3\0"+
|
||||||
"\7\0\15\47\3\0\3\47\26\0\1\116\37\0\1\117"+
|
"\3\65\16\0\4\52\7\0\15\52\3\0\3\52\47\0"+
|
||||||
"\57\0\4\52\7\0\15\52\3\0\3\52\11\0\1\120"+
|
"\1\114\6\0\1\116\63\0\1\117\57\0\4\60\7\0"+
|
||||||
"\4\0\4\70\7\0\15\70\3\0\3\70\16\0\4\54"+
|
"\15\60\3\0\3\60\26\0\1\120\43\0\4\65\7\0"+
|
||||||
"\7\0\15\54\3\0\3\54\47\0\1\117\6\0\1\121"+
|
"\15\65\3\0\3\65\14\0\1\34\1\0\4\121\1\0"+
|
||||||
"\63\0\1\122\57\0\4\62\7\0\15\62\3\0\3\62"+
|
"\3\122\3\0\15\121\3\0\3\121\14\0\1\34\1\0"+
|
||||||
"\24\0\1\56\55\0\1\123\43\0\4\70\7\0\15\70"+
|
"\4\121\1\0\3\122\3\0\3\121\1\123\11\121\3\0"+
|
||||||
"\3\0\3\70\14\0\1\36\1\0\4\124\1\0\3\125"+
|
"\3\121\16\0\1\124\1\0\1\124\10\0\15\124\3\0"+
|
||||||
"\3\0\15\124\3\0\3\124\14\0\1\36\1\0\4\124"+
|
"\3\124\16\0\1\125\1\126\1\127\1\130\7\0\15\125"+
|
||||||
"\1\0\3\125\3\0\3\124\1\126\11\124\3\0\3\124"+
|
"\3\0\3\125\16\0\1\131\1\0\1\131\10\0\15\131"+
|
||||||
"\16\0\1\127\1\0\1\127\10\0\15\127\3\0\3\127"+
|
"\3\0\3\131\16\0\1\132\1\133\1\132\1\133\7\0"+
|
||||||
"\16\0\1\130\1\131\1\132\1\133\7\0\15\130\3\0"+
|
"\15\132\3\0\3\132\16\0\1\134\2\135\1\136\7\0"+
|
||||||
"\3\130\16\0\1\134\1\0\1\134\10\0\15\134\3\0"+
|
"\15\134\3\0\3\134\16\0\1\75\2\137\10\0\15\75"+
|
||||||
"\3\134\16\0\1\135\1\136\1\135\1\136\7\0\15\135"+
|
"\3\0\3\75\16\0\1\140\2\141\1\142\7\0\15\140"+
|
||||||
"\3\0\3\135\16\0\1\137\2\140\1\141\7\0\15\137"+
|
"\3\0\3\140\16\0\4\133\7\0\15\133\3\0\3\133"+
|
||||||
"\3\0\3\137\16\0\1\100\2\142\10\0\15\100\3\0"+
|
"\16\0\1\143\2\144\1\145\7\0\15\143\3\0\3\143"+
|
||||||
"\3\100\16\0\1\143\2\144\1\145\7\0\15\143\3\0"+
|
"\16\0\1\146\2\147\1\150\7\0\15\146\3\0\3\146"+
|
||||||
"\3\143\16\0\4\136\7\0\15\136\3\0\3\136\16\0"+
|
"\16\0\1\151\1\141\1\152\1\142\7\0\15\151\3\0"+
|
||||||
"\1\146\2\147\1\150\7\0\15\146\3\0\3\146\16\0"+
|
"\3\151\16\0\1\153\2\126\1\130\7\0\15\153\3\0"+
|
||||||
"\1\151\2\152\1\153\7\0\15\151\3\0\3\151\16\0"+
|
"\3\153\30\0\1\154\1\155\64\0\1\156\27\0\4\36"+
|
||||||
"\1\154\1\144\1\155\1\145\7\0\15\154\3\0\3\154"+
|
"\7\0\2\36\1\157\12\36\3\0\3\36\2\0\1\160"+
|
||||||
"\16\0\1\156\2\131\1\133\7\0\15\156\3\0\3\156"+
|
"\101\0\1\161\1\162\40\0\4\65\7\0\6\65\1\163"+
|
||||||
"\30\0\1\157\1\160\64\0\1\161\27\0\4\40\7\0"+
|
"\6\65\3\0\3\65\2\0\1\164\63\0\1\165\71\0"+
|
||||||
"\2\40\1\162\12\40\3\0\3\40\2\0\1\163\101\0"+
|
"\1\166\1\167\34\0\1\170\1\0\1\34\1\0\4\121"+
|
||||||
"\1\164\1\165\40\0\4\70\7\0\6\70\1\166\6\70"+
|
"\1\0\3\122\3\0\15\121\3\0\3\121\16\0\4\171"+
|
||||||
"\3\0\3\70\2\0\1\167\63\0\1\170\71\0\1\171"+
|
"\1\0\3\122\3\0\15\171\3\0\3\171\12\0\1\170"+
|
||||||
"\1\172\34\0\1\173\1\0\1\36\1\0\4\124\1\0"+
|
"\1\0\1\34\1\0\4\121\1\0\3\122\3\0\10\121"+
|
||||||
"\3\125\3\0\15\124\3\0\3\124\16\0\4\174\1\0"+
|
"\1\172\4\121\3\0\3\121\2\0\1\70\13\0\1\124"+
|
||||||
"\3\125\3\0\15\174\3\0\3\174\12\0\1\173\1\0"+
|
"\1\0\1\124\10\0\15\124\3\0\3\124\3\0\1\173"+
|
||||||
"\1\36\1\0\4\124\1\0\3\125\3\0\10\124\1\175"+
|
"\1\0\1\77\2\174\6\0\1\125\1\126\1\127\1\130"+
|
||||||
"\4\124\3\0\3\124\2\0\1\73\13\0\1\127\1\0"+
|
"\7\0\15\125\3\0\3\125\3\0\1\175\1\0\1\77"+
|
||||||
"\1\127\10\0\15\127\3\0\3\127\3\0\1\176\1\0"+
|
"\2\176\1\0\1\177\3\0\1\177\3\126\1\130\7\0"+
|
||||||
"\1\102\2\177\6\0\1\130\1\131\1\132\1\133\7\0"+
|
"\15\126\3\0\3\126\3\0\1\200\1\0\1\77\2\176"+
|
||||||
"\15\130\3\0\3\130\3\0\1\200\1\0\1\102\2\201"+
|
"\1\0\1\177\3\0\1\177\1\127\1\126\1\127\1\130"+
|
||||||
"\1\0\1\202\3\0\1\202\3\131\1\133\7\0\15\131"+
|
"\7\0\15\127\3\0\3\127\3\0\1\201\1\0\1\77"+
|
||||||
"\3\0\3\131\3\0\1\203\1\0\1\102\2\201\1\0"+
|
"\2\174\6\0\4\130\7\0\15\130\3\0\3\130\3\0"+
|
||||||
"\1\202\3\0\1\202\1\132\1\131\1\132\1\133\7\0"+
|
"\1\202\2\0\1\202\7\0\1\132\1\133\1\132\1\133"+
|
||||||
"\15\132\3\0\3\132\3\0\1\204\1\0\1\102\2\177"+
|
"\7\0\15\132\3\0\3\132\3\0\1\202\2\0\1\202"+
|
||||||
"\6\0\4\133\7\0\15\133\3\0\3\133\3\0\1\205"+
|
"\7\0\4\133\7\0\15\133\3\0\3\133\3\0\1\174"+
|
||||||
"\2\0\1\205\7\0\1\135\1\136\1\135\1\136\7\0"+
|
"\1\0\1\77\2\174\6\0\1\134\2\135\1\136\7\0"+
|
||||||
"\15\135\3\0\3\135\3\0\1\205\2\0\1\205\7\0"+
|
"\15\134\3\0\3\134\3\0\1\176\1\0\1\77\2\176"+
|
||||||
"\4\136\7\0\15\136\3\0\3\136\3\0\1\177\1\0"+
|
"\1\0\1\177\3\0\1\177\3\135\1\136\7\0\15\135"+
|
||||||
"\1\102\2\177\6\0\1\137\2\140\1\141\7\0\15\137"+
|
"\3\0\3\135\3\0\1\174\1\0\1\77\2\174\6\0"+
|
||||||
"\3\0\3\137\3\0\1\201\1\0\1\102\2\201\1\0"+
|
"\4\136\7\0\15\136\3\0\3\136\3\0\1\177\2\0"+
|
||||||
"\1\202\3\0\1\202\3\140\1\141\7\0\15\140\3\0"+
|
"\2\177\1\0\1\177\3\0\1\177\3\137\10\0\15\137"+
|
||||||
"\3\140\3\0\1\177\1\0\1\102\2\177\6\0\4\141"+
|
"\3\0\3\137\3\0\1\103\1\0\1\77\2\74\1\0"+
|
||||||
"\7\0\15\141\3\0\3\141\3\0\1\202\2\0\2\202"+
|
"\1\75\3\0\1\75\1\140\2\141\1\142\7\0\15\140"+
|
||||||
"\1\0\1\202\3\0\1\202\3\142\10\0\15\142\3\0"+
|
"\3\0\3\140\3\0\1\76\1\0\1\77\2\100\1\0"+
|
||||||
"\3\142\3\0\1\106\1\0\1\102\2\77\1\0\1\100"+
|
"\1\101\3\0\1\101\3\141\1\142\7\0\15\141\3\0"+
|
||||||
"\3\0\1\100\1\143\2\144\1\145\7\0\15\143\3\0"+
|
"\3\141\3\0\1\103\1\0\1\77\2\74\1\0\1\75"+
|
||||||
"\3\143\3\0\1\101\1\0\1\102\2\103\1\0\1\104"+
|
"\3\0\1\75\4\142\7\0\15\142\3\0\3\142\3\0"+
|
||||||
"\3\0\1\104\3\144\1\145\7\0\15\144\3\0\3\144"+
|
"\1\74\1\0\1\77\2\74\1\0\1\75\3\0\1\75"+
|
||||||
"\3\0\1\106\1\0\1\102\2\77\1\0\1\100\3\0"+
|
"\1\143\2\144\1\145\7\0\15\143\3\0\3\143\3\0"+
|
||||||
"\1\100\4\145\7\0\15\145\3\0\3\145\3\0\1\77"+
|
"\1\100\1\0\1\77\2\100\1\0\1\101\3\0\1\101"+
|
||||||
"\1\0\1\102\2\77\1\0\1\100\3\0\1\100\1\146"+
|
"\3\144\1\145\7\0\15\144\3\0\3\144\3\0\1\74"+
|
||||||
"\2\147\1\150\7\0\15\146\3\0\3\146\3\0\1\103"+
|
"\1\0\1\77\2\74\1\0\1\75\3\0\1\75\4\145"+
|
||||||
"\1\0\1\102\2\103\1\0\1\104\3\0\1\104\3\147"+
|
"\7\0\15\145\3\0\3\145\3\0\1\75\2\0\2\75"+
|
||||||
"\1\150\7\0\15\147\3\0\3\147\3\0\1\77\1\0"+
|
"\1\0\1\75\3\0\1\75\1\146\2\147\1\150\7\0"+
|
||||||
"\1\102\2\77\1\0\1\100\3\0\1\100\4\150\7\0"+
|
"\15\146\3\0\3\146\3\0\1\101\2\0\2\101\1\0"+
|
||||||
"\15\150\3\0\3\150\3\0\1\100\2\0\2\100\1\0"+
|
"\1\101\3\0\1\101\3\147\1\150\7\0\15\147\3\0"+
|
||||||
"\1\100\3\0\1\100\1\151\2\152\1\153\7\0\15\151"+
|
"\3\147\3\0\1\75\2\0\2\75\1\0\1\75\3\0"+
|
||||||
"\3\0\3\151\3\0\1\104\2\0\2\104\1\0\1\104"+
|
"\1\75\4\150\7\0\15\150\3\0\3\150\3\0\1\203"+
|
||||||
"\3\0\1\104\3\152\1\153\7\0\15\152\3\0\3\152"+
|
"\1\0\1\77\2\74\1\0\1\75\3\0\1\75\1\151"+
|
||||||
"\3\0\1\100\2\0\2\100\1\0\1\100\3\0\1\100"+
|
"\1\141\1\152\1\142\7\0\15\151\3\0\3\151\3\0"+
|
||||||
"\4\153\7\0\15\153\3\0\3\153\3\0\1\206\1\0"+
|
"\1\204\1\0\1\77\2\100\1\0\1\101\3\0\1\101"+
|
||||||
"\1\102\2\77\1\0\1\100\3\0\1\100\1\154\1\144"+
|
"\1\152\1\141\1\152\1\142\7\0\15\152\3\0\3\152"+
|
||||||
"\1\155\1\145\7\0\15\154\3\0\3\154\3\0\1\207"+
|
"\3\0\1\201\1\0\1\77\2\174\6\0\1\153\2\126"+
|
||||||
"\1\0\1\102\2\103\1\0\1\104\3\0\1\104\1\155"+
|
"\1\130\7\0\15\153\3\0\3\153\31\0\1\155\54\0"+
|
||||||
"\1\144\1\155\1\145\7\0\15\155\3\0\3\155\3\0"+
|
"\1\205\64\0\1\206\26\0\4\36\7\0\15\36\3\0"+
|
||||||
"\1\204\1\0\1\102\2\177\6\0\1\156\2\131\1\133"+
|
"\1\36\1\207\1\36\31\0\1\162\54\0\1\210\35\0"+
|
||||||
"\7\0\15\156\3\0\3\156\31\0\1\160\54\0\1\210"+
|
"\1\34\1\0\4\121\1\0\3\122\3\0\3\121\1\211"+
|
||||||
"\64\0\1\211\26\0\4\40\7\0\15\40\3\0\1\40"+
|
"\11\121\3\0\3\121\2\0\1\212\102\0\1\167\54\0"+
|
||||||
"\1\212\1\40\31\0\1\165\54\0\1\213\35\0\1\36"+
|
"\1\213\34\0\1\214\52\0\1\170\3\0\4\171\7\0"+
|
||||||
"\1\0\4\124\1\0\3\125\3\0\3\124\1\214\11\124"+
|
"\15\171\3\0\3\171\12\0\1\170\1\0\1\215\1\0"+
|
||||||
"\3\0\3\124\2\0\1\215\102\0\1\172\54\0\1\216"+
|
"\4\121\1\0\3\122\3\0\15\121\3\0\3\121\16\0"+
|
||||||
"\34\0\1\217\52\0\1\173\3\0\4\174\7\0\15\174"+
|
"\1\216\1\130\1\216\1\130\7\0\15\216\3\0\3\216"+
|
||||||
"\3\0\3\174\12\0\1\173\1\0\1\220\1\0\4\124"+
|
"\16\0\4\136\7\0\15\136\3\0\3\136\16\0\4\142"+
|
||||||
"\1\0\3\125\3\0\15\124\3\0\3\124\16\0\1\221"+
|
"\7\0\15\142\3\0\3\142\16\0\4\145\7\0\15\145"+
|
||||||
"\1\133\1\221\1\133\7\0\15\221\3\0\3\221\16\0"+
|
"\3\0\3\145\16\0\4\150\7\0\15\150\3\0\3\150"+
|
||||||
"\4\141\7\0\15\141\3\0\3\141\16\0\4\145\7\0"+
|
"\16\0\1\217\1\142\1\217\1\142\7\0\15\217\3\0"+
|
||||||
"\15\145\3\0\3\145\16\0\4\150\7\0\15\150\3\0"+
|
"\3\217\16\0\4\130\7\0\15\130\3\0\3\130\16\0"+
|
||||||
"\3\150\16\0\4\153\7\0\15\153\3\0\3\153\16\0"+
|
"\4\220\7\0\15\220\3\0\3\220\33\0\1\221\61\0"+
|
||||||
"\1\222\1\145\1\222\1\145\7\0\15\222\3\0\3\222"+
|
"\1\222\30\0\4\36\6\0\1\223\15\36\3\0\2\36"+
|
||||||
"\16\0\4\133\7\0\15\133\3\0\3\133\16\0\4\223"+
|
"\1\224\33\0\1\225\32\0\1\170\1\0\1\34\1\0"+
|
||||||
"\7\0\15\223\3\0\3\223\33\0\1\224\61\0\1\225"+
|
"\4\121\1\0\3\122\3\0\10\121\1\226\4\121\3\0"+
|
||||||
"\30\0\4\40\6\0\1\226\15\40\3\0\2\40\1\227"+
|
"\3\121\2\0\1\227\104\0\1\230\36\0\4\231\7\0"+
|
||||||
"\33\0\1\230\32\0\1\173\1\0\1\36\1\0\4\124"+
|
"\15\231\3\0\3\231\3\0\1\173\1\0\1\77\2\174"+
|
||||||
"\1\0\3\125\3\0\10\124\1\231\4\124\3\0\3\124"+
|
"\6\0\1\216\1\130\1\216\1\130\7\0\15\216\3\0"+
|
||||||
"\2\0\1\232\104\0\1\233\36\0\4\234\7\0\15\234"+
|
"\3\216\3\0\1\203\1\0\1\77\2\74\1\0\1\75"+
|
||||||
"\3\0\3\234\3\0\1\176\1\0\1\102\2\177\6\0"+
|
"\3\0\1\75\1\217\1\142\1\217\1\142\7\0\15\217"+
|
||||||
"\1\221\1\133\1\221\1\133\7\0\15\221\3\0\3\221"+
|
"\3\0\3\217\3\0\1\202\2\0\1\202\7\0\4\220"+
|
||||||
"\3\0\1\206\1\0\1\102\2\77\1\0\1\100\3\0"+
|
"\7\0\15\220\3\0\3\220\34\0\1\232\55\0\1\233"+
|
||||||
"\1\100\1\222\1\145\1\222\1\145\7\0\15\222\3\0"+
|
"\26\0\1\234\60\0\4\36\6\0\1\223\15\36\3\0"+
|
||||||
"\3\222\3\0\1\205\2\0\1\205\7\0\4\223\7\0"+
|
"\3\36\34\0\1\235\31\0\1\170\1\0\1\114\1\0"+
|
||||||
"\15\223\3\0\3\223\34\0\1\235\55\0\1\236\26\0"+
|
"\4\121\1\0\3\122\3\0\15\121\3\0\3\121\34\0"+
|
||||||
"\1\237\60\0\4\40\6\0\1\226\15\40\3\0\3\40"+
|
"\1\236\32\0\1\237\2\0\4\231\7\0\15\231\3\0"+
|
||||||
"\34\0\1\240\31\0\1\173\1\0\1\117\1\0\4\124"+
|
"\3\231\35\0\1\240\62\0\1\241\20\0\1\242\77\0"+
|
||||||
"\1\0\3\125\3\0\15\124\3\0\3\124\34\0\1\241"+
|
"\1\243\53\0\1\244\32\0\1\34\1\0\4\171\1\0"+
|
||||||
"\32\0\1\242\2\0\4\234\7\0\15\234\3\0\3\234"+
|
"\3\122\3\0\15\171\3\0\3\171\36\0\1\245\53\0"+
|
||||||
"\35\0\1\243\62\0\1\244\20\0\1\245\77\0\1\246"+
|
"\1\246\33\0\4\247\7\0\15\247\3\0\3\247\36\0"+
|
||||||
"\53\0\1\247\32\0\1\36\1\0\4\174\1\0\3\125"+
|
"\1\250\53\0\1\251\54\0\1\252\61\0\1\253\11\0"+
|
||||||
"\3\0\15\174\3\0\3\174\36\0\1\250\53\0\1\251"+
|
"\1\254\12\0\4\247\7\0\15\247\3\0\3\247\37\0"+
|
||||||
"\33\0\4\252\7\0\15\252\3\0\3\252\36\0\1\253"+
|
"\1\255\53\0\1\256\54\0\1\257\22\0\1\13\62\0"+
|
||||||
"\53\0\1\254\54\0\1\255\61\0\1\256\11\0\1\257"+
|
"\4\260\7\0\15\260\3\0\3\260\40\0\1\261\53\0"+
|
||||||
"\12\0\4\252\7\0\15\252\3\0\3\252\37\0\1\260"+
|
"\1\262\43\0\1\263\26\0\2\260\1\0\2\260\1\0"+
|
||||||
"\53\0\1\261\54\0\1\262\22\0\1\13\62\0\4\263"+
|
"\2\260\2\0\5\260\7\0\15\260\3\0\4\260\27\0"+
|
||||||
"\7\0\15\263\3\0\3\263\40\0\1\264\53\0\1\265"+
|
"\1\264\53\0\1\265\24\0";
|
||||||
"\43\0\1\266\26\0\2\263\1\0\2\263\1\0\2\263"+
|
|
||||||
"\2\0\5\263\7\0\15\263\3\0\4\263\27\0\1\267"+
|
|
||||||
"\53\0\1\270\24\0";
|
|
||||||
|
|
||||||
private static int [] zzUnpackTrans() {
|
private static int [] zzUnpackTrans() {
|
||||||
int [] result = new int[7040];
|
int [] result = new int[6908];
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
|
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
|
||||||
return result;
|
return result;
|
||||||
|
@ -355,8 +351,8 @@ class WikipediaTokenizerImpl {
|
||||||
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
|
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
|
||||||
|
|
||||||
private static final String ZZ_ATTRIBUTE_PACKED_0 =
|
private static final String ZZ_ATTRIBUTE_PACKED_0 =
|
||||||
"\12\0\1\11\7\1\1\11\3\1\1\11\6\1\1\11"+
|
"\12\0\1\11\7\1\1\11\2\1\1\11\5\1\1\11"+
|
||||||
"\2\1\1\11\14\1\1\11\6\1\2\11\3\0\1\11"+
|
"\3\1\1\11\13\1\1\11\5\1\2\11\3\0\1\11"+
|
||||||
"\14\0\2\1\2\11\1\1\1\0\2\1\1\11\1\0"+
|
"\14\0\2\1\2\11\1\1\1\0\2\1\1\11\1\0"+
|
||||||
"\1\1\1\0\1\1\3\0\7\1\2\0\1\1\1\0"+
|
"\1\1\1\0\1\1\3\0\7\1\2\0\1\1\1\0"+
|
||||||
"\15\1\3\0\1\1\1\11\3\0\1\1\1\11\5\0"+
|
"\15\1\3\0\1\1\1\11\3\0\1\1\1\11\5\0"+
|
||||||
|
@ -365,7 +361,7 @@ class WikipediaTokenizerImpl {
|
||||||
"\2\0\3\11";
|
"\2\0\3\11";
|
||||||
|
|
||||||
private static int [] zzUnpackAttribute() {
|
private static int [] zzUnpackAttribute() {
|
||||||
int [] result = new int[184];
|
int [] result = new int[181];
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
|
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
|
||||||
return result;
|
return result;
|
||||||
|
@ -508,7 +504,6 @@ final void reset() {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new scanner
|
* Creates a new scanner
|
||||||
* There is also a java.io.InputStream version of this constructor.
|
|
||||||
*
|
*
|
||||||
* @param in the java.io.Reader to read input from.
|
* @param in the java.io.Reader to read input from.
|
||||||
*/
|
*/
|
||||||
|
@ -516,7 +511,6 @@ final void reset() {
|
||||||
this.zzReader = in;
|
this.zzReader = in;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unpacks the compressed character translation table.
|
* Unpacks the compressed character translation table.
|
||||||
|
|
|
@ -212,7 +212,7 @@ DOUBLE_EQUALS = "="{2}
|
||||||
{DOUBLE_BRACE} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;}
|
{DOUBLE_BRACE} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;}
|
||||||
{CITATION} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;}
|
{CITATION} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;}
|
||||||
//ignore
|
//ignore
|
||||||
. | {WHITESPACE} |{INFOBOX} {numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
|
[^] |{INFOBOX} {numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
|
||||||
}
|
}
|
||||||
|
|
||||||
<INTERNAL_LINK_STATE>{
|
<INTERNAL_LINK_STATE>{
|
||||||
|
@ -221,7 +221,7 @@ DOUBLE_EQUALS = "="{2}
|
||||||
{ALPHANUM} {yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;}
|
{ALPHANUM} {yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;}
|
||||||
{DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
|
{DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
|
||||||
//ignore
|
//ignore
|
||||||
. | {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
|
[^] { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
|
||||||
}
|
}
|
||||||
|
|
||||||
<EXTERNAL_LINK_STATE>{
|
<EXTERNAL_LINK_STATE>{
|
||||||
|
@ -236,7 +236,7 @@ DOUBLE_EQUALS = "="{2}
|
||||||
{ALPHANUM} {yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;}
|
{ALPHANUM} {yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;}
|
||||||
{DOUBLE_BRACKET_CLOSE} {yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;}
|
{DOUBLE_BRACKET_CLOSE} {yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;}
|
||||||
//ignore
|
//ignore
|
||||||
. | {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
|
[^] { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
|
||||||
}
|
}
|
||||||
//italics
|
//italics
|
||||||
<TWO_SINGLE_QUOTES_STATE>{
|
<TWO_SINGLE_QUOTES_STATE>{
|
||||||
|
@ -249,7 +249,7 @@ DOUBLE_EQUALS = "="{2}
|
||||||
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
|
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
|
||||||
|
|
||||||
//ignore
|
//ignore
|
||||||
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
[^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
||||||
}
|
}
|
||||||
//bold
|
//bold
|
||||||
<THREE_SINGLE_QUOTES_STATE>{
|
<THREE_SINGLE_QUOTES_STATE>{
|
||||||
|
@ -260,7 +260,7 @@ DOUBLE_EQUALS = "="{2}
|
||||||
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
|
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
|
||||||
|
|
||||||
//ignore
|
//ignore
|
||||||
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
[^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
||||||
|
|
||||||
}
|
}
|
||||||
//bold italics
|
//bold italics
|
||||||
|
@ -272,7 +272,7 @@ DOUBLE_EQUALS = "="{2}
|
||||||
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
|
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
|
||||||
|
|
||||||
//ignore
|
//ignore
|
||||||
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
[^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
||||||
}
|
}
|
||||||
|
|
||||||
<DOUBLE_EQUALS_STATE>{
|
<DOUBLE_EQUALS_STATE>{
|
||||||
|
@ -280,15 +280,15 @@ DOUBLE_EQUALS = "="{2}
|
||||||
{ALPHANUM} {currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;}
|
{ALPHANUM} {currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;}
|
||||||
{DOUBLE_EQUALS} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
|
{DOUBLE_EQUALS} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
|
||||||
//ignore
|
//ignore
|
||||||
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
[^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
||||||
}
|
}
|
||||||
|
|
||||||
<DOUBLE_BRACE_STATE>{
|
<DOUBLE_BRACE_STATE>{
|
||||||
{ALPHANUM} {yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;}
|
{ALPHANUM} {yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;}
|
||||||
{DOUBLE_BRACE_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
|
{DOUBLE_BRACE_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
|
||||||
{CITATION_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
|
{CITATION_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
|
||||||
//ignore
|
//ignore
|
||||||
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
[^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
||||||
}
|
}
|
||||||
|
|
||||||
<STRING> {
|
<STRING> {
|
||||||
|
@ -305,7 +305,7 @@ DOUBLE_EQUALS = "="{2}
|
||||||
|
|
||||||
{PIPE} {yybegin(STRING); return currentTokType;/*pipe*/}
|
{PIPE} {yybegin(STRING); return currentTokType;/*pipe*/}
|
||||||
|
|
||||||
.|{WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ }
|
[^] { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -327,7 +327,7 @@ DOUBLE_EQUALS = "="{2}
|
||||||
//end wikipedia
|
//end wikipedia
|
||||||
|
|
||||||
/** Ignore the rest */
|
/** Ignore the rest */
|
||||||
. | {WHITESPACE}|{TAGS} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
[^] | {TAGS} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
|
||||||
|
|
||||||
|
|
||||||
//INTERNAL_LINK = "["{2}({ALPHANUM}+{WHITESPACE}*)+"]"{2}
|
//INTERNAL_LINK = "["{2}({ALPHANUM}+{WHITESPACE}*)+"]"{2}
|
||||||
|
|
|
@ -202,7 +202,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testUnicodeWordBreaks() throws Exception {
|
public void testUnicodeWordBreaks() throws Exception {
|
||||||
WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
|
WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
|
||||||
wordBreakTest.test(a);
|
wordBreakTest.test(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -230,6 +230,8 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
|
||||||
checkOneTerm(a, "壹゙", "壹゙"); // ideographic
|
checkOneTerm(a, "壹゙", "壹゙"); // ideographic
|
||||||
checkOneTerm(a, "아゙", "아゙"); // hangul
|
checkOneTerm(a, "아゙", "아゙"); // hangul
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** blast some random strings through the analyzer */
|
/** blast some random strings through the analyzer */
|
||||||
public void testRandomStrings() throws Exception {
|
public void testRandomStrings() throws Exception {
|
||||||
|
|
|
@ -424,7 +424,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testUnicodeWordBreaks() throws Exception {
|
public void testUnicodeWordBreaks() throws Exception {
|
||||||
WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
|
WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
|
||||||
wordBreakTest.test(a);
|
wordBreakTest.test(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -78,13 +78,13 @@ LTLNFsgB@[191.56.104.113]
|
||||||
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU
|
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU
|
||||||
VGLn@z3E2.3an2.MM
|
VGLn@z3E2.3an2.MM
|
||||||
TWmfsxn@[112.192.017.029]
|
TWmfsxn@[112.192.017.029]
|
||||||
2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV
|
2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KPRW13D
|
||||||
CjaPC63@['\RDrwk]
|
CjaPC63@['\RDrwk]
|
||||||
Ayydpdoa@tdgypppmen.wf
|
Ayydpdoa@tdgypppmen.wf
|
||||||
"gfKP9"@jo3-r0.mz
|
"gfKP9"@jo3-r0.mz
|
||||||
aTMgDW4@t5gax.XN--0ZWM56D
|
aTMgDW4@t5gax.XN--3E0B707E
|
||||||
mcDrMO3FQ@nwc21.y5qd45lesryrp.IL
|
mcDrMO3FQ@nwc21.y5qd45lesryrp.IL
|
||||||
NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp
|
NZqj@v50egeveepk.z290kk.Bc3.xn--kprw13d
|
||||||
XtAhFnq@[218.214.251.103]
|
XtAhFnq@[218.214.251.103]
|
||||||
x0S8uos@[109.82.126.233]
|
x0S8uos@[109.82.126.233]
|
||||||
ALB4KFavj16pODdd@i206d6s.MM
|
ALB4KFavj16pODdd@i206d6s.MM
|
||||||
|
|
|
@ -78,9 +78,10 @@ import org.junit.Ignore;
|
||||||
* \\p{Script = Hiragana}
|
* \\p{Script = Hiragana}
|
||||||
* \\p{LineBreak = Complex_Context} (From $line_break_url)
|
* \\p{LineBreak = Complex_Context} (From $line_break_url)
|
||||||
* \\p{WordBreak = ALetter} (From $word_break_url)
|
* \\p{WordBreak = ALetter} (From $word_break_url)
|
||||||
|
* \\p{WordBreak = Hebrew_Letter}
|
||||||
* \\p{WordBreak = Katakana}
|
* \\p{WordBreak = Katakana}
|
||||||
* \\p{WordBreak = Numeric} (Excludes full-width Arabic digits)
|
* \\p{WordBreak = Numeric} (Excludes full-width Arabic digits)
|
||||||
* [\\uFF10-\\uFF19] (Full-width Arabic digits)
|
* [\\uFF10-\\uFF19] (Full-width Arabic digits)
|
||||||
*/
|
*/
|
||||||
\@Ignore
|
\@Ignore
|
||||||
public class ${class_name} extends BaseTokenStreamTestCase {
|
public class ${class_name} extends BaseTokenStreamTestCase {
|
||||||
|
@ -97,7 +98,7 @@ parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
|
||||||
parse_Unicode_data_file($scripts_url, $codepoints,
|
parse_Unicode_data_file($scripts_url, $codepoints,
|
||||||
{'han' => 1, 'hiragana' => 1});
|
{'han' => 1, 'hiragana' => 1});
|
||||||
parse_Unicode_data_file($word_break_url, $codepoints,
|
parse_Unicode_data_file($word_break_url, $codepoints,
|
||||||
{'aletter' => 1, 'katakana' => 1, 'numeric' => 1});
|
{'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1});
|
||||||
my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
|
my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
|
||||||
|
|
||||||
my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
|
my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
|
||||||
|
@ -109,25 +110,33 @@ print STDERR "Writing '$output_path'...";
|
||||||
print OUT $header;
|
print OUT $header;
|
||||||
|
|
||||||
for my $line (@tests) {
|
for my $line (@tests) {
|
||||||
next if ($line =~ /^\s*\#/);
|
next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines
|
||||||
# ÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
|
# Example line: ÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
|
||||||
my ($sequence) = $line =~ /^(.*?)\s*\#/;
|
my ($sequence) = $line =~ /^(.*?)\s*\#/;
|
||||||
|
$line =~ s/\t/ /g; # Convert tabs to two spaces (no tabs allowed in Lucene source)
|
||||||
print OUT " // $line\n";
|
print OUT " // $line\n";
|
||||||
$sequence =~ s/\s*÷\s*$//; # Trim trailing break character
|
$sequence =~ s/\s*÷\s*$//; # Trim trailing break character
|
||||||
my $test_string = $sequence;
|
my $test_string = $sequence;
|
||||||
$test_string =~ s/\s*÷\s*/\\u/g;
|
$test_string =~ s/\s*÷\s*/\\u/g;
|
||||||
$test_string =~ s/\s*×\s*/\\u/g;
|
$test_string =~ s/\s*×\s*/\\u/g;
|
||||||
|
$test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge;
|
||||||
$test_string =~ s/\\u000A/\\n/g;
|
$test_string =~ s/\\u000A/\\n/g;
|
||||||
$test_string =~ s/\\u000D/\\r/g;
|
$test_string =~ s/\\u000D/\\r/g;
|
||||||
|
$test_string =~ s/\\u0022/\\\"/g;
|
||||||
$sequence =~ s/^\s*÷\s*//; # Trim leading break character
|
$sequence =~ s/^\s*÷\s*//; # Trim leading break character
|
||||||
my @tokens = ();
|
my @tokens = ();
|
||||||
for my $candidate (split /\s*÷\s*/, $sequence) {
|
for my $candidate (split /\s*÷\s*/, $sequence) {
|
||||||
my @chars = ();
|
my @chars = ();
|
||||||
my $has_wanted_char = 0;
|
my $has_wanted_char = 0;
|
||||||
while ($candidate =~ /([0-9A-F]+)/gi) {
|
while ($candidate =~ /([0-9A-F]+)/gi) {
|
||||||
push @chars, $1;
|
my $hexchar = $1;
|
||||||
|
if (4 == length($hexchar)) {
|
||||||
|
push @chars, $hexchar;
|
||||||
|
} else {
|
||||||
|
push @chars, above_BMP_char_to_surrogates($hexchar);
|
||||||
|
}
|
||||||
unless ($has_wanted_char) {
|
unless ($has_wanted_char) {
|
||||||
$has_wanted_char = 1 if (defined($codepoints->[hex($1)]));
|
$has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ($has_wanted_char) {
|
if ($has_wanted_char) {
|
||||||
|
@ -144,6 +153,21 @@ close OUT;
|
||||||
print STDERR "done.\n";
|
print STDERR "done.\n";
|
||||||
|
|
||||||
|
|
||||||
|
# sub above_BMP_char_to_surrogates
|
||||||
|
#
|
||||||
|
# Converts hex references to chars above the BMP (i.e., greater than 0xFFFF)
|
||||||
|
# to the corresponding UTF-16 surrogate pair
|
||||||
|
#
|
||||||
|
# Assumption: input string is a sequence of more than four hex digits
|
||||||
|
#
|
||||||
|
sub above_BMP_char_to_surrogates {
|
||||||
|
my $ch = hex(shift);  # parse the hex-digit string argument into a numeric code point
|
||||||
|
my $high_surrogate = 0xD800 + (($ch - 0x10000) >> 10);  # 0xD800 plus the bits above the low ten of the offset from 0x10000
|
||||||
|
my $low_surrogate = 0xDC00 + ($ch & 0x3FF);  # 0xDC00 plus the low ten bits (subtracting 0x10000 leaves these bits unchanged)
|
||||||
|
return map { sprintf("%04X", $_) } ($high_surrogate, $low_surrogate);  # two zero-padded uppercase hex strings, high surrogate first
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
# sub parse_Unicode_data_file
|
# sub parse_Unicode_data_file
|
||||||
#
|
#
|
||||||
# Downloads the specified Unicode data file, parses it, and
|
# Downloads the specified Unicode data file, parses it, and
|
||||||
|
|
|
@ -121,14 +121,14 @@ Bzzzzzzzz! Bzzzzzzzzzzzzzzz! Tell them "0\!P?".shQVdSerA@2qmqj8ul.hm the leg
|
||||||
of LTLNFsgB@[191.56.104.113] all, until it has read it is
|
of LTLNFsgB@[191.56.104.113] all, until it has read it is
|
||||||
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU there. <VGLn@z3E2.3an2.MM> Once
|
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU there. <VGLn@z3E2.3an2.MM> Once
|
||||||
TWmfsxn@[112.192.017.029] Spiros under the place
|
TWmfsxn@[112.192.017.029] Spiros under the place
|
||||||
2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV as were not a house of the
|
2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KPRW13D as were not a house of the
|
||||||
rosebushes and the whateverend, feel her waist. She changes everything. We had
|
rosebushes and the whateverend, feel her waist. She changes everything. We had
|
||||||
decided to do you know CjaPC63@['\RDrwk] this, is what did leave, pray; let us
|
decided to do you know CjaPC63@['\RDrwk] this, is what did leave, pray; let us
|
||||||
come to, <Ayydpdoa@tdgypppmen.wf> what history as died. Strange, Spiros with
|
come to, <Ayydpdoa@tdgypppmen.wf> what history as died. Strange, Spiros with
|
||||||
delight: That night "gfKP9"@jo3-r0.mz and gold case
|
delight: That night "gfKP9"@jo3-r0.mz and gold case
|
||||||
<aTMgDW4@t5gax.XN--0ZWM56D> is spring: the aeon arising, wherein he returned,
|
<aTMgDW4@t5gax.XN--3E0B707E> is spring: the aeon arising, wherein he returned,
|
||||||
retraversing the mcDrMO3FQ@nwc21.y5qd45lesryrp.IL gates, first
|
retraversing the mcDrMO3FQ@nwc21.y5qd45lesryrp.IL gates, first
|
||||||
<NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp> to reach session. Initiating first
|
<NZqj@v50egeveepk.z290kk.Bc3.xn--kprw13d> to reach session. Initiating first
|
||||||
part of the main hall toward his own spurs. Hes an <XtAhFnq@[218.214.251.103]>
|
part of the main hall toward his own spurs. Hes an <XtAhFnq@[218.214.251.103]>
|
||||||
Irifix And older ones who wins? ADAM: x0S8uos@[109.82.126.233] The violin and
|
Irifix And older ones who wins? ADAM: x0S8uos@[109.82.126.233] The violin and
|
||||||
reality. The hidden set up to come. ROSE WAKINS: No answer. The
|
reality. The hidden set up to come. ROSE WAKINS: No answer. The
|
||||||
|
|
|
@ -24,7 +24,7 @@ and Joe recited this iron bars with their account, poor elth, and she had been
|
||||||
almost drove me towards evening. At
|
almost drove me towards evening. At
|
||||||
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH the
|
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH the
|
||||||
sergeant and then on the raw
|
sergeant and then on the raw
|
||||||
<Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m> afternoon towards
|
<Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m> afternoon towards
|
||||||
the terror, merely wished him as biled
|
the terror, merely wished him as biled
|
||||||
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb -- a conciliatory air on in
|
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb -- a conciliatory air on in
|
||||||
<ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J>
|
<ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J>
|
||||||
|
@ -47,7 +47,7 @@ to live. You didn't know nothing could attend more.' He had been a coming! Get
|
||||||
behind the answer those aids, I saw him in the same appearance of the convict's
|
behind the answer those aids, I saw him in the same appearance of the convict's
|
||||||
file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf
|
file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf
|
||||||
confession, and bring you see? '
|
confession, and bring you see? '
|
||||||
HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND said my limbs. Joe in an
|
HTTP://yA2O3F.XN--3E0B707E/qPDTt/MwMXGQq2S7JT/TJ2iCND said my limbs. Joe in an
|
||||||
accusatory manner as well known that Joe Gargery marry her cup. `I wonder and
|
accusatory manner as well known that Joe Gargery marry her cup. `I wonder and
|
||||||
there was publicly made it was,
|
there was publicly made it was,
|
||||||
<file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#> as lookers on; me, I
|
<file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#> as lookers on; me, I
|
||||||
|
@ -63,7 +63,7 @@ again
|
||||||
FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB
|
FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB
|
||||||
towards evening. At last, and kneaded, and a dead man taking any. There was
|
towards evening. At last, and kneaded, and a dead man taking any. There was
|
||||||
publicly made out there?' said I,
|
publicly made out there?' said I,
|
||||||
ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
|
ftp://w0yaysrl.XN--CLCHC0EA0B2G2A9GCD/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
|
||||||
glancing http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY at the
|
glancing http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY at the
|
||||||
N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/ river wound, twenty miles of the
|
N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/ river wound, twenty miles of the
|
||||||
number called, hears the awful it lights; here and trimmings of Caesar. This
|
number called, hears the awful it lights; here and trimmings of Caesar. This
|
||||||
|
@ -155,7 +155,7 @@ ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sg
|
||||||
at me, and that her walking z3ymb.KM/DdnrqoBz=YtxSB away so much of the
|
at me, and that her walking z3ymb.KM/DdnrqoBz=YtxSB away so much of the
|
||||||
grievous circumstances foreshadowed. After receiving the way, that I thought,
|
grievous circumstances foreshadowed. After receiving the way, that I thought,
|
||||||
if she should go to?' `Good again!' cried the
|
if she should go to?' `Good again!' cried the
|
||||||
FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0 society of a savoury pork pie,
|
FTP://7kgip3z.XN--KPRY57D:15983/OYEQzIA0 society of a savoury pork pie,
|
||||||
and nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc they challenged, hears nothin' all my
|
and nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc they challenged, hears nothin' all my
|
||||||
hands in herself, and bring him by hand. `This,' ftp://085.062.055.011/bopfVV/
|
hands in herself, and bring him by hand. `This,' ftp://085.062.055.011/bopfVV/
|
||||||
said he wore ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs a dog of
|
said he wore ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs a dog of
|
||||||
|
@ -191,7 +191,7 @@ and tingling, and that I had won of the shoulder. `Excuse me, and we departed
|
||||||
from Richard the furthest end of
|
from Richard the furthest end of
|
||||||
http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w both imp and stung by the
|
http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w both imp and stung by the
|
||||||
bright fire, another look
|
bright fire, another look
|
||||||
zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 over her
|
zQFC1SPO96J.Jy20d8.xn--3e0b707e:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 over her
|
||||||
best use asking questions, and feet,
|
best use asking questions, and feet,
|
||||||
<ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ> hanging to try
|
<ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ> hanging to try
|
||||||
back was the poker. `It was not warmly. `Seems
|
back was the poker. `It was not warmly. `Seems
|
||||||
|
@ -204,7 +204,7 @@ kitchen wall,
|
||||||
Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1 he ate the
|
Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1 he ate the
|
||||||
house, end with the Ghost in order): Forty-three pence?' To five hundred
|
house, end with the Ghost in order): Forty-three pence?' To five hundred
|
||||||
Gargerys.' `I say, Pip; stay
|
Gargerys.' `I say, Pip; stay
|
||||||
7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb out with
|
7WO6F.XN--45BRJ9C/1L%f9G0NEu/L2lD/mQGNS9UhgCEb out with
|
||||||
ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB
|
ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB
|
||||||
his shot, and reposing no help to my seat. It was in the kitchen wall, because
|
his shot, and reposing no help to my seat. It was in the kitchen wall, because
|
||||||
I calculated the sounds by giving me by the name for a rush of Joe's forge
|
I calculated the sounds by giving me by the name for a rush of Joe's forge
|
||||||
|
@ -299,7 +299,7 @@ She drew the kitchen, carrying file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH so low
|
||||||
wooden hut
|
wooden hut
|
||||||
ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T
|
ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T
|
||||||
where it seemed to give Pirrip as
|
where it seemed to give Pirrip as
|
||||||
<79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO>
|
<79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--FIQS8S/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO>
|
||||||
to say, on the guiltily coarse his head, he tried to the
|
to say, on the guiltily coarse his head, he tried to the
|
||||||
Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z
|
Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z
|
||||||
remark. `There's one sprinkled all I was possible she beggared me. All these
|
remark. `There's one sprinkled all I was possible she beggared me. All these
|
||||||
|
@ -311,7 +311,7 @@ Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%be
|
||||||
he shook her veil so thick nor my milk and would impart all had returned, with
|
he shook her veil so thick nor my milk and would impart all had returned, with
|
||||||
soap-suds, I had FILE:///#F9Bgl just like thin snow. `Enough of his right side
|
soap-suds, I had FILE:///#F9Bgl just like thin snow. `Enough of his right side
|
||||||
of thenceforth sitting
|
of thenceforth sitting
|
||||||
jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
|
jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--3E0B707E/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
|
||||||
in File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg my soul. I sat down on it, I have
|
in File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg my soul. I sat down on it, I have
|
||||||
been a spoon that the pie, blacksmith?' asked Estella of it made a mouth wide
|
been a spoon that the pie, blacksmith?' asked Estella of it made a mouth wide
|
||||||
open, and so
|
open, and so
|
||||||
|
@ -324,7 +324,7 @@ FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2 of the stranger looked at it, I
|
||||||
pointed to Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz himself. No glimpse of
|
pointed to Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz himself. No glimpse of
|
||||||
file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg herself, I saw that he would have
|
file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg herself, I saw that he would have
|
||||||
been there, I was too far and uncomfortable by it.
|
been there, I was too far and uncomfortable by it.
|
||||||
http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
|
http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--CLCHC0EA0B2G2A9GCD/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
|
||||||
Under the Above,' I rather to become transfixed -- he gave me out of the
|
Under the Above,' I rather to become transfixed -- he gave me out of the
|
||||||
kitchen empty-handed, to keep him, I had made a
|
kitchen empty-handed, to keep him, I had made a
|
||||||
Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG subject, if he had
|
Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG subject, if he had
|
||||||
|
@ -468,7 +468,7 @@ hard twist upon his -- `Well, boy,' Uncle Pumblechook: a look at the sermon he
|
||||||
had heard it had hesitated as little window, violently plunging and she had
|
had heard it had hesitated as little window, violently plunging and she had
|
||||||
committed, and had all about the present calling, which the fingers of tea on
|
committed, and had all about the present calling, which the fingers of tea on
|
||||||
Saturdays than this country, gentlemen, but I could see those,
|
Saturdays than this country, gentlemen, but I could see those,
|
||||||
https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
|
https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--3E0B707E/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
|
||||||
too, if you remember what stock she told me again. `But I know what
|
too, if you remember what stock she told me again. `But I know what
|
||||||
file:///enqvF%EFLOBsZhl8h2z wittles is?' `Yes, ma'am.' `Estella, take me again
|
file:///enqvF%EFLOBsZhl8h2z wittles is?' `Yes, ma'am.' `Estella, take me again
|
||||||
and ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A refractory
|
and ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A refractory
|
||||||
|
@ -493,7 +493,7 @@ right-side
|
||||||
ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg
|
ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg
|
||||||
flaxen curls and tables, and a foot of the blacksmith's.' `Halloa!' said Joe,
|
flaxen curls and tables, and a foot of the blacksmith's.' `Halloa!' said Joe,
|
||||||
staring at that it had withered like a infunt, and took another look about the
|
staring at that it had withered like a infunt, and took another look about the
|
||||||
rum <6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/> out at once.
|
rum <6S8.Crwllo5e3.jmtz.XN--GECRJ9C/6InlQn/hnhu2f%ac8tX/apq%0D6o/> out at once.
|
||||||
Three Jolly Bargemen to think she seemed to tell you were. When we saw the file
|
Three Jolly Bargemen to think she seemed to tell you were. When we saw the file
|
||||||
coming at my slice. I have mentioned it with the wooden hut where we had got up
|
coming at my slice. I have mentioned it with the wooden hut where we had got up
|
||||||
trying to file:///gVW/nnRNxPfMXKb%72Aq%4A hand. If ever grateful for. If a
|
trying to file:///gVW/nnRNxPfMXKb%72Aq%4A hand. If ever grateful for. If a
|
||||||
|
@ -662,7 +662,7 @@ open,' he
|
||||||
https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/
|
https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/
|
||||||
wiped the liquor. He was the bad; and some one
|
wiped the liquor. He was the bad; and some one
|
||||||
Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE another
|
Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE another
|
||||||
Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9 turned to put straws
|
Ftp://3zd7z.etw.XN--KPRW13D/4UztCuTbW2z/LL%2cDI/dTYSi9 turned to put straws
|
||||||
down by a most powerfully down
|
down by a most powerfully down
|
||||||
t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x to me, and all that
|
t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x to me, and all that
|
||||||
know the window,
|
know the window,
|
||||||
|
@ -993,7 +993,7 @@ upon a door, which was gobbling mincemeat, meatbone, bread, some lace for it
|
||||||
that Joe's blue file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/ eyes, had an
|
that Joe's blue file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/ eyes, had an
|
||||||
hour longer than at me, and dismal, and gloves, and that's further than I
|
hour longer than at me, and dismal, and gloves, and that's further than I
|
||||||
mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs looked on. `Now, boy!
|
mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs looked on. `Now, boy!
|
||||||
g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
|
g6tylc0.daeczh.4q.XN--CLCHC0EA0B2G2A9GCD/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
|
||||||
Why, here's a ridiculous old chap. And looked up by hand. `Why don't like
|
Why, here's a ridiculous old chap. And looked up by hand. `Why don't like
|
||||||
`sulks.' Therefore, I was in such game?' Everybody, myself drifting down his
|
`sulks.' Therefore, I was in such game?' Everybody, myself drifting down his
|
||||||
chest and he had made me worse by-and-by. I was a
|
chest and he had made me worse by-and-by. I was a
|
||||||
|
@ -1035,7 +1035,7 @@ in every word out again. `You are prison-ships, and they fought
|
||||||
<HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt>
|
<HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt>
|
||||||
for us heavy. `I Bolted, myself, 5.Piba4ac.JE/55M1H/AZXdj and thread, and we
|
for us heavy. `I Bolted, myself, 5.Piba4ac.JE/55M1H/AZXdj and thread, and we
|
||||||
after him, or to inspire confidence. This was brought you spoke all the act, he
|
after him, or to inspire confidence. This was brought you spoke all the act, he
|
||||||
couldn't m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/ keep the fire
|
couldn't m-k6-ej7x.XN--J6W193G/suVrNQSIj9/TmRhHbe/o&0dbqR/ keep the fire
|
||||||
between the forge was <ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/>
|
between the forge was <ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/>
|
||||||
busy in it. Until
|
busy in it. Until
|
||||||
hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/ she jammed
|
hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/ she jammed
|
||||||
|
@ -1329,7 +1329,7 @@ sort Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L --
|
||||||
FILE://155.24.106.255/3VEZIT7 if it was to him, I might not do not afraid of
|
FILE://155.24.106.255/3VEZIT7 if it was to him, I might not do not afraid of
|
||||||
report, and looking rather to make nothing of a confidential voice,
|
report, and looking rather to make nothing of a confidential voice,
|
||||||
d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ
|
d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ
|
||||||
as lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET if he would be
|
as lda5l5wc.XN--KPRY57D/pr80SSZ/eNM1%D50lp/Rc%8EimOET if he would be
|
||||||
supposed,' said the wind and so we were read the conversation consisted of it
|
supposed,' said the wind and so we were read the conversation consisted of it
|
||||||
had so that we saw some bread, some
|
had so that we saw some bread, some
|
||||||
l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C brandy out: no black velvet
|
l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C brandy out: no black velvet
|
||||||
|
|
|
@ -10,7 +10,7 @@ http://Rcbu6/Oxc%C0IkGSZ8rO9IUpd/BEvkvw3nWNXZ/P%17tp3gjATN/0ZRzs
|
||||||
file:///2CdsP/U2GCLT
|
file:///2CdsP/U2GCLT
|
||||||
Http://Pzw978uzb.ai/yB;mt/o8hVKG/%231Y/Xb1%bb6v1fhjfdkfkBvxed?8mq~=OvF&STpJJk=ws0ZO&0DRA=
|
Http://Pzw978uzb.ai/yB;mt/o8hVKG/%231Y/Xb1%bb6v1fhjfdkfkBvxed?8mq~=OvF&STpJJk=ws0ZO&0DRA=
|
||||||
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH
|
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH
|
||||||
Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m
|
Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m
|
||||||
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb
|
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb
|
||||||
ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J
|
ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J
|
||||||
ftp://213.7.210.47/%e5pFkj6e6Jczc/ypJGG/z%663jYR/37IxLQBPr/Ciq50EUIdueyj
|
ftp://213.7.210.47/%e5pFkj6e6Jczc/ypJGG/z%663jYR/37IxLQBPr/Ciq50EUIdueyj
|
||||||
|
@ -23,13 +23,13 @@ Ftp://Xmswrxn8d-1s.pe.gm/dB6C3xTk%D3x/EKOiTmk%7c/API/0cdgpi;Type=a
|
||||||
FILE:///rKnQkS0MAF#tM%53_2%03%d6ZICH
|
FILE:///rKnQkS0MAF#tM%53_2%03%d6ZICH
|
||||||
ftp://R5ecjkf1yx4wpskfh.tv0y3m90ak.0R605.se:51297/zpWcRRcG/1woSqw7ZUko/
|
ftp://R5ecjkf1yx4wpskfh.tv0y3m90ak.0R605.se:51297/zpWcRRcG/1woSqw7ZUko/
|
||||||
file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf
|
file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf
|
||||||
HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND
|
HTTP://yA2O3F.XN--3E0B707E/qPDTt/MwMXGQq2S7JT/TJ2iCND
|
||||||
file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#
|
file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#
|
||||||
http://1qvgjd1.TP/7oq5gWW/Gwqf8fxBXR4/?Br,q=ayMz0&1IO%370N7=;Sl1czc2L+5bRISfD+w&ygP3FhV%E1w36=2Rx
|
http://1qvgjd1.TP/7oq5gWW/Gwqf8fxBXR4/?Br,q=ayMz0&1IO%370N7=;Sl1czc2L+5bRISfD+w&ygP3FhV%E1w36=2Rx
|
||||||
ftp://5SCC6BUYP.Knf1cvlc22z9.1dc3rixt5ugyq4/5OnYTSN/QpCdo/t3zqkI/pn5skT/oJgrGy7
|
ftp://5SCC6BUYP.Knf1cvlc22z9.1dc3rixt5ugyq4/5OnYTSN/QpCdo/t3zqkI/pn5skT/oJgrGy7
|
||||||
http://2dkbeuwsto3i3e8jaxi6su9wjlmwygtpdp7g65611z-2bbr82uhjqkdv2jrh7.KZ/FiSvI/aaB&dPQ%42kLdM
|
http://2dkbeuwsto3i3e8jaxi6su9wjlmwygtpdp7g65611z-2bbr82uhjqkdv2jrh7.KZ/FiSvI/aaB&dPQ%42kLdM
|
||||||
FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB
|
FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB
|
||||||
ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
|
ftp://w0yaysrl.XN--CLCHC0EA0B2G2A9GCD/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
|
||||||
http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY
|
http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY
|
||||||
N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/
|
N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/
|
||||||
http://ah-2d4.ASIA/qmp
|
http://ah-2d4.ASIA/qmp
|
||||||
|
@ -75,7 +75,7 @@ http://4u3o/BKdhwRyzG
|
||||||
file:///LdsHfPABFz1vRD1OB6Yl/RS6&1Gmz/mfYul/
|
file:///LdsHfPABFz1vRD1OB6Yl/RS6&1Gmz/mfYul/
|
||||||
ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sgn6&X5EiZdZ0WhTX3T/fa%f3Azz
|
ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sgn6&X5EiZdZ0WhTX3T/fa%f3Azz
|
||||||
z3ymb.KM/DdnrqoBz=YtxSB
|
z3ymb.KM/DdnrqoBz=YtxSB
|
||||||
FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0
|
FTP://7kgip3z.XN--KPRY57D:15983/OYEQzIA0
|
||||||
nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc
|
nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc
|
||||||
ftp://085.062.055.011/bopfVV/
|
ftp://085.062.055.011/bopfVV/
|
||||||
ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs
|
ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs
|
||||||
|
@ -93,12 +93,12 @@ https://[3790:ad57:0B63::e5f7:f6ac:164C]/Obax;zcD/Y%48%9a/Z2xcdar
|
||||||
bl60k0jqkc9.oow84o1.BF/Xly5cTna/BzoQuHi3r8e/o5BDNrvT/=6HRdBjH/Mrp5%02/p%e9pT2Ae
|
bl60k0jqkc9.oow84o1.BF/Xly5cTna/BzoQuHi3r8e/o5BDNrvT/=6HRdBjH/Mrp5%02/p%e9pT2Ae
|
||||||
ftp://Bs3ceuxd8ii66gt.X8wwdpt.BB:27095/3BfkvfzcmTS/FTffh&S/gIWvJ5Kd/AlOQ%3EnO
|
ftp://Bs3ceuxd8ii66gt.X8wwdpt.BB:27095/3BfkvfzcmTS/FTffh&S/gIWvJ5Kd/AlOQ%3EnO
|
||||||
http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w
|
http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w
|
||||||
zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1
|
zQFC1SPO96J.Jy20d8.xn--3e0b707e:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1
|
||||||
ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ
|
ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ
|
||||||
HTTPS://56aderic0knmip9lkqdqag14.uk:45885/lELiK:/vF%4C5Enwqy/P5NGJ2b/dD6sg1yMV
|
HTTPS://56aderic0knmip9lkqdqag14.uk:45885/lELiK:/vF%4C5Enwqy/P5NGJ2b/dD6sg1yMV
|
||||||
ftp://vlt.3g45k63viz2.tcnm3.UA:60664/AJ9iqYk%c1/uKbohn2/K%D1kequ4z8rxFpJ
|
ftp://vlt.3g45k63viz2.tcnm3.UA:60664/AJ9iqYk%c1/uKbohn2/K%D1kequ4z8rxFpJ
|
||||||
Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1
|
Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1
|
||||||
7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb
|
7WO6F.XN--45BRJ9C/1L%f9G0NEu/L2lD/mQGNS9UhgCEb
|
||||||
ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB
|
ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB
|
||||||
ftp://lv56pdepzu0b0fo-04qtxv5tt2jc0nsaukrhtz5-e3u1vcb517y3b135zl.e0r1hson.dk/3TVoqjp6%1FCFSkt/006VZfho/gxrWxgDawM3Uk
|
ftp://lv56pdepzu0b0fo-04qtxv5tt2jc0nsaukrhtz5-e3u1vcb517y3b135zl.e0r1hson.dk/3TVoqjp6%1FCFSkt/006VZfho/gxrWxgDawM3Uk
|
||||||
Ftp://7n977.Niyt.2fgkzfhj.q7-DJ.Ow7a.it/5zfRi3PO8/1zfKT9%421tP/?SazEijJq%710COQKWeLE/TdUc%b2u/2AxBw9%4BUN6Zp4Z/KfUZd1MTdPv/L4m1tI3/WJvcK1
|
Ftp://7n977.Niyt.2fgkzfhj.q7-DJ.Ow7a.it/5zfRi3PO8/1zfKT9%421tP/?SazEijJq%710COQKWeLE/TdUc%b2u/2AxBw9%4BUN6Zp4Z/KfUZd1MTdPv/L4m1tI3/WJvcK1
|
||||||
|
@ -147,20 +147,20 @@ ftp://Lq.es/%B1ZPdTZgB2mNFW/qre92rM
|
||||||
file:///IZ47ESCtX%aatQab1/V553gjR?Me/#9%68qPw
|
file:///IZ47ESCtX%aatQab1/V553gjR?Me/#9%68qPw
|
||||||
file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH
|
file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH
|
||||||
ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T
|
ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T
|
||||||
79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO
|
79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--FIQS8S/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO
|
||||||
Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z
|
Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z
|
||||||
ftp://[fd77:4982:C37F:a0a1:7651:E09C:117.093.145.017]/2l91g/s%79lJmUiZ/%A5R2qsJ
|
ftp://[fd77:4982:C37F:a0a1:7651:E09C:117.093.145.017]/2l91g/s%79lJmUiZ/%A5R2qsJ
|
||||||
[62c0::]/d1lmSzoB/5OBVnzn/kOXW%D23
|
[62c0::]/d1lmSzoB/5OBVnzn/kOXW%D23
|
||||||
Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%bed=uY5hO+s+IKk1S&Q=HHXEC+Gof86QIRHy&35QY5=
|
Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%bed=uY5hO+s+IKk1S&Q=HHXEC+Gof86QIRHy&35QY5=
|
||||||
FILE:///#F9Bgl
|
FILE:///#F9Bgl
|
||||||
jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
|
jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--3E0B707E/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
|
||||||
File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg
|
File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg
|
||||||
ftp://892f7.oel50j.32.9qj1p-g7lgw.MR:48021/XNKbk2PZQXSvOuGnOAnATDt3/XfHyJtvoC/PW7YrSgf#LmGWJgPw
|
ftp://892f7.oel50j.32.9qj1p-g7lgw.MR:48021/XNKbk2PZQXSvOuGnOAnATDt3/XfHyJtvoC/PW7YrSgf#LmGWJgPw
|
||||||
http://sisas.ua/4CU60ZLK4VgY8AR89
|
http://sisas.ua/4CU60ZLK4VgY8AR89
|
||||||
FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2
|
FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2
|
||||||
Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz
|
Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz
|
||||||
file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg
|
file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg
|
||||||
http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
|
http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--CLCHC0EA0B2G2A9GCD/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
|
||||||
Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG
|
Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG
|
||||||
ftp://tw7d-6yu.im:2055/%66qbqzss/OmPGW;type=d
|
ftp://tw7d-6yu.im:2055/%66qbqzss/OmPGW;type=d
|
||||||
FTP://zst.tn/QcUpaA/VKvJ2/JN6AKew/iXYIiHm7mfPFmD%21E5/yTQpoiqdbaaS1/LnzOX#VqsobH
|
FTP://zst.tn/QcUpaA/VKvJ2/JN6AKew/iXYIiHm7mfPFmD%21E5/yTQpoiqdbaaS1/LnzOX#VqsobH
|
||||||
|
@ -228,7 +228,7 @@ file:///UIIGOxv6jvF2%c0/%A8J3%677Gmq8im1zklKhqx/HMhCSY2QcyxvL/
|
||||||
http://Qhk9z.zm/cOGBen/mBsDycEI5V7L1s%84WUj7863/p%5f~okuRD51b0M?b%F2d%67ujGr=oh8PWUtK&j6uX7baX=&sg3RUocA9W=m5IaF&JWH9G=fyiOtnC3+7RJA+ippw96rvu+BxtGg&F6f1=jmPS&3PE0xX5=TGV%5c5J&%fc@NSEynhuvb=&MkRIt33=
|
http://Qhk9z.zm/cOGBen/mBsDycEI5V7L1s%84WUj7863/p%5f~okuRD51b0M?b%F2d%67ujGr=oh8PWUtK&j6uX7baX=&sg3RUocA9W=m5IaF&JWH9G=fyiOtnC3+7RJA+ippw96rvu+BxtGg&F6f1=jmPS&3PE0xX5=TGV%5c5J&%fc@NSEynhuvb=&MkRIt33=
|
||||||
Http://[98cc:433d:2C25:62dd:54ba:d10b:63d3:4C40]/YlbNrJod/fdjuN/qYqSdqr5/KAbXYHO%F0m7Ws9
|
Http://[98cc:433d:2C25:62dd:54ba:d10b:63d3:4C40]/YlbNrJod/fdjuN/qYqSdqr5/KAbXYHO%F0m7Ws9
|
||||||
file:///ywFY5HK/XAv@v%66o/M2O4Wlny50hypf5%02A8
|
file:///ywFY5HK/XAv@v%66o/M2O4Wlny50hypf5%02A8
|
||||||
https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
|
https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--3E0B707E/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
|
||||||
file:///enqvF%EFLOBsZhl8h2z
|
file:///enqvF%EFLOBsZhl8h2z
|
||||||
ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A
|
ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A
|
||||||
ftp://1xf.ipl4f0y6c4.VA/LHuq~/p2nPbE/0YGGNJB%DEje2psef_B/aKOuMl1Q9
|
ftp://1xf.ipl4f0y6c4.VA/LHuq~/p2nPbE/0YGGNJB%DEje2psef_B/aKOuMl1Q9
|
||||||
|
@ -240,7 +240,7 @@ http://nEN5ZN.EG/%0efsf4v30L
|
||||||
file:///19%9947/ksd3Sq7W78%27/2K_Ylzcu2q
|
file:///19%9947/ksd3Sq7W78%27/2K_Ylzcu2q
|
||||||
r8sht9qzsc1e2wp.ci/8SbPwlW%5ac/qKEqFi0Q
|
r8sht9qzsc1e2wp.ci/8SbPwlW%5ac/qKEqFi0Q
|
||||||
ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg
|
ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg
|
||||||
6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/
|
6S8.Crwllo5e3.jmtz.XN--GECRJ9C/6InlQn/hnhu2f%ac8tX/apq%0D6o/
|
||||||
file:///gVW/nnRNxPfMXKb%72Aq%4A
|
file:///gVW/nnRNxPfMXKb%72Aq%4A
|
||||||
file:///Fzza388TQ
|
file:///Fzza388TQ
|
||||||
file:///
|
file:///
|
||||||
|
@ -314,7 +314,7 @@ file:///3%aexrb7UdZ5GpR4ZIfoxwL/vQV%4a2zQxki/QRji6gHpMGgBaM/d%71A2CTpZv-kF0tD/Ig
|
||||||
f5ms.jp/%A1FpERWwTd%BFG/ExC8V5aqx5l2CLJr0mJb5u/DgMvEzAr2U/py9Vg/igr9PzANtw/FFiN1E7
|
f5ms.jp/%A1FpERWwTd%BFG/ExC8V5aqx5l2CLJr0mJb5u/DgMvEzAr2U/py9Vg/igr9PzANtw/FFiN1E7
|
||||||
https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/
|
https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/
|
||||||
Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE
|
Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE
|
||||||
Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9
|
Ftp://3zd7z.etw.XN--KPRW13D/4UztCuTbW2z/LL%2cDI/dTYSi9
|
||||||
t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x
|
t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x
|
||||||
ftp://D02-auxxaeqnv9ve-jlmo3.l10vqu.12jl.2mvjwrsqm.BA/r71QLLNu6oGJjG/HbxrX1Grq8/QR%2agZv4hR
|
ftp://D02-auxxaeqnv9ve-jlmo3.l10vqu.12jl.2mvjwrsqm.BA/r71QLLNu6oGJjG/HbxrX1Grq8/QR%2agZv4hR
|
||||||
file:///XoCg%EDVf/A3ibJYjU
|
file:///XoCg%EDVf/A3ibJYjU
|
||||||
|
@ -476,7 +476,7 @@ ftp://53.151.134.240/uZqGXLUIu-J/=%0C2pO/PvL0%19MpQBv/
|
||||||
FILE:///Kywof5D5q/0TRS/zayrkrnENB
|
FILE:///Kywof5D5q/0TRS/zayrkrnENB
|
||||||
file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/
|
file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/
|
||||||
mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs
|
mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs
|
||||||
g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
|
g6tylc0.daeczh.4q.XN--CLCHC0EA0B2G2A9GCD/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
|
||||||
file:///TJa%86AczeCmM5QMhi/Wox~Ajl/WxUF%5eSA:y%0fD%E21/x%cca%d3Qgx/8iWJ5-h%26/fCK%01nQNrK8#ygTTB
|
file:///TJa%86AczeCmM5QMhi/Wox~Ajl/WxUF%5eSA:y%0fD%E21/x%cca%d3Qgx/8iWJ5-h%26/fCK%01nQNrK8#ygTTB
|
||||||
file:///~%303cUUVYTEaQU5%5DXbogiPKb/favR2rETEh/9TXM%15u/nYCOZpZgL
|
file:///~%303cUUVYTEaQU5%5DXbogiPKb/favR2rETEh/9TXM%15u/nYCOZpZgL
|
||||||
file:///mJM%a1/jv5%53QDqE/bFMu0CBp
|
file:///mJM%a1/jv5%53QDqE/bFMu0CBp
|
||||||
|
@ -496,7 +496,7 @@ http://gpu16lz.LS/9e%daJrwQfHEpFvsZ3jx/c4STIJ/CmvEGAUx9f/
|
||||||
file://ij9anjtok86ro.uN-BGDQ855IB.sDXAQR.5kr8kz.3J3M8XRM.18r3s0g-6.4rjsmwue0lwao0og17d-5-1.F1h3qgkul29yw2t4p4se5clomncxhmoy.g6c9tbz7.pa/5LMtmbl/1tfIF/pBOV7Hc
|
file://ij9anjtok86ro.uN-BGDQ855IB.sDXAQR.5kr8kz.3J3M8XRM.18r3s0g-6.4rjsmwue0lwao0og17d-5-1.F1h3qgkul29yw2t4p4se5clomncxhmoy.g6c9tbz7.pa/5LMtmbl/1tfIF/pBOV7Hc
|
||||||
HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt
|
HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt
|
||||||
5.Piba4ac.JE/55M1H/AZXdj
|
5.Piba4ac.JE/55M1H/AZXdj
|
||||||
m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/
|
m-k6-ej7x.XN--J6W193G/suVrNQSIj9/TmRhHbe/o&0dbqR/
|
||||||
ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/
|
ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/
|
||||||
hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/
|
hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/
|
||||||
Ftp://mez27g2tpmk.MC/%B8AHk%95etDns%46/gXbsCn%6C-/s8_Jmy/DhmfT~Di6KD
|
Ftp://mez27g2tpmk.MC/%B8AHk%95etDns%46/gXbsCn%6C-/s8_Jmy/DhmfT~Di6KD
|
||||||
|
@ -633,7 +633,7 @@ http://047.014.184.200/Z_QdOwjzfBue4Nt/aEn/xuEQD/cXlnoxHIK%7d8h/1%eegEk7E0/8Ejku
|
||||||
Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L
|
Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L
|
||||||
FILE://155.24.106.255/3VEZIT7
|
FILE://155.24.106.255/3VEZIT7
|
||||||
d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ
|
d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ
|
||||||
lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET
|
lda5l5wc.XN--KPRY57D/pr80SSZ/eNM1%D50lp/Rc%8EimOET
|
||||||
l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C
|
l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C
|
||||||
FILE://a6ys9a4.xj.BY/%99BGXp/F=yJtxc71/gvXuHuB9k
|
FILE://a6ys9a4.xj.BY/%99BGXp/F=yJtxc71/gvXuHuB9k
|
||||||
212.072.006.032/6kV8ce%2e/%e7lzm-HB%4artP/zg6tWMW7RIG?U7=HAXw$D3sM%7DyDJ&Gt=
|
212.072.006.032/6kV8ce%2e/%e7lzm-HB%4artP/zg6tWMW7RIG?U7=HAXw$D3sM%7DyDJ&Gt=
|
||||||
|
|
|
@ -75,7 +75,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes
|
||||||
+ " samba Halta gamba "
|
+ " samba Halta gamba "
|
||||||
+ "ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R\n"
|
+ "ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R\n"
|
||||||
+ "M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb\n"
|
+ "M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb\n"
|
||||||
+ "Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m"
|
+ "Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m"
|
||||||
+ " inter Locutio "
|
+ " inter Locutio "
|
||||||
+ "[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/\n"
|
+ "[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/\n"
|
||||||
+ "file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7"
|
+ "file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7"
|
||||||
|
@ -91,7 +91,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes
|
||||||
"samba", "Halta", "gamba",
|
"samba", "Halta", "gamba",
|
||||||
"ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R",
|
"ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R",
|
||||||
"M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb",
|
"M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb",
|
||||||
"Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m",
|
"Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m",
|
||||||
"inter", "Locutio",
|
"inter", "Locutio",
|
||||||
"[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/",
|
"[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/",
|
||||||
"file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7",
|
"file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7",
|
||||||
|
|
|
@ -60,20 +60,21 @@ public class GenerateJflexTLDMacros {
|
||||||
|
|
||||||
private static final String APACHE_LICENSE
|
private static final String APACHE_LICENSE
|
||||||
= "/*" + NL
|
= "/*" + NL
|
||||||
+ " * Copyright 2001-2005 The Apache Software Foundation." + NL
|
+ " * Licensed to the Apache Software Foundation (ASF) under one or more" + NL
|
||||||
+ " *" + NL
|
+ " * contributor license agreements. See the NOTICE file distributed with" + NL
|
||||||
+ " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL
|
+ " * this work for additional information regarding copyright ownership." + NL
|
||||||
+ " * you may not use this file except in compliance with the License." + NL
|
+ " * The ASF licenses this file to You under the Apache License, Version 2.0" + NL
|
||||||
+ " * You may obtain a copy of the License at" + NL
|
+ " * (the \"License\"); you may not use this file except in compliance with" + NL
|
||||||
+ " *" + NL
|
+ " * the License. You may obtain a copy of the License at" + NL
|
||||||
+ " * http://www.apache.org/licenses/LICENSE-2.0" + NL
|
+ " *" + NL
|
||||||
+ " *" + NL
|
+ " * http://www.apache.org/licenses/LICENSE-2.0" + NL
|
||||||
+ " * Unless required by applicable law or agreed to in writing, software" + NL
|
+ " *" + NL
|
||||||
+ " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
|
+ " * Unless required by applicable law or agreed to in writing, software" + NL
|
||||||
+ " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
|
+ " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
|
||||||
+ " * See the License for the specific language governing permissions and" + NL
|
+ " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
|
||||||
+ " * limitations under the License." + NL
|
+ " * See the License for the specific language governing permissions and" + NL
|
||||||
+ " */" + NL + NL;
|
+ " * limitations under the License." + NL
|
||||||
|
+ " */" + NL;
|
||||||
|
|
||||||
private static final Pattern TLD_PATTERN_1
|
private static final Pattern TLD_PATTERN_1
|
||||||
= Pattern.compile("([-A-Za-z0-9]+)\\.\\s+NS\\s+.*");
|
= Pattern.compile("([-A-Za-z0-9]+)\\.\\s+NS\\s+.*");
|
||||||
|
|
|
@ -36,40 +36,45 @@ public class GenerateJFlexSupplementaryMacros {
|
||||||
static {
|
static {
|
||||||
DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
|
DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final String APACHE_LICENSE
|
private static final String APACHE_LICENSE
|
||||||
= "/*" + NL
|
= "/*" + NL
|
||||||
+ " * Copyright 2010 The Apache Software Foundation." + NL
|
+ " * Licensed to the Apache Software Foundation (ASF) under one or more" + NL
|
||||||
|
+ " * contributor license agreements. See the NOTICE file distributed with" + NL
|
||||||
|
+ " * this work for additional information regarding copyright ownership." + NL
|
||||||
|
+ " * The ASF licenses this file to You under the Apache License, Version 2.0" + NL
|
||||||
|
+ " * (the \"License\"); you may not use this file except in compliance with" + NL
|
||||||
|
+ " * the License. You may obtain a copy of the License at" + NL
|
||||||
+ " *" + NL
|
+ " *" + NL
|
||||||
+ " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL
|
+ " * http://www.apache.org/licenses/LICENSE-2.0" + NL
|
||||||
+ " * you may not use this file except in compliance with the License." + NL
|
|
||||||
+ " * You may obtain a copy of the License at" + NL
|
|
||||||
+ " *" + NL
|
|
||||||
+ " * http://www.apache.org/licenses/LICENSE-2.0" + NL
|
|
||||||
+ " *" + NL
|
+ " *" + NL
|
||||||
+ " * Unless required by applicable law or agreed to in writing, software" + NL
|
+ " * Unless required by applicable law or agreed to in writing, software" + NL
|
||||||
+ " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
|
+ " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
|
||||||
+ " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
|
+ " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
|
||||||
+ " * See the License for the specific language governing permissions and" + NL
|
+ " * See the License for the specific language governing permissions and" + NL
|
||||||
+ " * limitations under the License." + NL
|
+ " * limitations under the License." + NL
|
||||||
+ " */" + NL + NL;
|
+ " */" + NL;
|
||||||
|
|
||||||
|
|
||||||
public static void main(String args[]) {
|
public static void main(String args[]) {
|
||||||
outputHeader();
|
outputHeader();
|
||||||
outputMacro("ALetterSupp", "[:WordBreak=ALetter:]");
|
outputMacro("ALetterSupp", "[:WordBreak=ALetter:]");
|
||||||
outputMacro("FormatSupp", "[:WordBreak=Format:]");
|
outputMacro("FormatSupp", "[:WordBreak=Format:]");
|
||||||
outputMacro("ExtendSupp", "[:WordBreak=Extend:]");
|
outputMacro("NumericSupp", "[:WordBreak=Numeric:]");
|
||||||
outputMacro("NumericSupp", "[:WordBreak=Numeric:]");
|
outputMacro("ExtendSupp", "[:WordBreak=Extend:]");
|
||||||
outputMacro("KatakanaSupp", "[:WordBreak=Katakana:]");
|
outputMacro("KatakanaSupp", "[:WordBreak=Katakana:]");
|
||||||
outputMacro("MidLetterSupp", "[:WordBreak=MidLetter:]");
|
outputMacro("MidLetterSupp", "[:WordBreak=MidLetter:]");
|
||||||
outputMacro("MidNumSupp", "[:WordBreak=MidNum:]");
|
outputMacro("MidNumSupp", "[:WordBreak=MidNum:]");
|
||||||
outputMacro("MidNumLetSupp", "[:WordBreak=MidNumLet:]");
|
outputMacro("MidNumLetSupp", "[:WordBreak=MidNumLet:]");
|
||||||
outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]");
|
outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]");
|
||||||
outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]");
|
outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]");
|
||||||
outputMacro("ComplexContextSupp", "[:LineBreak=Complex_Context:]");
|
outputMacro("ComplexContextSupp", "[:LineBreak=Complex_Context:]");
|
||||||
outputMacro("HanSupp", "[:Script=Han:]");
|
outputMacro("HanSupp", "[:Script=Han:]");
|
||||||
outputMacro("HiraganaSupp", "[:Script=Hiragana:]");
|
outputMacro("HiraganaSupp", "[:Script=Hiragana:]");
|
||||||
|
outputMacro("SingleQuoteSupp", "[:WordBreak=Single_Quote:]");
|
||||||
|
outputMacro("DoubleQuoteSupp", "[:WordBreak=Double_Quote:]");
|
||||||
|
outputMacro("HebrewLetterSupp", "[:WordBreak=Hebrew_Letter:]");
|
||||||
|
outputMacro("RegionalIndicatorSupp", "[:WordBreak=Regional_Indicator:]");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void outputHeader() {
|
static void outputHeader() {
|
||||||
|
|
|
@ -476,7 +476,7 @@
|
||||||
<available property="jflex.present" classname="jflex.anttask.JFlexTask">
|
<available property="jflex.present" classname="jflex.anttask.JFlexTask">
|
||||||
<classpath refid="jflex.classpath"/>
|
<classpath refid="jflex.classpath"/>
|
||||||
</available>
|
</available>
|
||||||
<fail unless="jflex.present">
|
<fail unless="jflex.present"> 
|
||||||
##################################################################
|
##################################################################
|
||||||
JFlex not found.
|
JFlex not found.
|
||||||
JFlex Home: ${jflex.home}
|
JFlex Home: ${jflex.home}
|
||||||
|
@ -484,14 +484,14 @@
|
||||||
Please install the jFlex 1.5 version (currently not released)
|
Please install the jFlex 1.5 version (currently not released)
|
||||||
from its SVN repository:
|
from its SVN repository:
|
||||||
|
|
||||||
svn co -r 623 http://jflex.svn.sourceforge.net/svnroot/jflex/trunk jflex
|
svn co -r 722 https://svn.code.sf.net/p/jflex/code/trunk jflex
|
||||||
cd jflex
|
cd jflex
|
||||||
mvn install
|
mvn install
|
||||||
|
|
||||||
Then, create a build.properties file either in your home
|
Then, create a build.properties file either in your home
|
||||||
directory, or within the Lucene directory and set the jflex.home
|
directory, or within the Lucene directory and set the jflex.home
|
||||||
property to the path where the JFlex trunk checkout is located
|
property to the path where the JFlex trunk checkout is located
|
||||||
(in the above example its the directory called "jflex").
|
(in the above example it's the directory called "jflex").
|
||||||
|
|
||||||
##################################################################
|
##################################################################
|
||||||
</fail>
|
</fail>
|
||||||
|
|
Loading…
Reference in New Issue