LUCENE-5357: Upgrade StandardTokenizer and UAX29URLEmailTokenizer to Unicode 6.3; update UAX29URLEmailTokenizer's recognized top level domains in URLs and Emails from the IANA Root Zone Database.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1548595 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2013-12-06 16:51:43 +00:00
parent 618f6b876d
commit d516948bbd
27 changed files with 8776 additions and 5333 deletions

View File

@ -91,6 +91,11 @@ Build
* LUCENE-4381: Upgrade analysis/icu to 52.1. (Robert Muir) * LUCENE-4381: Upgrade analysis/icu to 52.1. (Robert Muir)
* LUCENE-5357: Upgrade StandardTokenizer and UAX29URLEmailTokenizer to
Unicode 6.3; update UAX29URLEmailTokenizer's recognized top level
domains in URLs and Emails from the IANA Root Zone Database.
(Steve Rowe)
Bug fixes Bug fixes
* LUCENE-5285: Improved highlighting of multi-valued fields with * LUCENE-5285: Improved highlighting of multi-valued fields with

View File

@ -45,17 +45,13 @@
<taskdef classname="jflex.anttask.JFlexTask" name="jflex"> <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/> <classpath refid="jflex.classpath"/>
</taskdef> </taskdef>
<!-- this logic below looks duplicated with run-jflex, but its not, the regexp is different! -->
<jflex file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex" <jflex file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex"
outdir="src/java/org/apache/lucene/analysis/charfilter" outdir="src/java/org/apache/lucene/analysis/charfilter"
nobak="on"/> nobak="on" inputstreamctor="false"/>
<!-- Remove the inappropriate JFlex-generated constructors --> <!-- Remove the inappropriate JFlex-generated constructor -->
<replaceregexp file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java" <replaceregexp file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java"
match="/\*\*\s*\*\s*Creates a new scanner.*this\(new java\.io\.InputStreamReader\(in\)\);\s*\}" match="/\*\*\s*\*\s*Creates a new scanner\s*\*\s*\*\s*@param\s*in\s*the java.io.Reader to read input from\.\s*\*/\s*public HTMLStripCharFilter\(java\.io\.Reader in\)\s*\{\s*this.zzReader = in;\s*\}"
replace="" flags="sg"/> replace="" flags="s"/>
<replaceregexp file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java"
match="\/\*\s*The following code was generated by JFlex.*"
replace="\/\* The following code was generated by JFlex. \*\/" flags=""/>
</target> </target>
<target name="generate-jflex-html-char-entities"> <target name="generate-jflex-html-char-entities">
@ -96,15 +92,7 @@
<attribute name="dir"/> <attribute name="dir"/>
<attribute name="name"/> <attribute name="name"/>
<sequential> <sequential>
<jflex file="@{dir}/@{name}.jflex" <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
outdir="@{dir}"
nobak="on" />
<replaceregexp file="@{dir}/@{name}.java"
match="/\*\*\s*\*\s*Creates a new scanner\..*this\(new java\.io\.InputStreamReader\(in\)\);\s*\}"
replace="" flags="sg"/>
<replaceregexp file="@{dir}/@{name}.java"
match="\/\*\s*The following code was generated by JFlex.*"
replace="\/\* The following code was generated by JFlex. \*\/" flags=""/>
</sequential> </sequential>
</macrodef> </macrodef>

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex. */ /* The following code was generated by JFlex 1.5.0-SNAPSHOT */
package org.apache.lucene.analysis.charfilter; package org.apache.lucene.analysis.charfilter;
@ -152,77 +152,77 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
"\21\1\1\41\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0"+ "\21\1\1\41\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0"+
"\4\1\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1"+ "\4\1\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1"+
"\1\0\3\1\1\0\2\2\14\0\64\1\40\2\3\0\1\1\4\0"+ "\1\0\3\1\1\0\2\2\14\0\64\1\40\2\3\0\1\1\4\0"+
"\1\1\1\2\2\0\12\274\41\0\3\2\1\41\1\0\12\274\6\0"+ "\1\1\1\2\2\0\12\274\41\0\3\2\2\0\12\274\6\0\130\1"+
"\130\1\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0"+ "\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2"+
"\14\2\4\0\14\2\12\0\12\274\36\1\2\0\5\1\13\0\54\1"+ "\4\0\14\2\12\0\12\274\36\1\2\0\5\1\13\0\54\1\4\0"+
"\4\0\21\2\7\1\2\2\6\0\12\274\1\2\45\0\27\1\5\2"+ "\21\2\7\1\2\2\6\0\12\274\1\2\45\0\27\1\5\2\4\0"+
"\4\0\65\1\12\2\1\0\35\2\2\0\1\2\12\274\6\0\12\274"+ "\65\1\12\2\1\0\35\2\2\0\1\2\12\274\6\0\12\274\15\0"+
"\15\0\1\1\130\0\5\2\57\1\21\2\7\1\4\0\12\274\21\0"+ "\1\1\130\0\5\2\57\1\21\2\7\1\4\0\12\274\21\0\11\2"+
"\11\2\14\0\3\2\36\1\15\2\2\1\12\274\54\1\16\2\14\0"+ "\14\0\3\2\36\1\15\2\2\1\12\274\54\1\16\2\14\0\44\1"+
"\44\1\24\2\10\0\12\274\3\0\3\1\12\274\44\1\122\0\3\2"+ "\24\2\10\0\12\274\3\0\3\1\12\274\44\1\122\0\3\2\1\0"+
"\1\0\25\2\4\1\1\2\4\1\3\2\2\1\11\0\300\1\47\2"+ "\25\2\4\1\1\2\4\1\3\2\2\1\11\0\300\1\47\2\25\0"+
"\25\0\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0"+ "\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1"+
"\10\1\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0"+ "\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1"+
"\65\1\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0"+ "\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1"+
"\4\1\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0"+ "\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1\3\0\13\41"+
"\13\41\35\0\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0"+ "\35\0\2\41\5\0\1\41\17\0\2\2\23\0\1\2\12\0\1\41"+
"\1\41\21\0\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0"+ "\21\0\1\1\15\0\1\1\20\0\15\1\63\0\15\2\4\0\1\2"+
"\1\2\3\0\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0"+ "\3\0\14\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0\1\1"+
"\1\1\2\0\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0"+ "\2\0\6\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0\20\1"+
"\20\1\2\0\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0"+ "\2\0\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u0a77\0\57\1"+
"\57\1\1\0\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0"+ "\1\0\57\1\1\0\205\1\6\0\4\1\3\2\2\1\14\0\46\1"+
"\46\1\1\0\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0"+ "\1\0\1\1\5\0\1\1\2\0\70\1\7\0\1\1\17\0\1\2"+
"\1\2\27\1\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+ "\27\1\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
"\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2"+ "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\u0200\0"+
"\u0200\0\1\41\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0"+ "\1\41\4\0\3\1\31\0\11\1\6\2\1\0\5\1\2\0\5\1"+
"\5\1\4\0\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1"+ "\4\0\126\1\2\0\2\2\5\1\1\0\132\1\1\0\4\1\5\0"+
"\5\0\51\1\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1"+ "\51\1\3\0\136\1\21\0\33\1\65\0\20\1\u0200\0\u19b6\1\112\0"+
"\112\0\u51cd\1\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1"+ "\u51cd\1\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\274"+
"\12\274\2\1\24\0\57\1\1\2\4\0\12\2\1\0\31\1\7\0"+ "\2\1\24\0\57\1\1\2\4\0\12\2\1\0\31\1\7\0\1\2"+
"\1\2\120\1\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0"+ "\120\1\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
"\4\1\14\0\13\1\115\0\12\1\1\2\3\1\1\2\4\1\1\2"+ "\14\0\13\1\115\0\12\1\1\2\3\1\1\2\4\1\1\2\27\1"+
"\27\1\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\274"+ "\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\274\6\0"+
"\6\0\22\2\6\1\3\0\1\1\4\0\12\274\34\1\10\2\2\0"+ "\22\2\6\1\3\0\1\1\4\0\12\274\34\1\10\2\2\0\27\1"+
"\27\1\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1"+ "\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\274"+
"\12\274\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0"+ "\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\274"+
"\12\274\6\0\27\1\3\0\1\1\1\2\4\0\60\1\1\2\1\1"+ "\6\0\27\1\3\0\1\1\1\2\4\0\60\1\1\2\1\1\3\2"+
"\3\2\2\1\2\2\5\1\2\2\1\1\1\2\1\1\30\0\3\1"+ "\2\1\2\2\5\1\2\2\1\1\1\2\1\1\30\0\3\1\2\0"+
"\2\0\13\1\5\2\2\0\3\1\2\2\12\0\6\1\2\0\6\1"+ "\13\1\5\2\2\0\3\1\2\2\12\0\6\1\2\0\6\1\2\0"+
"\2\0\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0"+ "\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0\2\2"+
"\2\2\2\0\12\274\6\0\u2ba4\1\14\0\27\1\4\0\61\1\4\0"+ "\2\0\12\274\6\0\u2ba4\1\14\0\27\1\4\0\61\1\4\0\1\170"+
"\1\170\1\223\1\103\1\165\1\136\1\214\2\0\1\160\1\153\2\0"+ "\1\223\1\103\1\165\1\136\1\214\2\0\1\160\1\153\2\0\1\120"+
"\1\120\1\210\14\0\1\105\1\127\20\0\1\122\7\0\1\256\1\112"+ "\1\210\14\0\1\105\1\127\20\0\1\122\7\0\1\256\1\112\5\0"+
"\5\0\1\143\4\0\51\120\1\110\3\120\1\124\1\220\17\0\1\133"+ "\1\143\4\0\51\120\1\110\3\120\1\124\1\220\17\0\1\133\u02c1\0"+
"\u02c1\0\1\252\277\0\2\123\1\212\3\222\2\211\1\222\1\211\2\222"+ "\1\252\277\0\2\123\1\212\3\222\2\211\1\222\1\211\2\222\1\221"+
"\1\221\21\222\11\213\1\157\7\213\7\204\1\156\1\204\1\246\2\207"+ "\21\222\11\213\1\157\7\213\7\204\1\156\1\204\1\246\2\207\1\166"+
"\1\166\1\246\1\207\1\166\10\246\2\167\5\203\2\155\5\203\1\107"+ "\1\246\1\207\1\166\10\246\2\167\5\203\2\155\5\203\1\107\10\202"+
"\10\202\5\154\3\224\12\251\20\224\3\225\32\227\1\226\2\200\2\234"+ "\5\154\3\224\12\251\20\224\3\225\32\227\1\226\2\200\2\234\1\235"+
"\1\235\2\234\2\235\2\234\1\235\3\200\1\177\2\200\12\250\1\247"+ "\2\234\2\235\2\234\1\235\3\200\1\177\2\200\12\250\1\247\1\176"+
"\1\176\1\171\7\176\1\171\13\176\31\200\7\176\12\250\1\176\5\134"+ "\1\171\7\176\1\171\13\176\31\200\7\176\12\250\1\176\5\134\3\245"+
"\3\245\3\142\1\140\4\142\2\140\10\142\1\140\7\141\1\137\2\141"+ "\3\142\1\140\4\142\2\140\10\142\1\140\7\141\1\137\2\141\7\142"+
"\7\142\16\245\1\135\4\245\1\106\4\244\1\106\5\255\1\254\1\255"+ "\16\245\1\135\4\245\1\106\4\244\1\106\5\255\1\254\1\255\3\254"+
"\3\254\7\255\1\254\23\255\5\264\3\255\6\264\2\255\6\253\5\263"+ "\7\255\1\254\23\255\5\264\3\255\6\264\2\255\6\253\5\263\3\262"+
"\3\262\2\142\7\257\36\142\4\257\5\142\5\245\6\244\2\245\1\244"+ "\2\142\7\257\36\142\4\257\5\142\5\245\6\244\2\245\1\244\4\141"+
"\4\141\13\253\12\244\26\253\15\134\1\243\2\134\1\152\3\237\1\134"+ "\13\253\12\244\26\253\15\134\1\243\2\134\1\152\3\237\1\134\2\237"+
"\2\237\5\151\4\237\4\152\1\151\3\152\1\151\5\152\2\147\1\116"+ "\5\151\4\237\4\152\1\151\3\152\1\151\5\152\2\147\1\116\2\147"+
"\2\147\1\116\1\147\2\116\1\147\1\116\12\147\1\116\4\146\1\115"+ "\1\116\1\147\2\116\1\147\1\116\12\147\1\116\4\146\1\115\1\236"+
"\1\236\1\240\1\150\3\164\1\240\2\164\1\260\2\261\2\164\1\150"+ "\1\240\1\150\3\164\1\240\2\164\1\260\2\261\2\164\1\150\1\164"+
"\1\164\1\150\1\164\1\150\1\164\3\150\1\164\2\150\1\164\1\150"+ "\1\150\1\164\1\150\1\164\3\150\1\164\2\150\1\164\1\150\2\164"+
"\2\164\1\150\1\164\1\150\1\164\1\150\1\164\1\150\1\164\1\150"+ "\1\150\1\164\1\150\1\164\1\150\1\164\1\150\1\164\1\150\1\162"+
"\1\162\2\145\1\162\1\145\2\162\4\145\1\162\7\145\1\162\4\145"+ "\2\145\1\162\1\145\2\162\4\145\1\162\7\145\1\162\4\145\1\162"+
"\1\162\4\145\1\164\1\150\1\164\12\216\1\217\21\216\1\217\3\215"+ "\4\145\1\164\1\150\1\164\12\216\1\217\21\216\1\217\3\215\1\217"+
"\1\217\3\216\1\217\1\216\2\144\2\216\1\217\15\241\4\201\4\206"+ "\3\216\1\217\1\216\2\144\2\216\1\217\15\241\4\201\4\206\1\242"+
"\1\242\1\161\10\242\7\206\6\164\4\113\1\121\37\113\1\121\4\113"+ "\1\161\10\242\7\206\6\164\4\113\1\121\37\113\1\121\4\113\25\174"+
"\25\174\1\131\11\174\21\130\5\174\1\104\12\117\5\174\6\205\4\162"+ "\1\131\11\174\21\130\5\174\1\104\12\117\5\174\6\205\4\162\1\163"+
"\1\163\1\130\5\231\12\232\17\231\1\125\3\114\14\230\1\126\11\173"+ "\1\130\5\231\12\232\17\231\1\125\3\114\14\230\1\126\11\173\1\172"+
"\1\172\5\173\4\233\13\175\2\132\11\173\1\172\31\173\1\172\4\126"+ "\5\173\4\233\13\175\2\132\11\173\1\172\31\173\1\172\4\126\4\173"+
"\4\173\2\172\2\265\1\111\5\265\52\111\u1900\0\u016e\1\2\0\152\1"+ "\2\172\2\265\1\111\5\265\52\111\u1900\0\u016e\1\2\0\152\1\46\0"+
"\46\0\7\1\14\0\5\1\5\0\1\1\1\2\12\1\1\0\15\1"+ "\7\1\14\0\5\1\5\0\1\1\1\2\12\1\1\0\15\1\1\0"+
"\1\0\5\1\1\0\1\1\1\0\2\1\1\0\2\1\1\0\154\1"+ "\5\1\1\0\1\1\1\0\2\1\1\0\2\1\1\0\154\1\41\0"+
"\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\2"+ "\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\2\20\0"+
"\20\0\7\2\14\0\2\2\30\0\3\2\40\0\5\1\1\0\207\1"+ "\7\2\14\0\2\2\30\0\3\2\40\0\5\1\1\0\207\1\23\0"+
"\23\0\12\274\7\0\32\1\4\0\1\2\1\0\32\1\13\0\131\1"+ "\12\274\7\0\32\1\4\0\1\2\1\0\32\1\13\0\131\1\3\0"+
"\3\0\6\1\2\0\6\1\2\0\6\1\2\0\3\1\43\0"; "\6\1\2\0\6\1\2\0\6\1\2\0\3\1\43\0";
/** /**
* Translates characters to character classes * Translates characters to character classes
@ -30895,6 +30895,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
/** /**
* Unpacks the compressed character translation table. * Unpacks the compressed character translation table.
* *
@ -30905,7 +30906,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
char [] map = new char[0x10000]; char [] map = new char[0x10000];
int i = 0; /* index in packed string */ int i = 0; /* index in packed string */
int j = 0; /* index in unpacked array */ int j = 0; /* index in unpacked array */
while (i < 2778) { while (i < 2776) {
int count = packed.charAt(i++); int count = packed.charAt(i++);
char value = packed.charAt(i++); char value = packed.charAt(i++);
do map[j++] = value; while (--count > 0); do map[j++] = value; while (--count > 0);

View File

@ -34,7 +34,7 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
*/ */
%% %%
%unicode 6.1 %unicode 6.3
%apiprivate %apiprivate
%type int %type int
%final %final

View File

@ -1,11 +1,12 @@
/* /*
* Copyright 2001-2005 The Apache Software Foundation. * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * http://www.apache.org/licenses/LICENSE-2.0
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
@ -13,10 +14,9 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone> // Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
// file version from Saturday, July 14, 2012 4:34:14 AM UTC // file version from Friday, December 6, 2013 4:34:10 AM UTC
// generated on Sunday, July 15, 2012 12:59:44 AM UTC // generated on Friday, December 6, 2013 3:21:59 PM UTC
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
ASCIITLD = "." ( ASCIITLD = "." (
@ -49,6 +49,7 @@ ASCIITLD = "." (
| [bB][gG] | [bB][gG]
| [bB][hH] | [bB][hH]
| [bB][iI] | [bB][iI]
| [bB][iI][kK][eE]
| [bB][iI][zZ] | [bB][iI][zZ]
| [bB][jJ] | [bB][jJ]
| [bB][mM] | [bB][mM]
@ -62,6 +63,7 @@ ASCIITLD = "." (
| [bB][yY] | [bB][yY]
| [bB][zZ] | [bB][zZ]
| [cC][aA] | [cC][aA]
| [cC][aA][mM][eE][rR][aA]
| [cC][aA][tT] | [cC][aA][tT]
| [cC][cC] | [cC][cC]
| [cC][dD] | [cC][dD]
@ -71,10 +73,13 @@ ASCIITLD = "." (
| [cC][iI] | [cC][iI]
| [cC][kK] | [cC][kK]
| [cC][lL] | [cC][lL]
| [cC][lL][oO][tT][hH][iI][nN][gG]
| [cC][mM] | [cC][mM]
| [cC][nN] | [cC][nN]
| [cC][oO] | [cC][oO]
| [cC][oO][mM] | [cC][oO][mM]
| [cC][oO][nN][sS][tT][rR][uU][cC][tT][iI][oO][nN]
| [cC][oO][nN][tT][rR][aA][cC][tT][oO][rR][sS]
| [cC][oO][oO][pP] | [cC][oO][oO][pP]
| [cC][rR] | [cC][rR]
| [cC][uU] | [cC][uU]
@ -84,6 +89,8 @@ ASCIITLD = "." (
| [cC][yY] | [cC][yY]
| [cC][zZ] | [cC][zZ]
| [dD][eE] | [dD][eE]
| [dD][iI][aA][mM][oO][nN][dD][sS]
| [dD][iI][rR][eE][cC][tT][oO][rR][yY]
| [dD][jJ] | [dD][jJ]
| [dD][kK] | [dD][kK]
| [dD][mM] | [dD][mM]
@ -93,8 +100,11 @@ ASCIITLD = "." (
| [eE][dD][uU] | [eE][dD][uU]
| [eE][eE] | [eE][eE]
| [eE][gG] | [eE][gG]
| [eE][nN][tT][eE][rR][pP][rR][iI][sS][eE][sS]
| [eE][qQ][uU][iI][pP][mM][eE][nN][tT]
| [eE][rR] | [eE][rR]
| [eE][sS] | [eE][sS]
| [eE][sS][tT][aA][tT][eE]
| [eE][tT] | [eE][tT]
| [eE][uU] | [eE][uU]
| [fF][iI] | [fF][iI]
@ -104,6 +114,7 @@ ASCIITLD = "." (
| [fF][oO] | [fF][oO]
| [fF][rR] | [fF][rR]
| [gG][aA] | [gG][aA]
| [gG][aA][lL][lL][eE][rR][yY]
| [gG][bB] | [gG][bB]
| [gG][dD] | [gG][dD]
| [gG][eE] | [gG][eE]
@ -118,14 +129,17 @@ ASCIITLD = "." (
| [gG][pP] | [gG][pP]
| [gG][qQ] | [gG][qQ]
| [gG][rR] | [gG][rR]
| [gG][rR][aA][pP][hH][iI][cC][sS]
| [gG][sS] | [gG][sS]
| [gG][tT] | [gG][tT]
| [gG][uU] | [gG][uU]
| [gG][uU][rR][uU]
| [gG][wW] | [gG][wW]
| [gG][yY] | [gG][yY]
| [hH][kK] | [hH][kK]
| [hH][mM] | [hH][mM]
| [hH][nN] | [hH][nN]
| [hH][oO][lL][dD][iI][nN][gG][sS]
| [hH][rR] | [hH][rR]
| [hH][tT] | [hH][tT]
| [hH][uU] | [hH][uU]
@ -150,6 +164,7 @@ ASCIITLD = "." (
| [kK][gG] | [kK][gG]
| [kK][hH] | [kK][hH]
| [kK][iI] | [kK][iI]
| [kK][iI][tT][cC][hH][eE][nN]
| [kK][mM] | [kK][mM]
| [kK][nN] | [kK][nN]
| [kK][pP] | [kK][pP]
@ -158,9 +173,11 @@ ASCIITLD = "." (
| [kK][yY] | [kK][yY]
| [kK][zZ] | [kK][zZ]
| [lL][aA] | [lL][aA]
| [lL][aA][nN][dD]
| [lL][bB] | [lL][bB]
| [lL][cC] | [lL][cC]
| [lL][iI] | [lL][iI]
| [lL][iI][gG][hH][tT][iI][nN][gG]
| [lL][kK] | [lL][kK]
| [lL][rR] | [lL][rR]
| [lL][sS] | [lL][sS]
@ -172,6 +189,7 @@ ASCIITLD = "." (
| [mM][cC] | [mM][cC]
| [mM][dD] | [mM][dD]
| [mM][eE] | [mM][eE]
| [mM][eE][nN][uU]
| [mM][gG] | [mM][gG]
| [mM][hH] | [mM][hH]
| [mM][iI][lL] | [mM][iI][lL]
@ -214,10 +232,13 @@ ASCIITLD = "." (
| [pP][fF] | [pP][fF]
| [pP][gG] | [pP][gG]
| [pP][hH] | [pP][hH]
| [pP][hH][oO][tT][oO][gG][rR][aA][pP][hH][yY]
| [pP][kK] | [pP][kK]
| [pP][lL] | [pP][lL]
| [pP][lL][uU][mM][bB][iI][nN][gG]
| [pP][mM] | [pP][mM]
| [pP][nN] | [pP][nN]
| [pP][oO][sS][tT]
| [pP][rR] | [pP][rR]
| [pP][rR][oO] | [pP][rR][oO]
| [pP][sS] | [pP][sS]
@ -235,9 +256,11 @@ ASCIITLD = "." (
| [sS][cC] | [sS][cC]
| [sS][dD] | [sS][dD]
| [sS][eE] | [sS][eE]
| [sS][eE][xX][yY]
| [sS][gG] | [sS][gG]
| [sS][hH] | [sS][hH]
| [sS][iI] | [sS][iI]
| [sS][iI][nN][gG][lL][eE][sS]
| [sS][jJ] | [sS][jJ]
| [sS][kK] | [sS][kK]
| [sS][lL] | [sS][lL]
@ -251,18 +274,22 @@ ASCIITLD = "." (
| [sS][xX] | [sS][xX]
| [sS][yY] | [sS][yY]
| [sS][zZ] | [sS][zZ]
| [tT][aA][tT][tT][oO][oO]
| [tT][cC] | [tT][cC]
| [tT][dD] | [tT][dD]
| [tT][eE][cC][hH][nN][oO][lL][oO][gG][yY]
| [tT][eE][lL] | [tT][eE][lL]
| [tT][fF] | [tT][fF]
| [tT][gG] | [tT][gG]
| [tT][hH] | [tT][hH]
| [tT][iI][pP][sS]
| [tT][jJ] | [tT][jJ]
| [tT][kK] | [tT][kK]
| [tT][lL] | [tT][lL]
| [tT][mM] | [tT][mM]
| [tT][nN] | [tT][nN]
| [tT][oO] | [tT][oO]
| [tT][oO][dD][aA][yY]
| [tT][pP] | [tT][pP]
| [tT][rR] | [tT][rR]
| [tT][rR][aA][vV][eE][lL] | [tT][rR][aA][vV][eE][lL]
@ -273,61 +300,62 @@ ASCIITLD = "." (
| [uU][aA] | [uU][aA]
| [uU][gG] | [uU][gG]
| [uU][kK] | [uU][kK]
| [uU][nN][oO]
| [uU][sS] | [uU][sS]
| [uU][yY] | [uU][yY]
| [uU][zZ] | [uU][zZ]
| [vV][aA] | [vV][aA]
| [vV][cC] | [vV][cC]
| [vV][eE] | [vV][eE]
| [vV][eE][nN][tT][uU][rR][eE][sS]
| [vV][gG] | [vV][gG]
| [vV][iI] | [vV][iI]
| [vV][nN] | [vV][nN]
| [vV][oO][yY][aA][gG][eE]
| [vV][uU] | [vV][uU]
| [wW][fF] | [wW][fF]
| [wW][sS] | [wW][sS]
| [xX][nN]--0[zZ][wW][mM]56[dD]
| [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
| [xX][nN]--3[eE]0[bB]707[eE] | [xX][nN]--3[eE]0[bB]707[eE]
| [xX][nN]--45[bB][rR][jJ]9[cC] | [xX][nN]--45[bB][rR][jJ]9[cC]
| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
| [xX][nN]--80[aA][oO]21[aA] | [xX][nN]--80[aA][oO]21[aA]
| [xX][nN]--80[aA][sS][eE][hH][dD][bB]
| [xX][nN]--80[aA][sS][wW][gG]
| [xX][nN]--90[aA]3[aA][cC] | [xX][nN]--90[aA]3[aA][cC]
| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD] | [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
| [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
| [xX][nN]--[fF][iI][qQ][sS]8[sS] | [xX][nN]--[fF][iI][qQ][sS]8[sS]
| [xX][nN]--[fF][iI][qQ][zZ]9[sS] | [xX][nN]--[fF][iI][qQ][zZ]9[sS]
| [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD] | [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD]
| [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC] | [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
| [xX][nN]--[gG]6[wW]251[dD]
| [xX][nN]--[gG][eE][cC][rR][jJ]9[cC] | [xX][nN]--[gG][eE][cC][rR][jJ]9[cC]
| [xX][nN]--[hH]2[bB][rR][jJ]9[cC] | [xX][nN]--[hH]2[bB][rR][jJ]9[cC]
| [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA] | [xX][nN]--[jJ]1[aA][mM][hH]
| [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
| [xX][nN]--[jJ]6[wW]193[gG] | [xX][nN]--[jJ]6[wW]193[gG]
| [xX][nN]--[jJ][xX][aA][lL][pP][dD][lL][pP]
| [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]
| [xX][nN]--[kK][pP][rR][wW]13[dD] | [xX][nN]--[kK][pP][rR][wW]13[dD]
| [xX][nN]--[kK][pP][rR][yY]57[dD] | [xX][nN]--[kK][pP][rR][yY]57[dD]
| [xX][nN]--[lL]1[aA][cC][cC]
| [xX][nN]--[lL][gG][bB][bB][aA][tT]1[aA][dD]8[jJ] | [xX][nN]--[lL][gG][bB][bB][aA][tT]1[aA][dD]8[jJ]
| [xX][nN]--[mM][gG][bB]9[aA][wW][bB][fF] | [xX][nN]--[mM][gG][bB]9[aA][wW][bB][fF]
| [xX][nN]--[mM][gG][bB][aA]3[aA]4[fF]16[aA]
| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH] | [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
| [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA] | [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
| [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE] | [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
| [xX][nN]--[mM][gG][bB][cC]0[aA]9[aA][zZ][cC][gG] | [xX][nN]--[mM][gG][bB][cC]0[aA]9[aA][zZ][cC][gG]
| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR] | [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
| [xX][nN]--[mM][gG][bB][xX]4[cC][dD]0[aA][bB]
| [xX][nN]--[nN][gG][bB][cC]5[aA][zZ][dD]
| [xX][nN]--[oO]3[cC][wW]4[hH] | [xX][nN]--[oO]3[cC][wW]4[hH]
| [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL] | [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
| [xX][nN]--[pP]1[aA][iI] | [xX][nN]--[pP]1[aA][iI]
| [xX][nN]--[pP][gG][bB][sS]0[dD][hH] | [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
| [xX][nN]--[qQ]9[jJ][yY][bB]4[cC]
| [xX][nN]--[sS]9[bB][rR][jJ]9[cC] | [xX][nN]--[sS]9[bB][rR][jJ]9[cC]
| [xX][nN]--[uU][nN][uU][pP]4[yY]
| [xX][nN]--[wW][gG][bB][hH]1[cC] | [xX][nN]--[wW][gG][bB][hH]1[cC]
| [xX][nN]--[wW][gG][bB][lL]6[aA] | [xX][nN]--[wW][gG][bB][lL]6[aA]
| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA] | [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
| [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH] | [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH]
| [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO] | [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX] | [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
| [xX][xX][xX] | [xX][xX][xX]
| [yY][eE] | [yY][eE]
| [yY][tT] | [yY][tT]

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex. */ /* The following code was generated by JFlex 1.5.0-SNAPSHOT */
package org.apache.lucene.analysis.standard; package org.apache.lucene.analysis.standard;
@ -58,64 +58,63 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
* Translates characters to character classes * Translates characters to character classes
*/ */
private static final String ZZ_CMAP_PACKED = private static final String ZZ_CMAP_PACKED =
"\11\0\1\0\1\15\1\0\1\0\1\14\22\0\1\0\5\0\1\5"+ "\46\0\1\5\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0"+
"\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0\1\6\32\12"+ "\1\6\32\12\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12"+
"\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12\4\0\1\12"+ "\4\0\1\12\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12"+
"\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12\34\0\136\12"+ "\34\0\136\12\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12"+
"\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12\11\0\1\12"+ "\11\0\1\12\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12"+
"\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12\1\0\24\12"+ "\1\0\24\12\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12"+
"\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12\12\0\71\12"+ "\12\0\71\12\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12"+
"\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12\67\0\46\12"+ "\67\0\46\12\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12"+
"\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12\56\0\32\12"+ "\56\0\32\12\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12"+
"\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12\17\0\2\12"+ "\17\0\2\12\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0"+
"\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0\46\12\u015f\0"+ "\46\12\u015f\0\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0"+
"\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0\12\2\25\0"+ "\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0"+
"\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0\1\12\3\0"+ "\1\12\3\0\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12"+
"\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12\23\0\6\12"+ "\23\0\6\12\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12"+
"\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12\1\0\2\12"+ "\1\0\2\12\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2"+
"\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2\2\0\3\12"+ "\2\0\3\12\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12"+
"\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12\1\0\7\12"+ "\1\0\7\12\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12"+
"\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12\17\0\1\12"+ "\17\0\1\12\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12"+
"\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12"+ "\1\0\7\12\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12"+
"\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12\1\0\3\12"+ "\1\0\3\12\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12"+
"\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12\3\0\2\12"+ "\3\0\2\12\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12"+
"\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12\3\0\10\12"+ "\3\0\10\12\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12"+
"\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12\1\0\27\12"+ "\1\0\27\12\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2"+
"\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2\25\0\10\12"+ "\25\0\10\12\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12"+
"\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12\44\0\1\12"+ "\44\0\1\12\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12"+
"\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12\1\0\27\12"+ "\1\0\27\12\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12"+
"\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12\3\0\30\12"+ "\3\0\30\12\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1"+
"\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1\60\12\1\1"+ "\60\12\1\1\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0"+
"\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0\1\12\2\0"+ "\1\12\2\0\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0"+
"\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0\7\12\1\0"+ "\7\12\1\0\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0"+
"\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0\4\12\1\0"+ "\4\12\1\0\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0"+
"\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0\12\2\2\0"+ "\12\2\2\0\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0"+
"\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0\42\12\35\0"+ "\42\12\35\0\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0"+
"\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0\12\2\6\0"+ "\12\2\6\0\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0"+
"\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0\104\12\5\0"+ "\104\12\5\0\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0"+
"\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0\4\12\2\0"+ "\4\12\2\0\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0"+
"\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0\1\12\1\0"+ "\1\12\1\0\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0"+
"\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0"+ "\7\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0"+
"\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0\27\12\1\0"+ "\27\12\1\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0"+
"\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\47\12\1\0"+ "\47\12\1\0\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0"+
"\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0\10\12\12\0"+ "\10\12\12\0\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0"+
"\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0\12\2\6\0"+ "\12\2\6\0\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0"+
"\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0\26\12\2\0"+ "\26\12\2\0\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0"+
"\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0\1\12\1\0"+ "\1\12\1\0\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0"+
"\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0\7\12\1\0"+ "\7\12\1\0\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0"+
"\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0\6\12\4\0"+ "\6\12\4\0\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0"+
"\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0\1\12\4\0"+ "\1\12\4\0\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0"+
"\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0\1\12\1\0"+ "\1\12\1\0\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0"+
"\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0\7\12\u0ecb\0"+ "\7\12\u0ecb\0\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13"+
"\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13\2\13\132\13"+ "\2\13\132\13\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0"+
"\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0\30\12\70\0"+ "\30\12\70\0\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13"+
"\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13\132\13\u048d\12"+ "\132\13\u048d\12\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12"+
"\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12\5\0\1\12"+ "\5\0\1\12\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12"+
"\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12\1\0\2\12"+ "\1\0\2\12\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12"+
"\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12\2\0\66\12"+ "\2\0\66\12\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12"+
"\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12\23\0\12\2"+ "\23\0\12\2\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12"+
"\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12\3\0\6\12"+ "\3\0\6\12\2\0\6\12\2\0\6\12\2\0\3\12\43\0";
"\2\0\6\12\2\0\6\12\2\0\3\12\43\0";
/** /**
* Translates characters to character classes * Translates characters to character classes
@ -128,13 +127,12 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
private static final int [] ZZ_ACTION = zzUnpackAction(); private static final int [] ZZ_ACTION = zzUnpackAction();
private static final String ZZ_ACTION_PACKED_0 = private static final String ZZ_ACTION_PACKED_0 =
"\1\0\1\1\3\2\1\3\1\1\13\0\1\2\3\4"+ "\1\0\1\1\3\2\1\3\13\0\1\2\3\4\2\0"+
"\2\0\1\5\1\0\1\5\3\4\6\5\1\6\1\4"+ "\1\5\1\0\1\5\3\4\6\5\1\6\1\4\2\7"+
"\2\7\1\10\1\0\1\10\3\0\2\10\1\11\1\12"+ "\1\10\1\0\1\10\3\0\2\10\1\11\1\12\1\4";
"\1\4";
private static int [] zzUnpackAction() { private static int [] zzUnpackAction() {
int [] result = new int[51]; int [] result = new int[50];
int offset = 0; int offset = 0;
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
return result; return result;
@ -159,16 +157,16 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
private static final String ZZ_ROWMAP_PACKED_0 = private static final String ZZ_ROWMAP_PACKED_0 =
"\0\0\0\16\0\34\0\52\0\70\0\16\0\106\0\124"+ "\0\0\0\14\0\30\0\44\0\60\0\14\0\74\0\110"+
"\0\142\0\160\0\176\0\214\0\232\0\250\0\266\0\304"+ "\0\124\0\140\0\154\0\170\0\204\0\220\0\234\0\250"+
"\0\322\0\340\0\356\0\374\0\u010a\0\u0118\0\u0126\0\u0134"+ "\0\264\0\300\0\314\0\330\0\344\0\360\0\374\0\u0108"+
"\0\u0142\0\u0150\0\u015e\0\u016c\0\u017a\0\u0188\0\u0196\0\u01a4"+ "\0\u0114\0\u0120\0\u012c\0\u0138\0\u0144\0\u0150\0\u015c\0\u0168"+
"\0\u01b2\0\u01c0\0\u01ce\0\u01dc\0\u01ea\0\u01f8\0\322\0\u0206"+ "\0\u0174\0\u0180\0\u018c\0\u0198\0\u01a4\0\250\0\u01b0\0\u01bc"+
"\0\u0214\0\u0222\0\u0230\0\u023e\0\u024c\0\u025a\0\124\0\214"+ "\0\u01c8\0\u01d4\0\u01e0\0\u01ec\0\u01f8\0\74\0\154\0\u0204"+
"\0\u0268\0\u0276\0\u0284"; "\0\u0210\0\u021c";
private static int [] zzUnpackRowMap() { private static int [] zzUnpackRowMap() {
int [] result = new int[51]; int [] result = new int[50];
int offset = 0; int offset = 0;
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
return result; return result;
@ -191,49 +189,49 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
private static final int [] ZZ_TRANS = zzUnpackTrans(); private static final int [] ZZ_TRANS = zzUnpackTrans();
private static final String ZZ_TRANS_PACKED_0 = private static final String ZZ_TRANS_PACKED_0 =
"\1\2\1\3\1\4\7\2\1\5\1\6\1\7\1\2"+ "\1\2\1\3\1\4\7\2\1\5\1\6\15\0\2\3"+
"\17\0\2\3\1\0\1\10\1\0\1\11\2\12\1\13"+ "\1\0\1\7\1\0\1\10\2\11\1\12\1\3\2\0"+
"\1\3\4\0\1\3\1\4\1\0\1\14\1\0\1\11"+ "\1\3\1\4\1\0\1\13\1\0\1\10\2\14\1\15"+
"\2\15\1\16\1\4\4\0\1\3\1\4\1\17\1\20"+ "\1\4\2\0\1\3\1\4\1\16\1\17\1\20\1\21"+
"\1\21\1\22\2\12\1\13\1\23\20\0\1\2\1\0"+ "\2\11\1\12\1\22\2\0\1\23\1\24\7\0\1\25"+
"\1\24\1\25\7\0\1\26\4\0\2\27\7\0\1\27"+ "\2\0\2\26\7\0\1\26\2\0\1\27\1\30\7\0"+
"\4\0\1\30\1\31\7\0\1\32\5\0\1\33\7\0"+ "\1\31\3\0\1\32\7\0\1\12\2\0\1\33\1\34"+
"\1\13\4\0\1\34\1\35\7\0\1\36\4\0\1\37"+ "\7\0\1\35\2\0\1\36\1\37\7\0\1\40\2\0"+
"\1\40\7\0\1\41\4\0\1\42\1\43\7\0\1\44"+ "\1\41\1\42\7\0\1\43\13\0\1\44\2\0\1\23"+
"\15\0\1\45\4\0\1\24\1\25\7\0\1\46\15\0"+ "\1\24\7\0\1\45\13\0\1\46\2\0\2\26\7\0"+
"\1\47\4\0\2\27\7\0\1\50\4\0\1\3\1\4"+ "\1\47\2\0\1\3\1\4\1\16\1\7\1\20\1\21"+
"\1\17\1\10\1\21\1\22\2\12\1\13\1\23\4\0"+ "\2\11\1\12\1\22\2\0\2\23\1\0\1\50\1\0"+
"\2\24\1\0\1\51\1\0\1\11\2\52\1\0\1\24"+ "\1\10\2\51\1\0\1\23\2\0\1\23\1\24\1\0"+
"\4\0\1\24\1\25\1\0\1\53\1\0\1\11\2\54"+ "\1\52\1\0\1\10\2\53\1\54\1\24\2\0\1\23"+
"\1\55\1\25\4\0\1\24\1\25\1\0\1\51\1\0"+ "\1\24\1\0\1\50\1\0\1\10\2\51\1\0\1\25"+
"\1\11\2\52\1\0\1\26\4\0\2\27\1\0\1\56"+ "\2\0\2\26\1\0\1\55\2\0\1\55\2\0\1\26"+
"\2\0\1\56\2\0\1\27\4\0\2\30\1\0\1\52"+ "\2\0\2\27\1\0\1\51\1\0\1\10\2\51\1\0"+
"\1\0\1\11\2\52\1\0\1\30\4\0\1\30\1\31"+ "\1\27\2\0\1\27\1\30\1\0\1\53\1\0\1\10"+
"\1\0\1\54\1\0\1\11\2\54\1\55\1\31\4\0"+ "\2\53\1\54\1\30\2\0\1\27\1\30\1\0\1\51"+
"\1\30\1\31\1\0\1\52\1\0\1\11\2\52\1\0"+ "\1\0\1\10\2\51\1\0\1\31\3\0\1\32\1\0"+
"\1\32\5\0\1\33\1\0\1\55\2\0\3\55\1\33"+ "\1\54\2\0\3\54\1\32\2\0\2\33\1\0\1\56"+
"\4\0\2\34\1\0\1\57\1\0\1\11\2\12\1\13"+ "\1\0\1\10\2\11\1\12\1\33\2\0\1\33\1\34"+
"\1\34\4\0\1\34\1\35\1\0\1\60\1\0\1\11"+ "\1\0\1\57\1\0\1\10\2\14\1\15\1\34\2\0"+
"\2\15\1\16\1\35\4\0\1\34\1\35\1\0\1\57"+ "\1\33\1\34\1\0\1\56\1\0\1\10\2\11\1\12"+
"\1\0\1\11\2\12\1\13\1\36\4\0\2\37\1\0"+ "\1\35\2\0\2\36\1\0\1\11\1\0\1\10\2\11"+
"\1\12\1\0\1\11\2\12\1\13\1\37\4\0\1\37"+ "\1\12\1\36\2\0\1\36\1\37\1\0\1\14\1\0"+
"\1\40\1\0\1\15\1\0\1\11\2\15\1\16\1\40"+ "\1\10\2\14\1\15\1\37\2\0\1\36\1\37\1\0"+
"\4\0\1\37\1\40\1\0\1\12\1\0\1\11\2\12"+ "\1\11\1\0\1\10\2\11\1\12\1\40\2\0\2\41"+
"\1\13\1\41\4\0\2\42\1\0\1\13\2\0\3\13"+ "\1\0\1\12\2\0\3\12\1\41\2\0\1\41\1\42"+
"\1\42\4\0\1\42\1\43\1\0\1\16\2\0\3\16"+ "\1\0\1\15\2\0\3\15\1\42\2\0\1\41\1\42"+
"\1\43\4\0\1\42\1\43\1\0\1\13\2\0\3\13"+ "\1\0\1\12\2\0\3\12\1\43\4\0\1\16\6\0"+
"\1\44\6\0\1\17\6\0\1\45\4\0\1\24\1\25"+ "\1\44\2\0\1\23\1\24\1\0\1\60\1\0\1\10"+
"\1\0\1\61\1\0\1\11\2\52\1\0\1\26\4\0"+ "\2\51\1\0\1\25\2\0\2\26\1\0\1\55\2\0"+
"\2\27\1\0\1\56\2\0\1\56\2\0\1\50\4\0"+ "\1\55\2\0\1\47\2\0\2\23\7\0\1\23\2\0"+
"\2\24\7\0\1\24\4\0\2\30\7\0\1\30\4\0"+ "\2\27\7\0\1\27\2\0\2\33\7\0\1\33\2\0"+
"\2\34\7\0\1\34\4\0\2\37\7\0\1\37\4\0"+ "\2\36\7\0\1\36\2\0\2\41\7\0\1\41\2\0"+
"\2\42\7\0\1\42\4\0\2\62\7\0\1\62\4\0"+ "\2\61\7\0\1\61\2\0\2\23\7\0\1\62\2\0"+
"\2\24\7\0\1\63\4\0\2\62\1\0\1\56\2\0"+ "\2\61\1\0\1\55\2\0\1\55\2\0\1\61\2\0"+
"\1\56\2\0\1\62\4\0\2\24\1\0\1\61\1\0"+ "\2\23\1\0\1\60\1\0\1\10\2\51\1\0\1\23"+
"\1\11\2\52\1\0\1\24\3\0"; "\1\0";
private static int [] zzUnpackTrans() { private static int [] zzUnpackTrans() {
int [] result = new int[658]; int [] result = new int[552];
int offset = 0; int offset = 0;
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
return result; return result;
@ -271,11 +269,11 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
private static final String ZZ_ATTRIBUTE_PACKED_0 = private static final String ZZ_ATTRIBUTE_PACKED_0 =
"\1\0\1\11\3\1\1\11\1\1\13\0\4\1\2\0"+ "\1\0\1\11\3\1\1\11\13\0\4\1\2\0\1\1"+
"\1\1\1\0\17\1\1\0\1\1\3\0\5\1"; "\1\0\17\1\1\0\1\1\3\0\5\1";
private static int [] zzUnpackAttribute() { private static int [] zzUnpackAttribute() {
int [] result = new int[51]; int [] result = new int[50];
int offset = 0; int offset = 0;
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
return result; return result;
@ -372,7 +370,6 @@ public final void getText(CharTermAttribute t) {
/** /**
* Creates a new scanner * Creates a new scanner
* There is also a java.io.InputStream version of this constructor.
* *
* @param in the java.io.Reader to read input from. * @param in the java.io.Reader to read input from.
*/ */
@ -380,7 +377,6 @@ public final void getText(CharTermAttribute t) {
this.zzReader = in; this.zzReader = in;
} }
/** /**
* Unpacks the compressed character translation table. * Unpacks the compressed character translation table.
@ -392,7 +388,7 @@ public final void getText(CharTermAttribute t) {
char [] map = new char[0x10000]; char [] map = new char[0x10000];
int i = 0; /* index in packed string */ int i = 0; /* index in packed string */
int j = 0; /* index in unpacked array */ int j = 0; /* index in unpacked array */
while (i < 1154) { while (i < 1138) {
int count = packed.charAt(i++); int count = packed.charAt(i++);
char value = packed.charAt(i++); char value = packed.charAt(i++);
do map[j++] = value; while (--count > 0); do map[j++] = value; while (--count > 0);

View File

@ -116,8 +116,6 @@ LETTER = !(![:letter:]|{CJ})
// Chinese and Japanese (but NOT Korean, which is included in [:letter:]) // Chinese and Japanese (but NOT Korean, which is included in [:letter:])
CJ = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f] CJ = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
WHITESPACE = \r\n | [ \r\n\t\f]
%% %%
{ALPHANUM} { return ALPHANUM; } {ALPHANUM} { return ALPHANUM; }
@ -131,4 +129,4 @@ WHITESPACE = \r\n | [ \r\n\t\f]
{ACRONYM_DEP} { return ACRONYM_DEP; } {ACRONYM_DEP} { return ACRONYM_DEP; }
/** Ignore the rest */ /** Ignore the rest */
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }

View File

@ -18,4 +18,4 @@
WARNING: if you change StandardTokenizerImpl*.jflex or UAX29URLEmailTokenizer WARNING: if you change StandardTokenizerImpl*.jflex or UAX29URLEmailTokenizer
and need to regenerate the tokenizer, only use the trunk version and need to regenerate the tokenizer, only use the trunk version
of JFlex 1.5 (with a minimum SVN revision 607) at the moment! of JFlex 1.5 (with a minimum SVN revision 722) at the moment!

View File

@ -1,11 +1,12 @@
/* /*
* Copyright 2010 The Apache Software Foundation. * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * http://www.apache.org/licenses/LICENSE-2.0
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
@ -13,7 +14,6 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
// Generated using ICU4J 52.1.0.0 // Generated using ICU4J 52.1.0.0
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros // by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
@ -39,6 +39,12 @@ FormatSupp = (
| ([\ud834][\uDD73-\uDD7A]) | ([\ud834][\uDD73-\uDD7A])
| ([\udb40][\uDC01\uDC20-\uDC7F]) | ([\udb40][\uDC01\uDC20-\uDC7F])
) )
NumericSupp = (
([\ud805][\uDEC0-\uDEC9])
| ([\ud804][\uDC66-\uDC6F\uDCF0-\uDCF9\uDD36-\uDD3F\uDDD0-\uDDD9])
| ([\ud835][\uDFCE-\uDFFF])
| ([\ud801][\uDCA0-\uDCA9])
)
ExtendSupp = ( ExtendSupp = (
([\ud81b][\uDF51-\uDF7E\uDF8F-\uDF92]) ([\ud81b][\uDF51-\uDF7E\uDF8F-\uDF92])
| ([\ud805][\uDEAB-\uDEB7]) | ([\ud805][\uDEAB-\uDEB7])
@ -48,12 +54,6 @@ ExtendSupp = (
| ([\udb40][\uDD00-\uDDEF]) | ([\udb40][\uDD00-\uDDEF])
| ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F]) | ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
) )
NumericSupp = (
([\ud805][\uDEC0-\uDEC9])
| ([\ud804][\uDC66-\uDC6F\uDCF0-\uDCF9\uDD36-\uDD3F\uDDD0-\uDDD9])
| ([\ud835][\uDFCE-\uDFFF])
| ([\ud801][\uDCA0-\uDCA9])
)
KatakanaSupp = ( KatakanaSupp = (
([\ud82c][\uDC00]) ([\ud82c][\uDC00])
) )
@ -129,3 +129,15 @@ HiraganaSupp = (
([\ud83c][\uDE00]) ([\ud83c][\uDE00])
| ([\ud82c][\uDC01]) | ([\ud82c][\uDC01])
) )
SingleQuoteSupp = (
[]
)
DoubleQuoteSupp = (
[]
)
HebrewLetterSupp = (
[]
)
RegionalIndicatorSupp = (
([\ud83c][\uDDE6-\uDDFF])
)

View File

@ -32,11 +32,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li> * Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
* <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li> * <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
* <li>&lt;HIRAGANA&gt;: A single hiragana character</li> * <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
* <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
* <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
* </ul> * </ul>
*/ */
%% %%
%unicode 6.1 %unicode 6.3
%integer %integer
%final %final
%public %public
@ -47,33 +49,40 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%buffer 4096 %buffer 4096
%include SUPPLEMENTARY.jflex-macro %include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp}) ALetter = (\p{WB:ALetter} | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp}) Format = (\p{WB:Format} | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp}) Numeric = ([\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] | {NumericSupp})
Extend = ([\p{WB:Extend}] | {ExtendSupp}) Extend = (\p{WB:Extend} | {ExtendSupp})
Katakana = ([\p{WB:Katakana}] | {KatakanaSupp}) Katakana = (\p{WB:Katakana} | {KatakanaSupp})
MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp}) MidLetter = (\p{WB:MidLetter} | {MidLetterSupp})
MidNum = ([\p{WB:MidNum}] | {MidNumSupp}) MidNum = (\p{WB:MidNum} | {MidNumSupp})
MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp}) MidNumLet = (\p{WB:MidNumLet} | {MidNumLetSupp})
ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp}) ExtendNumLet = (\p{WB:ExtendNumLet} | {ExtendNumLetSupp})
ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp}) ComplexContext = (\p{LB:Complex_Context} | {ComplexContextSupp})
Han = ([\p{Script:Han}] | {HanSupp}) Han = (\p{Script:Han} | {HanSupp})
Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp}) Hiragana = (\p{Script:Hiragana} | {HiraganaSupp})
SingleQuote = (\p{WB:Single_Quote} | {SingleQuoteSupp})
DoubleQuote = (\p{WB:Double_Quote} | {DoubleQuoteSupp})
HebrewLetter = (\p{WB:Hebrew_Letter} | {HebrewLetterSupp})
RegionalIndicator = (\p{WB:Regional_Indicator} | {RegionalIndicatorSupp})
HebrewOrALetter = ({HebrewLetter} | {ALetter})
// Script=Hangul & Aletter
HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
// UAX#29 WB4. X (Extend | Format)* --> X // UAX#29 WB4. X (Extend | Format)* --> X
// //
ALetterEx = {ALetter} ({Format} | {Extend})* HangulEx = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] ({Format} | {Extend})*
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it HebrewOrALetterEx = {HebrewOrALetter} ({Format} | {Extend})*
NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})* NumericEx = {Numeric} ({Format} | {Extend})*
KatakanaEx = {Katakana} ({Format} | {Extend})* KatakanaEx = {Katakana} ({Format} | {Extend})*
MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})* MidLetterEx = ({MidLetter} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})*
MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})* MidNumericEx = ({MidNum} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})*
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})* ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
HanEx = {Han} ({Format} | {Extend})*
HiraganaEx = {Hiragana} ({Format} | {Extend})*
SingleQuoteEx = {SingleQuote} ({Format} | {Extend})*
DoubleQuoteEx = {DoubleQuote} ({Format} | {Extend})*
HebrewLetterEx = {HebrewLetter} ({Format} | {Extend})*
RegionalIndicatorEx = {RegionalIndicator} ({Format} | {Extend})*
HanEx = {Han} ({Format} | {Extend})*
HiraganaEx = {Hiragana} ({Format} | {Extend})*
%{ %{
/** Alphanumeric sequences */ /** Alphanumeric sequences */
@ -121,15 +130,12 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
<<EOF>> { return StandardTokenizerInterface.YYEOF; } <<EOF>> { return StandardTokenizerInterface.YYEOF; }
// UAX#29 WB8. Numeric × Numeric // UAX#29 WB8. Numeric × Numeric
// WB11. Numeric (MidNum | MidNumLet) × Numeric // WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
// WB12. Numeric × (MidNum | MidNumLet) Numeric // WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet // WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana) // WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
// //
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx} {ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
| {MidNumericEx} {NumericEx}
| {NumericEx})*
{ExtendNumLetEx}*
{ return NUMERIC_TYPE; } { return NUMERIC_TYPE; }
// subset of the below for typing purposes only! // subset of the below for typing purposes only!
@ -139,22 +145,32 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
{KatakanaEx}+ {KatakanaEx}+
{ return KATAKANA_TYPE; } { return KATAKANA_TYPE; }
// UAX#29 WB5. ALetter × ALetter // UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
// WB6. ALetter × (MidLetter | MidNumLet) ALetter // WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
// WB7. ALetter (MidLetter | MidNumLet) × ALetter // WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
// WB9. ALetter × Numeric // WB7a. Hebrew_Letter × Single_Quote
// WB10. Numeric × ALetter // WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter
// WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter
// WB9. (ALetter | Hebrew_Letter) × Numeric
// WB10. Numeric × (ALetter | Hebrew_Letter)
// WB13. Katakana × Katakana // WB13. Katakana × Katakana
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet // WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana) // WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
// //
{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})* {ExtendNumLetEx}* ( {KatakanaEx}
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})* | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )*
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})* | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )*
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})* )+
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )* )
{ExtendNumLetEx}* ({ExtendNumLetEx}+ ( {KatakanaEx}
| ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
| {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )*
| {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )*
)+
)
)*
{ExtendNumLetEx}*
{ return WORD_TYPE; } { return WORD_TYPE; }
@ -166,7 +182,7 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
// annex. That means that satisfactory treatment of languages like Chinese // annex. That means that satisfactory treatment of languages like Chinese
// or Thai requires special handling. // or Thai requires special handling.
// //
// In Unicode 6.1, only one character has the \p{Line_Break = Contingent_Break} // In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
// property: U+FFFC ( ) OBJECT REPLACEMENT CHARACTER. // property: U+FFFC ( ) OBJECT REPLACEMENT CHARACTER.
// //
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context} // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
@ -188,6 +204,8 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
// UAX#29 WB3. CR × LF // UAX#29 WB3. CR × LF
// WB3a. (Newline | CR | LF) ÷ // WB3a. (Newline | CR | LF) ÷
// WB3b. ÷ (Newline | CR | LF) // WB3b. ÷ (Newline | CR | LF)
// WB13c. Regional_Indicator × Regional_Indicator
// WB14. Any ÷ Any // WB14. Any ÷ Any
// //
[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } {RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
{ /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }

View File

@ -35,11 +35,13 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li> * Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
* <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li> * <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
* <li>&lt;HIRAGANA&gt;: A single hiragana character</li> * <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
* <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
* <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
* </ul> * </ul>
*/ */
%% %%
%unicode 6.1 %unicode 6.3
%integer %integer
%final %final
%public %public
@ -50,33 +52,39 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%buffer 4096 %buffer 4096
%include SUPPLEMENTARY.jflex-macro %include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp}) ALetter = (\p{WB:ALetter} | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp}) Format = (\p{WB:Format} | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp}) Numeric = ([\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] | {NumericSupp})
Extend = ([\p{WB:Extend}] | {ExtendSupp}) Extend = (\p{WB:Extend} | {ExtendSupp})
Katakana = ([\p{WB:Katakana}] | {KatakanaSupp}) Katakana = (\p{WB:Katakana} | {KatakanaSupp})
MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp}) MidLetter = (\p{WB:MidLetter} | {MidLetterSupp})
MidNum = ([\p{WB:MidNum}] | {MidNumSupp}) MidNum = (\p{WB:MidNum} | {MidNumSupp})
MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp}) MidNumLet = (\p{WB:MidNumLet} | {MidNumLetSupp})
ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp}) ExtendNumLet = (\p{WB:ExtendNumLet} | {ExtendNumLetSupp})
ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp}) ComplexContext = (\p{LB:Complex_Context} | {ComplexContextSupp})
Han = ([\p{Script:Han}] | {HanSupp}) Han = (\p{Script:Han} | {HanSupp})
Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp}) Hiragana = (\p{Script:Hiragana} | {HiraganaSupp})
SingleQuote = (\p{WB:Single_Quote} | {SingleQuoteSupp})
DoubleQuote = (\p{WB:Double_Quote} | {DoubleQuoteSupp})
HebrewLetter = (\p{WB:Hebrew_Letter} | {HebrewLetterSupp})
RegionalIndicator = (\p{WB:Regional_Indicator} | {RegionalIndicatorSupp})
HebrewOrALetter = ({HebrewLetter} | {ALetter})
// Script=Hangul & Aletter
HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
// UAX#29 WB4. X (Extend | Format)* --> X // UAX#29 WB4. X (Extend | Format)* --> X
// //
ALetterEx = {ALetter} ({Format} | {Extend})* HangulEx = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] ({Format} | {Extend})*
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it HebrewOrALetterEx = {HebrewOrALetter} ({Format} | {Extend})*
NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})* NumericEx = {Numeric} ({Format} | {Extend})*
KatakanaEx = {Katakana} ({Format} | {Extend})* KatakanaEx = {Katakana} ({Format} | {Extend})*
MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})* MidLetterEx = ({MidLetter} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})*
MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})* MidNumericEx = ({MidNum} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})*
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})* ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
HanEx = {Han} ({Format} | {Extend})*
HanEx = {Han} ({Format} | {Extend})* HiraganaEx = {Hiragana} ({Format} | {Extend})*
HiraganaEx = {Hiragana} ({Format} | {Extend})* SingleQuoteEx = {SingleQuote} ({Format} | {Extend})*
DoubleQuoteEx = {DoubleQuote} ({Format} | {Extend})*
HebrewLetterEx = {HebrewLetter} ({Format} | {Extend})*
RegionalIndicatorEx = {RegionalIndicator} ({Format} | {Extend})*
// URL and E-mail syntax specifications: // URL and E-mail syntax specifications:
// //
@ -213,40 +221,47 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
{EMAIL} { return EMAIL_TYPE; } {EMAIL} { return EMAIL_TYPE; }
// UAX#29 WB8. Numeric × Numeric // UAX#29 WB8. Numeric × Numeric
// WB11. Numeric (MidNum | MidNumLet) × Numeric // WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
// WB12. Numeric × (MidNum | MidNumLet) Numeric // WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet // WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana) // WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
// //
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx} {ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
| {MidNumericEx} {NumericEx}
| {NumericEx})*
{ExtendNumLetEx}*
{ return NUMERIC_TYPE; } { return NUMERIC_TYPE; }
// subset of the below for typing purposes only! // subset of the below for typing purposes only!
{HangulEx}+ {HangulEx}+
{ return HANGUL_TYPE; } { return HANGUL_TYPE; }
{KatakanaEx}+ {KatakanaEx}+
{ return KATAKANA_TYPE; } { return KATAKANA_TYPE; }
// UAX#29 WB5. ALetter × ALetter // UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
// WB6. ALetter × (MidLetter | MidNumLet) ALetter // WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
// WB7. ALetter (MidLetter | MidNumLet) × ALetter // WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
// WB9. ALetter × Numeric // WB7a. Hebrew_Letter × Single_Quote
// WB10. Numeric × ALetter // WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter
// WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter
// WB9. (ALetter | Hebrew_Letter) × Numeric
// WB10. Numeric × (ALetter | Hebrew_Letter)
// WB13. Katakana × Katakana // WB13. Katakana × Katakana
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet // WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana) // WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
// //
{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})* {ExtendNumLetEx}* ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})* | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )*
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})* | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )*
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})* )+
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )* )
{ExtendNumLetEx}* ({ExtendNumLetEx}+ ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
| ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
| {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} )* {NumericEx} )*
| {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} )* {HebrewOrALetterEx} )*
)+
)
)*
{ExtendNumLetEx}*
{ return WORD_TYPE; } { return WORD_TYPE; }
@ -258,7 +273,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
// annex. That means that satisfactory treatment of languages like Chinese // annex. That means that satisfactory treatment of languages like Chinese
// or Thai requires special handling. // or Thai requires special handling.
// //
// In Unicode 6.1, only one character has the \p{Line_Break = Contingent_Break} // In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
// property: U+FFFC ( ) OBJECT REPLACEMENT CHARACTER. // property: U+FFFC ( ) OBJECT REPLACEMENT CHARACTER.
// //
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context} // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
@ -280,6 +295,8 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
// UAX#29 WB3. CR × LF // UAX#29 WB3. CR × LF
// WB3a. (Newline | CR | LF) ÷ // WB3a. (Newline | CR | LF) ÷
// WB3b. ÷ (Newline | CR | LF) // WB3b. ÷ (Newline | CR | LF)
// WB13c. Regional_Indicator × Regional_Indicator
// WB14. Any ÷ Any // WB14. Any ÷ Any
// //
[^] { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } {RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
{ /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex. */ /* The following code was generated by JFlex 1.5.0-SNAPSHOT */
package org.apache.lucene.analysis.wikipedia; package org.apache.lucene.analysis.wikipedia;
@ -84,21 +84,20 @@ class WikipediaTokenizerImpl {
private static final int [] ZZ_ACTION = zzUnpackAction(); private static final int [] ZZ_ACTION = zzUnpackAction();
private static final String ZZ_ACTION_PACKED_0 = private static final String ZZ_ACTION_PACKED_0 =
"\12\0\4\1\4\2\1\3\1\1\1\4\1\1\2\5"+ "\12\0\4\1\4\2\1\3\1\4\1\1\2\5\1\6"+
"\1\6\2\5\1\7\1\5\2\10\1\11\1\12\1\11"+ "\1\5\1\7\1\5\2\10\1\11\1\5\1\12\1\11"+
"\1\13\1\14\1\10\1\15\1\16\1\15\1\17\1\20"+ "\1\13\1\14\1\15\1\16\1\15\1\17\1\20\1\10"+
"\1\10\1\21\1\10\4\22\1\23\1\22\1\24\1\25"+ "\1\21\1\10\4\22\1\23\1\24\1\25\1\26\3\0"+
"\1\26\3\0\1\27\14\0\1\30\1\31\1\32\1\33"+ "\1\27\14\0\1\30\1\31\1\32\1\33\1\11\1\0"+
"\1\11\1\0\1\34\1\35\1\36\1\0\1\37\1\0"+ "\1\34\1\35\1\36\1\0\1\37\1\0\1\40\3\0"+
"\1\40\3\0\1\41\1\42\2\43\1\42\2\44\2\0"+ "\1\41\1\42\2\43\1\42\2\44\2\0\1\43\1\0"+
"\1\43\1\0\14\43\1\42\3\0\1\11\1\45\3\0"+ "\14\43\1\42\3\0\1\11\1\45\3\0\1\46\1\47"+
"\1\46\1\47\5\0\1\50\4\0\1\50\2\0\2\50"+ "\5\0\1\50\4\0\1\50\2\0\2\50\2\0\1\11"+
"\2\0\1\11\5\0\1\31\1\42\1\43\1\51\3\0"+ "\5\0\1\31\1\42\1\43\1\51\3\0\1\11\2\0"+
"\1\11\2\0\1\52\30\0\1\53\2\0\1\54\1\55"+ "\1\52\30\0\1\53\2\0\1\54\1\55\1\56";
"\1\56";
private static int [] zzUnpackAction() { private static int [] zzUnpackAction() {
int [] result = new int[184]; int [] result = new int[181];
int offset = 0; int offset = 0;
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
return result; return result;
@ -125,30 +124,30 @@ class WikipediaTokenizerImpl {
private static final String ZZ_ROWMAP_PACKED_0 = private static final String ZZ_ROWMAP_PACKED_0 =
"\0\0\0\54\0\130\0\204\0\260\0\334\0\u0108\0\u0134"+ "\0\0\0\54\0\130\0\204\0\260\0\334\0\u0108\0\u0134"+
"\0\u0160\0\u018c\0\u01b8\0\u01e4\0\u0210\0\u023c\0\u0268\0\u0294"+ "\0\u0160\0\u018c\0\u01b8\0\u01e4\0\u0210\0\u023c\0\u0268\0\u0294"+
"\0\u02c0\0\u02ec\0\u01b8\0\u0318\0\u0344\0\u0370\0\u01b8\0\u039c"+ "\0\u02c0\0\u02ec\0\u01b8\0\u0318\0\u0344\0\u01b8\0\u0370\0\u039c"+
"\0\u03c8\0\u03f4\0\u0420\0\u044c\0\u0478\0\u01b8\0\u039c\0\u04a4"+ "\0\u03c8\0\u03f4\0\u0420\0\u01b8\0\u0370\0\u044c\0\u0478\0\u01b8"+
"\0\u01b8\0\u04d0\0\u04fc\0\u0528\0\u0554\0\u0580\0\u05ac\0\u05d8"+ "\0\u04a4\0\u04d0\0\u04fc\0\u0528\0\u0554\0\u0580\0\u05ac\0\u05d8"+
"\0\u0604\0\u0630\0\u065c\0\u0688\0\u06b4\0\u01b8\0\u06e0\0\u039c"+ "\0\u0604\0\u0630\0\u065c\0\u01b8\0\u0688\0\u0370\0\u06b4\0\u06e0"+
"\0\u070c\0\u0738\0\u0764\0\u0790\0\u01b8\0\u01b8\0\u07bc\0\u07e8"+ "\0\u070c\0\u01b8\0\u01b8\0\u0738\0\u0764\0\u0790\0\u01b8\0\u07bc"+
"\0\u0814\0\u01b8\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+ "\0\u07e8\0\u0814\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+
"\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u0a24\0\u0a50\0\u0a7c"+ "\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u01b8\0\u01b8\0\u0a24"+
"\0\u01b8\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b00\0\u01b8\0\u0b2c"+ "\0\u0a50\0\u0a7c\0\u0a7c\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b2c"+
"\0\u0b58\0\u0b84\0\u0bb0\0\u0bdc\0\u0c08\0\u0c34\0\u0c60\0\u0c8c"+ "\0\u0b58\0\u0b84\0\u0bb0\0\u0bdc\0\u0c08\0\u0c34\0\u0c60\0\u0c8c"+
"\0\u0cb8\0\u0ce4\0\u0d10\0\u0898\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+ "\0\u0814\0\u0cb8\0\u0ce4\0\u0d10\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+
"\0\u0dec\0\u0e18\0\u0e44\0\u0e70\0\u0e9c\0\u0ec8\0\u0ef4\0\u0f20"+ "\0\u0dec\0\u0e18\0\u0e44\0\u0e70\0\u0e9c\0\u0ec8\0\u0ef4\0\u0f20"+
"\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u1080"+ "\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u01b8"+
"\0\u10ac\0\u10d8\0\u01b8\0\u1104\0\u1130\0\u115c\0\u1188\0\u01b8"+ "\0\u1080\0\u10ac\0\u10d8\0\u1104\0\u01b8\0\u1130\0\u115c\0\u1188"+
"\0\u11b4\0\u11e0\0\u120c\0\u1238\0\u1264\0\u1290\0\u12bc\0\u12e8"+ "\0\u11b4\0\u11e0\0\u120c\0\u1238\0\u1264\0\u1290\0\u12bc\0\u12e8"+
"\0\u1314\0\u1340\0\u136c\0\u1398\0\u13c4\0\u086c\0\u09f8\0\u13f0"+ "\0\u1314\0\u1340\0\u07e8\0\u0974\0\u136c\0\u1398\0\u13c4\0\u13f0"+
"\0\u141c\0\u1448\0\u1474\0\u14a0\0\u14cc\0\u14f8\0\u1524\0\u01b8"+ "\0\u141c\0\u1448\0\u1474\0\u14a0\0\u01b8\0\u14cc\0\u14f8\0\u1524"+
"\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u1658\0\u1684"+ "\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u01b8\0\u1658"+
"\0\u16b0\0\u01b8\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+ "\0\u1684\0\u16b0\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+
"\0\u17e4\0\u1810\0\u183c\0\u1868\0\u1894\0\u18c0\0\u18ec\0\u1918"+ "\0\u17e4\0\u1810\0\u183c\0\u1868\0\u1894\0\u18c0\0\u18ec\0\u1918"+
"\0\u1944\0\u1970\0\u199c\0\u19c8\0\u19f4\0\u1a20\0\u1a4c\0\u1a78"+ "\0\u1944\0\u1970\0\u199c\0\u19c8\0\u19f4\0\u1a20\0\u1a4c\0\u1a78"+
"\0\u1aa4\0\u1ad0\0\u1afc\0\u1b28\0\u1b54\0\u01b8\0\u01b8\0\u01b8"; "\0\u1aa4\0\u1ad0\0\u01b8\0\u01b8\0\u01b8";
private static int [] zzUnpackRowMap() { private static int [] zzUnpackRowMap() {
int [] result = new int[184]; int [] result = new int[181];
int offset = 0; int offset = 0;
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
return result; return result;
@ -172,152 +171,149 @@ class WikipediaTokenizerImpl {
private static final String ZZ_TRANS_PACKED_0 = private static final String ZZ_TRANS_PACKED_0 =
"\1\13\1\14\5\13\1\15\1\13\1\16\3\13\1\17"+ "\1\13\1\14\5\13\1\15\1\13\1\16\3\13\1\17"+
"\1\20\1\21\1\22\1\23\1\24\2\13\1\25\2\13"+ "\1\20\1\21\1\22\1\23\3\13\1\24\2\13\15\17"+
"\15\17\1\26\2\13\3\17\1\13\7\27\1\30\5\27"+ "\1\25\2\13\3\17\1\13\7\26\1\27\5\26\4\30"+
"\4\31\1\27\1\32\3\27\1\33\1\27\15\31\3\27"+ "\5\26\1\31\1\26\15\30\3\26\3\30\10\26\1\27"+
"\3\31\10\27\1\30\5\27\4\34\1\27\1\32\3\27"+ "\5\26\4\32\5\26\1\33\1\26\15\32\3\26\3\32"+
"\1\35\1\27\15\34\3\27\3\34\1\27\7\36\1\37"+ "\1\26\7\34\1\35\5\34\4\36\1\34\1\37\2\26"+
"\5\36\4\40\1\36\1\32\2\27\1\36\1\41\1\36"+ "\1\34\1\40\1\34\15\36\3\34\1\41\2\36\2\34"+
"\15\40\3\36\1\42\2\40\2\36\1\43\5\36\1\37"+ "\1\42\5\34\1\35\5\34\4\43\4\34\1\44\2\34"+
"\5\36\4\44\1\36\1\45\2\36\1\46\2\36\15\44"+ "\15\43\3\34\3\43\10\34\1\35\5\34\4\45\4\34"+
"\3\36\3\44\10\36\1\37\5\36\4\47\1\36\1\45"+ "\1\44\2\34\15\45\3\34\3\45\10\34\1\35\5\34"+
"\2\36\1\46\2\36\15\47\3\36\3\47\10\36\1\37"+ "\4\45\4\34\1\46\2\34\15\45\3\34\3\45\10\34"+
"\5\36\4\47\1\36\1\45\2\36\1\50\2\36\15\47"+ "\1\35\1\34\1\47\3\34\4\50\7\34\15\50\3\34"+
"\3\36\3\47\10\36\1\37\1\36\1\51\3\36\4\52"+ "\3\50\10\34\1\51\5\34\4\52\7\34\15\52\1\34"+
"\1\36\1\45\5\36\15\52\3\36\3\52\10\36\1\53"+ "\1\53\1\34\3\52\1\34\1\54\1\55\5\54\1\56"+
"\5\36\4\54\1\36\1\45\5\36\15\54\1\36\1\55"+ "\1\54\1\57\3\54\4\60\4\54\1\61\2\54\15\60"+
"\1\36\3\54\1\36\1\56\1\57\5\56\1\60\1\56"+ "\2\54\1\62\3\60\1\54\55\0\1\63\62\0\1\64"+
"\1\61\3\56\4\62\1\56\1\63\2\56\1\64\2\56"+ "\4\0\4\65\7\0\6\65\1\66\6\65\3\0\3\65"+
"\15\62\2\56\1\65\3\62\1\56\55\0\1\66\62\0"+ "\12\0\1\67\43\0\1\70\1\71\1\72\1\73\2\74"+
"\1\67\4\0\4\70\7\0\6\70\1\71\6\70\3\0"+ "\1\0\1\75\3\0\1\75\1\17\1\20\1\21\1\22"+
"\3\70\12\0\1\72\43\0\1\73\1\74\1\75\1\76"+ "\7\0\15\17\3\0\3\17\3\0\1\76\1\0\1\77"+
"\2\77\1\0\1\100\3\0\1\100\1\17\1\20\1\21"+ "\2\100\1\0\1\101\3\0\1\101\3\20\1\22\7\0"+
"\1\22\7\0\15\17\3\0\3\17\3\0\1\101\1\0"+ "\15\20\3\0\3\20\2\0\1\70\1\102\1\72\1\73"+
"\1\102\2\103\1\0\1\104\3\0\1\104\3\20\1\22"+ "\2\100\1\0\1\101\3\0\1\101\1\21\1\20\1\21"+
"\7\0\15\20\3\0\3\20\2\0\1\73\1\105\1\75"+ "\1\22\7\0\15\21\3\0\3\21\3\0\1\103\1\0"+
"\1\76\2\103\1\0\1\104\3\0\1\104\1\21\1\20"+ "\1\77\2\74\1\0\1\75\3\0\1\75\4\22\7\0"+
"\1\21\1\22\7\0\15\21\3\0\3\21\3\0\1\106"+ "\15\22\3\0\3\22\26\0\1\104\73\0\1\105\16\0"+
"\1\0\1\102\2\77\1\0\1\100\3\0\1\100\4\22"+ "\1\64\4\0\4\65\7\0\15\65\3\0\3\65\16\0"+
"\7\0\15\22\3\0\3\22\24\0\1\13\55\0\1\107"+ "\4\30\7\0\15\30\3\0\3\30\27\0\1\106\42\0"+
"\73\0\1\110\16\0\1\67\4\0\4\70\7\0\15\70"+ "\4\32\7\0\15\32\3\0\3\32\27\0\1\107\42\0"+
"\3\0\3\70\16\0\4\31\7\0\15\31\3\0\3\31"+ "\4\36\7\0\15\36\3\0\3\36\24\0\1\26\45\0"+
"\24\0\1\27\56\0\1\111\42\0\4\34\7\0\15\34"+ "\4\36\7\0\2\36\1\110\12\36\3\0\3\36\2\0"+
"\3\0\3\34\27\0\1\112\42\0\4\40\7\0\15\40"+ "\1\111\67\0\4\43\7\0\15\43\3\0\3\43\26\0"+
"\3\0\3\40\16\0\4\40\7\0\2\40\1\113\12\40"+ "\1\112\43\0\4\45\7\0\15\45\3\0\3\45\26\0"+
"\3\0\3\40\2\0\1\114\67\0\4\44\7\0\15\44"+ "\1\113\37\0\1\114\57\0\4\50\7\0\15\50\3\0"+
"\3\0\3\44\24\0\1\36\55\0\1\115\43\0\4\47"+ "\3\50\11\0\1\115\4\0\4\65\7\0\15\65\3\0"+
"\7\0\15\47\3\0\3\47\26\0\1\116\37\0\1\117"+ "\3\65\16\0\4\52\7\0\15\52\3\0\3\52\47\0"+
"\57\0\4\52\7\0\15\52\3\0\3\52\11\0\1\120"+ "\1\114\6\0\1\116\63\0\1\117\57\0\4\60\7\0"+
"\4\0\4\70\7\0\15\70\3\0\3\70\16\0\4\54"+ "\15\60\3\0\3\60\26\0\1\120\43\0\4\65\7\0"+
"\7\0\15\54\3\0\3\54\47\0\1\117\6\0\1\121"+ "\15\65\3\0\3\65\14\0\1\34\1\0\4\121\1\0"+
"\63\0\1\122\57\0\4\62\7\0\15\62\3\0\3\62"+ "\3\122\3\0\15\121\3\0\3\121\14\0\1\34\1\0"+
"\24\0\1\56\55\0\1\123\43\0\4\70\7\0\15\70"+ "\4\121\1\0\3\122\3\0\3\121\1\123\11\121\3\0"+
"\3\0\3\70\14\0\1\36\1\0\4\124\1\0\3\125"+ "\3\121\16\0\1\124\1\0\1\124\10\0\15\124\3\0"+
"\3\0\15\124\3\0\3\124\14\0\1\36\1\0\4\124"+ "\3\124\16\0\1\125\1\126\1\127\1\130\7\0\15\125"+
"\1\0\3\125\3\0\3\124\1\126\11\124\3\0\3\124"+ "\3\0\3\125\16\0\1\131\1\0\1\131\10\0\15\131"+
"\16\0\1\127\1\0\1\127\10\0\15\127\3\0\3\127"+ "\3\0\3\131\16\0\1\132\1\133\1\132\1\133\7\0"+
"\16\0\1\130\1\131\1\132\1\133\7\0\15\130\3\0"+ "\15\132\3\0\3\132\16\0\1\134\2\135\1\136\7\0"+
"\3\130\16\0\1\134\1\0\1\134\10\0\15\134\3\0"+ "\15\134\3\0\3\134\16\0\1\75\2\137\10\0\15\75"+
"\3\134\16\0\1\135\1\136\1\135\1\136\7\0\15\135"+ "\3\0\3\75\16\0\1\140\2\141\1\142\7\0\15\140"+
"\3\0\3\135\16\0\1\137\2\140\1\141\7\0\15\137"+ "\3\0\3\140\16\0\4\133\7\0\15\133\3\0\3\133"+
"\3\0\3\137\16\0\1\100\2\142\10\0\15\100\3\0"+ "\16\0\1\143\2\144\1\145\7\0\15\143\3\0\3\143"+
"\3\100\16\0\1\143\2\144\1\145\7\0\15\143\3\0"+ "\16\0\1\146\2\147\1\150\7\0\15\146\3\0\3\146"+
"\3\143\16\0\4\136\7\0\15\136\3\0\3\136\16\0"+ "\16\0\1\151\1\141\1\152\1\142\7\0\15\151\3\0"+
"\1\146\2\147\1\150\7\0\15\146\3\0\3\146\16\0"+ "\3\151\16\0\1\153\2\126\1\130\7\0\15\153\3\0"+
"\1\151\2\152\1\153\7\0\15\151\3\0\3\151\16\0"+ "\3\153\30\0\1\154\1\155\64\0\1\156\27\0\4\36"+
"\1\154\1\144\1\155\1\145\7\0\15\154\3\0\3\154"+ "\7\0\2\36\1\157\12\36\3\0\3\36\2\0\1\160"+
"\16\0\1\156\2\131\1\133\7\0\15\156\3\0\3\156"+ "\101\0\1\161\1\162\40\0\4\65\7\0\6\65\1\163"+
"\30\0\1\157\1\160\64\0\1\161\27\0\4\40\7\0"+ "\6\65\3\0\3\65\2\0\1\164\63\0\1\165\71\0"+
"\2\40\1\162\12\40\3\0\3\40\2\0\1\163\101\0"+ "\1\166\1\167\34\0\1\170\1\0\1\34\1\0\4\121"+
"\1\164\1\165\40\0\4\70\7\0\6\70\1\166\6\70"+ "\1\0\3\122\3\0\15\121\3\0\3\121\16\0\4\171"+
"\3\0\3\70\2\0\1\167\63\0\1\170\71\0\1\171"+ "\1\0\3\122\3\0\15\171\3\0\3\171\12\0\1\170"+
"\1\172\34\0\1\173\1\0\1\36\1\0\4\124\1\0"+ "\1\0\1\34\1\0\4\121\1\0\3\122\3\0\10\121"+
"\3\125\3\0\15\124\3\0\3\124\16\0\4\174\1\0"+ "\1\172\4\121\3\0\3\121\2\0\1\70\13\0\1\124"+
"\3\125\3\0\15\174\3\0\3\174\12\0\1\173\1\0"+ "\1\0\1\124\10\0\15\124\3\0\3\124\3\0\1\173"+
"\1\36\1\0\4\124\1\0\3\125\3\0\10\124\1\175"+ "\1\0\1\77\2\174\6\0\1\125\1\126\1\127\1\130"+
"\4\124\3\0\3\124\2\0\1\73\13\0\1\127\1\0"+ "\7\0\15\125\3\0\3\125\3\0\1\175\1\0\1\77"+
"\1\127\10\0\15\127\3\0\3\127\3\0\1\176\1\0"+ "\2\176\1\0\1\177\3\0\1\177\3\126\1\130\7\0"+
"\1\102\2\177\6\0\1\130\1\131\1\132\1\133\7\0"+ "\15\126\3\0\3\126\3\0\1\200\1\0\1\77\2\176"+
"\15\130\3\0\3\130\3\0\1\200\1\0\1\102\2\201"+ "\1\0\1\177\3\0\1\177\1\127\1\126\1\127\1\130"+
"\1\0\1\202\3\0\1\202\3\131\1\133\7\0\15\131"+ "\7\0\15\127\3\0\3\127\3\0\1\201\1\0\1\77"+
"\3\0\3\131\3\0\1\203\1\0\1\102\2\201\1\0"+ "\2\174\6\0\4\130\7\0\15\130\3\0\3\130\3\0"+
"\1\202\3\0\1\202\1\132\1\131\1\132\1\133\7\0"+ "\1\202\2\0\1\202\7\0\1\132\1\133\1\132\1\133"+
"\15\132\3\0\3\132\3\0\1\204\1\0\1\102\2\177"+ "\7\0\15\132\3\0\3\132\3\0\1\202\2\0\1\202"+
"\6\0\4\133\7\0\15\133\3\0\3\133\3\0\1\205"+ "\7\0\4\133\7\0\15\133\3\0\3\133\3\0\1\174"+
"\2\0\1\205\7\0\1\135\1\136\1\135\1\136\7\0"+ "\1\0\1\77\2\174\6\0\1\134\2\135\1\136\7\0"+
"\15\135\3\0\3\135\3\0\1\205\2\0\1\205\7\0"+ "\15\134\3\0\3\134\3\0\1\176\1\0\1\77\2\176"+
"\4\136\7\0\15\136\3\0\3\136\3\0\1\177\1\0"+ "\1\0\1\177\3\0\1\177\3\135\1\136\7\0\15\135"+
"\1\102\2\177\6\0\1\137\2\140\1\141\7\0\15\137"+ "\3\0\3\135\3\0\1\174\1\0\1\77\2\174\6\0"+
"\3\0\3\137\3\0\1\201\1\0\1\102\2\201\1\0"+ "\4\136\7\0\15\136\3\0\3\136\3\0\1\177\2\0"+
"\1\202\3\0\1\202\3\140\1\141\7\0\15\140\3\0"+ "\2\177\1\0\1\177\3\0\1\177\3\137\10\0\15\137"+
"\3\140\3\0\1\177\1\0\1\102\2\177\6\0\4\141"+ "\3\0\3\137\3\0\1\103\1\0\1\77\2\74\1\0"+
"\7\0\15\141\3\0\3\141\3\0\1\202\2\0\2\202"+ "\1\75\3\0\1\75\1\140\2\141\1\142\7\0\15\140"+
"\1\0\1\202\3\0\1\202\3\142\10\0\15\142\3\0"+ "\3\0\3\140\3\0\1\76\1\0\1\77\2\100\1\0"+
"\3\142\3\0\1\106\1\0\1\102\2\77\1\0\1\100"+ "\1\101\3\0\1\101\3\141\1\142\7\0\15\141\3\0"+
"\3\0\1\100\1\143\2\144\1\145\7\0\15\143\3\0"+ "\3\141\3\0\1\103\1\0\1\77\2\74\1\0\1\75"+
"\3\143\3\0\1\101\1\0\1\102\2\103\1\0\1\104"+ "\3\0\1\75\4\142\7\0\15\142\3\0\3\142\3\0"+
"\3\0\1\104\3\144\1\145\7\0\15\144\3\0\3\144"+ "\1\74\1\0\1\77\2\74\1\0\1\75\3\0\1\75"+
"\3\0\1\106\1\0\1\102\2\77\1\0\1\100\3\0"+ "\1\143\2\144\1\145\7\0\15\143\3\0\3\143\3\0"+
"\1\100\4\145\7\0\15\145\3\0\3\145\3\0\1\77"+ "\1\100\1\0\1\77\2\100\1\0\1\101\3\0\1\101"+
"\1\0\1\102\2\77\1\0\1\100\3\0\1\100\1\146"+ "\3\144\1\145\7\0\15\144\3\0\3\144\3\0\1\74"+
"\2\147\1\150\7\0\15\146\3\0\3\146\3\0\1\103"+ "\1\0\1\77\2\74\1\0\1\75\3\0\1\75\4\145"+
"\1\0\1\102\2\103\1\0\1\104\3\0\1\104\3\147"+ "\7\0\15\145\3\0\3\145\3\0\1\75\2\0\2\75"+
"\1\150\7\0\15\147\3\0\3\147\3\0\1\77\1\0"+ "\1\0\1\75\3\0\1\75\1\146\2\147\1\150\7\0"+
"\1\102\2\77\1\0\1\100\3\0\1\100\4\150\7\0"+ "\15\146\3\0\3\146\3\0\1\101\2\0\2\101\1\0"+
"\15\150\3\0\3\150\3\0\1\100\2\0\2\100\1\0"+ "\1\101\3\0\1\101\3\147\1\150\7\0\15\147\3\0"+
"\1\100\3\0\1\100\1\151\2\152\1\153\7\0\15\151"+ "\3\147\3\0\1\75\2\0\2\75\1\0\1\75\3\0"+
"\3\0\3\151\3\0\1\104\2\0\2\104\1\0\1\104"+ "\1\75\4\150\7\0\15\150\3\0\3\150\3\0\1\203"+
"\3\0\1\104\3\152\1\153\7\0\15\152\3\0\3\152"+ "\1\0\1\77\2\74\1\0\1\75\3\0\1\75\1\151"+
"\3\0\1\100\2\0\2\100\1\0\1\100\3\0\1\100"+ "\1\141\1\152\1\142\7\0\15\151\3\0\3\151\3\0"+
"\4\153\7\0\15\153\3\0\3\153\3\0\1\206\1\0"+ "\1\204\1\0\1\77\2\100\1\0\1\101\3\0\1\101"+
"\1\102\2\77\1\0\1\100\3\0\1\100\1\154\1\144"+ "\1\152\1\141\1\152\1\142\7\0\15\152\3\0\3\152"+
"\1\155\1\145\7\0\15\154\3\0\3\154\3\0\1\207"+ "\3\0\1\201\1\0\1\77\2\174\6\0\1\153\2\126"+
"\1\0\1\102\2\103\1\0\1\104\3\0\1\104\1\155"+ "\1\130\7\0\15\153\3\0\3\153\31\0\1\155\54\0"+
"\1\144\1\155\1\145\7\0\15\155\3\0\3\155\3\0"+ "\1\205\64\0\1\206\26\0\4\36\7\0\15\36\3\0"+
"\1\204\1\0\1\102\2\177\6\0\1\156\2\131\1\133"+ "\1\36\1\207\1\36\31\0\1\162\54\0\1\210\35\0"+
"\7\0\15\156\3\0\3\156\31\0\1\160\54\0\1\210"+ "\1\34\1\0\4\121\1\0\3\122\3\0\3\121\1\211"+
"\64\0\1\211\26\0\4\40\7\0\15\40\3\0\1\40"+ "\11\121\3\0\3\121\2\0\1\212\102\0\1\167\54\0"+
"\1\212\1\40\31\0\1\165\54\0\1\213\35\0\1\36"+ "\1\213\34\0\1\214\52\0\1\170\3\0\4\171\7\0"+
"\1\0\4\124\1\0\3\125\3\0\3\124\1\214\11\124"+ "\15\171\3\0\3\171\12\0\1\170\1\0\1\215\1\0"+
"\3\0\3\124\2\0\1\215\102\0\1\172\54\0\1\216"+ "\4\121\1\0\3\122\3\0\15\121\3\0\3\121\16\0"+
"\34\0\1\217\52\0\1\173\3\0\4\174\7\0\15\174"+ "\1\216\1\130\1\216\1\130\7\0\15\216\3\0\3\216"+
"\3\0\3\174\12\0\1\173\1\0\1\220\1\0\4\124"+ "\16\0\4\136\7\0\15\136\3\0\3\136\16\0\4\142"+
"\1\0\3\125\3\0\15\124\3\0\3\124\16\0\1\221"+ "\7\0\15\142\3\0\3\142\16\0\4\145\7\0\15\145"+
"\1\133\1\221\1\133\7\0\15\221\3\0\3\221\16\0"+ "\3\0\3\145\16\0\4\150\7\0\15\150\3\0\3\150"+
"\4\141\7\0\15\141\3\0\3\141\16\0\4\145\7\0"+ "\16\0\1\217\1\142\1\217\1\142\7\0\15\217\3\0"+
"\15\145\3\0\3\145\16\0\4\150\7\0\15\150\3\0"+ "\3\217\16\0\4\130\7\0\15\130\3\0\3\130\16\0"+
"\3\150\16\0\4\153\7\0\15\153\3\0\3\153\16\0"+ "\4\220\7\0\15\220\3\0\3\220\33\0\1\221\61\0"+
"\1\222\1\145\1\222\1\145\7\0\15\222\3\0\3\222"+ "\1\222\30\0\4\36\6\0\1\223\15\36\3\0\2\36"+
"\16\0\4\133\7\0\15\133\3\0\3\133\16\0\4\223"+ "\1\224\33\0\1\225\32\0\1\170\1\0\1\34\1\0"+
"\7\0\15\223\3\0\3\223\33\0\1\224\61\0\1\225"+ "\4\121\1\0\3\122\3\0\10\121\1\226\4\121\3\0"+
"\30\0\4\40\6\0\1\226\15\40\3\0\2\40\1\227"+ "\3\121\2\0\1\227\104\0\1\230\36\0\4\231\7\0"+
"\33\0\1\230\32\0\1\173\1\0\1\36\1\0\4\124"+ "\15\231\3\0\3\231\3\0\1\173\1\0\1\77\2\174"+
"\1\0\3\125\3\0\10\124\1\231\4\124\3\0\3\124"+ "\6\0\1\216\1\130\1\216\1\130\7\0\15\216\3\0"+
"\2\0\1\232\104\0\1\233\36\0\4\234\7\0\15\234"+ "\3\216\3\0\1\203\1\0\1\77\2\74\1\0\1\75"+
"\3\0\3\234\3\0\1\176\1\0\1\102\2\177\6\0"+ "\3\0\1\75\1\217\1\142\1\217\1\142\7\0\15\217"+
"\1\221\1\133\1\221\1\133\7\0\15\221\3\0\3\221"+ "\3\0\3\217\3\0\1\202\2\0\1\202\7\0\4\220"+
"\3\0\1\206\1\0\1\102\2\77\1\0\1\100\3\0"+ "\7\0\15\220\3\0\3\220\34\0\1\232\55\0\1\233"+
"\1\100\1\222\1\145\1\222\1\145\7\0\15\222\3\0"+ "\26\0\1\234\60\0\4\36\6\0\1\223\15\36\3\0"+
"\3\222\3\0\1\205\2\0\1\205\7\0\4\223\7\0"+ "\3\36\34\0\1\235\31\0\1\170\1\0\1\114\1\0"+
"\15\223\3\0\3\223\34\0\1\235\55\0\1\236\26\0"+ "\4\121\1\0\3\122\3\0\15\121\3\0\3\121\34\0"+
"\1\237\60\0\4\40\6\0\1\226\15\40\3\0\3\40"+ "\1\236\32\0\1\237\2\0\4\231\7\0\15\231\3\0"+
"\34\0\1\240\31\0\1\173\1\0\1\117\1\0\4\124"+ "\3\231\35\0\1\240\62\0\1\241\20\0\1\242\77\0"+
"\1\0\3\125\3\0\15\124\3\0\3\124\34\0\1\241"+ "\1\243\53\0\1\244\32\0\1\34\1\0\4\171\1\0"+
"\32\0\1\242\2\0\4\234\7\0\15\234\3\0\3\234"+ "\3\122\3\0\15\171\3\0\3\171\36\0\1\245\53\0"+
"\35\0\1\243\62\0\1\244\20\0\1\245\77\0\1\246"+ "\1\246\33\0\4\247\7\0\15\247\3\0\3\247\36\0"+
"\53\0\1\247\32\0\1\36\1\0\4\174\1\0\3\125"+ "\1\250\53\0\1\251\54\0\1\252\61\0\1\253\11\0"+
"\3\0\15\174\3\0\3\174\36\0\1\250\53\0\1\251"+ "\1\254\12\0\4\247\7\0\15\247\3\0\3\247\37\0"+
"\33\0\4\252\7\0\15\252\3\0\3\252\36\0\1\253"+ "\1\255\53\0\1\256\54\0\1\257\22\0\1\13\62\0"+
"\53\0\1\254\54\0\1\255\61\0\1\256\11\0\1\257"+ "\4\260\7\0\15\260\3\0\3\260\40\0\1\261\53\0"+
"\12\0\4\252\7\0\15\252\3\0\3\252\37\0\1\260"+ "\1\262\43\0\1\263\26\0\2\260\1\0\2\260\1\0"+
"\53\0\1\261\54\0\1\262\22\0\1\13\62\0\4\263"+ "\2\260\2\0\5\260\7\0\15\260\3\0\4\260\27\0"+
"\7\0\15\263\3\0\3\263\40\0\1\264\53\0\1\265"+ "\1\264\53\0\1\265\24\0";
"\43\0\1\266\26\0\2\263\1\0\2\263\1\0\2\263"+
"\2\0\5\263\7\0\15\263\3\0\4\263\27\0\1\267"+
"\53\0\1\270\24\0";
private static int [] zzUnpackTrans() { private static int [] zzUnpackTrans() {
int [] result = new int[7040]; int [] result = new int[6908];
int offset = 0; int offset = 0;
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
return result; return result;
@ -355,8 +351,8 @@ class WikipediaTokenizerImpl {
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
private static final String ZZ_ATTRIBUTE_PACKED_0 = private static final String ZZ_ATTRIBUTE_PACKED_0 =
"\12\0\1\11\7\1\1\11\3\1\1\11\6\1\1\11"+ "\12\0\1\11\7\1\1\11\2\1\1\11\5\1\1\11"+
"\2\1\1\11\14\1\1\11\6\1\2\11\3\0\1\11"+ "\3\1\1\11\13\1\1\11\5\1\2\11\3\0\1\11"+
"\14\0\2\1\2\11\1\1\1\0\2\1\1\11\1\0"+ "\14\0\2\1\2\11\1\1\1\0\2\1\1\11\1\0"+
"\1\1\1\0\1\1\3\0\7\1\2\0\1\1\1\0"+ "\1\1\1\0\1\1\3\0\7\1\2\0\1\1\1\0"+
"\15\1\3\0\1\1\1\11\3\0\1\1\1\11\5\0"+ "\15\1\3\0\1\1\1\11\3\0\1\1\1\11\5\0"+
@ -365,7 +361,7 @@ class WikipediaTokenizerImpl {
"\2\0\3\11"; "\2\0\3\11";
private static int [] zzUnpackAttribute() { private static int [] zzUnpackAttribute() {
int [] result = new int[184]; int [] result = new int[181];
int offset = 0; int offset = 0;
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
return result; return result;
@ -508,7 +504,6 @@ final void reset() {
/** /**
* Creates a new scanner * Creates a new scanner
* There is also a java.io.InputStream version of this constructor.
* *
* @param in the java.io.Reader to read input from. * @param in the java.io.Reader to read input from.
*/ */
@ -516,7 +511,6 @@ final void reset() {
this.zzReader = in; this.zzReader = in;
} }
/** /**
* Unpacks the compressed character translation table. * Unpacks the compressed character translation table.

View File

@ -212,7 +212,7 @@ DOUBLE_EQUALS = "="{2}
{DOUBLE_BRACE} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;} {DOUBLE_BRACE} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;}
{CITATION} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;} {CITATION} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;}
//ignore //ignore
. | {WHITESPACE} |{INFOBOX} {numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} [^] |{INFOBOX} {numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
} }
<INTERNAL_LINK_STATE>{ <INTERNAL_LINK_STATE>{
@ -221,7 +221,7 @@ DOUBLE_EQUALS = "="{2}
{ALPHANUM} {yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;} {ALPHANUM} {yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;}
{DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} {DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
//ignore //ignore
. | {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} [^] { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
} }
<EXTERNAL_LINK_STATE>{ <EXTERNAL_LINK_STATE>{
@ -236,7 +236,7 @@ DOUBLE_EQUALS = "="{2}
{ALPHANUM} {yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;} {ALPHANUM} {yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;}
{DOUBLE_BRACKET_CLOSE} {yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;} {DOUBLE_BRACKET_CLOSE} {yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;}
//ignore //ignore
. | {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} [^] { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
} }
//italics //italics
<TWO_SINGLE_QUOTES_STATE>{ <TWO_SINGLE_QUOTES_STATE>{
@ -249,7 +249,7 @@ DOUBLE_EQUALS = "="{2}
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
//ignore //ignore
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
} }
//bold //bold
<THREE_SINGLE_QUOTES_STATE>{ <THREE_SINGLE_QUOTES_STATE>{
@ -260,7 +260,7 @@ DOUBLE_EQUALS = "="{2}
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
//ignore //ignore
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
} }
//bold italics //bold italics
@ -272,7 +272,7 @@ DOUBLE_EQUALS = "="{2}
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
//ignore //ignore
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
} }
<DOUBLE_EQUALS_STATE>{ <DOUBLE_EQUALS_STATE>{
@ -280,15 +280,15 @@ DOUBLE_EQUALS = "="{2}
{ALPHANUM} {currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;} {ALPHANUM} {currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;}
{DOUBLE_EQUALS} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} {DOUBLE_EQUALS} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
//ignore //ignore
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
} }
<DOUBLE_BRACE_STATE>{ <DOUBLE_BRACE_STATE>{
{ALPHANUM} {yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;} {ALPHANUM} {yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;}
{DOUBLE_BRACE_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} {DOUBLE_BRACE_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
{CITATION_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} {CITATION_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
//ignore //ignore
. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
} }
<STRING> { <STRING> {
@ -305,7 +305,7 @@ DOUBLE_EQUALS = "="{2}
{PIPE} {yybegin(STRING); return currentTokType;/*pipe*/} {PIPE} {yybegin(STRING); return currentTokType;/*pipe*/}
.|{WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ } [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ }
} }
@ -327,7 +327,7 @@ DOUBLE_EQUALS = "="{2}
//end wikipedia //end wikipedia
/** Ignore the rest */ /** Ignore the rest */
. | {WHITESPACE}|{TAGS} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } [^] | {TAGS} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
//INTERNAL_LINK = "["{2}({ALPHANUM}+{WHITESPACE}*)+"]"{2} //INTERNAL_LINK = "["{2}({ALPHANUM}+{WHITESPACE}*)+"]"{2}

View File

@ -202,7 +202,7 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
} }
public void testUnicodeWordBreaks() throws Exception { public void testUnicodeWordBreaks() throws Exception {
WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0(); WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
wordBreakTest.test(a); wordBreakTest.test(a);
} }
@ -230,6 +230,8 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
checkOneTerm(a, "壹゙", "壹゙"); // ideographic checkOneTerm(a, "壹゙", "壹゙"); // ideographic
checkOneTerm(a, "아゙", "아゙"); // hangul checkOneTerm(a, "아゙", "아゙"); // hangul
} }
/** blast some random strings through the analyzer */ /** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception { public void testRandomStrings() throws Exception {

View File

@ -424,7 +424,7 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
} }
public void testUnicodeWordBreaks() throws Exception { public void testUnicodeWordBreaks() throws Exception {
WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0(); WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
wordBreakTest.test(a); wordBreakTest.test(a);
} }

View File

@ -78,13 +78,13 @@ LTLNFsgB@[191.56.104.113]
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU
VGLn@z3E2.3an2.MM VGLn@z3E2.3an2.MM
TWmfsxn@[112.192.017.029] TWmfsxn@[112.192.017.029]
2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV 2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KPRW13D
CjaPC63@['\RDrwk] CjaPC63@['\RDrwk]
Ayydpdoa@tdgypppmen.wf Ayydpdoa@tdgypppmen.wf
"gfKP9"@jo3-r0.mz "gfKP9"@jo3-r0.mz
aTMgDW4@t5gax.XN--0ZWM56D aTMgDW4@t5gax.XN--3E0B707E
mcDrMO3FQ@nwc21.y5qd45lesryrp.IL mcDrMO3FQ@nwc21.y5qd45lesryrp.IL
NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp NZqj@v50egeveepk.z290kk.Bc3.xn--kprw13d
XtAhFnq@[218.214.251.103] XtAhFnq@[218.214.251.103]
x0S8uos@[109.82.126.233] x0S8uos@[109.82.126.233]
ALB4KFavj16pODdd@i206d6s.MM ALB4KFavj16pODdd@i206d6s.MM

View File

@ -78,9 +78,10 @@ import org.junit.Ignore;
* \\p{Script = Hiragana} * \\p{Script = Hiragana}
* \\p{LineBreak = Complex_Context} (From $line_break_url) * \\p{LineBreak = Complex_Context} (From $line_break_url)
* \\p{WordBreak = ALetter} (From $word_break_url) * \\p{WordBreak = ALetter} (From $word_break_url)
* \\p{WordBreak = Hebrew_Letter}
* \\p{WordBreak = Katakana} * \\p{WordBreak = Katakana}
* \\p{WordBreak = Numeric} (Excludes full-width Arabic digits) * \\p{WordBreak = Numeric} (Excludes full-width Arabic digits)
* [\\uFF10-\\uFF19] (Full-width Arabic digits) * [\\uFF10-\\uFF19] (Full-width Arabic digits)
*/ */
\@Ignore \@Ignore
public class ${class_name} extends BaseTokenStreamTestCase { public class ${class_name} extends BaseTokenStreamTestCase {
@ -97,7 +98,7 @@ parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
parse_Unicode_data_file($scripts_url, $codepoints, parse_Unicode_data_file($scripts_url, $codepoints,
{'han' => 1, 'hiragana' => 1}); {'han' => 1, 'hiragana' => 1});
parse_Unicode_data_file($word_break_url, $codepoints, parse_Unicode_data_file($word_break_url, $codepoints,
{'aletter' => 1, 'katakana' => 1, 'numeric' => 1}); {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1});
my @tests = split /\r?\n/, get_URL_content($word_break_test_url); my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
my $output_path = File::Spec->catpath($volume, $directory, $output_filename); my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
@ -109,25 +110,33 @@ print STDERR "Writing '$output_path'...";
print OUT $header; print OUT $header;
for my $line (@tests) { for my $line (@tests) {
next if ($line =~ /^\s*\#/); next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines
# ÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3] # Example line: ÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
my ($sequence) = $line =~ /^(.*?)\s*\#/; my ($sequence) = $line =~ /^(.*?)\s*\#/;
$line =~ s/\t/ /g; # Convert tabs to two spaces (no tabs allowed in Lucene source)
print OUT " // $line\n"; print OUT " // $line\n";
$sequence =~ s/\s*÷\s*$//; # Trim trailing break character $sequence =~ s/\s*÷\s*$//; # Trim trailing break character
my $test_string = $sequence; my $test_string = $sequence;
$test_string =~ s/\s*÷\s*/\\u/g; $test_string =~ s/\s*÷\s*/\\u/g;
$test_string =~ s/\s*×\s*/\\u/g; $test_string =~ s/\s*×\s*/\\u/g;
$test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge;
$test_string =~ s/\\u000A/\\n/g; $test_string =~ s/\\u000A/\\n/g;
$test_string =~ s/\\u000D/\\r/g; $test_string =~ s/\\u000D/\\r/g;
$test_string =~ s/\\u0022/\\\"/g;
$sequence =~ s/^\s*÷\s*//; # Trim leading break character $sequence =~ s/^\s*÷\s*//; # Trim leading break character
my @tokens = (); my @tokens = ();
for my $candidate (split /\s*÷\s*/, $sequence) { for my $candidate (split /\s*÷\s*/, $sequence) {
my @chars = (); my @chars = ();
my $has_wanted_char = 0; my $has_wanted_char = 0;
while ($candidate =~ /([0-9A-F]+)/gi) { while ($candidate =~ /([0-9A-F]+)/gi) {
push @chars, $1; my $hexchar = $1;
if (4 == length($hexchar)) {
push @chars, $hexchar;
} else {
push @chars, above_BMP_char_to_surrogates($hexchar);
}
unless ($has_wanted_char) { unless ($has_wanted_char) {
$has_wanted_char = 1 if (defined($codepoints->[hex($1)])); $has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)]));
} }
} }
if ($has_wanted_char) { if ($has_wanted_char) {
@ -144,6 +153,21 @@ close OUT;
print STDERR "done.\n"; print STDERR "done.\n";
# sub above_BMP_char_to_surrogates
#
# Converts hex references to chars above the BMP (i.e., greater than 0xFFFF)
# to the corresponding UTF-16 surrogate pair
#
# Assumption: input string is a sequence of more than four hex digits
#
sub above_BMP_char_to_surrogates {
  # Takes one argument: the hex digits of a code point above U+FFFF.
  # Returns the UTF-16 lead and trail surrogates, each formatted as a
  # four-digit uppercase hex string.
  my ($hex_digits) = @_;
  my $codepoint = hex($hex_digits);

  # Distance past the end of the BMP; its upper ten bits select the
  # lead surrogate, and the code point's lower ten bits select the trail.
  my $offset = $codepoint - 0x10000;
  my @surrogate_pair = (
    0xD800 + ($offset >> 10),
    0xDC00 + ($codepoint & 0x3FF),
  );

  return map { sprintf("%04X", $_) } @surrogate_pair;
}
# sub parse_Unicode_data_file # sub parse_Unicode_data_file
# #
# Downloads and parses the specified Unicode data file, parses it, and # Downloads and parses the specified Unicode data file, parses it, and

View File

@ -121,14 +121,14 @@ Bzzzzzzzz! Bzzzzzzzzzzzzzzz! Tell them "0\!P?".shQVdSerA@2qmqj8ul.hm the leg
of LTLNFsgB@[191.56.104.113] all, until it has read it is of LTLNFsgB@[191.56.104.113] all, until it has read it is
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU there. <VGLn@z3E2.3an2.MM> Once iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU there. <VGLn@z3E2.3an2.MM> Once
TWmfsxn@[112.192.017.029] Spiros under the place TWmfsxn@[112.192.017.029] Spiros under the place
2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV as were not a house of the 2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KPRW13D as were not a house of the
rosebushes and the whateverend, feel her waist. She changes everything. We had rosebushes and the whateverend, feel her waist. She changes everything. We had
decided to do you know CjaPC63@['\RDrwk] this, is what did leave, pray; let us decided to do you know CjaPC63@['\RDrwk] this, is what did leave, pray; let us
come to, <Ayydpdoa@tdgypppmen.wf> what history as died. Strange, Spiros with come to, <Ayydpdoa@tdgypppmen.wf> what history as died. Strange, Spiros with
delight: That night "gfKP9"@jo3-r0.mz and gold case delight: That night "gfKP9"@jo3-r0.mz and gold case
<aTMgDW4@t5gax.XN--0ZWM56D> is spring: the aeon arising, wherein he returned, <aTMgDW4@t5gax.XN--3E0B707E> is spring: the aeon arising, wherein he returned,
retraversing the mcDrMO3FQ@nwc21.y5qd45lesryrp.IL gates, first retraversing the mcDrMO3FQ@nwc21.y5qd45lesryrp.IL gates, first
<NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp> to reach session. Initiating first <NZqj@v50egeveepk.z290kk.Bc3.xn--kprw13d> to reach session. Initiating first
part of the main hall toward his own spurs. Hes an <XtAhFnq@[218.214.251.103]> part of the main hall toward his own spurs. Hes an <XtAhFnq@[218.214.251.103]>
Irifix And older ones who wins? ADAM: x0S8uos@[109.82.126.233] The violin and Irifix And older ones who wins? ADAM: x0S8uos@[109.82.126.233] The violin and
reality. The hidden set up to come. ROSE WAKINS: No answer. The reality. The hidden set up to come. ROSE WAKINS: No answer. The

View File

@ -24,7 +24,7 @@ and Joe recited this iron bars with their account, poor elth, and she had been
almost drove me towards evening. At almost drove me towards evening. At
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH the HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH the
sergeant and then on the raw sergeant and then on the raw
<Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m> afternoon towards <Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m> afternoon towards
the terror, merely wished him as biled the terror, merely wished him as biled
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb -- a conciliatory air on in M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb -- a conciliatory air on in
<ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J> <ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J>
@ -47,7 +47,7 @@ to live. You didn't know nothing could attend more.' He had been a coming! Get
behind the answer those aids, I saw him in the same appearance of the convict's behind the answer those aids, I saw him in the same appearance of the convict's
file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf
confession, and bring you see? ' confession, and bring you see? '
HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND said my limbs. Joe in an HTTP://yA2O3F.XN--3E0B707E/qPDTt/MwMXGQq2S7JT/TJ2iCND said my limbs. Joe in an
accusatory manner as well known that Joe Gargery marry her cup. `I wonder and accusatory manner as well known that Joe Gargery marry her cup. `I wonder and
there was publicly made it was, there was publicly made it was,
<file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#> as lookers on; me, I <file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#> as lookers on; me, I
@ -63,7 +63,7 @@ again
FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB
towards evening. At last, and kneaded, and a dead man taking any. There was towards evening. At last, and kneaded, and a dead man taking any. There was
publicly made out there?' said I, publicly made out there?' said I,
ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM ftp://w0yaysrl.XN--CLCHC0EA0B2G2A9GCD/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
glancing http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY at the glancing http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY at the
N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/ river wound, twenty miles of the N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/ river wound, twenty miles of the
number called, hears the awful it lights; here and trimmings of Caesar. This number called, hears the awful it lights; here and trimmings of Caesar. This
@ -155,7 +155,7 @@ ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sg
at me, and that her walking z3ymb.KM/DdnrqoBz=YtxSB away so much of the at me, and that her walking z3ymb.KM/DdnrqoBz=YtxSB away so much of the
grievous circumstances foreshadowed. After receiving the way, that I thought, grievous circumstances foreshadowed. After receiving the way, that I thought,
if she should go to?' `Good again!' cried the if she should go to?' `Good again!' cried the
FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0 society of a savoury pork pie, FTP://7kgip3z.XN--KPRY57D:15983/OYEQzIA0 society of a savoury pork pie,
and nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc they challenged, hears nothin' all my and nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc they challenged, hears nothin' all my
hands in herself, and bring him by hand. `This,' ftp://085.062.055.011/bopfVV/ hands in herself, and bring him by hand. `This,' ftp://085.062.055.011/bopfVV/
said he wore ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs a dog of said he wore ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs a dog of
@ -191,7 +191,7 @@ and tingling, and that I had won of the shoulder. `Excuse me, and we departed
from Richard the furthest end of from Richard the furthest end of
http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w both imp and stung by the http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w both imp and stung by the
bright fire, another look bright fire, another look
zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 over her zQFC1SPO96J.Jy20d8.xn--3e0b707e:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 over her
best use asking questions, and feet, best use asking questions, and feet,
<ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ> hanging to try <ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ> hanging to try
back was the poker. `It was not warmly. `Seems back was the poker. `It was not warmly. `Seems
@ -204,7 +204,7 @@ kitchen wall,
Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1 he ate the Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1 he ate the
house, end with the Ghost in order): Forty-three pence?' To five hundred house, end with the Ghost in order): Forty-three pence?' To five hundred
Gargerys.' `I say, Pip; stay Gargerys.' `I say, Pip; stay
7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb out with 7WO6F.XN--45BRJ9C/1L%f9G0NEu/L2lD/mQGNS9UhgCEb out with
ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB
his shot, and reposing no help to my seat. It was in the kitchen wall, because his shot, and reposing no help to my seat. It was in the kitchen wall, because
I calculated the sounds by giving me by the name for a rush of Joe's forge I calculated the sounds by giving me by the name for a rush of Joe's forge
@ -299,7 +299,7 @@ She drew the kitchen, carrying file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH so low
wooden hut wooden hut
ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T
where it seemed to give Pirrip as where it seemed to give Pirrip as
<79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO> <79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--FIQS8S/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO>
to say, on the guiltily coarse his head, he tried to the to say, on the guiltily coarse his head, he tried to the
Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z
remark. `There's one sprinkled all I was possible she beggared me. All these remark. `There's one sprinkled all I was possible she beggared me. All these
@ -311,7 +311,7 @@ Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%be
he shook her veil so thick nor my milk and would impart all had returned, with he shook her veil so thick nor my milk and would impart all had returned, with
soap-suds, I had FILE:///#F9Bgl just like thin snow. `Enough of his right side soap-suds, I had FILE:///#F9Bgl just like thin snow. `Enough of his right side
of thenceforth sitting of thenceforth sitting
jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--3E0B707E/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
in File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg my soul. I sat down on it, I have in File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg my soul. I sat down on it, I have
been a spoon that the pie, blacksmith?' asked Estella of it made a mouth wide been a spoon that the pie, blacksmith?' asked Estella of it made a mouth wide
open, and so open, and so
@ -324,7 +324,7 @@ FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2 of the stranger looked at it, I
pointed to Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz himself. No glimpse of pointed to Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz himself. No glimpse of
file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg herself, I saw that he would have file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg herself, I saw that he would have
been there, I was too far and uncomfortable by it. been there, I was too far and uncomfortable by it.
http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms= http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--CLCHC0EA0B2G2A9GCD/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
Under the Above,' I rather to become transfixed -- he gave me out of the Under the Above,' I rather to become transfixed -- he gave me out of the
kitchen empty-handed, to keep him, I had made a kitchen empty-handed, to keep him, I had made a
Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG subject, if he had Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG subject, if he had
@ -468,7 +468,7 @@ hard twist upon his -- `Well, boy,' Uncle Pumblechook: a look at the sermon he
had heard it had hesitated as little window, violently plunging and she had had heard it had hesitated as little window, violently plunging and she had
committed, and had all about the present calling, which the fingers of tea on committed, and had all about the present calling, which the fingers of tea on
Saturdays than this country, gentlemen, but I could see those, Saturdays than this country, gentlemen, but I could see those,
https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--3E0B707E/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
too, if you remember what stock she told me again. `But I know what too, if you remember what stock she told me again. `But I know what
file:///enqvF%EFLOBsZhl8h2z wittles is?' `Yes, ma'am.' `Estella, take me again file:///enqvF%EFLOBsZhl8h2z wittles is?' `Yes, ma'am.' `Estella, take me again
and ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A refractory and ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A refractory
@ -493,7 +493,7 @@ right-side
ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg
flaxen curls and tables, and a foot of the blacksmith's.' `Halloa!' said Joe, flaxen curls and tables, and a foot of the blacksmith's.' `Halloa!' said Joe,
staring at that it had withered like a infunt, and took another look about the staring at that it had withered like a infunt, and took another look about the
rum <6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/> out at once. rum <6S8.Crwllo5e3.jmtz.XN--GECRJ9C/6InlQn/hnhu2f%ac8tX/apq%0D6o/> out at once.
Three Jolly Bargemen to think she seemed to tell you were. When we saw the file Three Jolly Bargemen to think she seemed to tell you were. When we saw the file
coming at my slice. I have mentioned it with the wooden hut where we had got up coming at my slice. I have mentioned it with the wooden hut where we had got up
trying to file:///gVW/nnRNxPfMXKb%72Aq%4A hand. If ever grateful for. If a trying to file:///gVW/nnRNxPfMXKb%72Aq%4A hand. If ever grateful for. If a
@ -662,7 +662,7 @@ open,' he
https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/ https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/
wiped the liquor. He was the bad; and some one wiped the liquor. He was the bad; and some one
Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE another Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE another
Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9 turned to put straws Ftp://3zd7z.etw.XN--KPRW13D/4UztCuTbW2z/LL%2cDI/dTYSi9 turned to put straws
down by a most powerfully down down by a most powerfully down
t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x to me, and all that t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x to me, and all that
know the window, know the window,
@ -993,7 +993,7 @@ upon a door, which was gobbling mincemeat, meatbone, bread, some lace for it
that Joe's blue file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/ eyes, had an that Joe's blue file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/ eyes, had an
hour longer than at me, and dismal, and gloves, and that's further than I hour longer than at me, and dismal, and gloves, and that's further than I
mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs looked on. `Now, boy! mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs looked on. `Now, boy!
g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P g6tylc0.daeczh.4q.XN--CLCHC0EA0B2G2A9GCD/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
Why, here's a ridiculous old chap. And looked up by hand. `Why don't like Why, here's a ridiculous old chap. And looked up by hand. `Why don't like
`sulks.' Therefore, I was in such game?' Everybody, myself drifting down his `sulks.' Therefore, I was in such game?' Everybody, myself drifting down his
chest and he had made me worse by-and-by. I was a chest and he had made me worse by-and-by. I was a
@ -1035,7 +1035,7 @@ in every word out again. `You are prison-ships, and they fought
<HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt> <HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt>
for us heavy. `I Bolted, myself, 5.Piba4ac.JE/55M1H/AZXdj and thread, and we for us heavy. `I Bolted, myself, 5.Piba4ac.JE/55M1H/AZXdj and thread, and we
after him, or to inspire confidence. This was brought you spoke all the act, he after him, or to inspire confidence. This was brought you spoke all the act, he
couldn't m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/ keep the fire couldn't m-k6-ej7x.XN--J6W193G/suVrNQSIj9/TmRhHbe/o&0dbqR/ keep the fire
between the forge was <ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/> between the forge was <ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/>
busy in it. Until busy in it. Until
hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/ she jammed hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/ she jammed
@ -1329,7 +1329,7 @@ sort Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L --
FILE://155.24.106.255/3VEZIT7 if it was to him, I might not do not afraid of FILE://155.24.106.255/3VEZIT7 if it was to him, I might not do not afraid of
report, and looking rather to make nothing of a confidential voice, report, and looking rather to make nothing of a confidential voice,
d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ
as lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET if he would be as lda5l5wc.XN--KPRY57D/pr80SSZ/eNM1%D50lp/Rc%8EimOET if he would be
supposed,' said the wind and so we were read the conversation consisted of it supposed,' said the wind and so we were read the conversation consisted of it
had so that we saw some bread, some had so that we saw some bread, some
l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C brandy out: no black velvet l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C brandy out: no black velvet

View File

@ -10,7 +10,7 @@ http://Rcbu6/Oxc%C0IkGSZ8rO9IUpd/BEvkvw3nWNXZ/P%17tp3gjATN/0ZRzs
file:///2CdsP/U2GCLT file:///2CdsP/U2GCLT
Http://Pzw978uzb.ai/yB;mt/o8hVKG/%231Y/Xb1%bb6v1fhjfdkfkBvxed?8mq~=OvF&STpJJk=ws0ZO&0DRA= Http://Pzw978uzb.ai/yB;mt/o8hVKG/%231Y/Xb1%bb6v1fhjfdkfkBvxed?8mq~=OvF&STpJJk=ws0ZO&0DRA=
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH
Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb
ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J
ftp://213.7.210.47/%e5pFkj6e6Jczc/ypJGG/z%663jYR/37IxLQBPr/Ciq50EUIdueyj ftp://213.7.210.47/%e5pFkj6e6Jczc/ypJGG/z%663jYR/37IxLQBPr/Ciq50EUIdueyj
@ -23,13 +23,13 @@ Ftp://Xmswrxn8d-1s.pe.gm/dB6C3xTk%D3x/EKOiTmk%7c/API/0cdgpi;Type=a
FILE:///rKnQkS0MAF#tM%53_2%03%d6ZICH FILE:///rKnQkS0MAF#tM%53_2%03%d6ZICH
ftp://R5ecjkf1yx4wpskfh.tv0y3m90ak.0R605.se:51297/zpWcRRcG/1woSqw7ZUko/ ftp://R5ecjkf1yx4wpskfh.tv0y3m90ak.0R605.se:51297/zpWcRRcG/1woSqw7ZUko/
file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf
HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND HTTP://yA2O3F.XN--3E0B707E/qPDTt/MwMXGQq2S7JT/TJ2iCND
file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6# file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#
http://1qvgjd1.TP/7oq5gWW/Gwqf8fxBXR4/?Br,q=ayMz0&1IO%370N7=;Sl1czc2L+5bRISfD+w&ygP3FhV%E1w36=2Rx http://1qvgjd1.TP/7oq5gWW/Gwqf8fxBXR4/?Br,q=ayMz0&1IO%370N7=;Sl1czc2L+5bRISfD+w&ygP3FhV%E1w36=2Rx
ftp://5SCC6BUYP.Knf1cvlc22z9.1dc3rixt5ugyq4/5OnYTSN/QpCdo/t3zqkI/pn5skT/oJgrGy7 ftp://5SCC6BUYP.Knf1cvlc22z9.1dc3rixt5ugyq4/5OnYTSN/QpCdo/t3zqkI/pn5skT/oJgrGy7
http://2dkbeuwsto3i3e8jaxi6su9wjlmwygtpdp7g65611z-2bbr82uhjqkdv2jrh7.KZ/FiSvI/aaB&dPQ%42kLdM http://2dkbeuwsto3i3e8jaxi6su9wjlmwygtpdp7g65611z-2bbr82uhjqkdv2jrh7.KZ/FiSvI/aaB&dPQ%42kLdM
FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB
ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM ftp://w0yaysrl.XN--CLCHC0EA0B2G2A9GCD/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY
N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/ N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/
http://ah-2d4.ASIA/qmp http://ah-2d4.ASIA/qmp
@ -75,7 +75,7 @@ http://4u3o/BKdhwRyzG
file:///LdsHfPABFz1vRD1OB6Yl/RS6&1Gmz/mfYul/ file:///LdsHfPABFz1vRD1OB6Yl/RS6&1Gmz/mfYul/
ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sgn6&X5EiZdZ0WhTX3T/fa%f3Azz ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sgn6&X5EiZdZ0WhTX3T/fa%f3Azz
z3ymb.KM/DdnrqoBz=YtxSB z3ymb.KM/DdnrqoBz=YtxSB
FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0 FTP://7kgip3z.XN--KPRY57D:15983/OYEQzIA0
nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc
ftp://085.062.055.011/bopfVV/ ftp://085.062.055.011/bopfVV/
ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs
@ -93,12 +93,12 @@ https://[3790:ad57:0B63::e5f7:f6ac:164C]/Obax;zcD/Y%48%9a/Z2xcdar
bl60k0jqkc9.oow84o1.BF/Xly5cTna/BzoQuHi3r8e/o5BDNrvT/=6HRdBjH/Mrp5%02/p%e9pT2Ae bl60k0jqkc9.oow84o1.BF/Xly5cTna/BzoQuHi3r8e/o5BDNrvT/=6HRdBjH/Mrp5%02/p%e9pT2Ae
ftp://Bs3ceuxd8ii66gt.X8wwdpt.BB:27095/3BfkvfzcmTS/FTffh&S/gIWvJ5Kd/AlOQ%3EnO ftp://Bs3ceuxd8ii66gt.X8wwdpt.BB:27095/3BfkvfzcmTS/FTffh&S/gIWvJ5Kd/AlOQ%3EnO
http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w
zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 zQFC1SPO96J.Jy20d8.xn--3e0b707e:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1
ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ
HTTPS://56aderic0knmip9lkqdqag14.uk:45885/lELiK:/vF%4C5Enwqy/P5NGJ2b/dD6sg1yMV HTTPS://56aderic0knmip9lkqdqag14.uk:45885/lELiK:/vF%4C5Enwqy/P5NGJ2b/dD6sg1yMV
ftp://vlt.3g45k63viz2.tcnm3.UA:60664/AJ9iqYk%c1/uKbohn2/K%D1kequ4z8rxFpJ ftp://vlt.3g45k63viz2.tcnm3.UA:60664/AJ9iqYk%c1/uKbohn2/K%D1kequ4z8rxFpJ
Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1 Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1
7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb 7WO6F.XN--45BRJ9C/1L%f9G0NEu/L2lD/mQGNS9UhgCEb
ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB
ftp://lv56pdepzu0b0fo-04qtxv5tt2jc0nsaukrhtz5-e3u1vcb517y3b135zl.e0r1hson.dk/3TVoqjp6%1FCFSkt/006VZfho/gxrWxgDawM3Uk ftp://lv56pdepzu0b0fo-04qtxv5tt2jc0nsaukrhtz5-e3u1vcb517y3b135zl.e0r1hson.dk/3TVoqjp6%1FCFSkt/006VZfho/gxrWxgDawM3Uk
Ftp://7n977.Niyt.2fgkzfhj.q7-DJ.Ow7a.it/5zfRi3PO8/1zfKT9%421tP/?SazEijJq%710COQKWeLE/TdUc%b2u/2AxBw9%4BUN6Zp4Z/KfUZd1MTdPv/L4m1tI3/WJvcK1 Ftp://7n977.Niyt.2fgkzfhj.q7-DJ.Ow7a.it/5zfRi3PO8/1zfKT9%421tP/?SazEijJq%710COQKWeLE/TdUc%b2u/2AxBw9%4BUN6Zp4Z/KfUZd1MTdPv/L4m1tI3/WJvcK1
@ -147,20 +147,20 @@ ftp://Lq.es/%B1ZPdTZgB2mNFW/qre92rM
file:///IZ47ESCtX%aatQab1/V553gjR?Me/#9%68qPw file:///IZ47ESCtX%aatQab1/V553gjR?Me/#9%68qPw
file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH
ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T
79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO 79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--FIQS8S/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO
Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z
ftp://[fd77:4982:C37F:a0a1:7651:E09C:117.093.145.017]/2l91g/s%79lJmUiZ/%A5R2qsJ ftp://[fd77:4982:C37F:a0a1:7651:E09C:117.093.145.017]/2l91g/s%79lJmUiZ/%A5R2qsJ
[62c0::]/d1lmSzoB/5OBVnzn/kOXW%D23 [62c0::]/d1lmSzoB/5OBVnzn/kOXW%D23
Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%bed=uY5hO+s+IKk1S&Q=HHXEC+Gof86QIRHy&35QY5= Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%bed=uY5hO+s+IKk1S&Q=HHXEC+Gof86QIRHy&35QY5=
FILE:///#F9Bgl FILE:///#F9Bgl
jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--3E0B707E/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg
ftp://892f7.oel50j.32.9qj1p-g7lgw.MR:48021/XNKbk2PZQXSvOuGnOAnATDt3/XfHyJtvoC/PW7YrSgf#LmGWJgPw ftp://892f7.oel50j.32.9qj1p-g7lgw.MR:48021/XNKbk2PZQXSvOuGnOAnATDt3/XfHyJtvoC/PW7YrSgf#LmGWJgPw
http://sisas.ua/4CU60ZLK4VgY8AR89 http://sisas.ua/4CU60ZLK4VgY8AR89
FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2 FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2
Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz
file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg
http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms= http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--CLCHC0EA0B2G2A9GCD/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG
ftp://tw7d-6yu.im:2055/%66qbqzss/OmPGW;type=d ftp://tw7d-6yu.im:2055/%66qbqzss/OmPGW;type=d
FTP://zst.tn/QcUpaA/VKvJ2/JN6AKew/iXYIiHm7mfPFmD%21E5/yTQpoiqdbaaS1/LnzOX#VqsobH FTP://zst.tn/QcUpaA/VKvJ2/JN6AKew/iXYIiHm7mfPFmD%21E5/yTQpoiqdbaaS1/LnzOX#VqsobH
@ -228,7 +228,7 @@ file:///UIIGOxv6jvF2%c0/%A8J3%677Gmq8im1zklKhqx/HMhCSY2QcyxvL/
http://Qhk9z.zm/cOGBen/mBsDycEI5V7L1s%84WUj7863/p%5f~okuRD51b0M?b%F2d%67ujGr=oh8PWUtK&j6uX7baX=&sg3RUocA9W=m5IaF&JWH9G=fyiOtnC3+7RJA+ippw96rvu+BxtGg&F6f1=jmPS&3PE0xX5=TGV%5c5J&%fc@NSEynhuvb=&MkRIt33= http://Qhk9z.zm/cOGBen/mBsDycEI5V7L1s%84WUj7863/p%5f~okuRD51b0M?b%F2d%67ujGr=oh8PWUtK&j6uX7baX=&sg3RUocA9W=m5IaF&JWH9G=fyiOtnC3+7RJA+ippw96rvu+BxtGg&F6f1=jmPS&3PE0xX5=TGV%5c5J&%fc@NSEynhuvb=&MkRIt33=
Http://[98cc:433d:2C25:62dd:54ba:d10b:63d3:4C40]/YlbNrJod/fdjuN/qYqSdqr5/KAbXYHO%F0m7Ws9 Http://[98cc:433d:2C25:62dd:54ba:d10b:63d3:4C40]/YlbNrJod/fdjuN/qYqSdqr5/KAbXYHO%F0m7Ws9
file:///ywFY5HK/XAv@v%66o/M2O4Wlny50hypf5%02A8 file:///ywFY5HK/XAv@v%66o/M2O4Wlny50hypf5%02A8
https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--3E0B707E/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
file:///enqvF%EFLOBsZhl8h2z file:///enqvF%EFLOBsZhl8h2z
ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A
ftp://1xf.ipl4f0y6c4.VA/LHuq~/p2nPbE/0YGGNJB%DEje2psef_B/aKOuMl1Q9 ftp://1xf.ipl4f0y6c4.VA/LHuq~/p2nPbE/0YGGNJB%DEje2psef_B/aKOuMl1Q9
@ -240,7 +240,7 @@ http://nEN5ZN.EG/%0efsf4v30L
file:///19%9947/ksd3Sq7W78%27/2K_Ylzcu2q file:///19%9947/ksd3Sq7W78%27/2K_Ylzcu2q
r8sht9qzsc1e2wp.ci/8SbPwlW%5ac/qKEqFi0Q r8sht9qzsc1e2wp.ci/8SbPwlW%5ac/qKEqFi0Q
ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg
6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/ 6S8.Crwllo5e3.jmtz.XN--GECRJ9C/6InlQn/hnhu2f%ac8tX/apq%0D6o/
file:///gVW/nnRNxPfMXKb%72Aq%4A file:///gVW/nnRNxPfMXKb%72Aq%4A
file:///Fzza388TQ file:///Fzza388TQ
file:/// file:///
@ -314,7 +314,7 @@ file:///3%aexrb7UdZ5GpR4ZIfoxwL/vQV%4a2zQxki/QRji6gHpMGgBaM/d%71A2CTpZv-kF0tD/Ig
f5ms.jp/%A1FpERWwTd%BFG/ExC8V5aqx5l2CLJr0mJb5u/DgMvEzAr2U/py9Vg/igr9PzANtw/FFiN1E7 f5ms.jp/%A1FpERWwTd%BFG/ExC8V5aqx5l2CLJr0mJb5u/DgMvEzAr2U/py9Vg/igr9PzANtw/FFiN1E7
https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/ https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/
Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE
Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9 Ftp://3zd7z.etw.XN--KPRW13D/4UztCuTbW2z/LL%2cDI/dTYSi9
t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x
ftp://D02-auxxaeqnv9ve-jlmo3.l10vqu.12jl.2mvjwrsqm.BA/r71QLLNu6oGJjG/HbxrX1Grq8/QR%2agZv4hR ftp://D02-auxxaeqnv9ve-jlmo3.l10vqu.12jl.2mvjwrsqm.BA/r71QLLNu6oGJjG/HbxrX1Grq8/QR%2agZv4hR
file:///XoCg%EDVf/A3ibJYjU file:///XoCg%EDVf/A3ibJYjU
@ -476,7 +476,7 @@ ftp://53.151.134.240/uZqGXLUIu-J/=%0C2pO/PvL0%19MpQBv/
FILE:///Kywof5D5q/0TRS/zayrkrnENB FILE:///Kywof5D5q/0TRS/zayrkrnENB
file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/ file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/
mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs
g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P g6tylc0.daeczh.4q.XN--CLCHC0EA0B2G2A9GCD/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
file:///TJa%86AczeCmM5QMhi/Wox~Ajl/WxUF%5eSA:y%0fD%E21/x%cca%d3Qgx/8iWJ5-h%26/fCK%01nQNrK8#ygTTB file:///TJa%86AczeCmM5QMhi/Wox~Ajl/WxUF%5eSA:y%0fD%E21/x%cca%d3Qgx/8iWJ5-h%26/fCK%01nQNrK8#ygTTB
file:///~%303cUUVYTEaQU5%5DXbogiPKb/favR2rETEh/9TXM%15u/nYCOZpZgL file:///~%303cUUVYTEaQU5%5DXbogiPKb/favR2rETEh/9TXM%15u/nYCOZpZgL
file:///mJM%a1/jv5%53QDqE/bFMu0CBp file:///mJM%a1/jv5%53QDqE/bFMu0CBp
@ -496,7 +496,7 @@ http://gpu16lz.LS/9e%daJrwQfHEpFvsZ3jx/c4STIJ/CmvEGAUx9f/
file://ij9anjtok86ro.uN-BGDQ855IB.sDXAQR.5kr8kz.3J3M8XRM.18r3s0g-6.4rjsmwue0lwao0og17d-5-1.F1h3qgkul29yw2t4p4se5clomncxhmoy.g6c9tbz7.pa/5LMtmbl/1tfIF/pBOV7Hc file://ij9anjtok86ro.uN-BGDQ855IB.sDXAQR.5kr8kz.3J3M8XRM.18r3s0g-6.4rjsmwue0lwao0og17d-5-1.F1h3qgkul29yw2t4p4se5clomncxhmoy.g6c9tbz7.pa/5LMtmbl/1tfIF/pBOV7Hc
HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt
5.Piba4ac.JE/55M1H/AZXdj 5.Piba4ac.JE/55M1H/AZXdj
m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/ m-k6-ej7x.XN--J6W193G/suVrNQSIj9/TmRhHbe/o&0dbqR/
ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/ ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/
hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/ hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/
Ftp://mez27g2tpmk.MC/%B8AHk%95etDns%46/gXbsCn%6C-/s8_Jmy/DhmfT~Di6KD Ftp://mez27g2tpmk.MC/%B8AHk%95etDns%46/gXbsCn%6C-/s8_Jmy/DhmfT~Di6KD
@ -633,7 +633,7 @@ http://047.014.184.200/Z_QdOwjzfBue4Nt/aEn/xuEQD/cXlnoxHIK%7d8h/1%eegEk7E0/8Ejku
Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L
FILE://155.24.106.255/3VEZIT7 FILE://155.24.106.255/3VEZIT7
d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ
lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET lda5l5wc.XN--KPRY57D/pr80SSZ/eNM1%D50lp/Rc%8EimOET
l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C
FILE://a6ys9a4.xj.BY/%99BGXp/F=yJtxc71/gvXuHuB9k FILE://a6ys9a4.xj.BY/%99BGXp/F=yJtxc71/gvXuHuB9k
212.072.006.032/6kV8ce%2e/%e7lzm-HB%4artP/zg6tWMW7RIG?U7=HAXw$D3sM%7DyDJ&Gt= 212.072.006.032/6kV8ce%2e/%e7lzm-HB%4artP/zg6tWMW7RIG?U7=HAXw$D3sM%7DyDJ&Gt=

View File

@ -75,7 +75,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes
+ " samba Halta gamba " + " samba Halta gamba "
+ "ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R\n" + "ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R\n"
+ "M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb\n" + "M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb\n"
+ "Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m" + "Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m"
+ " inter Locutio " + " inter Locutio "
+ "[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/\n" + "[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/\n"
+ "file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7" + "file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7"
@ -91,7 +91,7 @@ public class TestUAX29URLEmailTokenizerFactory extends BaseTokenStreamFactoryTes
"samba", "Halta", "gamba", "samba", "Halta", "gamba",
"ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R", "ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R",
"M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb", "M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb",
"Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m", "Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m",
"inter", "Locutio", "inter", "Locutio",
"[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/", "[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/",
"file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7", "file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7",

View File

@ -60,20 +60,21 @@ public class GenerateJflexTLDMacros {
private static final String APACHE_LICENSE private static final String APACHE_LICENSE
= "/*" + NL = "/*" + NL
+ " * Copyright 2001-2005 The Apache Software Foundation." + NL + " * Licensed to the Apache Software Foundation (ASF) under one or more" + NL
+ " *" + NL + " * contributor license agreements. See the NOTICE file distributed with" + NL
+ " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL + " * this work for additional information regarding copyright ownership." + NL
+ " * you may not use this file except in compliance with the License." + NL + " * The ASF licenses this file to You under the Apache License, Version 2.0" + NL
+ " * You may obtain a copy of the License at" + NL + " * (the \"License\"); you may not use this file except in compliance with" + NL
+ " *" + NL + " * the License. You may obtain a copy of the License at" + NL
+ " * http://www.apache.org/licenses/LICENSE-2.0" + NL + " *" + NL
+ " *" + NL + " * http://www.apache.org/licenses/LICENSE-2.0" + NL
+ " * Unless required by applicable law or agreed to in writing, software" + NL + " *" + NL
+ " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL + " * Unless required by applicable law or agreed to in writing, software" + NL
+ " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
+ " * See the License for the specific language governing permissions and" + NL + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
+ " * limitations under the License." + NL + " * See the License for the specific language governing permissions and" + NL
+ " */" + NL + NL; + " * limitations under the License." + NL
+ " */" + NL;
private static final Pattern TLD_PATTERN_1 private static final Pattern TLD_PATTERN_1
= Pattern.compile("([-A-Za-z0-9]+)\\.\\s+NS\\s+.*"); = Pattern.compile("([-A-Za-z0-9]+)\\.\\s+NS\\s+.*");

View File

@ -36,40 +36,45 @@ public class GenerateJFlexSupplementaryMacros {
static { static {
DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC")); DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
} }
private static final String APACHE_LICENSE private static final String APACHE_LICENSE
= "/*" + NL = "/*" + NL
+ " * Copyright 2010 The Apache Software Foundation." + NL + " * Licensed to the Apache Software Foundation (ASF) under one or more" + NL
+ " * contributor license agreements. See the NOTICE file distributed with" + NL
+ " * this work for additional information regarding copyright ownership." + NL
+ " * The ASF licenses this file to You under the Apache License, Version 2.0" + NL
+ " * (the \"License\"); you may not use this file except in compliance with" + NL
+ " * the License. You may obtain a copy of the License at" + NL
+ " *" + NL + " *" + NL
+ " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL + " * http://www.apache.org/licenses/LICENSE-2.0" + NL
+ " * you may not use this file except in compliance with the License." + NL
+ " * You may obtain a copy of the License at" + NL
+ " *" + NL
+ " * http://www.apache.org/licenses/LICENSE-2.0" + NL
+ " *" + NL + " *" + NL
+ " * Unless required by applicable law or agreed to in writing, software" + NL + " * Unless required by applicable law or agreed to in writing, software" + NL
+ " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
+ " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
+ " * See the License for the specific language governing permissions and" + NL + " * See the License for the specific language governing permissions and" + NL
+ " * limitations under the License." + NL + " * limitations under the License." + NL
+ " */" + NL + NL; + " */" + NL;
public static void main(String args[]) { public static void main(String args[]) {
outputHeader(); outputHeader();
outputMacro("ALetterSupp", "[:WordBreak=ALetter:]"); outputMacro("ALetterSupp", "[:WordBreak=ALetter:]");
outputMacro("FormatSupp", "[:WordBreak=Format:]"); outputMacro("FormatSupp", "[:WordBreak=Format:]");
outputMacro("ExtendSupp", "[:WordBreak=Extend:]"); outputMacro("NumericSupp", "[:WordBreak=Numeric:]");
outputMacro("NumericSupp", "[:WordBreak=Numeric:]"); outputMacro("ExtendSupp", "[:WordBreak=Extend:]");
outputMacro("KatakanaSupp", "[:WordBreak=Katakana:]"); outputMacro("KatakanaSupp", "[:WordBreak=Katakana:]");
outputMacro("MidLetterSupp", "[:WordBreak=MidLetter:]"); outputMacro("MidLetterSupp", "[:WordBreak=MidLetter:]");
outputMacro("MidNumSupp", "[:WordBreak=MidNum:]"); outputMacro("MidNumSupp", "[:WordBreak=MidNum:]");
outputMacro("MidNumLetSupp", "[:WordBreak=MidNumLet:]"); outputMacro("MidNumLetSupp", "[:WordBreak=MidNumLet:]");
outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]"); outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]");
outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]"); outputMacro("ExtendNumLetSupp", "[:WordBreak=ExtendNumLet:]");
outputMacro("ComplexContextSupp", "[:LineBreak=Complex_Context:]"); outputMacro("ComplexContextSupp", "[:LineBreak=Complex_Context:]");
outputMacro("HanSupp", "[:Script=Han:]"); outputMacro("HanSupp", "[:Script=Han:]");
outputMacro("HiraganaSupp", "[:Script=Hiragana:]"); outputMacro("HiraganaSupp", "[:Script=Hiragana:]");
outputMacro("SingleQuoteSupp", "[:WordBreak=Single_Quote:]");
outputMacro("DoubleQuoteSupp", "[:WordBreak=Double_Quote:]");
outputMacro("HebrewLetterSupp", "[:WordBreak=Hebrew_Letter:]");
outputMacro("RegionalIndicatorSupp", "[:WordBreak=Regional_Indicator:]");
} }
static void outputHeader() { static void outputHeader() {

View File

@ -476,7 +476,7 @@
<available property="jflex.present" classname="jflex.anttask.JFlexTask"> <available property="jflex.present" classname="jflex.anttask.JFlexTask">
<classpath refid="jflex.classpath"/> <classpath refid="jflex.classpath"/>
</available> </available>
<fail unless="jflex.present"> <fail unless="jflex.present">&#xA0;
################################################################## ##################################################################
JFlex not found. JFlex not found.
JFlex Home: ${jflex.home} JFlex Home: ${jflex.home}
@ -484,14 +484,14 @@
Please install the jFlex 1.5 version (currently not released) Please install the jFlex 1.5 version (currently not released)
from its SVN repository: from its SVN repository:
svn co -r 623 http://jflex.svn.sourceforge.net/svnroot/jflex/trunk jflex svn co -r 722 https://svn.code.sf.net/p/jflex/code/trunk jflex
cd jflex cd jflex
mvn install mvn install
Then, create a build.properties file either in your home Then, create a build.properties file either in your home
directory, or within the Lucene directory and set the jflex.home directory, or within the Lucene directory and set the jflex.home
property to the path where the JFlex trunk checkout is located property to the path where the JFlex trunk checkout is located
(in the above example its the directory called "jflex"). (in the above example it's the directory called "jflex").
################################################################## ##################################################################
</fail> </fail>