mirror of https://github.com/apache/lucene.git
LUCENE-2167: Implement StandardTokenizer with the UAX#29 Standard
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1002032 13f79535-47bb-0310-9956-ffa450edef68
parent c562b10b2e
commit 3c26a9167c
@@ -17,18 +17,7 @@ package org.apache.lucene.benchmark.quality;
  * limitations under the License.
  */
 
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-
 import org.apache.lucene.benchmark.BenchmarkTestCase;
-import org.apache.lucene.benchmark.byTask.TestPerfTasksLogic;
-import org.apache.lucene.benchmark.quality.Judge;
-import org.apache.lucene.benchmark.quality.QualityQuery;
-import org.apache.lucene.benchmark.quality.QualityQueryParser;
-import org.apache.lucene.benchmark.quality.QualityBenchmark;
 import org.apache.lucene.benchmark.quality.trec.TrecJudge;
 import org.apache.lucene.benchmark.quality.trec.TrecTopicsReader;
 import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
@@ -36,6 +25,12 @@ import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.FSDirectory;
 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+
 /**
  * Test that quality run does its job.
  * <p>
@@ -177,6 +172,7 @@ public class TestQualityRun extends BenchmarkTestCase {
     String algLines[] = {
       "# ----- properties ",
       "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+      "analyzer=org.apache.lucene.analysis.standard.ClassicAnalyzer",
       "docs.file=" + getWorkDirResourcePath("reuters.578.lines.txt.bz2"),
       "content.source.log.step=2500",
       "doc.term.vector=false",
@@ -9,6 +9,12 @@ API Changes
 
 * LUCENE-2413: Removed the AnalyzerUtil in common/miscellaneous. (Robert Muir)
 
+* LUCENE-2167: StandardTokenizer/Analyzer in common/standard/ now implement
+  the Word Break rules from the Unicode Text Segmentation algorithm (UAX#29),
+  as well as tokenizing URLs and email addresses according to the relevant
+  RFCs. ClassicTokenizer/Analyzer retains the old StandardTokenizer/Analyzer
+  behavior. (Steven Rowe, Robert Muir, Uwe Schindler)
+
 New Features
 
 * LUCENE-2413: Consolidated Solr analysis components into common.
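
Note: a minimal sketch of what this CHANGES entry means in practice, assuming the 3.1-era analyzer APIs (this snippet is illustrative, not part of the commit). StandardAnalyzer picks up the UAX#29 behavior plus URL/email recognition; ClassicAnalyzer keeps the old grammar.

    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.ClassicAnalyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class TokenizerComparison {
      static void dump(Analyzer a, String text) throws Exception {
        TokenStream ts = a.tokenStream("f", new StringReader(text));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.print("[" + term.toString() + "] ");
        }
        System.out.println();
      }

      public static void main(String[] args) throws Exception {
        String text = "Send mail to dev@lucene.apache.org";
        // UAX#29 segmentation plus URL/email tokenization (new behavior)
        dump(new StandardAnalyzer(Version.LUCENE_31), text);
        // the pre-3.1 grammar, kept alive under the new Classic* names
        dump(new ClassicAnalyzer(Version.LUCENE_31), text);
      }
    }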
@@ -52,3 +52,8 @@ See http://project.carrot2.org/license.html.
 
 The SmartChineseAnalyzer source code (smartcn) was
 provided by Xiaoping Gao and copyright 2009 by www.imdict.net.
+
+WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/)
+is derived from Unicode data such as the Unicode Character Database.
+See http://unicode.org/copyright.html for more details.
+
@@ -38,7 +38,7 @@
 
   <target name="compile-core" depends="jflex-notice, common.compile-core"/>
 
-  <target name="jflex" depends="jflex-check,clean-jflex,jflex-StandardAnalyzer,jflex-wiki-tokenizer"/>
+  <target name="jflex" depends="jflex-check,clean-jflex,jflex-StandardAnalyzer,jflex-UAX29Tokenizer,jflex-wiki-tokenizer"/>
 
   <target name="jflex-wiki-tokenizer" depends="init,jflex-check" if="jflex.present">
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
@@ -49,27 +49,61 @@
            nobak="on"/>
   </target>
 
-  <target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
+  <target name="jflex-StandardAnalyzer" depends="init,jflex-check,gen-tlds" if="jflex.present">
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
       <classpath refid="jflex.classpath"/>
     </taskdef>
 
-    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex"
+    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex"
            outdir="src/java/org/apache/lucene/analysis/standard"
            nobak="on" />
-    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex"
+    <jflex file="src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex"
            outdir="src/java/org/apache/lucene/analysis/standard"
            nobak="on" />
   </target>
 
+  <target name="jflex-UAX29Tokenizer" depends="jflex-check" if="jflex.present">
+    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
+      <classpath refid="jflex.classpath"/>
+    </taskdef>
+    <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.jflex"
+           outdir="src/java/org/apache/lucene/analysis/standard"
+           nobak="on" />
+  </target>
+
   <target name="clean-jflex">
     <delete>
       <fileset dir="src/java/org/apache/lucene/analysis/wikipedia" includes="*.java">
         <containsregexp expression="generated.*by.*JFlex"/>
       </fileset>
       <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
         <containsregexp expression="generated.*by.*JFlex"/>
       </fileset>
     </delete>
   </target>
 
+  <property name="tld.zones" value="http://www.internic.net/zones/root.zone"/>
+  <property name="tld.output" location="src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro"/>
+
+  <target name="gen-tlds" depends="compile-tools">
+    <java
+      classname="org.apache.lucene.analysis.standard.GenerateJflexTLDMacros"
+      dir="."
+      fork="true"
+      failonerror="true">
+      <classpath>
+        <pathelement location="${build.dir}/classes/tools"/>
+      </classpath>
+      <arg value="${tld.zones}"/>
+      <arg value="${tld.output}"/>
+    </java>
+  </target>
+
+  <target name="compile-tools">
+    <compile
+      srcdir="src/tools/java"
+      destdir="${build.dir}/classes/tools">
+      <classpath refid="classpath"/>
+    </compile>
+  </target>
 </project>
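
Note: the gen-tlds target above hands GenerateJflexTLDMacros the IANA root zone URL and the macro output path. A rough sketch of the idea (not the actual tool; the zone-file parsing details here are assumptions, and it reads a local copy rather than downloading):

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.util.TreeSet;

    public class TldMacroSketch {
      public static void main(String[] args) throws Exception {
        TreeSet<String> tlds = new TreeSet<String>();
        // args[0]: a local copy of root.zone; lines look roughly like
        // "com.  172800  IN  NS  a.gtld-servers.net."
        BufferedReader in = new BufferedReader(new FileReader(args[0]));
        String line;
        while ((line = in.readLine()) != null) {
          String[] parts = line.split("\\s+");
          if (parts.length >= 4 && parts[3].equals("NS")) {
            String name = parts[0];
            if (name.endsWith(".")) name = name.substring(0, name.length() - 1);
            if (!name.isEmpty() && !name.contains(".")) // top-level label only
              tlds.add(name.toLowerCase());
          }
        }
        in.close();
        StringBuilder macro = new StringBuilder("ASCIITLD = \".\" (\n");
        boolean first = true;
        for (String tld : tlds) {
          macro.append(first ? "    " : "  | ");
          for (char c : tld.toCharArray()) {
            if (Character.isLetter(c)) // emit [cC][oO][mM]-style classes
              macro.append('[').append(c).append(Character.toUpperCase(c)).append(']');
            else
              macro.append(c);         // digits and '-' pass through
          }
          macro.append('\n');
          first = false;
        }
        macro.append("  ) \".\"?   // Accept trailing root (empty) domain\n");
        System.out.print(macro);
      }
    }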
@@ -132,7 +132,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
   @Override
   public TokenStreamComponents createComponents(String fieldName, Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
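
Note: the one-line change above (StandardFilter now takes the Version argument) repeats across every language analyzer in the hunks below, so the filter can adapt its behavior to the new UAX#29 StandardTokenizer. The pattern, side by side:

    TokenStream result = new StandardFilter(source);               // before LUCENE-2167
    TokenStream result = new StandardFilter(matchVersion, source); // after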
@@ -218,7 +218,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
       Reader reader) {
     Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new LowerCaseFilter(matchVersion, source);
-    result = new StandardFilter(result);
+    result = new StandardFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(excltable != null && !excltable.isEmpty())
       result = new KeywordMarkerFilter(result, excltable);
@@ -247,7 +247,7 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter( matchVersion, result, stoptable);
     if (matchVersion.onOrAfter(Version.LUCENE_31)) {
@@ -120,7 +120,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -237,7 +237,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter( matchVersion, result, stopwords);
     result = new KeywordMarkerFilter(result, exclusionSet);
@@ -135,7 +135,7 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new GreekLowerCaseFilter(matchVersion, source);
     if (matchVersion.onOrAfter(Version.LUCENE_31))
-      result = new StandardFilter(result);
+      result = new StandardFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if (matchVersion.onOrAfter(Version.LUCENE_31))
       result = new GreekStemFilter(result);
@@ -104,6 +104,9 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
     TokenStream result = new StandardFilter(source);
+    // prior to this we get the classic behavior, standardfilter does it for us.
+    if (matchVersion.onOrAfter(Version.LUCENE_31))
+      result = new EnglishPossessiveFilter(result);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -0,0 +1,52 @@
+package org.apache.lucene.analysis.en;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+/**
+ * TokenFilter that removes possessives (trailing 's) from words.
+ */
+public final class EnglishPossessiveFilter extends TokenFilter {
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  public EnglishPossessiveFilter(TokenStream input) {
+    super(input);
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+
+    final char[] buffer = termAtt.buffer();
+    final int bufferLength = termAtt.length();
+
+    if (bufferLength >= 2 &&
+        buffer[bufferLength-2] == '\'' &&
+        (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S'))
+      termAtt.setLength(bufferLength - 2); // Strip last 2 characters off
+
+    return true;
+  }
+}
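
Note: a hypothetical usage sketch for the new filter (the WhitespaceTokenizer wiring is chosen here for brevity and is not from the commit):

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class PossessiveDemo {
      public static void main(String[] args) throws Exception {
        TokenStream ts = new EnglishPossessiveFilter(
            new WhitespaceTokenizer(Version.LUCENE_31, new StringReader("Bob's dog's bone")));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println(term.toString()); // Bob, dog, bone
        }
      }
    }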
@@ -120,7 +120,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -120,7 +120,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -240,7 +240,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
       Reader reader) {
     if (matchVersion.onOrAfter(Version.LUCENE_31)) {
       final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-      TokenStream result = new StandardFilter(source);
+      TokenStream result = new StandardFilter(matchVersion, source);
       result = new ElisionFilter(matchVersion, result);
       result = new LowerCaseFilter(matchVersion, result);
       result = new StopFilter(matchVersion, result, stopwords);
@@ -120,7 +120,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -119,7 +119,7 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, source);
     result = new StopFilter(matchVersion, result, stopwords);
     if (!stemExclusionSet.isEmpty()) {
@@ -120,7 +120,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -246,7 +246,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
       Reader aReader) {
     if (matchVersion.onOrAfter(Version.LUCENE_31)) {
       final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
-      TokenStream result = new StandardFilter(source);
+      TokenStream result = new StandardFilter(matchVersion, source);
       result = new LowerCaseFilter(matchVersion, result);
       result = new StopFilter(matchVersion, result, stoptable);
       if (!excltable.isEmpty())
@@ -120,7 +120,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -120,7 +120,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -124,7 +124,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
@@ -175,7 +175,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
       Reader reader) {
     if (matchVersion.onOrAfter(Version.LUCENE_31)) {
       final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-      TokenStream result = new StandardFilter(source);
+      TokenStream result = new StandardFilter(matchVersion, source);
       result = new LowerCaseFilter(matchVersion, result);
       result = new StopFilter(matchVersion, result, stopwords);
       if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerFilter(
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.snowball;
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
 import org.apache.lucene.analysis.standard.*;
 import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
 import org.apache.lucene.analysis.util.CharArraySet;

@@ -80,7 +81,11 @@ public final class SnowballAnalyzer extends Analyzer {
   @Override
   public TokenStream tokenStream(String fieldName, Reader reader) {
     TokenStream result = new StandardTokenizer(matchVersion, reader);
-    result = new StandardFilter(result);
+    result = new StandardFilter(matchVersion, result);
+    // remove the possessive 's for english stemmers
+    if (matchVersion.onOrAfter(Version.LUCENE_31) &&
+        (name.equals("English") || name.equals("Porter") || name.equals("Lovins")))
+      result = new EnglishPossessiveFilter(result);
     // Use a special lowercase filter for turkish, the stemmer expects it.
     if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish"))
       result = new TurkishLowerCaseFilter(result);

@@ -108,7 +113,7 @@ public final class SnowballAnalyzer extends Analyzer {
     if (streams == null) {
       streams = new SavedStreams();
       streams.source = new StandardTokenizer(matchVersion, reader);
-      streams.result = new StandardFilter(streams.source);
+      streams.result = new StandardFilter(matchVersion, streams.source);
       // Use a special lowercase filter for turkish, the stemmer expects it.
       if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish"))
         streams.result = new TurkishLowerCaseFilter(streams.result);
@@ -0,0 +1,318 @@
+/*
+ * Copyright 2001-2005 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
+// file version from Tuesday, September 14, 2010 11:34:20 AM UTC
+// generated on Wednesday, September 15, 2010 7:00:44 AM UTC
+// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
+
+ASCIITLD = "." (
+    [aA][cC]
+  | [aA][dD]
+  | [aA][eE]
+  | [aA][eE][rR][oO]
+  | [aA][fF]
+  | [aA][gG]
+  | [aA][iI]
+  | [aA][lL]
+  | [aA][mM]
+  | [aA][nN]
+  | [aA][oO]
+  | [aA][qQ]
+  | [aA][rR]
+  | [aA][rR][pP][aA]
+  | [aA][sS]
+  | [aA][sS][iI][aA]
+  | [aA][tT]
+  | [aA][uU]
+  | [aA][wW]
+  | [aA][xX]
+  | [aA][zZ]
+  | [bB][aA]
+  | [bB][bB]
+  | [bB][dD]
+  | [bB][eE]
+  | [bB][fF]
+  | [bB][gG]
+  | [bB][hH]
+  | [bB][iI]
+  | [bB][iI][zZ]
+  | [bB][jJ]
+  | [bB][mM]
+  | [bB][nN]
+  | [bB][oO]
+  | [bB][rR]
+  | [bB][sS]
+  | [bB][tT]
+  | [bB][vV]
+  | [bB][wW]
+  | [bB][yY]
+  | [bB][zZ]
+  | [cC][aA]
+  | [cC][aA][tT]
+  | [cC][cC]
+  | [cC][dD]
+  | [cC][fF]
+  | [cC][gG]
+  | [cC][hH]
+  | [cC][iI]
+  | [cC][kK]
+  | [cC][lL]
+  | [cC][mM]
+  | [cC][nN]
+  | [cC][oO]
+  | [cC][oO][mM]
+  | [cC][oO][oO][pP]
+  | [cC][rR]
+  | [cC][uU]
+  | [cC][vV]
+  | [cC][xX]
+  | [cC][yY]
+  | [cC][zZ]
+  | [dD][eE]
+  | [dD][jJ]
+  | [dD][kK]
+  | [dD][mM]
+  | [dD][oO]
+  | [dD][zZ]
+  | [eE][cC]
+  | [eE][dD][uU]
+  | [eE][eE]
+  | [eE][gG]
+  | [eE][rR]
+  | [eE][sS]
+  | [eE][tT]
+  | [eE][uU]
+  | [fF][iI]
+  | [fF][jJ]
+  | [fF][kK]
+  | [fF][mM]
+  | [fF][oO]
+  | [fF][rR]
+  | [gG][aA]
+  | [gG][bB]
+  | [gG][dD]
+  | [gG][eE]
+  | [gG][fF]
+  | [gG][gG]
+  | [gG][hH]
+  | [gG][iI]
+  | [gG][lL]
+  | [gG][mM]
+  | [gG][nN]
+  | [gG][oO][vV]
+  | [gG][pP]
+  | [gG][qQ]
+  | [gG][rR]
+  | [gG][sS]
+  | [gG][tT]
+  | [gG][uU]
+  | [gG][wW]
+  | [gG][yY]
+  | [hH][kK]
+  | [hH][mM]
+  | [hH][nN]
+  | [hH][rR]
+  | [hH][tT]
+  | [hH][uU]
+  | [iI][dD]
+  | [iI][eE]
+  | [iI][lL]
+  | [iI][mM]
+  | [iI][nN]
+  | [iI][nN][fF][oO]
+  | [iI][nN][tT]
+  | [iI][oO]
+  | [iI][qQ]
+  | [iI][rR]
+  | [iI][sS]
+  | [iI][tT]
+  | [jJ][eE]
+  | [jJ][mM]
+  | [jJ][oO]
+  | [jJ][oO][bB][sS]
+  | [jJ][pP]
+  | [kK][eE]
+  | [kK][gG]
+  | [kK][hH]
+  | [kK][iI]
+  | [kK][mM]
+  | [kK][nN]
+  | [kK][pP]
+  | [kK][rR]
+  | [kK][wW]
+  | [kK][yY]
+  | [kK][zZ]
+  | [lL][aA]
+  | [lL][bB]
+  | [lL][cC]
+  | [lL][iI]
+  | [lL][kK]
+  | [lL][rR]
+  | [lL][sS]
+  | [lL][tT]
+  | [lL][uU]
+  | [lL][vV]
+  | [lL][yY]
+  | [mM][aA]
+  | [mM][cC]
+  | [mM][dD]
+  | [mM][eE]
+  | [mM][gG]
+  | [mM][hH]
+  | [mM][iI][lL]
+  | [mM][kK]
+  | [mM][lL]
+  | [mM][mM]
+  | [mM][nN]
+  | [mM][oO]
+  | [mM][oO][bB][iI]
+  | [mM][pP]
+  | [mM][qQ]
+  | [mM][rR]
+  | [mM][sS]
+  | [mM][tT]
+  | [mM][uU]
+  | [mM][uU][sS][eE][uU][mM]
+  | [mM][vV]
+  | [mM][wW]
+  | [mM][xX]
+  | [mM][yY]
+  | [mM][zZ]
+  | [nN][aA]
+  | [nN][aA][mM][eE]
+  | [nN][cC]
+  | [nN][eE]
+  | [nN][eE][tT]
+  | [nN][fF]
+  | [nN][gG]
+  | [nN][iI]
+  | [nN][lL]
+  | [nN][oO]
+  | [nN][pP]
+  | [nN][rR]
+  | [nN][uU]
+  | [nN][zZ]
+  | [oO][mM]
+  | [oO][rR][gG]
+  | [pP][aA]
+  | [pP][eE]
+  | [pP][fF]
+  | [pP][gG]
+  | [pP][hH]
+  | [pP][kK]
+  | [pP][lL]
+  | [pP][mM]
+  | [pP][nN]
+  | [pP][rR]
+  | [pP][rR][oO]
+  | [pP][sS]
+  | [pP][tT]
+  | [pP][wW]
+  | [pP][yY]
+  | [qQ][aA]
+  | [rR][eE]
+  | [rR][oO]
+  | [rR][sS]
+  | [rR][uU]
+  | [rR][wW]
+  | [sS][aA]
+  | [sS][bB]
+  | [sS][cC]
+  | [sS][dD]
+  | [sS][eE]
+  | [sS][gG]
+  | [sS][hH]
+  | [sS][iI]
+  | [sS][jJ]
+  | [sS][kK]
+  | [sS][lL]
+  | [sS][mM]
+  | [sS][nN]
+  | [sS][oO]
+  | [sS][rR]
+  | [sS][tT]
+  | [sS][uU]
+  | [sS][vV]
+  | [sS][yY]
+  | [sS][zZ]
+  | [tT][cC]
+  | [tT][dD]
+  | [tT][eE][lL]
+  | [tT][fF]
+  | [tT][gG]
+  | [tT][hH]
+  | [tT][jJ]
+  | [tT][kK]
+  | [tT][lL]
+  | [tT][mM]
+  | [tT][nN]
+  | [tT][oO]
+  | [tT][pP]
+  | [tT][rR]
+  | [tT][rR][aA][vV][eE][lL]
+  | [tT][tT]
+  | [tT][vV]
+  | [tT][wW]
+  | [tT][zZ]
+  | [uU][aA]
+  | [uU][gG]
+  | [uU][kK]
+  | [uU][sS]
+  | [uU][yY]
+  | [uU][zZ]
+  | [vV][aA]
+  | [vV][cC]
+  | [vV][eE]
+  | [vV][gG]
+  | [vV][iI]
+  | [vV][nN]
+  | [vV][uU]
+  | [wW][fF]
+  | [wW][sS]
+  | [xX][nN]--0[zZ][wW][mM]56[dD]
+  | [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
+  | [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
+  | [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
+  | [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
+  | [xX][nN]--[fF][iI][qQ][sS]8[sS]
+  | [xX][nN]--[fF][iI][qQ][zZ]9[sS]
+  | [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
+  | [xX][nN]--[gG]6[wW]251[dD]
+  | [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]
+  | [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
+  | [xX][nN]--[jJ]6[wW]193[gG]
+  | [xX][nN]--[jJ][xX][aA][lL][pP][dD][lL][pP]
+  | [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]
+  | [xX][nN]--[kK][pP][rR][wW]13[dD]
+  | [xX][nN]--[kK][pP][rR][yY]57[dD]
+  | [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
+  | [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
+  | [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
+  | [xX][nN]--[oO]3[cC][wW]4[hH]
+  | [xX][nN]--[pP]1[aA][iI]
+  | [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
+  | [xX][nN]--[wW][gG][bB][hH]1[cC]
+  | [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
+  | [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
+  | [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
+  | [yY][eE]
+  | [yY][tT]
+  | [zZ][aA]
+  | [zZ][mM]
+  | [zZ][wW]
+  ) "."?   // Accept trailing root (empty) domain
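
Note: the generated macro matches any IANA TLD after a dot, case-insensitively, with an optional trailing root dot; the URL/email rules in the new grammar consume it. A rough Java-regex analogue with an abbreviated alternation (illustration only; the real matching happens inside the JFlex scanner):

    import java.util.regex.Pattern;

    public class TldSketch {
      public static void main(String[] args) {
        Pattern asciiTld = Pattern.compile("\\.(com|org|museum|xn--p1ai)\\.?",
            Pattern.CASE_INSENSITIVE); // abbreviated stand-in for the full list
        System.out.println(asciiTld.matcher(".COM.").matches());   // true
        System.out.println(asciiTld.matcher(".example").matches()); // false
      }
    }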
@@ -0,0 +1,140 @@
+package org.apache.lucene.analysis.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.util.Version;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+/**
+ * Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link
+ * LowerCaseFilter} and {@link StopFilter}, using a list of
+ * English stop words.
+ *
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating ClassicAnalyzer:
+ * <ul>
+ *   <li> As of 3.1, StopFilter correctly handles Unicode 4.0
+ *        supplementary characters in stopwords
+ *   <li> As of 2.9, StopFilter preserves position
+ *        increments
+ *   <li> As of 2.4, Tokens incorrectly identified as acronyms
+ *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
+ * </ul>
+ *
+ * ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1.
+ * As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation,
+ * as specified by UAX#29.
+ */
+public final class ClassicAnalyzer extends StopwordAnalyzerBase {
+
+  /** Default maximum allowed token length */
+  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+  /**
+   * Specifies whether deprecated acronyms should be replaced with HOST type.
+   * See {@linkplain "https://issues.apache.org/jira/browse/LUCENE-1068"}
+   */
+  private final boolean replaceInvalidAcronym;
+
+  /** An unmodifiable set containing some common English words that are usually not
+  useful for searching. */
+  public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+  /** Builds an analyzer with the given stop words.
+   * @param matchVersion Lucene version to match See {@link
+   * <a href="#version">above</a>}
+   * @param stopWords stop words */
+  public ClassicAnalyzer(Version matchVersion, Set<?> stopWords) {
+    super(matchVersion, stopWords);
+    replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
+  }
+
+  /** Builds an analyzer with the default stop words ({@link
+   * #STOP_WORDS_SET}).
+   * @param matchVersion Lucene version to match See {@link
+   * <a href="#version">above</a>}
+   */
+  public ClassicAnalyzer(Version matchVersion) {
+    this(matchVersion, STOP_WORDS_SET);
+  }
+
+  /** Builds an analyzer with the stop words from the given file.
+   * @see WordlistLoader#getWordSet(File)
+   * @param matchVersion Lucene version to match See {@link
+   * <a href="#version">above</a>}
+   * @param stopwords File to read stop words from */
+  public ClassicAnalyzer(Version matchVersion, File stopwords) throws IOException {
+    this(matchVersion, WordlistLoader.getWordSet(stopwords));
+  }
+
+  /** Builds an analyzer with the stop words from the given reader.
+   * @see WordlistLoader#getWordSet(Reader)
+   * @param matchVersion Lucene version to match See {@link
+   * <a href="#version">above</a>}
+   * @param stopwords Reader to read stop words from */
+  public ClassicAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
+    this(matchVersion, WordlistLoader.getWordSet(stopwords));
+  }
+
+  /**
+   * Set maximum allowed token length.  If a token is seen
+   * that exceeds this length then it is discarded.  This
+   * setting only takes effect the next time tokenStream or
+   * reusableTokenStream is called.
+   */
+  public void setMaxTokenLength(int length) {
+    maxTokenLength = length;
+  }
+
+  /**
+   * @see #setMaxTokenLength
+   */
+  public int getMaxTokenLength() {
+    return maxTokenLength;
+  }
+
+  @Override
+  protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
+    final ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
+    src.setMaxTokenLength(maxTokenLength);
+    src.setReplaceInvalidAcronym(replaceInvalidAcronym);
+    TokenStream tok = new ClassicFilter(src);
+    tok = new LowerCaseFilter(matchVersion, tok);
+    tok = new StopFilter(matchVersion, tok, stopwords);
+    return new TokenStreamComponents(src, tok) {
+      @Override
+      protected boolean reset(final Reader reader) throws IOException {
+        src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
+        return super.reset(reader);
+      }
+    };
+  }
+}
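
Note: a hypothetical indexing sketch for the renamed analyzer (the index path and the IndexWriterConfig wiring are assumptions, not from the commit). Applications that must keep the pre-3.1 StandardAnalyzer behavior switch to ClassicAnalyzer:

    import java.io.File;
    import org.apache.lucene.analysis.standard.ClassicAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;

    public class ClassicIndexing {
      public static void main(String[] args) throws Exception {
        ClassicAnalyzer analyzer = new ClassicAnalyzer(Version.LUCENE_31);
        analyzer.setMaxTokenLength(255); // the default; shown for illustration
        IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_31, analyzer);
        IndexWriter writer = new IndexWriter(FSDirectory.open(new File("/tmp/idx")), cfg);
        writer.close();
      }
    }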
@@ -0,0 +1,73 @@
+package org.apache.lucene.analysis.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+
+/** Normalizes tokens extracted with {@link ClassicTokenizer}. */
+public class ClassicFilter extends TokenFilter {
+
+  /** Construct filtering <i>in</i>. */
+  public ClassicFilter(TokenStream in) {
+    super(in);
+  }
+
+  private static final String APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
+  private static final String ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
+
+  // this filter uses the token type attribute
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  /** Returns the next token in the stream, or null at EOS.
+   * <p>Removes <tt>'s</tt> from the end of words.
+   * <p>Removes dots from acronyms.
+   */
+  @Override
+  public final boolean incrementToken() throws java.io.IOException {
+    if (!input.incrementToken()) {
+      return false;
+    }
+
+    final char[] buffer = termAtt.buffer();
+    final int bufferLength = termAtt.length();
+    final String type = typeAtt.type();
+
+    if (type == APOSTROPHE_TYPE &&      // remove 's
+        bufferLength >= 2 &&
+        buffer[bufferLength-2] == '\'' &&
+        (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
+      // Strip last 2 characters off
+      termAtt.setLength(bufferLength - 2);
+    } else if (type == ACRONYM_TYPE) {  // remove dots
+      int upto = 0;
+      for(int i=0;i<bufferLength;i++) {
+        char c = buffer[i];
+        if (c != '.')
+          buffer[upto++] = c;
+      }
+      termAtt.setLength(upto);
+    }
+
+    return true;
+  }
+}
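
Note: a small usage sketch of the filter's two normalizations (expected output is inferred from the code above; illustration only): the <APOSTROPHE> rule turns "O'Neil's" into "O'Neil", and the <ACRONYM> rule turns "I.B.M." into "IBM".

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.ClassicFilter;
    import org.apache.lucene.analysis.standard.ClassicTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class ClassicFilterDemo {
      public static void main(String[] args) throws Exception {
        TokenStream ts = new ClassicFilter(new ClassicTokenizer(
            Version.LUCENE_31, new StringReader("O'Neil's I.B.M.")));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println(term.toString()); // O'Neil, IBM
        }
      }
    }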
@ -0,0 +1,234 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
|
/** A grammar-based tokenizer constructed with JFlex
|
||||||
|
*
|
||||||
|
* <p> This should be a good tokenizer for most European-language documents:
|
||||||
|
*
|
||||||
|
* <ul>
|
||||||
|
* <li>Splits words at punctuation characters, removing punctuation. However, a
|
||||||
|
* dot that's not followed by whitespace is considered part of a token.
|
||||||
|
* <li>Splits words at hyphens, unless there's a number in the token, in which case
|
||||||
|
* the whole token is interpreted as a product number and is not split.
|
||||||
|
* <li>Recognizes email addresses and internet hostnames as one token.
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* <p>Many applications have specific tokenizer needs. If this tokenizer does
|
||||||
|
* not suit your application, please consider copying this source code
|
||||||
|
* directory to your project and maintaining your own grammar-based tokenizer.
|
||||||
|
*
|
||||||
|
* <a name="version"/>
|
||||||
|
* <p>You must specify the required {@link Version}
|
||||||
|
* compatibility when creating ClassicAnalyzer:
|
||||||
|
* <ul>
|
||||||
|
* <li> As of 2.4, Tokens incorrectly identified as acronyms
|
||||||
|
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1608</a>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
|
||||||
|
* As of 3.1, {@link StandardTokenizer} implements Unicode text segmentation,
|
||||||
|
* as specified by UAX#29.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public final class ClassicTokenizer extends Tokenizer {
|
||||||
|
/** A private instance of the JFlex-constructed scanner */
|
||||||
|
private StandardTokenizerInterface scanner;
|
||||||
|
|
||||||
|
public static final int ALPHANUM = 0;
|
||||||
|
public static final int APOSTROPHE = 1;
|
||||||
|
public static final int ACRONYM = 2;
|
||||||
|
public static final int COMPANY = 3;
|
||||||
|
public static final int EMAIL = 4;
|
||||||
|
public static final int HOST = 5;
|
||||||
|
public static final int NUM = 6;
|
||||||
|
public static final int CJ = 7;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated this solves a bug where HOSTs that end with '.' are identified
|
||||||
|
* as ACRONYMs.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public static final int ACRONYM_DEP = 8;
|
||||||
|
|
||||||
|
/** String token types that correspond to token type int constants */
|
||||||
|
public static final String [] TOKEN_TYPES = new String [] {
|
||||||
|
"<ALPHANUM>",
|
||||||
|
"<APOSTROPHE>",
|
||||||
|
"<ACRONYM>",
|
||||||
|
"<COMPANY>",
|
||||||
|
"<EMAIL>",
|
||||||
|
"<HOST>",
|
||||||
|
"<NUM>",
|
||||||
|
"<CJ>",
|
||||||
|
"<ACRONYM_DEP>"
|
||||||
|
};
|
||||||
|
|
||||||
|
private boolean replaceInvalidAcronym;
|
||||||
|
|
||||||
|
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
|
||||||
|
|
||||||
|
/** Set the max allowed token length. Any token longer
|
||||||
|
* than this is skipped. */
|
||||||
|
public void setMaxTokenLength(int length) {
|
||||||
|
this.maxTokenLength = length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @see #setMaxTokenLength */
|
||||||
|
public int getMaxTokenLength() {
|
||||||
|
return maxTokenLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new instance of the {@link ClassicTokenizer}. Attaches
* the <code>input</code> to the newly created JFlex scanner.
*
* @param input The input reader
*
* See http://issues.apache.org/jira/browse/LUCENE-1068
*/
public ClassicTokenizer(Version matchVersion, Reader input) {
super();
init(input, matchVersion);
}

/**
* Creates a new ClassicTokenizer with a given {@link AttributeSource}.
*/
public ClassicTokenizer(Version matchVersion, AttributeSource source, Reader input) {
super(source);
init(input, matchVersion);
}

/**
* Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
*/
public ClassicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
super(factory);
init(input, matchVersion);
}

private final void init(Reader input, Version matchVersion) {
this.scanner = new ClassicTokenizerImpl(input);

if (matchVersion.onOrAfter(Version.LUCENE_24)) {
replaceInvalidAcronym = true;
} else {
replaceInvalidAcronym = false;
}
this.input = input;
}

// this tokenizer generates four attributes:
// term, offset, positionIncrement and type
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);

/*
* (non-Javadoc)
*
* @see org.apache.lucene.analysis.TokenStream#next()
*/
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
int posIncr = 1;

while(true) {
int tokenType = scanner.getNextToken();

if (tokenType == StandardTokenizerInterface.YYEOF) {
return false;
}

if (scanner.yylength() <= maxTokenLength) {
posIncrAtt.setPositionIncrement(posIncr);
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
// This 'if' should be removed in the next release. For now, it converts
// invalid acronyms to HOST. When removed, only the 'else' part should
// remain.
if (tokenType == ClassicTokenizer.ACRONYM_DEP) {
if (replaceInvalidAcronym) {
typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST]);
termAtt.setLength(termAtt.length() - 1); // remove extra '.'
} else {
typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM]);
}
} else {
typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[tokenType]);
}
return true;
} else
// When we skip a too-long term, we still increment the
// position increment
posIncr++;
}
}

@Override
public final void end() {
// set final offset
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
offsetAtt.setOffset(finalOffset, finalOffset);
}

@Override
public void reset(Reader reader) throws IOException {
super.reset(reader);
scanner.yyreset(reader);
}

/**
* Prior to https://issues.apache.org/jira/browse/LUCENE-1068, ClassicTokenizer mischaracterized
* tokens like www.abc.com as acronyms when they should have been labeled as hosts instead.
* @return true if ClassicTokenizer now returns these tokens as hosts, otherwise false
*
* @deprecated Remove in 3.X and make true the only valid value
*/
@Deprecated
public boolean isReplaceInvalidAcronym() {
return replaceInvalidAcronym;
}

/**
*
* @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST.
* @deprecated Remove in 3.X and make true the only valid value
*
* See https://issues.apache.org/jira/browse/LUCENE-1068
*/
@Deprecated
public void setReplaceInvalidAcronym(boolean replaceInvalidAcronym) {
this.replaceInvalidAcronym = replaceInvalidAcronym;
}
}
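The skip-and-carry logic in incrementToken() above is easiest to see from the consumer side. A minimal sketch, assuming direct use of the tokenizer as a TokenStream; the demo class name and sample text are illustrative and not part of this commit:

import java.io.StringReader;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.Version;

public class ClassicTokenizerDemo {
  public static void main(String[] args) throws Exception {
    ClassicTokenizer tokenizer = new ClassicTokenizer(Version.LUCENE_31,
        new StringReader("www.abc.com sells B2B widgets"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncr =
        tokenizer.addAttribute(PositionIncrementAttribute.class);
    while (tokenizer.incrementToken()) {
      // A token longer than maxTokenLength is skipped, and its position
      // increment is folded into the next emitted token, so phrase queries
      // and slop calculations stay aligned with the original text.
      System.out.println(term.toString() + " +" + posIncr.getPositionIncrement());
    }
    tokenizer.end();   // records the final offset past the last token
    tokenizer.close();
  }
}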
@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 17.05.10 14:50 */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/15/10 3:01 AM */

package org.apache.lucene.analysis.standard;

@ -21,7 +21,7 @@ package org.apache.lucene.analysis.standard;

/*

WARNING: if you change StandardTokenizerImpl*.jflex and need to regenerate
WARNING: if you change ClassicTokenizerImpl.jflex and need to regenerate
the tokenizer, only use the trunk version of JFlex 1.5 at the moment!

*/

@ -33,10 +33,10 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 17.05.10 14:50 from the specification file
* on 9/15/10 3:01 AM from the specification file
* <tt>C:/Users/Uwe Schindler/Projects/lucene/newtrunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex</tt>
* <tt>c:/Users/us/IdeaProjects/lucene/test-dev-trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
*/
class StandardTokenizerImplOrig implements StandardTokenizerInterface {
class ClassicTokenizerImpl implements StandardTokenizerInterface {

/** This character denotes the end of file */
public static final int YYEOF = -1;

@ -383,7 +383,7 @@ public final void getText(CharTermAttribute t) {
*
* @param in the java.io.Reader to read input from.
*/
StandardTokenizerImplOrig(java.io.Reader in) {
ClassicTokenizerImpl(java.io.Reader in) {
this.zzReader = in;
}

@ -393,7 +393,7 @@ public final void getText(CharTermAttribute t) {
*
* @param in the java.io.Inputstream to read input from.
*/
StandardTokenizerImplOrig(java.io.InputStream in) {
ClassicTokenizerImpl(java.io.InputStream in) {
this(new java.io.InputStreamReader(in));
}
@ -19,7 +19,7 @@ package org.apache.lucene.analysis.standard;

/*

WARNING: if you change StandardTokenizerImpl*.jflex and need to regenerate
WARNING: if you change ClassicTokenizerImpl.jflex and need to regenerate
the tokenizer, only use the trunk version of JFlex 1.5 at the moment!

*/

@ -29,7 +29,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

%%

%class StandardTokenizerImplOrig
%class ClassicTokenizerImpl
%implements StandardTokenizerInterface
%unicode 3.0
%integer
@ -39,10 +39,12 @@ import java.util.Set;
* <p>You must specify the required {@link Version}
* compatibility when creating StandardAnalyzer:
* <ul>
* <li> As of 3.1, StopFilter correctly handles Unicode 4.0
* supplementary characters in stopwords
* <li> As of 2.9, StopFilter preserves position
* increments
* <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
* and StopFilter correctly handles Unicode 4.0 supplementary characters
* in stopwords. {@link ClassicTokenizer} and {@link ClassicAnalyzer}
* are the pre-3.1 implementations of StandardTokenizer and
* StandardAnalyzer.
* <li> As of 2.9, StopFilter preserves position increments
* <li> As of 2.4, Tokens incorrectly identified as acronyms
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
* </ul>

@ -122,7 +124,7 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
src.setMaxTokenLength(maxTokenLength);
src.setReplaceInvalidAcronym(replaceInvalidAcronym);
TokenStream tok = new StandardFilter(src);
TokenStream tok = new StandardFilter(matchVersion, src);
tok = new LowerCaseFilter(matchVersion, tok);
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
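The createComponents() hunk above chains StandardTokenizer through StandardFilter, LowerCaseFilter, and StopFilter. A sketch of exercising that chain end to end; the field name and sample text are illustrative:

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class StandardAnalyzerDemo {
  public static void main(String[] args) throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    TokenStream ts = analyzer.tokenStream("body", new StringReader("The QUICK brown fox"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // StopFilter drops "the"; LowerCaseFilter folds case: quick, brown, fox
      System.out.println(term.toString());
    }
    ts.end();
    ts.close();
  }
}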
@ -17,33 +17,45 @@ package org.apache.lucene.analysis.standard;
* limitations under the License.
*/

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

/** Normalizes tokens extracted with {@link StandardTokenizer}. */
public final class StandardFilter extends TokenFilter {
/** Construct filtering <i>in</i>. */
/**
* Normalizes tokens extracted with {@link StandardTokenizer}.
*/
public class StandardFilter extends TokenFilter {
private final Version matchVersion;

public StandardFilter(TokenStream in) {
super(in);
this(Version.LUCENE_30, in);
}

private static final String APOSTROPHE_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.APOSTROPHE];
private static final String ACRONYM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM];
public StandardFilter(Version matchVersion, TokenStream in) {
super(in);
this.matchVersion = matchVersion;
}

private static final String APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
private static final String ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];

// this filter uses the type attribute
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

/** Returns the next token in the stream, or null at EOS.
* <p>Removes <tt>'s</tt> from the end of words.
* <p>Removes dots from acronyms.
*/
@Override
public final boolean incrementToken() throws java.io.IOException {
public final boolean incrementToken() throws IOException {
if (matchVersion.onOrAfter(Version.LUCENE_31))
return input.incrementToken(); // TODO: add some niceties for the new grammar
else
return incrementTokenClassic();
}

public final boolean incrementTokenClassic() throws IOException {
if (!input.incrementToken()) {
return false;
}
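With the new matchVersion constructor, StandardFilter becomes a pass-through for 3.1+ streams and only applies the classic possessive and acronym-dot stripping to pre-3.1 streams. A sketch of the two paths, assuming the classic tokenizer as the source; names and inputs are illustrative:

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class StandardFilterDemo {
  static void dump(Version v, String text) throws Exception {
    TokenStream ts = new StandardFilter(v,
        new ClassicTokenizer(v, new StringReader(text)));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.print(term.toString() + " ");
    }
    System.out.println();
    ts.end();
    ts.close();
  }

  public static void main(String[] args) throws Exception {
    // Pre-3.1 path: possessive "'s" and acronym dots are stripped (O'Reilly, IBM).
    dump(Version.LUCENE_30, "O'Reilly's I.B.M. guide");
    // 3.1+ path: the filter currently passes tokens through unchanged.
    dump(Version.LUCENE_31, "O'Reilly's I.B.M. guide");
  }
}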
@ -17,39 +17,42 @@

package org.apache.lucene.analysis.standard;

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.Reader;

/** A grammar-based tokenizer constructed with JFlex
*
* <p> This should be a good tokenizer for most European-language documents:
*
* <ul>
* <li>Splits words at punctuation characters, removing punctuation. However, a
* dot that's not followed by whitespace is considered part of a token.
* <li>Splits words at hyphens, unless there's a number in the token, in which case
* the whole token is interpreted as a product number and is not split.
* <li>Recognizes email addresses and internet hostnames as one token.
* </ul>
*
/** A grammar-based tokenizer constructed with JFlex.
* <p>
* As of Lucene version 3.1, this class implements the Word Break rules from the
* Unicode Text Segmentation algorithm, as specified in
* <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
* <p/>
* <b>WARNING</b>: Because JFlex does not support Unicode supplementary
* characters (characters above the Basic Multilingual Plane, which contains
* those up to and including U+FFFF), this scanner will not recognize them
* properly. If you need to be able to process text containing supplementary
* characters, consider using the ICU4J-backed implementation in contrib/icu
* ({@link org.apache.lucene.analysis.icu.segmentation.ICUTokenizer})
* instead of this class, since the ICU4J-backed implementation does not have
* this limitation.
* <p>Many applications have specific tokenizer needs. If this tokenizer does
* not suit your application, please consider copying this source code
* directory to your project and maintaining your own grammar-based tokenizer.
*
* <a name="version"/>
* <p>You must specify the required {@link Version}
* compatibility when creating StandardAnalyzer:
* compatibility when creating StandardTokenizer:
* <ul>
* <li> As of 2.4, Tokens incorrectly identified as acronyms
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>
* <li> As of 3.1, StandardTokenizer implements Unicode text segmentation.
* If you use a previous version number, you get the exact behavior of
* {@link ClassicTokenizer} for backwards compatibility.
* </ul>
*/

@ -58,12 +61,22 @@ public final class StandardTokenizer extends Tokenizer {
private StandardTokenizerInterface scanner;

public static final int ALPHANUM = 0;
/** @deprecated */
@Deprecated
public static final int APOSTROPHE = 1;
/** @deprecated */
@Deprecated
public static final int ACRONYM = 2;
/** @deprecated */
@Deprecated
public static final int COMPANY = 3;
public static final int EMAIL = 4;
/** @deprecated */
@Deprecated
public static final int HOST = 5;
public static final int NUM = 6;
/** @deprecated */
@Deprecated
public static final int CJ = 7;

/**

@ -73,6 +86,11 @@ public final class StandardTokenizer extends Tokenizer {
@Deprecated
public static final int ACRONYM_DEP = 8;

public static final int URL = 9;
public static final int SOUTHEAST_ASIAN = 10;
public static final int IDEOGRAPHIC = 11;
public static final int HIRAGANA = 12;

/** String token types that correspond to token type int constants */
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",

@ -83,7 +101,11 @@ public final class StandardTokenizer extends Tokenizer {
"<HOST>",
"<NUM>",
"<CJ>",
"<ACRONYM_DEP>"
"<ACRONYM_DEP>",
"<URL>",
"<SOUTHEAST_ASIAN>",
"<IDEOGRAPHIC>",
"<HIRAGANA>"
};

private boolean replaceInvalidAcronym;

@ -132,7 +154,7 @@ public final class StandardTokenizer extends Tokenizer {

private final void init(Reader input, Version matchVersion) {
this.scanner = matchVersion.onOrAfter(Version.LUCENE_31) ?
new StandardTokenizerImpl31(input) : new StandardTokenizerImplOrig(input);
new StandardTokenizerImpl(input) : new ClassicTokenizerImpl(input);
if (matchVersion.onOrAfter(Version.LUCENE_24)) {
replaceInvalidAcronym = true;
} else {
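A sketch of the effect of the scanner selection in init() above, and of reading the new token types through the public attributes; the sample input is illustrative:

import java.io.StringReader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

public class StandardTokenizerDemo {
  public static void main(String[] args) throws Exception {
    // LUCENE_31 selects StandardTokenizerImpl (UAX#29 plus URL/email rules);
    // any earlier Version constant selects ClassicTokenizerImpl instead.
    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_31,
        new StringReader("visit http://lucene.apache.org or mail dev@lucene.apache.org"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    TypeAttribute type = tokenizer.addAttribute(TypeAttribute.class);
    while (tokenizer.incrementToken()) {
      // type.type() returns entries of TOKEN_TYPES, e.g. "<ALPHANUM>", "<URL>", "<EMAIL>"
      System.out.println(term.toString() + "\t" + type.type());
    }
    tokenizer.end();
    tokenizer.close();
  }
}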
File diff suppressed because it is too large
@ -0,0 +1,260 @@
package org.apache.lucene.analysis.standard;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
* This class implements Word Break rules from the Unicode Text Segmentation
* algorithm, as specified in
* <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
* URLs and email addresses are also tokenized according to the relevant RFCs.
* <p/>
* Tokens produced are of the following types:
* <ul>
* <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
* <li><NUM>: A number</li>
* <li><URL>: A URL</li>
* <li><EMAIL>: An email address</li>
* <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
* <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
* <li><HIRAGANA>: A single hiragana character</li>
* </ul>
* <b>WARNING</b>: Because JFlex does not support Unicode supplementary
* characters (characters above the Basic Multilingual Plane, which contains
* those up to and including U+FFFF), this scanner will not recognize them
* properly. If you need to be able to process text containing supplementary
* characters, consider using the ICU4J-backed implementation in contrib/icu
* ({@link org.apache.lucene.analysis.icu.segmentation.ICUTokenizer})
* instead of this class, since the ICU4J-backed implementation does not have
* this limitation.
*/
%%

%unicode 5.2
%integer
%final
%public
%class StandardTokenizerImpl
%implements StandardTokenizerInterface
%function getNextToken
%char

// UAX#29 WB4. X (Extend | Format)* --> X
//
ALetterEx = \p{WB:ALetter} [\p{WB:Format}\p{WB:Extend}]*
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
NumericEx = [\p{WB:Numeric}\uFF10-\uFF19] [\p{WB:Format}\p{WB:Extend}]*
KatakanaEx = \p{WB:Katakana} [\p{WB:Format}\p{WB:Extend}]*
MidLetterEx = [\p{WB:MidLetter}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]*
MidNumericEx = [\p{WB:MidNum}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]*
ExtendNumLetEx = \p{WB:ExtendNumLet} [\p{WB:Format}\p{WB:Extend}]*


// URL and E-mail syntax specifications:
//
// RFC-952: DOD INTERNET HOST TABLE SPECIFICATION
// RFC-1035: DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION
// RFC-1123: Requirements for Internet Hosts - Application and Support
// RFC-1738: Uniform Resource Locators (URL)
// RFC-3986: Uniform Resource Identifier (URI): Generic Syntax
// RFC-5234: Augmented BNF for Syntax Specifications: ABNF
// RFC-5321: Simple Mail Transfer Protocol
// RFC-5322: Internet Message Format

%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro

DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
DomainNameLoose = {DomainLabel} ("." {DomainLabel})*

IPv4DecimalOctet = "0"{0,2} [0-9] | "0"? [1-9][0-9] | "1" [0-9][0-9] | "2" ([0-4][0-9] | "5" [0-5])
IPv4Address = {IPv4DecimalOctet} ("." {IPv4DecimalOctet}){3}
IPv6Hex16Bit = [0-9A-Fa-f]{1,4}
IPv6LeastSignificant32Bits = {IPv4Address} | ({IPv6Hex16Bit} ":" {IPv6Hex16Bit})
IPv6Address = ({IPv6Hex16Bit} ":"){6} {IPv6LeastSignificant32Bits}
| "::" ({IPv6Hex16Bit} ":"){5} {IPv6LeastSignificant32Bits}
| {IPv6Hex16Bit}? "::" ({IPv6Hex16Bit} ":"){4} {IPv6LeastSignificant32Bits}
| (({IPv6Hex16Bit} ":"){0,1} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){3} {IPv6LeastSignificant32Bits}
| (({IPv6Hex16Bit} ":"){0,2} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){2} {IPv6LeastSignificant32Bits}
| (({IPv6Hex16Bit} ":"){0,3} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit} ":" {IPv6LeastSignificant32Bits}
| (({IPv6Hex16Bit} ":"){0,4} {IPv6Hex16Bit})? "::" {IPv6LeastSignificant32Bits}
| (({IPv6Hex16Bit} ":"){0,5} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit}
| (({IPv6Hex16Bit} ":"){0,6} {IPv6Hex16Bit})? "::"

URIunreserved = [-._~A-Za-z0-9]
URIpercentEncoded = "%" [0-9A-Fa-f]{2}
URIsubDelims = [!$&'()*+,;=]
URIloginSegment = ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims})*
URIlogin = {URIloginSegment} (":" {URIloginSegment})? "@"
URIquery = "?" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
URIfragment = "#" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
URIport = ":" [0-9]{1,5}
URIhostStrict = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameStrict}
URIhostLoose = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameLoose}

URIauthorityStrict = {URIhostStrict} {URIport}?
URIauthorityLoose = {URIlogin}? {URIhostLoose} {URIport}?

HTTPsegment = ({URIunreserved} | {URIpercentEncoded} | [;:@&=])*
HTTPpath = ("/" {HTTPsegment})*
HTTPscheme = [hH][tT][tT][pP][sS]? "://"
HTTPurlFull = {HTTPscheme} {URIauthorityLoose} {HTTPpath}? {URIquery}? {URIfragment}?
// {HTTPurlNoScheme} excludes {URIlogin}, because it could otherwise accept e-mail addresses
HTTPurlNoScheme = {URIauthorityStrict} {HTTPpath}? {URIquery}? {URIfragment}?
HTTPurl = {HTTPurlFull} | {HTTPurlNoScheme}

FTPorFILEsegment = ({URIunreserved} | {URIpercentEncoded} | [?:@&=])*
FTPorFILEpath = "/" {FTPorFILEsegment} ("/" {FTPorFILEsegment})*
FTPtype = ";" [tT][yY][pP][eE] "=" [aAiIdD]
FTPscheme = [fF][tT][pP] "://"
FTPurl = {FTPscheme} {URIauthorityLoose} {FTPorFILEpath} {FTPtype}? {URIfragment}?

FILEscheme = [fF][iI][lL][eE] "://"
FILEurl = {FILEscheme} {URIhostLoose}? {FTPorFILEpath} {URIfragment}?

URL = {HTTPurl} | {FTPurl} | {FILEurl}

EMAILquotedString = [\"] ([\u0001-\u0008\u000B\u000C\u000E-\u0021\u0023-\u005B\u005D-\u007E] | [\\] [\u0000-\u007F])* [\"]
EMAILatomText = [A-Za-z0-9!#$%&'*+-/=?\^_`{|}~]
EMAILlabel = {EMAILatomText}+ | {EMAILquotedString}
EMAILlocalPart = {EMAILlabel} ("." {EMAILlabel})*
EMAILdomainLiteralText = [\u0001-\u0008\u000B\u000C\u000E-\u005A\u005E-\u007F] | [\\] [\u0000-\u007F]
// DFA minimization allows {IPv6Address} and {IPv4Address} to be included
// in the {EMAILbracketedHost} definition without incurring any size penalties,
// since {EMAILdomainLiteralText} recognizes all valid IP addresses.
// The IP address regexes are included in {EMAILbracketedHost} simply as a
// reminder that they are acceptable bracketed host forms.
EMAILbracketedHost = "[" ({EMAILdomainLiteralText}* | {IPv4Address} | [iI][pP][vV] "6:" {IPv6Address}) "]"
EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})

%{
/** Alphanumeric sequences */
public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;

/** Numbers */
public static final int NUMERIC_TYPE = StandardTokenizer.NUM;

/** URLs with scheme: HTTP(S), FTP, or FILE; no-scheme URLs match HTTP syntax */
public static final int URL_TYPE = StandardTokenizer.URL;

/** E-mail addresses */
public static final int EMAIL_TYPE = StandardTokenizer.EMAIL;

/**
* Chars in class \p{Line_Break = Complex_Context} are from South East Asian
* scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
* together as a single token rather than broken up, because the logic
* required to break them at word boundaries is too complex for UAX#29.
* {@see Unicode Line Breaking Algorithm http://www.unicode.org/reports/tr14/#SA}
*/
public static final int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;

public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;

public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;

public final int yychar()
{
return yychar;
}

/**
* Fills CharTermAttribute with the current token text.
*/
public final void getText(CharTermAttribute t) {
t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
%}

%%

// UAX#29 WB1. sot ÷
// WB2. ÷ eot
//
<<EOF>> { return StandardTokenizerInterface.YYEOF; }

{URL} { return URL_TYPE; }
{EMAIL} { return EMAIL_TYPE; }

// UAX#29 WB8. Numeric × Numeric
// WB11. Numeric (MidNum | MidNumLet) × Numeric
// WB12. Numeric × (MidNum | MidNumLet) Numeric
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
| {MidNumericEx} {NumericEx}
| {NumericEx})*
{ExtendNumLetEx}*
{ return NUMERIC_TYPE; }


// UAX#29 WB5. ALetter × ALetter
// WB6. ALetter × (MidLetter | MidNumLet) ALetter
// WB7. ALetter (MidLetter | MidNumLet) × ALetter
// WB9. ALetter × Numeric
// WB10. Numeric × ALetter
// WB13. Katakana × Katakana
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
{ExtendNumLetEx}*
{ return WORD_TYPE; }


// From UAX #29:
//
// [C]haracters with the Line_Break property values of Contingent_Break (CB),
// Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
// boundary property values based on criteria outside of the scope of this
// annex. That means that satisfactory treatment of languages like Chinese
// or Thai requires special handling.
//
// In Unicode 5.2, only one character has the \p{Line_Break = Contingent_Break}
// property: U+FFFC OBJECT REPLACEMENT CHARACTER.
//
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
// Lao, etc.) are kept together. This grammar does the same below.
//
// See also the Unicode Line Breaking Algorithm:
//
// http://www.unicode.org/reports/tr14/#SA
//
\p{LB:Complex_Context}+ { return SOUTH_EAST_ASIAN_TYPE; }

// UAX#29 WB14. Any ÷ Any
//
\p{Script:Han} { return IDEOGRAPHIC_TYPE; }
\p{Script:Hiragana} { return HIRAGANA_TYPE; }


// UAX#29 WB3. CR × LF
// WB3a. (Newline | CR | LF) ÷
// WB3b. ÷ (Newline | CR | LF)
// WB14. Any ÷ Any
//
[^] { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
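The composite {NUMERIC_TYPE} and {WORD_TYPE} rules above are what keep strings like 1,248.99 and can't single tokens, while URLs and addresses are captured whole by the earlier rules. A sketch of the expected behavior through the StandardTokenizer front end; the input is illustrative:

import java.io.StringReader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

public class Uax29WordBreakDemo {
  public static void main(String[] args) throws Exception {
    // MidLetter/MidNumLet rules (WB6/7) keep "can't" one token; MidNum/MidNumLet
    // rules (WB11/12) keep "1,248.99" one token; {URL} matches the FTP address.
    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_31,
        new StringReader("can't pay 1,248.99 at ftp://example.org/file.txt"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    TypeAttribute type = tokenizer.addAttribute(TypeAttribute.class);
    while (tokenizer.incrementToken()) {
      System.out.println(type.type() + "\t" + term.toString());
    }
    tokenizer.end();
    tokenizer.close();
  }
}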
@ -1,134 +0,0 @@
package org.apache.lucene.analysis.standard;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/*

WARNING: if you change StandardTokenizerImpl*.jflex and need to regenerate
the tokenizer, only use the trunk version of JFlex 1.5 at the moment!

*/

import java.io.Reader;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

%%

%class StandardTokenizerImpl31
%implements StandardTokenizerInterface
%unicode 4.0
%integer
%function getNextToken
%pack
%char

%{

public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
public static final int ACRONYM = StandardTokenizer.ACRONYM;
public static final int COMPANY = StandardTokenizer.COMPANY;
public static final int EMAIL = StandardTokenizer.EMAIL;
public static final int HOST = StandardTokenizer.HOST;
public static final int NUM = StandardTokenizer.NUM;
public static final int CJ = StandardTokenizer.CJ;
/**
* @deprecated this solves a bug where HOSTs that end with '.' are identified
* as ACRONYMs.
*/
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;

public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;

public final int yychar()
{
return yychar;
}

/**
* Fills CharTermAttribute with the current token text.
*/
public final void getText(CharTermAttribute t) {
t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}

%}

THAI = [\u0E00-\u0E59]

// basic word: a sequence of digits & letters (includes Thai to enable ThaiAnalyzer to function)
ALPHANUM = ({LETTER}|{THAI}|[:digit:])+

// internal apostrophes: O'Reilly, you're, O'Reilly's
// use a post-filter to remove possessives
APOSTROPHE = {ALPHA} ("'" {ALPHA})+

// acronyms: U.S.A., I.B.M., etc.
// use a post-filter to remove dots
ACRONYM = {LETTER} "." ({LETTER} ".")+

ACRONYM_DEP = {ALPHANUM} "." ({ALPHANUM} ".")+

// company names like AT&T and Excite@Home.
COMPANY = {ALPHA} ("&"|"@") {ALPHA}

// email addresses
EMAIL = {ALPHANUM} (("."|"-"|"_") {ALPHANUM})* "@" {ALPHANUM} (("."|"-") {ALPHANUM})+

// hostname
HOST = {ALPHANUM} ((".") {ALPHANUM})+

// floating point, serial, model numbers, ip addresses, etc.
// every other segment must have at least one digit
NUM = ({ALPHANUM} {P} {HAS_DIGIT}
| {HAS_DIGIT} {P} {ALPHANUM}
| {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+
| {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
| {ALPHANUM} {P} {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
| {HAS_DIGIT} {P} {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+)

// punctuation
P = ("_"|"-"|"/"|"."|",")

// at least one digit
HAS_DIGIT = ({LETTER}|[:digit:])* [:digit:] ({LETTER}|[:digit:])*

ALPHA = ({LETTER})+

// From the JFlex manual: "the expression that matches everything of <a> not matched by <b> is !(!<a>|<b>)"
LETTER = !(![:letter:]|{CJ})

// Chinese and Japanese (but NOT Korean, which is included in [:letter:])
CJ = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]

WHITESPACE = \r\n | [ \r\n\t\f]

%%

{ALPHANUM} { return ALPHANUM; }
{APOSTROPHE} { return APOSTROPHE; }
{ACRONYM} { return ACRONYM; }
{COMPANY} { return COMPANY; }
{EMAIL} { return EMAIL; }
{HOST} { return HOST; }
{NUM} { return NUM; }
{CJ} { return CJ; }
{ACRONYM_DEP} { return ACRONYM_DEP; }

/** Ignore the rest */
. | {WHITESPACE} { /* ignore */ }
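The deleted {NUM} macro above glued alternating letter and digit segments into one token, which is why model and serial numbers survived classic tokenization intact. A sketch of that behavior via ClassicTokenizer, which keeps this grammar; the input is illustrative:

import java.io.StringReader;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

public class ClassicNumDemo {
  public static void main(String[] args) throws Exception {
    // Alternating letter/digit segments joined by the {P} punctuation class
    // (_ - / . ,) match {NUM}, so a serial like "XK-72-B1" stays one token.
    ClassicTokenizer tokenizer = new ClassicTokenizer(Version.LUCENE_31,
        new StringReader("order XK-72-B1 from support@example.com"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    TypeAttribute type = tokenizer.addAttribute(TypeAttribute.class);
    while (tokenizer.incrementToken()) {
      System.out.println(type.type() + "\t" + term.toString());   // e.g. <NUM>  XK-72-B1
    }
    tokenizer.end();
    tokenizer.close();
  }
}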
@ -1,4 +1,4 @@
|
||||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 17.05.10 14:50 */
|
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/15/10 3:01 AM */
|
||||||
|
|
||||||
package org.apache.lucene.analysis.standard;
|
package org.apache.lucene.analysis.standard;
|
||||||
|
|
||||||
|
@ -19,33 +19,51 @@ package org.apache.lucene.analysis.standard;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
import java.io.IOException;
|
||||||
|
|
||||||
WARNING: if you change StandardTokenizerImpl*.jflex and need to regenerate
|
|
||||||
the tokenizer, only use the trunk version of JFlex 1.5 at the moment!
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class is a scanner generated by
|
* This class implements Word Break rules from the Unicode Text Segmentation
|
||||||
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
* algorithm, as specified in
|
||||||
* on 17.05.10 14:50 from the specification file
|
* <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>
|
||||||
* <tt>C:/Users/Uwe Schindler/Projects/lucene/newtrunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex</tt>
|
* <p/>
|
||||||
|
* Tokens produced are of the following types:
|
||||||
|
* <ul>
|
||||||
|
* <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
|
||||||
|
* <li><NUM>: A number</li>
|
||||||
|
* <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
|
||||||
|
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
|
||||||
|
* <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
|
||||||
|
* <li><HIRAGANA>: A single hiragana character</li>
|
||||||
|
* </ul>
|
||||||
|
* <b>WARNING</b>: Because JFlex does not support Unicode supplementary
|
||||||
|
* characters (characters above the Basic Multilingual Plane, which contains
|
||||||
|
* those up to and including U+FFFF), this scanner will not recognize them
|
||||||
|
* properly. If you need to be able to process text containing supplementary
|
||||||
|
* characters, consider using the ICU4J-backed implementation in contrib/icu
|
||||||
|
* ({@link org.apache.lucene.analysis.icu.segmentation.ICUTokenizer})
|
||||||
|
* instead of this class, since the ICU4J-backed implementation does not have
|
||||||
|
* this limitation.
|
||||||
*/
|
*/
|
||||||
class StandardTokenizerImpl31 implements StandardTokenizerInterface {
|
|
||||||
|
public final class UAX29Tokenizer extends Tokenizer {
|
||||||
|
|
||||||
/** This character denotes the end of file */
|
/** This character denotes the end of file */
|
||||||
public static final int YYEOF = -1;
|
private static final int YYEOF = -1;
|
||||||
|
|
||||||
/** initial size of the lookahead buffer */
|
/** initial size of the lookahead buffer */
|
||||||
private static final int ZZ_BUFFERSIZE = 16384;
|
private static final int ZZ_BUFFERSIZE = 16384;
|
||||||
|
|
||||||
/** lexical states */
|
/** lexical states */
|
||||||
public static final int YYINITIAL = 0;
|
private static final int YYINITIAL = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
|
* ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
|
||||||
|
@ -61,68 +79,113 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
|
||||||
* Translates characters to character classes
|
* Translates characters to character classes
|
||||||
*/
|
*/
|
||||||
private static final String ZZ_CMAP_PACKED =
|
private static final String ZZ_CMAP_PACKED =
|
||||||
"\11\0\1\0\1\15\1\0\1\0\1\14\22\0\1\0\5\0\1\5"+
|
"\47\0\1\7\4\0\1\6\1\0\1\7\1\0\12\3\1\5\1\6"+
|
||||||
"\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0\1\6\32\12"+
|
"\5\0\32\1\4\0\1\10\1\0\32\1\57\0\1\1\2\0\1\2"+
|
||||||
"\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12\4\0\1\12"+
|
"\7\0\1\1\1\0\1\5\2\0\1\1\5\0\27\1\1\0\37\1"+
|
||||||
"\5\0\27\12\1\0\37\12\1\0\u013f\12\31\0\162\12\4\0\14\12"+
|
"\1\0\u01ca\1\4\0\14\1\16\0\5\1\7\0\1\1\1\0\1\1"+
|
||||||
"\16\0\5\12\11\0\1\12\213\0\1\12\13\0\1\12\1\0\3\12"+
|
"\21\0\160\2\5\1\1\0\2\1\2\0\4\1\1\6\7\0\1\1"+
|
||||||
"\1\0\1\12\1\0\24\12\1\0\54\12\1\0\46\12\1\0\5\12"+
|
"\1\5\3\1\1\0\1\1\1\0\24\1\1\0\123\1\1\0\213\1"+
|
||||||
"\4\0\202\12\10\0\105\12\1\0\46\12\2\0\2\12\6\0\20\12"+
|
"\1\0\7\2\234\1\13\0\46\1\2\0\1\1\7\0\47\1\1\0"+
|
||||||
"\41\0\46\12\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12"+
|
"\1\6\7\0\55\2\1\0\1\2\1\0\2\2\1\0\2\2\1\0"+
|
||||||
"\56\0\32\12\5\0\13\12\25\0\12\2\4\0\2\12\1\0\143\12"+
|
"\1\2\10\0\33\1\5\0\4\1\1\5\13\0\4\2\10\0\2\6"+
|
||||||
"\1\0\1\12\17\0\2\12\7\0\2\12\12\2\3\12\2\0\1\12"+
|
"\2\0\13\2\6\0\52\1\24\2\1\0\12\3\1\0\1\3\1\6"+
|
||||||
"\20\0\1\12\1\0\36\12\35\0\3\12\60\0\46\12\13\0\1\12"+
|
"\1\0\2\1\1\2\143\1\1\0\1\1\17\2\2\1\2\2\1\0"+
|
||||||
"\u0152\0\66\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0\12\2"+
|
"\4\2\2\1\12\3\3\1\2\0\1\1\17\0\1\2\1\1\1\2"+
|
||||||
"\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0\1\12"+
|
"\36\1\33\2\2\0\131\1\13\2\1\1\16\0\12\3\41\1\11\2"+
|
||||||
"\3\0\4\12\3\0\1\12\36\0\2\12\1\0\3\12\4\0\12\2"+
|
"\2\1\2\0\1\6\1\0\1\1\5\0\26\1\4\2\1\1\11\2"+
|
||||||
"\2\12\23\0\6\12\4\0\2\12\2\0\26\12\1\0\7\12\1\0"+
|
"\1\1\3\2\1\1\5\2\322\0\4\2\66\1\2\0\1\2\1\1"+
|
||||||
"\2\12\1\0\2\12\1\0\2\12\37\0\4\12\1\0\1\12\7\0"+
|
"\21\2\1\0\1\1\5\2\2\0\12\1\2\2\2\0\12\3\1\0"+
|
||||||
"\12\2\2\0\3\12\20\0\11\12\1\0\3\12\1\0\26\12\1\0"+
|
"\2\1\6\0\7\1\1\0\3\2\1\0\10\1\2\0\2\1\2\0"+
|
||||||
"\7\12\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12\17\0"+
|
"\26\1\1\0\7\1\1\0\1\1\3\0\4\1\2\0\1\2\1\1"+
|
||||||
"\2\12\4\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0"+
|
"\7\2\2\0\2\2\2\0\3\2\1\1\10\0\1\2\4\0\2\1"+
|
||||||
"\7\12\1\0\2\12\1\0\5\12\3\0\1\12\36\0\2\12\1\0"+
|
"\1\0\3\1\2\2\2\0\12\3\2\1\17\0\3\2\1\0\6\1"+
|
||||||
"\3\12\4\0\12\2\1\0\1\12\21\0\1\12\1\0\6\12\3\0"+
|
"\4\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1\1\0\2\1"+
|
||||||
"\3\12\1\0\4\12\3\0\2\12\1\0\1\12\1\0\2\12\3\0"+
|
"\1\0\2\1\2\0\1\2\1\0\5\2\4\0\2\2\2\0\3\2"+
|
||||||
"\2\12\3\0\3\12\3\0\10\12\1\0\3\12\55\0\11\2\25\0"+
|
"\3\0\1\2\7\0\4\1\1\0\1\1\7\0\12\3\2\2\3\1"+
|
||||||
"\10\12\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12\46\0"+
|
"\1\2\13\0\3\2\1\0\11\1\1\0\3\1\1\0\26\1\1\0"+
|
||||||
"\2\12\4\0\12\2\25\0\10\12\1\0\3\12\1\0\27\12\1\0"+
|
"\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\10\2\1\0"+
|
||||||
"\12\12\1\0\5\12\3\0\1\12\40\0\1\12\1\0\2\12\4\0"+
|
"\3\2\1\0\3\2\2\0\1\1\17\0\2\1\2\2\2\0\12\3"+
|
||||||
"\12\2\25\0\10\12\1\0\3\12\1\0\27\12\1\0\20\12\46\0"+
|
"\21\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1"+
|
||||||
"\2\12\4\0\12\2\25\0\22\12\3\0\30\12\1\0\11\12\1\0"+
|
"\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2"+
|
||||||
"\1\12\2\0\7\12\71\0\1\1\60\12\1\1\2\12\14\1\7\12"+
|
"\2\0\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2\2\0"+
|
||||||
"\11\1\12\2\47\0\2\12\1\0\1\12\2\0\2\12\1\0\1\12"+
|
"\12\3\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0\3\1"+
|
||||||
"\2\0\1\12\6\0\4\12\1\0\7\12\1\0\3\12\1\0\1\12"+
|
"\1\0\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0\2\1"+
|
||||||
"\1\0\1\12\2\0\2\12\1\0\4\12\1\0\2\12\11\0\1\12"+
|
"\3\0\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0\4\2"+
|
||||||
"\2\0\5\12\1\0\1\12\11\0\12\2\2\0\2\12\42\0\1\12"+
|
"\2\0\1\1\6\0\1\2\16\0\12\3\21\0\3\2\1\0\10\1"+
|
||||||
"\37\0\12\2\26\0\10\12\1\0\42\12\35\0\4\12\164\0\42\12"+
|
"\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1\3\0\1\1"+
|
||||||
"\1\0\5\12\1\0\2\12\25\0\12\2\6\0\6\12\112\0\46\12"+
|
"\7\2\1\0\3\2\1\0\4\2\7\0\2\2\1\0\2\1\6\0"+
|
||||||
"\12\0\51\12\7\0\132\12\5\0\104\12\5\0\122\12\6\0\7\12"+
|
"\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0\3\1"+
|
||||||
"\1\0\77\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\1\12"+
|
"\1\0\27\1\1\0\12\1\1\0\5\1\2\0\1\2\1\1\7\2"+
|
||||||
"\1\0\4\12\2\0\47\12\1\0\1\12\1\0\4\12\2\0\37\12"+
|
"\1\0\3\2\1\0\4\2\7\0\2\2\7\0\1\1\1\0\2\1"+
|
||||||
"\1\0\1\12\1\0\4\12\2\0\7\12\1\0\1\12\1\0\4\12"+
|
"\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0\3\1\1\0"+
|
||||||
"\2\0\7\12\1\0\7\12\1\0\27\12\1\0\37\12\1\0\1\12"+
|
"\27\1\1\0\20\1\3\0\1\1\7\2\1\0\3\2\1\0\4\2"+
|
||||||
"\1\0\4\12\2\0\7\12\1\0\47\12\1\0\23\12\16\0\11\2"+
|
"\11\0\1\2\10\0\2\1\2\2\2\0\12\3\12\0\6\1\2\0"+
|
||||||
"\56\0\125\12\14\0\u026c\12\2\0\10\12\12\0\32\12\5\0\113\12"+
|
"\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0\1\1\2\0"+
|
||||||
"\25\0\15\12\1\0\4\12\16\0\22\12\16\0\22\12\16\0\15\12"+
|
"\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0\10\2\22\0"+
|
||||||
"\1\0\3\12\17\0\64\12\43\0\1\12\4\0\1\12\3\0\12\2"+
|
"\2\2\15\0\60\11\1\12\2\11\7\12\5\0\7\11\10\12\1\0"+
|
||||||
"\46\0\12\2\6\0\130\12\10\0\51\12\127\0\35\12\51\0\12\2"+
|
"\12\3\47\0\2\11\1\0\1\11\2\0\2\11\1\0\1\11\2\0"+
|
||||||
"\36\12\2\0\5\12\u038b\0\154\12\224\0\234\12\4\0\132\12\6\0"+
|
"\1\11\6\0\4\11\1\0\7\11\1\0\3\11\1\0\1\11\1\0"+
|
||||||
"\26\12\2\0\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0"+
|
"\1\11\2\0\2\11\1\0\4\11\1\12\2\11\6\12\1\0\2\12"+
|
||||||
"\1\12\1\0\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0"+
|
"\1\11\2\0\5\11\1\0\1\11\1\0\6\12\2\0\12\3\2\0"+
|
||||||
"\7\12\1\0\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0"+
|
"\2\11\42\0\1\1\27\0\2\2\6\0\12\3\13\0\1\2\1\0"+
|
||||||
"\6\12\4\0\15\12\5\0\3\12\1\0\7\12\164\0\1\12\15\0"+
|
"\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1\4\0\24\2"+
|
||||||
"\1\12\202\0\1\12\4\0\1\12\2\0\12\12\1\0\1\12\3\0"+
|
"\1\0\2\2\4\1\4\0\10\2\1\0\44\2\11\0\1\2\71\0"+
|
||||||
"\5\12\6\0\1\12\1\0\1\12\1\0\1\12\1\0\4\12\1\0"+
|
"\53\11\24\12\1\11\12\3\6\0\6\11\4\12\4\11\3\12\1\11"+
|
||||||
"\3\12\1\0\7\12\3\0\3\12\5\0\5\12\u0ebb\0\2\12\52\0"+
|
"\3\12\2\11\7\12\3\11\4\12\15\11\14\12\1\11\1\12\12\3"+
|
||||||
"\5\12\5\0\2\12\3\0\1\13\126\13\6\13\3\13\1\13\132\13"+
|
"\4\12\2\11\46\1\12\0\53\1\1\0\1\1\3\0\u0149\1\1\0"+
|
||||||
"\1\13\4\13\5\13\50\13\3\13\1\0\136\12\21\0\30\12\70\0"+
|
"\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0"+
|
||||||
"\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13\132\13\u048d\12"+
|
"\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0"+
|
||||||
"\u0773\0\u2ba4\12\u215c\0\u012e\13\2\13\73\13\225\13\7\12\14\0\5\12"+
|
"\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1\4\0"+
|
||||||
"\5\0\1\12\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12"+
|
"\1\2\40\0\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1\1\0"+
|
||||||
"\1\0\2\12\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12"+
|
"\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0\4\1\3\2"+
|
||||||
"\2\0\66\12\50\0\14\12\164\0\5\12\1\0\207\12\23\0\12\2"+
|
"\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1\1\0\3\1"+
|
||||||
"\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12\3\0\6\12"+
|
"\1\0\2\2\14\0\64\11\40\12\3\0\1\11\4\0\1\11\1\12"+
|
||||||
"\2\0\6\12\2\0\6\12\2\0\3\12\43\0";
|
"\2\0\12\3\41\0\3\2\2\0\12\3\6\0\130\1\10\0\51\1"+
|
||||||
|
"\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2\4\0\14\2"+
|
||||||
|
"\12\0\12\3\36\11\2\0\5\11\13\0\54\11\4\0\21\12\7\11"+
|
||||||
|
"\2\12\6\0\13\3\3\0\2\11\40\0\27\1\5\2\4\0\65\11"+
|
||||||
|
"\12\12\1\0\35\12\2\0\1\2\12\3\6\0\12\3\6\0\16\11"+
|
||||||
|
"\122\0\5\2\57\1\21\2\7\1\4\0\12\3\21\0\11\2\14\0"+
|
||||||
|
"\3\2\36\1\12\2\3\0\2\1\12\3\106\0\44\1\24\2\10\0"+
|
||||||
|
"\12\3\3\0\3\1\12\3\44\1\122\0\3\2\1\0\25\2\4\1"+
|
||||||
|
"\1\2\4\1\1\2\15\0\300\1\47\2\26\0\3\2\u0116\1\2\0"+
|
||||||
|
"\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0\1\1\1\0"+
|
||||||
|
"\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0\7\1\1\0"+
|
||||||
|
"\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0\6\1\4\0"+
|
||||||
|
"\15\1\5\0\3\1\1\0\7\1\17\0\4\2\10\0\2\7\12\0"+
|
||||||
|
"\1\7\2\0\1\5\2\0\5\2\20\0\2\10\3\0\1\6\17\0"+
|
||||||
|
"\1\10\13\0\5\2\5\0\6\2\1\0\1\1\15\0\1\1\20\0"+
|
||||||
|
"\5\1\73\0\41\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0"+
|
||||||
|
"\1\1\3\0\5\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0"+
|
||||||
|
"\4\1\1\0\13\1\2\0\4\1\5\0\5\1\4\0\1\1\21\0"+
|
||||||
|
"\51\1\u032d\0\64\1\u0716\0\57\1\1\0\57\1\1\0\205\1\6\0"+
|
||||||
|
"\4\1\3\2\16\0\46\1\12\0\66\1\11\0\1\1\20\0\27\1"+
|
||||||
|
"\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+
|
||||||
|
"\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\57\0\1\1"+
|
||||||
|
"\120\0\32\13\1\0\131\13\14\0\326\13\57\0\1\1\1\0\1\13"+
|
||||||
|
"\31\0\11\13\6\2\1\0\5\4\2\0\3\13\1\1\1\1\4\0"+
|
||||||
|
"\126\14\2\0\2\2\2\4\3\14\133\4\1\0\4\4\5\0\51\1"+
|
||||||
|
"\3\0\136\1\21\0\30\1\70\0\20\4\320\0\57\4\1\0\130\4"+
|
||||||
|
"\250\0\u19b6\13\112\0\u51cc\13\64\0\u048d\1\103\0\56\1\2\0\u010d\1"+
|
||||||
|
"\3\0\20\1\12\3\2\1\24\0\40\1\2\0\15\1\4\2\11\0"+
|
||||||
|
"\2\2\1\0\31\1\10\0\120\1\2\2\45\0\11\1\2\0\147\1"+
|
||||||
|
"\2\0\2\1\156\0\7\1\1\2\3\1\1\2\4\1\1\2\27\1"+
|
||||||
|
"\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\3\6\0"+
|
||||||
|
"\22\2\6\1\3\0\1\1\4\0\12\3\34\1\10\2\2\0\27\1"+
|
||||||
|
"\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\3"+
|
||||||
|
"\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\3"+
|
||||||
|
"\6\0\33\11\1\12\4\0\60\11\1\12\1\11\3\12\2\11\2\12"+
|
||||||
|
"\5\11\2\12\1\11\1\12\1\11\30\0\5\11\340\0\43\1\10\2"+
|
||||||
|
"\1\0\2\2\2\0\12\3\6\0\u2ba4\1\14\0\27\1\4\0\61\1"+
|
||||||
|
"\u2104\0\u012e\13\2\0\76\13\2\0\152\13\46\0\7\1\14\0\5\1"+
|
||||||
|
"\5\0\1\1\1\2\12\1\1\0\15\1\1\0\5\1\1\0\1\1"+
|
||||||
|
"\1\0\2\1\1\0\2\1\1\0\154\1\41\0\u016b\1\22\0\100\1"+
|
||||||
|
"\2\0\66\1\50\0\14\1\4\0\20\2\1\6\2\0\1\5\1\6"+
|
||||||
|
"\13\0\7\2\14\0\2\10\30\0\3\10\1\6\1\0\1\7\1\0"+
|
||||||
|
"\1\6\1\5\32\0\5\1\1\0\207\1\2\0\1\2\7\0\1\7"+
|
||||||
|
"\4\0\1\6\1\0\1\7\1\0\12\3\1\5\1\6\5\0\32\1"+
|
||||||
|
"\4\0\1\10\1\0\32\1\13\0\70\4\2\2\37\1\3\0\6\1"+
|
||||||
|
"\2\0\6\1\2\0\6\1\2\0\3\1\34\0\3\2\4\0";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Translates characters to character classes
|
* Translates characters to character classes
|
||||||
|
@ -135,13 +198,11 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
|
||||||
private static final int [] ZZ_ACTION = zzUnpackAction();
|
private static final int [] ZZ_ACTION = zzUnpackAction();
|
||||||
|
|
||||||
private static final String ZZ_ACTION_PACKED_0 =
|
private static final String ZZ_ACTION_PACKED_0 =
|
||||||
"\1\0\1\1\3\2\1\3\1\1\13\0\1\2\3\4"+
|
"\1\0\1\1\1\2\1\3\1\2\1\1\1\4\1\5"+
|
||||||
"\2\0\1\5\1\0\1\5\3\4\6\5\1\6\1\4"+
|
"\1\6\1\2\1\0\1\2\1\0\1\3\2\0";
|
||||||
"\2\7\1\10\1\0\1\10\3\0\2\10\1\11\1\12"+
|
|
||||||
"\1\4";
|
|
||||||
|
|
||||||
private static int [] zzUnpackAction() {
|
private static int [] zzUnpackAction() {
|
||||||
int [] result = new int[51];
|
int [] result = new int[16];
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
|
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
|
||||||
return result;
|
return result;
|
||||||
|
@ -166,16 +227,11 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
|
||||||
private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
|
private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
|
||||||
|
|
||||||
private static final String ZZ_ROWMAP_PACKED_0 =
|
private static final String ZZ_ROWMAP_PACKED_0 =
|
||||||
"\0\0\0\16\0\34\0\52\0\70\0\16\0\106\0\124"+
|
"\0\0\0\15\0\32\0\47\0\64\0\101\0\116\0\15"+
|
||||||
"\0\142\0\160\0\176\0\214\0\232\0\250\0\266\0\304"+
|
"\0\15\0\133\0\150\0\165\0\202\0\217\0\101\0\234";
|
||||||
"\0\322\0\340\0\356\0\374\0\u010a\0\u0118\0\u0126\0\u0134"+
|
|
||||||
"\0\u0142\0\u0150\0\u015e\0\u016c\0\u017a\0\u0188\0\u0196\0\u01a4"+
|
|
||||||
"\0\u01b2\0\u01c0\0\u01ce\0\u01dc\0\u01ea\0\u01f8\0\322\0\u0206"+
|
|
||||||
"\0\u0214\0\u0222\0\u0230\0\u023e\0\u024c\0\u025a\0\124\0\214"+
|
|
||||||
"\0\u0268\0\u0276\0\u0284";
|
|
||||||
|
|
||||||
private static int [] zzUnpackRowMap() {
|
private static int [] zzUnpackRowMap() {
|
||||||
int [] result = new int[51];
|
int [] result = new int[16];
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
|
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
|
||||||
return result;
|
return result;
|
||||||
|
@ -198,49 +254,21 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
|
||||||
 private static final int [] ZZ_TRANS = zzUnpackTrans();

 private static final String ZZ_TRANS_PACKED_0 =
-    "\1\2\1\3\1\4\7\2\1\5\1\6\1\7\1\2"+
-    "\17\0\2\3\1\0\1\10\1\0\1\11\2\12\1\13"+
-    "\1\3\4\0\1\3\1\4\1\0\1\14\1\0\1\11"+
-    "\2\15\1\16\1\4\4\0\1\3\1\4\1\17\1\20"+
-    "\1\21\1\22\2\12\1\13\1\23\20\0\1\2\1\0"+
-    "\1\24\1\25\7\0\1\26\4\0\2\27\7\0\1\27"+
-    "\4\0\1\30\1\31\7\0\1\32\5\0\1\33\7\0"+
-    "\1\13\4\0\1\34\1\35\7\0\1\36\4\0\1\37"+
-    "\1\40\7\0\1\41\4\0\1\42\1\43\7\0\1\44"+
-    "\15\0\1\45\4\0\1\24\1\25\7\0\1\46\15\0"+
-    "\1\47\4\0\2\27\7\0\1\50\4\0\1\3\1\4"+
-    "\1\17\1\10\1\21\1\22\2\12\1\13\1\23\4\0"+
-    "\2\24\1\0\1\51\1\0\1\11\2\52\1\0\1\24"+
-    "\4\0\1\24\1\25\1\0\1\53\1\0\1\11\2\54"+
-    "\1\55\1\25\4\0\1\24\1\25\1\0\1\51\1\0"+
-    "\1\11\2\52\1\0\1\26\4\0\2\27\1\0\1\56"+
-    "\2\0\1\56\2\0\1\27\4\0\2\30\1\0\1\52"+
-    "\1\0\1\11\2\52\1\0\1\30\4\0\1\30\1\31"+
-    "\1\0\1\54\1\0\1\11\2\54\1\55\1\31\4\0"+
-    "\1\30\1\31\1\0\1\52\1\0\1\11\2\52\1\0"+
-    "\1\32\5\0\1\33\1\0\1\55\2\0\3\55\1\33"+
-    "\4\0\2\34\1\0\1\57\1\0\1\11\2\12\1\13"+
-    "\1\34\4\0\1\34\1\35\1\0\1\60\1\0\1\11"+
-    "\2\15\1\16\1\35\4\0\1\34\1\35\1\0\1\57"+
-    "\1\0\1\11\2\12\1\13\1\36\4\0\2\37\1\0"+
-    "\1\12\1\0\1\11\2\12\1\13\1\37\4\0\1\37"+
-    "\1\40\1\0\1\15\1\0\1\11\2\15\1\16\1\40"+
-    "\4\0\1\37\1\40\1\0\1\12\1\0\1\11\2\12"+
-    "\1\13\1\41\4\0\2\42\1\0\1\13\2\0\3\13"+
-    "\1\42\4\0\1\42\1\43\1\0\1\16\2\0\3\16"+
-    "\1\43\4\0\1\42\1\43\1\0\1\13\2\0\3\13"+
-    "\1\44\6\0\1\17\6\0\1\45\4\0\1\24\1\25"+
-    "\1\0\1\61\1\0\1\11\2\52\1\0\1\26\4\0"+
-    "\2\27\1\0\1\56\2\0\1\56\2\0\1\50\4\0"+
-    "\2\24\7\0\1\24\4\0\2\30\7\0\1\30\4\0"+
-    "\2\34\7\0\1\34\4\0\2\37\7\0\1\37\4\0"+
-    "\2\42\7\0\1\42\4\0\2\62\7\0\1\62\4\0"+
-    "\2\24\7\0\1\63\4\0\2\62\1\0\1\56\2\0"+
-    "\1\56\2\0\1\62\4\0\2\24\1\0\1\61\1\0"+
-    "\1\11\2\52\1\0\1\24\3\0";
+    "\1\2\1\3\1\2\1\4\1\5\3\2\1\6\2\7"+
+    "\1\10\1\11\16\0\2\3\1\12\1\0\1\13\1\0"+
+    "\1\13\1\14\1\0\1\3\3\0\1\3\2\4\2\0"+
+    "\2\15\1\16\1\0\1\4\4\0\1\5\1\0\1\5"+
+    "\3\0\1\14\1\0\1\5\3\0\1\3\1\17\1\4"+
+    "\1\5\3\0\1\17\1\0\1\17\13\0\2\7\3\0"+
+    "\1\3\2\12\2\0\2\20\1\14\1\0\1\12\3\0"+
+    "\1\3\1\13\7\0\1\13\3\0\1\3\1\14\1\12"+
+    "\1\5\3\0\1\14\1\0\1\14\4\0\1\15\1\4"+
+    "\6\0\1\15\3\0\1\3\1\16\1\4\1\5\3\0"+
+    "\1\16\1\0\1\16\4\0\1\20\1\12\6\0\1\20"+
+    "\2\0";

 private static int [] zzUnpackTrans() {
-  int [] result = new int[658];
+  int [] result = new int[169];
   int offset = 0;
   offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
   return result;

@@ -278,11 +306,11 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
 private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();

 private static final String ZZ_ATTRIBUTE_PACKED_0 =
-    "\1\0\1\11\3\1\1\11\1\1\13\0\4\1\2\0"+
-    "\1\1\1\0\17\1\1\0\1\1\3\0\5\1";
+    "\1\0\1\11\5\1\2\11\1\1\1\0\1\1\1\0"+
+    "\1\1\2\0";

 private static int [] zzUnpackAttribute() {
-  int [] result = new int[51];
+  int [] result = new int[16];
   int offset = 0;
   offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
   return result;

@@ -350,35 +378,124 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
 private boolean zzEOFDone;

 /* user code: */
-public static final int ALPHANUM   = StandardTokenizer.ALPHANUM;
-public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
-public static final int ACRONYM    = StandardTokenizer.ACRONYM;
-public static final int COMPANY    = StandardTokenizer.COMPANY;
-public static final int EMAIL      = StandardTokenizer.EMAIL;
-public static final int HOST       = StandardTokenizer.HOST;
-public static final int NUM        = StandardTokenizer.NUM;
-public static final int CJ         = StandardTokenizer.CJ;
-/**
- * @deprecated this solves a bug where HOSTs that end with '.' are identified
- *             as ACRONYMs.
- */
-public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
-
-public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
-
-public final int yychar()
-{
-  return yychar;
-}
-
-/**
- * Fills CharTermAttribute with the current token text.
- */
-public final void getText(CharTermAttribute t) {
-  t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
-}
+/** Alphanumeric sequences */
+public static final String WORD_TYPE = "<ALPHANUM>";
+
+/** Numbers */
+public static final String NUMERIC_TYPE = "<NUM>";
+
+/**
+ * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ * scripts (Thai, Lao, Myanmar, Khmer, etc.).  Sequences of these are kept
+ * together as a single token rather than broken up, because the logic
+ * required to break them at word boundaries is too complex for UAX#29.
+ * {@see Unicode Line Breaking Algorithm http://www.unicode.org/reports/tr14/#SA}
+ */
+public static final String SOUTH_EAST_ASIAN_TYPE = "<SOUTHEAST_ASIAN>";
+
+public static final String IDEOGRAPHIC_TYPE = "<IDEOGRAPHIC>";
+
+public static final String HIRAGANA_TYPE = "<HIRAGANA>";
+
+private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+private final PositionIncrementAttribute posIncrAtt
+  = addAttribute(PositionIncrementAttribute.class);
+private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+
+private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+private int posIncr;
+
+/**
+ * @param source The AttributeSource to use
+ * @param input The input reader
+ */
+public UAX29Tokenizer(AttributeSource source, Reader input) {
+  super(source, input);
+  zzReader = input;
+}
+
+/**
+ * @param factory The AttributeFactory to use
+ * @param input The input reader
+ */
+public UAX29Tokenizer(AttributeFactory factory, Reader input) {
+  super(factory, input);
+  zzReader = input;
+}
+
+/**
+ * Set the max allowed token length.  Any token longer than this is skipped.
+ * @param length the new max allowed token length
+ */
+public void setMaxTokenLength(int length) {
+  this.maxTokenLength = length;
+}
+
+/**
+ * Returns the max allowed token length.  Any token longer than this is
+ * skipped.
+ * @return the max allowed token length
+ */
+public int getMaxTokenLength() {
+  return maxTokenLength;
+}
+
+@Override
+public final void end() {
+  // set final offset
+  int finalOffset = correctOffset(yychar + yylength());
+  offsetAtt.setOffset(finalOffset, finalOffset);
+}
+
+@Override
+public void reset(Reader reader) throws IOException {
+  super.reset(reader);
+  yyreset(reader);
+}
+
+@Override
+public final boolean incrementToken() throws IOException {
+  // This method is required because of two JFlex limitations:
+  // 1. No way to insert code at the beginning of the generated scanning
+  //    get-next-token method; and
+  // 2. No way to declare @Override on the generated scanning method.
+  clearAttributes();
+  posIncr = 1;
+  return getNextToken();
+}
+
+/**
+ * Populates this TokenStream's CharTermAttribute and OffsetAttribute from
+ * the current match, the TypeAttribute from the passed-in tokenType, and
+ * the PositionIncrementAttribute to one, unless the immediately previous
+ * token(s) was/were skipped because maxTokenLength was exceeded, in which
+ * case the PositionIncrementAttribute is set to one plus the number of
+ * skipped overly long tokens.
+ * <p/>
+ * If maxTokenLength is exceeded, the CharTermAttribute is set back to empty
+ * and false is returned.
+ *
+ * @param tokenType The type of the matching token
+ * @return true if there is a token available (not too long); false otherwise
+ */
+private boolean populateAttributes(String tokenType) {
+  boolean isTokenAvailable = false;
+  if (yylength() > maxTokenLength) {
+    // When we skip a too-long token, we treat it like a stopword, introducing
+    // a position increment gap
+    ++posIncr;
+  } else {
+    termAtt.copyBuffer(zzBuffer, zzStartRead, yylength());
+    posIncrAtt.setPositionIncrement(posIncr);
+    offsetAtt.setOffset(correctOffset(yychar),
+                        correctOffset(yychar + yylength()));
+    typeAtt.setType(tokenType);
+    isTokenAvailable = true;
+  }
+  return isTokenAvailable;
+}
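A note between hunks: populateAttributes() above turns over-long tokens into position gaps rather than emitting them. A minimal sketch of the observable effect, assuming this class and a five-character cap (the input string and limit are invented for illustration; whether an explicit reset() call is required before iteration varies across Lucene versions):

    import java.io.StringReader;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

    public class MaxTokenLengthDemo {
      public static void main(String[] args) throws Exception {
        UAX29Tokenizer tokenizer = new UAX29Tokenizer(new StringReader("a bbbbbbbbbb c"));
        tokenizer.setMaxTokenLength(5);  // anything longer is skipped like a stopword
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr =
            tokenizer.addAttribute(PositionIncrementAttribute.class);
        while (tokenizer.incrementToken()) {
          // prints "a +1" then "c +2": the skipped ten-char token leaves a gap
          System.out.println(new String(term.buffer(), 0, term.length())
              + " +" + posIncr.getPositionIncrement());
        }
        tokenizer.close();
      }
    }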
 /**

@@ -387,7 +504,8 @@ public final void getText(CharTermAttribute t) {
  *
  * @param   in  the java.io.Reader to read input from.
  */
-StandardTokenizerImpl31(java.io.Reader in) {
+public UAX29Tokenizer(java.io.Reader in) {
+  super(in);
   this.zzReader = in;
 }

@@ -397,7 +515,7 @@ public final void getText(CharTermAttribute t) {
  *
  * @param   in  the java.io.Inputstream to read input from.
  */
-StandardTokenizerImpl31(java.io.InputStream in) {
+public UAX29Tokenizer(java.io.InputStream in) {
   this(new java.io.InputStreamReader(in));
 }

@@ -411,7 +529,7 @@ public final void getText(CharTermAttribute t) {
   char [] map = new char[0x10000];
   int i = 0;  /* index in packed string  */
   int j = 0;  /* index in unpacked array */
-  while (i < 1234) {
+  while (i < 2138) {
     int count = packed.charAt(i++);
     char value = packed.charAt(i++);
     do map[j++] = value; while (--count > 0);
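All of the packed ZZ_* tables above use the encoding this loop decodes: the packed string is a sequence of (count, value) char pairs, expanded by the same do/while as zzUnpackCMap. A standalone sketch of the scheme (the packed string below is an invented example, not one of the real tables):

    public class PackedTableDemo {
      // Expands a JFlex-style packed table: each (count, value) char pair
      // contributes `count` copies of `value` to the output.
      static char[] unpack(String packed, int unpackedSize) {
        char[] table = new char[unpackedSize];
        int i = 0;  // index in packed string
        int j = 0;  // index in unpacked array
        while (i < packed.length()) {
          int count = packed.charAt(i++);
          char value = packed.charAt(i++);
          do table[j++] = value; while (--count > 0);
        }
        return table;
      }

      public static void main(String[] args) {
        // "\3\1\2\0" means: three copies of 1, then two copies of 0
        char[] expanded = unpack("\3\1\2\0", 5);  // {1, 1, 1, 0, 0}
        System.out.println((int) expanded[0] + " ... " + (int) expanded[4]);
      }
    }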
@@ -477,7 +595,7 @@ public final void getText(CharTermAttribute t) {
 /**
  * Closes the input stream.
  */
-public final void yyclose() throws java.io.IOException {
+private final void yyclose() throws java.io.IOException {
   zzAtEOF  = true;            /* indicate end of file */
   zzEndRead = zzStartRead;    /* invalidate buffer    */

@@ -498,7 +616,7 @@ public final void getText(CharTermAttribute t) {
  *
  * @param reader   the new input stream
  */
-public final void yyreset(java.io.Reader reader) {
+private final void yyreset(java.io.Reader reader) {
   zzReader = reader;
   zzAtBOL  = true;
   zzAtEOF  = false;

@@ -515,7 +633,7 @@ public final void getText(CharTermAttribute t) {
 /**
  * Returns the current lexical state.
  */
-public final int yystate() {
+private final int yystate() {
   return zzLexicalState;
 }

@@ -525,7 +643,7 @@ public final void getText(CharTermAttribute t) {
  *
  * @param newState the new lexical state
  */
-public final void yybegin(int newState) {
+private final void yybegin(int newState) {
   zzLexicalState = newState;
 }

@@ -533,7 +651,7 @@ public final void getText(CharTermAttribute t) {
 /**
  * Returns the text matched by the current regular expression.
  */
-public final String yytext() {
+private final String yytext() {
   return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
 }

@@ -549,7 +667,7 @@ public final void getText(CharTermAttribute t) {
  *
  * @return the character at position pos
  */
-public final char yycharat(int pos) {
+private final char yycharat(int pos) {
   return zzBuffer[zzStartRead+pos];
 }

@@ -557,7 +675,7 @@ public final void getText(CharTermAttribute t) {
 /**
  * Returns the length of the matched text region.
  */
-public final int yylength() {
+private final int yylength() {
   return zzMarkedPos-zzStartRead;
 }

@@ -597,7 +715,7 @@ public final void getText(CharTermAttribute t) {
  * @param number  the number of characters to be read again.
  *                This number must not be greater than yylength()!
  */
-public void yypushback(int number)  {
+private void yypushback(int number)  {
   if ( number > yylength() )
     zzScanError(ZZ_PUSHBACK_2BIG);

@@ -612,7 +730,7 @@ public final void getText(CharTermAttribute t) {
  * @return      the next token
  * @exception   java.io.IOException  if any I/O-Error occurs
  */
-public int getNextToken() throws java.io.IOException {
+private boolean getNextToken() throws java.io.IOException {
   int zzInput;
   int zzAction;

@@ -685,49 +803,35 @@ public final void getText(CharTermAttribute t) {
   switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
     case 5:
-      { return NUM;
+      { if (populateAttributes(IDEOGRAPHIC_TYPE)) return true;
+      }
+    case 7: break;
+    case 1:
+      { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
+      }
+    case 8: break;
+    case 3:
+      { if (populateAttributes(NUMERIC_TYPE)) return true;
+      }
+    case 9: break;
+    case 6:
+      { if (populateAttributes(HIRAGANA_TYPE)) return true;
+      }
+    case 10: break;
+    case 4:
+      { if (populateAttributes(SOUTH_EAST_ASIAN_TYPE)) return true;
       }
     case 11: break;
-    case 9:
-      { return ACRONYM;
+    case 2:
+      { if (populateAttributes(WORD_TYPE)) return true;
       }
     case 12: break;
-    case 7:
-      { return COMPANY;
-      }
-    case 13: break;
-    case 10:
-      { return EMAIL;
-      }
-    case 14: break;
-    case 1:
-      { /* ignore */
-      }
-    case 15: break;
-    case 6:
-      { return APOSTROPHE;
-      }
-    case 16: break;
-    case 3:
-      { return CJ;
-      }
-    case 17: break;
-    case 8:
-      { return ACRONYM_DEP;
-      }
-    case 18: break;
-    case 2:
-      { return ALPHANUM;
-      }
-    case 19: break;
-    case 4:
-      { return HOST;
-      }
-    case 20: break;
     default:
       if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
         zzAtEOF = true;
-        return YYEOF;
+        {
+          return false;
+        }
       }
       else {
         zzScanError(ZZ_NO_MATCH);
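Each case above routes a different type constant into populateAttributes(), so a consumer can branch on the TypeAttribute. A hedged fragment (the segmenter hand-off is a hypothetical use, not part of this commit):

    import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

    // `tokenizer` is a UAX29Tokenizer wrapped around some Reader
    TypeAttribute type = tokenizer.addAttribute(TypeAttribute.class);
    while (tokenizer.incrementToken()) {
      if (UAX29Tokenizer.SOUTH_EAST_ASIAN_TYPE.equals(type.type())) {
        // e.g. hand the whole run to a language-specific word segmenter
      }
    }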
@@ -0,0 +1,273 @@
+package org.apache.lucene.analysis.standard;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * This class implements Word Break rules from the Unicode Text Segmentation
+ * algorithm, as specified in
+ * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>
+ * <p/>
+ * Tokens produced are of the following types:
+ * <ul>
+ *   <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
+ *   <li><NUM>: A number</li>
+ *   <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
+ *       Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
+ *   <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
+ *   <li><HIRAGANA>: A single hiragana character</li>
+ * </ul>
+ * <b>WARNING</b>: Because JFlex does not support Unicode supplementary
+ * characters (characters above the Basic Multilingual Plane, which contains
+ * those up to and including U+FFFF), this scanner will not recognize them
+ * properly. If you need to be able to process text containing supplementary
+ * characters, consider using the ICU4J-backed implementation in contrib/icu
+ * ({@link org.apache.lucene.analysis.icu.segmentation.ICUTokenizer})
+ * instead of this class, since the ICU4J-backed implementation does not have
+ * this limitation.
+ */
+%%
+
+%unicode 5.2
+%final
+%public
+%apiprivate
+%class UAX29Tokenizer
+%extends Tokenizer
+%type boolean
+%function getNextToken
+%char
+
+%init{
+  super(in);
+%init}
+
+// WB4. X (Extend | Format)* --> X
+//
+ALetterEx      = \p{WB:ALetter}                     [\p{WB:Format}\p{WB:Extend}]*
+// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
+NumericEx      = [\p{WB:Numeric}\uFF10-\uFF19]      [\p{WB:Format}\p{WB:Extend}]*
+KatakanaEx     = \p{WB:Katakana}                    [\p{WB:Format}\p{WB:Extend}]*
+MidLetterEx    = [\p{WB:MidLetter}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]*
+MidNumericEx   = [\p{WB:MidNum}\p{WB:MidNumLet}]    [\p{WB:Format}\p{WB:Extend}]*
+ExtendNumLetEx = \p{WB:ExtendNumLet}                [\p{WB:Format}\p{WB:Extend}]*
+
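The trailing [\p{WB:Format}\p{WB:Extend}]* on every macro is WB4 in action: format and extend characters attach to the character before them. A hedged sketch of the visible effect, assuming a tokenizer generated from this grammar (U+0301 COMBINING ACUTE ACCENT has Word_Break=Extend):

    import java.io.StringReader;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    UAX29Tokenizer tok = new UAX29Tokenizer(new StringReader("cafe\u0301 42"));
    CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
    while (tok.incrementToken()) {
      // expected: "café" as one five-char <ALPHANUM> token (the accent rides
      // along with the letter run), then "42" as <NUM>
      System.out.println(new String(term.buffer(), 0, term.length()));
    }
    tok.close();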
+%{
+  /** Alphanumeric sequences */
+  public static final String WORD_TYPE = "<ALPHANUM>";
+
+  /** Numbers */
+  public static final String NUMERIC_TYPE = "<NUM>";
+
+  /**
+   * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+   * scripts (Thai, Lao, Myanmar, Khmer, etc.).  Sequences of these are kept
+   * together as a single token rather than broken up, because the logic
+   * required to break them at word boundaries is too complex for UAX#29.
+   * {@see Unicode Line Breaking Algorithm http://www.unicode.org/reports/tr14/#SA}
+   */
+  public static final String SOUTH_EAST_ASIAN_TYPE = "<SOUTHEAST_ASIAN>";
+
+  public static final String IDEOGRAPHIC_TYPE = "<IDEOGRAPHIC>";
+
+  public static final String HIRAGANA_TYPE = "<HIRAGANA>";
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt
+    = addAttribute(PositionIncrementAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+
+  private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+  private int posIncr;
+
+  /**
+   * @param source The AttributeSource to use
+   * @param input The input reader
+   */
+  public UAX29Tokenizer(AttributeSource source, Reader input) {
+    super(source, input);
+    zzReader = input;
+  }
+
+  /**
+   * @param factory The AttributeFactory to use
+   * @param input The input reader
+   */
+  public UAX29Tokenizer(AttributeFactory factory, Reader input) {
+    super(factory, input);
+    zzReader = input;
+  }
+
+  /**
+   * Set the max allowed token length.  Any token longer than this is skipped.
+   * @param length the new max allowed token length
+   */
+  public void setMaxTokenLength(int length) {
+    this.maxTokenLength = length;
+  }
+
+  /**
+   * Returns the max allowed token length.  Any token longer than this is
+   * skipped.
+   * @return the max allowed token length
+   */
+  public int getMaxTokenLength() {
+    return maxTokenLength;
+  }
+
+  @Override
+  public final void end() {
+    // set final offset
+    int finalOffset = correctOffset(yychar + yylength());
+    offsetAtt.setOffset(finalOffset, finalOffset);
+  }
+
+  @Override
+  public void reset(Reader reader) throws IOException {
+    super.reset(reader);
+    yyreset(reader);
+  }
+
+  @Override
+  public final boolean incrementToken() throws IOException {
+    // This method is required because of two JFlex limitations:
+    // 1. No way to insert code at the beginning of the generated scanning
+    //    get-next-token method; and
+    // 2. No way to declare @Override on the generated scanning method.
+    clearAttributes();
+    posIncr = 1;
+    return getNextToken();
+  }
+
+  /**
+   * Populates this TokenStream's CharTermAttribute and OffsetAttribute from
+   * the current match, the TypeAttribute from the passed-in tokenType, and
+   * the PositionIncrementAttribute to one, unless the immediately previous
+   * token(s) was/were skipped because maxTokenLength was exceeded, in which
+   * case the PositionIncrementAttribute is set to one plus the number of
+   * skipped overly long tokens.
+   * <p/>
+   * If maxTokenLength is exceeded, the CharTermAttribute is set back to empty
+   * and false is returned.
+   *
+   * @param tokenType The type of the matching token
+   * @return true if there is a token available (not too long); false otherwise
+   */
+  private boolean populateAttributes(String tokenType) {
+    boolean isTokenAvailable = false;
+    if (yylength() > maxTokenLength) {
+      // When we skip a too-long token, we treat it like a stopword, introducing
+      // a position increment gap
+      ++posIncr;
+    } else {
+      termAtt.copyBuffer(zzBuffer, zzStartRead, yylength());
+      posIncrAtt.setPositionIncrement(posIncr);
+      offsetAtt.setOffset(correctOffset(yychar),
+                          correctOffset(yychar + yylength()));
+      typeAtt.setType(tokenType);
+      isTokenAvailable = true;
+    }
+    return isTokenAvailable;
+  }
+%}
+
+%%
+
+// WB1.  sot ÷
+// WB2.  ÷ eot
+//
+<<EOF>> { return false; }
+
+
+// WB8.   Numeric × Numeric
+// WB11.  Numeric (MidNum | MidNumLet) × Numeric
+// WB12.  Numeric × (MidNum | MidNumLet) Numeric
+// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
+//
+{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
+                              | {MidNumericEx} {NumericEx}
+                              | {NumericEx})*
+{ExtendNumLetEx}*
+  { if (populateAttributes(NUMERIC_TYPE)) return true; }
+
+
+// WB5.   ALetter × ALetter
+// WB6.   ALetter × (MidLetter | MidNumLet) ALetter
+// WB7.   ALetter (MidLetter | MidNumLet) × ALetter
+// WB9.   ALetter × Numeric
+// WB10.  Numeric × ALetter
+// WB13.  Katakana × Katakana
+// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
+//
+{ExtendNumLetEx}*  ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
+                   | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
+                     | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
+({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
+                   | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
+                     | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
+{ExtendNumLetEx}*
+  { if (populateAttributes(WORD_TYPE)) return true; }
+
+
+// From UAX #29:
+//
+//    [C]haracters with the Line_Break property values of Contingent_Break (CB),
+//    Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
+//    boundary property values based on criteria outside of the scope of this
+//    annex.  That means that satisfactory treatment of languages like Chinese
+//    or Thai requires special handling.
+//
+// In Unicode 5.2, only one character has the \p{Line_Break = Contingent_Break}
+// property: U+FFFC (  ) OBJECT REPLACEMENT CHARACTER.
+//
+// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
+// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
+// Lao, etc.) are kept together.  This grammar does the same below.
+//
+// See also the Unicode Line Breaking Algorithm:
+//
+//    http://www.unicode.org/reports/tr14/#SA
+//
+\p{LB:Complex_Context}+ { if (populateAttributes(SOUTH_EAST_ASIAN_TYPE)) return true; }
+
+// WB14.  Any ÷ Any
+//
+\p{Script:Han} { if (populateAttributes(IDEOGRAPHIC_TYPE)) return true; }
+\p{Script:Hiragana} { if (populateAttributes(HIRAGANA_TYPE)) return true; }
+
+
+// WB3.   CR × LF
+// WB3a.  (Newline | CR | LF) ÷
+// WB3b.  ÷ (Newline | CR | LF)
+// WB14.  Any ÷ Any
+//
+[^] { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
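Taken together, the rules above give concrete behavior for punctuation inside tokens: a MidNum/MidNumLet character only holds a run together when the surrounding class appears on both sides of it. A hedged worked example against this grammar (expected output noted in the comments):

    import java.io.StringReader;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

    for (String input : new String[] { "1,234.56", "can't", "3a" }) {
      UAX29Tokenizer tok = new UAX29Tokenizer(new StringReader(input));
      CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
      TypeAttribute type = tok.addAttribute(TypeAttribute.class);
      while (tok.incrementToken()) {
        System.out.println(new String(term.buffer(), 0, term.length()) + "  " + type.type());
      }
      tok.close();
    }
    // expected: "1,234.56  <NUM>" (WB11/WB12 keep ',' and '.' between digits),
    // "can't  <ALPHANUM>" (WB6/WB7: the apostrophe is MidNumLet), and
    // "3a  <ALPHANUM>" (WB9/WB10 join digits and letters)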
@@ -17,9 +17,43 @@
 -->
 <html>
 <head>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
 </head>
 <body>
-A fast grammar-based tokenizer constructed with JFlex.
+<p>The <code>org.apache.lucene.analysis.standard</code> package contains three
+fast grammar-based tokenizers constructed with JFlex:</p>
+<ul>
+  <li><code><a href="StandardTokenizer.html">StandardTokenizer</a></code>:
+    as of Lucene 3.1, implements the Word Break rules from the Unicode Text
+    Segmentation algorithm, as specified in
+    <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
+    URLs and email addresses are also tokenized according to the relevant RFCs.
+    <code><a href="StandardAnalyzer">StandardAnalyzer</a></code> includes
+    <code>StandardTokenizer</code>,
+    <code><a href="StandardFilter">StandardFilter</a></code>,
+    <code><a href="../../../../../../all/org/apache/lucene/analysis/LowerCaseFilter.html">LowerCaseFilter</a></code>
+    and <code><a href="../../../../../../all/org/apache/lucene/analysis/StopFilter.html">StopFilter</a></code>.
+    When the <code>Version</code> specified in the constructor is lower than
+    3.1, the <code><a href="ClassicTokenizer.html">ClassicTokenizer</a></code>
+    implementation is invoked.</li>
+  <li><code><a href="ClassicTokenizer.html">ClassicTokenizer</a></code>:
+    this class was formerly (prior to Lucene 3.1) named
+    <code>StandardTokenizer</code>.  (Its tokenization rules are not
+    based on the Unicode Text Segmentation algorithm.)
+    <code><a href="ClassicAnalyzer">ClassicAnalyzer</a></code> includes
+    <code>ClassicTokenizer</code>,
+    <code><a href="StandardFilter">StandardFilter</a></code>,
+    <code><a href="../../../../../../all/org/apache/lucene/analysis/LowerCaseFilter.html">LowerCaseFilter</a></code>
+    and <code><a href="../../../../../../all/org/apache/lucene/analysis/StopFilter.html">StopFilter</a></code>.
+  </li>
+  <li><code><a href="UAX29Tokenizer.html">UAX29Tokenizer</a></code>:
+    implements the Word Break rules from the Unicode Text Segmentation
+    algorithm, as specified in
+    <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
+    Unlike <code>StandardTokenizer</code>, URLs and email addresses are
+    <b>not</b> tokenized as single tokens, but are instead split up into
+    tokens according to the UAX#29 word break rules.
+  </li>
+</ul>
 </body>
 </html>
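A hedged sketch of the difference the page calls out, using the 3.1-era constructors that appear elsewhere in this commit (exact token output is the expected behavior, not a quoted test result):

    import java.io.StringReader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.util.Version;

    String text = "mail me at foo@bar.com";

    // StandardTokenizer with Version >= 3.1: "foo@bar.com" comes out as a
    // single <EMAIL> token, since URLs/emails are tokenized per the RFCs.
    Tokenizer standard = new StandardTokenizer(Version.LUCENE_31, new StringReader(text));

    // UAX29Tokenizer has no URL/email rules: the same input splits into
    // "foo" and "bar.com" under the plain UAX#29 word break rules
    // ('.' is MidNumLet and is kept between letters; '@' breaks).
    Tokenizer uax29 = new UAX29Tokenizer(new StringReader(text));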
@@ -120,7 +120,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())

@@ -58,7 +58,7 @@ public final class ThaiAnalyzer extends ReusableAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     if (matchVersion.onOrAfter(Version.LUCENE_31))
       result = new LowerCaseFilter(matchVersion, result);
     result = new ThaiWordFilter(matchVersion, result);

@@ -123,7 +123,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
   protected TokenStreamComponents createComponents(String fieldName,
       Reader reader) {
     final Tokenizer source = new StandardTokenizer(matchVersion, reader);
-    TokenStream result = new StandardFilter(source);
+    TokenStream result = new StandardFilter(matchVersion, source);
     result = new TurkishLowerCaseFilter(result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())
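The three analyzer hunks above are the same mechanical change: StandardFilter now takes the match version. The shared pattern, as a simplified sketch (stemming and stem-exclusion handling omitted; the surrounding analyzer class is assumed, as in the hunks above):

    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      final Tokenizer source = new StandardTokenizer(matchVersion, reader);
      TokenStream result = new StandardFilter(matchVersion, source); // was: new StandardFilter(source)
      result = new LowerCaseFilter(matchVersion, result);
      result = new StopFilter(matchVersion, result, stopwords);
      return new TokenStreamComponents(source, result);
    }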
@@ -0,0 +1,267 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
+<meta name="robots" content="index,nofollow">
+
+<title>Resources - Lucene-java Wiki</title>
+<script type="text/javascript" src="/moin_static184/common/js/common.js"></script>
+
+<script type="text/javascript">
+<!--
+var search_hint = "Search";
+//-->
+</script>
+
+
+<link rel="stylesheet" type="text/css" charset="utf-8" media="all" href="/moin_static184/modernized/css/common.css">
+<link rel="stylesheet" type="text/css" charset="utf-8" media="screen" href="/moin_static184/modernized/css/screen.css">
+<link rel="stylesheet" type="text/css" charset="utf-8" media="print" href="/moin_static184/modernized/css/print.css">
+<link rel="stylesheet" type="text/css" charset="utf-8" media="projection" href="/moin_static184/modernized/css/projection.css">
+
+<!-- css only for MS IE6/IE7 browsers -->
+<!--[if lt IE 8]>
+<link rel="stylesheet" type="text/css" charset="utf-8" media="all" href="/moin_static184/modernized/css/msie.css">
+<![endif]-->
+
+
+
+
+
+<link rel="Start" href="/lucene-java/FrontPageEN">
+<link rel="Alternate" title="Wiki Markup" href="/lucene-java/Resources?action=raw">
+<link rel="Alternate" media="print" title="Print View" href="/lucene-java/Resources?action=print">
+<link rel="Appendix" title="IntroductionToApacheLucene.jp.jpg" href="/lucene-java/Resources?action=AttachFile&do=view&target=IntroductionToApacheLucene.jp.jpg">
+<link rel="Appendix" title="SuchmaschinenEntwickelnMitApacheLucene.de.jpg" href="/lucene-java/Resources?action=AttachFile&do=view&target=SuchmaschinenEntwickelnMitApacheLucene.de.jpg">
+<link rel="Appendix" title="building.search.applications.png" href="/lucene-java/Resources?action=AttachFile&do=view&target=building.search.applications.png">
+<link rel="Appendix" title="lia3d.jpg" href="/lucene-java/Resources?action=AttachFile&do=view&target=lia3d.jpg">
+<link rel="Search" href="/lucene-java/FindPage">
+<link rel="Index" href="/lucene-java/TitleIndex">
+<link rel="Glossary" href="/lucene-java/WordIndex">
+<link rel="Help" href="/lucene-java/HelpOnFormatting">
+</head>
+
+<body lang="en" dir="ltr">
+
+<div id="header">
+
+<form id="searchform" method="get" action="/lucene-java/Resources">
+<div>
+<input type="hidden" name="action" value="fullsearch">
+<input type="hidden" name="context" value="180">
+<label for="searchinput">Search:</label>
+<input id="searchinput" type="text" name="value" value="" size="20"
+    onfocus="searchFocus(this)" onblur="searchBlur(this)"
+    onkeyup="searchChange(this)" onchange="searchChange(this)" alt="Search">
+<input id="titlesearch" name="titlesearch" type="submit"
+    value="Titles" alt="Search Titles">
+<input id="fullsearch" name="fullsearch" type="submit"
+    value="Text" alt="Search Full Text">
+</div>
+</form>
+<script type="text/javascript">
+<!--// Initialize search form
+var f = document.getElementById('searchform');
+f.getElementsByTagName('label')[0].style.display = 'none';
+var e = document.getElementById('searchinput');
+searchChange(e);
+searchBlur(e);
+//-->
+</script>
+
+<div id="logo"><a href="/lucene-java/FrontPageEN">Lucene-java Wiki</a></div>
+<div id="username"><a href="/lucene-java/Resources?action=login" id="login" rel="nofollow">Login</a></div>
+<h1 id="locationline">
+
+<span id="pagelocation"><a class="backlink" href="/lucene-java/Resources?action=fullsearch&context=180&value=linkto%3A%22Resources%22" rel="nofollow" title="Click to do a full-text search for this title">Resources</a></span>
+</h1>
+
+
+<ul id="navibar">
+<li class="wikilink"><a href="/lucene-java/FrontPageEN">FrontPageEN</a></li><li class="wikilink"><a href="/lucene-java/RecentChanges">RecentChanges</a></li><li class="wikilink"><a href="/lucene-java/FindPage">FindPage</a></li><li class="wikilink"><a href="/lucene-java/HelpContents">HelpContents</a></li><li class="current"><a href="/lucene-java/Resources">Resources</a></li>
+</ul>
+
+<div id="pageline"><hr style="display:none;"></div>
+
+<ul class="editbar"><li><span class="disabled">Immutable Page</span></li><li class="toggleCommentsButton" style="display:none;"><a href="#" class="nbcomment" onClick="toggleComments();return false;">Comments</a></li><li><a class="nbinfo" href="/lucene-java/Resources?action=info" rel="nofollow">Info</a></li><li>
+<form class="actionsmenu" method="GET" action="/lucene-java/Resources">
+<div>
+<label>More Actions:</label>
+<select name="action"
+    onchange="if ((this.selectedIndex != 0) &&
+              (this.options[this.selectedIndex].disabled == false)) {
+                this.form.submit();
+              }
+              this.selectedIndex = 0;">
+<option value="raw">Raw Text</option>
+<option value="print">Print View</option>
+<option value="RenderAsDocbook">Render as Docbook</option>
+<option value="refresh">Delete Cache</option>
+<option value="show" disabled class="disabled">------------------------</option>
+<option value="SpellCheck">Check Spelling</option>
+<option value="LikePages">Like Pages</option>
+<option value="LocalSiteMap">Local Site Map</option>
+<option value="show" disabled class="disabled">------------------------</option>
+<option value="RenamePage" disabled class="disabled">Rename Page</option>
+<option value="CopyPage">Copy Page</option>
+<option value="DeletePage" disabled class="disabled">Delete Page</option>
+<option value="show" disabled class="disabled">------------------------</option>
+<option value="MyPages">My Pages</option>
+<option value="show" disabled class="disabled">Subscribe User</option>
+<option value="show" disabled class="disabled">------------------------</option>
+<option value="show" disabled class="disabled">Remove Spam</option>
+<option value="show" disabled class="disabled">Revert to this revision</option>
+<option value="show" disabled class="disabled">Package Pages</option>
+<option value="SyncPages">Sync Pages</option>
+<option value="show" disabled class="disabled">------------------------</option>
+<option value="Load">Load</option>
+<option value="Save">Save</option>
+</select>
+<input type="submit" value="Do">
+
+</div>
+<script type="text/javascript">
+<!--// Init menu
+actionsMenuInit('More Actions:');
+//-->
+</script>
+</form>
+</li></ul>
+
+</div>
+
+<div id="page" lang="en" dir="ltr">
+<div dir="ltr" id="content" lang="en"><span class="anchor" id="top"></span>
+<span class="anchor" id="line-2"></span><p class="line867"><div class="table-of-contents"><p class="table-of-contents-heading">Contents<ol><li>
+<a href="#Introductions">Introductions</a></li><li>
+<a href="#Blogs">Blogs</a></li><li>
+<a href="#Books">Books</a></li><li>
+<a href="#Articles">Articles</a></li><li>
+<a href="#Interviews">Interviews</a></li><li>
+<a href="#Papers">Papers</a></li><li>
+<a href="#Presentations">Presentations</a></li><li>
+<a href="#Training">Training</a></li><li>
+<a href="#Corpora">Corpora</a></li><li>
+<a href="#Other">Other</a></li></ol></div> <span class="anchor" id="line-3"></span><span class="anchor" id="line-4"></span><p class="line867">
+<h1 id="Introductions">Introductions</h1>
+<span class="anchor" id="line-5"></span><span class="anchor" id="line-6"></span><ul><li><p class="line862">The API documentation contains <a class="http" href="http://lucene.apache.org/java/3_0_1/api/all/overview-summary.html#overview_description">a short and simple code example</a> that shows the basic way to index and search <span class="anchor" id="line-7"></span></li><li><p class="line862">The <a class="http" href="http://lucene.apache.org/java/3_0_1/gettingstarted.html">Getting Started Guide</a> that describes the demos that come with Lucene <span class="anchor" id="line-8"></span><span class="anchor" id="line-9"></span><span class="anchor" id="line-10"></span></li></ul><p class="line867">
+<h1 id="Blogs">Blogs</h1>
+<span class="anchor" id="line-11"></span><span class="anchor" id="line-12"></span><ul><li><p class="line891"><a class="http" href="http://lucene.grantingersoll.com">Grant's Grunts: Lucene edition</a> - Grant Ingersoll's thoughts on the Lucene ecosystem. <span class="anchor" id="line-13"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/blog/">Lucid Imagination's Blog</a> - Many of the Lucene and Solr committers blog here about how to use Lucene and Solr <span class="anchor" id="line-14"></span></li><li><p class="line891"><a class="http" href="http://blog.sematext.com/">Sematext Blog</a> - Search and Analytics covering Lucene, Solr, Nutch, Hadoop, HBase, and more <span class="anchor" id="line-15"></span><span class="anchor" id="line-16"></span><span class="anchor" id="line-17"></span></li></ul><p class="line867">
+<h1 id="Books">Books</h1>
+<span class="anchor" id="line-18"></span><span class="anchor" id="line-19"></span><ul><li><p class="line891"><img alt="http://www.manning.com/hatcher3/hatcher3_cover150.jpg" class="external_image" src="http://www.manning.com/hatcher3/hatcher3_cover150.jpg" title="http://www.manning.com/hatcher3/hatcher3_cover150.jpg" /> "<a class="http" href="http://www.manning.com/hatcher3/">Lucene in Action, Second Edition"</a> by Erik Hatcher, Otis Gospodnetić, and Michael McCandless <span class="anchor" id="line-20"></span></li><li><p class="line891"><img alt="building.search.applications.png" class="attachment" src="/lucene-java/Resources?action=AttachFile&do=get&target=building.search.applications.png" title="building.search.applications.png" /> "<a class="http" href="http://www.amazon.com/Building-Search-Applications-Lucene-Lingpipe/dp/0615204252/">Building Search Applications: Lucene, LingPipe, and Gate</a>" by Manu Konchady; Mustru Publishing; June 2008; ISBN 978-0615204253 <span class="anchor" id="line-21"></span></li><li><p class="line891"><img alt="IntroductionToApacheLucene.jp.jpg" class="attachment" src="/lucene-java/Resources?action=AttachFile&do=get&target=IntroductionToApacheLucene.jp.jpg" title="IntroductionToApacheLucene.jp.jpg" /> "<a class="http" href="http://www.amazon.co.jp/exec/obidos/ASIN/4774127809/503-9461699-1775907">Apache Lucene 入門 ~Java・オープンソース・全文検索システムの構築</a>" 関口 宏司 ; 技術評論社 ; 2006/05/17 ; ISBN: 4774127809 (<span class="u">Introduction to Apache Lucene: Construction of Java Open Source Full Text Retrieval Systems</span> by Koshi Sekiguti ; Gijutsu-Hyohron Co., Ltd.) <span class="anchor" id="line-22"></span></li><li><p class="line891"><img alt="lia3d.jpg" class="attachment" src="/lucene-java/Resources?action=AttachFile&do=get&target=lia3d.jpg" title="lia3d.jpg" /> "<a class="http" href="http://www.lucenebook.com">Lucene In Action</a>" by Erik Hatcher, Otis Gospodnetić; Manning Publications; December 2004; ISBN 1932394281 (also available from <a class="http" href="http://www.amazon.com/exec/obidos/ASIN/1932394281">Amazon.com</a>) <span class="anchor" id="line-23"></span></li><li><p class="line891"><img alt="SuchmaschinenEntwickelnMitApacheLucene.de.jpg" class="attachment" src="/lucene-java/Resources?action=AttachFile&do=get&target=SuchmaschinenEntwickelnMitApacheLucene.de.jpg" title="SuchmaschinenEntwickelnMitApacheLucene.de.jpg" /> Manfred Hardt, Dr. Fabian Theis: "<a class="http" href="http://www.amazon.de/Suchmaschinen-entwickeln-mit-Apache-Lucene/dp/3935042450">Suchmaschinen entwickeln mit Apache Lucene</a>"; Software & Support Verlag, Frankfurt/Main, Germany; September 2004; ISBN 3935042450 (<span class="u">Developing Search Engines with Apache Lucene</span>) <span class="anchor" id="line-24"></span><span class="anchor" id="line-25"></span></li></ul><p class="line867">
+<h1 id="Articles">Articles</h1>
+<span class="anchor" id="line-26"></span><span class="anchor" id="line-27"></span><ul><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Getting-Started-with-Lucene/">Getting Started with Lucene</a> (by Grant Ingersoll) <br>
+(<em>Published: January 2009 - article</em>) <span class="anchor" id="line-28"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Optimizing-Findability-in-Lucene-and-Solr/">Optimizing Findability in Lucene and Solr</a> (by Grant Ingersoll)<br>
+(<em>Published: January 2009 - article</em>) <span class="anchor" id="line-29"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Debugging-Relevance-Issues-in-Search/">Debugging Relevance Issues in Search</a> (by Grant Ingersoll)<br>
+(<em>Published: January 2009 - article</em>) <span class="anchor" id="line-30"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Scaling-Lucene-and-Solr/">Scaling Lucene and Solr</a> (by Mark Miller)<br>
+(<em>Published: January 2009 - article</em>) <span class="anchor" id="line-31"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Introduction-to-Apache-Lucene-and-Solr/">Introduction to Apache Lucene and Solr</a> (by Marc Krellenstein)<br>
+(<em>Published: January 2009 - article</em>) <span class="anchor" id="line-32"></span></li><li><p class="line891"><a class="http" href="http://cephas.net/blog/2008/03/30/how-morelikethis-works-in-lucene/">How MoreLikeThis Works in Lucene</a> (by Aaron Johnson)<br>
+(<em>Last updated: March 2008 - blog entry</em>) <span class="anchor" id="line-33"></span></li><li><p class="line891"><a class="http" href="http://schmidt.devlib.org/software/lucene-wikipedia.html">Lucene Wikipedia indexer</a> (by Marco Schmidt)<br>
+(<em>Last updated: November 2007 - tutorial</em>) <span class="anchor" id="line-34"></span></li><li><p class="line891"><a class="http" href="http://marceloochoa.blogspot.com/2007/09/running-lucene-inside-your-oracle-jvm.html">Running Lucene inside your Oracle JVM</a> (by Marcelo Ochoa)<br>
+(<em>Last updated: September 2007 - blog entry</em>) <span class="anchor" id="line-35"></span></li><li><p class="line891"><a class="http" href="http://www.onjava.com/pub/a/onjava/2007/05/24/using-the-lucene-query-parser-without-lucene.html">Using the Lucene Query Parser Without Lucene</a> (by Marcin Maciukiewicz and Daniel Owsiański)<br>
+(<em>Published: May 2007 - article</em>) <span class="anchor" id="line-36"></span></li><li><p class="line891"><a class="http" href="http://www.javaworld.com/javaworld/jw-09-2006/jw-0925-lucene.html">Integrate advanced search functionalities into your apps</a> (by John Ferguson Smart)<br>
+(<em>Published: September 2006 - article</em>) <span class="anchor" id="line-37"></span></li><li><p class="line891"><a class="http" href="http://www-128.ibm.com/developerworks/java/library/wa-lucene2/index.html?ca=drs-">Beef up Web search applications with Lucene</a> (by Deng Peng Zhou)<br>
+(<em>Published: August 2006 - article</em>) <span class="anchor" id="line-38"></span></li><li><p class="line891"><a class="http" href="http://www.freesearch.pe.kr/tag/Lucene">Lecture & Etc : Lucene index file format for Korean</a> (by Jeon Hee-Won)<br>
+(<em>Published: July 2006 - article</em>) <span class="anchor" id="line-39"></span></li><li>Cai Ziegler: "Suche nach Suche -- Apaches Lucene: eigene Suche und Indizierung"; iX 6/2006, Seite 120; Heise Zeitschriften Verlag, Hannover, Germany <span class="anchor" id="line-40"></span></li><li><p class="line891"><a class="http" href="http://www-128.ibm.com/developerworks/java/library/wa-lucene/index.html">Delve inside the Lucene indexing mechanism</a> (by Deng Peng Zhou)<br>
+(<em>Published: June 2006 - article</em>) <span class="anchor" id="line-41"></span></li><li><p class="line891"><a class="http" href="http://www.onjava.com/pub/a/onjava/2006/01/18/using-lucene-to-search-java-source.html">Using Lucene to Search Java Source Code</a> (by Renuka Sindhgatta)<br>
+(<em>Published: January 2006 - article</em>) <span class="anchor" id="line-42"></span></li><li><p class="line891"><a class="http" href="http://www.jroller.com/page/wakaleo/?anchor=lucene_a_tutorial_introduction_to">Lucene : a tutorial introduction to full-text indexing in Java</a> (by John Ferguson Smart)<br>
+(<em>Published: October 2005 - article</em>) <span class="anchor" id="line-43"></span></li><li>Daniel Naber: "Herr der Suche -- Eigene Anwendungen mit Volltextsuche erweitern"; c't 7/2005, Seite 196; Heise Zeitschriften Verlag, Hannover, Germany <span class="anchor" id="line-44"></span></li><li><p class="line891"><a class="http" href="http://blog.dev.sf.net/index.php?/archives/10-Behind-the-Scenes-of-the-SourceForge.net-Search-System.html">Behind the Scenes of the SourceForge.net Search System</a> (by Chris Conrad)<br>
+(<em>Last updated: June 2005 - blog entry</em>) <span class="anchor" id="line-45"></span></li><li><p class="line891"><a class="http" href="http://today.java.net/pub/a/today/2005/08/09/didyoumean.html">Did You Mean: Lucene?</a> (by Tom White)<br>
+(<em>Published: August 2005 - article</em>) <span class="anchor" id="line-46"></span></li><li><p class="line891"><a class="http" href="http://www.developer.com/java/other/article.php/3490471">Meet Lucene</a> (by Otis Gospodnetić, Eric Hatcher)<br>
+(<em>Published: March 2005 - article</em>) <span class="anchor" id="line-47"></span></li><li><p class="line891"><a class="http" href="http://www.theserverside.com/tt/articles/article.tss?l=ILoveLucene">I Love Lucene</a> (by Dion Almaer)<br>
+(<em>Published: January 2005 - article</em>) <span class="anchor" id="line-48"></span></li><li><p class="line891"><a class="http" href="http://javaboutique.internet.com/tutorials/HTMLParser/article.html">Unweaving a Tangled Web With HTMLParser and Lucene</a> (by Keld H. Hansen)<br>
+(<em>Last updated: October 2004 - tutorial</em>) <span class="anchor" id="line-49"></span></li><li><p class="line891"><a class="http" href="http://bilgidata.com/localhost/bilgidata/yazi.jsp@dosya=a_lucene.xml.html">Lucene Introduction in Turkish</a> Java Bazlı Arama Motoru - Lusin (by Burak Bayramlı)<br>
+(<em>Last updated: August 2004 - tutorial</em>) <span class="anchor" id="line-50"></span></li><li><p class="line891"><a class="http" href="http://www.chedong.com/tech/lucene.html">Lucene Introduction in Chinese</a> Lucene:基于Java的全文检索引擎简介 (by Che Dong; 作者: 车东)<br>
+(<em>Last updated: May 2004 - tutorial</em>) <span class="anchor" id="line-51"></span></li><li><p class="line891"><a class="http" href="http://javatechniques.com/public/java/docs/basics/lucene-memory-search.html">Lucene In-Memory Text Search</a> (by Philip Isenhour)<br>
+(<em>Last updated: May 2004 - tutorial</em>) <span class="anchor" id="line-52"></span></li><li><p class="line891"><a class="http" href="http://www.javaranch.com/newsletter/200404/Lucene.html">The Lucene Search Engine: Adding Search to Your Applications</a> (by Thomas Paul)<br>
+(<em>Published: April 2004 - article</em>) <span class="anchor" id="line-53"></span></li><li><p class="line891"><a class="http" href="http://www.darksleep.com/lucene/">Lucene Tutorial</a> (by Steven J. Owens)<br>
+(<em>Last updated: March 2004 - tutorial</em>) <span class="anchor" id="line-54"></span></li><li><p class="line891"><a class="http" href="http://www-igm.univ-mlv.fr/~dr/XPOSE2003/lucene/articleLucene.html">Lucene Introduction in French</a> Exposés Système sur le thème de l'opensource : Analyse de la structure de Lucene. (by Sun Seng TAN)<br>
+(<em>Last updated: February 2004 - tutorial</em>) <span class="anchor" id="line-55"></span></li><li><p class="line891"><a class="http" href="http://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html">QueryParser Rules</a> (by Erik Hatcher)<br>
+(<em>Published November 2003 - article</em>) <span class="anchor" id="line-56"></span></li><li><p class="line891"><a class="http" href="http://builder.com.com/5100-6389-5054799.html">Give your Web site its own search engine using Lucene</a> (by Jeffrey Linwood)<br>
+(<em>Published July 2003 - article</em>) <span class="anchor" id="line-57"></span></li><li><p class="line891"><a class="http" href="http://today.java.net/pub/a/today/2003/07/30/LuceneIntro.html">Lucene Intro</a> (by Erik Hatcher)<br>
+(<em>Published: July 2003 - article</em>) <span class="anchor" id="line-58"></span></li><li><p class="line891"><a class="http" href="http://www-106.ibm.com/developerworks/library/j-lucene/">Parsing, indexing, and searching XML with Digester and Lucene</a> (by Otis Gospodnetić)<br>
+(<em>Published June 2003 - article</em>) <span class="anchor" id="line-59"></span></li><li><p class="line891"><a class="http" href="http://www.xml.com/pub/a/ws/2003/05/13/email.html">Using Python, Jython, and Lucene to Search Outlook Email</a> (by Jon Udell)<br>
+(<em>Published: May 2003 - article</em>) <span class="anchor" id="line-60"></span></li><li><p class="line891"><a class="http" href="http://www.onjava.com/pub/a/onjava/2003/03/05/lucene.html">Advanced Text Indexing with Lucene</a> (by Otis Gospodnetić)<br>
+(<em>Published: March 2003 - article</em>) <span class="anchor" id="line-61"></span></li><li><p class="line891"><a class="http" href="http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html">Introduction to Text Indexing with Apache Jakarta Lucene</a> (by Otis Gospodnetić)<br>
+(<em>Published: January 2003 - article</em>) <span class="anchor" id="line-62"></span></li><li><p class="line862">Manfred Hardt: "Suchmaschinen entwickeln mit Java und Lucene - Wo war denn noch gleich ... ?"; JavaMagazin 9/2002; Software & Support Verlag, Frankfurt/Main, Germany <span class="anchor" id="line-63"></span></li><li><p class="line891"><a class="http" href="http://javangelist.snipsnap.org/space/Lucene-Mini-Tutorial">Lucene Mini-Tutorial</a> (by funzel)<br>
+(<em>Last updated: April 2002 - tutorial</em>) <span class="anchor" id="line-64"></span></li><li><p class="line891"><a class="http" href="http://www.javaworld.com/javaworld/jw-09-2000/jw-0915-lucene.html">The Lucene search engine Powerful flexible and free</a> (by Brian Goetz)<br>
+(<em>Published September 2000 - article</em>) <span class="anchor" id="line-65"></span><span class="anchor" id="line-66"></span></li></ul><p class="line867">
+<h1 id="Interviews">Interviews</h1>
+<span class="anchor" id="line-67"></span><span class="anchor" id="line-68"></span><ul><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/index.php?option=com_content&task=view&id=109">Interview with Lucene creator Doug Cutting</a> Podcast. Summary: Doug talks about the creation of Lucene, Nutch and Hadoop. (<em>Published January 2009</em>) <span class="anchor" id="line-69"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/index.php?option=com_content&task=view&id=108">Interview with Lucene/Solr committer Chris Hostetter</a> Podcast. Summary: Chris talks about Solr, Lucene and their usage at CNET. (<em>Published January 2009</em>) <span class="anchor" id="line-70"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/index.php?option=com_content&task=view&id=113">Interview with Lucene/Solr committer Ryan McKinley</a> Podcast. Summary: Ryan discusses Solr, Lucene and geospatial searching with Lucene (<a class="nonexistent" href="/lucene-java/LocalLucene/LocalSolr">LocalLucene/LocalSolr</a>) and his usage of Lucene/Solr throughout his career. (<em>Published January 2009</em>) <span class="anchor" id="line-71"></span><span class="anchor" id="line-72"></span><span class="anchor" id="line-73"></span><span class="anchor" id="line-74"></span></li></ul><p class="line867">
+<h1 id="Papers">Papers</h1>
+<span class="anchor" id="line-75"></span><span class="anchor" id="line-76"></span><ul><li><p class="line891"><a class="http" href="http://lucene.sourceforge.net/publications.html">http://lucene.sourceforge.net/publications.html</a> Doug Cuttings papers from the old Lucene web site <span class="anchor" id="line-77"></span><span class="anchor" id="line-78"></span></li></ul><p class="line867">
+<h1 id="Presentations">Presentations</h1>
+<span class="anchor" id="line-79"></span><ul><li><p class="line891"><a class="http" href="http://people.apache.org/~buschmi/apachecon/AdvancedIndexingLuceneAtlanta07.ppt">Advanced Indexing Techniques with Apache Lucene - Payloads</a> presented by Michael Busch at <a class="http" href="http://www.us.apachecon.com/us2007/">ApacheCon U.S. 2007</a><br>
+(<em>Presented November 2007 - PDF slide show</em>) <span class="anchor" id="line-80"></span></li><li><p class="line891"><a class="http" href="http://people.apache.org/~yonik/presentations/lucene_intro.pdf">Full-Text Search with Lucene</a> presented by Yonik Seeley at <a class="http" href="http://www.eu.apachecon.com">ApacheCon Europe 2007</a>.<br>
+(<em>Presented May 2007 - PDF slide show</em>) <span class="anchor" id="line-81"></span></li><li><p class="line891"><a class="http" href="http://www.cnlp.org/presentations/slides/AdvancedLuceneEU.pdf">Advanced Lucene</a> presented by Grant Ingersoll of <a class="http" href="http://www.cnlp.org">CNLP</a> at <a class="http" href="http://www.eu.apachecon.com">ApacheCon Europe 2007</a>. Covers term vectors, query tips and tricks and Lucene performance tuning related to indexing, searching and document retrieval.<br>
+(<em>Presented May 2007 - PDF slide show</em>) <span class="anchor" id="line-82"></span></li><li><p class="line891"><a class="http" href="http://blogs.atlassian.com/rebelutionary/downloads/tssjs2007-lucene-generic-data-indexing.pdf">Lucene: Generic Data Indexing</a> presented by Mike Cannon-Brookes, CEO, <a class="http" href="http://www.atlassian.com/">Atlassian Software Systems</a> at <a class="http" href="http://javasymposium.techtarget.com/lasvegas/index.html">TSSJS Las Vegas 2007</a>. Covers how Atlassian use Lucene as a generic indexing framework for indexing and finding arbitrary collections of complex objects.<br>
+(<em>Presented March 2007 - PDF slide show</em>) <span class="anchor" id="line-83"></span></li><li><p class="line891"><a class="http" href="http://www.cnlp.org/apachecon2005/AdvancedLucene.ppt">Advanced Lucene</a> presented by Grant Ingersoll of the <a class="http" href="http://www.cnlp.org">Center for Natural Language Processing</a> at <a class="http" href="http://www.apachecon.com">ApacheCon 2005</a>. Covers term vectors, span queries, using Lucene in a basic question answering system, and several Lucene case studies from <a class="http" href="http://www.cnlp.org">http://www.cnlp.org</a>. The accompanying <a class="http" href="http://www.cnlp.org/apachecon2005">CNLP ApacheCon 2005 Information website</a> contains many working examples using term vectors and span queries. <span class="anchor" id="line-84"></span></li><li><p class="line891"><a class="http" href="http://lucene.sourceforge.net/talks/pisa/">Lucene lecture at The University of Pisa</a> (by Doug Cutting)<br>
+(<em>Presented November 2004 - lecture notes</em>) <span class="anchor" id="line-85"></span></li><li><p class="line891"><a class="http" href="http://conferences.oreillynet.com/presentations/os2003/hatcher_erik_lucene.pdf">Introducing Lucene</a> (by Erik Hatcher)<br>
|
(<em>Presented November 2004 - lecture notes</em>) <span class="anchor" id="line-85"></span></li><li><p class="line891"><a class="http" href="http://conferences.oreillynet.com/presentations/os2003/hatcher_erik_lucene.pdf">Introducing Lucene</a> (by Erik Hatcher)<br>
|
||||||
|
(<em>Presented at OS2003, July 2003 - PDF slide show</em>) <span class="anchor" id="line-86"></span></li><li><p class="line891"><a class="http" href="http://lucene.sourceforge.net/talks/inktomi/">The Lucene Search Engine: Inktomi Seminar</a> (by Doug Cutting)<br>
|
||||||
|
(<em>Presented June, 2000 - seminar notes</em>) <span class="anchor" id="line-87"></span><span class="anchor" id="line-88"></span></li></ul><p class="line867">
|
||||||
|
<h1 id="Training">Training</h1>
|
||||||
|
<span class="anchor" id="line-89"></span><span class="anchor" id="line-90"></span><ul><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/How-We-Can-Help/Training/">http://www.lucidimagination.com/How-We-Can-Help/Training/</a> - Training on Lucene created by Lucene committers and contributors (Grant Ingersoll, Erik Hatcher and the rest of the team at Lucid Imagination). <span class="anchor" id="line-91"></span></li><li><p class="line891"><a class="http" href="http://www.lucenebootcamp.com">Lucene Boot Camp</a> - Training by Lucene committer Grant Ingersoll. Offered exclusively at <a class="http" href="http://www.apachecon.com">ApacheCon</a>. <span class="anchor" id="line-92"></span><span class="anchor" id="line-93"></span></li></ul><p class="line867">
|
||||||
|
<h1 id="Corpora">Corpora</h1>
|
||||||
|
<span class="anchor" id="line-94"></span><ul><li><p class="line862">DMOZ RDF dump - <a class="http" href="http://rdf.dmoz.org/">http://rdf.dmoz.org/</a> <span class="anchor" id="line-95"></span></li><li><p class="line862">CMU newsgroups - <a class="http" href="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html">http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html</a> <span class="anchor" id="line-96"></span></li><li><p class="line862">CMU webpages - <a class="http" href="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/">http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/</a> <span class="anchor" id="line-97"></span></li><li><p class="line862">Reuters - <a class="http" href="http://www.daviddlewis.com/resources/testcollections/reuters21578">http://www.daviddlewis.com/resources/testcollections/reuters21578</a> <span class="anchor" id="line-98"></span></li><li><p class="line862">Enron emails - <a class="http" href="http://www-2.cs.cmu.edu/~enron/">http://www-2.cs.cmu.edu/~enron/</a> <span class="anchor" id="line-99"></span></li><li><p class="line862">JRC-ACQUIS Multilingual Parallel Corpus - <a class="http" href="http://wt.jrc.it/lt/Acquis/">http://wt.jrc.it/lt/Acquis/</a> <span class="anchor" id="line-100"></span><span class="anchor" id="line-101"></span></li></ul><p class="line867">
|
||||||
|
<h1 id="Other">Other</h1>
|
||||||
|
<span class="anchor" id="line-102"></span><ul><li><p class="line891"><a class="http" href="http://www.java201.com/resources/browse/38-all.html">Lucene Resources</a> - Articles, Books, FAQs, Forums, Presentations, Wiki. <span class="anchor" id="line-103"></span></li><li><p class="line891"><a class="http" href="http://www.nabble.com/Web-Search-f2787.html">Lucene Search Forum</a> - hosted by <a class="http" href="http://www.nabble.com">Nabble</a> archiving all Lucene and Nutch mailing lists into a searchable archive/forum. The search is coded using Lucene. <span class="anchor" id="line-104"></span></li><li><p class="line891"><a class="http" href="http://www.lucenetutorial.com">LuceneTutorial.com</a> - Tips and tricks, sample applications, code samples, best practices. <span class="anchor" id="line-105"></span></li></ul><span class="anchor" id="bottom"></span></div><p id="pageinfo" class="info" lang="en" dir="ltr">Resources (last edited 2010-05-03 22:31:43 by <span title="SteveRowe @ ist-h335-d03.syr.edu[128.230.84.100]"><a class="nonexistent" href="/lucene-java/SteveRowe" title="SteveRowe @ ist-h335-d03.syr.edu[128.230.84.100]">SteveRowe</a></span>)</p>
<div id="pagebottom"></div>
</div>
</body>
</html>
@ -0,0 +1,105 @@
http://www.w3.org/TR/html4/strict.dtd
http://lucene.apache.org/java/3_0_1/api/all/overview-summary.html#overview_description
http://lucene.apache.org/java/3_0_1/gettingstarted.html
http://lucene.grantingersoll.com
http://www.lucidimagination.com/blog/
http://blog.sematext.com/
http://www.manning.com/hatcher3/hatcher3_cover150.jpg
http://www.manning.com/hatcher3/hatcher3_cover150.jpg
http://www.manning.com/hatcher3/hatcher3_cover150.jpg
http://www.manning.com/hatcher3/
http://www.amazon.com/Building-Search-Applications-Lucene-Lingpipe/dp/0615204252/
http://www.amazon.co.jp/exec/obidos/ASIN/4774127809/503-9461699-1775907
http://www.lucenebook.com
http://www.amazon.com/exec/obidos/ASIN/1932394281
Amazon.com
http://www.amazon.de/Suchmaschinen-entwickeln-mit-Apache-Lucene/dp/3935042450
http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Getting-Started-with-Lucene/
http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Optimizing-Findability-in-Lucene-and-Solr/
http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Debugging-Relevance-Issues-in-Search/
http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Scaling-Lucene-and-Solr/
http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Introduction-to-Apache-Lucene-and-Solr/
http://cephas.net/blog/2008/03/30/how-morelikethis-works-in-lucene/
http://schmidt.devlib.org/software/lucene-wikipedia.html
http://marceloochoa.blogspot.com/2007/09/running-lucene-inside-your-oracle-jvm.html
http://www.onjava.com/pub/a/onjava/2007/05/24/using-the-lucene-query-parser-without-lucene.html
http://www.javaworld.com/javaworld/jw-09-2006/jw-0925-lucene.html
http://www-128.ibm.com/developerworks/java/library/wa-lucene2/index.html?ca=drs-
http://www.freesearch.pe.kr/tag/Lucene
http://www-128.ibm.com/developerworks/java/library/wa-lucene/index.html
http://www.onjava.com/pub/a/onjava/2006/01/18/using-lucene-to-search-java-source.html
http://www.jroller.com/page/wakaleo/?anchor=lucene_a_tutorial_introduction_to
http://blog.dev.sf.net/index.php?/archives/10-Behind-the-Scenes-of-the-SourceForge.net-Search-System.html
SourceForge.net
http://today.java.net/pub/a/today/2005/08/09/didyoumean.html
http://www.developer.com/java/other/article.php/3490471
http://www.theserverside.com/tt/articles/article.tss?l=ILoveLucene
http://javaboutique.internet.com/tutorials/HTMLParser/article.html
http://bilgidata.com/localhost/bilgidata/yazi.jsp@dosya=a_lucene.xml.html
http://www.chedong.com/tech/lucene.html
http://javatechniques.com/public/java/docs/basics/lucene-memory-search.html
http://www.javaranch.com/newsletter/200404/Lucene.html
http://www.darksleep.com/lucene/
http://www-igm.univ-mlv.fr/~dr/XPOSE2003/lucene/articleLucene.html
http://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html
http://builder.com.com/5100-6389-5054799.html
http://today.java.net/pub/a/today/2003/07/30/LuceneIntro.html
http://www-106.ibm.com/developerworks/library/j-lucene/
http://www.xml.com/pub/a/ws/2003/05/13/email.html
http://www.onjava.com/pub/a/onjava/2003/03/05/lucene.html
http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html
http://javangelist.snipsnap.org/space/Lucene-Mini-Tutorial
http://www.javaworld.com/javaworld/jw-09-2000/jw-0915-lucene.html
http://www.lucidimagination.com/index.php?option=com_content&task=view&id=109
http://www.lucidimagination.com/index.php?option=com_content&task=view&id=108
http://www.lucidimagination.com/index.php?option=com_content&task=view&id=113
http://lucene.sourceforge.net/publications.html
http://lucene.sourceforge.net/publications.html
http://people.apache.org/~buschmi/apachecon/AdvancedIndexingLuceneAtlanta07.ppt
http://www.us.apachecon.com/us2007/
http://people.apache.org/~yonik/presentations/lucene_intro.pdf
http://www.eu.apachecon.com
http://www.cnlp.org/presentations/slides/AdvancedLuceneEU.pdf
http://www.cnlp.org
http://www.eu.apachecon.com
http://blogs.atlassian.com/rebelutionary/downloads/tssjs2007-lucene-generic-data-indexing.pdf
http://www.atlassian.com/
http://javasymposium.techtarget.com/lasvegas/index.html
http://www.cnlp.org/apachecon2005/AdvancedLucene.ppt
http://www.cnlp.org
http://www.apachecon.com
http://www.cnlp.org
http://www.cnlp.org
http://www.cnlp.org/apachecon2005
http://lucene.sourceforge.net/talks/pisa/
http://conferences.oreillynet.com/presentations/os2003/hatcher_erik_lucene.pdf
http://lucene.sourceforge.net/talks/inktomi/
http://www.lucidimagination.com/How-We-Can-Help/Training/
http://www.lucidimagination.com/How-We-Can-Help/Training/
http://www.lucenebootcamp.com
http://www.apachecon.com
http://rdf.dmoz.org/
http://rdf.dmoz.org/
http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html
http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html
http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/
http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/
http://www.daviddlewis.com/resources/testcollections/reuters21578
http://www.daviddlewis.com/resources/testcollections/reuters21578
http://www-2.cs.cmu.edu/~enron/
http://www-2.cs.cmu.edu/~enron/
http://wt.jrc.it/lt/Acquis/
http://wt.jrc.it/lt/Acquis/
http://www.java201.com/resources/browse/38-all.html
http://www.nabble.com/Web-Search-f2787.html
http://www.nabble.com
http://www.lucenetutorial.com
LuceneTutorial.com
ist-h335-d03.syr.edu
128.230.84.100
ist-h335-d03.syr.edu
128.230.84.100
http://moinmo.in/
http://moinmo.in/Python
http://moinmo.in/GPL
http://validator.w3.org/check?uri=referer
@ -0,0 +1,311 @@
package org.apache.lucene.analysis.core;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.util.Arrays;

/**
 * Copyright 2004 The Apache Software Foundation
 * <p/>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class TestClassicAnalyzer extends BaseTokenStreamTestCase {

  private Analyzer a = new ClassicAnalyzer(TEST_VERSION_CURRENT);

  public void testMaxTermLength() throws Exception {
    ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
    sa.setMaxTokenLength(5);
    assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
  }

  public void testMaxTermLength2() throws Exception {
    ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
    assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"});
    sa.setMaxTokenLength(5);

    assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1});
  }

  public void testMaxTermLength3() throws Exception {
    char[] chars = new char[255];
    for(int i=0;i<255;i++)
      chars[i] = 'a';
    String longTerm = new String(chars, 0, 255);

    assertAnalyzesTo(a, "ab cd " + longTerm + " xy z", new String[]{"ab", "cd", longTerm, "xy", "z"});
    assertAnalyzesTo(a, "ab cd " + longTerm + "a xy z", new String[]{"ab", "cd", "xy", "z"});
  }

  public void testAlphanumeric() throws Exception {
    // alphanumeric tokens
    assertAnalyzesTo(a, "B2B", new String[]{"b2b"});
    assertAnalyzesTo(a, "2B", new String[]{"2b"});
  }

  public void testUnderscores() throws Exception {
    // underscores are delimiters, but not in email addresses (below)
    assertAnalyzesTo(a, "word_having_underscore", new String[]{"word", "having", "underscore"});
    assertAnalyzesTo(a, "word_with_underscore_and_stopwords", new String[]{"word", "underscore", "stopwords"});
  }

  public void testDelimiters() throws Exception {
    // other delimiters: "-", "/", ","
    assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"});
    assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"});
    assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"});
  }

  public void testApostrophes() throws Exception {
    // internal apostrophes: O'Reilly, you're, O'Reilly's
    // possessives are actually removed by StandardFilter, not the tokenizer
    assertAnalyzesTo(a, "O'Reilly", new String[]{"o'reilly"});
    assertAnalyzesTo(a, "you're", new String[]{"you're"});
    assertAnalyzesTo(a, "she's", new String[]{"she"});
    assertAnalyzesTo(a, "Jim's", new String[]{"jim"});
    assertAnalyzesTo(a, "don't", new String[]{"don't"});
    assertAnalyzesTo(a, "O'Reilly's", new String[]{"o'reilly"});
  }

  public void testTSADash() throws Exception {
    // t and s had been stopwords in Lucene <= 2.0, which made it impossible
    // to correctly search for these terms:
    assertAnalyzesTo(a, "s-class", new String[]{"s", "class"});
    assertAnalyzesTo(a, "t-com", new String[]{"t", "com"});
    // 'a' is still a stopword:
    assertAnalyzesTo(a, "a-class", new String[]{"class"});
  }

  public void testCompanyNames() throws Exception {
    // company names
    assertAnalyzesTo(a, "AT&T", new String[]{"at&t"});
    assertAnalyzesTo(a, "Excite@Home", new String[]{"excite@home"});
  }

  public void testLucene1140() throws Exception {
    try {
      ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
      assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
    } catch (NullPointerException e) {
      fail("Should not throw an NPE and it did");
    }
  }

  public void testDomainNames() throws Exception {
    // Current lucene should not show the bug
    ClassicAnalyzer a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT);

    // domain names
    assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"});
    // Notice the trailing . See https://issues.apache.org/jira/browse/LUCENE-1068.
    // the following should be recognized as HOST:
    assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });

    // 2.3 should show the bug
    a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
    assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });

    // 2.4 should not show the bug
    a2 = new ClassicAnalyzer(Version.LUCENE_24);
    assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
  }

  public void testEMailAddresses() throws Exception {
    // email addresses, possibly with underscores, periods, etc
    assertAnalyzesTo(a, "test@example.com", new String[]{"test@example.com"});
    assertAnalyzesTo(a, "first.lastname@example.com", new String[]{"first.lastname@example.com"});
    assertAnalyzesTo(a, "first_lastname@example.com", new String[]{"first_lastname@example.com"});
  }

  public void testNumeric() throws Exception {
    // floating point, serial, model numbers, ip addresses, etc.
    // every other segment must have at least one digit
    assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
    assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"r2d2", "c3po"});
    assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
    assertAnalyzesTo(a, "1-2-3", new String[]{"1-2-3"});
    assertAnalyzesTo(a, "a1-b2-c3", new String[]{"a1-b2-c3"});
    assertAnalyzesTo(a, "a1-b-c3", new String[]{"a1-b-c3"});
  }

  public void testTextWithNumbers() throws Exception {
    // numbers
    assertAnalyzesTo(a, "David has 5000 bones", new String[]{"david", "has", "5000", "bones"});
  }

  public void testVariousText() throws Exception {
    // various
    assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"c", "embedded", "developers", "wanted"});
    assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "foo", "bar"});
    assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "foo", "bar"});
    assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"quoted", "word"});
  }

  public void testAcronyms() throws Exception {
    // acronyms have their dots stripped
    assertAnalyzesTo(a, "U.S.A.", new String[]{"usa"});
  }

  public void testCPlusPlusHash() throws Exception {
    // It would be nice to change the grammar in StandardTokenizer.jj to make "C#" and "C++" end up as tokens.
    assertAnalyzesTo(a, "C++", new String[]{"c"});
    assertAnalyzesTo(a, "C#", new String[]{"c"});
  }

  public void testKorean() throws Exception {
    // Korean words
    assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"});
  }

  // Compliance with the "old" JavaCC-based analyzer, see:
  // https://issues.apache.org/jira/browse/LUCENE-966#action_12516752

  public void testComplianceFileName() throws Exception {
    assertAnalyzesTo(a, "2004.jpg",
        new String[]{"2004.jpg"},
        new String[]{"<HOST>"});
  }

  public void testComplianceNumericIncorrect() throws Exception {
    assertAnalyzesTo(a, "62.46",
        new String[]{"62.46"},
        new String[]{"<HOST>"});
  }

  public void testComplianceNumericLong() throws Exception {
    assertAnalyzesTo(a, "978-0-94045043-1",
        new String[]{"978-0-94045043-1"},
        new String[]{"<NUM>"});
  }

  public void testComplianceNumericFile() throws Exception {
    assertAnalyzesTo(
        a,
        "78academyawards/rules/rule02.html",
        new String[]{"78academyawards/rules/rule02.html"},
        new String[]{"<NUM>"});
  }

  public void testComplianceNumericWithUnderscores() throws Exception {
    assertAnalyzesTo(
        a,
        "2006-03-11t082958z_01_ban130523_rtridst_0_ozabs",
        new String[]{"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"},
        new String[]{"<NUM>"});
  }

  public void testComplianceNumericWithDash() throws Exception {
    assertAnalyzesTo(a, "mid-20th", new String[]{"mid-20th"},
        new String[]{"<NUM>"});
  }

  public void testComplianceManyTokens() throws Exception {
    assertAnalyzesTo(
        a,
        "/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm "
            + "safari-0-sheikh-zayed-grand-mosque.jpg",
        new String[]{"money.cnn.com", "magazines", "fortune",
            "fortune", "archive/2007/03/19/8402357", "index.htm",
            "safari-0-sheikh", "zayed", "grand", "mosque.jpg"},
        new String[]{"<HOST>", "<ALPHANUM>", "<ALPHANUM>",
            "<ALPHANUM>", "<NUM>", "<HOST>", "<NUM>", "<ALPHANUM>",
            "<ALPHANUM>", "<HOST>"});
  }

  public void testJava14BWCompatibility() throws Exception {
    ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_30);
    assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
  }

  /**
   * Make sure we skip wicked long terms.
   */
  public void testWickedLongTerm() throws IOException {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
        TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));

    char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
    Arrays.fill(chars, 'x');
    Document doc = new Document();
    final String bigTerm = new String(chars);

    // This produces a too-long term:
    String contents = "abc xyz x" + bigTerm + " another term";
    doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);

    // Make sure we can add another normal document
    doc = new Document();
    doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);

    // Make sure all terms < max size were indexed
    assertEquals(2, reader.docFreq(new Term("content", "abc")));
    assertEquals(1, reader.docFreq(new Term("content", "bbb")));
    assertEquals(1, reader.docFreq(new Term("content", "term")));
    assertEquals(1, reader.docFreq(new Term("content", "another")));

    // Make sure position is still incremented when
    // massive term is skipped:
    DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
        MultiFields.getDeletedDocs(reader),
        "content",
        new BytesRef("another"));
    assertTrue(tps.nextDoc() != DocsEnum.NO_MORE_DOCS);
    assertEquals(1, tps.freq());
    assertEquals(3, tps.nextPosition());

    // Make sure the doc that has the massive term is in
    // the index:
    assertEquals("document with wicked long term is not in the index!", 2, reader.numDocs());

    reader.close();

    // Make sure we can add a document with exactly the
    // maximum length term, and search on that term:
    doc = new Document();
    doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
    ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
    sa.setMaxTokenLength(100000);
    writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
    writer.addDocument(doc);
    writer.close();
    reader = IndexReader.open(dir, true);
    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
    reader.close();

    dir.close();
  }
}
@ -1,35 +1,33 @@
 package org.apache.lucene.analysis.core;
 
-import java.io.IOException;
-import java.util.Arrays;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.Version;
-import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
 
 /**
- * Copyright 2004 The Apache Software Foundation
- * <p/>
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
  * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -38,277 +36,365 @@ import org.apache.lucene.util.BytesRef;
  */
 
 public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
 
-  private Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT);
-
-  public void testMaxTermLength() throws Exception {
-    StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
-    sa.setMaxTokenLength(5);
-    assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
-  }
-
-  public void testMaxTermLength2() throws Exception {
-    StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
-    assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"});
-    sa.setMaxTokenLength(5);
-
-    assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1});
-  }
-
-  public void testMaxTermLength3() throws Exception {
-    char[] chars = new char[255];
-    for(int i=0;i<255;i++)
-      chars[i] = 'a';
-    String longTerm = new String(chars, 0, 255);
-
-    assertAnalyzesTo(a, "ab cd " + longTerm + " xy z", new String[]{"ab", "cd", longTerm, "xy", "z"});
-    assertAnalyzesTo(a, "ab cd " + longTerm + "a xy z", new String[]{"ab", "cd", "xy", "z"});
-  }
-
-  public void testAlphanumeric() throws Exception {
-    // alphanumeric tokens
-    assertAnalyzesTo(a, "B2B", new String[]{"b2b"});
-    assertAnalyzesTo(a, "2B", new String[]{"2b"});
-  }
-
-  public void testUnderscores() throws Exception {
-    // underscores are delimiters, but not in email addresses (below)
-    assertAnalyzesTo(a, "word_having_underscore", new String[]{"word", "having", "underscore"});
-    assertAnalyzesTo(a, "word_with_underscore_and_stopwords", new String[]{"word", "underscore", "stopwords"});
-  }
-
-  public void testDelimiters() throws Exception {
-    // other delimiters: "-", "/", ","
-    assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"});
-    assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"});
-    assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"});
-  }
-
-  public void testApostrophes() throws Exception {
-    // internal apostrophes: O'Reilly, you're, O'Reilly's
-    // possessives are actually removed by StardardFilter, not the tokenizer
-    assertAnalyzesTo(a, "O'Reilly", new String[]{"o'reilly"});
-    assertAnalyzesTo(a, "you're", new String[]{"you're"});
-    assertAnalyzesTo(a, "she's", new String[]{"she"});
-    assertAnalyzesTo(a, "Jim's", new String[]{"jim"});
-    assertAnalyzesTo(a, "don't", new String[]{"don't"});
-    assertAnalyzesTo(a, "O'Reilly's", new String[]{"o'reilly"});
-  }
-
-  public void testTSADash() throws Exception {
-    // t and s had been stopwords in Lucene <= 2.0, which made it impossible
-    // to correctly search for these terms:
-    assertAnalyzesTo(a, "s-class", new String[]{"s", "class"});
-    assertAnalyzesTo(a, "t-com", new String[]{"t", "com"});
-    // 'a' is still a stopword:
-    assertAnalyzesTo(a, "a-class", new String[]{"class"});
-  }
-
-  public void testCompanyNames() throws Exception {
-    // company names
-    assertAnalyzesTo(a, "AT&T", new String[]{"at&t"});
-    assertAnalyzesTo(a, "Excite@Home", new String[]{"excite@home"});
-  }
-
-  public void testLucene1140() throws Exception {
-    try {
-      StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
-      assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
-    } catch (NullPointerException e) {
-      fail("Should not throw an NPE and it did");
-    }
-  }
-
-  public void testDomainNames() throws Exception {
-    // Current lucene should not show the bug
-    StandardAnalyzer a2 = new StandardAnalyzer(TEST_VERSION_CURRENT);
-
-    // domain names
-    assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"});
-    //Notice the trailing . See https://issues.apache.org/jira/browse/LUCENE-1068.
-    // the following should be recognized as HOST:
-    assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
-
-    // 2.3 should show the bug
-    a2 = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
-    assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });
-
-    // 2.4 should not show the bug
-    a2 = new StandardAnalyzer(Version.LUCENE_24);
-    assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
-  }
-
-  public void testEMailAddresses() throws Exception {
-    // email addresses, possibly with underscores, periods, etc
-    assertAnalyzesTo(a, "test@example.com", new String[]{"test@example.com"});
-    assertAnalyzesTo(a, "first.lastname@example.com", new String[]{"first.lastname@example.com"});
-    assertAnalyzesTo(a, "first_lastname@example.com", new String[]{"first_lastname@example.com"});
-  }
-
-  public void testNumeric() throws Exception {
-    // floating point, serial, model numbers, ip addresses, etc.
-    // every other segment must have at least one digit
-    assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
-    assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"r2d2", "c3po"});
-    assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
-    assertAnalyzesTo(a, "1-2-3", new String[]{"1-2-3"});
-    assertAnalyzesTo(a, "a1-b2-c3", new String[]{"a1-b2-c3"});
-    assertAnalyzesTo(a, "a1-b-c3", new String[]{"a1-b-c3"});
-  }
-
-  public void testTextWithNumbers() throws Exception {
-    // numbers
-    assertAnalyzesTo(a, "David has 5000 bones", new String[]{"david", "has", "5000", "bones"});
-  }
-
-  public void testVariousText() throws Exception {
-    // various
-    assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"c", "embedded", "developers", "wanted"});
-    assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "foo", "bar"});
-    assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "foo", "bar"});
-    assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"quoted", "word"});
-  }
-
-  public void testAcronyms() throws Exception {
-    // acronyms have their dots stripped
-    assertAnalyzesTo(a, "U.S.A.", new String[]{"usa"});
-  }
-
-  public void testCPlusPlusHash() throws Exception {
-    // It would be nice to change the grammar in StandardTokenizer.jj to make "C#" and "C++" end up as tokens.
-    assertAnalyzesTo(a, "C++", new String[]{"c"});
-    assertAnalyzesTo(a, "C#", new String[]{"c"});
-  }
-
-  public void testKorean() throws Exception {
-    // Korean words
-    assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"});
-  }
+  public void testHugeDoc() throws IOException {
+    StringBuilder sb = new StringBuilder();
+    char whitespace[] = new char[4094];
+    Arrays.fill(whitespace, ' ');
+    sb.append(whitespace);
+    sb.append("testing 1234");
+    String input = sb.toString();
+    StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+    BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
+  }
+
+  private Analyzer a = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents
+      (String fieldName, Reader reader) {
+
+      Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+      return new TokenStreamComponents(tokenizer);
+    }
+  };
+
+  /** Passes through tokens with type "<URL>" and blocks all other types. */
+  private class URLFilter extends TokenFilter {
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+    public URLFilter(TokenStream in) {
+      super(in);
+    }
+    @Override
+    public final boolean incrementToken() throws java.io.IOException {
+      boolean isTokenAvailable = false;
+      while (input.incrementToken()) {
+        if (typeAtt.type() == StandardTokenizer.TOKEN_TYPES[StandardTokenizer.URL]) {
+          isTokenAvailable = true;
+          break;
+        }
+      }
+      return isTokenAvailable;
+    }
+  }
+
+  /** Passes through tokens with type "<EMAIL>" and blocks all other types. */
+  private class EmailFilter extends TokenFilter {
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+    public EmailFilter(TokenStream in) {
+      super(in);
+    }
+    @Override
+    public final boolean incrementToken() throws java.io.IOException {
+      boolean isTokenAvailable = false;
+      while (input.incrementToken()) {
+        if (typeAtt.type() == StandardTokenizer.TOKEN_TYPES[StandardTokenizer.EMAIL]) {
+          isTokenAvailable = true;
+          break;
+        }
+      }
+      return isTokenAvailable;
+    }
+  }
+
+  private Analyzer urlAnalyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+      tokenizer.setMaxTokenLength(Integer.MAX_VALUE);  // Tokenize arbitrary length URLs
+      TokenFilter filter = new URLFilter(tokenizer);
+      return new TokenStreamComponents(tokenizer, filter);
+    }
+  };
+
+  private Analyzer emailAnalyzer = new ReusableAnalyzerBase() {
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+      Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
+      TokenFilter filter = new EmailFilter(tokenizer);
+      return new TokenStreamComponents(tokenizer, filter);
+    }
+  };
+
+  public void testArmenian() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։",
+        new String[] { "Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից",
+            "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը" } );
+  }
+
+  public void testAmharic() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም",
+        new String[] { "ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም" } );
+  }
+
+  public void testArabic() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.",
+        new String[] { "الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا",
+            "بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008" } );
+  }
+
+  public void testAramaic() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀",
+        new String[] { "ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ",
+            "ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"});
+  }
+
+  public void testBengali() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।",
+        new String[] { "এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার",
+            "শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে" });
+  }
+
+  public void testFarsi() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ویکی پدیای انگلیسی در تاریخ ۲۵ دی ۱۳۷۹ به صورت مکملی برای دانشنامهٔ تخصصی نوپدیا نوشته شد.",
+        new String[] { "ویکی", "پدیای", "انگلیسی", "در", "تاریخ", "۲۵", "دی", "۱۳۷۹", "به", "صورت", "مکملی",
+            "برای", "دانشنامهٔ", "تخصصی", "نوپدیا", "نوشته", "شد" });
+  }
+
+  public void testGreek() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.",
+        new String[] { "Γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που",
+            "σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα" });
+  }
+
+  public void testThai() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔",
+        new String[] { "การที่ได้ต้องแสดงว่างานดี", "แล้วเธอจะไปไหน", "๑๒๓๔" });
+  }
+
+  public void testLao() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ສາທາລະນະລັດ ປະຊາທິປະໄຕ ປະຊາຊົນລາວ",
+        new String[] { "ສາທາລະນະລັດ", "ປະຊາທິປະໄຕ", "ປະຊາຊົນລາວ" });
+  }
+
+  public void testTibetan() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །",
+        new String[] { "སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག",
+            "མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར",
+            "ཧ", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ" });
+  }
+
+  /*
+   * For Chinese, tokenize as char (these can later form bigrams or whatever)
+   */
+  public void testChinese() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "我是中国人。 1234 Tests ",
+        new String[] { "我", "是", "中", "国", "人", "1234", "Tests"});
+  }
+
+  public void testEmpty() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "", new String[] {});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, ".", new String[] {});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, " ", new String[] {});
+  }
+
+  /* test various jira issues this analyzer is related to */
+
+  public void testLUCENE1545() throws Exception {
+    /*
+     * Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E.
+     * The word "moͤchte" is incorrectly tokenized into "mo" "chte", the combining character is lost.
+     * Expected result is only one token "moͤchte".
+     */
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new String[] { "moͤchte" });
+  }
+
+  /* Tests from StandardAnalyzer, just to show behavior is similar */
+  public void testAlphanumericSA() throws Exception {
+    // alphanumeric tokens
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "B2B", new String[]{"B2B"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "2B", new String[]{"2B"});
+  }
+
+  public void testDelimitersSA() throws Exception {
+    // other delimiters: "-", "/", ","
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"});
+  }
+
+  public void testApostrophesSA() throws Exception {
+    // internal apostrophes: O'Reilly, you're, O'Reilly's
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"});
+  }
+
+  public void testNumericSA() throws Exception {
+    // floating point, serial, model numbers, ip addresses, etc.
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"R2D2", "C3PO"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
+  }
+
+  public void testTextWithNumbersSA() throws Exception {
+    // numbers
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", new String[]{"David", "has", "5000", "bones"});
+  }
+
+  public void testVariousTextSA() throws Exception {
+    // various
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"});
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"});
+  }
+
+  public void testKoreanSA() throws Exception {
+    // Korean words
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"});
+  }
+
+  public void testOffsets() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
+        new String[] {"David", "has", "5000", "bones"},
+        new int[] {0, 6, 10, 15},
+        new int[] {5, 9, 14, 20});
+  }
+
+  public void testTypes() throws Exception {
+    BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
+        new String[] {"David", "has", "5000", "bones"},
+        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" });
+  }
+
+  public void testWikiURLs() throws Exception {
+    Reader reader = null;
+    String luceneResourcesWikiPage;
+    try {
+      reader = new InputStreamReader
+        (getClass().getResourceAsStream("LuceneResourcesWikiPage.html"), "UTF-8");
+      StringBuilder builder = new StringBuilder();
+      char[] buffer = new char[1024];
+      int numCharsRead;
+      while (-1 != (numCharsRead = reader.read(buffer))) {
+        builder.append(buffer, 0, numCharsRead);
+      }
+      luceneResourcesWikiPage = builder.toString();
+    } finally {
+      if (null != reader) {
+        reader.close();
+      }
+    }
+    assertTrue(null != luceneResourcesWikiPage
+               && luceneResourcesWikiPage.length() > 0);
+    BufferedReader bufferedReader = null;
+    String[] urls;
+    try {
+      List<String> urlList = new ArrayList<String>();
+      bufferedReader = new BufferedReader(new InputStreamReader
+        (getClass().getResourceAsStream("LuceneResourcesWikiPageURLs.txt"), "UTF-8"));
+      String line;
+      while (null != (line = bufferedReader.readLine())) {
+        line = line.trim();
+        if (line.length() > 0) {
+          urlList.add(line);
+        }
+      }
+      urls = urlList.toArray(new String[urlList.size()]);
+    } finally {
+      if (null != bufferedReader) {
+        bufferedReader.close();
+      }
+    }
+    assertTrue(null != urls && urls.length > 0);
+    BaseTokenStreamTestCase.assertAnalyzesTo
+      (urlAnalyzer, luceneResourcesWikiPage, urls);
+  }
+
+  public void testEmails() throws Exception {
+    Reader reader = null;
+    String randomTextWithEmails;
+    try {
+      reader = new InputStreamReader
+        (getClass().getResourceAsStream("random.text.with.email.addresses.txt"), "UTF-8");
+      StringBuilder builder = new StringBuilder();
+      char[] buffer = new char[1024];
+      int numCharsRead;
+      while (-1 != (numCharsRead = reader.read(buffer))) {
+        builder.append(buffer, 0, numCharsRead);
+      }
+      randomTextWithEmails = builder.toString();
+    } finally {
+      if (null != reader) {
+        reader.close();
+      }
+    }
+    assertTrue(null != randomTextWithEmails
+               && randomTextWithEmails.length() > 0);
+    BufferedReader bufferedReader = null;
+    String[] emails;
+    try {
+      List<String> emailList = new ArrayList<String>();
+      bufferedReader = new BufferedReader(new InputStreamReader
+        (getClass().getResourceAsStream("email.addresses.from.random.text.with.email.addresses.txt"), "UTF-8"));
+      String line;
+      while (null != (line = bufferedReader.readLine())) {
+        line = line.trim();
+        if (line.length() > 0) {
+          emailList.add(line);
+        }
+      }
+      emails = emailList.toArray(new String[emailList.size()]);
+    } finally {
+      if (null != bufferedReader) {
+        bufferedReader.close();
+      }
+    }
+    assertTrue(null != emails && emails.length > 0);
+    BaseTokenStreamTestCase.assertAnalyzesTo
+      (emailAnalyzer, randomTextWithEmails, emails);
+  }
 
-  // Compliance with the "old" JavaCC-based analyzer, see:
-  // https://issues.apache.org/jira/browse/LUCENE-966#action_12516752
-
-  public void testComplianceFileName() throws Exception {
-    assertAnalyzesTo(a, "2004.jpg",
-        new String[]{"2004.jpg"},
-        new String[]{"<HOST>"});
+  public void testURLs() throws Exception {
+    Reader reader = null;
+    String randomTextWithURLs;
+    try {
+      reader = new InputStreamReader
+        (getClass().getResourceAsStream("random.text.with.urls.txt"), "UTF-8");
+      StringBuilder builder = new StringBuilder();
+      char[] buffer = new char[1024];
+      int numCharsRead;
+      while (-1 != (numCharsRead = reader.read(buffer))) {
+        builder.append(buffer, 0, numCharsRead);
+      }
+      randomTextWithURLs = builder.toString();
+    } finally {
+      if (null != reader) {
+        reader.close();
+      }
+    }
+    assertTrue(null != randomTextWithURLs
+               && randomTextWithURLs.length() > 0);
+    BufferedReader bufferedReader = null;
+    String[] urls;
+    try {
+      List<String> urlList = new ArrayList<String>();
+      bufferedReader = new BufferedReader(new InputStreamReader
+        (getClass().getResourceAsStream("urls.from.random.text.with.urls.txt"), "UTF-8"));
+      String line;
+      while (null != (line = bufferedReader.readLine())) {
+        line = line.trim();
+        if (line.length() > 0) {
+          urlList.add(line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
urls = urlList.toArray(new String[urlList.size()]);
|
||||||
|
} finally {
|
||||||
|
if (null != bufferedReader) {
|
||||||
|
bufferedReader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertTrue(null != urls && urls.length > 0);
|
||||||
|
BaseTokenStreamTestCase.assertAnalyzesTo
|
||||||
|
(urlAnalyzer, randomTextWithURLs, urls);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testComplianceNumericIncorrect() throws Exception {
|
public void testUnicodeWordBreaks() throws Exception {
|
||||||
assertAnalyzesTo(a, "62.46",
|
WordBreakTestUnicode_5_2_0 wordBreakTest = new WordBreakTestUnicode_5_2_0();
|
||||||
new String[]{"62.46"},
|
wordBreakTest.test(a);
|
||||||
new String[]{"<HOST>"});
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testComplianceNumericLong() throws Exception {
|
|
||||||
assertAnalyzesTo(a, "978-0-94045043-1",
|
|
||||||
new String[]{"978-0-94045043-1"},
|
|
||||||
new String[]{"<NUM>"});
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testComplianceNumericFile() throws Exception {
|
|
||||||
assertAnalyzesTo(
|
|
||||||
a,
|
|
||||||
"78academyawards/rules/rule02.html",
|
|
||||||
new String[]{"78academyawards/rules/rule02.html"},
|
|
||||||
new String[]{"<NUM>"});
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testComplianceNumericWithUnderscores() throws Exception {
|
|
||||||
assertAnalyzesTo(
|
|
||||||
a,
|
|
||||||
"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs",
|
|
||||||
new String[]{"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"},
|
|
||||||
new String[]{"<NUM>"});
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testComplianceNumericWithDash() throws Exception {
|
|
||||||
assertAnalyzesTo(a, "mid-20th", new String[]{"mid-20th"},
|
|
||||||
new String[]{"<NUM>"});
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testComplianceManyTokens() throws Exception {
|
|
||||||
assertAnalyzesTo(
|
|
||||||
a,
|
|
||||||
"/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm "
|
|
||||||
+ "safari-0-sheikh-zayed-grand-mosque.jpg",
|
|
||||||
new String[]{"money.cnn.com", "magazines", "fortune",
|
|
||||||
"fortune", "archive/2007/03/19/8402357", "index.htm",
|
|
||||||
"safari-0-sheikh", "zayed", "grand", "mosque.jpg"},
|
|
||||||
new String[]{"<HOST>", "<ALPHANUM>", "<ALPHANUM>",
|
|
||||||
"<ALPHANUM>", "<NUM>", "<HOST>", "<NUM>", "<ALPHANUM>",
|
|
||||||
"<ALPHANUM>", "<HOST>"});
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testJava14BWCompatibility() throws Exception {
|
|
||||||
StandardAnalyzer sa = new StandardAnalyzer(Version.LUCENE_30);
|
|
||||||
assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
|
|
||||||
sa = new StandardAnalyzer(Version.LUCENE_31);
|
|
||||||
assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test\u02C6test" });
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Make sure we skip wicked long terms.
|
|
||||||
*/
|
|
||||||
public void testWickedLongTerm() throws IOException {
|
|
||||||
RAMDirectory dir = new RAMDirectory();
|
|
||||||
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
|
|
||||||
TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
|
|
||||||
|
|
||||||
char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
|
|
||||||
Arrays.fill(chars, 'x');
|
|
||||||
Document doc = new Document();
|
|
||||||
final String bigTerm = new String(chars);
|
|
||||||
|
|
||||||
// This produces a too-long term:
|
|
||||||
String contents = "abc xyz x" + bigTerm + " another term";
|
|
||||||
doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
writer.addDocument(doc);
|
|
||||||
|
|
||||||
// Make sure we can add another normal document
|
|
||||||
doc = new Document();
|
|
||||||
doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
writer.addDocument(doc);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
IndexReader reader = IndexReader.open(dir, true);
|
|
||||||
|
|
||||||
// Make sure all terms < max size were indexed
|
|
||||||
assertEquals(2, reader.docFreq(new Term("content", "abc")));
|
|
||||||
assertEquals(1, reader.docFreq(new Term("content", "bbb")));
|
|
||||||
assertEquals(1, reader.docFreq(new Term("content", "term")));
|
|
||||||
assertEquals(1, reader.docFreq(new Term("content", "another")));
|
|
||||||
|
|
||||||
// Make sure position is still incremented when
|
|
||||||
// massive term is skipped:
|
|
||||||
DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
|
|
||||||
MultiFields.getDeletedDocs(reader),
|
|
||||||
"content",
|
|
||||||
new BytesRef("another"));
|
|
||||||
assertTrue(tps.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
|
||||||
assertEquals(1, tps.freq());
|
|
||||||
assertEquals(3, tps.nextPosition());
|
|
||||||
|
|
||||||
// Make sure the doc that has the massive term is in
|
|
||||||
// the index:
|
|
||||||
assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());
|
|
||||||
|
|
||||||
reader.close();
|
|
||||||
|
|
||||||
// Make sure we can add a document with exactly the
|
|
||||||
// maximum length term, and search on that term:
|
|
||||||
doc = new Document();
|
|
||||||
doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
|
|
||||||
sa.setMaxTokenLength(100000);
|
|
||||||
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
|
|
||||||
writer.addDocument(doc);
|
|
||||||
writer.close();
|
|
||||||
reader = IndexReader.open(dir, true);
|
|
||||||
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
|
|
||||||
reader.close();
|
|
||||||
|
|
||||||
dir.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
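A side note on the version gating exercised by testJava14BWCompatibility above: a minimal sketch (not part of this patch) of how the matchVersion constructor argument selects between the old and the new word break rules. The expected splits mirror the test's own assertions; the fragment is illustrative, not copied from the commit.

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.util.Version;

    // Illustration only: U+02C6 (MODIFIER LETTER CIRCUMFLEX ACCENT) splits
    // "test\u02C6test" under the pre-3.1 grammar, but stays inside a single
    // token under the UAX#29 rules.
    StandardAnalyzer pre31 = new StandardAnalyzer(Version.LUCENE_30); // -> "test", "test"
    StandardAnalyzer uax29 = new StandardAnalyzer(Version.LUCENE_31); // -> "test\u02C6test"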
@ -0,0 +1,204 @@
package org.apache.lucene.analysis.core;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.UAX29Tokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public class TestUAX29Tokenizer extends BaseTokenStreamTestCase {

  public void testHugeDoc() throws IOException {
    StringBuilder sb = new StringBuilder();
    char whitespace[] = new char[4094];
    Arrays.fill(whitespace, ' ');
    sb.append(whitespace);
    sb.append("testing 1234");
    String input = sb.toString();
    UAX29Tokenizer tokenizer = new UAX29Tokenizer(new StringReader(input));
    BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
  }

  private Analyzer a = new ReusableAnalyzerBase() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new UAX29Tokenizer(reader);
      return new TokenStreamComponents(tokenizer);
    }
  };

  public void testArmenian() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։",
        new String[] { "Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից",
        "ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը" } );
  }

  public void testAmharic() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም",
        new String[] { "ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም" } );
  }

  public void testArabic() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.",
        new String[] { "الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا",
        "بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008" } );
  }

  public void testAramaic() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀",
        new String[] { "ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ",
        "ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"});
  }

  public void testBengali() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।",
        new String[] { "এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার",
        "শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে" });
  }

  public void testFarsi() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ویکی پدیای انگلیسی در تاریخ ۲۵ دی ۱۳۷۹ به صورت مکملی برای دانشنامهٔ تخصصی نوپدیا نوشته شد.",
        new String[] { "ویکی", "پدیای", "انگلیسی", "در", "تاریخ", "۲۵", "دی", "۱۳۷۹", "به", "صورت", "مکملی",
        "برای", "دانشنامهٔ", "تخصصی", "نوپدیا", "نوشته", "شد" });
  }

  public void testGreek() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.",
        new String[] { "Γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που",
        "σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα" });
  }

  public void testThai() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔",
        new String[] { "การที่ได้ต้องแสดงว่างานดี", "แล้วเธอจะไปไหน", "๑๒๓๔" });
  }

  public void testLao() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ສາທາລະນະລັດ ປະຊາທິປະໄຕ ປະຊາຊົນລາວ",
        new String[] { "ສາທາລະນະລັດ", "ປະຊາທິປະໄຕ", "ປະຊາຊົນລາວ" });
  }

  public void testTibetan() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །",
        new String[] { "སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག",
        "མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར",
        "ཧ", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ" });
  }

  /*
   * For Chinese, tokenize as char (these can later form bigrams or whatever)
   */
  public void testChinese() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "我是中国人。 1234 Tests ",
        new String[] { "我", "是", "中", "国", "人", "1234", "Tests"});
  }

  public void testEmpty() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "", new String[] {});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, ".", new String[] {});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, " ", new String[] {});
  }

  /* test various jira issues this analyzer is related to */

  public void testLUCENE1545() throws Exception {
    /*
     * Standard analyzer does not correctly tokenize the combining character U+0364 COMBINING LATIN SMALL LETTER E.
     * The word "moͤchte" is incorrectly tokenized into "mo" "chte"; the combining character is lost.
     * The expected result is a single token "moͤchte".
     */
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new String[] { "moͤchte" });
  }

  /* Tests from StandardAnalyzer, just to show behavior is similar */
  public void testAlphanumericSA() throws Exception {
    // alphanumeric tokens
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "B2B", new String[]{"B2B"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "2B", new String[]{"2B"});
  }

  public void testDelimitersSA() throws Exception {
    // other delimiters: "-", "/", ","
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"});
  }

  public void testApostrophesSA() throws Exception {
    // internal apostrophes: O'Reilly, you're, O'Reilly's
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"});
  }

  public void testNumericSA() throws Exception {
    // floating point, serial, model numbers, ip addresses, etc.
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"R2D2", "C3PO"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
  }

  public void testTextWithNumbersSA() throws Exception {
    // numbers
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", new String[]{"David", "has", "5000", "bones"});
  }

  public void testVariousTextSA() throws Exception {
    // various
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"});
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"});
  }

  public void testKoreanSA() throws Exception {
    // Korean words
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"});
  }

  public void testOffsets() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
        new String[] {"David", "has", "5000", "bones"},
        new int[] {0, 6, 10, 15},
        new int[] {5, 9, 14, 20});
  }

  public void testTypes() throws Exception {
    BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
        new String[] {"David", "has", "5000", "bones"},
        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" });
  }

  public void testUnicodeWordBreaks() throws Exception {
    WordBreakTestUnicode_5_2_0 wordBreakTest = new WordBreakTestUnicode_5_2_0();
    wordBreakTest.test(a);
  }
}
File diff suppressed because it is too large
@ -0,0 +1,265 @@
dJ8ngFi@avz13m.CC
JCAVLRJg@3aqiq2yui.gm
kU-l6DS@[082.015.228.189]
37layCJS@j5NVP7NWAY.VG
"%U@?\B"@Fl2d.md
aH3QW@tw8uo2.eu
Bvd#@tupjv.sn
SBMm0Nm.oyk70.rMNdd8k.#ru3LI.gMMLBI.0dZRD4d.RVK2nY@au58t.B13albgy4u.mt
DvdUJk@61zwkit7dkd3rcq4v.BD
~+Kdz@3mousnl.SE
C'ts`@Vh4zk.uoafcft-dr753x4odt04q.UY
}0tzWYDBuy@cSRQAABB9B.7c8xawf75-cyo.PM
lMahAA.j/5.RqUjS745.DtkcYdi@d2-4gb-l6.ae
V85E9Hx7@vpf0bs.bz
MGBg2@7F3MJTCCPROS8YETM0B4-C9P7WXKGFB0.RU
rsBWOCJ@lYX0SILY4L53Z3VJPSF6.pwrawr.vdpoq.nz
dIyLrU@9A40T2ZIG7H8R.t63.tv
6dAsZKz@d33XR.IR
EnqCC@2bk6da6y08.LI
AQ9yV@Mfqq32nexufgxzl4o7q5jv3kd.lb
lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H
b6/zomNkV@8jwm-he.IN
5FLuakz.hXVkuqDt@iBFP83V6MNI3N0FRWJ9302DS-0KHRV6O.1bf59kj64uj5b6e2zfn.cm
RhIwkU@58vmet9yfddpg.3adkmhrv1px.AO
nEBk6w2Q@Bb5ib.2pay.so
AlW5CMAn@qos-53u.j91qq96d4en129szf7099kxv5lo6yo.gm
QPYBDV3.Ah/h8U@x3v444pzi.1cvgokam.PW
5Iwbiq7@p9s-2pixps9jwzyhfroxqivw8sv90r.xn--wgbh1c
AaFU9L@3yj1xqf1.cz9.ac
|iCmQ1@rum6w0a7wt.3QLD.ht71.cx
EhLTUjo@rEK.sJ44H0.GR
bHEbq3Rp@33.lKSSMY.9xaurtfle9xe.iu4810l.fj
eFcup.cPPEW@[1ae]
p907@bk3o.fvtmw2m2.Uutr83x2yt4.2nuin.EU
PpW2L5.QgP2n@9rz7.a5qi.oRH1Z.8ov.UZ
o8UgG5fewm4vr9Ai5wPS@sgh.2F-OLKLZ81DIUET.xpya0vtx.fj
aixQH@z-y.AR
jVTeWQfL."M#~t Q"@1e.oglq.ubk.SZ
6e5QQuy@N7.2cuw3x2wpddf.paycp1pc.AI
IqG6Fl@[220.112.120.54]
lWHH4eWSn@tbxyb7.jhzqxrk.lv
P1zO*RaAr@[111.99.108.22]
d00gy@[4TC]
1yNINoBU@[136.003.010.238]
Ms8ox@[_3Tuehr]
wtWDNo@1sjmcbbli196-765mt7m8o8hywft.7-ga6rsnum8v.np
"x)yO"@7le5o2rcud5ngs.Qmfmq.Jfxv8.Zznv6t6il.MIL
1hXd@f8.1kxqd3yw4j6zmb7l7.US
"8}(\$"@mu2viak0nh4sj5ivgpy1wqie.HK
Th7XoAs5@ggdb.BI
5iDbhah.xdtF1x@[59.55.12.243]
j2ovALlgm2Wcwx@5jphzt.TN
ZlaP~E.4Yk1K0F@lF6VN.M5.Nj.PRO
cFCvIJAw@l93H0R1W6V4RI0AY7RLRQR4KOEVQPEG-PDTF03V4D9A0.xZZK5.lu
8Ju2AW@1n.h7.vu
"\nkP]{"@[Vej\yo\HD]
fKWC?@qgcb.xn--mgbaam7a8h
L4BbaB@hv1.BIZ
WvSmV@qpx15vzmbtxzvi-syndl1.ML
"3|PX~Cbdq"@U3vp-7k.8c4q3sgpwt6sochundzhx.museum
LjH9rJTu@tkm.gy
vQgXEFb@maxmrbk-5a5s6o.6MZZ6IK.awjbtiva7.IL
6TVbIA@r50eh-a.la
AaASl@Bsteea.qHXE3Q5CUJ3DBG.S2hvnld.4WJWL.fk
"CN;\-z 6M"@86.qc7s.23p.ET
zX3=O3o@Yjov.7g660.8M88OJGTDC5.np
QFZlK1A@4W47EIXE.KY
1guLnQb07k@ab.ccemuif2s.lb
Jddxj@[111.079.109.147]
Hj06gcE@[105.233.192.168]
u8?xicQ@[i\21I]
CczYer}W@bezu6wtys9s.lft3z.mobi
OmpYhIL@6GJ7P29EIE-G63RDW7GLFLFC0M1.AERO
2RRPLqO@8lh0i.vm7xmvvo-r5nf0x.CY
TOc!BhbKz@F-myy7.kQWSUI7S3.net
"0\!P?".shQVdSerA@2qmqj8ul.hm
LTLNFsgB@[191.56.104.113]
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU
VGLn@z3E2.3an2.MM
TWmfsxn@[112.192.017.029]
2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV
CjaPC63@['\RDrwk]
Ayydpdoa@tdgypppmen.wf
"gfKP9"@jo3-r0.mz
aTMgDW4@t5gax.XN--0ZWM56D
mcDrMO3FQ@nwc21.y5qd45lesryrp.IL
NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp
XtAhFnq@[218.214.251.103]
x0S8uos@[109.82.126.233]
ALB4KFavj16pODdd@i206d6s.MM
grxIt96.46nCf@nokjogh2l4.nCMWXG.yt
Fgbh7@2rxkk0bvkk-v3evd-sh56gvhxlh.hhjcsg36j8qt98okjbdj9z574xdpix59zf6h80r.Gyb4rrxu.ve
uo0AX41@Fhlegm1z57j-qvf5.p8jo6zvm.sc
sjn4cz@9ktlwkqte.bv
b04v0Ct@[243.230.224.190]
F!FUbQHU@uvz7cu1l.ciz4h2.93U4V.gb
6CHec@nONUKT.nl
zbmZiXw@yb.bxxp.3fm457.va
"/GdiZ7f"@[221.229.46.3]
NJde8Li@f7a.g51VICBH.cy
6IeAft@e-3fp.Nkh7nm8.v8i47xvrv27r.pf
TC*Qopzb@xIOB3.6egz4.m-24t5wmxtmco4iy8g91o66mjgha1vjlepyffott.E5ta.p9.CF
"_3Sc_"@[193.165.124.143]
W0dwHf@[25.174.65.80]
qPkkP0@4k0vs.oaak2z.3JMTI.PK
XzZh7@[\\JmD%U]
66SGHzw@Oqnr82oml7jct0b8crwbstdhcgc3khxj7dj-t898mzro0p3-rvp-dythh.TN
ot4tPF@[AY\j]
e4seIFbl@cib.cg
B2w025e@r2H7BW16B24DG1S5DED.bg
atweEde@blk-3y.mgvoh6l9my.F6.FI
uDoPcRGW@rEBD5LUT.ly
2KQhx@Bba.u--9b5bc0.NF
tKWc2VjVRYD@[254.190.162.128]
wc3W16^@D3v2uxqqeclz.w1fd529m.DM
Njg@6S8MA.HK
"L\^4z]92"@0qp--walx.MIL
X08sWFD@62GNK.tN4.f1YXX.ug
eK6Bz1Bu@[rX;J&036]
"~`o\:"@hO4UKF.oZBWV56B.cmn.DJ
lcgUakx@[pjGd&i2]
BqdBTnv3c@wf35nwaza.ME
"a#Um{:\'\bX:"@in7tjo.uw8wil.gp
ApIbER8'@[&Y]
JTsM0c!s9CzEH@Sd.mh
hy2AOUc@uqxzl7v0hl2nchokqit9lyscxaa0jaqya1wek5gkd.NC
pY7bAVD4r@[,>T*R T]
!0axBT@03-gdh1xmk3x9.GH
vbtyQBZI@20al5g.ro6ds4.Bsg15f5.NU
2^ZhSK-FFYOh@Z2iku.rg.Z0ca1.gs
G1RLpOn."yfJpg["@mXEV8.mu
yrBKNkq@a2a1.Aifn.Ta2.dj
Wok5G@b5aqobvi5.ni
nXz9i.=EL9Yj@93r8do3ntizibg1-5-a0ziw9ugyn4bo9oaw3ygrxq-eczzv1da6gj58whvmo2.rs
Dp63hd@B1kbahyq.PL
y01rn27SFq@o0HNP8.C5.i4rvj8j338zgter7er5rkwyo5g.atnc0iuj2ke.8or6ekq0x.IO
0RiEo@08mnvbu.p661ernzjz5p7nbyix5iuj.cig5hgvcc.SO
Dwxab5@1sx5y3-umsy72nl.74lwye5.DJ
IvdZVE4xRk@0vw7ajl.AR
CvQxhXJ@d5a7qnx.ke
n7MxA4~@[4(R]
RFGzu3hD0@wbh4.sm
eOADW}BcNG@2568p3b4v.Xq3eksr.GP
AsAMWriW7.zSDQSAR6@Gg2q4rtgr.GG
cDCVlA0t@[20.116.229.216]
c=yJU+3L5@n2x3xhksf.gvreani.MZ
wfYnaA4@lzojy.4oii6w6sn-p9.kh
kdeOQ5F@vD5Y.wmmv.7rswz.1zelobcp5qxxwzjn.fOEJZ.KM
ppULqb2Z@Hv9o2ui.AO
tOHw@[IPv6:3500:8B6C::CB5E:1.124.160.137]
MWLVsL@7nhliy.O8mjon3rj-kb.t8d6bcpa5i.au
BN0EY@hh9v.p9bwgs.TN
RgiAp@d9ln.bf
PBugBo@97gcz.DJ
Fh#dKzbI@[+_]
wyqU-C9hXE@wPRBUI-WS9HXE19.LV
muC?Js@[IPv6:47FB:5786:4b5e::5675]
yLTT2xV@wdoszw9k1ork-z-t.kq.l3SEO.Lb4jx0.NA
6zqw.yPV4LkL@dA3XKC.eg
S5z9i7i3s@Vzt6.fr
L|Sit6s@9cklii1.tf
yWYqz@mw-9k.FJ
Knhj419mAfftf@R26hxll64.3qtdx6g.AL
aZYHUr6@Shyn76c67.65grky.am
ZYxn6Px@di0cqhtg.hu
"#mLl"@w1sc0g3vm.j1o4o9g.GW
WYJcFp@653xk-89oprk2im.iemhx9.CC
y5AXi@[Oa #]
nZErAGj@6sq3-p.r8KQ.aero
OMq5sBK@udg-5zp1.Dory85.SG
2bymd@Ojla1hvfpw8rrihrx.cy
5OMbw0@r2d8cn75.1VR2BJ0J3A8PY.gc0mljc-h.COOP
al6X^pQkx@pyj--2hp.lbet.TN
NkzPW4f@2-0.aaoqccwrgi4olytac0imp6vvphsuobrr115eygh2xwkvzeuj.tl
"4-b9|/,\e]h]2"@9-iiahsdlzv-v65j.FK
g8Pv2hb9@[166.176.68.63]
"IA~".Tn03w7@[\>J?]
E6aK9TaJ@j0hydmxhkq2q.Svku4saky.MU
rdF2Zl1@9fsic.C17pw9o0.vn
pCKjPa88DG&x5a@4ha07ia2jk.xk7xe8.PM
qgLb5m@nynqp.DE
qC731@["\S]
vIch1nT@[IPv6:4c2f:A840:1788:ad5:C2C6:dfae:1b1f::]
GVSMpg@2YGZ1R19XTW1TIH.Re3vg30u1xq6v7cj1wf-6m14939wvgqbl.93mztd.SG
0jq4v7PMxm@eq6teog.kO6LR3.x2p.53yltrsvgpd3.RO
zdGLZD0P@i2JQNM8.816oja8pkk5zkvyx.KM
Jp#hSH@74zkerax4.31kr.7c9-yuk.mp
Kx^0oZn@oFFA-URZ13B34J.DK
sub52@aoq7.iHF.CH
jfVSq9oAR2D@iGU0.7bp3x.4cr.sz
nalgU@Yfpbdcv8a5.n9kwz6kyi2u.thic-rws.af.TG
=uC5qVT@56g530cltpekrw.pt
QR5&kx@7qhi3bhav5ga0eva.b0sdom.bb
8DZQ7@dtr16r89fdw59q.cf
Q4pNw@6o-9weojl3r7.LS
*mfOc_CN@[G\3]
2p`tbG@c767inolrav0hg6a-ucs.y0.tw
Rop{cgBy@Wekdh0xns2um.UK
t*p05lV@017y.MR
7ZxO80@Dovepwr4l.qxfzchrn1.es8ul0vavi6gqy82.K1hc7.INT
C_Iphp@5t4rtc.id
q+m2x@Cfw.1tm52-kr.BO
47NIL@Hl68os0.66l9bsf2q.SC
vi0LyF9O@p74jz6mxby.it
xQ4jU@rQVWLWAD3T8.4-lnu.AZ
zea_0Kr@[97.59.144.249]
5HP1k|s@[068.150.236.123]
5XJZlmYk.3Du5qee@[072.023.197.244]
AvNrIHB0@[+n}oV]
"!N7/I\zhh"@[204.037.067.146]
vlJODxFF@xFO6V.i1.fgad6bjy.NO
qDe0FA@xpp1le82ndircjgyrxyzkrqu3il.oUKHVV6829P-16JILWG62KN.cr
pMF64@wssq6kh9uhxk.cA2YZVBV4JW.xX585A.ru
G3meE@[^!'OO]
"1@0UYJl"@vplkx.d2n.i3tcx3aaxut.lbb3v9.ldq.me
iTH0QND@wg9sizy.lr
9kF?opSTo9rSDWLo&W&6@xrh32ibf.F0zb6kb.BJ
a0FI1m@1olkdpz.W70a3w8qmk3.NA
"0H}r}X(p\M`/x"@rY48LPH.Axy.Ue624.TV
AQL6YBFb@Hxawb15okz.y4.y5c0e.bt
PEaNVR@m8NH9BVX5L096DRM7YTR.er
diI`Q@i5fpkuc.7zg2av.D6tzqq.CK
TCN0-Z@Tezeq9ejv.ekeab8hz14hui.il
05SnFh@jZ85JXZ.1RO99W5FYK3.uyv7g15.MP
B2Z76Rn@9yce0shfsydxetu1v4-y.rBU2M0.6ik8oapv0zho6n653il25gu4rd216uw03.MG
vGZ2K@C2osgjtel5uerwn.riihbabhh41ve84.r3l.vH6S64.vn
Nv2ZgL@[037.054.177.155]
WsdI2W@i1ULFQ1.79qfph2.eg
vJfpTf3@Hh4x2h.25m0idq3.fr
oRqbgftr@l6jg0.TV
NiynsKb@k9BTX4-FV.hc0skm-o.lv
w9uGwf@4hop8.Jb9655is.nr
"NVUW+"@6jbe.KM
QusHU6JMR@0RXKIZNH76C3.Oqwcfr779e.MH
}C5IwKv1S45vlmPaaVHhF@[IPv6:EBF6::]
T7rXlYc@4AI1LM.2o.uk
uuCiDC6c@Maar3.65hlg-wf.t3pt9.FJ
w2mNOvIUh@dx3ep7ew.ru
b#Add@9hpopo.Xg3tbjchdpt.TT
NtrgJjfj."NBwi"@[142.085.096.018]
00lF9UB@2NR2.rs
MPr42ye9@p08lcrzs.4bzxfznsh2bhgsa.CX
awwLoYLn~c2LfTEVT@fwksx.qoj94r11kw19k50k3.gd
gRZ5w9epm@p6adico3auugj5qklec.Sm4bx5.li
zfdZ67Y@1azhq.dl3xxzni2.rrj.lpclc6g4d.sl
vTWwSD4fb@uBSOHD.3g.u3mb.gf
cYFVxcC6E@F9g0b.n1339r.AU
pnuXl@s1alo2.tc
lKy64zp.Cbg8BM@y0S.6uiux8h8.0udipt.ma
|9FDgc@vbrz.3L.av4kmt.rs
skcHAu7@xD715N1.DZ
BfcgHK3@[220.136.9.224]
LCOEag@Gwm.drsa0.GL
qrNZtp3vO@a0gr.8j9cvcgy0p-3.HN
lfW2rei20XWSmpQoPY1Dl@[(N&c]
WFBBEv|@q7R2J.oy48740.pm
6H6rPx@zVJ40.xgyat.cLUX6SVFJWMLF9EZ2PL8QQEU7U1WT0JW3QR8898ALFGKO18CF1DOX89DR.1tfu30mp.CA
ytG@J4auwv4has.PS
"X;+N1A\A "@rc9cln0xyy8wa6axedojj9r0slj0v.Luy9i6ipqrz74lm5-n6f1-2srq5vdo-opef747ubdykv5hc.2lztpe.er
DQTmqL4LVRUvuvoNb8=TT@2up3.PY
NC0OPLz@kcru1s0mu.name
kBoJf{XaGl@[248.166.223.221]
pEjZPm8A@v956Y7GQV.5uu6.Ribgf20u.6e.0do1nki1t.ahy.6iy.sm
pIFWkl2@w9N0Q.MC
p=VTtlpC@w3ttqb.FO
@ -0,0 +1,206 @@
#!/usr/bin/perl

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

use warnings;
use strict;
use File::Spec;
use Getopt::Long;
use LWP::UserAgent;

my ($volume, $directory, $script_name) = File::Spec->splitpath($0);

my $version = '';
unless (GetOptions("version=s" => \$version) && $version =~ /\d+\.\d+\.\d+/) {
  print STDERR "Usage: $script_name -v <version>\n";
  print STDERR "\tversion must be of the form X.Y.Z, e.g. 5.2.0\n"
    if ($version);
  exit 1;
}
my $url_prefix = "http://www.unicode.org/Public/${version}/ucd";
my $scripts_url = "${url_prefix}/Scripts.txt";
my $line_break_url = "${url_prefix}/LineBreak.txt";
my $word_break_url = "${url_prefix}/auxiliary/WordBreakProperty.txt";
my $word_break_test_url = "${url_prefix}/auxiliary/WordBreakTest.txt";
my $underscore_version = $version;
$underscore_version =~ s/\./_/g;
my $class_name = "WordBreakTestUnicode_${underscore_version}";
my $output_filename = "${class_name}.java";
my $header =<<"__HEADER__";
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;

/**
 * This class was automatically generated by ${script_name}
 * from: ${url_prefix}/auxiliary/WordBreakTest.txt
 *
 * WordBreakTest.txt indicates the points in the provided character sequences
 * at which conforming implementations must and must not break words.  This
 * class tests for expected token extraction from each of the test sequences
 * in WordBreakTest.txt, where the expected tokens are those character
 * sequences bounded by word breaks and containing at least one character
 * from one of the following character sets:
 *
 *   \\p{Script = Han}                (From $scripts_url)
 *   \\p{Script = Hiragana}
 *   \\p{LineBreak = Complex_Context} (From $line_break_url)
 *   \\p{WordBreak = ALetter}         (From $word_break_url)
 *   \\p{WordBreak = Katakana}
 *   \\p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
 *   [\\uFF10-\\uFF19]                (Full-width Arabic digits)
 */
public class ${class_name} extends BaseTokenStreamTestCase {

  public void test(Analyzer analyzer) throws Exception {
__HEADER__

my $codepoints = [];
map { $codepoints->[$_] = 1 } (0xFF10..0xFF19);
# Complex_Context is an alias for 'SA', which is used in LineBreak.txt
# Using lowercase versions of property value names to allow for case-
# insensitive comparison with the names in the Unicode data files.
parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
parse_Unicode_data_file($scripts_url, $codepoints,
                        {'han' => 1, 'hiragana' => 1});
parse_Unicode_data_file($word_break_url, $codepoints,
                        {'aletter' => 1, 'katakana' => 1, 'numeric' => 1});
my @tests = split /\r?\n/, get_URL_content($word_break_test_url);

my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
open OUT, ">$output_path"
  or die "Error opening '$output_path' for writing: $!";

print STDERR "Writing '$output_path'...";

print OUT $header;

for my $line (@tests) {
  next if ($line =~ /^\s*\#/);
  # ÷ 0001 × 0300 ÷ #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
  my ($sequence) = $line =~ /^(.*?)\s*\#/;
  print OUT "    // $line\n";
  $sequence =~ s/\s*÷\s*$//; # Trim trailing break character
  my $test_string = $sequence;
  $test_string =~ s/\s*÷\s*/\\u/g;
  $test_string =~ s/\s*×\s*/\\u/g;
  $test_string =~ s/\\u000A/\\n/g;
  $test_string =~ s/\\u000D/\\r/g;
  $sequence =~ s/^\s*÷\s*//; # Trim leading break character
  my @tokens = ();
  for my $candidate (split /\s*÷\s*/, $sequence) {
    my @chars = ();
    my $has_wanted_char = 0;
    while ($candidate =~ /([0-9A-F]+)/gi) {
      push @chars, $1;
      unless ($has_wanted_char) {
        $has_wanted_char = 1 if (defined($codepoints->[hex($1)]));
      }
    }
    if ($has_wanted_char) {
      push @tokens, '"'.join('', map { "\\u$_" } @chars).'"';
    }
  }
  print OUT "    assertAnalyzesTo(analyzer, \"${test_string}\",\n";
  print OUT "                     new String[] { ";
  print OUT join(", ", @tokens), " });\n\n";
}

print OUT "  }\n}\n";
close OUT;
print STDERR "done.\n";


# sub parse_Unicode_data_file
#
# Downloads the specified Unicode data file, parses it, and extracts code
# points assigned any of the given property values, setting the
# corresponding array position in the passed-in target array.
#
# Takes in the following parameters:
#
#  - URL of the Unicode data file to download and parse
#  - Reference to target array
#  - Reference to hash of property values to get code points for
#
sub parse_Unicode_data_file {
  my $url = shift;
  my $target = shift;
  my $wanted_property_values = shift;
  my $content = get_URL_content($url);
  print STDERR "Parsing '$url'...";
  my @lines = split /\r?\n/, $content;
  for (@lines) {
    s/\s*#.*//;         # Strip trailing comments
    s/\s+$//;           # Strip trailing space
    next unless (/\S/); # Skip empty lines
    my ($start, $end, $property_value);
    if (/^([0-9A-F]{4,5})\s*;\s*(.+)/i) {
      # 00AA       ; LATIN
      $start = $end = hex $1;
      $property_value = lc $2; # Property value names are case-insensitive
    } elsif (/^([0-9A-F]{4,5})\.\.([0-9A-F]{4,5})\s*;\s*(.+)/i) {
      # 0AE6..0AEF ; Gujarati
      $start = hex $1;
      $end = hex $2;
      $property_value = lc $3; # Property value names are case-insensitive
    } else {
      next;
    }
    if (defined($wanted_property_values->{$property_value})) {
      for my $code_point ($start..$end) {
        $target->[$code_point] = 1;
      }
    }
  }
  print STDERR "done.\n";
}

# sub get_URL_content
#
# Retrieves and returns the content of the given URL.
#
sub get_URL_content {
  my $url = shift;
  print STDERR "Retrieving '$url'...";
  my $user_agent = LWP::UserAgent->new;
  my $request = HTTP::Request->new(GET => $url);
  my $response = $user_agent->request($request);
  unless ($response->is_success) {
    print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n";
    exit 1;
  }
  print STDERR "done.\n";
  return $response->content;
}
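To make the generator's output concrete: for the WordBreakTest.txt line quoted in the loop comment above, the script would emit the assertion below (traced by hand from the code, not copied from the generated file; neither U+0001 nor U+0300 belongs to any of the wanted character sets, so the expected token array is empty).

    // ÷ 0001 × 0300 ÷ #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
    assertAnalyzesTo(analyzer, "\u0001\u0300",
                     new String[] {  });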
@ -0,0 +1,427 @@
=========
This file was generated in part (i.e. without the email addresses)
by the random text generator at:
<http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-rosalixion-word-2gram&paragraphs=20&length=200&suppress-quotes=on&no-ads=on>
=========
waist and Wintja are relearning how dJ8ngFi@avz13m.CC we spread out, but it
here before, our dimension of story. In Bed and Marys opus in the last thing
actually having difficulties moving, Spiros rises to our hidden on your
<JCAVLRJg@3aqiq2yui.gm> orders, my love: Im seven doors and with gentle
fingers, then disappears? Whats the idea <kU-l6DS@[082.015.228.189]> of
<37layCJS@j5NVP7NWAY.VG> the "%U@?\B"@Fl2d.md pages blowing to appear on Earth
in motion (what rules did we can take a radio changes. A VOICE: Hes a
scoundrel. VOICES: Burn him! Burn him! SPIROS: Want to team of the couple is
the sweetest love aH3QW@tw8uo2.eu of the teaching teaches members to
communicate with time interplaying and linked and you marry it. It will leave
Bvd#@tupjv.sn the logic of it from hereing those people were all
SBMm0Nm.oyk70.rMNdd8k.#ru3LI.gMMLBI.0dZRD4d.RVK2nY@au58t.B13albgy4u.mt the
artist stray? Does a few rose doom the UFO with my dear Sissy says Sissy,
holding hands up a bit of DvdUJk@61zwkit7dkd3rcq4v.BD fate falls asleep. When
an internet age is ~+Kdz@3mousnl.SE currently working with his bedside table,
and brings in a shimmering timeshifty verse vortex, the dream. Victory is
hallucination, my hand for more. Mmm my head,
C'ts`@Vh4zk.uoafcft-dr753x4odt04q.UY in five. (Spiros waves goodbye to tell
you, honeybuns: The poisoning is, but no addresses. A message identical reach
across the script. }0tzWYDBuy@cSRQAABB9B.7c8xawf75-cyo.PM I grasp hold their
flapping wings and when theyre seemingly infallible information? Bookshrine of
a sip of defined the Great Horned Goddess of no feeling.) Meaw. FFIANA: So,
darling. Dont be dry white and teases him back
lMahAA.j/5.RqUjS745.DtkcYdi@d2-4gb-l6.ae in society not speaking, giggling
V85E9Hx7@vpf0bs.bz in MGBg2@7F3MJTCCPROS8YETM0B4-C9P7WXKGFB0.RU the boring
f***s! (She leaves and Him Lover, Outlanders. Plus Universe where better than
they just the land any letters in the gods. Expected, this at the threesome get
even touching myself. rsBWOCJ@lYX0SILY4L53Z3VJPSF6.pwrawr.vdpoq.nz He picks
dIyLrU@9A40T2ZIG7H8R.t63.tv up at our harem world 6dAsZKz@d33XR.IR so pop up
you will be gathered, then Wintjas hair; smells of the manuscript: Contains a
EnqCC@2bk6da6y08.LI common AQ9yV@Mfqq32nexufgxzl4o7q5jv3kd.lb universal within
this lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H web.
b6/zomNkV@8jwm-he.IN The
5FLuakz.hXVkuqDt@iBFP83V6MNI3N0FRWJ9302DS-0KHRV6O.1bf59kj64uj5b6e2zfn.cm cosmos
is filled with soap bubbles. <RhIwkU@58vmet9yfddpg.3adkmhrv1px.AO> I cant
concentrate with a nearby and he nEBk6w2Q@Bb5ib.2pay.so pours.
<AlW5CMAn@qos-53u.j91qq96d4en129szf7099kxv5lo6yo.gm> Its a wine with the joke
in the only good enough! It hit again the house. He thinks of terrorist, this
water. They were in verbatim rewritable. World by a quick eye shadow beneath
the stairway; we not easily counter weight, is filled with your own perceptions
about it. (Eve, how to talk to you really turns on its physics. The lover on
the sunflower in worship of the? (She smiles.) Greet
<QPYBDV3.Ah/h8U@x3v444pzi.1cvgokam.PW> it makes sense… Not really,
5Iwbiq7@p9s-2pixps9jwzyhfroxqivw8sv90r.xn--wgbh1c from up in the candlelight,
denser <AaFU9L@3yj1xqf1.cz9.ac> medium to say something. Shifting of that
|iCmQ1@rum6w0a7wt.3QLD.ht71.cx the eyes and there came. And now, approaching.
When the thing. What did I woke up the printers! We EhLTUjo@rEK.sJ44H0.GR shall
we are heard like a glimpse of hyperspace. It travels further and kneeled down
bHEbq3Rp@33.lKSSMY.9xaurtfle9xe.iu4810l.fj to you can walk away? FFIANA: I want
to eFcup.cPPEW@[1ae] speak. The Fountain of the background when I extract of
hers, so strange book and a royal destruction of songs of this pearl. Not often
by an incinerator vessel. Spiros, the delivery of alien exists now. Forward.
The rosy guidance of wine. Notices that is partly the pipe
p907@bk3o.fvtmw2m2.Uutr83x2yt4.2nuin.EU of the chance in Old Town. D Strange
music keeps one of the top of myth and smiles.) SPIROS: Nope, cant even
PpW2L5.QgP2n@9rz7.a5qi.oRH1Z.8ov.UZ more! says it doesnt exist! The world in
the cosmos loves us. (Spiros soon
o8UgG5fewm4vr9Ai5wPS@sgh.2F-OLKLZ81DIUET.xpya0vtx.fj here again aixQH@z-y.AR
and again he turns and blinks with you want? says Sissy looks over Wintja and
the fashions of Fit to Spiros continues. Its a situation of the barman says
Spiros. I read the river. SPIROS: Damn I said. 69
<jVTeWQfL."M#~t Q"@1e.oglq.ubk.SZ> he kept locked up into a suitcase along
her body, points a female voice of 6e5QQuy@N7.2cuw3x2wpddf.paycp1pc.AI their
part of flowers, and Marys opus IqG6Fl@[220.112.120.54] in my PROSECUTOR: Hes
<lWHH4eWSn@tbxyb7.jhzqxrk.lv> one is <P1zO*RaAr@[111.99.108.22]> unsafe at a
little <d00gy@[4TC]> secrets, we made to write: And a drink of Eternity,
Speros, <1yNINoBU@[136.003.010.238]> Mr Boore, back to me! Lovers break
Ms8ox@[_3Tuehr] the code so
<8'Hk8a@ksf7qqaa7616xw8dq80h.K6fy89c.3k-8c.g58m48v-18zh8v> recap.29 28 So,
darling. Dont leave each itself, on and devotion to all about time
<wtWDNo@1sjmcbbli196-765mt7m8o8hywft.7-ga6rsnum8v.np> has happened? ANON 4593:
What the tongue Such as she did you back and the whole moment in
<"x)yO"@7le5o2rcud5ngs.Qmfmq.Jfxv8.Zznv6t6il.MIL> your own lens, thank you
1hXd@f8.1kxqd3yw4j6zmb7l7.US arent already. It tastes them have ever come come!
The tomb. Blink to him and flips to it, but the palace. No
"8}(\$"@mu2viak0nh4sj5ivgpy1wqie.HK way… Happily: You smell of it
all and yet sure this pool Th7XoAs5@ggdb.BI of the first of his
5iDbhah.xdtF1x@[59.55.12.243] heart j2ovALlgm2Wcwx@5jphzt.TN can take to the
wind, speak to apply perfectly, you say turn toward sexual nature and lays his
ZlaP~E.4Yk1K0F@lF6VN.M5.Nj.PRO pipe. No, landing from
cFCvIJAw@l93H0R1W6V4RI0AY7RLRQR4KOEVQPEG-PDTF03V4D9A0.xZZK5.lu the fruit will
say. Dont talk like the west 8Ju2AW@1n.h7.vu wing of the letter in every
second, <"\nkP]{"@[Vej\yo\HD]> but he slipped in. Yours Spiros and there
when I imagined anything can take returning? <fKWC?@qgcb.xn--mgbaam7a8h> Where?
With? Who? Going toward his body and kisses the notion that has joined odds. A
scattered around <L4BbaB@hv1.BIZ> slowly, moving eyes on and
WvSmV@qpx15vzmbtxzvi-syndl1.ML turns toward her. She sips some way everything
began was finished my wet Earth. Warning
"3|PX~Cbdq"@U3vp-7k.8c4q3sgpwt6sochundzhx.museum for me. A City Different.
Let your myth LjH9rJTu@tkm.gy settles over it
<8myMO4@hOV209VZ-SHGBIH5FBYLTCQZSBW-U5-1.dv9> means to Our of a book he has
only but <vQgXEFb@maxmrbk-5a5s6o.6MZZ6IK.awjbtiva7.IL> the imagination, master
phreaker, <5ohpA3ww@dcpcotwccy> main railway station. Loses the dreamadoory in
the surprising success.) A note from round is her splendour in them? Mmm my
dear, were 6TVbIA@r50eh-a.la from them keywords. Boy,
AaASl@Bsteea.qHXE3Q5CUJ3DBG.S2hvnld.4WJWL.fk my own imagination, master
"CN;\-z 6M"@86.qc7s.23p.ET is the usual fashion, says to stream and appointed
space-time continuum. Dilutes your zX3=O3o@Yjov.7g660.8M88OJGTDC5.np sleep. Ive
been seen, he says the ringnot we proved? (On the pact. Thanateros is an
internet café where the Queen. Now cmon, lets take to raise the apartment. Like
a limousine and I kiss timelord slides his hand QFZlK1A@4W47EIXE.KY in words
now. Get us in the same time conceptualisation is to bed. STEFANDIS: Dont do
you think Ive put down the green lush. She often by God of a 15 minutes. The
others knew into the 1guLnQb07k@ab.ccemuif2s.lb you-know-what. Youre the luxury
hotel. Diamonds and receive the process of action. We wanted in the nominated
bird. The <Jddxj@[111.079.109.147]> woman undressing. He has him just get at
Hotel California. Its <Hj06gcE@[105.233.192.168]> about all devices. Playlist?
Initiating playlist. Timelock? Timelock on. We have a u8?xicQ@[i\21I] lock of
the apartment. Like a kto, part of Our superhallugram to hook up and
CczYer}W@bezu6wtys9s.lft3z.mobi outs. polish
OmpYhIL@6GJ7P29EIE-G63RDW7GLFLFC0M1.AERO fills the crowd, comes from the music
is impossible. SPIROS: F***. You are your voo goo.
<2RRPLqO@8lh0i.vm7xmvvo-r5nf0x.CY> Daysends burn deeply and will take
TOc!BhbKz@F-myy7.kQWSUI7S3.net this he thinks. For UFO from elsewhere. Bzzz!
Bzzzzzzzz! Bzzzzzzzzzzzzzzz! Tell them "0\!P?".shQVdSerA@2qmqj8ul.hm the leg
of LTLNFsgB@[191.56.104.113] all, until it has read it is
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU there. <VGLn@z3E2.3an2.MM> Once
TWmfsxn@[112.192.017.029] Spiros under the place
2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV as were not a house of the
rosebushes and the whateverend, feel her waist. She changes everything. We had
decided to do you know CjaPC63@['\RDrwk] this, is what did leave, pray; let us
come to, <Ayydpdoa@tdgypppmen.wf> what history as died. Strange, Spiros with
delight: That night "gfKP9"@jo3-r0.mz and gold case
<aTMgDW4@t5gax.XN--0ZWM56D> is spring: the aeon arising, wherein he returned,
retraversing the mcDrMO3FQ@nwc21.y5qd45lesryrp.IL gates, first
<NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp> to reach session. Initiating first
part of the main hall toward his own spurs. Hes an <XtAhFnq@[218.214.251.103]>
Irifix And older ones who wins? ADAM: x0S8uos@[109.82.126.233] The violin and
reality. The hidden set up to come. ROSE WAKINS: No answer. The
ALB4KFavj16pODdd@i206d6s.MM rosy pink cigarette.) Visit the supreme chest and
express in orgasm, my version of clouds contemplating existence, the horizon.
Best grxIt96.46nCf@nokjogh2l4.nCMWXG.yt of sheer emotion. Spiros laughs. Why
did he says Spiros. Ban him, he called for it, sir, says Spiros
Fgbh7@2rxkk0bvkk-v3evd-sh56gvhxlh.hhjcsg36j8qt98okjbdj9z574xdpix59zf6h80r.Gyb4rrxu.ve
laughs. uo0AX41@Fhlegm1z57j-qvf5.p8jo6zvm.sc Can we determined that when I am
Spiros, quoting Jim Morrison. Death. Design patterns, youll hear Spiros says.
They cant G decide if he was your key that we playing? SPIROS: Why wont xxx
would be imagined. Technology so beautiful to fill his diary; I like a match.
Puffs. The Star Eagle. And a person with a play with. sjn4cz@9ktlwkqte.bv
Faberge can change overcome your work, a large-scale coordination, Goddess say
is blasting away to end is <b04v0Ct@[243.230.224.190]> very tricky to stab it
as a turn me to the champagne on your obsession about his nose and
F!FUbQHU@uvz7cu1l.ciz4h2.93U4V.gb somewhere <6CHec@nONUKT.nl> else, then far
stretch. The great outdoors), puffing dried cum on the manuscript I… O
one knee, feeling and sex in igniting <zbmZiXw@yb.bxxp.3fm457.va> bomb. (A
housefly, Musca domestica, lands on into the device. Let me met. Wintja and
victory. <"/GdiZ7f"@[221.229.46.3]> For years in tipsy bliss. SISSY: (Nods.)
Yes. Now you witch. And we must remember, will tell you move but her
NJde8Li@f7a.g51VICBH.cy creation with gentle feet, naked on strange hovering
futuristic vehicles that when retrieved upon a thought, or reflected. The Crew
coming on our gratitude for you address then ventured into a dream, has begun,
she sees a 6IeAft@e-3fp.Nkh7nm8.v8i47xvrv27r.pf golden ball and 4 If you that,
Izz). Lapis, to the return all laugh. Applesfoods maybe, says
TC*Qopzb@xIOB3.6egz4.m-24t5wmxtmco4iy8g91o66mjgha1vjlepyffott.E5ta.p9.CF She.
Cmon I Stefandis.) Count me with a bed sheets, carrying gently away about time
you rather dramatic, which reaches across this day. It brings forth between
suns. How about the white sugar, leaves, sugardusty sugar, drinking of time.
Believe. There "_3Sc_"@[193.165.124.143] is the soul, W0dwHf@[25.174.65.80]
and only Spiros. Love you. Believe in the multi-leveledness of the 21st century
and exchanges a book called Sphinx. Alien Star qPkkP0@4k0vs.oaak2z.3JMTI.PK
initiated. NYKKEL HUMPHRY: Of Make ways over town.) SISSY: …and you can
turn slowly but not yet audible, appears, XzZh7@[\\JmD%U] in the silver
melt together. This way of vision sees through time). Brewing with a kiss?
<66SGHzw@Oqnr82oml7jct0b8crwbstdhcgc3khxj7dj-t898mzro0p3-rvp-dythh.TN> Her
feathers: streaming water of the wind. I started interacting in a boat, on
ot4tPF@[AY\j] her e4seIFbl@cib.cg thigh as she blinks happily. Here is
<B2w025e@r2H7BW16B24DG1S5DED.bg> what you around him, Magus says the list. Its
about what that atweEde@blk-3y.mgvoh6l9my.F6.FI there is functional. We
vanished into the computer. Up hills and enable entry using his long adventure.
Do we are all detailed trip against decent behaviour and girls. And you
alright? You evil laughter: Muah! Muah! Wont wate you all uDoPcRGW@rEBD5LUT.ly
way that there <2KQhx@Bba.u--9b5bc0.NF> is either both night And our dimension
of a bad joke, says nothing, just after time. It was indeed. Now that will make
the streets. He instable? What shall do. tKWc2VjVRYD@[254.190.162.128] Who
wc3W16^@D3v2uxqqeclz.w1fd529m.DM are heard like our love. Of the stairs too,
usually through the note nearby and you go now. If I remember Njg@6S8MA.HK how
it instead. (She chews the rosy petals, frosty and the land at first part of
waking? That we "L\^4z]92"@0qp--walx.MIL like they meet you.
<X08sWFD@62GNK.tN4.f1YXX.ug> And out into the bed. From the gods have loads of
a dark winding stairs and laughs. Why doth Her devastatingly good eyesalve, to
tell it says the Rosy Dawn. Rising, rosing, the story? (For all the UFO
shimmers from around him, but we look before eK6Bz1Bu@[rX;J&036] the Eternity
we shall never go now, look, he thinks, both go for the words said. 69 people
who live in Thy honor. "~`o\:"@hO4UKF.oZBWV56B.cmn.DJ And
lcgUakx@[pjGd&i2] here and his life has tasted of becoming more clearly. He
is dead. Calculating possible meanings of it instead. BqdBTnv3c@wf35nwaza.ME
(She whispers, smiling.) Theyll be able to help. ELLILIEILIA: You are created
the visible "a#Um{:\'\bX:"@in7tjo.uw8wil.gp world, without it will see now,
says Spiros ApIbER8'@[&Y] thinks. Every time and go to write fiction. Indeed,
love something I pop, from the play? asks JTsM0c!s9CzEH@Sd.mh the taste of the
outrageous wreck of dream, born and there
hy2AOUc@uqxzl7v0hl2nchokqit9lyscxaa0jaqya1wek5gkd.NC was still result. Search
taking <pY7bAVD4r@[,>T*R T]> out into !0axBT@03-gdh1xmk3x9.GH my dear, you
know, of saint? What did come here from the Crowinshield Garden, amongst the
warm kiss. Everything is white marble statue he is tunes faberge intricate.
Spiros, a particular frequency, vbtyQBZI@20al5g.ro6ds4.Bsg15f5.NU spinning,
trying to a trail of the narrative that it while the Queen, giggling: What are
a letter with a web we could 2^ZhSK-FFYOh@Z2iku.rg.Z0ca1.gs not a
G1RLpOn."yfJpg["@mXEV8.mu peculiar yrBKNkq@a2a1.Aifn.Ta2.dj stench of history,
when appearing in the interface as well as follows the secret I am not
teleframe the room, disguised <Wok5G@b5aqobvi5.ni> as the brilliance of the
pressure of the modern world, but
nXz9i.=EL9Yj@93r8do3ntizibg1-5-a0ziw9ugyn4bo9oaw3ygrxq-eczzv1da6gj58whvmo2.rs
whatever. The solid concrete, Dp63hd@B1kbahyq.PL and put it stumbling or why
wont the chalice with communicating with language only she says Spiros,
whispers.) We left from the second birth? The young man is part of the teapot
opens. A man in disbelief.
y01rn27SFq@o0HNP8.C5.i4rvj8j338zgter7er5rkwyo5g.atnc0iuj2ke.8or6ekq0x.IO
Outwords scratch skills against her in fairy gently
<0RiEo@08mnvbu.p661ernzjz5p7nbyix5iuj.cig5hgvcc.SO> bite of death and Wintja,
playing with the name by <Dwxab5@1sx5y3-umsy72nl.74lwye5.DJ> your dreams. He
arrives <IvdZVE4xRk@0vw7ajl.AR> the information. He swallows all the f*** me
tell her wineglass and tangles. Synchronising <CvQxhXJ@d5a7qnx.ke> weeks of a
reason why everything seemed as wet dreamery, remember? Got a purple Ipomoea,
crawls through the first stage has the riddled beginning to her in a butterfly.
You landed smoothly. Preparing to n7MxA4~@[4(R] hit a world is man. How much
in <hEhF@3TV5WQ.fbkx3f> mystery. And RFGzu3hD0@wbh4.sm furthermore, what the
|
||||||
|
edge of physics, death and eOADW}BcNG@2568p3b4v.Xq3eksr.GP touched smoothly ah?
|
||||||
|
Fashion feasible technical population resulted distinct produces
|
||||||
|
AsAMWriW7.zSDQSAR6@Gg2q4rtgr.GG recognize instance the room at the garden.)
|
||||||
|
PERNELLE FLAMEL: (To Mrs She is basically very drunk. I see you
|
||||||
|
<cDCVlA0t@[20.116.229.216]> cant I walk down naked on it to bed bed into
|
||||||
|
c=yJU+3L5@n2x3xhksf.gvreani.MZ the stairway wfYnaA4@lzojy.4oii6w6sn-p9.kh and a
|
||||||
|
kiss as though the point we see the numbers, the phone set to be displayed,
|
||||||
|
disincarnate entities can feel my wifey. Spiros empties the answering evening.
|
||||||
|
That is kdeOQ5F@vD5Y.wmmv.7rswz.1zelobcp5qxxwzjn.fOEJZ.KM simply not but I
|
||||||
|
could do to the ground, and the decanter ppULqb2Z@Hv9o2ui.AO is my friends and
|
||||||
|
says: I <tOHw@[IPv6:3500:8B6C::CB5E:1.124.160.137]> see The elves of dream
|
||||||
|
telepath posts, but makes a gentle people with a redirection is generally said
|
||||||
|
Tadeja. Its over, or of ages, you excuse us walk off to Talk A never-ending
|
||||||
|
one. I remember how cute she saw the neat fuse weds sexiness. A thick paperback
|
||||||
|
book itself continuouslyposition, have heard in the noise We are presently at
|
||||||
|
the first of the death MWLVsL@7nhliy.O8mjon3rj-kb.t8d6bcpa5i.au mask there is
|
||||||
|
accurate to meet by to this important worse material in separate directions.
|
||||||
|
Spiros stands, and arrows and orange from a witch and down the mix? he feels
|
||||||
|
Wintjas 13th century. arling peach, cosmos loves playing with silver trays with
|
||||||
|
the <BN0EY@hh9v.p9bwgs.TN> language as RgiAp@d9ln.bf I still result. Search
|
||||||
|
taking time and time <PBugBo@97gcz.DJ> in time. Spiros, how else or
|
||||||
|
Fh#dKzbI@[+_] nonexistence. Eros never guarded the horse stops. Move. Stop.
|
||||||
|
Move. After earlier squads mysterious source. It inscribes in case you are
|
||||||
|
applause. The world was a. With swiftly cover <wyqU-C9hXE@wPRBUI-WS9HXE19.LV>
|
||||||
|
it as in yourself! 5 Yes, now comes from half walls of us, my love. I am your
|
||||||
|
vast operation is all worked out? O how long ago. It glimmers, node of the
|
||||||
|
voice, the middle of the introducing of utter hell on the car unlocked and mind
|
||||||
|
around midsummer and not believing in <muC?Js@[IPv6:47FB:5786:4b5e::5675]> his
|
||||||
|
lower lip. From the wind say I was inspired to live in a crime. I know, and
|
||||||
|
find people have been reported found a digital electronics. Is the pillow,
|
||||||
|
touched falls down their part of the computer and our world
|
||||||
|
<yLTT2xV@wdoszw9k1ork-z-t.kq.l3SEO.Lb4jx0.NA> come walking in
|
||||||
|
<6zqw.yPV4LkL@dA3XKC.eg> the stuff to help. Websight. Dedicated hosting
|
||||||
|
wordpress blogger coined Sister <S5z9i7i3s@Vzt6.fr> short Sissy Cogan. She
|
||||||
|
answers. It is finished his way that includes getawayways. Compiling focused is
|
||||||
|
this case? Then turn on. ANON 4593: What are pretty kinky a story about the
|
||||||
|
L|Sit6s@9cklii1.tf strangest child a Syntax of passage and Wintja and
|
||||||
|
reportedly after demolition, decay, and twists up to tales endwhere. This way
|
||||||
|
there to born from elsewhere. Bzzz! Bzzzzzzzz! Bzzzzzzzzzzzzzzz! Tell them that
|
||||||
|
words from sleep but no poet yWYqz@mw-9k.FJ am I woke
|
||||||
|
Knhj419mAfftf@R26hxll64.3qtdx6g.AL up in a kiss made it is heard on Midsummer
|
||||||
|
our cards like big fane beneath the secret of the <aZYHUr6@Shyn76c67.65grky.am>
|
||||||
|
criticising crowd of the gods and here to... TADEJA: (Suddenly appearing in
|
||||||
|
ZYxn6Px@di0cqhtg.hu your "#mLl"@w1sc0g3vm.j1o4o9g.GW voo goo. Daysends burn
|
||||||
|
deeply happy, for large bite of his artistic inspiration without feeling as the
|
||||||
|
season. One within the dreary WYJcFp@653xk-89oprk2im.iemhx9.CC kingdom. (She
|
||||||
|
steps up with Christine says. The Blooming of y5AXi@[Oa #] The time regularly
|
||||||
|
we are, she nZErAGj@6sq3-p.r8KQ.aero kisses the gods? I am in his brother I met
|
||||||
|
years ago. The word <OMq5sBK@udg-5zp1.Dory85.SG> is because we had. But yes
|
||||||
|
just like a while. Were not matter; W it going? Im sad to
|
||||||
|
<2bymd@Ojla1hvfpw8rrihrx.cy> where he arrives and information, and smiles
|
||||||
|
victoriously. 5OMbw0@r2d8cn75.1VR2BJ0J3A8PY.gc0mljc-h.COOP Mmm, you Rudy. And
|
||||||
|
there and day soon is phone and come <al6X^pQkx@pyj--2hp.lbet.TN> back?
|
||||||
|
Rephrase that we are good, I leave the gifts of html or center of her right to
|
||||||
|
him to where the room.) SPIROS: Okay, sure, Ill be a page is to
|
||||||
|
NkzPW4f@2-0.aaoqccwrgi4olytac0imp6vvphsuobrr115eygh2xwkvzeuj.tl put in a novel.
|
||||||
|
I want two. "4-b9|/,\e]h]2"@9-iiahsdlzv-v65j.FK Passing
|
||||||
|
<1AhBt@od77y.s9ZZP531YKW> now. I go identify what we are always win. Anyway. I
|
||||||
|
know. It is here reaching your script and toward the edge of shortcuts. We came
|
||||||
|
the Saussiepan and <g8Pv2hb9@[166.176.68.63]> its mysterious ways. I remember
|
||||||
|
"IA~".Tn03w7@[\>J?] how am waking to, that the secret about it will say the
|
||||||
|
redpurple wine, Our plan all within this moment you can hear me, I heard on the
|
||||||
|
clouds. A channel is hidden visible world, without ground turned real, their
|
||||||
|
every E6aK9TaJ@j0hydmxhkq2q.Svku4saky.MU way to a radius of
|
||||||
|
rdF2Zl1@9fsic.C17pw9o0.vn apple tree and says Spiros. Here I saw her. He walks
|
||||||
|
by the landscape of secrets of paper. I love it! But I could call the
|
||||||
|
<pCKjPa88DG&x5a@4ha07ia2jk.xk7xe8.PM> world with the manuscript I$A!-(B O
|
||||||
|
nothing. Im proofreading the most dead branch in qgLb5m@nynqp.DE the screen,
|
||||||
|
then I did you can remember. qC731@["\S] (If you can it completely insane and
|
||||||
|
we had expected something our sacrament. We were back. Esc. (Shuffle.
|
||||||
|
Hallucinate a sip of grandeur, said he suddenly a tree, and ground turned out
|
||||||
|
the publisher. O about it all. Lets
|
||||||
|
<vIch1nT@[IPv6:4c2f:A840:1788:ad5:C2C6:dfae:1b1f::]> stay with us. Mooneye
|
||||||
|
today and thinks and check
|
||||||
|
GVSMpg@2YGZ1R19XTW1TIH.Re3vg30u1xq6v7cj1wf-6m14939wvgqbl.93mztd.SG the modern
|
||||||
|
world.) Sissy stands sipping redpurple wine) and you
|
||||||
|
0jq4v7PMxm@eq6teog.kO6LR3.x2p.53yltrsvgpd3.RO up to be wilds. Spiros 99% dead.
|
||||||
|
Calculating fastest and chewing she directions!
|
||||||
|
zdGLZD0P@i2JQNM8.816oja8pkk5zkvyx.KM Take my body and executed with your own
|
||||||
|
forehead, born from Egypt come back? Rephrase that what is the night. There is
|
||||||
|
here. Cant you think. And shadows Jp#hSH@74zkerax4.31kr.7c9-yuk.mp keep
|
||||||
|
dreaming of letting the elves of modern civilisation? Does that fly softly
|
||||||
|
through the surface. Of the modern world we must Kx^0oZn@oFFA-URZ13B34J.DK find
|
||||||
|
sub52@aoq7.iHF.CH them, baby. Rosy Dawn. jfVSq9oAR2D@iGU0.7bp3x.4cr.sz You have
|
||||||
|
become clear edges. And why you told our skin and
|
||||||
|
nalgU@Yfpbdcv8a5.n9kwz6kyi2u.thic-rws.af.TG places, spread on your air on her
|
||||||
|
earlier. The effects will be the song by and his eyes are gods. Expected, this
|
||||||
|
pool of illusions, that makes its golden geisha ball on Clocksmith Alley. Two
|
||||||
|
female form orbits the two chords on a god, in correct dose to see a book.
|
||||||
|
JOEL: Spiros thinks as he felt, came out out! We are switched in the matter. I
|
||||||
|
shall I can imagine the Crowinshield Garden the aeon arising, wherein he once
|
||||||
|
again. You suddenly changed. And the rose; Will you? Now listen. (She smiles.)
|
||||||
|
Greet it comes everybody. And what the room, disguised noise We are you in 3D:
|
||||||
|
you come. ROSE WAKINS: =uC5qVT@56g530cltpekrw.pt I used to read it: Barbapappa
|
||||||
|
(a gay pirate captain) <QR5&kx@7qhi3bhav5ga0eva.b0sdom.bb> and walks up again,
|
||||||
|
when you are here; working on to. 8DZQ7@dtr16r89fdw59q.cf Now join you? Im
|
||||||
|
slowly in white <Q4pNw@6o-9weojl3r7.LS> bed and language whitespace
|
||||||
|
sensitivity, readability, less punctuation, etcetera. Things had to the Dark
|
||||||
|
signal has him with gentle blood on to the ages. Stops laughing. Sharpens eyes
|
||||||
|
from the *mfOc_CN@[G\3] starway, Down the uniqueness of the bed
|
||||||
|
2p`tbG@c767inolrav0hg6a-ucs.y0.tw and Rop{cgBy@Wekdh0xns2um.UK giggles. Spiros
|
||||||
|
soon here for ignition of the thing Mr and fetches her t*p05lV@017y.MR you hold
|
||||||
|
their own code. Your brain and Nora in longer. Stay tuned. We
|
||||||
|
7ZxO80@Dovepwr4l.qxfzchrn1.es8ul0vavi6gqy82.K1hc7.INT must marry me? Eyeglance
|
||||||
|
is is not hear. He takes a good marijuana. And I had very fluid. It cant G
|
||||||
|
C_Iphp@5t4rtc.id decide long hair shaved like a while. I have telephones and
|
||||||
|
waited. He sits there is humanity within its authors and snaps a touch
|
||||||
|
q+m2x@Cfw.1tm52-kr.BO it candlelight tuning. Just a young man go to the
|
||||||
|
ad-section.) 47NIL@Hl68os0.66l9bsf2q.SC THE F*** UP. Spiros slowly. Lets rock
|
||||||
|
on his father and remember: the sea soothe his paternal grandfathers old days.
|
||||||
|
In to the Honey Queen, xxx 14 hristytio (Ill catch us. Compliments always. Did
|
||||||
|
you rather unnoticeably. Faster than we got this cosmos. The engineers of
|
||||||
|
terribly intricate fantasy turned semitransparent, the people have done subtly.
|
||||||
|
It is THIS bulls***? Count me Rudy$A!-(B Sissy laughs. Can we are breadcrumbs
|
||||||
|
vi0LyF9O@p74jz6mxby.it on Clocksmith xQ4jU@rQVWLWAD3T8.4-lnu.AZ Your usage
|
||||||
|
<zea_0Kr@[97.59.144.249]> of <5HP1k|s@[068.150.236.123]> being a shimmering
|
||||||
|
green. 5XJZlmYk.3Du5qee@[072.023.197.244] Her feathers: streaming
|
||||||
|
<fzQlo2R.HSbkNYi@ay8a5so81x2fgkt2rv> rays Wanna take AvNrIHB0@[+n}oV] a marble
|
||||||
|
from the letter the brink of wheat from the dull ghost of the article atomrss
|
||||||
|
am I? (He hangs up "!N7/I\zhh"@[204.037.067.146] dreaming? A PEDESTRIAN: I
|
||||||
|
already told you than the world now, as vlJODxFF@xFO6V.i1.fgad6bjy.NO though he
|
||||||
|
walks off the flowers. He lifts
|
||||||
|
<qDe0FA@xpp1le82ndircjgyrxyzkrqu3il.oUKHVV6829P-16JILWG62KN.cr> his head we
|
||||||
|
passed on a hint of the worldmask of the people we dance, sweet boy, my dear,
|
||||||
|
matter of bridging millennia, I was it works, and Adam says: And the fathers
|
||||||
|
pMF64@wssq6kh9uhxk.cA2YZVBV4JW.xX585A.ru that we are in this G3meE@[^!'OO]
|
||||||
|
stuff!? The wunderdome. I saw "1@0UYJl"@vplkx.d2n.i3tcx3aaxut.lbb3v9.ldq.me
|
||||||
|
your prophethood of the ones too far! iTH0QND@wg9sizy.lr Further! Into the
|
||||||
|
planet. He sits on the Other. We came from Egypt to save our dear Sissy slid
|
||||||
|
her earlier. Ill tell me away with bright asterisms sparkling around
|
||||||
|
9kF?opSTo9rSDWLo&W&6@xrh32ibf.F0zb6kb.BJ in this young woman in the whispering
|
||||||
|
wind and hands to speak, but using his <a0FI1m@1olkdpz.W70a3w8qmk3.NA> nose.)
|
||||||
|
Nevermind. WOMAN TWO: And furthermore, what about the script, says the sun.
|
||||||
|
Large-scale thinking of a witch? Spiros hears music
|
||||||
|
<"0H}r}X(p\M`/x"@rY48LPH.Axy.Ue624.TV> and a world as well as a poem
|
||||||
|
AQL6YBFb@Hxawb15okz.y4.y5c0e.bt ever, indestructible. A newsboy hands
|
||||||
|
<PEaNVR@m8NH9BVX5L096DRM7YTR.er> Spiros gives the drawing. Looks like to the
|
||||||
|
<diI`Q@i5fpkuc.7zg2av.D6tzqq.CK> living out TCN0-Z@Tezeq9ejv.ekeab8hz14hui.il
|
||||||
|
loud from the house. He is disappearance, as I know on the centre of your
|
||||||
|
section gives rise from 05SnFh@jZ85JXZ.1RO99W5FYK3.uyv7g15.MP which it be close
|
||||||
|
now, dream once: The stars
|
||||||
|
<B2Z76Rn@9yce0shfsydxetu1v4-y.rBU2M0.6ik8oapv0zho6n653il25gu4rd216uw03.MG> are
|
||||||
|
your vGZ2K@C2osgjtel5uerwn.riihbabhh41ve84.r3l.vH6S64.vn presence. UFO. You,
|
||||||
|
Spiris, are born in Plomari. Steal back door, from his mother: Is it to live in
|
||||||
|
their doors are like, Nv2ZgL@[037.054.177.155] two weeks with
|
||||||
|
WsdI2W@i1ULFQ1.79qfph2.eg us across his way to crack matter projected by four
|
||||||
|
<vJfpTf3@Hh4x2h.25m0idq3.fr> initiated. NYKKEL HUMPHRY: Of <oRqbgftr@l6jg0.TV>
|
||||||
|
the woman casts a drop of your amulets NiynsKb@k9BTX4-FV.hc0skm-o.lv and the
|
||||||
|
morning light. Plasticity of the sun bursts can feel it, rises from lands on
|
||||||
|
w9uGwf@4hop8.Jb9655is.nr the realization of his field of the branded mania.
|
||||||
|
Spiros says a dream? Something happened. And watching the Other, she says Fast
|
||||||
|
Eddie. Bandaging the greeter info. The Eagles song by the fragrance of
|
||||||
|
Timescity Express, is there, by zero. -F<>Your star alliance. SPIROS: (Quietly,
|
||||||
|
smiling faces twitching in an envelope yellowed by It, producing open minds.
|
||||||
|
This mighty Nile dynamic magnetic strip that sticks). To Ellileilia, two
|
||||||
|
fingers with the moon undersea settling for "NVUW+"@6jbe.KM insanity! He
|
||||||
|
rises from the QusHU6JMR@0RXKIZNH76C3.Oqwcfr779e.MH end of wine ride the Logos
|
||||||
|
and the cosmos loves <}C5IwKv1S45vlmPaaVHhF@[IPv6:EBF6::]> playing with care of
|
||||||
|
myself up pitch/volume of a violin. The rosy dawn, Adam says: The transforming
|
||||||
|
magic touch the waist, working-A transparent, yet its not easily let us
|
||||||
|
changelings who all across Fountain Square where no telephones ring? Spiros
|
||||||
|
recently. MARY T7rXlYc@4AI1LM.2o.uk BRISCOLL: What if
|
||||||
|
uuCiDC6c@Maar3.65hlg-wf.t3pt9.FJ I w2mNOvIUh@dx3ep7ew.ru dreamed of a new
|
||||||
|
dimension of her in Wintjas direction. -F<>Word frequencies, underground river,
|
||||||
|
announced on your location. Thought b#Add@9hpopo.Xg3tbjchdpt.TT magic. The
|
||||||
|
violin kept talking to stab it was born from our own life as the dream I was
|
||||||
|
practically there I want to smalltalk about the station, and so recap.29 28 So,
|
||||||
|
darling. We are truly is. Its on Crete. On a curtain in a copy of the
|
||||||
|
<NtrgJjfj."NBwi"@[142.085.096.018]> afterlife, the grass and the lovers pot!
|
||||||
|
Transistoryness? Radiosyncromatics? Syntax of the modern world The mirror at
|
||||||
|
<00lF9UB@2NR2.rs> the day soon <MPr42ye9@p08lcrzs.4bzxfznsh2bhgsa.CX> there,
|
||||||
|
doing it will you will be disclosed, says Saussie. Become the future just
|
||||||
|
happened? Spiros picks it at the time transfer was
|
||||||
|
awwLoYLn~c2LfTEVT@fwksx.qoj94r11kw19k50k3.gd successful. Initiating first
|
||||||
|
somewhere else. Its from gRZ5w9epm@p6adico3auugj5qklec.Sm4bx5.li the
|
||||||
|
imagination, Spiros saw the words: They cant remember yet? I add to Any time
|
||||||
|
here, she says. Butterfly as a dark zfdZ67Y@1azhq.dl3xxzni2.rrj.lpclc6g4d.sl
|
||||||
|
soil run free What do you see, is the natural radiance of death reports,
|
||||||
|
<vTWwSD4fb@uBSOHD.3g.u3mb.gf> is welcomed. Layer upon layer of Thy angels are
|
||||||
|
crystal. Red <cYFVxcC6E@F9g0b.n1339r.AU> King and its my opinion. You were
|
||||||
|
back. Hows it with-A liquid purple. She looks at pnuXl@s1alo2.tc a man
|
||||||
|
lKy64zp.Cbg8BM@y0S.6uiux8h8.0udipt.ma on with me. Say the beginning from the
|
||||||
|
manuscript and |9FDgc@vbrz.3L.av4kmt.rs bare plot. Queen told by the redpurple
|
||||||
|
wine back where we all be rather dramatic, which they had skcHAu7@xD715N1.DZ
|
||||||
|
always <BfcgHK3@[220.136.9.224]> include Sir Nykkel Humphry, master of the
|
||||||
|
inverse confine survey the rosy guidance of her eyes on <LCOEag@Gwm.drsa0.GL> a
|
||||||
|
river here, to the latest of Sissy. He again set the old Egypt. He returns to
|
||||||
|
the looser you ready? Y Were ready. Spiros qrNZtp3vO@a0gr.8j9cvcgy0p-3.HN says
|
||||||
|
Sissy. Wintja sing: Ive put ourselves in him, he has taken a
|
||||||
|
lfW2rei20XWSmpQoPY1Dl@[(N&c] third <J761x@0IKGVUDNQ.3xpb> person. Whats it
|
||||||
|
will bring the room on the book in trees and WFBBEv|@q7R2J.oy48740.pm smiles a
|
||||||
|
pipe he enters the chat room (The church music in comic book aside
|
||||||
|
<6H6rPx@zVJ40.xgyat.cLUX6SVFJWMLF9EZ2PL8QQEU7U1WT0JW3QR8898ALFGKO18CF1DOX89DR.1tfu30mp.CA>
|
||||||
|
Rosalias Dawn, pray, Man through ytG@J4auwv4has.PS concrete. Could we? Were
|
||||||
|
taking over a
|
||||||
|
<"X;+N1A\A "@rc9cln0xyy8wa6axedojj9r0slj0v.Luy9i6ipqrz74lm5-n6f1-2srq5vdo-opef747ubdykv5hc.2lztpe.er>
|
||||||
|
hippie up the detail. Rain begins to being married to the designing of love.).
|
||||||
|
Made myself a funeral. Who are created DQTmqL4LVRUvuvoNb8=TT@2up3.PY (Is that
|
||||||
|
hyperspace at the merriest of us for that. -F<>Christofle is heard
|
||||||
|
NC0OPLz@kcru1s0mu.name him a huge and wraps if he find? He is or so much more
|
||||||
|
complex than kBoJf{XaGl@[248.166.223.221] we are heard within the
|
||||||
|
<pEjZPm8A@v956Y7GQV.5uu6.Ribgf20u.6e.0do1nki1t.ahy.6iy.sm> woman of The
|
||||||
|
<pIFWkl2@w9N0Q.MC> mirror of p=VTtlpC@w3ttqb.FO dream, born from that we are. A
|
||||||
|
VOICE:-A
@ -0,0 +1,643 @@
http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-dickens-word-2gram&paragraphs=50&length=200&no-ads=on
http://c5-3486.bisynxu.FR/aI.YnNms/
ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R
sJ5PY.b5t6.pn/
http://Z%441S6SK7y%30K34@35j.np/RUpp%D1KnJH
[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/
file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7
http://[a42:a7b6::]/qSmxSUU4z/%52qVl4
http://Rcbu6/Oxc%C0IkGSZ8rO9IUpd/BEvkvw3nWNXZ/P%17tp3gjATN/0ZRzs
file:///2CdsP/U2GCLT
Http://Pzw978uzb.ai/yB;mt/o8hVKG/%231Y/Xb1%bb6v1fhjfdkfkBvxed?8mq~=OvF&STpJJk=ws0ZO&0DRA=
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH
Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb
ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J
ftp://213.7.210.47/%e5pFkj6e6Jczc/ypJGG/z%663jYR/37IxLQBPr/Ciq50EUIdueyj
ftp://alv0e-s.88.nJ2B34.ps/s0TgnaY?yOQUt/18CY%16IzNSQu/LaT3dD?io%80LBw%cdXDHU3/ppMyv/DbLDzyceaC/Goa%f3gn/5ebODAP0NAOD/6NkL/uP7CW/gS5TnaS
http://278phvcx21/QGOy%395L/yy5NurSi8S/gMr%553%C9q0S
z156ky.MU/.b%daGKqc/jYZkXK1WE/Abx589H6tADH
Ftp://x68qwf2j7k.nc/qyZfwo%8a/
ftp://yd.ng:40759/L1XAGIuzdMsjUIUwQ%F5/oDjgDsU/&Ze0Wz/ZeWR6cu;type=a#yDMuky
Ftp://Xmswrxn8d-1s.pe.gm/dB6C3xTk%D3x/EKOiTmk%7c/API/0cdgpi;Type=a
FILE:///rKnQkS0MAF#tM%53_2%03%d6ZICH
ftp://R5ecjkf1yx4wpskfh.tv0y3m90ak.0R605.se:51297/zpWcRRcG/1woSqw7ZUko/
file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf
HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND
file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#
http://1qvgjd1.TP/7oq5gWW/Gwqf8fxBXR4/?Br,q=ayMz0&1IO%370N7=;Sl1czc2L+5bRISfD+w&ygP3FhV%E1w36=2Rx
ftp://5SCC6BUYP.Knf1cvlc22z9.1dc3rixt5ugyq4/5OnYTSN/QpCdo/t3zqkI/pn5skT/oJgrGy7
http://2dkbeuwsto3i3e8jaxi6su9wjlmwygtpdp7g65611z-2bbr82uhjqkdv2jrh7.KZ/FiSvI/aaB&dPQ%42kLdM
FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB
ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY
N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/
http://ah-2d4.ASIA/qmp
http://195.139.142.211/%53fk2%90Pj3/V75ySPv@K5ISv/eUiXDAYc#e0%59
dFU69ED1EJ0MLT.G8ef3o.bn:53301/klFVsh/YInBJE/SEIzo5EIoe3
http://[3349:5FBD::213.207.213.043]/k4PbSpylXc%92Qckx/aQfV7X0V/25RN%49ZzvavLgf/re9~I?OP=nXo&oi0mm=f0e5&KK8=9V%13&Wd0%1Ce'0qnS=CFlgRw&4%89V6AON8%53jQhwUvln=r%6edz&W=Pq+T&a%F4H%51p%d9ZIU8l=uyA8S5J%95+Wb&xi3KNa1P-Xwu=&8tCH=BwNWf+%37G16&rsyBG=MnU4S
5pn1q8q0tg.JP/%74XuKtp%F3fqLuGO/CMeC2IRRl./
http://bmm4qto-360l-pbemedo4.SA
sll-9eg.W6pv.rs/WtYGg51Pt%68/R8fsX4a
FTP://r13oym76cysnp77r5sidj8sqgxzpl3ls4xzj.JE/ta%e0PA/5Jwza65o%7D6Uno/RyO%b1B/v6C8yo5K
http://2b4ne4.5ji.oubrfdx24.UZ/%69kMsLF
tv2yy8dnp.tN8DIWG.gr/ladfwSflp/Zr3YKvt/l1QlvEc
file:///eK9K3g%47VnPYStl/GKGHYM6b%23nc
file:///LtZpL/%1CU8lVvcWrTR/
File:///yCPVGaCm/hHqFToHKZw/%29zmDPSQ6183%C8RfpdKQqkCd%51X/lyJABDQymQDL
igth-n.Mcw.ar/LjMApEho5gp825BK/afaST/HWKafQMBv/
https://l89xkmwfh-hprhz.tcay299q.2zruch0/uv/iM/
file:///6yT8LrgRZG%10HsZ/CP1zI%98gHFiT/zAx4%EB/tBv6V8kS
file:///
file:///iYHw2RpUc/9MPLbyq7gTVSx/pYnzm4E
FTP://[9198:015F::]/pU7tr7Zhgt/~cLd7w7.Gb/4MvIKc6iy%58vN/AGZ08o/uT%1e7vtcZD;type=d
ftp://0dfw3ob8y.Jri1p4f-8.NG/DpihVuu3RJ/kEKaPppvl
http://pZRLI6.ma/wAex4MoQ/jUv6Vh%5C2
file:///F8%A5Go9qV/UYzwol/#839W58%4D!
ftp://zo.dz/BSI/enk1F/XjnYRqwHBAyIYdC/rTXmyPP@Smcp:/%E9r7n
nhzbw2.qyevbi.gn/Oxbk%737lUb/OBx7/VX67/%C4fxQxvns/4fNNJ9FjR/7YeGTW/7VOLjOD4/P%89.1Forp&3/wLVBbhK/3GdjIWB
Ftp://4ie4a.fl8g3c5.wjvan5m3j.4sawo3mof.TH/wfcrCzx8%B50W24/ZxqhiPCLDP/SZbReZ4h7
Https://j3bhn0.elhqoer--c.BI/ijN66pIVKxXjOmg/xCHrfc%feFdJPd04IG
ftp://[8F7F:9507:280A:3192:EA30:EBD2:87.9.102.149]:4954/AwLZnTre/8g3Vo%6doz/Uw=dU%70nxbo
6u.vkhga15zezgvdc68uii7dh0svzopjpr3.NG/rXE/6T~KV%06Kq/iO5vG/G2S9YU
HTTP://lZSO.fr/%baWLoH/rsdViX1jMX/jKQg/aWFY%eekWu%17DTY/ASpif739Hht/hHM/oXdG6y/Es2c2Q/UVz6TevIJa
a1JQT907R.ou7o81.al/3Vp@VDZp%9c
http://g746.mhi.xtzovtn01w87au9.tc/%8Dn1XEzK/FsoFQ/xuL0wOc/YNP%53OS3/w5sIf7ox/t%22S9TxaTtK3/K%74%4EabDPe
http://92-uzyzm.pr/UwJkzP/
http://46cda.e92kuq1029.Igb3rjaqtc.Xgpak.T50lamdm4sscw1i8mq1-8.wx6wzqxd92z68sbs43l6.JO/Q7RzRWFz2/
[BD39::62:47.178.113.23]/U4woqa77Wyygc2/cltcO5Xw%EDWZT/%5Fd@GP5vV#wUMoflXqTOsj
Tw95.XN--WGBH1C/CK%fb%EF9/s%F4W7je06JY%49r/Y2L9fzlfd#fprt97Y%72
file:///xjYnAHV2/g%21ZmKfq
file:///JDyfQk8%669N~2L%ecj1/6PySMx8z%19%36/HP5GhmnNinF0p/vavqKxyBLV0a
ftp://v2WJ0E6EX.gw:46170/R1g73Yli4ts/K%09PIdRA/DntZ@
pVRN-P.ky/2UMoA1sYRpmUyd0/fEShDdCyd69Nyh6f/6zP%cevC69rdf0#XaOTpyS%73TQ
http://4u3o/BKdhwRyzG
file:///LdsHfPABFz1vRD1OB6Yl/RS6&1Gmz/mfYul/
ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sgn6&X5EiZdZ0WhTX3T/fa%f3Azz
z3ymb.KM/DdnrqoBz=YtxSB
FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0
nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc
ftp://085.062.055.011/bopfVV/
ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs
file:///vNLDR/Q7QXgZ/6ApHTc6bN4/yihY9ZGy%3BlK
ftp://p2SJ4CE1KFC8CSRL2OY2ALA5TJOCN0FEM-W.biz:51412/
078.085.085.242/kqKkywur6Kv4Qn/-CJv6i1Nxc/
qow6.7RF9YUV12HR9CCFTWUTQRONLAM4PN82GI8E.GQ/oxUj%a6Ch2/bjjphp%34IJ/%65NQDGFab%14B%51M/QtBe
file:///pQ%8CkB8ipZ%2cyZGMf/8USgpQ%54%48e/jCflvdl%3Ec
165.195.223.067/Q3DEaK/58Z29OKkyF/fk9Vl/dKLw%7FR3Fzo1YsTPxmm/XiABg5j23J%1avyv
f1442jv.3w4cg5hy.EE/8hsz%802pLxgSlD%edIt/ESbwLYo/tdn9mrEynmJF~
[dfb9:d316:677E::2B7C]/gsORr%b7gc/?ehIX5=GTM0co5(Dmn91JN&8J=8W7wFuQfZk7sM#vYfk~Km
[11b2::35.78.41.76]/vVfZvUimVO/K9hfOd/4gZUL=j%09PGr#o%23LnBOkk9
https://oL2UQ.yLN-U053DA.bf/CfFIFwe/ZbgHFvLfbEYrStIS2h3r/pqd%14rY/aR5a8hx/aKWFJechP8DT/ypmeBjL7rcbUr
https://[3790:ad57:0B63::e5f7:f6ac:164C]/Obax;zcD/Y%48%9a/Z2xcdar
bl60k0jqkc9.oow84o1.BF/Xly5cTna/BzoQuHi3r8e/o5BDNrvT/=6HRdBjH/Mrp5%02/p%e9pT2Ae
ftp://Bs3ceuxd8ii66gt.X8wwdpt.BB:27095/3BfkvfzcmTS/FTffh&S/gIWvJ5Kd/AlOQ%3EnO
http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w
zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1
ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ
HTTPS://56aderic0knmip9lkqdqag14.uk:45885/lELiK:/vF%4C5Enwqy/P5NGJ2b/dD6sg1yMV
ftp://vlt.3g45k63viz2.tcnm3.UA:60664/AJ9iqYk%c1/uKbohn2/K%D1kequ4z8rxFpJ
Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1
7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb
ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB
ftp://lv56pdepzu0b0fo-04qtxv5tt2jc0nsaukrhtz5-e3u1vcb517y3b135zl.e0r1hson.dk/3TVoqjp6%1FCFSkt/006VZfho/gxrWxgDawM3Uk
Ftp://7n977.Niyt.2fgkzfhj.q7-DJ.Ow7a.it/5zfRi3PO8/1zfKT9%421tP/?SazEijJq%710COQKWeLE/TdUc%b2u/2AxBw9%4BUN6Zp4Z/KfUZd1MTdPv/L4m1tI3/WJvcK1
FILE:///a7kRxh8/h43TYOY6J5%31B/ZfuF%9c3/
[46C8:60FE:7ff2:79cd:69E1::221.191.034.036]/Q2MQ8mttjsMF/UqrKq0W%E6N1#YfB7A8CHYa
https://hnk6fx.2uxg1e9o.pm/I=LKn%a2n4/J&RntX3mUxZ/B1Q.Ilpk3Icq%7fZ/ia:4DLuk8pvsD/mpED3egQJfH/O0es5zrzwWQIC%21K1
ftp://133.195.101.060/U9x99/nrirgTvZnm/QLNzsm
file:///RN%7EGq55Z%D1E/U0BQ1De/o8a@zHbAMS/GOA4KUcR/uaOR6C%f1Y/u5d7
http://[f63f:096e:ee87:792d:CD31:A1B2:83FD:7322]/tnFLqVSRa5h1/%EDX1y4cxiv/GIo.OM0/M4lBr/xgHa=
file:///Td=wh:cuTxKx/4B8%dc%616s&sE/snROY6GQc
ftp://1fcu78n.COOP/eDRJd%82k8FEI/7fbDLiQncgOl
http://obp6jiork.KP/pOedzk/Lo1uNQ796m/hjLXBOr%25AB1/
file:///j3m%a5o5blRxq2/8aDBkHng/OR1ixi5h8kX/nCUz2aDz/
file:///V1tX7rM/7zk
file:///1qw4T%8BKBi3CKv/dxm6%7f8s78R/%83sF6J/K%33qfB
ftp://tyt7r.u6ier1pxipif5.BW/vSq6akPyGUI/wVJ67VXTQeuKM/yB4zYqPh/0RuHq%58G/rBTgdr5F
Ftp://4dx-s0az06e.Su7ir.SA:16277/HWkL7hR1SW/RzpkWipV/LCYQ6/gLpY%807L6/60H1z96%90xdQ/P9jx4DVu/oFa6c#gQo%57wv0vN
FTP://o--B02WG9T7-BXW-RVAJCJN1IALU9EX65WSEXCRHM.Aeh-m.cat:34416/3q9yW%53m/FJ9&U84ik9&e/R.l/ji0sjWb%5edu12nbNSW5c/YMGfLcesN
HTTP://lMxNbKW@tq1imryvi.P7g5o8np1.SK/um4Z2TESWBSrcN/fNehEdgh/sW%6fCP/b2fqBsG
http://Lgwt071.sn/HPn4x/%46zCwYZzy/wzQVoL2sT%E3Yl?974Zu=X+JuSbGjrO&Xu3Fz%a8%19%5159f0r=afHdI3%F7FNrs&Mb0hjV7d=&I43eztc=1k:3+uSz+kdJP5c+bRkUBkF
izojrse33.9WTVFAANL2Y.ly/i3ae/5%0Br%f5yL3/MsnfAk#T6,v%51Ev
ftp://[8714:3F6E:aa8:c8fc:4F41:b8ee:44.74.99.35]/790Ug0mWq/7yBPb/pzh4dTX
ftp://[ACC9::DD55:A45B:7a6b:177.179.158.116]/i1q3SzWTmO%09p%A3/FWDWq8u2Q/7
Nw2m4j4.Br9kvjf-9.3wac-fh0uk.nysyu-emjwy.cat/PGDh:oW%5F/H34QSRwe
6f9f3nny.mq/ai%cb2SZP/qfjOd2mpEH/LUZ.fxv/#3NaTgg
ftp://R1x5yr2ij24e42wlojnp1i-b2bsacd01stfe5-10m0-3z6cwb3aflzrgoo.it:8665/oFbo12T%3Bng=x/%B2FcEUXPHAP/Ni0qL%0bPN4#yhp%5dO6
http://[C794:4d71:ACD4:7AC2::30CE:B0E7]/T8igmbW%6C/DE1%1DyI457M#brpF
HTTPS://rI7HAX2OS.bsajd56xb48.FO/fn9eA4%0A/G96ogw%69SGis/1V0hqVLN6zaQC1
http://toncwiacr.0px.g7pud.MOBI/EdoW/qUMMnH
file:///LkP1%5BcrQ/bnkvBi6F/Q3IRXB7Kt8mvDZ/ZKwDAp%a3/
http://6DAK.8I6FGLS.t5YJHK9GCUVU4EB6NO513HBTWAU0XP5.GL/LDO%8CDB%82p9#
file:///%46f%c5KRhPp/skp1X/OdoS-J1foeE/5H5RIWoip
Http://180.036.254.028/VSiroQpjS
d54n.Agqa6.7e4.JOBS
https://5t33av.5u7.RU/SugrkGKg/FDf6cYm5QdHk%b3z
file:///tGHsUEMaQS/VLn1%6Au#uGnrvY
lm.27.jv4quihwsp.mw/mwCDm0cweP/A8wSZIQcZGV/uKBboAnqevGJEQT5d
ftp://6g4.qe-s9txq3o8vvr5e.5YWZGPDM9Q.820d8wtribsgglbrnkafno126s8vflph9tfmt0mwew/qC0bInpp/fqxKQLzN/hAj/6PsngV;TYPE=I
file:///aR3sSgC/GJu
w26535-k.Ut2.MS/pQP1Rx/NUKUyRSr/21x/CcgOcN4U/Jzw%C6Ft/n5Mu9X
ftp://75.22.51.21/wFDRPO/NLI1ZSecRAfFEAy/kZ4whP%C3A/
ftp://1h3yyf3d8sffjx3rsf3k2y7c459c2gx/%2FfoFDEyWygHgKAuo/KhJZkBlC5r3%99/9I8SMy/25_&y0
Ftp://215.239.176.156/tNfD%09mvdOM%28zx/fc3DTw2nf/#2kySKJ
http://Vyt.4ferfwbkbm.owtk.me/LlUtIjj/BDovC/6vJ4Wbk/ihtBt4d%acVl/ywEBIdg%3dHb/
ftp://Lq.es/%B1ZPdTZgB2mNFW/qre92rM
file:///IZ47ESCtX%aatQab1/V553gjR?Me/#9%68qPw
file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH
ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T
79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO
Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z
ftp://[fd77:4982:C37F:a0a1:7651:E09C:117.093.145.017]/2l91g/s%79lJmUiZ/%A5R2qsJ
[62c0::]/d1lmSzoB/5OBVnzn/kOXW%D23
Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%bed=uY5hO+s+IKk1S&Q=HHXEC+Gof86QIRHy&35QY5=
FILE:///#F9Bgl
jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg
ftp://892f7.oel50j.32.9qj1p-g7lgw.MR:48021/XNKbk2PZQXSvOuGnOAnATDt3/XfHyJtvoC/PW7YrSgf#LmGWJgPw
http://sisas.ua/4CU60ZLK4VgY8AR89
FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2
Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz
file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg
http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG
ftp://tw7d-6yu.im:2055/%66qbqzss/OmPGW;type=d
FTP://zst.tn/QcUpaA/VKvJ2/JN6AKew/iXYIiHm7mfPFmD%21E5/yTQpoiqdbaaS1/LnzOX#VqsobH
eta0q7.2r79g.AC:34736/%abp87fVdPCY/PvO8Uk4WoLF#A*HP1A
https://w9zhko2rttzndzivll92.sbzum.UZ/bgy8l68/Ix72mHu/zlA4CI/IQjc%CD9%255FxJ8A/Dbb%4eTCRu
[2582::]/Mhm%55MWThR4Ne5mZ/xniX3IdG/
ftp://224.3.121.112/G1w1g%1DdRi/T6Eb_NegqJs
ftp://tn.z-o3vn3n4.5wg7.gs/loxilPpcLnsI/topa0Ez/Na%70Dcde
syt7m.TD/2dxrQQvBXC78/Z754hngiYcM/eM%3CaeYeXX/nmUwguwk97VGL/
http://isqogte5i.c-3oixcmy.SY/jlPVRlTs4v/enCZWc3Sl1dJ7/M5GTSZx/Ga%cce%63cLzTJvBodJ
bYIAYQ.9mlnx.OM/t1KK3u/iyQFS4EGHN3uKogL3WGG/6wn5Q5ndq8kHO%734cxgEc
Http://wvfftjk.do/a0%644z/?ATzWOxO1k=%85ulHR
http://fnoY09@bm8xcfjyfiremhz9.sr/E4Rrq2/vQjQKj9fwV6r51/mn3x8he7/W4xCQs%FBvrzb
ftp://vxfr4g5ka.kn/TZSPrYGzv/KzuB%731GA
file:///vjS%f1/ktgHPAL/=v0cZ/WTpVo1/i6XlMCkNI/kukAwc8/thWUblm/c4ICXp/f8AHkj%1C4d%9107v%44hN/
Ftp://t4qxt.hd9ok.aUQ7GIMBGXP.IS/%7ey71ndfLh/m%4A5P%75153tpU0hY73KfO6o/E%7aAkUlK3hX3Fg
FTP://gJ8MRF8UYWFW.iq/cdX7RYOqS/6E6XUh%fcdHS1%dcoDwHgpFId
http://01s0hfwz.TL/C9uEC/K9uWhknP3AxHW/%c56I1zL5Rfdd/sLJeP/2QkQNP/QcW%8aA0A/
Http://gRWSMJ90XZNPAPHL90FB.zfyopzk/hMq%1fD/A5jQ%efiH4Csr/HTFm14uSXf/jW50yvQ6Mb/EJrahj19Y9Y
http://i0.XN--MGBAAM7A8H/Uy6czi/rrAt8esL4/iL2xLka/B3j&7Inmt7g34
file:///aZcnMM/Hnr1PCn/wlTztS7SpL
http://2lv8030.fimc0v081i/cyEUoud6w/gfAlE/iQP:8/dZCue4cKVM3bs/JU%d5ZUA1t
ftp://kF0NLTJGD.HM:44827/Y6CgKRiW/4r7G/Db%bb=7xD/tE/t4ooQHdBsrw/ZvgcX/qTCarGQWa~MKW5nn8NF/dcy%1caO%b8/Di%947%2cB
ftp://4ufofbu/pmLZX%f2wJcQO/B%e0b%64oLObaEx&C/QViF1ohg/Rffvf
dYC57.CI/=G0dg
185.224.223.157/h8BdA%FEv/KLK2f%86LS/gwA4rKKHLarf/b.EyE
FTP://uhw3qgl0bvfp568.e5wkz1l.Dug75a1j.US/R%AE5DNL%C4vMl-TXG/BDSu8PXNYU42aY/MR-hx1/mC2:SJqsCN%d7#smDUT
File:///q3iMCFXfge/Bh%cdvWuy1w%E7Er/Jmmf7DkqSG%35a/VUvFz#8%510SIu
file:///G%E7R44SI/L0Xsc/c15wyz?8Bs4rN7
FTP://eQ23LB4U9CX.vcrnx.2fa.k6rjf8b.pe/8L163hbbt/J%26zcQf/lkieT5x/Efa/A2gUk/o%ef9PIBhPODaAn/p8%55Wsfap/BdTfZ4zm%2fbQt/SY7rMh
file:///7RVk/qIRRZ0b/
FILE:///Rq_/ec93s/HMB24%8esN/%4bO%cayWnOF
File://Yk7ie7.xn--80akhbyknj4f/y4e4%2a0yHu
ftp://4ps9b29prywnt6-1xt9t4cgi8sbwjj6obbw1x-2y-v2tft1eei67i.Hk0u4zwmd7o9z.jp/o4R1sdAnw/Hu408%CB/HdQ6cFhG
ftp://7efqt.LB/EIX~:Q24/b0QhE%751s%F66R7A/IFxxOD2v/uOOPv5jARBJsf
[A645:D622:eb6b:D59B::D48D:f334]/Ulld404y/IM~6P3
FILE:///%16b72yhVw/2BPPCZg/KwHAJ0X3QT/I49wMwmls2j%15xkYc6qFZ
FTP://octvv.2je8.oJRUDE.02y4htgs.es/zwVuzXoFKJ0k9
http://[3A16::]/1rhxoXw9Cv/eWk5gHpYJ/v9gRo/un2Ygo91B%A1f2p/15hJ%A5o%A19TLjzzRrGUT
iG4PTCCG.3zti905z3.ci/42j5.oKj/FZmOBY
Http://pclly.36XVKSPBC/Nja5D
148.020.113.014/ASuvNkg/Zcwt4/PjpwkEUVHbjkeKOgL/%f9hibk/NT9kSmJF%1A/5FaP@BkLf/jTre%balt
tnjbgbiparss2x-xav2mitawqn9ema07kfk6kjck.xC1U6J.hm/scUu%E5D/qZ9K%1CX.d3mWJb/-SdvwN/nFS0ZdZDNQA
http://[3173::]/YHDIJlMkv/oFpVHGs/7Dn%61pqA%23/ZnaIIPD%6cj/
http://i4f8l.sc/WuJNKVuflVGa8/%85hi4B1G/mPs/1KfX%12/WswWA%B3i1OVsF/Z;wC5kkDQ/XIOtrdBl%D9%33
https://v24gyfj.xfrc5dy6xuz3paev4rggl3xeg3vxzw7cz98pbcgum8xlczt-n.SU/Mb=PxgWX/J04ScMxk8u/oH%A08nv/3oXR85tM/
Ftp://c82a3i5u.tf/v%D5/%05QNNYI&ssnoF.
file:///MaIzEiaVY/ssIPwkItF%EBIUy
Ukg.sb/Q24uLBUl
HTTP://Aphi-iog2t.PE/SSwgnY7af/VabUxcEU2i/JI%434fkP%7cO#EWmOFU%5cy
file:///FXYZhobB0jX%5BD7PIt8H8u
Http://asn7b.LA/13Qp3t0dY/Mk0ldhZyJP/rRgIZlOu/hqt1qM9NT5tAGD07T
Http://mb2.NI/eOXXAC0MNiEvJ/ul6ydqIPg/3JhlWx21r~sH/ZemaBb7j17X
ftp://7i27:54542/B3rW/LSNLFJ%74J/%e4NHDP1svTU/Kkpr%C1%6cO/2wWp%f4MiYLhgWGSF/u0wNwK0B
ftp://f8X.cat/L7Gj-OSdF/QBrO%f3okEZ/L%bdvAyxC5
ftp://[6CA9:93a1::]/?y057O5/l9C:/XsBy2so5tX=D%71me/
file:///%33P.AyK6nB/QkN%011K/iicc3HEIE%C0/v_7Wl%fdzMCBnfC
HTTPS://zv21qs.ekofwyy.f1pd7snnae0n2nzfdclk1sf4hybx97u17piaj5-lul89bxrf775koowj.as/BAc33xOV7
ftp://ko%5BM@183.207.071.131/tq~2QxL/d%D397GnaQgKtPMOsCp7fyVobgZ/Nhnp4LAKEvQ1V/1xFn%cbR%7BVU3
https://fiuubt.bc-yrorta.kdn.M8mascygepb0csr.vpifk.G-p35wx.er/4wvko7/Wo9PsbrLI
file:///LRVqPEfRevRI/nHtsA5k4iilQ/22vu%674y
http://jX-U69Z4.3vuws.41h3q22bzs.o3hng9:6629/Qj=CQmh9/%9aCSTfa%0aXvFQ/u0zAICPSGUx/MqP32INW%00mp?ZmIZc=5o1okD&WEDMM6Qnm=0w5T&gajnp=GFwK+Ct8Pds+KRsnyPq+2UFmx+cwnDnvyn+Zf0VFXyk2+Aw67fL
file:///XRDAcY5GGmj3/WoHYehPpF7/HS9LhdHOe%9fS#!SZge2
file:///UIIGOxv6jvF2%c0/%A8J3%677Gmq8im1zklKhqx/HMhCSY2QcyxvL/
http://Qhk9z.zm/cOGBen/mBsDycEI5V7L1s%84WUj7863/p%5f~okuRD51b0M?b%F2d%67ujGr=oh8PWUtK&j6uX7baX=&sg3RUocA9W=m5IaF&JWH9G=fyiOtnC3+7RJA+ippw96rvu+BxtGg&F6f1=jmPS&3PE0xX5=TGV%5c5J&%fc@NSEynhuvb=&MkRIt33=
Http://[98cc:433d:2C25:62dd:54ba:d10b:63d3:4C40]/YlbNrJod/fdjuN/qYqSdqr5/KAbXYHO%F0m7Ws9
file:///ywFY5HK/XAv@v%66o/M2O4Wlny50hypf5%02A8
https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
file:///enqvF%EFLOBsZhl8h2z
ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A
ftp://1xf.ipl4f0y6c4.VA/LHuq~/p2nPbE/0YGGNJB%DEje2psef_B/aKOuMl1Q9
ftp://o6ou6n.N8.yyld.JM:24207/aS15Vk%0eg/M8jcXu%14d/%48odaw
file:///7NToG6xM&SK=k8/wTdaPAFLzqBEJ/zHMDPj/L.fLv57c/z8QYrsKS/CEkA5FEhQXBQi
file:///UWrC%9111nEhh/45FHiTx%98L
http://35.iN13LEQV.z2d.in/%B2GBtdYtQjc4TTr/gLxjU%B3c?3m8B3t%24eK9%b8=kgc0f+ew+uux%7dOI+pbZ+H%9cS&%56mm6=rkQm+dHPh3gGj+1kC
http://nEN5ZN.EG/%0efsf4v30L
file:///19%9947/ksd3Sq7W78%27/2K_Ylzcu2q
r8sht9qzsc1e2wp.ci/8SbPwlW%5ac/qKEqFi0Q
ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg
6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/
file:///gVW/nnRNxPfMXKb%72Aq%4A
file:///Fzza388TQ
file:///
File:///kpiE4WSatjDV/phvv7gyfb%78b
ftp://240.154.225.198/I%39uutdECwM/PViD~qPa
td.KM/0Dkyg/B%65DiABz/wtqGd/i7%cepV%86XkA
077.102.005.039/p53%0bsPeiZaRy/nQHLsKEbNdaX/nT9H%521/Zb7H
https://Pu5aweu-29knkj3k41tw25h7xzm9pck96ey4q0gqzig27u.vLPR1Q4.vg/QANLMxa/gccQ1ekkRDr/?bXRDWO=I%0ap7%f4PB8S&t%a0Uhe1I$j$=Mm
https://J-5ytf.nmp5zuopbj1qbl1ik2c4ihjwu6-q5dhn.ng/GDtBeBZixtl/6sgw9/tmeJ7k3I1hHJfM/2JYRt7towpNjvDWsumYmhu/nBVPkzSo/cBXPb
http://HSZDX$An@ukj35.ve/9dLg7XrzV8g/hXhzX;2/Zw3KKwTP1um2/qej3miaDjj8v
http://sL333Q.Zci48xtb4g6.lu/sQw4ZHF/M%99%1DNl/s58%a2sCxGQ?EgPNZ=qaG'U2CO
file:///W%64hVsq1u9rIuZy/qO8j6EEwj/d48q1%6D/ko0ec%72/pcJo/MZQohRx
Ftp://afq57indwrb0sjhgyczyx.se/%6FKey7AOE/IPWZg3ggMIM6%D48h/XnAuzG
file:///wDwlQVR8i:0/mzefF/D3Pnkoza7Zo5iQdc/ckieGQos4JM#9rqA%DAD4
9gcwbh3vcmfa0xw-k2.MC/66TaJz%FE/SnDRWAknGcI
Ftp://%cdaTNzNPNu@w6H.V9aps/87/w@rPBGa/he%FBu4vpT
le1u.43cdu0n4.bn/Q0i6uNz/9%275%a3dAS/B%2fpPkCW
ftp://131.173.229.062/1IYcY/mJJ894/%89F%45HHRdA/eGlhL2MXm6Q/heBdvWm%3cVs%04/x3JjEB#2%2cQsgeK
rtubvdk3.PF/L4TR1g%5f6/Caov%FC3vK3ofrH/pz33aV%54
urlyuqr.ar/tzJzKM/gutrfWqv/IC%24bbmSS%02P?%24JV=zrJilQ+tH%7bh&hbO7Puq8c=K1Qt&ULqdYq=
Https://pFOROCZ9.dRDP.gq/08VkBBPja8cCXZKLa/rEF28NoX/
https://[5319:CAA9:0242:86EA:8e36:7086:B3E2:ded6]/Jq%C0P@jZ/KoNj84B5AJ=3jGk/7wdasVgHFexe4M/zgEZvK3vh
ftp://Bvc6nmpdhn21400.Vo53pvqm0/u7jz0O3bbFTTegZa
l0q.0b82ck3a.SI/EQf%a6#mhJ%0dfWnfM
http://hr58b8n.bL0/LppkKdZGYdxiHg/2VXeZWR/T4fCmyN579
http://1x6.yc6g6uw6htmwcrb10t4kwc393g29cctmtdxxz1j.KZ/G9lcwKju/UiH4E
7T6OSH.PF/zfYyqdxITCI0
https://2diizsrbfh.PK/t1zBYiDPZG8Kx:/pEN4b8xKu
HTTP://r53fl98bazbqhc19-h-r.qif.AW/8sH0%59j%FF7/QPnw69%17Og9V9l/JAn2c7i/%7Fta3x/P%08HRF/
qvpqmoa.O-0.FI/TDl%E6x1oUoACe/4VUZdMKL8Axud/JEZEF/KOR7Q7?ifYXMx@=&iI'!tR=p&k2Tv=Behew+RFW2c+w8NOK7+?BGH&:TYW.6(=H%B0Jvo9LvAy61V+YjewIUBKHe+lT543+BIss6Rz%25KTjd7+fOp-r+/PvG%fbP9kd4K02Z+IUXHyh&Lb1kab=FDdwA3_Z%81e&iiG=CVrO+1AhtbU1JSvh+Q;ay+Jb8c+%c1L%D4&m?r%0en=8S$wF&5JOA9WI=&kGJ=WjzqGX&Bew@sXE=cl4a+2S8
http://jykpqk6.sc/VBPT/xNRs7JVoZKE/
FTP://2w-y60heg64rnrmpyv43tpfhftxolu-5u.lG0BKW.LY/g%7aPAj5j/qxyE/D79g5vu/
http://Unp.IR/tN;/bCXe/fxSdK%00%CFB5N/D0L1/bjf
[cf65:1F97:24b8:652a:FB12:D0F7:181.134.252.162]/1jXwBjjxpC/0zKR6N%0bhawVF
ftp://090.247.102.174/YZgWR%A1NP/f6YUa8dEOoOk/a7%59Geq
https://Zn.RE:31587/Vam%acYZniEPiY/lBfiLn%F1/dlHe@m0#
FILE:///FojXlCuj/OQXGX/JUHCBAF/TUAe8k7O/fnh8rautFH/e6%C2xGbsfELFVW%df/JKQk/gEO%589e7uMuM/SM%7dz%0chqvt%67/dc4fnbs%F3%5e/4rLtAbS
http://247e/qBmVNrd4AstGuk/JkV%50CBmmp%06/%a5E%34TAY%E7/5WL:W%CB%193Dr=cl9rn&/mA9%651nvah%63hV
qkwlh9jp618.k-x.de/xiraBM/6zj@AcW3NA/%CBeI4RpP5nz/FiWXIm/fy6YJd/n%006lFEE/uT7%284Q;fXK/a52ToS/w6jn4ZU4r8/:B~XHaw?G.cE=osg8k3&iGJ=V4&w1vL=me4QRwj&YFgq=%22zCDTqgmKC
fjrb5z774.SA/PVZsWyA3sMJrb14P%995vIm6/dC5=Hj7?cxCp=bZ(40%15pi
ftp://pd5mz0sw.53t.sent7dh.ki/U%57Qz9g?6/6TOmiq%6F/
Http://g3t2w4.2AB0B.3eq7q.RE/fvvJYyHjd/%34FK%98WeZ/G5Ux06F2BDF/
http://7Z0-0PC.txi2srk55gs1venx.uy
https://i6.kzdyaq-v3.9j78y.oq5r.gpm7oh.x1fnc78-tli.5yu2f.3hfnkcvwoms.hWRAX7TAJ.7ei.tt/Ysy-/sRl/LZa6nw8
Iq7sp.vLK69LN.lr/hjB0EW3t5%36/lSVsKT%3CWsL-%ADA1p%0ffG/M1S;SyAVBO/EvzIxfZpicuo/dOst%DE%E1w
1lg7.sz/X@ENk92CPk/vVYJGN%act
ugk7-paad2cswwq3kd82lp9r7-i93galijy4x4.vatv4ag.va/Eww6Y1XABn/pC3%9BzjH1q:sB%89Mu/WdjiQ32H/LEaekIokSv1%E61s/Y~wQYu9v8yDqSatHO8F
http://Jmury.vc-wuwj.rn0o.ug/EhXMKL%64/CwKXyRnpk
HTTP://V7c6lvas-wtxspcp53z7o-v9dt13mpp7gc9ezt.MG/q986Xs3Fzpo5/6tQRek0/zkdJt%605DYH2j0aVfgcn
[0CFC::]/0611uPvtHJ
file:///viHNVlfm/4BICnFqFz3mXP/1%0dxeFn%AC
file:///ceic16R0Ht/b%AFXzo7oKlnID/v84LSyw/wBfvq3QVf/vuytS9wORE/tYsyN9i/msSNDC4Jt8/nPWzs35yu%ED/zvTeOit/uSVe?PyD
FTP://8GJ0QK.rQ8H0BIQZVFQQHPAWF7EVV12.LU/dLOis5Hvn/YEA%C5Z68E%50hS/Ie1Sx/
FTP://bGCO.apov3z1nrv.ke/cM4fSVF?%ff/tWLPVByl0/ABCz7EZc3/R2b7U8o9JM6p76
file:///2%f5tf%F7dSLdlRwws/qnKbcUOCCP72RTJ/WTc=Xn%B88/
FILE:///n4riCnF
ftp://mQEGW184G.Hv3zhea6.ST/iW6mhdm/G9mpZUib4loe
file:///
https://A0ea6aeynb4z3fsvnh4wg6h7.9bicz2zg2-695lf1uql14i2sjf6pqh1sae2j3k8iptes.57/jzHSQ%ebP5/%e3%9Chd/#VqMzFZrd%ddpe
6wmlp3ipb.cqi.ikf9wdku.arpa/dMq4GciIqW/aL%10jc%d5d%c4v
file:///lT?KC#nXl!iMB3hl
FTP://P9yyxqsh1rz2q-r7gp.h0W9VBZWGP.tk/gvbKQnzs/q1Gb
file:///7KTju7/x2t7Qen83hFitH
iawuqq99.AX/;aTO9WOuOPwl/UAbRoxCcv4
http://h-juvh.3gtf/spUbB%2aq/#%9C2/LWN&
vj021lv-xpcrzcaibfgk0.ad/dVYoNrxc5/NVH90Y7CCv%4E/vITM8z%C4?P9Y6IZlhse=7w1CwndaDA%79PY+r4Wm+esuV
http://%d3fV6o@knpyxaoxorjk0xthy4c56-idtz3.i91eof5.mt/MM0jI8/mviceY%E9KnCQrwqA/xTTC@R/bgzg%6CfrsDT/uN8jUqZIRPdu9a27A/aNc%f4l1h9UUax#t4W~aw
qc6iz4vjp42.9IZ.l87y.4m79dnm6i.tqhva6e.dumzoy.GG/aNgCtk310/ltjBeHJh5uJx/XMIgU=CSzwD3D/
http://p7E5E0.hhvqt56.ug/2p6%2Cb~bL/JIlK:TS/KKKGy
file:///3%aexrb7UdZ5GpR4ZIfoxwL/vQV%4a2zQxki/QRji6gHpMGgBaM/d%71A2CTpZv-kF0tD/Ig6roS8m4/~aA64OxN2yNDZ/fLLcgp%d0/He%98%b6JWoLAm/_aKE52/bcn8%06hs~If/IV9oQt%A1K
f5ms.jp/%A1FpERWwTd%BFG/ExC8V5aqx5l2CLJr0mJb5u/DgMvEzAr2U/py9Vg/igr9PzANtw/FFiN1E7
https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/
Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE
Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9
t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x
ftp://D02-auxxaeqnv9ve-jlmo3.l10vqu.12jl.2mvjwrsqm.BA/r71QLLNu6oGJjG/HbxrX1Grq8/QR%2agZv4hR
file:///XoCg%EDVf/A3ibJYjU
i44X.a8H-WP.zgmnrjxq.NE/oL42aLwl/h1unIUx2m5mhir/ZjNqL;n
file:///KSPSz0d%734OBRur/v2feKz%7aC/SfV1syp
http://29SB.j6/ojVDhx/%A7e34T8%01L%41BNV?6uRxM%DFd=qg9jmHtW5R&EeR=%f9,mnV.cGVNclEM54f+efsLBpEc+3V7mIJi+Dng2-Qk9&t=VWC!+5gUmI&c4c0sX%51=%03?a3mDKm+4rHPsfb%dc
96.79.198.95/8JJUovS/
file:///.LxM7EsLzp%d2/sOKzUh/IVX5Mw-PVormR
5r.uL9CQEBDLX.bn/?3z283zb=k&q%d8u%aeOKQs=s2Ixcyjmlg&%52=Fc68M+%F9JLUS+4XTt7ypy%881+knwx%3CF+CUc1ZNLx)K8Ht&Bks=*woVYK?GE&vv=P+b+W%134Flc6+%2e2w5%cfPu%5BXUS+PAAvb+@e/E
http://ol7ctcj1x.Ugk.na/jnDQG9WhW/r1cIpcqfGNMDWto0/DfPQlP
ftp://ico390kww0.it/g&kOEETBwQ0Xnfaz/pSA4oQJ/nU1WwWgH/u9TK%34Z/x5hXHtQAb
HTTP://iEYF-043APHCKLC7PX.qB28RKI5NNRTNJJ41MVKDI53GHXIMLM.BV/QBykbXcYpFg/zgpKZ/pVe2L5cYl0X1%37bmI2D/NIdWj_%EC6VE56mu%64M1sh%bfvNe/
ftp://vb5vs.P5f5jmxq.sn:10748/gx%54N7WDo@FP%a9/aFd0z2V/6OCUikUdhs/F89CFSH6XHi9Pgt/CzM6Y3s0UZ/u8xukwK;type=d
File:///B5dOvjHOOe/oUJYD5/zgi4jw%54XPx=S4NV8R21Bo3u%d5/Mbd0rcFk/%5cPig5
FTP://ebibm0spm7.cat/aalird/1v6GldpVgXA/9akBrbVRE/FbH97%67/YfhOfgG/gPiGQb%D6?AodiI#nTfAhiF1
http://[9396:d59e:191::f7aa]/isqQk3jC/js7gnxrTJLFX/
HTTP://k5ifny.sa:32595/8XvVVW6Tp37x/IF0IkevEa9jqkw/58g3p/MZB%94sVPjmF7/wZD0BUp?N6P1o=nH:%5840TZNN%37eJ+AJXoM5t7+UhR&%3FCC(O96dC=e2Zqj-YxOMwv
2hr.p5v.6aqidmeffi.flfqfx2znf.cup605.v6ktei.mi6.AQ/ky~LSgBJ/3JZhLix/blFeDQRn
gtf7abvdn9i7cr2e.YE/-1vj3Mw/P%CEXiCFd2a9/vm
http://3rsqw6jt.cv/n5e9YJBevO5c%6e4rW%a8/iKy-raSDu/.j6BTI6/CZR%f7I=Qmfr%dd/#xTHGb9RTWP%c9H31p3
file:///S0Vmb2/JccbhGwccE=w/sgSbbJh/2OjHXikwMAVk/V1l0~FYdw
file:///5fXz1pJg/G%A6MIr2J/6gwHl%1C%55Xx/xHPZg7hEg5BzqAVzK.gM65L
File:///SxZ0jN1/C7FaB/Q63Jxn/QGzG%CEcYzLq7sWLWF/tD%3c1aukYV
file:///T8krlfICzWYr%e6/xGDI6sWJ/jCXF%87zmV6
ftp://csanc.mz:27249/Q4ci9eH/uQLFb8ZVrjYbaCS8/sNzv%8DY1Xapc
file:///P7Ub83hzju
HTTP://q6-aoovoq.j-joev5ivayrom1t474xlqxrfro.xn--wgbh1c/WiS76Kh&O/IDDo916%22Vp4/iZYdp?%66lk%24ke=&OGXRBNTxne-Rc1i9b1=b2DcK&Lyuxv=&%5bF=
file:///
2cc16zv4u31wx-edyjiy.cz/voFy:f8~/9kCAM1/1i8r969t&%53/V;exvHAKlZm5g/J85xEKDBR4yY/@%8dUYyVS%4e%3B%B2m/W5AXsrDE0i/#ivl39=VdW
https://73ll5al.MO:10068/5K%AAf0p/#5deD$x1
FILE:///a0esBQEE/
qnta8.f9284.5pvu.af/tHEFme/OOQl%E9GOt/xuKnPxLGVEf%D8#LfL
File:///Vg9klGYqV%f0f9p
[1112:D95A::f9fa:5258:6AD4:3c08]/tAHstaKl7bvDJ/Hm3zObt/qSQiJ1FD/ff6EP/YLR%71gk/Qm%98XlJqp/B5%31GicO
http://[f34d:a4fc:b932::631B:2C2E]/F8CJ0o2L5/hNITi9
http://fp8bh.zm/R5WFY9BBHOmi3/OyhE6XN/7tZGprtgW#hrKj
mAIE.mXK.qq.3WVWRXC8BASM2NX8GRC-L7O.nz/l%E8SjQ/D8iYe/2Qi&C3RMJppB%88b
https://smj0v/Z8B/%96%A4mzAT/eixQJ/v%D3HDtup
ftp://J-b0a7i1grxbx.gt/MuPMg3Ly/r2iyJo4R4opO1Xj%C6
vbhx1cl9dgl-asht.lDN0ESMI.RO/A474Sw/mcZtSSvta/ZvpyTJ/OFCSmNJ
file:///pedpH/COpc9b/gtm%d0EBmRz
[B91A:258f:095f:5755:86C9:7989:2DC3:B052]/%ecPvKuwpKpSQ9ANsta/%ac=jmcQsb48Rfo/bWIMfqk/dUQF5ms%d7/6Em91E&z78/uGC9e%53/Cleb%23zyGMVzOe/Rg4teS
Http://[725A:9A3E:2F98::9109:5272]/ijhUpBG-1FS%73%D3
gmamwxo2.0z8rwjft28enmc.p-5uyn.u6E6AXVBP.ph/gBkpM4WFysjoV/X591ak/tIRMD.t5y766HT%5EX/RSb0a/Nw
https://mxfwd.gg/uwsX4/vnVUhsd/igwlpT%bahLI4;P0
https://9g5pjef-db.Mq0tfjbmqomp84hi.rf97xmi3834.403gi.TC/sLVqu3UG4/OYh%98SQXVXf7Cp/j%deBNpZoEfAD60RV?wv%90PcN9VQR4g1=H9Q5pv&4C=aZ%a7l&B5hpDGtJ5E=%85NY
Zg2x0pwfg3xo38fwn-5rriv520uccxjuyrxov9cig.fcr1xxh8.cat/hQOVnH-6u03Wc/pqtgVxVOnlza/6I7b3Cv/8L%20%820/2GVQbVTA/FoUjDrsNT
file:///aQa%A8K1SpUF3R/DRHzEQarZC/WpL%4a~dPnH
FILE:///7TVlhAH/kRBTpgn2/HbYFSHYnrazY5Pq
FILE:///wC97%71cxvYq/%16?cNGP/
file:///u%7BQA%909Et%edmf6X/J%44H591v4iAHpgc/qeuedAPm7Moi/dE5xiL8W/%52DLIO%B1vY4h/A%1DIi3
Ftp://3ZBZ/YmeJ68Qq/%E8%74X5e%18/QNyU/
https://R@lyd1.xtccruqswon.GR/oHPO%79jfl1/rFfct/TI4I5pfjn
file://Rcpx7se8pzp4sj8ooxrlfyi.cpj--z.tl/ZQtA5b0%8F%665G/RTr%2BytU/4C.hmyu8/F1hcJ/PiHi4c%16VEN/66dIi
ftp://wDIXDXTT.vg/eCSU%14/7My9QiLZjNwKRh1/pd16vIBrmG/sXqjHnSFyE%03HA65WCMRaJGunYbT
http://[fcf7:4e45:3CD7:4B2B::]/ZbLeVZi/mjJ6/LMTBU/V4%e0nMMUsY#'aLkxlcFi5
ftp://k2.jALPBG.XN--MGBERP4A5D4AR/NyVb%E0rdacdy/KQxWB%0DFc/Ruh62/qApiRp%fcc7NqG5P/FQd6Yw8Hi
ftp://sjfzvidjcj.ae:55965/r7feW9uA/33qU0/BKlBWEwBw/w3nSd
ftp://2k5.lfssxj9iatcd3056j-rq0/Bq8-ZY8byN/Skg1r%290%40%23/X51QAJ7U/H7Ir4nHaQ8?QOW
http://ip0176.JM/LthE/E04n2pcGJV?P8=dCpb%e3q
ftp://072.017.130.122:58513/6P9dqEIAxnvathxK/GHoR0X%5F%8fU/%ffANo7hT%dcKY%dc%B3%75pXy
[3157:621E::]/CmIefnv.v91v/I%E6OmZLafDS/a7JoSqx80BC9/iSPk18UXH/g6xdyYNSlT8/o34wEX?MLP%993E=%1Fao&nRDo=6svN8+d%4Bq%30jky%75psOKb+h
FTP://zbtd.0doxocs/sDrr5d5i/%6cJnyS/5K8mb;TYPE=D
http://1vkic.cmd-efq.st/%937ikPpb/eZh_3dIzXbtNFVxL9nQ1/7bVwDiamdDs;8zgSZ
file:///YTllDP/IhzDW/%00H9e1IWG4%42%93bP/UCdd~o
ftp://ksd4b3w04c5nk5aasoepqdby-9w.sl/pNe8wJ2LkrJZ/XJSanvU/
http://oPYQ.nd-egq1mkgtuwt4ei1ax.GQ/JRpv
ftp://171.235.253.31/gop3Q%bcUoW1/38aPN?
File:///XoULHUnTn/zYp/#SlAGu
0kx1j6uf.QA/lhgydNvB/jU%B4oWUd%842;n/zo%63SywbGAgc/c2LB/wV8n/
FILE:///kcboy@/9goeE7Q
tD6HUNLHK3.u-06.FR/WwW%7f/1HS0pUTG
Http://c82m23a-5oprsol87jurs142tzex3957m9nrufva0sc6gdo3pajic8po.H5m3wt.1RU:11878/Odij%A65n/Am~mzHC/#ArdWk8
Http://cd1.es/w~Uc%455aE_/wVJKfr0/X3vnA/ImG6Z
http://5ect9i8665yca.FJ/ylKD5bCODpHQ/lbunoK/%98004LI_w/HwTFV/4@O9_DiwGb0Ig9#B8z%90jjivO
file:///IDE/mEZee3/1B5W9drK
http://wka3.GM/%95yhyVy9#FFld%0CZGoiP
file:///nAL4tAgn/UK?mpt4IE/.2JW4Ej%28uiG/LulMqnbE5
ftp://973k1fnytm6y9hx87p42k.1whc75.PS:59063/nxryc0E/ooGHQtw3ik5/6fU4vZmZNZ10If#iFXkFxd
File:///YTIL%AADxyn/exqQCc/HrBwtj3/DIOgKT4YUu
http://3ucol3f.lr77xtr.LK/FNsRpDDW=/76bEzBTI/q30mQZ/
9sb.7mct69t.ar/WpXcM8498S4F#k@L:'L
ftp://3qn.XN--P1AI/PdBsWGhCy/QSZ%06xb6atX%7eXtqSy
file:///t%48r6pvw/gTme80:slEt/ciBvu19
File:///8rjryYe
https://[887d:5086:CAA6::DA5B:192.032.127.177]/
File:///v%2CCgt3%32kh5ZJx/~kf8WDLeR3XmmY6ap/.DEZNJ-ylM
file:///KNINXVO67tBU/VWJdbMVH%a7uqRO9%ad/55Wlt5O41e?/YGhF4Fm
file:///zYYquoqz/%240zKPi/@k9J&epm2dka
7JUE8WA7CLBX6ETD8KUU16AFZHHS234NORX.tep69aqao2.int/iZjrUNXtQfBaF/Z%A87tU/XfvTnCVEY%00/FUyeI05%f4#?hZ
file:///1?Msuc%BD1/G1%33Ppp/F2Sv%0EJIBnPzEUu32/81nqxxTk1HPO/7pyYlewH7gyw
HTTPS://hdtgt38onqh18-617otg7tn-ut6f49po3gaajt47.m4O26.rwko060q21o.Am497x0kow-u.TN/nZX955o/JtBhKlvv3r
ftp://28.118.125.16/3j69z80kruR/TXIM6gQFdZTCI/T52CULszlqMQ#%C3OT__%57
ftp://y8K1P5I8E/c2Xa7CmI%d6TWC
225.022.162.113/ZF58s/%CE%56BA5rQPOLU/AUNP8rG/w8SHG%d0FVsZX8dC
X6eygmy.1a-mtt.ki/WC9%a6/GH9mNozOi
94h6rdisa-eh.CH:8242/I8Ik5%42881r/EsVYPHYT/Jw7%3A2%2778ggZ8u%60
Http://89.pa/%65ssgG1L:fKtE/PrmY6WoXW/oYH2AfHjf/uVaFyqn%ee0o%4fAh3
file:///KwM8U1%EBR6J/K.asJbs0/i1vCxd/ZthOZxt0IKQEH/#x:Q8vtaIw
http://rP6.Ewrowee5k83.COM/5CId/KVp%FE
ftp://l8AAQ4XL0X0HO6MF7.9d.tw/%98Vb%117Uy4/KyUMl9
Q293qtnuw.vi/6fi1J47ebQ/d2EC4A5OM%FF9_tUNs/dk=?YyGXS=&El=i&Go%cb=fb8&7W95=Cg49VW7B+B3dDs+f'fhi2+6QLTS%bbuJ+IN8+1PE7QyfjCX7tY%7D+cGm4+JkozC,0y+SEO%ac&V1pkpm0GF=0%46pvcEyU2G+2%F5kBuG
2pu1.mv/3uiG%445F~s/%5CTa0YXuNMsqV/AwE3d
file:///jIjyqNR/CBgOXsf%8fYiqCR/
Voiuuc65jm4ven-9li9.mii5.0h5xt6.KE/qachnQB/nsC%4ai/juYvC3yTiCp%06S8I/LLVvQY#p1jmTyx@W
Ftp://ydhhq20m.MY/%ADNIfcLl66t1fl/v4%a60h/N6My%9AKXUvToMFxY/
14.21M1I.NU/iqlGVazIWPCvV/oelkORYd3Iwsdy%0D/LcdN7U
file:///
https://07zje.j84g-9lx-673h.vwr.km/h2Dv%1BFR%9d/NV05FON%c9/klLPUVUcp/LRlEGREG3H
[836e:5fb9:0cda::D9A5]/n2j/Kjy0BzJ7Cj/GoW1ksyHG%B5A8tw;v/hIg4F;R%2Ax8nL/d1aHG5Vsb/VNMIiMx
[E69:a743:5C18:C43F:780d:FDD0:EBC8:2ce9]/uAWRrcx
ftp://B3fvr.l5GW6REKV.GI/0qT%dbwWVXZ/3kdb0/kBQuFu/R@9WXH0
Ftp://a4gdplaw.TP/zyf2c37ZfY/QaiwZ3l/CUi9.ado/
8L.vg/LjRJZ/z7/Fkg9dwmTDSp
T7wos.u6I.cJP-5HQQCA.9dutej.SG/6McEZ0
jJ0D1X6C5CCNWYGOCI4NNFC5A5NYJZTCW65DHS.d1yxpq.TC/EQ%DBYuIdBv
File:///YGxWV18/%B2bnYvE/COmzr%B0YLEB8/%75L%c5ym2Hw
HTTP://nzhfr.Mlrs1k026k.KN/~bhI#qqgVS5YR
https://z9z6ip.INT/1%1dXkN1P/KI52I/yo%FD13SoZz0?:z'X3xwoS=1y&lmDOOEVzwHn2j=xfbMj%67cy#bKedfyI1
FTP://aysc5.8i8kj7.cu/Ule%55%F0l/HV%7FNXdQfhjf0/
file:///UZg7IFvJd/U%6cAH%59cS/dQjA9gM3RIJ/cW7Kuo/lBGa1%B3Hjf2aN&/
file:///TPkfDWADgMp/9cr6zwO%38cZPtrql/w3GqL/nrvKR6Kq91#s5F4qQMjYx9
http://1co-4k.zzzqb.XN--KGBECHTV/WRGpnKFny/eBiU%BDapp/0cb5bJ5%24J8a#N*cE%e4BmH3Jse?2
n7q2q9b.3-ve593.eb368oe.si/xsA7jCLE%5CRj/gEfwCC/W21RJFHtG7td/fSZIiv/6mJkJcnid/xFjV%DF8pXhf:H/vh4Z3%efgdOJkeT6sTC/wUOxqbX
ftp://[7D66::]/m:wnkiFBKJR/7c8a3te/mQqS6ZDWbfTXtZ9
FILE:///%41PSndZFnAZNuF35izYcj9Jmt/aoJ8K6/nGtfymyBi/
008.245.185.106/0Aq3gb85/6TZk7/PVTk%b1G80
ftp://90.188.10.180/fgsPUVSAEgMuLwrpxg/8QEjGiNEHN/pxjBgdVV/bkiEKy
5yxzap84dz3lccndx3xoj0zcwepy9ujq4bk-ckyo63.si/%E89rzFXG/htVDvVdD11S/SLLVce1/%5bgcDSkD
file:///Mr
dm83f2l.vvlpnpob.7si.cr/RFT%18uMgARxsP/8%61%7cO/eZtPUg%e5FavR0XRe9wZZ?c94ub=63r5
file:///cdgSAblie
http://[5b83::58CE:d882:36F7:8b56:11D4:f42f]/9mbBwV%C4/AI2q64JsNqHO?tZ3=nATs%3CQ&lbSzuIb=/IJtfPRbcu
ftp://gOD0KB6HB8JDGK56.l-V4OW.sj/KqqiLzCu%6a3jexLbLB/%6dBHZb%29z72YF/
http://s65E1E.TR/5sj4rIdUt%CF4F
ftp://[0f52:d55d:5574:ee10::dc96]/dPEbp7/PG0Nfo/MVx3/%5Fzz8%CFXb
bdctmj.vzaax2fe.j8S2.ojfq-b1m454.g7I.uy/o0%28WV/Bv9nDwD
https://k233JLHW6N.cCA13HZAXR.laiu78y.fleptcf.brva6c.osod.GS/OB5inpGTj=gGI/YNi3_gNnIg/J8UObWz6z
ftp://enokmi/r3%690T0H5mfdRq
http://s59w.cg/nJoM7yv/Z2T9Xof0hNGhl/N0%6b5Sbrbtjj/
ftp://qytw0h.hkdt2rm.gd/3a1WJDglP%cfZ
Q-2pgsvifg.yr2ix-c4avrjwva.kn/_zD8ad/%8AVwQwOG/JMC314h/rO0qj%88?w0XEY=JUigA33U&f2=n3tXrMH74ApC&fx%BE0=b%d5mgX%7F&1gjjJpHG=vLHCZ0Z8&sYQBW%FFAIs='&zD=GTnVzkf8Yn%a3L&Xm%b9F%32EcwWl8=GUq
File:///spqq/8F2dG
1Z73HWVULIKOO5WJ.rEJGR9.nsscy.gf/rHEt;i5T/%50ZjYYJ3M%4dR/WlW0C48ocnb/NRA~0M#
078.104.235.053/8KqfxznOtxC/ycYiTG3%11zP2%A1/hhbuX9Z%d403wES6/P0gg5%94
FTP://58vs5.g0.tHI.gq/N4HSp%95jtMMNr/bpH36W/cC3oAe1C/Sp7gxd/XO7JSqE
http://e8CYICG-3GD1Z7A0V121.Ya0j.Wy.CM/BLyz1kmpRF/nb6u%52/GpXGTv19#9?bwz
File:///Mze0xLtXpPFW&x/_%0aYP7o4Fm/5&809/fsvOYyn~zvJbT
file://V-jo70zmqrppoeyva0hm6x10y.UK/#3O9f0OYdx
file:///K4BV8xTq%ccORyFI/8PzAVSZeBNFX%adT
071.247.240.193/%94VOUi%ac
27r2mghslc2b.Dwbpiqi8q.gTYSL3Z.am/RU80/KFcctLv/R8tG8d51EaD&pno5r7pDR#GWY
mdfr2j.1FZFG4.VN/Xn6l%6dLWufM/I4FHTzlnWx%7BoI/ueeKx%03mfSA/%9a3PMEt.iSdeTVFgSnLi%C84m/6dh
http://H4jk06c6mtprgjywnc40mjri05a.VA/7B%C0h%4fCjj80/TrN5HugANCZu/eMVdn4en/QUSLGhe?7yjqzvzv2r%b0I=&p%C32*HvmS%39g=wb8u&lTvA=FCGNF46U+?Ak.vpCAV%ceiK0f
file:///cVjI9Ue/siOD/jynyp9%3FmBx
http://u8ic-x8o.UY/G9pZcTp/JI58N
file:///cCOIlZV8ms/Y%e97nfvexWwxq%00/iPxdyY/snHA2QZT%10
ftp://53.151.134.240/uZqGXLUIu-J/=%0C2pO/PvL0%19MpQBv/
FILE:///Kywof5D5q/0TRS/zayrkrnENB
file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/
mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs
g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
file:///TJa%86AczeCmM5QMhi/Wox~Ajl/WxUF%5eSA:y%0fD%E21/x%cca%d3Qgx/8iWJ5-h%26/fCK%01nQNrK8#ygTTB
file:///~%303cUUVYTEaQU5%5DXbogiPKb/favR2rETEh/9TXM%15u/nYCOZpZgL
file:///mJM%a1/jv5%53QDqE/bFMu0CBp
[a0e6::]/YR5lwpHlG5BPjr2XT/Pq%e4kWAmZ/ucI10P1
File:///8YorWt/#ToazT-v
http://2igfcm3qy.wlcgdxv-xat059qnx15a7qp-p-p5oph1c8.GP/hS4Aqy7SmODbaOH
3s81j.TJ/pS9Jzw8:NWryq/%00Kh1/Y7Rfoo7haw?pYq7Efg=
HTTP://k59s6i5o.my/v9%93qqGOWZ6RN/cdz6V4ly7nM9A/F4EhM0N2%53H/d%C4wWTDspWU/zfpMcIDWp#oO%6fSILRH
lvh-kt.TN/xZghTR/yDiD0a/P5D2%37rFa?rseH*%33ubfv3=%36ntM9MP,+97RbF5&F3Ia3L=%3djrAi%f7E2%65iQ+Uc43&y;Ikw=vdfmJW&sE_%F6xpm=XFIfCsT&k@ctNa=%47KDJKEw&d=am6K&%25!BjLNa=iqs.l
http://Lhe7w4f06qt8tif2af1k6s552hlbk.mfce.cc/DEqiQf/GLpkeKZAxhSO4m
Zy-iit.Cth-tuvx4.au/dl6DMUqP/wAeKXt6
File:///35GJ%C8m6ubg/kpI4iEEx
dbe.gkg.EDU/cJ%fbQ3k7pwp5/arlH%DCD
Ftp://e8ni0.5etxvrjvn491/tP8r:UC/faEdqs4P/v4zJax4
https://4PI.gg/fFtQoVp/b6Jf55/YEc2l7dE%CA
http://gpu16lz.LS/9e%daJrwQfHEpFvsZ3jx/c4STIJ/CmvEGAUx9f/
file://ij9anjtok86ro.uN-BGDQ855IB.sDXAQR.5kr8kz.3J3M8XRM.18r3s0g-6.4rjsmwue0lwao0og17d-5-1.F1h3qgkul29yw2t4p4se5clomncxhmoy.g6c9tbz7.pa/5LMtmbl/1tfIF/pBOV7Hc
HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt
5.Piba4ac.JE/55M1H/AZXdj
m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/
ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/
hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/
Ftp://mez27g2tpmk.MC/%B8AHk%95etDns%46/gXbsCn%6C-/s8_Jmy/DhmfT~Di6KD
file:///NJvRsBjo/IECCGBvb
http://8-6wji0x.tCVT41X.k1PS.15p.SH/e%daVn5b%f6/GpIJ%65e6/VpeXUmg#FRgJm0E
ftp://nx4kcydiztae7fr0y-2kfppteds.gq06u.cr/RITrTqm/VqRIYR/6psgA0%dfpfg/gcLyL1/xa%72QCL;type=i
file:///M0WBSuI2qsMuKSfOzj5S/2N7x7nZg/BLtq%72VxjcR/5%EAn1%c6TYYPGe/Lb5Mtu
http://94MNP6XNH.0mgqklz3t9g2xl89x81-a3hifmff89nahy62jeyhuhe8lhkuafizl.GQ/Ajpa4Z1D0o/aVv748s/NAIWCkWCD2hj/7MZS5c79DmL4/ieQ%21gw?oEPqIN=Pm9nPx54%c1&j1y=C
ftp://rKI.COOP/v0pdu1zj/ir2UM4X/7k04jhOKPVN/7ua%E5y8p/bl~yS
d-IJA.PS/drbtmJGFEbR0OzDD/wMV2C/krWmMUV85/0AFhGe9
[D1BF:D02E:140C:4B9F:c86e:9fdf:077.173.119.180]/A07Ox%86Oae/yhjXUMut
http://A.bi/J1GPah/OT741dJ/Jh3Z0xb3
ftp://6VMV.t680F6.ijsru3.bm/vlJmkK/go28Jr/qUtmHmqhj/ykeAVxYoe
HTTPS://oi%32Yp.@a4mk0.Teyu0lojs62d8l96qiym2v477ixatleasrgft4ttpbfel9r.BW
x37MULG.514yrp5.Vrd68eeufzt.VA/fFMWutSw0d/Gr%BFun3/JH6%DESQV8f#gn+NM2
http://2.88.82.235/6bhV%BFGDy%ABd/g84ly25/;4AeID#
https://a860jcplfoodo0yq401cdf9.1ZE2P/NLArIzMZ%8B/6UiHWMMGS79/?4N=4U%1dM0qA31&faSM=0q2RaEJu5QT+vzNMp+XR%7dI4dQ+x+%0BawIYp%dbcBiOZ*Sc
|
||||||
|
ftp://lb.NP:46239/xwyAL/m74%9fqj4gttFLg/
|
||||||
|
s086j1-9.Nowi9s.fm/16zr3s/mvzfyWbB5/&1mzA:X-3
|
||||||
|
eigz5dhw.jynsrju0t044lcc.3c3bfm.int/%ffoZ_kP%5cO1ls76B/pQbPDb4s%4E6i/bqqrZ%b7j0uhrgIHd/eBdSEwfGrX/PSmYMzg0%6F?Qr%92y11b3=&L;5CV=zJao%31Tmm
|
||||||
|
65-ihklk4j6m.f3CFA.7kj.qa9rcww7uefzkpxbf87ni28b4a1i9rjqy9a.5texnqlc9.cu/p%CDK%b1%449LH/IiLqpww/HmACJI/r46TA4
|
||||||
|
133.38.197.20/pbgvKM6W%BCEBN/Cvcu0&#idQDycc
|
||||||
|
https://4I2GL/cGtyrs/%A8m5%3fekPsTRWlB2?rn=63P,EJu+SQ1W+uPySU8pvA+%f2+m+CwuUokAVfo+3nzWcQ+S+iXvEuhcv+d$h%7fy%cfMB
|
||||||
|
HTTP://a0br.o0gvxf.kp/zZkWq5hfxy/q0x-g0In#bd%1anKx27
|
||||||
|
ftp://[1327::117.246.244.220]/%91y4%09/
|
||||||
|
ktefq.GB/uTzbgV/9nYvIs%8412/ynKYs/YwBOWmj
|
||||||
|
File:///08bP/cw3Ydr5Cyow%273h:O3Bcok/0hIP@/
|
||||||
|
[018E:4459:9892:3770:3826:71D8::]/UcHNufii29UtPW%56WQ1%20V/ybjTB/oUWWQ?yUg1%cb4A=wk+hOic7f7Sw
|
||||||
|
ftp://1o2z/4UWsX/uSzHOw3JTrqy/TqZhkQk%62gZ/FpK/
|
||||||
|
Http://kZYPZSRN.1m.UA/QN9n3Nw8kPAgkCB/SzdVcxryKou7mMG#p6at77
|
||||||
|
http://se9g.s7-5qnlmsi0npbr8ouxuey3y66swspkl.y4.st/xfP7%066uXWuOu/clIFhy
|
||||||
|
ftp://D4j9grnngs4a61b.im/f35gw%53rTeI5/#Ff7A0YMs9RG8t
|
||||||
|
https://zujspr.cr/zy14P7FG3/Oxznfe/P2zpT%38S%FFVfP95Lh/nJJgzX/kcVuHCzV?Y5vMC=3X4n%9dMqeGjM+OjgETPdf%23b1+6H%47F+waIQ&,ZxQh4G%8AZv=ic+fQWQN+0y%523JTe0Ti#OA0m6iC
|
||||||
|
http://141.171.118.17/VLnEb4Y
|
||||||
|
https://sla.aowts.MQ/KbP3AV@wXFSgz/TauvS9f2/zvGpvN.e8a2Kw1ho?jYRUP=L_IAzw&cj0ux=xz&lrA%8bS56%A9=SX7NjQ
|
||||||
|
file:///
|
||||||
|
FTP://h6.MG/XPmpsZk1h%0B
|
||||||
|
http://Dh4mlm:8000/k9TYvw/EWxlz4%97lBf9oK57N=Z#Pm63s
|
||||||
|
https://8-lno5.KM/Uco2E%dbYPx~/MzKrkZ/rDpXB7OWtD?Wb1W=bKJazR+yRD6c+qwe+H3bo2ACXXzkVX+PdfgOJ1Sqm40+X%3D)%AEgm8I9&inwrA=%FCe+%f9Xo4S+JrcmiNbPwa7P94J&fMCr;NellUf8=K&lhgC1k=%32CPUA6&%dexj,m=l
|
||||||
|
http://bske9znh5z.mq/rF739Qhneaet/NTfzZn
|
||||||
|
http://B7z94v/
|
||||||
|
FTP://p9s.hh313n.6k3.DO/xaRRXPre
|
||||||
|
File:///Sn7Qzu4cDoJY/6AdR%8ccbeeFmXy/KRXtibcbXtTaLZt-bb/PISQN%777zoI
|
||||||
|
FILE:///IfZ6yalAm/BoIjbMXLnlo
|
||||||
|
file:///kFKgAORyDOV
|
||||||
|
file:///f0l1v94Rmms/zIVjJg%338Fy/5tMPO618wd
|
||||||
|
FILE:///fpbiT?6/%0B7dUkWR5r%AErqLW/v2n%bet%b3wV8Yzi80OJ.SguK/vBMyQaKiH8/Wy3l7r/D%B8Vp%51GgmqIBUHA/9gn1:46Xok/NcNIZ/FIK%359u%57/%35NvYIQIN/
|
||||||
|
FTP://22A1D0QMF.cmcve.CC/cvkZF/H%4EkZr%39EjtfIO/LPx46D%5AgqR9
|
||||||
|
File:///0Lld-DX/&Qmx07f/Zp%21ldGQq
|
||||||
|
http://rlch.COOP/%bcKE55hwH6/CKHB%2Ak/Qzsn2Rn1p3RUc3H
|
||||||
|
http://h6d5js.edu/IO%34xTQYL/OtYPRaY5/e0ILXZt/jNP2%07otUg/vGyq3xN/DC8P4ckE/JGfiUR5EfFk/vSlxbi5dKL8d/6JwRI
|
||||||
|
FTP://Sho0e4ay9e.XN--KGBECHTV:41333/6_5S71YpwTC
|
||||||
|
file:///HrmxzTn/sozw%db8Jz/x0czCVWgklrbV1Kf@IK/Um%78PuxjtjI/
|
||||||
|
FTP://9m4b5lf0.Y5dnwnduzx9wha22ayztin-t7hng5b62e07rzsv55325xgdrzwx.gov/pmG%45dhnQZ
|
||||||
|
ftp://t2ik0rgw.krjz72-l.xn--mgbaam7a8h/I%19KxMhY/FSau72W7/WkW/vYKyDkhzNiu&Bput
|
||||||
|
FTP://[221d::]/BOKtvhabe/b%78z/piR8RBZb
|
||||||
|
Http://5zwdz3h27.q9l27mto-5v0i3i1yu8oyl.TN/wk91N/X32rxh/cmM%01iQPnCulto/
|
||||||
|
FTP://gWUFGOXE8EW.1g9vse.xn--wgbh1c/ncQo%42ihY/Tyk216/;type=d#J4A9HEH
|
||||||
|
FTP://5wudd.ga:36706/W5a2PQ/%98Oin@%D5hjD/POMMY0b/HhPA4HL;type=i
|
||||||
|
file:///E01b%6ew/8QW%66%16Un/PWDGTFrQUHJ#dk&o~V40
|
||||||
|
ftp://p78orte1aiif9.zk-l-n5drgvx2kj6i9e034ck587-utyikjhal.qE5RJ031K2FAN-35.v71jyg8l/wgwpnw5/1WPLlSc8/3RZzlIEZMlC8/ytaOFdSuPKO%72T
|
||||||
|
tri9.Fyhn.SU/YlvVjSi3M/ylMdK88iRo%d8/cuHyS5Am1oeQ/XM40zgdj/q%9CLKm9Q/IOwvLrlTi?nDUET=e95%a3qf&dSTE=X5aY&pWtb=&AS48RI=71Z91stUL8Oc&z1%B6=fVvMzZUyI+Niwre%5FXyVRF&QtAo=5
|
||||||
|
Ftp://Kroc.Ls4-tkd7.sg:58219/9tq-FJyL?Qb/e0alokGZ2/MKTHP3Wsw
|
||||||
|
pmg4ty.m59480p2f69.fV.COM/X98xZ.E/cTleUeS/9P6zeVQjfd30/eVVvE4/Zyxm1SSqe9u/WP%a5hS
|
||||||
|
6P.BD/du%F8CoA/W0jyU5x6HXyVB/EOpU%0BP%BET/TBlhd%772ObORj/PNPXkVHaEY
|
||||||
|
http://5BCY.X3.SG/N~63s98IV2/?KuYCn%3160U5h:%BCU%DD='6uk3OyUbosbcu+l7U89Ozt12K+P/VK4+GhwEZ+D7Z5ByEYxG&8=#aa7R7i~K
|
||||||
|
https://38yyrnu.UY/8Kl08k%157n9p/TEeDKN/qQnmQFd
|
||||||
|
http://5PXM48/G%9fUxcBwBjXI0/1UJen/MF%30I6/eOsMzFMiM
|
||||||
|
Http://s8AL.rc94r4iftx7qeg4cbjjv5.za/mYk9UAydyn4q@w/T7K/dd%8aIXPp
|
||||||
|
Http://130.165.027.114/o8bwef/X%70neu3uGKY/NU%f8xTKW0;hTKK/V;%edBnJYWG0MI/ZlDMtVPK7?k1N:WnR=%3DNffenC%67+sf(z0U!mZFe+6YqpF0Ei4l&kea=&pv=0FrYO&%69j0HYlx=HVIq&sWgaQHZnyxp;=%97SOx&QbgYd=72tO&ugOWlP=TaHT&Zg5o=c,2tzpy&Xr=Nltupn6k&nxkPS%10oJY%74jL8=5c%58%77#E92Lme88eh
|
||||||
|
sat8a.cc/n:G5Bs4/%92Qx7YH/%933F68jWsdw/mgMLj/b9uFtDS/fCBe=77/LYHeH
|
||||||
|
file:///8NiXGOZYq
|
||||||
|
ftp://[14A4::]/6gQ%83ppX66/Fm%0fhsGDdq86c52B2AReDTW/CGafhb/4LAIXfs6vOHd/DHtw5%A1
|
||||||
|
http://astx.i8o5jdypn1ly.LC
|
||||||
|
Ftp://7j.N@Ptavog8.gh/%FDJUUJB/nrC6%4as/AM2BxLCU:fGwm
|
||||||
|
file:///LD3OAKQVR
|
||||||
|
http://jVVR4GZ.BG/XELY1/P=cusbVv5o
|
||||||
|
HTTP://4fx.3kt642w.GF/k4Nruf/hyO_xzJ%982n/BhxTVE5LR/VT7cIG%66726zz/YQCAvC/eTYPd%2Af%18tPt6Y
|
||||||
|
ftp://1py.jhl5-h.53.39PN2C.xN.ps/Q6kM9aOm7
|
||||||
|
1MRTJ51.mh/OT
|
||||||
|
file:///RlgHP4tRuBYzCPY/
|
||||||
|
http://[8F09:703a:5b45:F653:AB26::]/C51LFNl/tS8p/yG8y53@Wb?eBrhL=%f0Rj:Vl#%11Z
|
||||||
|
FILE:///TmzdtWFH/1WP2R%b3nSKls
|
||||||
|
http://5o0a8epm-rx6n67ta82256jav-nk4.lb/HbOqUc/TIVeqJ7Ohp/BjDwRDKJ/JZO
|
||||||
|
File:///AvnO.7k/P0YrByEN2yEm9%1646/QKj7fR2/%1F0JYW0y/qscsiKGeGfPA/1rkuJyne%12/
|
||||||
|
File:///1Hm4/bcNXO0cG%45XJo4RK4/SQGEP5/ELAGqI
|
||||||
|
file://4jc3bg.zs/WfjCr2aeWME/Nv4A4B/invk2d1h
|
||||||
|
Vj1.Ngq.LI/FR2%b7RU_z%a1Tf2vy/rysXmZ0/
|
||||||
|
Ftp://wkws.yi8srfw.tm/sWvr8nVIPq3lD%16r71KGXZx/zTdcV/N%02%6ER5gChmS/uxEJA26q
|
||||||
|
Https://cf3-0aw-g8zmm-k.AO/mYGm9AqQW%E4q?6u=&rX=
|
||||||
|
8vv-rhcodmrr42jd6zmrnl7xa.F1igvm2.RO?rQOIRt=Q&Z8=1WyCZjZv83+lpB%7a
|
||||||
|
Http://009.130.112.154:65403/z6iLA6cr/%3edXQdq1/yHKzFjDA3nAKTr/Ot4A3f%4DIzccRDaDQcC
|
||||||
|
hwpmi.upmzdzzhsrz.e469.ee/SXdNeY7NHR6/Vr6%FDr
|
||||||
|
http://[C7E7:57e7:b08c:9FCD:4B77:4de1:229.020.164.172]/LnIzKLn/StXMmto
|
||||||
|
Http://2-6SB2KV8V8MV290SIC08D9J7-IRM9FTPC8ZZ.hwo9el74qqv1.zm/tr9K2BSFkbU-A8wJR/CGEL_82/cnMuBB%a3j34
|
||||||
|
file:///fUtCm%b6qNK/lltu?NvBAhM/sJ8pOm:/jJ18OTM6U%f5v%3f/
|
||||||
|
http://76OXC.pn.GA:15181/OPErhH1cHtl1ba/eIPkR6%1EG/8fVd02k/Ky%b0D5izq4k
|
||||||
|
ftp://154.108.127.0/vGpMboeazp05/usfmVeitt0pf3o/Ue4OMVT/sJ9BAYSLje
|
||||||
|
ftp://ivbv0.zCR-0J.lku/6m26/7tElM/%b2%0BI.Ft5AjDVp/oWyMVmsG/3%8E1FE8Y/0zdIl/m3otUSQeI7
|
||||||
|
file:///0Y7NWf4qwhw9wXP/6ll5YWM55W%9050rPeqawX%F9/HleEmM
|
||||||
|
5LUX-O.q-33d.tn/smzXQJn3H/81mg%4de_/jb%97hT
|
||||||
|
http://84W32/CCKpkt/c0bqCnoQ5Y
|
||||||
|
ftp://nyqaz.MT/0OfOsU7S1H9BM/OjhdD/izbR4txUY
|
||||||
|
8wo2j2c1z9s.ef2ki0mlvvnjm5vfyu.t5a-yb41uykgo5kn1qxzffhz667dty8mytg6ir7os9hoxwm2.mw/%39FEVmD/%a4qRT5W5qW.yR/8XB9NHyB/
|
||||||
|
http://rbf6ezzlhpe.hk/%0DK8/IXXJAsC?mV8vvDI8K=6t9%6EG1Dt+M7N+D5n@Vd79n%d8E+gj+ofnZ%16loobN+f3-S+e,IH&lnh=
|
||||||
|
wu3w.0J5.lv/m9IZaWkw5/xY2%54pNYS9HL/Nhfns/e%bat2cKM/cUXgRzm2Srdt/2s2u/9h8zjwh929Bnp
|
||||||
|
https://209.73.217.17/dJvsqDH/RH6Ok_eSc8wO5/BOJws6/9f0DvXJ4/?%ea'Fx=P&6h3zz3eGCtK=4MF76p7Em
|
||||||
|
jfajtdt5k6gu11la2jbih.MA/zcaTNUL/3q%31eLT%bc3S/L6v2rt/WtbA0%45~TIvPD
|
||||||
|
ftp://Defi-z.gr:16993/=7IIaMpVy3OLs/QtQD7qF5Vr/=RVbNDH8/y3oUHmX.v/Td%dcbiGlArA%720
|
||||||
|
ftp://[544f:e60a::8772:D633:DA1F:081.021.019.189]:62615/%CB6Wy1K/X%0EcoPQ/IgnCMLPynfx/fdFHb
|
||||||
|
ftp://1INQM6.4y.RO/
|
||||||
|
Http://T778hd416.g9r96v.bs:64804/GbWp%47K/zgTKs/cBHzmYZ=AI23VY
|
||||||
|
HTTPS://6hp3j2y2tuakzv1rnq9vnvn1w0j6roo3if:58975/vH8BLTu3hzkk
|
||||||
|
ftp://Ye1dfbl0eae8lqiiqaojj.JO/8EjAq0TzD:/Bz3Pm2qyWo/ZX58A2/yjn%9F3xJZjsVhw
|
||||||
|
66.242.9.138/CYHK1bGpZ/5yyVD%cbC
|
||||||
|
nHZMBEJWO.ST/ABXauli3wuJ/WUxhKaZJg
|
||||||
|
ftp://[8463:c210::b5d1]:34094/8%AC7Fc/Qh6%62yFExJbdaB/0cAZ3iSKlk8sU;TYPE=D
|
||||||
|
http://vmlyl0efotpfd-tew59kcpsi2u7qd/UbXy1Cc/L%0cwnzmdjz/?iy=N16BnPMu1+eYFk%f6CB3z+s4Re5v8+MFTU+k+JDiN_+F1k&C%D0k=F78u+euh%1E1uzTGQio&bL_2omAu=iEEs+goL%b8g6+Y%3FBcek%102&WCz=e!Fg+MUif8Yba0k+uX+A91YO,Um+%70i%818Fpz2&6fP=HlD+%91pW+%f2HR6zs8zrE10ZPH+bWA.BB6k+Df3w:X85xDnDjSiPY+AyDpuSl4VEVTJzA3g&OtUR6=
|
||||||
|
http://bCNNCLT.gxa2sbn/lAFakp
|
||||||
|
D19f.oD5.bb/xUG6W8VxTcjMG/jYMuWlVMygf/UtIwE13c/%a9wzpO%AFxQ9
|
||||||
|
q8HY2P.r5T.AU/nc0Iq%28QAF/#yOD3%b3UA%d79e%1EmJp3
|
||||||
|
dPY3X09.AC/STpa%97U%b53yKP4Te/%71KZZvIC#nA1W2z
|
||||||
|
ftp://3gb.xgjm/wF%ado0cM/u%0DmCW8L/d9Ss%61dKQ
|
||||||
|
6m.56xkyt.32O.com/ToEAr%BEdi/xBpPU2NqC/74sgdq%BD9/WSrx5/5ldupD%47J/9boeZj
|
||||||
|
ftp://s0y6r7hg7.XN--KGBECHTV/xQizIlOK9/uxho7%bd/RvxbFGQ4o/O%42UeWF?/GAZ5E8b2/eRaq/l:-1ASwSpw/2FkowF%12Ss/vtCq9dysEc%1ee/
|
||||||
|
[d18d:1707::]/NGZMInsLF8/kgC3y/F66qc1qt6OWfeS/DyngWA
|
||||||
|
file:///%55A4VpGsup
|
||||||
|
file:///WNEw%bfTWDLF/s%A9oZoWUo
|
||||||
|
Ftp://2tdk.Ube6velthhhx8o.GM/bUH4XycSEKkTE
|
||||||
|
ftp://7kxk4ujzz.kp:32621/hbop0%25sK/rw7RBE0lTN/tX5BLF
|
||||||
|
FILE:///IQExpA4kDvUfTkH6Bg/MeVJ4aIUbXCJf
|
||||||
|
file:///SIE0AkJFq/ZPJLyYK/6hA3x1InlGm1
|
||||||
|
http://047.014.184.200/Z_QdOwjzfBue4Nt/aEn/xuEQD/cXlnoxHIK%7d8h/1%eegEk7E0/8Ejku@r1Z/UZ4gG/%484zOJsP%1b/Lc1okbWRzN5UJ
|
||||||
|
Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L
|
||||||
|
FILE://155.24.106.255/3VEZIT7
|
||||||
|
d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ
|
||||||
|
lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET
|
||||||
|
l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C
|
||||||
|
FILE://a6ys9a4.xj.BY/%99BGXp/F=yJtxc71/gvXuHuB9k
|
||||||
|
212.072.006.032/6kV8ce%2e/%e7lzm-HB%4artP/zg6tWMW7RIG?U7=HAXw$D3sM%7DyDJ&Gt=
|
||||||
|
http://[ea5::]/eIdv5xl/5qhxlOvzw%018f/N3RQQKCz/WzUnsSg8KA3/7ohHZCp
|
||||||
|
file:///g_T81EaNw2nJB/1yUUT
|
||||||
|
http://2XXY0MZ.fwa.791ck-2gx.bd/uO6FW?ZS5jE:=m:
|
||||||
|
https://[8368:F154::f99f]/Y3h8FgzTYYpzn/zHFhQECC/CGtX/8v_~jn3Kn
|
|
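Note: the listing above is the tail of a generated test fixture for this change — random URLs mixing schemes (http/https/ftp/file), IPv4 and IPv6 hosts, ports, percent-escapes, and punycode (xn--) TLDs that the new grammar must keep intact. A hedged sketch of how one such fixture line is checked; this test class is illustrative, not part of the commit, and it assumes the new tokenizer emits each URL as a single token:

import java.io.StringReader;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;

public class TestUrlTokenization extends BaseTokenStreamTestCase {
  public void testUrlKeptWhole() throws Exception {
    // One fixture line in, one token out: the URL is not split on the
    // dots, slashes, or other punctuation inside it (assumed behavior).
    Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT,
        new StringReader("http://141.171.118.17/VLnEb4Y"));
    assertTokenStreamContents(tokenizer,
        new String[] { "http://141.171.118.17/VLnEb4Y" });
  }
}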
@@ -98,12 +98,4 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
     Analyzer a = new GreekAnalyzer(Version.LUCENE_30);
     assertAnalyzesTo(a, "Α.Π.Τ.", new String[] { "α.π.τ." });
   }
-
-  /**
-   * test that acronym normalization works
-   */
-  public void testAcronym() throws Exception {
-    Analyzer a = new GreekAnalyzer(Version.LUCENE_31);
-    assertAnalyzesTo(a, "Α.Π.Τ.", new String[] { "απτ" });
-  }
 }
@@ -39,6 +39,8 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
     checkOneTermReuse(a, "book", "book");
     // stopword
     assertAnalyzesTo(a, "the", new String[] {});
+    // possessive removal
+    checkOneTermReuse(a, "steven's", "steven");
   }
 
   /** test use of exclusion set */
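For reference, a hedged sketch of the filter behind the two new assertions; WhitespaceTokenizer stands in for EnglishAnalyzer's full chain to keep the example small:

import java.io.Reader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.util.Version;

class PossessiveSketch {
  // "steven's book" -> steven, book (trailing 's removed per token)
  static TokenStream chain(Reader reader) {
    return new EnglishPossessiveFilter(
        new WhitespaceTokenizer(Version.LUCENE_31, reader));
  }
}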
@@ -111,7 +111,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
     assertAnalyzesTo(
         fa,
         "33Bis 1940-1945 1940:1945 (---i+++)*",
-        new String[] { "33bis", "1940-1945", "1940", "1945", "i" });
+        new String[] { "33bis", "1940", "1945", "1940", "1945", "i" });
 
   }
 
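The rewritten expectation isolates the behavior change: under the UAX#29 word-break rules a hyphen between digits is no longer kept, so "1940-1945" yields two number tokens. A hedged one-liner in the style of the test above (the version constant selects the new grammar):

assertAnalyzesTo(new FrenchAnalyzer(Version.LUCENE_31), "1940-1945",
    new String[] { "1940", "1945" });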
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.th;
  */
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
 import org.junit.Assume;
 
 /**
@@ -39,37 +40,35 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
       new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
   }
 
-
-  /*
-   * Thai numeric tokens are typed as <ALPHANUM> instead of <NUM>.
-   * This is really a problem with the interaction w/ StandardTokenizer, which is used by ThaiAnalyzer.
-   *
-   * The issue is this: in StandardTokenizer the entire [:Thai:] block is specified in ALPHANUM (including punctuation, digits, etc)
-   * Fix is easy: refine this spec to exclude thai punctuation and digits.
-   *
-   * A better fix, that would also fix quite a few other languages would be to remove the thai hack.
-   * Instead, allow the definition of alphanum to include relevant categories like nonspacing marks!
-   */
-  public void testBuggyTokenType() throws Exception {
-    Assume.assumeTrue(ThaiWordFilter.DBBI_AVAILABLE);
-    assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
-        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
-        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
-            "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
-  }
-
-  /* correct testcase
   public void testTokenType() throws Exception {
     assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
         new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
-        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
-            "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>" });
+        new String[] { "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
+            "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
+            "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
+            "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
+            "<NUM>" });
   }
-  */
 
-  public void testAnalyzer() throws Exception {
+  /**
+   * Thai numeric tokens were typed as <ALPHANUM> instead of <NUM>.
+   * @deprecated testing backwards behavior
+   */
+  @Deprecated
+  public void testBuggyTokenType30() throws Exception {
     Assume.assumeTrue(ThaiWordFilter.DBBI_AVAILABLE);
-    ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
+    assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_30), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
+        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
+        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+            "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
+            "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
+  }
+
+  /** @deprecated testing backwards behavior */
+  @Deprecated
+  public void testAnalyzer30() throws Exception {
+    Assume.assumeTrue(ThaiWordFilter.DBBI_AVAILABLE);
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
 
     assertAnalyzesTo(analyzer, "", new String[] {});
 
@@ -124,6 +123,23 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
     assertAnalyzesToReuse(
         analyzer,
         "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
-        new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
+        new String[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz@demo.com" });
   }
+
+  /** @deprecated, for version back compat */
+  @Deprecated
+  public void testReusableTokenStream30() throws Exception {
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
+    assertAnalyzesToReuse(analyzer, "", new String[] {});
+
+    assertAnalyzesToReuse(
+        analyzer,
+        "การที่ได้ต้องแสดงว่างานดี",
+        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});
+
+    assertAnalyzesToReuse(
+        analyzer,
+        "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
+        new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
+  }
 }
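Both @Deprecated tests pin the Version.LUCENE_30 emulation, while the now-active testTokenType documents the new contract. The headline difference as a hedged one-liner (it assumes a digits-only input survives as a single token):

// New grammar: Thai digits come out typed <NUM>; the 3.0 emulation
// typed them <ALPHANUM>.
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "๑๒๓",
    new String[] { "๑๒๓" }, new String[] { "<NUM>" });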
@@ -0,0 +1,211 @@
+package org.apache.lucene.analysis.standard;
+
+/*
+ * Copyright 2001-2005 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.net.URL;
+import java.net.URLConnection;
+import java.text.DateFormat;
+import java.util.Date;
+import java.util.Locale;
+import java.util.SortedSet;
+import java.util.TimeZone;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Generates a file containing JFlex macros to accept valid ASCII TLDs
+ * (top level domains), for inclusion in JFlex grammars that can accept
+ * domain names.
+ * <p/>
+ * The IANA Root Zone Database is queried via HTTP from URL cmdline arg #0, the
+ * response is parsed, and the results are written out to a file containing
+ * a JFlex macro that will accept all valid ASCII-only TLDs, including punycode
+ * forms of internationalized TLDs (output file cmdline arg #1).
+ */
+public class GenerateJflexTLDMacros {
+
+  public static void main(String... args) throws Exception {
+    if (args.length != 2 || args[0].equals("--help") || args[0].equals("-help")) {
+      System.err.println("Cmd line params:");
+      System.err.println("\tjava " + GenerateJflexTLDMacros.class.getName()
+                         + "<ZoneFileURL> <JFlexOutputFile>");
+      System.exit(1);
+    }
+    new GenerateJflexTLDMacros(args[0], args[1]).execute();
+  }
+
+  private static final String NL = System.getProperty("line.separator");
+
+  private static final String APACHE_LICENSE
+    = "/*" + NL
+    + " * Copyright 2001-2005 The Apache Software Foundation." + NL
+    + " *" + NL
+    + " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL
+    + " * you may not use this file except in compliance with the License." + NL
+    + " * You may obtain a copy of the License at" + NL
+    + " *" + NL
+    + " * http://www.apache.org/licenses/LICENSE-2.0" + NL
+    + " *" + NL
+    + " * Unless required by applicable law or agreed to in writing, software" + NL
+    + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
+    + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
+    + " * See the License for the specific language governing permissions and" + NL
+    + " * limitations under the License." + NL
+    + " */" + NL + NL;
+
+  private static final Pattern TLD_PATTERN_1
+    = Pattern.compile("([-A-Za-z0-9]+)\\.\\s+NS\\s+.*");
+  private static final Pattern TLD_PATTERN_2
+    = Pattern.compile("([-A-Za-z0-9]+)\\.\\s+\\d+\\s+IN\\s+NS\\s+.*");
+  private final URL tldFileURL;
+  private long tldFileLastModified = -1L;
+  private final File outputFile;
+
+  public GenerateJflexTLDMacros(String tldFileURL, String outputFile)
+    throws Exception {
+    this.tldFileURL = new URL(tldFileURL);
+    this.outputFile = new File(outputFile);
+  }
+
+  /**
+   * Downloads the IANA Root Zone Database, extracts the ASCII TLDs, then
+   * writes a JFlex macro accepting any of them case-insensitively out to
+   * the specified output file.
+   *
+   * @throws IOException if there is a problem either downloading the database
+   *  or writing out the output file.
+   */
+  public void execute() throws IOException {
+    final SortedSet<String> TLDs = getIANARootZoneDatabase();
+    writeOutput(TLDs);
+    System.err.println("Wrote " + TLDs.size() + " top level domains to '"
+                       + outputFile + "'.");
+  }
+
+  /**
+   * Downloads the IANA Root Zone Database.
+   * @return downcased sorted set of ASCII TLDs
+   * @throws java.io.IOException if there is a problem downloading the database
+   */
+  private SortedSet<String> getIANARootZoneDatabase() throws IOException {
+    final SortedSet<String> TLDs = new TreeSet<String>();
+    final URLConnection connection = tldFileURL.openConnection();
+    connection.setUseCaches(false);
+    connection.addRequestProperty("Cache-Control", "no-cache");
+    connection.connect();
+    tldFileLastModified = connection.getLastModified();
+    BufferedReader reader = new BufferedReader
+      (new InputStreamReader(connection.getInputStream(), "US-ASCII"));
+    try {
+      String line;
+      while (null != (line = reader.readLine())) {
+        Matcher matcher = TLD_PATTERN_1.matcher(line);
+        if (matcher.matches()) {
+          TLDs.add(matcher.group(1).toLowerCase(Locale.US));
+        } else {
+          matcher = TLD_PATTERN_2.matcher(line);
+          if (matcher.matches()) {
+            TLDs.add(matcher.group(1).toLowerCase(Locale.US));
+          }
+        }
+      }
+    } finally {
+      reader.close();
+    }
+    return TLDs;
+  }
+
+  /**
+   * Writes a file containing a JFlex macro that will accept any of the given
+   * TLDs case-insensitively.
+   *
+   * @param ASCIITLDs The downcased sorted set of top level domains to accept
+   * @throws IOException if there is an error writing the output file
+   */
+  private void writeOutput(SortedSet<String> ASCIITLDs) throws IOException {
+    final DateFormat dateFormat = DateFormat.getDateTimeInstance
+      (DateFormat.FULL, DateFormat.FULL, Locale.US);
+    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
+    final Writer writer = new OutputStreamWriter
+      (new FileOutputStream(outputFile), "UTF-8");
+    try {
+      writer.write(APACHE_LICENSE);
+      writer.write("// Generated from IANA Root Zone Database <");
+      writer.write(tldFileURL.toString());
+      writer.write(">");
+      writer.write(NL);
+      if (tldFileLastModified > 0L) {
+        writer.write("// file version from ");
+        writer.write(dateFormat.format(tldFileLastModified));
+        writer.write(NL);
+      }
+      writer.write("// generated on ");
+      writer.write(dateFormat.format(new Date()));
+      writer.write(NL);
+      writer.write("// by ");
+      writer.write(this.getClass().getName());
+      writer.write(NL);
+      writer.write(NL);
+      writer.write("ASCIITLD = \".\" (");
+      writer.write(NL);
+      boolean isFirst = true;
+      for (String ASCIITLD : ASCIITLDs) {
+        writer.write("\t");
+        if (isFirst) {
+          isFirst = false;
+          writer.write(" ");
+        } else {
+          writer.write("| ");
+        }
+        writer.write(getCaseInsensitiveRegex(ASCIITLD));
+        writer.write(NL);
+      }
+      writer.write("\t) \".\"? // Accept trailing root (empty) domain");
+      writer.write(NL);
+      writer.write(NL);
+    } finally {
+      writer.close();
+    }
+  }
+
+  /**
+   * Returns a regex that will accept the given ASCII TLD case-insensitively.
+   *
+   * @param ASCIITLD The ASCII TLD to generate a regex for
+   * @return a regex that will accept the given ASCII TLD case-insensitively
+   */
+  private String getCaseInsensitiveRegex(String ASCIITLD) {
+    StringBuilder builder = new StringBuilder();
+    for (int pos = 0 ; pos < ASCIITLD.length() ; ++pos) {
+      char ch = ASCIITLD.charAt(pos);
+      if (Character.isDigit(ch) || ch == '-') {
+        builder.append(ch);
+      } else {
+        builder.append("[").append(ch).append(Character.toUpperCase(ch)).append("]");
+      }
+    }
+    return builder.toString();
+  }
+}
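A hedged invocation sketch for the generator above; both arguments are illustrative placeholders, not values taken from this commit:

// Regenerate the JFlex TLD macro from a copy of the IANA root zone.
GenerateJflexTLDMacros.main(
    "http://www.internic.net/zones/root.zone",  // zone file URL (assumed)
    "ASCIITLD.jflex-macro");                    // output file (assumed)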
@@ -44,11 +44,11 @@ import com.ibm.icu.util.ULocale;
  */
 public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
   /** Token type for words containing ideographic characters */
-  public static final String WORD_IDEO = "<IDEO>";
+  public static final String WORD_IDEO = "<IDEOGRAPHIC>";
   /** Token type for words containing Japanese kana */
   public static final String WORD_KANA = "<KANA>";
   /** Token type for words that contain letters */
-  public static final String WORD_LETTER = "<WORD>";
+  public static final String WORD_LETTER = "<ALPHANUM>";
   /** Token type for words that appear to be numbers */
   public static final String WORD_NUMBER = "<NUM>";
 
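The renames align the ICU tokenizer's type vocabulary with StandardTokenizer's, so type-sensitive consumers see the same strings whichever tokenizer produced the stream:

// Both tokenizers now agree on type strings:
assertEquals("<ALPHANUM>", DefaultICUTokenizerConfig.WORD_LETTER);   // was "<WORD>"
assertEquals("<IDEOGRAPHIC>", DefaultICUTokenizerConfig.WORD_IDEO);  // was "<IDEO>"
assertEquals("<NUM>", DefaultICUTokenizerConfig.WORD_NUMBER);        // unchanged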
@@ -17,17 +17,16 @@ package org.apache.lucene.analysis.icu.segmentation;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.icu.ICUNormalizer2Filter;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
 import java.util.Arrays;
 
 public class TestICUTokenizer extends BaseTokenStreamTestCase {
@@ -220,6 +219,6 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
   public void testTypes() throws Exception {
     assertAnalyzesTo(a, "David has 5000 bones",
         new String[] {"david", "has", "5000", "bones"},
-        new String[] { "<WORD>", "<WORD>", "<NUM>", "<WORD>" });
+        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" });
   }
 }
@@ -0,0 +1,31 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.standard.ClassicFilter;
+
+/**
+ * @version $Id$
+ */
+public class ClassicFilterFactory extends BaseTokenFilterFactory {
+  public TokenFilter create(TokenStream input) {
+    return new ClassicFilter(input);
+  }
+}
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.ClassicTokenizer;
+
+import java.io.Reader;
+import java.util.Map;
+
+/**
+ * @version $Id$
+ */
+
+public class ClassicTokenizerFactory extends BaseTokenizerFactory {
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    assureMatchVersion();
+  }
+
+  public Tokenizer create(Reader input) {
+    return new ClassicTokenizer(luceneMatchVersion, input);
+  }
+}
@@ -0,0 +1,28 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
+
+/** Factory for {@link EnglishPossessiveFilter} */
+public class EnglishPossessiveFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    return new EnglishPossessiveFilter(input);
+  }
+}
@@ -17,6 +17,8 @@
 
 package org.apache.solr.analysis;
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardFilter;
 
@@ -24,7 +26,13 @@ import org.apache.lucene.analysis.standard.StandardFilter;
  * @version $Id$
  */
 public class StandardFilterFactory extends BaseTokenFilterFactory {
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    assureMatchVersion();
+  }
+
   public StandardFilter create(TokenStream input) {
-    return new StandardFilter(input);
+    return new StandardFilter(luceneMatchVersion, input);
   }
 }
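Net effect of the two hunks above: the factory now requires luceneMatchVersion (via assureMatchVersion()) and passes it through to the filter. A hedged fragment of the resulting construction, where tokenStream is any upstream TokenStream:

// Version-aware construction, matching the factory's create() above.
StandardFilter filter = new StandardFilter(Version.LUCENE_31, tokenStream);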
@@ -32,22 +32,34 @@ public class TestStandardFactories extends BaseTokenTestCase {
    * Test StandardTokenizerFactory
    */
   public void testStandardTokenizer() throws Exception {
-    Reader reader = new StringReader("What's this thing do?");
+    Reader reader = new StringReader("Wha\u0301t's this thing do?");
     StandardTokenizerFactory factory = new StandardTokenizerFactory();
     factory.init(DEFAULT_VERSION_PARAM);
     Tokenizer stream = factory.create(reader);
+    assertTokenStreamContents(stream,
+        new String[] {"Wha\u0301t's", "this", "thing", "do" });
+  }
+
+  /**
+   * Test ClassicTokenizerFactory
+   */
+  public void testClassicTokenizer() throws Exception {
+    Reader reader = new StringReader("What's this thing do?");
+    ClassicTokenizerFactory factory = new ClassicTokenizerFactory();
+    factory.init(DEFAULT_VERSION_PARAM);
+    Tokenizer stream = factory.create(reader);
     assertTokenStreamContents(stream,
         new String[] {"What's", "this", "thing", "do" });
   }
 
   /**
-   * Test StandardFilterFactory
+   * Test ClassicFilterFactory
    */
   public void testStandardFilter() throws Exception {
     Reader reader = new StringReader("What's this thing do?");
-    StandardTokenizerFactory factory = new StandardTokenizerFactory();
+    ClassicTokenizerFactory factory = new ClassicTokenizerFactory();
     factory.init(DEFAULT_VERSION_PARAM);
-    StandardFilterFactory filterFactory = new StandardFilterFactory();
+    ClassicFilterFactory filterFactory = new ClassicFilterFactory();
     filterFactory.init(DEFAULT_VERSION_PARAM);
     Tokenizer tokenizer = factory.create(reader);
     TokenStream stream = filterFactory.create(tokenizer);