LUCENE-2167: Implement StandardTokenizer with the UAX#29 Standard

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1002032 13f79535-47bb-0310-9956-ffa450edef68
Steven Rowe 2010-09-28 06:16:16 +00:00
parent c562b10b2e
commit 3c26a9167c
65 changed files with 13107 additions and 749 deletions

View File

@ -17,18 +17,7 @@ package org.apache.lucene.benchmark.quality;
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.TestPerfTasksLogic;
import org.apache.lucene.benchmark.quality.Judge;
import org.apache.lucene.benchmark.quality.QualityQuery;
import org.apache.lucene.benchmark.quality.QualityQueryParser;
import org.apache.lucene.benchmark.quality.QualityBenchmark;
import org.apache.lucene.benchmark.quality.trec.TrecJudge;
import org.apache.lucene.benchmark.quality.trec.TrecTopicsReader;
import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
@ -36,6 +25,12 @@ import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
/**
* Test that quality run does its job.
* <p>
@ -177,6 +172,7 @@ public class TestQualityRun extends BenchmarkTestCase {
String algLines[] = {
"# ----- properties ",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"analyzer=org.apache.lucene.analysis.standard.ClassicAnalyzer",
"docs.file=" + getWorkDirResourcePath("reuters.578.lines.txt.bz2"),
"content.source.log.step=2500",
"doc.term.vector=false",

View File

@ -9,6 +9,12 @@ API Changes
* LUCENE-2413: Removed the AnalyzerUtil in common/miscellaneous. (Robert Muir)
* LUCENE-2167: StandardTokenizer/Analyzer in common/standard/ now implement
the Word Break rules from the Unicode Text Segmentation algorithm (UAX#29),
as well as tokenizing URLs and email addresses according to the relevant
RFCs. ClassicTokenizer/Analyzer retains the old StandardTokenizer/Analyzer
behavior. (Steven Rowe, Robert Muir, Uwe Schindler)
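A minimal sketch of the difference (hypothetical field name and sample text; exact token boundaries are determined by the two grammars):

  import java.io.StringReader;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.standard.ClassicAnalyzer;
  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.util.Version;

  public class Uax29Demo {
    public static void main(String[] args) throws Exception {
      String text = "see http://lucene.apache.org/java/docs";
      // New grammar: the URL is kept as a single <URL> token.
      dump(new StandardAnalyzer(Version.LUCENE_31).tokenStream("f", new StringReader(text)));
      // Old grammar: ClassicAnalyzer splits the URL into several tokens.
      dump(new ClassicAnalyzer(Version.LUCENE_31).tokenStream("f", new StringReader(text)));
    }

    static void dump(TokenStream ts) throws Exception {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term);
      }
      ts.end();
      ts.close();
    }
  }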
New Features
* LUCENE-2413: Consolidated Solr analysis components into common.

View File

@ -52,3 +52,8 @@ See http://project.carrot2.org/license.html.
The SmartChineseAnalyzer source code (smartcn) was
provided by Xiaoping Gao and copyright 2009 by www.imdict.net.
WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/)
is derived from Unicode data such as the Unicode Character Database.
See http://unicode.org/copyright.html for more details.

View File

@ -38,7 +38,7 @@
<target name="compile-core" depends="jflex-notice, common.compile-core"/>
<target name="jflex" depends="jflex-check,clean-jflex,jflex-StandardAnalyzer,jflex-wiki-tokenizer"/>
<target name="jflex" depends="jflex-check,clean-jflex,jflex-StandardAnalyzer,jflex-UAX29Tokenizer,jflex-wiki-tokenizer"/>
<target name="jflex-wiki-tokenizer" depends="init,jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
@ -49,27 +49,61 @@
nobak="on"/>
</target>
<target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
<target name="jflex-StandardAnalyzer" depends="init,jflex-check,gen-tlds" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
<jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex"
<jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
<jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex"
<jflex file="src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
</target>
<target name="jflex-UAX29Tokenizer" depends="jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
<jflex file="src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
</target>
<target name="clean-jflex">
<delete>
<fileset dir="src/java/org/apache/lucene/analysis/wikipedia" includes="*.java">
<containsregexp expression="generated.*by.*JFlex"/>
</fileset>
<fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
<containsregexp expression="generated.*by.*JFlex"/>
<containsregexp expression="generated.*by.*JFlex"/>
</fileset>
</delete>
</target>
<property name="tld.zones" value="http://www.internic.net/zones/root.zone"/>
<property name="tld.output" location="src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro"/>
<target name="gen-tlds" depends="compile-tools">
<java
classname="org.apache.lucene.analysis.standard.GenerateJflexTLDMacros"
dir="."
fork="true"
failonerror="true">
<classpath>
<pathelement location="${build.dir}/classes/tools"/>
</classpath>
<arg value="${tld.zones}"/>
<arg value="${tld.output}"/>
</java>
</target>
<target name="compile-tools">
<compile
srcdir="src/tools/java"
destdir="${build.dir}/classes/tools">
<classpath refid="classpath"/>
</compile>
</target>
</project>
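For reference, the gen-tlds target boils down to a call like the following (a sketch; it assumes GenerateJflexTLDMacros exposes the usual public static void main(String[]) entry point, taking the zone-file URL and the output path exactly as the <arg> elements above pass them):

  // Regenerate the ASCIITLD macro outside of ant; the two arguments
  // mirror the ${tld.zones} and ${tld.output} properties defined above.
  org.apache.lucene.analysis.standard.GenerateJflexTLDMacros.main(new String[] {
      "http://www.internic.net/zones/root.zone",
      "src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro"
  });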

View File

@ -132,7 +132,7 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
@Override
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -218,7 +218,7 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new LowerCaseFilter(matchVersion, source);
result = new StandardFilter(result);
result = new StandardFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(excltable != null && !excltable.isEmpty())
result = new KeywordMarkerFilter(result, excltable);

View File

@ -247,7 +247,7 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter( matchVersion, result, stoptable);
if (matchVersion.onOrAfter(Version.LUCENE_31)) {

View File

@ -120,7 +120,7 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -237,7 +237,7 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter( matchVersion, result, stopwords);
result = new KeywordMarkerFilter(result, exclusionSet);

View File

@ -135,7 +135,7 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new GreekLowerCaseFilter(matchVersion, source);
if (matchVersion.onOrAfter(Version.LUCENE_31))
result = new StandardFilter(result);
result = new StandardFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if (matchVersion.onOrAfter(Version.LUCENE_31))
result = new GreekStemFilter(result);

View File

@ -104,6 +104,9 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
// prior to 3.1 we get the classic behavior: StandardFilter strips the possessive for us.
if (matchVersion.onOrAfter(Version.LUCENE_31))
result = new EnglishPossessiveFilter(result);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -0,0 +1,52 @@
package org.apache.lucene.analysis.en;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* TokenFilter that removes possessives (trailing 's) from words.
*/
public final class EnglishPossessiveFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public EnglishPossessiveFilter(TokenStream input) {
super(input);
}
@Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) {
return false;
}
final char[] buffer = termAtt.buffer();
final int bufferLength = termAtt.length();
if (bufferLength >= 2 &&
buffer[bufferLength-2] == '\'' &&
(buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S'))
termAtt.setLength(bufferLength - 2); // Strip last 2 characters off
return true;
}
}
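A minimal usage sketch (sample text is illustrative; the UAX#29 grammar keeps "John's" together as one token, so the filter sees the trailing 's):

  import java.io.StringReader;
  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
  import org.apache.lucene.analysis.standard.StandardTokenizer;
  import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
  import org.apache.lucene.util.Version;

  public class PossessiveDemo {
    public static void main(String[] args) throws Exception {
      TokenStream stream = new EnglishPossessiveFilter(
          new StandardTokenizer(Version.LUCENE_31, new StringReader("John's car")));
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        System.out.println(term); // prints "John", then "car"
      }
      stream.end();
      stream.close();
    }
  }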

View File

@ -120,7 +120,7 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -120,7 +120,7 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -240,7 +240,7 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
Reader reader) {
if (matchVersion.onOrAfter(Version.LUCENE_31)) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new ElisionFilter(matchVersion, result);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);

View File

@ -120,7 +120,7 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -119,7 +119,7 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, source);
result = new StopFilter(matchVersion, result, stopwords);
if (!stemExclusionSet.isEmpty()) {

View File

@ -120,7 +120,7 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -246,7 +246,7 @@ public final class DutchAnalyzer extends ReusableAnalyzerBase {
Reader aReader) {
if (matchVersion.onOrAfter(Version.LUCENE_31)) {
final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stoptable);
if (!excltable.isEmpty())

View File

@ -120,7 +120,7 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -120,7 +120,7 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -124,7 +124,7 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@ -175,7 +175,7 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase
Reader reader) {
if (matchVersion.onOrAfter(Version.LUCENE_31)) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerFilter(

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.snowball;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.apache.lucene.analysis.util.CharArraySet;
@ -80,7 +81,11 @@ public final class SnowballAnalyzer extends Analyzer {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(matchVersion, reader);
result = new StandardFilter(result);
result = new StandardFilter(matchVersion, result);
// remove the possessive 's for english stemmers
if (matchVersion.onOrAfter(Version.LUCENE_31) &&
(name.equals("English") || name.equals("Porter") || name.equals("Lovins")))
result = new EnglishPossessiveFilter(result);
// Use a special lowercase filter for turkish, the stemmer expects it.
if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish"))
result = new TurkishLowerCaseFilter(result);
@ -108,7 +113,7 @@ public final class SnowballAnalyzer extends Analyzer {
if (streams == null) {
streams = new SavedStreams();
streams.source = new StandardTokenizer(matchVersion, reader);
streams.result = new StandardFilter(streams.source);
streams.result = new StandardFilter(matchVersion, streams.source);
// Use a special lowercase filter for turkish, the stemmer expects it.
if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish"))
streams.result = new TurkishLowerCaseFilter(streams.result);
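So with LUCENE_31 and an English-family stemmer the possessive is now stripped before stemming; a fragment-level sketch (sample text illustrative, the obvious imports assumed):

  SnowballAnalyzer analyzer = new SnowballAnalyzer(Version.LUCENE_31, "English");
  TokenStream ts = analyzer.tokenStream("f", new StringReader("the dog's bones"));
  // The 's is removed ahead of the stemmer, so "dog's" reduces cleanly
  // to "dog" instead of leaving an apostrophe artifact behind.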

View File

@ -0,0 +1,318 @@
/*
* Copyright 2001-2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
// file version from Tuesday, September 14, 2010 11:34:20 AM UTC
// generated on Wednesday, September 15, 2010 7:00:44 AM UTC
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
ASCIITLD = "." (
[aA][cC]
| [aA][dD]
| [aA][eE]
| [aA][eE][rR][oO]
| [aA][fF]
| [aA][gG]
| [aA][iI]
| [aA][lL]
| [aA][mM]
| [aA][nN]
| [aA][oO]
| [aA][qQ]
| [aA][rR]
| [aA][rR][pP][aA]
| [aA][sS]
| [aA][sS][iI][aA]
| [aA][tT]
| [aA][uU]
| [aA][wW]
| [aA][xX]
| [aA][zZ]
| [bB][aA]
| [bB][bB]
| [bB][dD]
| [bB][eE]
| [bB][fF]
| [bB][gG]
| [bB][hH]
| [bB][iI]
| [bB][iI][zZ]
| [bB][jJ]
| [bB][mM]
| [bB][nN]
| [bB][oO]
| [bB][rR]
| [bB][sS]
| [bB][tT]
| [bB][vV]
| [bB][wW]
| [bB][yY]
| [bB][zZ]
| [cC][aA]
| [cC][aA][tT]
| [cC][cC]
| [cC][dD]
| [cC][fF]
| [cC][gG]
| [cC][hH]
| [cC][iI]
| [cC][kK]
| [cC][lL]
| [cC][mM]
| [cC][nN]
| [cC][oO]
| [cC][oO][mM]
| [cC][oO][oO][pP]
| [cC][rR]
| [cC][uU]
| [cC][vV]
| [cC][xX]
| [cC][yY]
| [cC][zZ]
| [dD][eE]
| [dD][jJ]
| [dD][kK]
| [dD][mM]
| [dD][oO]
| [dD][zZ]
| [eE][cC]
| [eE][dD][uU]
| [eE][eE]
| [eE][gG]
| [eE][rR]
| [eE][sS]
| [eE][tT]
| [eE][uU]
| [fF][iI]
| [fF][jJ]
| [fF][kK]
| [fF][mM]
| [fF][oO]
| [fF][rR]
| [gG][aA]
| [gG][bB]
| [gG][dD]
| [gG][eE]
| [gG][fF]
| [gG][gG]
| [gG][hH]
| [gG][iI]
| [gG][lL]
| [gG][mM]
| [gG][nN]
| [gG][oO][vV]
| [gG][pP]
| [gG][qQ]
| [gG][rR]
| [gG][sS]
| [gG][tT]
| [gG][uU]
| [gG][wW]
| [gG][yY]
| [hH][kK]
| [hH][mM]
| [hH][nN]
| [hH][rR]
| [hH][tT]
| [hH][uU]
| [iI][dD]
| [iI][eE]
| [iI][lL]
| [iI][mM]
| [iI][nN]
| [iI][nN][fF][oO]
| [iI][nN][tT]
| [iI][oO]
| [iI][qQ]
| [iI][rR]
| [iI][sS]
| [iI][tT]
| [jJ][eE]
| [jJ][mM]
| [jJ][oO]
| [jJ][oO][bB][sS]
| [jJ][pP]
| [kK][eE]
| [kK][gG]
| [kK][hH]
| [kK][iI]
| [kK][mM]
| [kK][nN]
| [kK][pP]
| [kK][rR]
| [kK][wW]
| [kK][yY]
| [kK][zZ]
| [lL][aA]
| [lL][bB]
| [lL][cC]
| [lL][iI]
| [lL][kK]
| [lL][rR]
| [lL][sS]
| [lL][tT]
| [lL][uU]
| [lL][vV]
| [lL][yY]
| [mM][aA]
| [mM][cC]
| [mM][dD]
| [mM][eE]
| [mM][gG]
| [mM][hH]
| [mM][iI][lL]
| [mM][kK]
| [mM][lL]
| [mM][mM]
| [mM][nN]
| [mM][oO]
| [mM][oO][bB][iI]
| [mM][pP]
| [mM][qQ]
| [mM][rR]
| [mM][sS]
| [mM][tT]
| [mM][uU]
| [mM][uU][sS][eE][uU][mM]
| [mM][vV]
| [mM][wW]
| [mM][xX]
| [mM][yY]
| [mM][zZ]
| [nN][aA]
| [nN][aA][mM][eE]
| [nN][cC]
| [nN][eE]
| [nN][eE][tT]
| [nN][fF]
| [nN][gG]
| [nN][iI]
| [nN][lL]
| [nN][oO]
| [nN][pP]
| [nN][rR]
| [nN][uU]
| [nN][zZ]
| [oO][mM]
| [oO][rR][gG]
| [pP][aA]
| [pP][eE]
| [pP][fF]
| [pP][gG]
| [pP][hH]
| [pP][kK]
| [pP][lL]
| [pP][mM]
| [pP][nN]
| [pP][rR]
| [pP][rR][oO]
| [pP][sS]
| [pP][tT]
| [pP][wW]
| [pP][yY]
| [qQ][aA]
| [rR][eE]
| [rR][oO]
| [rR][sS]
| [rR][uU]
| [rR][wW]
| [sS][aA]
| [sS][bB]
| [sS][cC]
| [sS][dD]
| [sS][eE]
| [sS][gG]
| [sS][hH]
| [sS][iI]
| [sS][jJ]
| [sS][kK]
| [sS][lL]
| [sS][mM]
| [sS][nN]
| [sS][oO]
| [sS][rR]
| [sS][tT]
| [sS][uU]
| [sS][vV]
| [sS][yY]
| [sS][zZ]
| [tT][cC]
| [tT][dD]
| [tT][eE][lL]
| [tT][fF]
| [tT][gG]
| [tT][hH]
| [tT][jJ]
| [tT][kK]
| [tT][lL]
| [tT][mM]
| [tT][nN]
| [tT][oO]
| [tT][pP]
| [tT][rR]
| [tT][rR][aA][vV][eE][lL]
| [tT][tT]
| [tT][vV]
| [tT][wW]
| [tT][zZ]
| [uU][aA]
| [uU][gG]
| [uU][kK]
| [uU][sS]
| [uU][yY]
| [uU][zZ]
| [vV][aA]
| [vV][cC]
| [vV][eE]
| [vV][gG]
| [vV][iI]
| [vV][nN]
| [vV][uU]
| [wW][fF]
| [wW][sS]
| [xX][nN]--0[zZ][wW][mM]56[dD]
| [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
| [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
| [xX][nN]--[fF][iI][qQ][sS]8[sS]
| [xX][nN]--[fF][iI][qQ][zZ]9[sS]
| [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
| [xX][nN]--[gG]6[wW]251[dD]
| [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]
| [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
| [xX][nN]--[jJ]6[wW]193[gG]
| [xX][nN]--[jJ][xX][aA][lL][pP][dD][lL][pP]
| [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]
| [xX][nN]--[kK][pP][rR][wW]13[dD]
| [xX][nN]--[kK][pP][rR][yY]57[dD]
| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
| [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
| [xX][nN]--[oO]3[cC][wW]4[hH]
| [xX][nN]--[pP]1[aA][iI]
| [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
| [xX][nN]--[wW][gG][bB][hH]1[cC]
| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
| [yY][eE]
| [yY][tT]
| [zZ][aA]
| [zZ][mM]
| [zZ][wW]
) "."? // Accept trailing root (empty) domain

View File

@ -0,0 +1,140 @@
package org.apache.lucene.analysis.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
/**
* Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link
* LowerCaseFilter} and {@link StopFilter}, using a list of
* English stop words.
*
* <a name="version"/>
* <p>You must specify the required {@link Version}
* compatibility when creating ClassicAnalyzer:
* <ul>
* <li> As of 3.1, StopFilter correctly handles Unicode 4.0
* supplementary characters in stopwords
* <li> As of 2.9, StopFilter preserves position
* increments
* <li> As of 2.4, Tokens incorrectly identified as acronyms
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
* </ul>
*
* ClassicAnalyzer was named StandardAnalyzer in Lucene versions prior to 3.1.
* As of 3.1, {@link StandardAnalyzer} implements Unicode text segmentation,
* as specified by UAX#29.
*/
public final class ClassicAnalyzer extends StopwordAnalyzerBase {
/** Default maximum allowed token length */
public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
/**
* Specifies whether deprecated acronyms should be replaced with HOST type.
* See {@linkplain "https://issues.apache.org/jira/browse/LUCENE-1068"}
*/
private final boolean replaceInvalidAcronym;
/** An unmodifiable set containing some common English words that are usually not
useful for searching. */
public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
/** Builds an analyzer with the given stop words.
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopWords stop words */
public ClassicAnalyzer(Version matchVersion, Set<?> stopWords) {
super(matchVersion, stopWords);
replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
}
/** Builds an analyzer with the default stop words ({@link
* #STOP_WORDS_SET}).
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
*/
public ClassicAnalyzer(Version matchVersion) {
this(matchVersion, STOP_WORDS_SET);
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopwords File to read stop words from */
public ClassicAnalyzer(Version matchVersion, File stopwords) throws IOException {
this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopwords Reader to read stop words from */
public ClassicAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
/**
* Set maximum allowed token length. If a token is seen
* that exceeds this length then it is discarded. This
* setting only takes effect the next time tokenStream or
* reusableTokenStream is called.
*/
public void setMaxTokenLength(int length) {
maxTokenLength = length;
}
/**
* @see #setMaxTokenLength
*/
public int getMaxTokenLength() {
return maxTokenLength;
}
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
final ClassicTokenizer src = new ClassicTokenizer(matchVersion, reader);
src.setMaxTokenLength(maxTokenLength);
src.setReplaceInvalidAcronym(replaceInvalidAcronym);
TokenStream tok = new ClassicFilter(src);
tok = new LowerCaseFilter(matchVersion, tok);
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected boolean reset(final Reader reader) throws IOException {
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
return super.reset(reader);
}
};
}
}
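A quick usage sketch (field name and text illustrative, the obvious imports assumed):

  ClassicAnalyzer analyzer = new ClassicAnalyzer(Version.LUCENE_31);
  TokenStream ts = analyzer.tokenStream("f", new StringReader("The Quick-Brown Fox!"));
  CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
  ts.reset();
  while (ts.incrementToken()) {
    System.out.println(term); // "quick", "brown", "fox"
  }
  ts.end();
  ts.close();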

View File

@ -0,0 +1,73 @@
package org.apache.lucene.analysis.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/** Normalizes tokens extracted with {@link ClassicTokenizer}. */
public class ClassicFilter extends TokenFilter {
/** Construct filtering <i>in</i>. */
public ClassicFilter(TokenStream in) {
super(in);
}
private static final String APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
private static final String ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
// this filter uses the type attribute
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/** Returns the next token in the stream, or null at EOS.
* <p>Removes <tt>'s</tt> from the end of words.
* <p>Removes dots from acronyms.
*/
@Override
public final boolean incrementToken() throws java.io.IOException {
if (!input.incrementToken()) {
return false;
}
final char[] buffer = termAtt.buffer();
final int bufferLength = termAtt.length();
final String type = typeAtt.type();
if (type == APOSTROPHE_TYPE && // remove 's
bufferLength >= 2 &&
buffer[bufferLength-2] == '\'' &&
(buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
// Strip last 2 characters off
termAtt.setLength(bufferLength - 2);
} else if (type == ACRONYM_TYPE) { // remove dots
int upto = 0;
for(int i=0;i<bufferLength;i++) {
char c = buffer[i];
if (c != '.')
buffer[upto++] = c;
}
termAtt.setLength(upto);
}
return true;
}
}
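Both normalizations in a fragment-level sketch (text illustrative, the obvious imports assumed):

  TokenStream ts = new ClassicFilter(
      new ClassicTokenizer(Version.LUCENE_31, new StringReader("I.B.M. Bob's")));
  CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
  ts.reset();
  while (ts.incrementToken()) {
    System.out.println(term); // "IBM" (acronym dots removed), "Bob" ('s stripped)
  }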

View File

@ -0,0 +1,234 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.standard;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
/** A grammar-based tokenizer constructed with JFlex
*
* <p> This should be a good tokenizer for most European-language documents:
*
* <ul>
* <li>Splits words at punctuation characters, removing punctuation. However, a
* dot that's not followed by whitespace is considered part of a token.
* <li>Splits words at hyphens, unless there's a number in the token, in which case
* the whole token is interpreted as a product number and is not split.
* <li>Recognizes email addresses and internet hostnames as one token.
* </ul>
*
* <p>Many applications have specific tokenizer needs. If this tokenizer does
* not suit your application, please consider copying this source code
* directory to your project and maintaining your own grammar-based tokenizer.
*
* <a name="version"/>
* <p>You must specify the required {@link Version}
* compatibility when creating ClassicTokenizer:
* <ul>
* <li> As of 2.4, Tokens incorrectly identified as acronyms
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
* </ul>
*
* ClassicTokenizer was named StandardTokenizer in Lucene versions prior to 3.1.
* As of 3.1, {@link StandardTokenizer} implements Unicode text segmentation,
* as specified by UAX#29.
*/
public final class ClassicTokenizer extends Tokenizer {
/** A private instance of the JFlex-constructed scanner */
private StandardTokenizerInterface scanner;
public static final int ALPHANUM = 0;
public static final int APOSTROPHE = 1;
public static final int ACRONYM = 2;
public static final int COMPANY = 3;
public static final int EMAIL = 4;
public static final int HOST = 5;
public static final int NUM = 6;
public static final int CJ = 7;
/**
* @deprecated this solves a bug where HOSTs that end with '.' are identified
* as ACRONYMs.
*/
@Deprecated
public static final int ACRONYM_DEP = 8;
/** String token types that correspond to token type int constants */
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",
"<APOSTROPHE>",
"<ACRONYM>",
"<COMPANY>",
"<EMAIL>",
"<HOST>",
"<NUM>",
"<CJ>",
"<ACRONYM_DEP>"
};
private boolean replaceInvalidAcronym;
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
/** Set the max allowed token length. Any token longer
* than this is skipped. */
public void setMaxTokenLength(int length) {
this.maxTokenLength = length;
}
/** @see #setMaxTokenLength */
public int getMaxTokenLength() {
return maxTokenLength;
}
/**
* Creates a new instance of the {@link ClassicTokenizer}. Attaches
* the <code>input</code> to the newly created JFlex scanner.
*
* @param input The input reader
*
* See http://issues.apache.org/jira/browse/LUCENE-1068
*/
public ClassicTokenizer(Version matchVersion, Reader input) {
super();
init(input, matchVersion);
}
/**
* Creates a new ClassicTokenizer with a given {@link AttributeSource}.
*/
public ClassicTokenizer(Version matchVersion, AttributeSource source, Reader input) {
super(source);
init(input, matchVersion);
}
/**
* Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
*/
public ClassicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
super(factory);
init(input, matchVersion);
}
private final void init(Reader input, Version matchVersion) {
this.scanner = new ClassicTokenizerImpl(input);
if (matchVersion.onOrAfter(Version.LUCENE_24)) {
replaceInvalidAcronym = true;
} else {
replaceInvalidAcronym = false;
}
this.input = input;
}
// this tokenizer generates three attributes:
// term offset, positionIncrement and type
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
/*
* (non-Javadoc)
*
* @see org.apache.lucene.analysis.TokenStream#next()
*/
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
int posIncr = 1;
while(true) {
int tokenType = scanner.getNextToken();
if (tokenType == StandardTokenizerInterface.YYEOF) {
return false;
}
if (scanner.yylength() <= maxTokenLength) {
posIncrAtt.setPositionIncrement(posIncr);
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
// This 'if' should be removed in the next release. For now, it converts
// invalid acronyms to HOST. When removed, only the 'else' part should
// remain.
if (tokenType == ClassicTokenizer.ACRONYM_DEP) {
if (replaceInvalidAcronym) {
typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.HOST]);
termAtt.setLength(termAtt.length() - 1); // remove extra '.'
} else {
typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM]);
}
} else {
typeAtt.setType(ClassicTokenizer.TOKEN_TYPES[tokenType]);
}
return true;
} else
// When we skip a too-long term, we still increment the
// position increment
posIncr++;
}
}
@Override
public final void end() {
// set final offset
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
offsetAtt.setOffset(finalOffset, finalOffset);
}
@Override
public void reset(Reader reader) throws IOException {
super.reset(reader);
scanner.yyreset(reader);
}
/**
* Prior to https://issues.apache.org/jira/browse/LUCENE-1068, ClassicTokenizer mischaracterized tokens like www.abc.com as acronyms
* when they should have been labeled as hosts instead.
* @return true if ClassicTokenizer now returns these tokens as Hosts, otherwise false
*
* @deprecated Remove in 3.X and make true the only valid value
*/
@Deprecated
public boolean isReplaceInvalidAcronym() {
return replaceInvalidAcronym;
}
/**
*
* @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST.
* @deprecated Remove in 3.X and make true the only valid value
*
* See https://issues.apache.org/jira/browse/LUCENE-1068
*/
@Deprecated
public void setReplaceInvalidAcronym(boolean replaceInvalidAcronym) {
this.replaceInvalidAcronym = replaceInvalidAcronym;
}
}
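The too-long-token handling is easiest to see in a fragment-level sketch (length and text illustrative, the obvious imports assumed):

  ClassicTokenizer tok =
      new ClassicTokenizer(Version.LUCENE_31, new StringReader("a tokenization test"));
  tok.setMaxTokenLength(5);
  CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncr = tok.addAttribute(PositionIncrementAttribute.class);
  tok.reset();
  while (tok.incrementToken()) {
    System.out.println(term + " +" + posIncr.getPositionIncrement());
  }
  // "tokenization" (12 chars) is skipped, but "test" carries a position
  // increment of 2, so phrase positions still line up: a +1, test +2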

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 17.05.10 14:50 */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/15/10 3:01 AM */
package org.apache.lucene.analysis.standard;
@ -21,7 +21,7 @@ package org.apache.lucene.analysis.standard;
/*
WARNING: if you change StandardTokenizerImpl*.jflex and need to regenerate
WARNING: if you change ClassicTokenizerImpl.jflex and need to regenerate
the tokenizer, only use the trunk version of JFlex 1.5 at the moment!
*/
@ -33,10 +33,10 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 17.05.10 14:50 from the specification file
* <tt>C:/Users/Uwe Schindler/Projects/lucene/newtrunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex</tt>
* on 9/15/10 3:01 AM from the specification file
* <tt>c:/Users/us/IdeaProjects/lucene/test-dev-trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
*/
class StandardTokenizerImplOrig implements StandardTokenizerInterface {
class ClassicTokenizerImpl implements StandardTokenizerInterface {
/** This character denotes the end of file */
public static final int YYEOF = -1;
@ -383,7 +383,7 @@ public final void getText(CharTermAttribute t) {
*
* @param in the java.io.Reader to read input from.
*/
StandardTokenizerImplOrig(java.io.Reader in) {
ClassicTokenizerImpl(java.io.Reader in) {
this.zzReader = in;
}
@ -393,7 +393,7 @@ public final void getText(CharTermAttribute t) {
*
* @param in the java.io.Inputstream to read input from.
*/
StandardTokenizerImplOrig(java.io.InputStream in) {
ClassicTokenizerImpl(java.io.InputStream in) {
this(new java.io.InputStreamReader(in));
}

View File

@ -19,7 +19,7 @@ package org.apache.lucene.analysis.standard;
/*
WARNING: if you change StandardTokenizerImpl*.jflex and need to regenerate
WARNING: if you change ClassicTokenizerImpl.jflex and need to regenerate
the tokenizer, only use the trunk version of JFlex 1.5 at the moment!
*/
@ -29,7 +29,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%%
%class StandardTokenizerImplOrig
%class ClassicTokenizerImpl
%implements StandardTokenizerInterface
%unicode 3.0
%integer

View File

@ -39,10 +39,12 @@ import java.util.Set;
* <p>You must specify the required {@link Version}
* compatibility when creating StandardAnalyzer:
* <ul>
* <li> As of 3.1, StopFilter correctly handles Unicode 4.0
* supplementary characters in stopwords
* <li> As of 2.9, StopFilter preserves position
* increments
* <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
* and StopFilter correctly handles Unicode 4.0 supplementary characters
* in stopwords. {@link ClassicTokenizer} and {@link ClassicAnalyzer}
* are the pre-3.1 implementations of StandardTokenizer and
* StandardAnalyzer.
* <li> As of 2.9, StopFilter preserves position increments
* <li> As of 2.4, Tokens incorrectly identified as acronyms
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
* </ul>
@ -122,7 +124,7 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
src.setMaxTokenLength(maxTokenLength);
src.setReplaceInvalidAcronym(replaceInvalidAcronym);
TokenStream tok = new StandardFilter(src);
TokenStream tok = new StandardFilter(matchVersion, src);
tok = new LowerCaseFilter(matchVersion, tok);
tok = new StopFilter(matchVersion, tok, stopwords);
return new TokenStreamComponents(src, tok) {

View File

@ -17,33 +17,45 @@ package org.apache.lucene.analysis.standard;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
/** Normalizes tokens extracted with {@link StandardTokenizer}. */
public final class StandardFilter extends TokenFilter {
/** Construct filtering <i>in</i>. */
/**
* Normalizes tokens extracted with {@link StandardTokenizer}.
*/
public class StandardFilter extends TokenFilter {
private final Version matchVersion;
public StandardFilter(TokenStream in) {
super(in);
this(Version.LUCENE_30, in);
}
private static final String APOSTROPHE_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.APOSTROPHE];
private static final String ACRONYM_TYPE = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.ACRONYM];
public StandardFilter(Version matchVersion, TokenStream in) {
super(in);
this.matchVersion = matchVersion;
}
private static final String APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
private static final String ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
// this filter uses the type attribute
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/** Returns the next token in the stream, or null at EOS.
* <p>Removes <tt>'s</tt> from the end of words.
* <p>Removes dots from acronyms.
*/
@Override
public final boolean incrementToken() throws java.io.IOException {
public final boolean incrementToken() throws IOException {
if (matchVersion.onOrAfter(Version.LUCENE_31))
return input.incrementToken(); // TODO: add some niceties for the new grammar
else
return incrementTokenClassic();
}
public final boolean incrementTokenClassic() throws IOException {
if (!input.incrementToken()) {
return false;
}
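In short, matchVersion now gates the behavior; a fragment-level sketch (text illustrative, the obvious imports assumed):

  // Pre-3.1 pairing: classic grammar plus classic normalizations,
  // yielding "Bob" and "IBM" from the text below.
  TokenStream classic = new StandardFilter(Version.LUCENE_30,
      new StandardTokenizer(Version.LUCENE_30, new StringReader("Bob's I.B.M.")));

  // 3.1+ pairing: the UAX#29 grammar runs, and for now the filter
  // passes its tokens through unchanged.
  TokenStream current = new StandardFilter(Version.LUCENE_31,
      new StandardTokenizer(Version.LUCENE_31, new StringReader("Bob's I.B.M.")));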

View File

@ -17,39 +17,42 @@
package org.apache.lucene.analysis.standard;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
/** A grammar-based tokenizer constructed with JFlex
*
* <p> This should be a good tokenizer for most European-language documents:
*
* <ul>
* <li>Splits words at punctuation characters, removing punctuation. However, a
* dot that's not followed by whitespace is considered part of a token.
* <li>Splits words at hyphens, unless there's a number in the token, in which case
* the whole token is interpreted as a product number and is not split.
* <li>Recognizes email addresses and internet hostnames as one token.
* </ul>
*
import java.io.IOException;
import java.io.Reader;
/** A grammar-based tokenizer constructed with JFlex.
* <p>
* As of Lucene version 3.1, this class implements the Word Break rules from the
* Unicode Text Segmentation algorithm, as specified in
* <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
* <p/>
* <b>WARNING</b>: Because JFlex does not support Unicode supplementary
* characters (characters above the Basic Multilingual Plane, which contains
* those up to and including U+FFFF), this scanner will not recognize them
* properly. If you need to be able to process text containing supplementary
* characters, consider using the ICU4J-backed implementation in contrib/icu
* ({@link org.apache.lucene.analysis.icu.segmentation.ICUTokenizer})
* instead of this class, since the ICU4J-backed implementation does not have
* this limitation.
* <p>Many applications have specific tokenizer needs. If this tokenizer does
* not suit your application, please consider copying this source code
* directory to your project and maintaining your own grammar-based tokenizer.
*
* <a name="version"/>
* <p>You must specify the required {@link Version}
* compatibility when creating StandardAnalyzer:
* compatibility when creating StandardTokenizer:
* <ul>
* <li> As of 2.4, Tokens incorrectly identified as acronyms
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
* <li> As of 3.1, StandardTokenizer implements Unicode text segmentation.
* If you use a previous version number, you get the exact behavior of
* {@link ClassicTokenizer} for backwards compatibility.
* </ul>
*/
@ -58,12 +61,22 @@ public final class StandardTokenizer extends Tokenizer {
private StandardTokenizerInterface scanner;
public static final int ALPHANUM = 0;
/** @deprecated */
@Deprecated
public static final int APOSTROPHE = 1;
/** @deprecated */
@Deprecated
public static final int ACRONYM = 2;
/** @deprecated */
@Deprecated
public static final int COMPANY = 3;
public static final int EMAIL = 4;
/** @deprecated */
@Deprecated
public static final int HOST = 5;
public static final int NUM = 6;
/** @deprecated */
@Deprecated
public static final int CJ = 7;
/**
@ -73,6 +86,11 @@ public final class StandardTokenizer extends Tokenizer {
@Deprecated
public static final int ACRONYM_DEP = 8;
public static final int URL = 9;
public static final int SOUTHEAST_ASIAN = 10;
public static final int IDEOGRAPHIC = 11;
public static final int HIRAGANA = 12;
/** String token types that correspond to token type int constants */
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",
@ -83,7 +101,11 @@ public final class StandardTokenizer extends Tokenizer {
"<HOST>",
"<NUM>",
"<CJ>",
"<ACRONYM_DEP>"
"<ACRONYM_DEP>",
"<URL>",
"<SOUTHEAST_ASIAN>",
"<IDEOGRAPHIC>",
"<HIRAGANA>"
};
private boolean replaceInvalidAcronym;
@ -132,7 +154,7 @@ public final class StandardTokenizer extends Tokenizer {
private final void init(Reader input, Version matchVersion) {
this.scanner = matchVersion.onOrAfter(Version.LUCENE_31) ?
new StandardTokenizerImpl31(input) : new StandardTokenizerImplOrig(input);
new StandardTokenizerImpl(input) : new ClassicTokenizerImpl(input);
if (matchVersion.onOrAfter(Version.LUCENE_24)) {
replaceInvalidAcronym = true;
} else {

View File

@ -0,0 +1,260 @@
package org.apache.lucene.analysis.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class implements Word Break rules from the Unicode Text Segmentation
* algorithm, as specified in
* <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
* URLs and email addresses are also tokenized according to the relevant RFCs.
* <p/>
* Tokens produced are of the following types:
* <ul>
* <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
* <li>&lt;NUM&gt;: A number</li>
* <li>&lt;URL&gt;: A URL</li>
* <li>&lt;EMAIL&gt;: An email address</li>
* <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
* <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
* <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
* </ul>
* <b>WARNING</b>: Because JFlex does not support Unicode supplementary
* characters (characters above the Basic Multilingual Plane, which contains
* those up to and including U+FFFF), this scanner will not recognize them
* properly. If you need to be able to process text containing supplementary
* characters, consider using the ICU4J-backed implementation in contrib/icu
* ({@link org.apache.lucene.analysis.icu.segmentation.ICUTokenizer})
* instead of this class, since the ICU4J-backed implementation does not have
* this limitation.
*/
%%
%unicode 5.2
%integer
%final
%public
%class StandardTokenizerImpl
%implements StandardTokenizerInterface
%function getNextToken
%char
// UAX#29 WB4. X (Extend | Format)* --> X
//
ALetterEx = \p{WB:ALetter} [\p{WB:Format}\p{WB:Extend}]*
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
NumericEx = [\p{WB:Numeric}\uFF10-\uFF19] [\p{WB:Format}\p{WB:Extend}]*
KatakanaEx = \p{WB:Katakana} [\p{WB:Format}\p{WB:Extend}]*
MidLetterEx = [\p{WB:MidLetter}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]*
MidNumericEx = [\p{WB:MidNum}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]*
ExtendNumLetEx = \p{WB:ExtendNumLet} [\p{WB:Format}\p{WB:Extend}]*
// URL and E-mail syntax specifications:
//
// RFC-952: DOD INTERNET HOST TABLE SPECIFICATION
// RFC-1035: DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION
// RFC-1123: Requirements for Internet Hosts - Application and Support
// RFC-1738: Uniform Resource Locators (URL)
// RFC-3986: Uniform Resource Identifier (URI): Generic Syntax
// RFC-5234: Augmented BNF for Syntax Specifications: ABNF
// RFC-5321: Simple Mail Transfer Protocol
// RFC-5322: Internet Message Format
%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
DomainNameLoose = {DomainLabel} ("." {DomainLabel})*
IPv4DecimalOctet = "0"{0,2} [0-9] | "0"? [1-9][0-9] | "1" [0-9][0-9] | "2" ([0-4][0-9] | "5" [0-5])
IPv4Address = {IPv4DecimalOctet} ("." {IPv4DecimalOctet}){3}
IPv6Hex16Bit = [0-9A-Fa-f]{1,4}
IPv6LeastSignificant32Bits = {IPv4Address} | ({IPv6Hex16Bit} ":" {IPv6Hex16Bit})
IPv6Address = ({IPv6Hex16Bit} ":"){6} {IPv6LeastSignificant32Bits}
| "::" ({IPv6Hex16Bit} ":"){5} {IPv6LeastSignificant32Bits}
| {IPv6Hex16Bit}? "::" ({IPv6Hex16Bit} ":"){4} {IPv6LeastSignificant32Bits}
| (({IPv6Hex16Bit} ":"){0,1} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){3} {IPv6LeastSignificant32Bits}
| (({IPv6Hex16Bit} ":"){0,2} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){2} {IPv6LeastSignificant32Bits}
| (({IPv6Hex16Bit} ":"){0,3} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit} ":" {IPv6LeastSignificant32Bits}
| (({IPv6Hex16Bit} ":"){0,4} {IPv6Hex16Bit})? "::" {IPv6LeastSignificant32Bits}
| (({IPv6Hex16Bit} ":"){0,5} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit}
| (({IPv6Hex16Bit} ":"){0,6} {IPv6Hex16Bit})? "::"
URIunreserved = [-._~A-Za-z0-9]
URIpercentEncoded = "%" [0-9A-Fa-f]{2}
URIsubDelims = [!$&'()*+,;=]
URIloginSegment = ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims})*
URIlogin = {URIloginSegment} (":" {URIloginSegment})? "@"
URIquery = "?" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
URIfragment = "#" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
URIport = ":" [0-9]{1,5}
URIhostStrict = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameStrict}
URIhostLoose = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameLoose}
URIauthorityStrict = {URIhostStrict} {URIport}?
URIauthorityLoose = {URIlogin}? {URIhostLoose} {URIport}?
HTTPsegment = ({URIunreserved} | {URIpercentEncoded} | [;:@&=])*
HTTPpath = ("/" {HTTPsegment})*
HTTPscheme = [hH][tT][tT][pP][sS]? "://"
HTTPurlFull = {HTTPscheme} {URIauthorityLoose} {HTTPpath}? {URIquery}? {URIfragment}?
// {HTTPurlNoScheme} excludes {URIlogin}, because it could otherwise accept e-mail addresses
HTTPurlNoScheme = {URIauthorityStrict} {HTTPpath}? {URIquery}? {URIfragment}?
HTTPurl = {HTTPurlFull} | {HTTPurlNoScheme}
FTPorFILEsegment = ({URIunreserved} | {URIpercentEncoded} | [?:@&=])*
FTPorFILEpath = "/" {FTPorFILEsegment} ("/" {FTPorFILEsegment})*
FTPtype = ";" [tT][yY][pP][eE] "=" [aAiIdD]
FTPscheme = [fF][tT][pP] "://"
FTPurl = {FTPscheme} {URIauthorityLoose} {FTPorFILEpath} {FTPtype}? {URIfragment}?
FILEscheme = [fF][iI][lL][eE] "://"
FILEurl = {FILEscheme} {URIhostLoose}? {FTPorFILEpath} {URIfragment}?
URL = {HTTPurl} | {FTPurl} | {FILEurl}
EMAILquotedString = [\"] ([\u0001-\u0008\u000B\u000C\u000E-\u0021\u0023-\u005B\u005D-\u007E] | [\\] [\u0000-\u007F])* [\"]
EMAILatomText = [A-Za-z0-9!#$%&'*+-/=?\^_`{|}~]
EMAILlabel = {EMAILatomText}+ | {EMAILquotedString}
EMAILlocalPart = {EMAILlabel} ("." {EMAILlabel})*
EMAILdomainLiteralText = [\u0001-\u0008\u000B\u000C\u000E-\u005A\u005E-\u007F] | [\\] [\u0000-\u007F]
// DFA minimization allows {IPv6Address} and {IPv4Address} to be included
// in the {EMAILbracketedHost} definition without incurring any size penalties,
// since {EMAILdomainLiteralText} recognizes all valid IP addresses.
// The IP address regexes are included in {EMAILbracketedHost} simply as a
// reminder that they are acceptable bracketed host forms.
EMAILbracketedHost = "[" ({EMAILdomainLiteralText}* | {IPv4Address} | [iI][pP][vV] "6:" {IPv6Address}) "]"
EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
%{
/** Alphanumeric sequences */
public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;
/** Numbers */
public static final int NUMERIC_TYPE = StandardTokenizer.NUM;
/** URLs with scheme: HTTP(S), FTP, or FILE; no-scheme URLs match HTTP syntax */
public static final int URL_TYPE = StandardTokenizer.URL;
/** E-mail addresses */
public static final int EMAIL_TYPE = StandardTokenizer.EMAIL;
/**
* Chars in class \p{Line_Break = Complex_Context} are from South East Asian
* scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
* together as a single token rather than broken up, because the logic
* required to break them at word boundaries is too complex for UAX#29.
* {@see Unicode Line Breaking Algorithm http://www.unicode.org/reports/tr14/#SA}
*/
public static final int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
public final int yychar()
{
return yychar;
}
/**
* Fills CharTermAttribute with the current token text.
*/
public final void getText(CharTermAttribute t) {
t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
%}
%%
// UAX#29 WB1. sot ÷
// WB2. ÷ eot
//
<<EOF>> { return StandardTokenizerInterface.YYEOF; }
{URL} { return URL_TYPE; }
{EMAIL} { return EMAIL_TYPE; }
// UAX#29 WB8. Numeric × Numeric
// WB11. Numeric (MidNum | MidNumLet) × Numeric
// WB12. Numeric × (MidNum | MidNumLet) Numeric
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
| {MidNumericEx} {NumericEx}
| {NumericEx})*
{ExtendNumLetEx}*
{ return NUMERIC_TYPE; }
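// Illustrative (assumed) example: "1,234.56" is kept as a single <NUM> token,
// since ',' (MidNum) and '.' (MidNumLet) do not break a run of digits.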
// UAX#29 WB5. ALetter × ALetter
// WB6. ALetter × (MidLetter | MidNumLet) ALetter
// WB7. ALetter (MidLetter | MidNumLet) × ALetter
// WB9. ALetter × Numeric
// WB10. Numeric × ALetter
// WB13. Katakana × Katakana
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
{ExtendNumLetEx}*
{ return WORD_TYPE; }
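// Illustrative (assumed) examples: "can't" (apostrophe is MidNumLet) and
// "R2D2" (letters and digits joined per WB9/WB10) are each one <ALPHANUM> token.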
// From UAX #29:
//
// [C]haracters with the Line_Break property values of Contingent_Break (CB),
// Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
// boundary property values based on criteria outside of the scope of this
// annex. That means that satisfactory treatment of languages like Chinese
// or Thai requires special handling.
//
// In Unicode 5.2, only one character has the \p{Line_Break = Contingent_Break}
// property: U+FFFC ( ) OBJECT REPLACEMENT CHARACTER.
//
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
// Lao, etc.) are kept together. This grammar does the same below.
//
// See also the Unicode Line Breaking Algorithm:
//
// http://www.unicode.org/reports/tr14/#SA
//
\p{LB:Complex_Context}+ { return SOUTH_EAST_ASIAN_TYPE; }
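// Illustrative (assumed) example: a Thai run such as "ภาษาไทย" is emitted as
// one <SOUTHEAST_ASIAN> token rather than being broken between characters.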
// UAX#29 WB14. Any ÷ Any
//
\p{Script:Han} { return IDEOGRAPHIC_TYPE; }
\p{Script:Hiragana} { return HIRAGANA_TYPE; }
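// Illustrative (assumed) examples: each character of "中国" becomes its own
// <IDEOGRAPHIC> token, and each character of "かな" its own <HIRAGANA> token.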
// UAX#29 WB3. CR × LF
// WB3a. (Newline | CR | LF) ÷
// WB3b. ÷ (Newline | CR | LF)
// WB14. Any ÷ Any
//
[^] { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }

View File

@@ -1,134 +0,0 @@
package org.apache.lucene.analysis.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
WARNING: if you change StandardTokenizerImpl*.jflex and need to regenerate
the tokenizer, only use the trunk version of JFlex 1.5 at the moment!
*/
import java.io.Reader;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%%
%class StandardTokenizerImpl31
%implements StandardTokenizerInterface
%unicode 4.0
%integer
%function getNextToken
%pack
%char
%{
public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
public static final int ACRONYM = StandardTokenizer.ACRONYM;
public static final int COMPANY = StandardTokenizer.COMPANY;
public static final int EMAIL = StandardTokenizer.EMAIL;
public static final int HOST = StandardTokenizer.HOST;
public static final int NUM = StandardTokenizer.NUM;
public static final int CJ = StandardTokenizer.CJ;
/**
* @deprecated this solves a bug where HOSTs that end with '.' are identified
* as ACRONYMs.
*/
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
public final int yychar()
{
return yychar;
}
/**
* Fills CharTermAttribute with the current token text.
*/
public final void getText(CharTermAttribute t) {
t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
%}
THAI = [\u0E00-\u0E59]
// basic word: a sequence of digits & letters (includes Thai to enable ThaiAnalyzer to function)
ALPHANUM = ({LETTER}|{THAI}|[:digit:])+
// internal apostrophes: O'Reilly, you're, O'Reilly's
// use a post-filter to remove possessives
APOSTROPHE = {ALPHA} ("'" {ALPHA})+
// acronyms: U.S.A., I.B.M., etc.
// use a post-filter to remove dots
ACRONYM = {LETTER} "." ({LETTER} ".")+
ACRONYM_DEP = {ALPHANUM} "." ({ALPHANUM} ".")+
// company names like AT&T and Excite@Home.
COMPANY = {ALPHA} ("&"|"@") {ALPHA}
// email addresses
EMAIL = {ALPHANUM} (("."|"-"|"_") {ALPHANUM})* "@" {ALPHANUM} (("."|"-") {ALPHANUM})+
// hostname
HOST = {ALPHANUM} ((".") {ALPHANUM})+
// floating point, serial, model numbers, ip addresses, etc.
// every other segment must have at least one digit
NUM = ({ALPHANUM} {P} {HAS_DIGIT}
| {HAS_DIGIT} {P} {ALPHANUM}
| {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+
| {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
| {ALPHANUM} {P} {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
| {HAS_DIGIT} {P} {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+)
// punctuation
P = ("_"|"-"|"/"|"."|",")
// at least one digit
HAS_DIGIT = ({LETTER}|[:digit:])* [:digit:] ({LETTER}|[:digit:])*
ALPHA = ({LETTER})+
// From the JFlex manual: "the expression that matches everything of <a> not matched by <b> is !(!<a>|<b>)"
LETTER = !(![:letter:]|{CJ})
// Chinese and Japanese (but NOT Korean, which is included in [:letter:])
CJ = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
WHITESPACE = \r\n | [ \r\n\t\f]
%%
{ALPHANUM} { return ALPHANUM; }
{APOSTROPHE} { return APOSTROPHE; }
{ACRONYM} { return ACRONYM; }
{COMPANY} { return COMPANY; }
{EMAIL} { return EMAIL; }
{HOST} { return HOST; }
{NUM} { return NUM; }
{CJ} { return CJ; }
{ACRONYM_DEP} { return ACRONYM_DEP; }
/** Ignore the rest */
. | {WHITESPACE} { /* ignore */ }

View File

@@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 17.05.10 14:50 */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/15/10 3:01 AM */
package org.apache.lucene.analysis.standard;
@@ -19,33 +19,51 @@ package org.apache.lucene.analysis.standard;
* limitations under the License.
*/
/*
WARNING: if you change StandardTokenizerImpl*.jflex and need to regenerate
the tokenizer, only use the trunk version of JFlex 1.5 at the moment!
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 17.05.10 14:50 from the specification file
* <tt>C:/Users/Uwe Schindler/Projects/lucene/newtrunk/lucene/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex</tt>
* This class implements Word Break rules from the Unicode Text Segmentation
* algorithm, as specified in
* <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>
* <p/>
* Tokens produced are of the following types:
* <ul>
* <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
* <li>&lt;NUM&gt;: A number</li>
* <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
* <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
* <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
* </ul>
* <b>WARNING</b>: Because JFlex does not support Unicode supplementary
* characters (characters above the Basic Multilingual Plane, which contains
* those up to and including U+FFFF), this scanner will not recognize them
* properly. If you need to be able to process text containing supplementary
* characters, consider using the ICU4J-backed implementation in contrib/icu
* ({@link org.apache.lucene.analysis.icu.segmentation.ICUTokenizer})
* instead of this class, since the ICU4J-backed implementation does not have
* this limitation.
*/
class StandardTokenizerImpl31 implements StandardTokenizerInterface {
public final class UAX29Tokenizer extends Tokenizer {
/** This character denotes the end of file */
public static final int YYEOF = -1;
private static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
/** lexical states */
public static final int YYINITIAL = 0;
private static final int YYINITIAL = 0;
/**
* ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
@@ -61,68 +79,113 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
* Translates characters to character classes
*/
private static final String ZZ_CMAP_PACKED =
"\11\0\1\0\1\15\1\0\1\0\1\14\22\0\1\0\5\0\1\5"+
"\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0\1\6\32\12"+
"\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12\4\0\1\12"+
"\5\0\27\12\1\0\37\12\1\0\u013f\12\31\0\162\12\4\0\14\12"+
"\16\0\5\12\11\0\1\12\213\0\1\12\13\0\1\12\1\0\3\12"+
"\1\0\1\12\1\0\24\12\1\0\54\12\1\0\46\12\1\0\5\12"+
"\4\0\202\12\10\0\105\12\1\0\46\12\2\0\2\12\6\0\20\12"+
"\41\0\46\12\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12"+
"\56\0\32\12\5\0\13\12\25\0\12\2\4\0\2\12\1\0\143\12"+
"\1\0\1\12\17\0\2\12\7\0\2\12\12\2\3\12\2\0\1\12"+
"\20\0\1\12\1\0\36\12\35\0\3\12\60\0\46\12\13\0\1\12"+
"\u0152\0\66\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0\12\2"+
"\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0\1\12"+
"\3\0\4\12\3\0\1\12\36\0\2\12\1\0\3\12\4\0\12\2"+
"\2\12\23\0\6\12\4\0\2\12\2\0\26\12\1\0\7\12\1\0"+
"\2\12\1\0\2\12\1\0\2\12\37\0\4\12\1\0\1\12\7\0"+
"\12\2\2\0\3\12\20\0\11\12\1\0\3\12\1\0\26\12\1\0"+
"\7\12\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12\17\0"+
"\2\12\4\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0"+
"\7\12\1\0\2\12\1\0\5\12\3\0\1\12\36\0\2\12\1\0"+
"\3\12\4\0\12\2\1\0\1\12\21\0\1\12\1\0\6\12\3\0"+
"\3\12\1\0\4\12\3\0\2\12\1\0\1\12\1\0\2\12\3\0"+
"\2\12\3\0\3\12\3\0\10\12\1\0\3\12\55\0\11\2\25\0"+
"\10\12\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12\46\0"+
"\2\12\4\0\12\2\25\0\10\12\1\0\3\12\1\0\27\12\1\0"+
"\12\12\1\0\5\12\3\0\1\12\40\0\1\12\1\0\2\12\4\0"+
"\12\2\25\0\10\12\1\0\3\12\1\0\27\12\1\0\20\12\46\0"+
"\2\12\4\0\12\2\25\0\22\12\3\0\30\12\1\0\11\12\1\0"+
"\1\12\2\0\7\12\71\0\1\1\60\12\1\1\2\12\14\1\7\12"+
"\11\1\12\2\47\0\2\12\1\0\1\12\2\0\2\12\1\0\1\12"+
"\2\0\1\12\6\0\4\12\1\0\7\12\1\0\3\12\1\0\1\12"+
"\1\0\1\12\2\0\2\12\1\0\4\12\1\0\2\12\11\0\1\12"+
"\2\0\5\12\1\0\1\12\11\0\12\2\2\0\2\12\42\0\1\12"+
"\37\0\12\2\26\0\10\12\1\0\42\12\35\0\4\12\164\0\42\12"+
"\1\0\5\12\1\0\2\12\25\0\12\2\6\0\6\12\112\0\46\12"+
"\12\0\51\12\7\0\132\12\5\0\104\12\5\0\122\12\6\0\7\12"+
"\1\0\77\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\1\12"+
"\1\0\4\12\2\0\47\12\1\0\1\12\1\0\4\12\2\0\37\12"+
"\1\0\1\12\1\0\4\12\2\0\7\12\1\0\1\12\1\0\4\12"+
"\2\0\7\12\1\0\7\12\1\0\27\12\1\0\37\12\1\0\1\12"+
"\1\0\4\12\2\0\7\12\1\0\47\12\1\0\23\12\16\0\11\2"+
"\56\0\125\12\14\0\u026c\12\2\0\10\12\12\0\32\12\5\0\113\12"+
"\25\0\15\12\1\0\4\12\16\0\22\12\16\0\22\12\16\0\15\12"+
"\1\0\3\12\17\0\64\12\43\0\1\12\4\0\1\12\3\0\12\2"+
"\46\0\12\2\6\0\130\12\10\0\51\12\127\0\35\12\51\0\12\2"+
"\36\12\2\0\5\12\u038b\0\154\12\224\0\234\12\4\0\132\12\6\0"+
"\26\12\2\0\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0"+
"\1\12\1\0\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0"+
"\7\12\1\0\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0"+
"\6\12\4\0\15\12\5\0\3\12\1\0\7\12\164\0\1\12\15\0"+
"\1\12\202\0\1\12\4\0\1\12\2\0\12\12\1\0\1\12\3\0"+
"\5\12\6\0\1\12\1\0\1\12\1\0\1\12\1\0\4\12\1\0"+
"\3\12\1\0\7\12\3\0\3\12\5\0\5\12\u0ebb\0\2\12\52\0"+
"\5\12\5\0\2\12\3\0\1\13\126\13\6\13\3\13\1\13\132\13"+
"\1\13\4\13\5\13\50\13\3\13\1\0\136\12\21\0\30\12\70\0"+
"\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13\132\13\u048d\12"+
"\u0773\0\u2ba4\12\u215c\0\u012e\13\2\13\73\13\225\13\7\12\14\0\5\12"+
"\5\0\1\12\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12"+
"\1\0\2\12\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12"+
"\2\0\66\12\50\0\14\12\164\0\5\12\1\0\207\12\23\0\12\2"+
"\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12\3\0\6\12"+
"\2\0\6\12\2\0\6\12\2\0\3\12\43\0";
"\47\0\1\7\4\0\1\6\1\0\1\7\1\0\12\3\1\5\1\6"+
"\5\0\32\1\4\0\1\10\1\0\32\1\57\0\1\1\2\0\1\2"+
"\7\0\1\1\1\0\1\5\2\0\1\1\5\0\27\1\1\0\37\1"+
"\1\0\u01ca\1\4\0\14\1\16\0\5\1\7\0\1\1\1\0\1\1"+
"\21\0\160\2\5\1\1\0\2\1\2\0\4\1\1\6\7\0\1\1"+
"\1\5\3\1\1\0\1\1\1\0\24\1\1\0\123\1\1\0\213\1"+
"\1\0\7\2\234\1\13\0\46\1\2\0\1\1\7\0\47\1\1\0"+
"\1\6\7\0\55\2\1\0\1\2\1\0\2\2\1\0\2\2\1\0"+
"\1\2\10\0\33\1\5\0\4\1\1\5\13\0\4\2\10\0\2\6"+
"\2\0\13\2\6\0\52\1\24\2\1\0\12\3\1\0\1\3\1\6"+
"\1\0\2\1\1\2\143\1\1\0\1\1\17\2\2\1\2\2\1\0"+
"\4\2\2\1\12\3\3\1\2\0\1\1\17\0\1\2\1\1\1\2"+
"\36\1\33\2\2\0\131\1\13\2\1\1\16\0\12\3\41\1\11\2"+
"\2\1\2\0\1\6\1\0\1\1\5\0\26\1\4\2\1\1\11\2"+
"\1\1\3\2\1\1\5\2\322\0\4\2\66\1\2\0\1\2\1\1"+
"\21\2\1\0\1\1\5\2\2\0\12\1\2\2\2\0\12\3\1\0"+
"\2\1\6\0\7\1\1\0\3\2\1\0\10\1\2\0\2\1\2\0"+
"\26\1\1\0\7\1\1\0\1\1\3\0\4\1\2\0\1\2\1\1"+
"\7\2\2\0\2\2\2\0\3\2\1\1\10\0\1\2\4\0\2\1"+
"\1\0\3\1\2\2\2\0\12\3\2\1\17\0\3\2\1\0\6\1"+
"\4\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1\1\0\2\1"+
"\1\0\2\1\2\0\1\2\1\0\5\2\4\0\2\2\2\0\3\2"+
"\3\0\1\2\7\0\4\1\1\0\1\1\7\0\12\3\2\2\3\1"+
"\1\2\13\0\3\2\1\0\11\1\1\0\3\1\1\0\26\1\1\0"+
"\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\10\2\1\0"+
"\3\2\1\0\3\2\2\0\1\1\17\0\2\1\2\2\2\0\12\3"+
"\21\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1"+
"\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2"+
"\2\0\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2\2\0"+
"\12\3\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0\3\1"+
"\1\0\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0\2\1"+
"\3\0\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0\4\2"+
"\2\0\1\1\6\0\1\2\16\0\12\3\21\0\3\2\1\0\10\1"+
"\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1\3\0\1\1"+
"\7\2\1\0\3\2\1\0\4\2\7\0\2\2\1\0\2\1\6\0"+
"\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0\3\1"+
"\1\0\27\1\1\0\12\1\1\0\5\1\2\0\1\2\1\1\7\2"+
"\1\0\3\2\1\0\4\2\7\0\2\2\7\0\1\1\1\0\2\1"+
"\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0\3\1\1\0"+
"\27\1\1\0\20\1\3\0\1\1\7\2\1\0\3\2\1\0\4\2"+
"\11\0\1\2\10\0\2\1\2\2\2\0\12\3\12\0\6\1\2\0"+
"\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0\1\1\2\0"+
"\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0\10\2\22\0"+
"\2\2\15\0\60\11\1\12\2\11\7\12\5\0\7\11\10\12\1\0"+
"\12\3\47\0\2\11\1\0\1\11\2\0\2\11\1\0\1\11\2\0"+
"\1\11\6\0\4\11\1\0\7\11\1\0\3\11\1\0\1\11\1\0"+
"\1\11\2\0\2\11\1\0\4\11\1\12\2\11\6\12\1\0\2\12"+
"\1\11\2\0\5\11\1\0\1\11\1\0\6\12\2\0\12\3\2\0"+
"\2\11\42\0\1\1\27\0\2\2\6\0\12\3\13\0\1\2\1\0"+
"\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1\4\0\24\2"+
"\1\0\2\2\4\1\4\0\10\2\1\0\44\2\11\0\1\2\71\0"+
"\53\11\24\12\1\11\12\3\6\0\6\11\4\12\4\11\3\12\1\11"+
"\3\12\2\11\7\12\3\11\4\12\15\11\14\12\1\11\1\12\12\3"+
"\4\12\2\11\46\1\12\0\53\1\1\0\1\1\3\0\u0149\1\1\0"+
"\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0"+
"\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0"+
"\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1\4\0"+
"\1\2\40\0\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1\1\0"+
"\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0\4\1\3\2"+
"\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1\1\0\3\1"+
"\1\0\2\2\14\0\64\11\40\12\3\0\1\11\4\0\1\11\1\12"+
"\2\0\12\3\41\0\3\2\2\0\12\3\6\0\130\1\10\0\51\1"+
"\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2\4\0\14\2"+
"\12\0\12\3\36\11\2\0\5\11\13\0\54\11\4\0\21\12\7\11"+
"\2\12\6\0\13\3\3\0\2\11\40\0\27\1\5\2\4\0\65\11"+
"\12\12\1\0\35\12\2\0\1\2\12\3\6\0\12\3\6\0\16\11"+
"\122\0\5\2\57\1\21\2\7\1\4\0\12\3\21\0\11\2\14\0"+
"\3\2\36\1\12\2\3\0\2\1\12\3\106\0\44\1\24\2\10\0"+
"\12\3\3\0\3\1\12\3\44\1\122\0\3\2\1\0\25\2\4\1"+
"\1\2\4\1\1\2\15\0\300\1\47\2\26\0\3\2\u0116\1\2\0"+
"\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0\1\1\1\0"+
"\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0\7\1\1\0"+
"\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0\6\1\4\0"+
"\15\1\5\0\3\1\1\0\7\1\17\0\4\2\10\0\2\7\12\0"+
"\1\7\2\0\1\5\2\0\5\2\20\0\2\10\3\0\1\6\17\0"+
"\1\10\13\0\5\2\5\0\6\2\1\0\1\1\15\0\1\1\20\0"+
"\5\1\73\0\41\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0"+
"\1\1\3\0\5\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0"+
"\4\1\1\0\13\1\2\0\4\1\5\0\5\1\4\0\1\1\21\0"+
"\51\1\u032d\0\64\1\u0716\0\57\1\1\0\57\1\1\0\205\1\6\0"+
"\4\1\3\2\16\0\46\1\12\0\66\1\11\0\1\1\20\0\27\1"+
"\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+
"\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\57\0\1\1"+
"\120\0\32\13\1\0\131\13\14\0\326\13\57\0\1\1\1\0\1\13"+
"\31\0\11\13\6\2\1\0\5\4\2\0\3\13\1\1\1\1\4\0"+
"\126\14\2\0\2\2\2\4\3\14\133\4\1\0\4\4\5\0\51\1"+
"\3\0\136\1\21\0\30\1\70\0\20\4\320\0\57\4\1\0\130\4"+
"\250\0\u19b6\13\112\0\u51cc\13\64\0\u048d\1\103\0\56\1\2\0\u010d\1"+
"\3\0\20\1\12\3\2\1\24\0\40\1\2\0\15\1\4\2\11\0"+
"\2\2\1\0\31\1\10\0\120\1\2\2\45\0\11\1\2\0\147\1"+
"\2\0\2\1\156\0\7\1\1\2\3\1\1\2\4\1\1\2\27\1"+
"\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\3\6\0"+
"\22\2\6\1\3\0\1\1\4\0\12\3\34\1\10\2\2\0\27\1"+
"\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\3"+
"\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\3"+
"\6\0\33\11\1\12\4\0\60\11\1\12\1\11\3\12\2\11\2\12"+
"\5\11\2\12\1\11\1\12\1\11\30\0\5\11\340\0\43\1\10\2"+
"\1\0\2\2\2\0\12\3\6\0\u2ba4\1\14\0\27\1\4\0\61\1"+
"\u2104\0\u012e\13\2\0\76\13\2\0\152\13\46\0\7\1\14\0\5\1"+
"\5\0\1\1\1\2\12\1\1\0\15\1\1\0\5\1\1\0\1\1"+
"\1\0\2\1\1\0\2\1\1\0\154\1\41\0\u016b\1\22\0\100\1"+
"\2\0\66\1\50\0\14\1\4\0\20\2\1\6\2\0\1\5\1\6"+
"\13\0\7\2\14\0\2\10\30\0\3\10\1\6\1\0\1\7\1\0"+
"\1\6\1\5\32\0\5\1\1\0\207\1\2\0\1\2\7\0\1\7"+
"\4\0\1\6\1\0\1\7\1\0\12\3\1\5\1\6\5\0\32\1"+
"\4\0\1\10\1\0\32\1\13\0\70\4\2\2\37\1\3\0\6\1"+
"\2\0\6\1\2\0\6\1\2\0\3\1\34\0\3\2\4\0";
/**
* Translates characters to character classes
@@ -135,13 +198,11 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
private static final int [] ZZ_ACTION = zzUnpackAction();
private static final String ZZ_ACTION_PACKED_0 =
"\1\0\1\1\3\2\1\3\1\1\13\0\1\2\3\4"+
"\2\0\1\5\1\0\1\5\3\4\6\5\1\6\1\4"+
"\2\7\1\10\1\0\1\10\3\0\2\10\1\11\1\12"+
"\1\4";
"\1\0\1\1\1\2\1\3\1\2\1\1\1\4\1\5"+
"\1\6\1\2\1\0\1\2\1\0\1\3\2\0";
private static int [] zzUnpackAction() {
int [] result = new int[51];
int [] result = new int[16];
int offset = 0;
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
return result;
@@ -166,16 +227,11 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
private static final String ZZ_ROWMAP_PACKED_0 =
"\0\0\0\16\0\34\0\52\0\70\0\16\0\106\0\124"+
"\0\142\0\160\0\176\0\214\0\232\0\250\0\266\0\304"+
"\0\322\0\340\0\356\0\374\0\u010a\0\u0118\0\u0126\0\u0134"+
"\0\u0142\0\u0150\0\u015e\0\u016c\0\u017a\0\u0188\0\u0196\0\u01a4"+
"\0\u01b2\0\u01c0\0\u01ce\0\u01dc\0\u01ea\0\u01f8\0\322\0\u0206"+
"\0\u0214\0\u0222\0\u0230\0\u023e\0\u024c\0\u025a\0\124\0\214"+
"\0\u0268\0\u0276\0\u0284";
"\0\0\0\15\0\32\0\47\0\64\0\101\0\116\0\15"+
"\0\15\0\133\0\150\0\165\0\202\0\217\0\101\0\234";
private static int [] zzUnpackRowMap() {
int [] result = new int[51];
int [] result = new int[16];
int offset = 0;
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
return result;
@@ -198,49 +254,21 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
private static final int [] ZZ_TRANS = zzUnpackTrans();
private static final String ZZ_TRANS_PACKED_0 =
"\1\2\1\3\1\4\7\2\1\5\1\6\1\7\1\2"+
"\17\0\2\3\1\0\1\10\1\0\1\11\2\12\1\13"+
"\1\3\4\0\1\3\1\4\1\0\1\14\1\0\1\11"+
"\2\15\1\16\1\4\4\0\1\3\1\4\1\17\1\20"+
"\1\21\1\22\2\12\1\13\1\23\20\0\1\2\1\0"+
"\1\24\1\25\7\0\1\26\4\0\2\27\7\0\1\27"+
"\4\0\1\30\1\31\7\0\1\32\5\0\1\33\7\0"+
"\1\13\4\0\1\34\1\35\7\0\1\36\4\0\1\37"+
"\1\40\7\0\1\41\4\0\1\42\1\43\7\0\1\44"+
"\15\0\1\45\4\0\1\24\1\25\7\0\1\46\15\0"+
"\1\47\4\0\2\27\7\0\1\50\4\0\1\3\1\4"+
"\1\17\1\10\1\21\1\22\2\12\1\13\1\23\4\0"+
"\2\24\1\0\1\51\1\0\1\11\2\52\1\0\1\24"+
"\4\0\1\24\1\25\1\0\1\53\1\0\1\11\2\54"+
"\1\55\1\25\4\0\1\24\1\25\1\0\1\51\1\0"+
"\1\11\2\52\1\0\1\26\4\0\2\27\1\0\1\56"+
"\2\0\1\56\2\0\1\27\4\0\2\30\1\0\1\52"+
"\1\0\1\11\2\52\1\0\1\30\4\0\1\30\1\31"+
"\1\0\1\54\1\0\1\11\2\54\1\55\1\31\4\0"+
"\1\30\1\31\1\0\1\52\1\0\1\11\2\52\1\0"+
"\1\32\5\0\1\33\1\0\1\55\2\0\3\55\1\33"+
"\4\0\2\34\1\0\1\57\1\0\1\11\2\12\1\13"+
"\1\34\4\0\1\34\1\35\1\0\1\60\1\0\1\11"+
"\2\15\1\16\1\35\4\0\1\34\1\35\1\0\1\57"+
"\1\0\1\11\2\12\1\13\1\36\4\0\2\37\1\0"+
"\1\12\1\0\1\11\2\12\1\13\1\37\4\0\1\37"+
"\1\40\1\0\1\15\1\0\1\11\2\15\1\16\1\40"+
"\4\0\1\37\1\40\1\0\1\12\1\0\1\11\2\12"+
"\1\13\1\41\4\0\2\42\1\0\1\13\2\0\3\13"+
"\1\42\4\0\1\42\1\43\1\0\1\16\2\0\3\16"+
"\1\43\4\0\1\42\1\43\1\0\1\13\2\0\3\13"+
"\1\44\6\0\1\17\6\0\1\45\4\0\1\24\1\25"+
"\1\0\1\61\1\0\1\11\2\52\1\0\1\26\4\0"+
"\2\27\1\0\1\56\2\0\1\56\2\0\1\50\4\0"+
"\2\24\7\0\1\24\4\0\2\30\7\0\1\30\4\0"+
"\2\34\7\0\1\34\4\0\2\37\7\0\1\37\4\0"+
"\2\42\7\0\1\42\4\0\2\62\7\0\1\62\4\0"+
"\2\24\7\0\1\63\4\0\2\62\1\0\1\56\2\0"+
"\1\56\2\0\1\62\4\0\2\24\1\0\1\61\1\0"+
"\1\11\2\52\1\0\1\24\3\0";
"\1\2\1\3\1\2\1\4\1\5\3\2\1\6\2\7"+
"\1\10\1\11\16\0\2\3\1\12\1\0\1\13\1\0"+
"\1\13\1\14\1\0\1\3\3\0\1\3\2\4\2\0"+
"\2\15\1\16\1\0\1\4\4\0\1\5\1\0\1\5"+
"\3\0\1\14\1\0\1\5\3\0\1\3\1\17\1\4"+
"\1\5\3\0\1\17\1\0\1\17\13\0\2\7\3\0"+
"\1\3\2\12\2\0\2\20\1\14\1\0\1\12\3\0"+
"\1\3\1\13\7\0\1\13\3\0\1\3\1\14\1\12"+
"\1\5\3\0\1\14\1\0\1\14\4\0\1\15\1\4"+
"\6\0\1\15\3\0\1\3\1\16\1\4\1\5\3\0"+
"\1\16\1\0\1\16\4\0\1\20\1\12\6\0\1\20"+
"\2\0";
private static int [] zzUnpackTrans() {
int [] result = new int[658];
int [] result = new int[169];
int offset = 0;
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
return result;
@@ -278,11 +306,11 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
private static final String ZZ_ATTRIBUTE_PACKED_0 =
"\1\0\1\11\3\1\1\11\1\1\13\0\4\1\2\0"+
"\1\1\1\0\17\1\1\0\1\1\3\0\5\1";
"\1\0\1\11\5\1\2\11\1\1\1\0\1\1\1\0"+
"\1\1\2\0";
private static int [] zzUnpackAttribute() {
int [] result = new int[51];
int [] result = new int[16];
int offset = 0;
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
return result;
@@ -350,35 +378,124 @@ class StandardTokenizerImpl31 implements StandardTokenizerInterface {
private boolean zzEOFDone;
/* user code: */
/** Alphanumeric sequences */
public static final String WORD_TYPE = "<ALPHANUM>";
/** Numbers */
public static final String NUMERIC_TYPE = "<NUM>";
/**
* Chars in class \p{Line_Break = Complex_Context} are from South East Asian
* scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
* together as a single token rather than broken up, because the logic
* required to break them at word boundaries is too complex for UAX#29.
* @see <a href="http://www.unicode.org/reports/tr14/#SA">Unicode Line Breaking Algorithm</a>
*/
public static final String SOUTH_EAST_ASIAN_TYPE = "<SOUTHEAST_ASIAN>";
public static final String IDEOGRAPHIC_TYPE = "<IDEOGRAPHIC>";
public static final String HIRAGANA_TYPE = "<HIRAGANA>";
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncrAtt
= addAttribute(PositionIncrementAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
private int posIncr;
public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
public static final int ACRONYM = StandardTokenizer.ACRONYM;
public static final int COMPANY = StandardTokenizer.COMPANY;
public static final int EMAIL = StandardTokenizer.EMAIL;
public static final int HOST = StandardTokenizer.HOST;
public static final int NUM = StandardTokenizer.NUM;
public static final int CJ = StandardTokenizer.CJ;
/**
* @deprecated this solves a bug where HOSTs that end with '.' are identified
* as ACRONYMs.
*/
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
/**
* @param source The AttributeSource to use
* @param input The input reader
*/
public UAX29Tokenizer(AttributeSource source, Reader input) {
super(source, input);
zzReader = input;
}
/**
* @param factory The AttributeFactory to use
* @param input The input reader
*/
public UAX29Tokenizer(AttributeFactory factory, Reader input) {
super(factory, input);
zzReader = input;
}
/**
* Set the max allowed token length. Any token longer than this is skipped.
* @param length the new max allowed token length
*/
public void setMaxTokenLength(int length) {
this.maxTokenLength = length;
}
public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
/**
* Returns the max allowed token length. Any token longer than this is
* skipped.
* @return the max allowed token length
*/
public int getMaxTokenLength() {
return maxTokenLength;
}
public final int yychar()
{
return yychar;
}
@Override
public final void end() {
// set final offset
int finalOffset = correctOffset(yychar + yylength());
offsetAtt.setOffset(finalOffset, finalOffset);
}
/**
* Fills CharTermAttribute with the current token text.
*/
public final void getText(CharTermAttribute t) {
t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
@Override
public void reset(Reader reader) throws IOException {
super.reset(reader);
yyreset(reader);
}
@Override
public final boolean incrementToken() throws IOException {
// This method is required because of two JFlex limitations:
// 1. No way to insert code at the beginning of the generated scanning
// get-next-token method; and
// 2. No way to declare @Override on the generated scanning method.
clearAttributes();
posIncr = 1;
return getNextToken();
}
/**
* Populates this TokenStream's CharTermAttribute and OffsetAttribute from
* the current match, sets the TypeAttribute to the passed-in tokenType, and
* sets the PositionIncrementAttribute to one, unless the immediately previous
* token(s) was/were skipped because maxTokenLength was exceeded, in which
* case the PositionIncrementAttribute is set to one plus the number of
* skipped overly long tokens.
* <p/>
* If maxTokenLength is exceeded, the CharTermAttribute is set back to empty
* and false is returned.
*
* @param tokenType The type of the matching token
* @return true if there is a token available (not too long); false otherwise
*/
private boolean populateAttributes(String tokenType) {
boolean isTokenAvailable = false;
if (yylength() > maxTokenLength) {
// When we skip a too-long token, we treat it like a stopword, introducing
// a position increment gap
++posIncr;
} else {
termAtt.copyBuffer(zzBuffer, zzStartRead, yylength());
posIncrAtt.setPositionIncrement(posIncr);
offsetAtt.setOffset(correctOffset(yychar),
correctOffset(yychar + yylength()));
typeAtt.setType(tokenType);
isTokenAvailable = true;
}
return isTokenAvailable;
}
/**
@@ -387,7 +504,8 @@ public final void getText(CharTermAttribute t) {
*
* @param in the java.io.Reader to read input from.
*/
StandardTokenizerImpl31(java.io.Reader in) {
public UAX29Tokenizer(java.io.Reader in) {
super(in);
this.zzReader = in;
}
@@ -397,7 +515,7 @@ public final void getText(CharTermAttribute t) {
*
* @param in the java.io.Inputstream to read input from.
*/
StandardTokenizerImpl31(java.io.InputStream in) {
public UAX29Tokenizer(java.io.InputStream in) {
this(new java.io.InputStreamReader(in));
}
@@ -411,7 +529,7 @@ public final void getText(CharTermAttribute t) {
char [] map = new char[0x10000];
int i = 0; /* index in packed string */
int j = 0; /* index in unpacked array */
while (i < 1234) {
while (i < 2138) {
int count = packed.charAt(i++);
char value = packed.charAt(i++);
do map[j++] = value; while (--count > 0);
@@ -477,7 +595,7 @@ public final void getText(CharTermAttribute t) {
/**
* Closes the input stream.
*/
public final void yyclose() throws java.io.IOException {
private final void yyclose() throws java.io.IOException {
zzAtEOF = true; /* indicate end of file */
zzEndRead = zzStartRead; /* invalidate buffer */
@@ -498,7 +616,7 @@ public final void getText(CharTermAttribute t) {
*
* @param reader the new input stream
*/
public final void yyreset(java.io.Reader reader) {
private final void yyreset(java.io.Reader reader) {
zzReader = reader;
zzAtBOL = true;
zzAtEOF = false;
@@ -515,7 +633,7 @@ public final void getText(CharTermAttribute t) {
/**
* Returns the current lexical state.
*/
public final int yystate() {
private final int yystate() {
return zzLexicalState;
}
@@ -525,7 +643,7 @@ public final void getText(CharTermAttribute t) {
*
* @param newState the new lexical state
*/
public final void yybegin(int newState) {
private final void yybegin(int newState) {
zzLexicalState = newState;
}
@@ -533,7 +651,7 @@ public final void getText(CharTermAttribute t) {
/**
* Returns the text matched by the current regular expression.
*/
public final String yytext() {
private final String yytext() {
return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
}
@@ -549,7 +667,7 @@ public final void getText(CharTermAttribute t) {
*
* @return the character at position pos
*/
public final char yycharat(int pos) {
private final char yycharat(int pos) {
return zzBuffer[zzStartRead+pos];
}
@@ -557,7 +675,7 @@ public final void getText(CharTermAttribute t) {
/**
* Returns the length of the matched text region.
*/
public final int yylength() {
private final int yylength() {
return zzMarkedPos-zzStartRead;
}
@@ -597,7 +715,7 @@ public final void getText(CharTermAttribute t) {
* @param number the number of characters to be read again.
* This number must not be greater than yylength()!
*/
public void yypushback(int number) {
private void yypushback(int number) {
if ( number > yylength() )
zzScanError(ZZ_PUSHBACK_2BIG);
@@ -612,7 +730,7 @@ public final void getText(CharTermAttribute t) {
* @return the next token
* @exception java.io.IOException if any I/O-Error occurs
*/
public int getNextToken() throws java.io.IOException {
private boolean getNextToken() throws java.io.IOException {
int zzInput;
int zzAction;
@@ -685,49 +803,35 @@
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
case 5:
{ return NUM;
{ if (populateAttributes(IDEOGRAPHIC_TYPE)) return true;
}
case 7: break;
case 1:
{ /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
}
case 8: break;
case 3:
{ if (populateAttributes(NUMERIC_TYPE)) return true;
}
case 9: break;
case 6:
{ if (populateAttributes(HIRAGANA_TYPE)) return true;
}
case 10: break;
case 4:
{ if (populateAttributes(SOUTH_EAST_ASIAN_TYPE)) return true;
}
case 11: break;
case 9:
{ return ACRONYM;
case 2:
{ if (populateAttributes(WORD_TYPE)) return true;
}
case 12: break;
case 7:
{ return COMPANY;
}
case 13: break;
case 10:
{ return EMAIL;
}
case 14: break;
case 1:
{ /* ignore */
}
case 15: break;
case 6:
{ return APOSTROPHE;
}
case 16: break;
case 3:
{ return CJ;
}
case 17: break;
case 8:
{ return ACRONYM_DEP;
}
case 18: break;
case 2:
{ return ALPHANUM;
}
case 19: break;
case 4:
{ return HOST;
}
case 20: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true;
return YYEOF;
{
return false;
}
}
else {
zzScanError(ZZ_NO_MATCH);

View File

@@ -0,0 +1,273 @@
package org.apache.lucene.analysis.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
/**
* This class implements Word Break rules from the Unicode Text Segmentation
* algorithm, as specified in
* <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>
* <p/>
* Tokens produced are of the following types:
* <ul>
* <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
* <li>&lt;NUM&gt;: A number</li>
* <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
* <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
* <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
* </ul>
* <b>WARNING</b>: Because JFlex does not support Unicode supplementary
* characters (characters above the Basic Multilingual Plane, which contains
* those up to and including U+FFFF), this scanner will not recognize them
* properly. If you need to be able to process text containing supplementary
* characters, consider using the ICU4J-backed implementation in contrib/icu
* ({@link org.apache.lucene.analysis.icu.segmentation.ICUTokenizer})
* instead of this class, since the ICU4J-backed implementation does not have
* this limitation.
*/
%%
%unicode 5.2
%final
%public
%apiprivate
%class UAX29Tokenizer
%extends Tokenizer
%type boolean
%function getNextToken
%char
%init{
super(in);
%init}
// WB4. X (Extend | Format)* --> X
//
ALetterEx = \p{WB:ALetter} [\p{WB:Format}\p{WB:Extend}]*
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
NumericEx = [\p{WB:Numeric}\uFF10-\uFF19] [\p{WB:Format}\p{WB:Extend}]*
KatakanaEx = \p{WB:Katakana} [\p{WB:Format}\p{WB:Extend}]*
MidLetterEx = [\p{WB:MidLetter}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]*
MidNumericEx = [\p{WB:MidNum}\p{WB:MidNumLet}] [\p{WB:Format}\p{WB:Extend}]*
ExtendNumLetEx = \p{WB:ExtendNumLet} [\p{WB:Format}\p{WB:Extend}]*
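// Illustrative (assumed) example of WB4: in "cafe\u0301" ('e' followed by
// U+0301 COMBINING ACUTE ACCENT), the Extend character is absorbed into the
// preceding {ALetterEx}, so the accented word still matches as one unit.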
%{
/** Alphanumeric sequences */
public static final String WORD_TYPE = "<ALPHANUM>";
/** Numbers */
public static final String NUMERIC_TYPE = "<NUM>";
/**
* Chars in class \p{Line_Break = Complex_Context} are from South East Asian
* scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
* together as a single token rather than broken up, because the logic
* required to break them at word boundaries is too complex for UAX#29.
* @see <a href="http://www.unicode.org/reports/tr14/#SA">Unicode Line Breaking Algorithm</a>
*/
public static final String SOUTH_EAST_ASIAN_TYPE = "<SOUTHEAST_ASIAN>";
public static final String IDEOGRAPHIC_TYPE = "<IDEOGRAPHIC>";
public static final String HIRAGANA_TYPE = "<HIRAGANA>";
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncrAtt
= addAttribute(PositionIncrementAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
private int posIncr;
/**
* @param source The AttributeSource to use
* @param input The input reader
*/
public UAX29Tokenizer(AttributeSource source, Reader input) {
super(source, input);
zzReader = input;
}
/**
* @param factory The AttributeFactory to use
* @param input The input reader
*/
public UAX29Tokenizer(AttributeFactory factory, Reader input) {
super(factory, input);
zzReader = input;
}
/**
* Set the max allowed token length. Any token longer than this is skipped.
* @param length the new max allowed token length
*/
public void setMaxTokenLength(int length) {
this.maxTokenLength = length;
}
/**
* Returns the max allowed token length. Any token longer than this is
* skipped.
* @return the max allowed token length
*/
public int getMaxTokenLength() {
return maxTokenLength;
}
@Override
public final void end() {
// set final offset
int finalOffset = correctOffset(yychar + yylength());
offsetAtt.setOffset(finalOffset, finalOffset);
}
@Override
public void reset(Reader reader) throws IOException {
super.reset(reader);
yyreset(reader);
}
@Override
public final boolean incrementToken() throws IOException {
// This method is required because of two JFlex limitations:
// 1. No way to insert code at the beginning of the generated scanning
// get-next-token method; and
// 2. No way to declare @Override on the generated scanning method.
clearAttributes();
posIncr = 1;
return getNextToken();
}
/**
* Populates this TokenStream's CharTermAttribute and OffsetAttribute from
* the current match, sets the TypeAttribute to the passed-in tokenType, and
* sets the PositionIncrementAttribute to one, unless the immediately previous
* token(s) was/were skipped because maxTokenLength was exceeded, in which
* case the PositionIncrementAttribute is set to one plus the number of
* skipped overly long tokens.
* <p/>
* If maxTokenLength is exceeded, the CharTermAttribute is set back to empty
* and false is returned.
*
* @param tokenType The type of the matching token
* @return true if there is a token available (not too long); false otherwise
*/
private boolean populateAttributes(String tokenType) {
boolean isTokenAvailable = false;
if (yylength() > maxTokenLength) {
// When we skip a too-long token, we treat it like a stopword, introducing
// a position increment gap
++posIncr;
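// Illustrative (assumed) example: with maxTokenLength = 5, the input
// "abc verylongtoken def" yields "abc" (posIncr 1) and "def" (posIncr 2).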
} else {
termAtt.copyBuffer(zzBuffer, zzStartRead, yylength());
posIncrAtt.setPositionIncrement(posIncr);
offsetAtt.setOffset(correctOffset(yychar),
correctOffset(yychar + yylength()));
typeAtt.setType(tokenType);
isTokenAvailable = true;
}
return isTokenAvailable;
}
%}
%%
// WB1. sot ÷
// WB2. ÷ eot
//
<<EOF>> { return false; }
// WB8. Numeric × Numeric
// WB11. Numeric (MidNum | MidNumLet) × Numeric
// WB12. Numeric × (MidNum | MidNumLet) Numeric
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
| {MidNumericEx} {NumericEx}
| {NumericEx})*
{ExtendNumLetEx}*
{ if (populateAttributes(NUMERIC_TYPE)) return true; }
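// Illustrative (assumed) example: "3,000.5" remains one <NUM> token;
// MidNum ',' and MidNumLet '.' join digit runs per WB11/WB12.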
// WB5. ALetter × ALetter
// WB6. ALetter × (MidLetter | MidNumLet) ALetter
// WB7. ALetter (MidLetter | MidNumLet) × ALetter
// WB9. ALetter × Numeric
// WB10. Numeric × ALetter
// WB13. Katakana × Katakana
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
//
{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
{ExtendNumLetEx}*
{ if (populateAttributes(WORD_TYPE)) return true; }
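// Illustrative (assumed) examples: "O'Neil" stays one <ALPHANUM> token
// (WB6/WB7), as does the mixed letter/digit run "MiG29" (WB9/WB10).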
// From UAX #29:
//
// [C]haracters with the Line_Break property values of Contingent_Break (CB),
// Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
// boundary property values based on criteria outside of the scope of this
// annex. That means that satisfactory treatment of languages like Chinese
// or Thai requires special handling.
//
// In Unicode 5.2, only one character has the \p{Line_Break = Contingent_Break}
// property: U+FFFC ( ) OBJECT REPLACEMENT CHARACTER.
//
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
// Lao, etc.) are kept together. This grammar does the same below.
//
// See also the Unicode Line Breaking Algorithm:
//
// http://www.unicode.org/reports/tr14/#SA
//
\p{LB:Complex_Context}+ { if (populateAttributes(SOUTH_EAST_ASIAN_TYPE)) return true; }
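// Illustrative (assumed) example: a run of Lao or Khmer characters is kept
// together as a single <SOUTHEAST_ASIAN> token.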
// WB14. Any ÷ Any
//
\p{Script:Han} { if (populateAttributes(IDEOGRAPHIC_TYPE)) return true; }
\p{Script:Hiragana} { if (populateAttributes(HIRAGANA_TYPE)) return true; }
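// Illustrative (assumed) examples: "漢字" yields two <IDEOGRAPHIC> tokens,
// and "ひらがな" yields four <HIRAGANA> tokens, one per character.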
// WB3. CR × LF
// WB3a. (Newline | CR | LF) ÷
// WB3b. ÷ (Newline | CR | LF)
// WB14. Any ÷ Any
//
[^] { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }

View File

@@ -17,9 +17,43 @@
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
A fast grammar-based tokenizer constructed with JFlex.
<p>The <code>org.apache.lucene.analysis.standard</code> package contains three
fast grammar-based tokenizers constructed with JFlex:</p>
<ul>
<li><code><a href="StandardTokenizer.html">StandardTokenizer</a></code>:
as of Lucene 3.1, implements the Word Break rules from the Unicode Text
Segmentation algorithm, as specified in
<a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
URLs and email addresses are also tokenized according to the relevant RFCs.
<code><a href="StandardAnalyzer">StandardAnalyzer</a></code> includes
<code>StandardTokenizer</code>,
<code><a href="StandardFilter">StandardFilter</a></code>,
<code><a href="../../../../../../all/org/apache/lucene/analysis/LowerCaseFilter.html">LowerCaseFilter</a></code>
and <code><a href="../../../../../../all/org/apache/lucene/analysis/StopFilter.html">StopFilter</a></code>.
When the <code>Version</code> specified in the constructor is lower than
3.1, the <code><a href="ClassicTokenizer.html">ClassicTokenizer</a></code>
implementation is invoked.</li>
<li><code><a href="ClassicTokenizer.html">ClassicTokenizer</a></code>:
this class was formerly (prior to Lucene 3.1) named
<code>StandardTokenizer</code>. (Its tokenization rules are not
based on the Unicode Text Segmentation algorithm.)
<code><a href="ClassicAnalyzer">ClassicAnalyzer</a></code> includes
<code>ClassicTokenizer</code>,
<code><a href="StandardFilter">StandardFilter</a></code>,
<code><a href="../../../../../../all/org/apache/lucene/analysis/LowerCaseFilter.html">LowerCaseFilter</a></code>
and <code><a href="../../../../../../all/org/apache/lucene/analysis/StopFilter.html">StopFilter</a></code>.
</li>
<li><code><a href="UAX29Tokenizer.html">UAX29Tokenizer</a></code>:
implements the Word Break rules from the Unicode Text Segmentation
algorithm, as specified in
<a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
Unlike <code>StandardTokenizer</code>, URLs and email addresses are
<b>not</b> tokenized as single tokens, but are instead split up into
tokens according to the UAX#29 word break rules, as shown in the sketch
below this list.
</li>
</ul>
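<p>As a minimal sketch of the difference (assuming the Lucene 3.1-era
constructors; the e-mail address is an arbitrary example input):</p>
<pre>
  // StandardTokenizer keeps "jim.bob@example.com" as one &lt;EMAIL&gt; token:
  Tokenizer std = new StandardTokenizer(Version.LUCENE_31,
                                        new StringReader("jim.bob@example.com"));
  // UAX29Tokenizer instead emits the two &lt;ALPHANUM&gt; tokens "jim.bob" and
  // "example.com", per the UAX#29 word break rules ('.' is MidNumLet, '@' breaks):
  Tokenizer uax29 = new UAX29Tokenizer(new StringReader("jim.bob@example.com"));
</pre>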
</body>
</html>

View File

@@ -120,7 +120,7 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@@ -58,7 +58,7 @@ public final class ThaiAnalyzer extends ReusableAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
if (matchVersion.onOrAfter(Version.LUCENE_31))
result = new LowerCaseFilter(matchVersion, result);
result = new ThaiWordFilter(matchVersion, result);

View File

@@ -123,7 +123,7 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
TokenStream result = new StandardFilter(source);
TokenStream result = new StandardFilter(matchVersion, source);
result = new TurkishLowerCaseFilter(result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())

View File

@@ -0,0 +1,267 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta name="robots" content="index,nofollow">
<title>Resources - Lucene-java Wiki</title>
<script type="text/javascript" src="/moin_static184/common/js/common.js"></script>
<script type="text/javascript">
<!--
var search_hint = "Search";
//-->
</script>
<link rel="stylesheet" type="text/css" charset="utf-8" media="all" href="/moin_static184/modernized/css/common.css">
<link rel="stylesheet" type="text/css" charset="utf-8" media="screen" href="/moin_static184/modernized/css/screen.css">
<link rel="stylesheet" type="text/css" charset="utf-8" media="print" href="/moin_static184/modernized/css/print.css">
<link rel="stylesheet" type="text/css" charset="utf-8" media="projection" href="/moin_static184/modernized/css/projection.css">
<!-- css only for MS IE6/IE7 browsers -->
<!--[if lt IE 8]>
<link rel="stylesheet" type="text/css" charset="utf-8" media="all" href="/moin_static184/modernized/css/msie.css">
<![endif]-->
<link rel="Start" href="/lucene-java/FrontPageEN">
<link rel="Alternate" title="Wiki Markup" href="/lucene-java/Resources?action=raw">
<link rel="Alternate" media="print" title="Print View" href="/lucene-java/Resources?action=print">
<link rel="Appendix" title="IntroductionToApacheLucene.jp.jpg" href="/lucene-java/Resources?action=AttachFile&amp;do=view&amp;target=IntroductionToApacheLucene.jp.jpg">
<link rel="Appendix" title="SuchmaschinenEntwickelnMitApacheLucene.de.jpg" href="/lucene-java/Resources?action=AttachFile&amp;do=view&amp;target=SuchmaschinenEntwickelnMitApacheLucene.de.jpg">
<link rel="Appendix" title="building.search.applications.png" href="/lucene-java/Resources?action=AttachFile&amp;do=view&amp;target=building.search.applications.png">
<link rel="Appendix" title="lia3d.jpg" href="/lucene-java/Resources?action=AttachFile&amp;do=view&amp;target=lia3d.jpg">
<link rel="Search" href="/lucene-java/FindPage">
<link rel="Index" href="/lucene-java/TitleIndex">
<link rel="Glossary" href="/lucene-java/WordIndex">
<link rel="Help" href="/lucene-java/HelpOnFormatting">
</head>
<body lang="en" dir="ltr">
<div id="header">
<form id="searchform" method="get" action="/lucene-java/Resources">
<div>
<input type="hidden" name="action" value="fullsearch">
<input type="hidden" name="context" value="180">
<label for="searchinput">Search:</label>
<input id="searchinput" type="text" name="value" value="" size="20"
onfocus="searchFocus(this)" onblur="searchBlur(this)"
onkeyup="searchChange(this)" onchange="searchChange(this)" alt="Search">
<input id="titlesearch" name="titlesearch" type="submit"
value="Titles" alt="Search Titles">
<input id="fullsearch" name="fullsearch" type="submit"
value="Text" alt="Search Full Text">
</div>
</form>
<script type="text/javascript">
<!--// Initialize search form
var f = document.getElementById('searchform');
f.getElementsByTagName('label')[0].style.display = 'none';
var e = document.getElementById('searchinput');
searchChange(e);
searchBlur(e);
//-->
</script>
<div id="logo"><a href="/lucene-java/FrontPageEN">Lucene-java Wiki</a></div>
<div id="username"><a href="/lucene-java/Resources?action=login" id="login" rel="nofollow">Login</a></div>
<h1 id="locationline">
<span id="pagelocation"><a class="backlink" href="/lucene-java/Resources?action=fullsearch&amp;context=180&amp;value=linkto%3A%22Resources%22" rel="nofollow" title="Click to do a full-text search for this title">Resources</a></span>
</h1>
<ul id="navibar">
<li class="wikilink"><a href="/lucene-java/FrontPageEN">FrontPageEN</a></li><li class="wikilink"><a href="/lucene-java/RecentChanges">RecentChanges</a></li><li class="wikilink"><a href="/lucene-java/FindPage">FindPage</a></li><li class="wikilink"><a href="/lucene-java/HelpContents">HelpContents</a></li><li class="current"><a href="/lucene-java/Resources">Resources</a></li>
</ul>
<div id="pageline"><hr style="display:none;"></div>
<ul class="editbar"><li><span class="disabled">Immutable Page</span></li><li class="toggleCommentsButton" style="display:none;"><a href="#" class="nbcomment" onClick="toggleComments();return false;">Comments</a></li><li><a class="nbinfo" href="/lucene-java/Resources?action=info" rel="nofollow">Info</a></li><li>
<form class="actionsmenu" method="GET" action="/lucene-java/Resources">
<div>
<label>More Actions:</label>
<select name="action"
onchange="if ((this.selectedIndex != 0) &&
(this.options[this.selectedIndex].disabled == false)) {
this.form.submit();
}
this.selectedIndex = 0;">
<option value="raw">Raw Text</option>
<option value="print">Print View</option>
<option value="RenderAsDocbook">Render as Docbook</option>
<option value="refresh">Delete Cache</option>
<option value="show" disabled class="disabled">------------------------</option>
<option value="SpellCheck">Check Spelling</option>
<option value="LikePages">Like Pages</option>
<option value="LocalSiteMap">Local Site Map</option>
<option value="show" disabled class="disabled">------------------------</option>
<option value="RenamePage" disabled class="disabled">Rename Page</option>
<option value="CopyPage">Copy Page</option>
<option value="DeletePage" disabled class="disabled">Delete Page</option>
<option value="show" disabled class="disabled">------------------------</option>
<option value="MyPages">My Pages</option>
<option value="show" disabled class="disabled">Subscribe User</option>
<option value="show" disabled class="disabled">------------------------</option>
<option value="show" disabled class="disabled">Remove Spam</option>
<option value="show" disabled class="disabled">Revert to this revision</option>
<option value="show" disabled class="disabled">Package Pages</option>
<option value="SyncPages">Sync Pages</option>
<option value="show" disabled class="disabled">------------------------</option>
<option value="Load">Load</option>
<option value="Save">Save</option>
</select>
<input type="submit" value="Do">
</div>
<script type="text/javascript">
<!--// Init menu
actionsMenuInit('More Actions:');
//-->
</script>
</form>
</li></ul>
</div>
<div id="page" lang="en" dir="ltr">
<div dir="ltr" id="content" lang="en"><span class="anchor" id="top"></span>
<span class="anchor" id="line-2"></span><p class="line867"><div class="table-of-contents"><p class="table-of-contents-heading">Contents<ol><li>
<a href="#Introductions">Introductions</a></li><li>
<a href="#Blogs">Blogs</a></li><li>
<a href="#Books">Books</a></li><li>
<a href="#Articles">Articles</a></li><li>
<a href="#Interviews">Interviews</a></li><li>
<a href="#Papers">Papers</a></li><li>
<a href="#Presentations">Presentations</a></li><li>
<a href="#Training">Training</a></li><li>
<a href="#Corpora">Corpora</a></li><li>
<a href="#Other">Other</a></li></ol></div> <span class="anchor" id="line-3"></span><span class="anchor" id="line-4"></span><p class="line867">
<h1 id="Introductions">Introductions</h1>
<span class="anchor" id="line-5"></span><span class="anchor" id="line-6"></span><ul><li><p class="line862">The API documentation contains <a class="http" href="http://lucene.apache.org/java/3_0_1/api/all/overview-summary.html#overview_description">a short and simple code example</a> that shows the basic way to index and search <span class="anchor" id="line-7"></span></li><li><p class="line862">The <a class="http" href="http://lucene.apache.org/java/3_0_1/gettingstarted.html">Getting Started Guide</a> that describes the demos that come with Lucene <span class="anchor" id="line-8"></span><span class="anchor" id="line-9"></span><span class="anchor" id="line-10"></span></li></ul><p class="line867">
<h1 id="Blogs">Blogs</h1>
<span class="anchor" id="line-11"></span><span class="anchor" id="line-12"></span><ul><li><p class="line891"><a class="http" href="http://lucene.grantingersoll.com">Grant's Grunts: Lucene edition</a> - Grant Ingersoll's thoughts on the Lucene ecosystem. <span class="anchor" id="line-13"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/blog/">Lucid Imagination's Blog</a> - Many of the Lucene and Solr committers blog here about how to use Lucene and Solr <span class="anchor" id="line-14"></span></li><li><p class="line891"><a class="http" href="http://blog.sematext.com/">Sematext Blog</a> - Search and Analytics covering Lucene, Solr, Nutch, Hadoop, HBase, and more <span class="anchor" id="line-15"></span><span class="anchor" id="line-16"></span><span class="anchor" id="line-17"></span></li></ul><p class="line867">
<h1 id="Books">Books</h1>
<span class="anchor" id="line-18"></span><span class="anchor" id="line-19"></span><ul><li><p class="line891"><img alt="http://www.manning.com/hatcher3/hatcher3_cover150.jpg" class="external_image" src="http://www.manning.com/hatcher3/hatcher3_cover150.jpg" title="http://www.manning.com/hatcher3/hatcher3_cover150.jpg" /> "<a class="http" href="http://www.manning.com/hatcher3/">Lucene in Action, Second Edition"</a> by Erik Hatcher, Otis Gospodneti&#263;, and Michael McCandless <span class="anchor" id="line-20"></span></li><li><p class="line891"><img alt="building.search.applications.png" class="attachment" src="/lucene-java/Resources?action=AttachFile&amp;do=get&amp;target=building.search.applications.png" title="building.search.applications.png" /> "<a class="http" href="http://www.amazon.com/Building-Search-Applications-Lucene-Lingpipe/dp/0615204252/">Building Search Applications: Lucene, LingPipe, and Gate</a>" by Manu Konchady; Mustru Publishing; June 2008; ISBN 978-0615204253 <span class="anchor" id="line-21"></span></li><li><p class="line891"><img alt="IntroductionToApacheLucene.jp.jpg" class="attachment" src="/lucene-java/Resources?action=AttachFile&amp;do=get&amp;target=IntroductionToApacheLucene.jp.jpg" title="IntroductionToApacheLucene.jp.jpg" /> "<a class="http" href="http://www.amazon.co.jp/exec/obidos/ASIN/4774127809/503-9461699-1775907">Apache Lucene 入門 ~Java・オープンソース・全文検索システムの構築</a>" 関口 宏司 ; 技術評論社 ; 2006/05/17 ; ISBN: 4774127809 (<span class="u">Introduction to Apache Lucene: Construction of Java Open Source Full Text Retrieval Systems</span> by Koshi Sekiguti ; Gijutsu-Hyohron Co., Ltd.) <span class="anchor" id="line-22"></span></li><li><p class="line891"><img alt="lia3d.jpg" class="attachment" src="/lucene-java/Resources?action=AttachFile&amp;do=get&amp;target=lia3d.jpg" title="lia3d.jpg" /> "<a class="http" href="http://www.lucenebook.com">Lucene In Action</a>" by Erik Hatcher, Otis Gospodneti&#263;; Manning Publications; December 2004; ISBN 1932394281 (also available from <a class="http" href="http://www.amazon.com/exec/obidos/ASIN/1932394281">Amazon.com</a>) <span class="anchor" id="line-23"></span></li><li><p class="line891"><img alt="SuchmaschinenEntwickelnMitApacheLucene.de.jpg" class="attachment" src="/lucene-java/Resources?action=AttachFile&amp;do=get&amp;target=SuchmaschinenEntwickelnMitApacheLucene.de.jpg" title="SuchmaschinenEntwickelnMitApacheLucene.de.jpg" /> Manfred Hardt, Dr. Fabian Theis: "<a class="http" href="http://www.amazon.de/Suchmaschinen-entwickeln-mit-Apache-Lucene/dp/3935042450">Suchmaschinen entwickeln mit Apache Lucene</a>"; Software &amp; Support Verlag, Frankfurt/Main, Germany; September 2004; ISBN 3935042450 (<span class="u">Developing Search Engines with Apache Lucene</span>) <span class="anchor" id="line-24"></span><span class="anchor" id="line-25"></span></li></ul><p class="line867">
<h1 id="Articles">Articles</h1>
<span class="anchor" id="line-26"></span><span class="anchor" id="line-27"></span><ul><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Getting-Started-with-Lucene/">Getting Started with Lucene</a> (by Grant Ingersoll) <br>
(<em>Published: January 2009 - article</em>) <span class="anchor" id="line-28"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Optimizing-Findability-in-Lucene-and-Solr/">Optimizing Findability in Lucene and Solr</a> (by Grant Ingersoll)<br>
(<em>Published: January 2009 - article</em>) <span class="anchor" id="line-29"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Debugging-Relevance-Issues-in-Search/">Debugging Relevance Issues in Search</a> (by Grant Ingersoll)<br>
(<em>Published: January 2009 - article</em>) <span class="anchor" id="line-30"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Scaling-Lucene-and-Solr/">Scaling Lucene and Solr</a> (by Mark Miller)<br>
(<em>Published: January 2009 - article</em>) <span class="anchor" id="line-31"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Introduction-to-Apache-Lucene-and-Solr/">Introduction to Apache Lucene and Solr</a> (by Marc Krellenstein)<br>
(<em>Published: January 2009 - article</em>) <span class="anchor" id="line-32"></span></li><li><p class="line891"><a class="http" href="http://cephas.net/blog/2008/03/30/how-morelikethis-works-in-lucene/">How MoreLikeThis Works in Lucene</a> (by Aaron Johnson)<br>
(<em>Last updated: March 2008 - blog entry</em>) <span class="anchor" id="line-33"></span></li><li><p class="line891"><a class="http" href="http://schmidt.devlib.org/software/lucene-wikipedia.html">Lucene Wikipedia indexer</a> (by Marco Schmidt)<br>
(<em>Last updated: November 2007 - tutorial</em>) <span class="anchor" id="line-34"></span></li><li><p class="line891"><a class="http" href="http://marceloochoa.blogspot.com/2007/09/running-lucene-inside-your-oracle-jvm.html">Running Lucene inside your Oracle JVM</a> (by Marcelo Ochoa)<br>
(<em>Last updated: September 2007 - blog entry</em>) <span class="anchor" id="line-35"></span></li><li><p class="line891"><a class="http" href="http://www.onjava.com/pub/a/onjava/2007/05/24/using-the-lucene-query-parser-without-lucene.html">Using the Lucene Query Parser Without Lucene</a> (by Marcin Maciukiewicz and Daniel Owsiański)<br>
(<em>Published: May 2007 - article</em>) <span class="anchor" id="line-36"></span></li><li><p class="line891"><a class="http" href="http://www.javaworld.com/javaworld/jw-09-2006/jw-0925-lucene.html">Integrate advanced search functionalities into your apps</a> (by John Ferguson Smart)<br>
(<em>Published: September 2006 - article</em>) <span class="anchor" id="line-37"></span></li><li><p class="line891"><a class="http" href="http://www-128.ibm.com/developerworks/java/library/wa-lucene2/index.html?ca=drs-">Beef up Web search applications with Lucene</a> (by Deng Peng Zhou)<br>
(<em>Published: August 2006 - article</em>) <span class="anchor" id="line-38"></span></li><li><p class="line891"><a class="http" href="http://www.freesearch.pe.kr/tag/Lucene">Lecture &amp; Etc : Lucene index file format for Korean</a> (by Jeon Hee-Won)<br>
(<em>Published: July 2006 - article</em>) <span class="anchor" id="line-39"></span></li><li>Cai Ziegler: "Suche nach Suche -- Apaches Lucene: eigene Suche und Indizierung"; iX 6/2006, Seite 120; Heise Zeitschriften Verlag, Hannover, Germany <span class="anchor" id="line-40"></span></li><li><p class="line891"><a class="http" href="http://www-128.ibm.com/developerworks/java/library/wa-lucene/index.html">Delve inside the Lucene indexing mechanism</a> (by Deng Peng Zhou)<br>
(<em>Published: June 2006 - article</em>) <span class="anchor" id="line-41"></span></li><li><p class="line891"><a class="http" href="http://www.onjava.com/pub/a/onjava/2006/01/18/using-lucene-to-search-java-source.html">Using Lucene to Search Java Source Code</a> (by Renuka Sindhgatta)<br>
(<em>Published: January 2006 - article</em>) <span class="anchor" id="line-42"></span></li><li><p class="line891"><a class="http" href="http://www.jroller.com/page/wakaleo/?anchor=lucene_a_tutorial_introduction_to">Lucene : a tutorial introduction to full-text indexing in Java</a> (by John Ferguson Smart)<br>
(<em>Published: October 2005 - article</em>) <span class="anchor" id="line-43"></span></li><li>Daniel Naber: "Herr der Suche -- Eigene Anwendungen mit Volltextsuche erweitern"; c't 7/2005, Seite 196; Heise Zeitschriften Verlag, Hannover, Germany <span class="anchor" id="line-44"></span></li><li><p class="line891"><a class="http" href="http://blog.dev.sf.net/index.php?/archives/10-Behind-the-Scenes-of-the-SourceForge.net-Search-System.html">Behind the Scenes of the SourceForge.net Search System</a> (by Chris Conrad)<br>
(<em>Last updated: June 2005 - blog entry</em>) <span class="anchor" id="line-45"></span></li><li><p class="line891"><a class="http" href="http://today.java.net/pub/a/today/2005/08/09/didyoumean.html">Did You Mean: Lucene?</a> (by Tom White)<br>
(<em>Published: August 2005 - article</em>) <span class="anchor" id="line-46"></span></li><li><p class="line891"><a class="http" href="http://www.developer.com/java/other/article.php/3490471">Meet Lucene</a> (by Otis Gospodneti&#263;, Eric Hatcher)<br>
(<em>Published: March 2005 - article</em>) <span class="anchor" id="line-47"></span></li><li><p class="line891"><a class="http" href="http://www.theserverside.com/tt/articles/article.tss?l=ILoveLucene">I Love Lucene</a> (by Dion Almaer)<br>
(<em>Published: January 2005 - article</em>) <span class="anchor" id="line-48"></span></li><li><p class="line891"><a class="http" href="http://javaboutique.internet.com/tutorials/HTMLParser/article.html">Unweaving a Tangled Web With HTMLParser and Lucene</a> (by Keld H. Hansen)<br>
(<em>Last updated: October 2004 - tutorial</em>) <span class="anchor" id="line-49"></span></li><li><p class="line891"><a class="http" href="http://bilgidata.com/localhost/bilgidata/yazi.jsp@dosya=a_lucene.xml.html">Lucene Introduction in Turkish</a> Java Bazl&#305; Arama Motoru - Lusin (by Burak Bayraml&#305;)<br>
(<em>Last updated: August 2004 - tutorial</em>) <span class="anchor" id="line-50"></span></li><li><p class="line891"><a class="http" href="http://www.chedong.com/tech/lucene.html">Lucene Introduction in Chinese</a> Lucene&#65306;&#22522;&#20110;Java&#30340;&#20840;&#25991;&#26816;&#32034;&#24341;&#25806;&#31616;&#20171; (by Che Dong; &#20316;&#32773;&#65306; &#36710;&#19996;)<br>
(<em>Last updated: May 2004 - tutorial</em>) <span class="anchor" id="line-51"></span></li><li><p class="line891"><a class="http" href="http://javatechniques.com/public/java/docs/basics/lucene-memory-search.html">Lucene In-Memory Text Search</a> (by Philip Isenhour)<br>
(<em>Last updated: May 2004 - tutorial</em>) <span class="anchor" id="line-52"></span></li><li><p class="line891"><a class="http" href="http://www.javaranch.com/newsletter/200404/Lucene.html">The Lucene Search Engine: Adding Search to Your Applications</a> (by Thomas Paul)<br>
(<em>Published: April 2004 - article</em>) <span class="anchor" id="line-53"></span></li><li><p class="line891"><a class="http" href="http://www.darksleep.com/lucene/">Lucene Tutorial</a> (by Steven J. Owens)<br>
(<em>Last updated: March 2004 - tutorial</em>) <span class="anchor" id="line-54"></span></li><li><p class="line891"><a class="http" href="http://www-igm.univ-mlv.fr/~dr/XPOSE2003/lucene/articleLucene.html">Lucene Introduction in French</a> Exposés Système sur le thème de l'opensource : Analyse de la structure de Lucene. (by Sun Seng TAN)<br>
(<em>Last updated: February 2004 - tutorial</em>) <span class="anchor" id="line-55"></span></li><li><p class="line891"><a class="http" href="http://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html">QueryParser Rules</a> (by Erik Hatcher)<br>
(<em>Published November 2003 - article</em>) <span class="anchor" id="line-56"></span></li><li><p class="line891"><a class="http" href="http://builder.com.com/5100-6389-5054799.html">Give your Web site its own search engine using Lucene</a> (by Jeffrey Linwood)<br>
(<em>Published July 2003 - article</em>) <span class="anchor" id="line-57"></span></li><li><p class="line891"><a class="http" href="http://today.java.net/pub/a/today/2003/07/30/LuceneIntro.html">Lucene Intro</a> (by Erik Hatcher)<br>
(<em>Published: July 2003 - article</em>) <span class="anchor" id="line-58"></span></li><li><p class="line891"><a class="http" href="http://www-106.ibm.com/developerworks/library/j-lucene/">Parsing, indexing, and searching XML with Digester and Lucene</a> (by Otis Gospodneti&#263;)<br>
(<em>Published June 2003 - article</em>) <span class="anchor" id="line-59"></span></li><li><p class="line891"><a class="http" href="http://www.xml.com/pub/a/ws/2003/05/13/email.html">Using Python, Jython, and Lucene to Search Outlook Email</a> (by Jon Udell)<br>
(<em>Published: May 2003 - article</em>) <span class="anchor" id="line-60"></span></li><li><p class="line891"><a class="http" href="http://www.onjava.com/pub/a/onjava/2003/03/05/lucene.html">Advanced Text Indexing with Lucene</a> (by Otis Gospodneti&#263;)<br>
(<em>Published: March 2003 - article</em>) <span class="anchor" id="line-61"></span></li><li><p class="line891"><a class="http" href="http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html">Introduction to Text Indexing with Apache Jakarta Lucene</a> (by Otis Gospodneti&#263;)<br>
(<em>Published: January 2003 - article</em>) <span class="anchor" id="line-62"></span></li><li><p class="line862">Manfred Hardt: "Suchmaschinen entwickeln mit Java und Lucene - Wo war denn noch gleich ... ?"; JavaMagazin 9/2002; Software &amp; Support Verlag, Frankfurt/Main, Germany <span class="anchor" id="line-63"></span></li><li><p class="line891"><a class="http" href="http://javangelist.snipsnap.org/space/Lucene-Mini-Tutorial">Lucene Mini-Tutorial</a> (by funzel)<br>
(<em>Last updated: April 2002 - tutorial</em>) <span class="anchor" id="line-64"></span></li><li><p class="line891"><a class="http" href="http://www.javaworld.com/javaworld/jw-09-2000/jw-0915-lucene.html">The Lucene search engine Powerful flexible and free</a> (by Brian Goetz)<br>
(<em>Published September 2000 - article</em>) <span class="anchor" id="line-65"></span><span class="anchor" id="line-66"></span></li></ul><p class="line867">
<h1 id="Interviews">Interviews</h1>
<span class="anchor" id="line-67"></span><span class="anchor" id="line-68"></span><ul><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/index.php?option=com_content&amp;task=view&amp;id=109">Interview with Lucene creator Doug Cutting</a> Podcast. Summary: Doug talks about the creation of Lucene, Nutch and Hadoop. (<em>Published January 2009</em>) <span class="anchor" id="line-69"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/index.php?option=com_content&amp;task=view&amp;id=108">Interview with Lucene/Solr committer Chris Hostetter</a> Podcast. Summary: Chris talks about Solr, Lucene and their usage at CNET. (<em>Published January 2009</em>) <span class="anchor" id="line-70"></span></li><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/index.php?option=com_content&amp;task=view&amp;id=113">Interview with Lucene/Solr committer Ryan McKinley</a> Podcast. Summary: Ryan discusses Solr, Lucene and geospatial searching with Lucene (<a class="nonexistent" href="/lucene-java/LocalLucene/LocalSolr">LocalLucene/LocalSolr</a>) and his usage of Lucene/Solr throughout his career. (<em>Published January 2009</em>) <span class="anchor" id="line-71"></span><span class="anchor" id="line-72"></span><span class="anchor" id="line-73"></span><span class="anchor" id="line-74"></span></li></ul><p class="line867">
<h1 id="Papers">Papers</h1>
<span class="anchor" id="line-75"></span><span class="anchor" id="line-76"></span><ul><li><p class="line891"><a class="http" href="http://lucene.sourceforge.net/publications.html">http://lucene.sourceforge.net/publications.html</a> Doug Cuttings papers from the old Lucene web site <span class="anchor" id="line-77"></span><span class="anchor" id="line-78"></span></li></ul><p class="line867">
<h1 id="Presentations">Presentations</h1>
<span class="anchor" id="line-79"></span><ul><li><p class="line891"><a class="http" href="http://people.apache.org/~buschmi/apachecon/AdvancedIndexingLuceneAtlanta07.ppt">Advanced Indexing Techniques with Apache Lucene - Payloads</a> presented by Michael Busch at <a class="http" href="http://www.us.apachecon.com/us2007/">ApacheCon U.S. 2007</a><br>
(<em>Presented November 2007 - PDF slide show</em>) <span class="anchor" id="line-80"></span></li><li><p class="line891"><a class="http" href="http://people.apache.org/~yonik/presentations/lucene_intro.pdf">Full-Text Search with Lucene</a> presented by Yonik Seeley at <a class="http" href="http://www.eu.apachecon.com">ApacheCon Europe 2007</a>.<br>
(<em>Presented May 2007 - PDF slide show</em>) <span class="anchor" id="line-81"></span></li><li><p class="line891"><a class="http" href="http://www.cnlp.org/presentations/slides/AdvancedLuceneEU.pdf">Advanced Lucene</a> presented by Grant Ingersoll of <a class="http" href="http://www.cnlp.org">CNLP</a> at <a class="http" href="http://www.eu.apachecon.com">ApacheCon Europe 2007</a>. Covers term vectors, query tips and tricks and Lucene performance tuning related to indexing, searching and document retrieval.<br>
(<em>Presented May 2007 - PDF slide show</em>) <span class="anchor" id="line-82"></span></li><li><p class="line891"><a class="http" href="http://blogs.atlassian.com/rebelutionary/downloads/tssjs2007-lucene-generic-data-indexing.pdf">Lucene: Generic Data Indexing</a> presented by Mike Cannon-Brookes, CEO, <a class="http" href="http://www.atlassian.com/">Atlassian Software Systems</a> at <a class="http" href="http://javasymposium.techtarget.com/lasvegas/index.html">TSSJS Las Vegas 2007</a>. Covers how Atlassian use Lucene as a generic indexing framework for indexing and finding arbitrary collections of complex objects.<br>
(<em>Presented March 2007 - PDF slide show</em>) <span class="anchor" id="line-83"></span></li><li><p class="line891"><a class="http" href="http://www.cnlp.org/apachecon2005/AdvancedLucene.ppt">Advanced Lucene</a> presented by Grant Ingersoll of the <a class="http" href="http://www.cnlp.org">Center for Natural Language Processing</a> at <a class="http" href="http://www.apachecon.com">ApacheCon 2005</a>. Covers term vectors, span queries, using Lucene in a basic question answering system, and several Lucene case studies from <a class="http" href="http://www.cnlp.org">http://www.cnlp.org</a>. The accompanying <a class="http" href="http://www.cnlp.org/apachecon2005">CNLP ApacheCon 2005 Information website</a> contains many working examples using term vectors and span queries. <span class="anchor" id="line-84"></span></li><li><p class="line891"><a class="http" href="http://lucene.sourceforge.net/talks/pisa/">Lucene lecture at The University of Pisa</a> (by Doug Cutting)<br>
(<em>Presented November 2004 - lecture notes</em>) <span class="anchor" id="line-85"></span></li><li><p class="line891"><a class="http" href="http://conferences.oreillynet.com/presentations/os2003/hatcher_erik_lucene.pdf">Introducing Lucene</a> (by Erik Hatcher)<br>
(<em>Presented at OS2003, July 2003 - PDF slide show</em>) <span class="anchor" id="line-86"></span></li><li><p class="line891"><a class="http" href="http://lucene.sourceforge.net/talks/inktomi/">The Lucene Search Engine: Inktomi Seminar</a> (by Doug Cutting)<br>
(<em>Presented June, 2000 - seminar notes</em>) <span class="anchor" id="line-87"></span><span class="anchor" id="line-88"></span></li></ul><p class="line867">
<h1 id="Training">Training</h1>
<span class="anchor" id="line-89"></span><span class="anchor" id="line-90"></span><ul><li><p class="line891"><a class="http" href="http://www.lucidimagination.com/How-We-Can-Help/Training/">http://www.lucidimagination.com/How-We-Can-Help/Training/</a> - Training on Lucene created by Lucene committers and contributors (Grant Ingersoll, Erik Hatcher and the rest of the team at Lucid Imagination). <span class="anchor" id="line-91"></span></li><li><p class="line891"><a class="http" href="http://www.lucenebootcamp.com">Lucene Boot Camp</a> - Training by Lucene committer Grant Ingersoll. Offered exclusively at <a class="http" href="http://www.apachecon.com">ApacheCon</a>. <span class="anchor" id="line-92"></span><span class="anchor" id="line-93"></span></li></ul><p class="line867">
<h1 id="Corpora">Corpora</h1>
<span class="anchor" id="line-94"></span><ul><li><p class="line862">DMOZ RDF dump - <a class="http" href="http://rdf.dmoz.org/">http://rdf.dmoz.org/</a> <span class="anchor" id="line-95"></span></li><li><p class="line862">CMU newsgroups - <a class="http" href="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html">http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html</a> <span class="anchor" id="line-96"></span></li><li><p class="line862">CMU webpages - <a class="http" href="http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/">http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/</a> <span class="anchor" id="line-97"></span></li><li><p class="line862">Reuters - <a class="http" href="http://www.daviddlewis.com/resources/testcollections/reuters21578">http://www.daviddlewis.com/resources/testcollections/reuters21578</a> <span class="anchor" id="line-98"></span></li><li><p class="line862">Enron emails - <a class="http" href="http://www-2.cs.cmu.edu/~enron/">http://www-2.cs.cmu.edu/~enron/</a> <span class="anchor" id="line-99"></span></li><li><p class="line862">JRC-ACQUIS Multilingual Parallel Corpus - <a class="http" href="http://wt.jrc.it/lt/Acquis/">http://wt.jrc.it/lt/Acquis/</a> <span class="anchor" id="line-100"></span><span class="anchor" id="line-101"></span></li></ul><p class="line867">
<h1 id="Other">Other</h1>
<span class="anchor" id="line-102"></span><ul><li><p class="line891"><a class="http" href="http://www.java201.com/resources/browse/38-all.html">Lucene Resources</a> - Articles, Books, FAQs, Forums, Presentations, Wiki. <span class="anchor" id="line-103"></span></li><li><p class="line891"><a class="http" href="http://www.nabble.com/Web-Search-f2787.html">Lucene Search Forum</a> - hosted by <a class="http" href="http://www.nabble.com">Nabble</a> archiving all Lucene and Nutch mailing lists into a searchable archive/forum. The search is coded using Lucene. <span class="anchor" id="line-104"></span></li><li><p class="line891"><a class="http" href="http://www.lucenetutorial.com">LuceneTutorial.com</a> - Tips and tricks, sample applications, code samples, best practices. <span class="anchor" id="line-105"></span></li></ul><span class="anchor" id="bottom"></span></div><p id="pageinfo" class="info" lang="en" dir="ltr">Resources (last edited 2010-05-03 22:31:43 by <span title="SteveRowe @ ist-h335-d03.syr.edu[128.230.84.100]"><a class="nonexistent" href="/lucene-java/SteveRowe" title="SteveRowe @ ist-h335-d03.syr.edu[128.230.84.100]">SteveRowe</a></span>)</p>
<div id="pagebottom"></div>
</div>
<div id="footer">
<ul class="editbar"><li><span class="disabled">Immutable Page</span></li><li class="toggleCommentsButton" style="display:none;"><a href="#" class="nbcomment" onClick="toggleComments();return false;">Comments</a></li><li><a class="nbinfo" href="/lucene-java/Resources?action=info" rel="nofollow">Info</a></li><li>
<form class="actionsmenu" method="GET" action="/lucene-java/Resources">
<div>
<label>More Actions:</label>
<select name="action"
onchange="if ((this.selectedIndex != 0) &&
(this.options[this.selectedIndex].disabled == false)) {
this.form.submit();
}
this.selectedIndex = 0;">
<option value="raw">Raw Text</option>
<option value="print">Print View</option>
<option value="RenderAsDocbook">Render as Docbook</option>
<option value="refresh">Delete Cache</option>
<option value="show" disabled class="disabled">------------------------</option>
<option value="SpellCheck">Check Spelling</option>
<option value="LikePages">Like Pages</option>
<option value="LocalSiteMap">Local Site Map</option>
<option value="show" disabled class="disabled">------------------------</option>
<option value="RenamePage" disabled class="disabled">Rename Page</option>
<option value="CopyPage">Copy Page</option>
<option value="DeletePage" disabled class="disabled">Delete Page</option>
<option value="show" disabled class="disabled">------------------------</option>
<option value="MyPages">My Pages</option>
<option value="show" disabled class="disabled">Subscribe User</option>
<option value="show" disabled class="disabled">------------------------</option>
<option value="show" disabled class="disabled">Remove Spam</option>
<option value="show" disabled class="disabled">Revert to this revision</option>
<option value="show" disabled class="disabled">Package Pages</option>
<option value="SyncPages">Sync Pages</option>
<option value="show" disabled class="disabled">------------------------</option>
<option value="Load">Load</option>
<option value="Save">Save</option>
</select>
<input type="submit" value="Do">
</div>
<script type="text/javascript">
<!--// Init menu
actionsMenuInit('More Actions:');
//-->
</script>
</form>
</li></ul>
<ul id="credits">
<li><a href="http://moinmo.in/" title="This site uses the MoinMoin Wiki software.">MoinMoin Powered</a></li><li><a href="http://moinmo.in/Python" title="MoinMoin is written in Python.">Python Powered</a></li><li><a href="http://moinmo.in/GPL" title="MoinMoin is GPL licensed.">GPL licensed</a></li><li><a href="http://validator.w3.org/check?uri=referer" title="Click here to validate this page.">Valid HTML 4.01</a></li>
</ul>
</div>
</body>
</html>

View File

@@ -0,0 +1,105 @@
http://www.w3.org/TR/html4/strict.dtd
http://lucene.apache.org/java/3_0_1/api/all/overview-summary.html#overview_description
http://lucene.apache.org/java/3_0_1/gettingstarted.html
http://lucene.grantingersoll.com
http://www.lucidimagination.com/blog/
http://blog.sematext.com/
http://www.manning.com/hatcher3/hatcher3_cover150.jpg
http://www.manning.com/hatcher3/hatcher3_cover150.jpg
http://www.manning.com/hatcher3/hatcher3_cover150.jpg
http://www.manning.com/hatcher3/
http://www.amazon.com/Building-Search-Applications-Lucene-Lingpipe/dp/0615204252/
http://www.amazon.co.jp/exec/obidos/ASIN/4774127809/503-9461699-1775907
http://www.lucenebook.com
http://www.amazon.com/exec/obidos/ASIN/1932394281
Amazon.com
http://www.amazon.de/Suchmaschinen-entwickeln-mit-Apache-Lucene/dp/3935042450
http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Getting-Started-with-Lucene/
http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Optimizing-Findability-in-Lucene-and-Solr/
http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Debugging-Relevance-Issues-in-Search/
http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Scaling-Lucene-and-Solr/
http://www.lucidimagination.com/Community/Hear-from-the-Experts/Articles/Introduction-to-Apache-Lucene-and-Solr/
http://cephas.net/blog/2008/03/30/how-morelikethis-works-in-lucene/
http://schmidt.devlib.org/software/lucene-wikipedia.html
http://marceloochoa.blogspot.com/2007/09/running-lucene-inside-your-oracle-jvm.html
http://www.onjava.com/pub/a/onjava/2007/05/24/using-the-lucene-query-parser-without-lucene.html
http://www.javaworld.com/javaworld/jw-09-2006/jw-0925-lucene.html
http://www-128.ibm.com/developerworks/java/library/wa-lucene2/index.html?ca=drs-
http://www.freesearch.pe.kr/tag/Lucene
http://www-128.ibm.com/developerworks/java/library/wa-lucene/index.html
http://www.onjava.com/pub/a/onjava/2006/01/18/using-lucene-to-search-java-source.html
http://www.jroller.com/page/wakaleo/?anchor=lucene_a_tutorial_introduction_to
http://blog.dev.sf.net/index.php?/archives/10-Behind-the-Scenes-of-the-SourceForge.net-Search-System.html
SourceForge.net
http://today.java.net/pub/a/today/2005/08/09/didyoumean.html
http://www.developer.com/java/other/article.php/3490471
http://www.theserverside.com/tt/articles/article.tss?l=ILoveLucene
http://javaboutique.internet.com/tutorials/HTMLParser/article.html
http://bilgidata.com/localhost/bilgidata/yazi.jsp@dosya=a_lucene.xml.html
http://www.chedong.com/tech/lucene.html
http://javatechniques.com/public/java/docs/basics/lucene-memory-search.html
http://www.javaranch.com/newsletter/200404/Lucene.html
http://www.darksleep.com/lucene/
http://www-igm.univ-mlv.fr/~dr/XPOSE2003/lucene/articleLucene.html
http://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html
http://builder.com.com/5100-6389-5054799.html
http://today.java.net/pub/a/today/2003/07/30/LuceneIntro.html
http://www-106.ibm.com/developerworks/library/j-lucene/
http://www.xml.com/pub/a/ws/2003/05/13/email.html
http://www.onjava.com/pub/a/onjava/2003/03/05/lucene.html
http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html
http://javangelist.snipsnap.org/space/Lucene-Mini-Tutorial
http://www.javaworld.com/javaworld/jw-09-2000/jw-0915-lucene.html
http://www.lucidimagination.com/index.php?option=com_content&amp;task=view&amp;id=109
http://www.lucidimagination.com/index.php?option=com_content&amp;task=view&amp;id=108
http://www.lucidimagination.com/index.php?option=com_content&amp;task=view&amp;id=113
http://lucene.sourceforge.net/publications.html
http://lucene.sourceforge.net/publications.html
http://people.apache.org/~buschmi/apachecon/AdvancedIndexingLuceneAtlanta07.ppt
http://www.us.apachecon.com/us2007/
http://people.apache.org/~yonik/presentations/lucene_intro.pdf
http://www.eu.apachecon.com
http://www.cnlp.org/presentations/slides/AdvancedLuceneEU.pdf
http://www.cnlp.org
http://www.eu.apachecon.com
http://blogs.atlassian.com/rebelutionary/downloads/tssjs2007-lucene-generic-data-indexing.pdf
http://www.atlassian.com/
http://javasymposium.techtarget.com/lasvegas/index.html
http://www.cnlp.org/apachecon2005/AdvancedLucene.ppt
http://www.cnlp.org
http://www.apachecon.com
http://www.cnlp.org
http://www.cnlp.org
http://www.cnlp.org/apachecon2005
http://lucene.sourceforge.net/talks/pisa/
http://conferences.oreillynet.com/presentations/os2003/hatcher_erik_lucene.pdf
http://lucene.sourceforge.net/talks/inktomi/
http://www.lucidimagination.com/How-We-Can-Help/Training/
http://www.lucidimagination.com/How-We-Can-Help/Training/
http://www.lucenebootcamp.com
http://www.apachecon.com
http://rdf.dmoz.org/
http://rdf.dmoz.org/
http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html
http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html
http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/
http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/
http://www.daviddlewis.com/resources/testcollections/reuters21578
http://www.daviddlewis.com/resources/testcollections/reuters21578
http://www-2.cs.cmu.edu/~enron/
http://www-2.cs.cmu.edu/~enron/
http://wt.jrc.it/lt/Acquis/
http://wt.jrc.it/lt/Acquis/
http://www.java201.com/resources/browse/38-all.html
http://www.nabble.com/Web-Search-f2787.html
http://www.nabble.com
http://www.lucenetutorial.com
LuceneTutorial.com
ist-h335-d03.syr.edu
128.230.84.100
ist-h335-d03.syr.edu
128.230.84.100
http://moinmo.in/
http://moinmo.in/Python
http://moinmo.in/GPL
http://validator.w3.org/check?uri=referer
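
A minimal sketch (not part of this commit) of how the fixture pair above is meant to be used: run the new StandardTokenizer over the wiki page text and keep only tokens typed &lt;URL&gt;, then compare against the expected URL list. The class name, sample text, and Version.LUCENE_40 constant are illustrative assumptions; the test shown later in this commit implements the same idea with a URLFilter.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

// Illustrative sketch: print just the <URL>-typed tokens from some text.
public class URLTokenDump {
  public static void main(String[] args) throws IOException {
    String text = "Docs live at http://lucene.apache.org/java/ on the wiki.";
    StandardTokenizer tokenizer =
        new StandardTokenizer(Version.LUCENE_40, new StringReader(text)); // version constant is an assumption
    tokenizer.setMaxTokenLength(Integer.MAX_VALUE); // don't truncate long URLs
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    TypeAttribute type = tokenizer.addAttribute(TypeAttribute.class);
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      // Token types are interned constants, so == comparison works,
      // mirroring the URLFilter in the test below.
      if (type.type() == StandardTokenizer.TOKEN_TYPES[StandardTokenizer.URL]) {
        System.out.println(term.toString());
      }
    }
    tokenizer.end();
    tokenizer.close();
  }
}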

View File

@@ -0,0 +1,311 @@
package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.util.Arrays;
/**
* Copyright 2004 The Apache Software Foundation
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
private Analyzer a = new ClassicAnalyzer(TEST_VERSION_CURRENT);
public void testMaxTermLength() throws Exception {
ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
sa.setMaxTokenLength(5);
assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
}
public void testMaxTermLength2() throws Exception {
ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"});
sa.setMaxTokenLength(5);
assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1});
}
public void testMaxTermLength3() throws Exception {
char[] chars = new char[255];
for(int i=0;i<255;i++)
chars[i] = 'a';
String longTerm = new String(chars, 0, 255);
assertAnalyzesTo(a, "ab cd " + longTerm + " xy z", new String[]{"ab", "cd", longTerm, "xy", "z"});
assertAnalyzesTo(a, "ab cd " + longTerm + "a xy z", new String[]{"ab", "cd", "xy", "z"});
}
public void testAlphanumeric() throws Exception {
// alphanumeric tokens
assertAnalyzesTo(a, "B2B", new String[]{"b2b"});
assertAnalyzesTo(a, "2B", new String[]{"2b"});
}
public void testUnderscores() throws Exception {
// underscores are delimiters, but not in email addresses (below)
assertAnalyzesTo(a, "word_having_underscore", new String[]{"word", "having", "underscore"});
assertAnalyzesTo(a, "word_with_underscore_and_stopwords", new String[]{"word", "underscore", "stopwords"});
}
public void testDelimiters() throws Exception {
// other delimiters: "-", "/", ","
assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"});
assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"});
assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"});
}
public void testApostrophes() throws Exception {
// internal apostrophes: O'Reilly, you're, O'Reilly's
// possessives are actually removed by StandardFilter, not the tokenizer
assertAnalyzesTo(a, "O'Reilly", new String[]{"o'reilly"});
assertAnalyzesTo(a, "you're", new String[]{"you're"});
assertAnalyzesTo(a, "she's", new String[]{"she"});
assertAnalyzesTo(a, "Jim's", new String[]{"jim"});
assertAnalyzesTo(a, "don't", new String[]{"don't"});
assertAnalyzesTo(a, "O'Reilly's", new String[]{"o'reilly"});
}
public void testTSADash() throws Exception {
// t and s had been stopwords in Lucene <= 2.0, which made it impossible
// to correctly search for these terms:
assertAnalyzesTo(a, "s-class", new String[]{"s", "class"});
assertAnalyzesTo(a, "t-com", new String[]{"t", "com"});
// 'a' is still a stopword:
assertAnalyzesTo(a, "a-class", new String[]{"class"});
}
public void testCompanyNames() throws Exception {
// company names
assertAnalyzesTo(a, "AT&T", new String[]{"at&t"});
assertAnalyzesTo(a, "Excite@Home", new String[]{"excite@home"});
}
public void testLucene1140() throws Exception {
try {
ClassicAnalyzer analyzer = new ClassicAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
} catch (NullPointerException e) {
fail("Should not throw an NPE and it did");
}
}
public void testDomainNames() throws Exception {
// Current Lucene should not show the bug
ClassicAnalyzer a2 = new ClassicAnalyzer(TEST_VERSION_CURRENT);
// domain names
assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"});
// Note the trailing "."; see https://issues.apache.org/jira/browse/LUCENE-1068.
// the following should be recognized as HOST:
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
// 2.3 should show the bug
a2 = new ClassicAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });
// 2.4 should not show the bug
a2 = new ClassicAnalyzer(Version.LUCENE_24);
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
}
public void testEMailAddresses() throws Exception {
// email addresses, possibly with underscores, periods, etc
assertAnalyzesTo(a, "test@example.com", new String[]{"test@example.com"});
assertAnalyzesTo(a, "first.lastname@example.com", new String[]{"first.lastname@example.com"});
assertAnalyzesTo(a, "first_lastname@example.com", new String[]{"first_lastname@example.com"});
}
public void testNumeric() throws Exception {
// floating point, serial, model numbers, ip addresses, etc.
// every other segment must have at least one digit
assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"r2d2", "c3po"});
assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
assertAnalyzesTo(a, "1-2-3", new String[]{"1-2-3"});
assertAnalyzesTo(a, "a1-b2-c3", new String[]{"a1-b2-c3"});
assertAnalyzesTo(a, "a1-b-c3", new String[]{"a1-b-c3"});
}
public void testTextWithNumbers() throws Exception {
// numbers
assertAnalyzesTo(a, "David has 5000 bones", new String[]{"david", "has", "5000", "bones"});
}
public void testVariousText() throws Exception {
// various
assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"c", "embedded", "developers", "wanted"});
assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "foo", "bar"});
assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "foo", "bar"});
assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"quoted", "word"});
}
public void testAcronyms() throws Exception {
// acronyms have their dots stripped
assertAnalyzesTo(a, "U.S.A.", new String[]{"usa"});
}
public void testCPlusPlusHash() throws Exception {
// It would be nice to change the grammar in StandardTokenizer.jj to make "C#" and "C++" end up as tokens.
assertAnalyzesTo(a, "C++", new String[]{"c"});
assertAnalyzesTo(a, "C#", new String[]{"c"});
}
public void testKorean() throws Exception {
// Korean words
assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"});
}
// Compliance with the "old" JavaCC-based analyzer, see:
// https://issues.apache.org/jira/browse/LUCENE-966#action_12516752
public void testComplianceFileName() throws Exception {
assertAnalyzesTo(a, "2004.jpg",
new String[]{"2004.jpg"},
new String[]{"<HOST>"});
}
public void testComplianceNumericIncorrect() throws Exception {
assertAnalyzesTo(a, "62.46",
new String[]{"62.46"},
new String[]{"<HOST>"});
}
public void testComplianceNumericLong() throws Exception {
assertAnalyzesTo(a, "978-0-94045043-1",
new String[]{"978-0-94045043-1"},
new String[]{"<NUM>"});
}
public void testComplianceNumericFile() throws Exception {
assertAnalyzesTo(
a,
"78academyawards/rules/rule02.html",
new String[]{"78academyawards/rules/rule02.html"},
new String[]{"<NUM>"});
}
public void testComplianceNumericWithUnderscores() throws Exception {
assertAnalyzesTo(
a,
"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs",
new String[]{"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"},
new String[]{"<NUM>"});
}
public void testComplianceNumericWithDash() throws Exception {
assertAnalyzesTo(a, "mid-20th", new String[]{"mid-20th"},
new String[]{"<NUM>"});
}
public void testComplianceManyTokens() throws Exception {
assertAnalyzesTo(
a,
"/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm "
+ "safari-0-sheikh-zayed-grand-mosque.jpg",
new String[]{"money.cnn.com", "magazines", "fortune",
"fortune", "archive/2007/03/19/8402357", "index.htm",
"safari-0-sheikh", "zayed", "grand", "mosque.jpg"},
new String[]{"<HOST>", "<ALPHANUM>", "<ALPHANUM>",
"<ALPHANUM>", "<NUM>", "<HOST>", "<NUM>", "<ALPHANUM>",
"<ALPHANUM>", "<HOST>"});
}
public void testJava14BWCompatibility() throws Exception {
ClassicAnalyzer sa = new ClassicAnalyzer(Version.LUCENE_30);
assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
}
/**
* Make sure we skip wicked long terms.
*/
public void testWickedLongTerm() throws IOException {
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT)));
char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
Arrays.fill(chars, 'x');
Document doc = new Document();
final String bigTerm = new String(chars);
// This produces a too-long term:
String contents = "abc xyz x" + bigTerm + " another term";
doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
// Make sure we can add another normal document
doc = new Document();
doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
writer.close();
IndexReader reader = IndexReader.open(dir, true);
// Make sure all terms < max size were indexed
assertEquals(2, reader.docFreq(new Term("content", "abc")));
assertEquals(1, reader.docFreq(new Term("content", "bbb")));
assertEquals(1, reader.docFreq(new Term("content", "term")));
assertEquals(1, reader.docFreq(new Term("content", "another")));
// Make sure position is still incremented when
// massive term is skipped:
DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
MultiFields.getDeletedDocs(reader),
"content",
new BytesRef("another"));
assertTrue(tps.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(1, tps.freq());
assertEquals(3, tps.nextPosition());
// Make sure the doc that has the massive term is in
// the index:
assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());
reader.close();
// Make sure we can add a document with exactly the
// maximum length term, and search on that term:
doc = new Document();
doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
sa.setMaxTokenLength(100000);
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
writer.addDocument(doc);
writer.close();
reader = IndexReader.open(dir, true);
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
reader.close();
dir.close();
}
}
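
For contrast between the two classes exercised above and below: a minimal, self-contained sketch (not part of this commit) that dumps the tokens and token types ClassicAnalyzer and the UAX#29-based StandardAnalyzer produce for the same input. The class name, sample text, and Version.LUCENE_40 constant are illustrative assumptions.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

// Illustrative sketch: compare token/type output of the old and new analyzers.
public class ClassicVsStandardDump {
  static void dump(String label, Analyzer analyzer, String text) throws IOException {
    TokenStream ts = analyzer.tokenStream("content", new StringReader(text));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    TypeAttribute type = ts.addAttribute(TypeAttribute.class);
    ts.reset();
    System.out.print(label + ":");
    while (ts.incrementToken()) {
      System.out.print(" " + term.toString() + "/" + type.type());
    }
    System.out.println();
    ts.end();
    ts.close();
  }

  public static void main(String[] args) throws IOException {
    String text = "first.lastname@example.com www.nutch.org";
    // ClassicAnalyzer keeps the pre-UAX#29 grammar (<EMAIL>, <HOST>, ...).
    dump("classic", new ClassicAnalyzer(Version.LUCENE_40), text); // version is an assumption
    // StandardAnalyzer now applies the UAX#29 word break rules plus
    // URL and email recognition.
    dump("standard", new StandardAnalyzer(Version.LUCENE_40), text);
  }
}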

View File

@@ -1,35 +1,33 @@
package org.apache.lucene.analysis.core;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Copyright 2004 The Apache Software Foundation
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -38,277 +36,365 @@ import org.apache.lucene.util.BytesRef;
*/
public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
private Analyzer a = new StandardAnalyzer(TEST_VERSION_CURRENT);
public void testMaxTermLength() throws Exception {
StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
sa.setMaxTokenLength(5);
assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"});
public void testHugeDoc() throws IOException {
StringBuilder sb = new StringBuilder();
char whitespace[] = new char[4094];
Arrays.fill(whitespace, ' ');
sb.append(whitespace);
sb.append("testing 1234");
String input = sb.toString();
StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
public void testMaxTermLength2() throws Exception {
StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "toolong", "xy", "z"});
sa.setMaxTokenLength(5);
assertAnalyzesTo(sa, "ab cd toolong xy z", new String[]{"ab", "cd", "xy", "z"}, new int[]{1, 1, 2, 1});
}
private Analyzer a = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents
(String fieldName, Reader reader) {
public void testMaxTermLength3() throws Exception {
char[] chars = new char[255];
for(int i=0;i<255;i++)
chars[i] = 'a';
String longTerm = new String(chars, 0, 255);
assertAnalyzesTo(a, "ab cd " + longTerm + " xy z", new String[]{"ab", "cd", longTerm, "xy", "z"});
assertAnalyzesTo(a, "ab cd " + longTerm + "a xy z", new String[]{"ab", "cd", "xy", "z"});
}
public void testAlphanumeric() throws Exception {
// alphanumeric tokens
assertAnalyzesTo(a, "B2B", new String[]{"b2b"});
assertAnalyzesTo(a, "2B", new String[]{"2b"});
}
public void testUnderscores() throws Exception {
// underscores are delimiters, but not in email addresses (below)
assertAnalyzesTo(a, "word_having_underscore", new String[]{"word", "having", "underscore"});
assertAnalyzesTo(a, "word_with_underscore_and_stopwords", new String[]{"word", "underscore", "stopwords"});
}
public void testDelimiters() throws Exception {
// other delimiters: "-", "/", ","
assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"});
assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"});
assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"});
}
public void testApostrophes() throws Exception {
// internal apostrophes: O'Reilly, you're, O'Reilly's
// possessives are actually removed by StandardFilter, not the tokenizer
assertAnalyzesTo(a, "O'Reilly", new String[]{"o'reilly"});
assertAnalyzesTo(a, "you're", new String[]{"you're"});
assertAnalyzesTo(a, "she's", new String[]{"she"});
assertAnalyzesTo(a, "Jim's", new String[]{"jim"});
assertAnalyzesTo(a, "don't", new String[]{"don't"});
assertAnalyzesTo(a, "O'Reilly's", new String[]{"o'reilly"});
}
public void testTSADash() throws Exception {
// t and s had been stopwords in Lucene <= 2.0, which made it impossible
// to correctly search for these terms:
assertAnalyzesTo(a, "s-class", new String[]{"s", "class"});
assertAnalyzesTo(a, "t-com", new String[]{"t", "com"});
// 'a' is still a stopword:
assertAnalyzesTo(a, "a-class", new String[]{"class"});
}
public void testCompanyNames() throws Exception {
// company names
assertAnalyzesTo(a, "AT&T", new String[]{"at&t"});
assertAnalyzesTo(a, "Excite@Home", new String[]{"excite@home"});
}
public void testLucene1140() throws Exception {
try {
StandardAnalyzer analyzer = new StandardAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(analyzer, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
} catch (NullPointerException e) {
fail("Should not throw an NPE and it did");
Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
return new TokenStreamComponents(tokenizer);
}
};
/** Passes through tokens with type "<URL>" and blocks all other types. */
private class URLFilter extends TokenFilter {
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public URLFilter(TokenStream in) {
super(in);
}
@Override
public final boolean incrementToken() throws java.io.IOException {
boolean isTokenAvailable = false;
while (input.incrementToken()) {
if (typeAtt.type() == StandardTokenizer.TOKEN_TYPES[StandardTokenizer.URL]) {
isTokenAvailable = true;
break;
}
}
return isTokenAvailable;
}
}
/** Passes through tokens with type "<EMAIL>" and blocks all other types. */
private class EmailFilter extends TokenFilter {
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public EmailFilter(TokenStream in) {
super(in);
}
@Override
public final boolean incrementToken() throws java.io.IOException {
boolean isTokenAvailable = false;
while (input.incrementToken()) {
if (typeAtt.type() == StandardTokenizer.TOKEN_TYPES[StandardTokenizer.EMAIL]) {
isTokenAvailable = true;
break;
}
}
return isTokenAvailable;
}
}
public void testDomainNames() throws Exception {
// Current Lucene should not show the bug
StandardAnalyzer a2 = new StandardAnalyzer(TEST_VERSION_CURRENT);
private Analyzer urlAnalyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
StandardTokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
tokenizer.setMaxTokenLength(Integer.MAX_VALUE); // Tokenize arbitrary length URLs
TokenFilter filter = new URLFilter(tokenizer);
return new TokenStreamComponents(tokenizer, filter);
}
};
// domain names
assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"});
// Note the trailing "."; see https://issues.apache.org/jira/browse/LUCENE-1068.
// the following should be recognized as HOST:
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
private Analyzer emailAnalyzer = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
TokenFilter filter = new EmailFilter(tokenizer);
return new TokenStreamComponents(tokenizer, filter);
}
};
// 2.3 should show the bug
a2 = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });
// 2.4 should not show the bug
a2 = new StandardAnalyzer(Version.LUCENE_24);
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
public void testArmenian() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։",
new String[] { "Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից",
"ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը" } );
}
public void testAmharic() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም",
new String[] { "ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም" } );
}
public void testArabic() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.",
new String[] { "الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا",
"بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008" } );
}
public void testAramaic() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀",
new String[] { "ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ",
"ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"});
}
public void testBengali() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।",
new String[] { "এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার",
"শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে" });
}
public void testFarsi() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "ویکی پدیای انگلیسی در تاریخ ۲۵ دی ۱۳۷۹ به صورت مکملی برای دانشنامهٔ تخصصی نوپدیا نوشته شد.",
new String[] { "ویکی", "پدیای", "انگلیسی", "در", "تاریخ", "۲۵", "دی", "۱۳۷۹", "به", "صورت", "مکملی",
"برای", "دانشنامهٔ", "تخصصی", "نوپدیا", "نوشته", "شد" });
}
public void testGreek() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.",
new String[] { "Γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που",
"σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα" });
}
public void testEMailAddresses() throws Exception {
// email addresses, possibly with underscores, periods, etc
assertAnalyzesTo(a, "test@example.com", new String[]{"test@example.com"});
assertAnalyzesTo(a, "first.lastname@example.com", new String[]{"first.lastname@example.com"});
assertAnalyzesTo(a, "first_lastname@example.com", new String[]{"first_lastname@example.com"});
public void testThai() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔",
new String[] { "การที่ได้ต้องแสดงว่างานดี", "แล้วเธอจะไปไหน", "๑๒๓๔" });
}
public void testLao() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "ສາທາລະນະລັດ ປະຊາທິປະໄຕ ປະຊາຊົນລາວ",
new String[] { "ສາທາລະນະລັດ", "ປະຊາທິປະໄຕ", "ປະຊາຊົນລາວ" });
}
public void testTibetan() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །",
new String[] { "སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག",
"མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར",
"", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ" });
}
/*
* For chinese, tokenize as char (these can later form bigrams or whatever)
*/
public void testChinese() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "我是中国人。 ",
new String[] { "", "", "", "", "", "", ""});
}
public void testEmpty() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "", new String[] {});
BaseTokenStreamTestCase.assertAnalyzesTo(a, ".", new String[] {});
BaseTokenStreamTestCase.assertAnalyzesTo(a, " ", new String[] {});
}
/* test various jira issues this analyzer is related to */
public void testLUCENE1545() throws Exception {
/*
* Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E.
* The word "moͤchte" is incorrectly tokenized into "mo" "chte", the combining character is lost.
* Expected result is only one token "moͤchte".
*/
BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new String[] { "moͤchte" });
}
/* Tests from StandardAnalyzer, just to show behavior is similar */
public void testAlphanumericSA() throws Exception {
// alphanumeric tokens
BaseTokenStreamTestCase.assertAnalyzesTo(a, "B2B", new String[]{"B2B"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "2B", new String[]{"2B"});
}
public void testNumeric() throws Exception {
public void testDelimitersSA() throws Exception {
// other delimiters: "-", "/", ","
BaseTokenStreamTestCase.assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"});
}
public void testApostrophesSA() throws Exception {
// internal apostrophes: O'Reilly, you're, O'Reilly's
BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"});
}
public void testNumericSA() throws Exception {
// floating point, serial, model numbers, ip addresses, etc.
// every other segment must have at least one digit
assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"r2d2", "c3po"});
assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
assertAnalyzesTo(a, "1-2-3", new String[]{"1-2-3"});
assertAnalyzesTo(a, "a1-b2-c3", new String[]{"a1-b2-c3"});
assertAnalyzesTo(a, "a1-b-c3", new String[]{"a1-b-c3"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"R2D2", "C3PO"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
}
public void testTextWithNumbers() throws Exception {
public void testTextWithNumbersSA() throws Exception {
// numbers
assertAnalyzesTo(a, "David has 5000 bones", new String[]{"david", "has", "5000", "bones"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", new String[]{"David", "has", "5000", "bones"});
}
public void testVariousText() throws Exception {
public void testVariousTextSA() throws Exception {
// various
assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"c", "embedded", "developers", "wanted"});
assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "foo", "bar"});
assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "foo", "bar"});
assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"quoted", "word"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"});
}
public void testAcronyms() throws Exception {
// acronyms have their dots stripped
assertAnalyzesTo(a, "U.S.A.", new String[]{"usa"});
}
public void testCPlusPlusHash() throws Exception {
// It would be nice to change the grammar in StandardTokenizer.jj to make "C#" and "C++" end up as tokens.
assertAnalyzesTo(a, "C++", new String[]{"c"});
assertAnalyzesTo(a, "C#", new String[]{"c"});
}
public void testKorean() throws Exception {
public void testKoreanSA() throws Exception {
// Korean words
assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"});
}
public void testOffsets() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
new String[] {"David", "has", "5000", "bones"},
new int[] {0, 6, 10, 15},
new int[] {5, 9, 14, 20});
}
public void testTypes() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
new String[] {"David", "has", "5000", "bones"},
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" });
}
public void testWikiURLs() throws Exception {
Reader reader = null;
String luceneResourcesWikiPage;
try {
reader = new InputStreamReader
(getClass().getResourceAsStream("LuceneResourcesWikiPage.html"), "UTF-8");
StringBuilder builder = new StringBuilder();
char[] buffer = new char[1024];
int numCharsRead;
while (-1 != (numCharsRead = reader.read(buffer))) {
builder.append(buffer, 0, numCharsRead);
}
luceneResourcesWikiPage = builder.toString();
} finally {
if (null != reader) {
reader.close();
}
}
assertTrue(null != luceneResourcesWikiPage
&& luceneResourcesWikiPage.length() > 0);
BufferedReader bufferedReader = null;
String[] urls;
try {
List<String> urlList = new ArrayList<String>();
bufferedReader = new BufferedReader(new InputStreamReader
(getClass().getResourceAsStream("LuceneResourcesWikiPageURLs.txt"), "UTF-8"));
String line;
while (null != (line = bufferedReader.readLine())) {
line = line.trim();
if (line.length() > 0) {
urlList.add(line);
}
}
urls = urlList.toArray(new String[urlList.size()]);
} finally {
if (null != bufferedReader) {
bufferedReader.close();
}
}
assertTrue(null != urls && urls.length > 0);
BaseTokenStreamTestCase.assertAnalyzesTo
(urlAnalyzer, luceneResourcesWikiPage, urls);
}
public void testEmails() throws Exception {
Reader reader = null;
String randomTextWithEmails;
try {
reader = new InputStreamReader
(getClass().getResourceAsStream("random.text.with.email.addresses.txt"), "UTF-8");
StringBuilder builder = new StringBuilder();
char[] buffer = new char[1024];
int numCharsRead;
while (-1 != (numCharsRead = reader.read(buffer))) {
builder.append(buffer, 0, numCharsRead);
}
randomTextWithEmails = builder.toString();
} finally {
if (null != reader) {
reader.close();
}
}
assertTrue(null != randomTextWithEmails
&& randomTextWithEmails.length() > 0);
BufferedReader bufferedReader = null;
String[] emails;
try {
List<String> emailList = new ArrayList<String>();
bufferedReader = new BufferedReader(new InputStreamReader
(getClass().getResourceAsStream("email.addresses.from.random.text.with.email.addresses.txt"), "UTF-8"));
String line;
while (null != (line = bufferedReader.readLine())) {
line = line.trim();
if (line.length() > 0) {
emailList.add(line);
}
}
emails = emailList.toArray(new String[emailList.size()]);
} finally {
if (null != bufferedReader) {
bufferedReader.close();
}
}
assertTrue(null != emails && emails.length > 0);
BaseTokenStreamTestCase.assertAnalyzesTo
(emailAnalyzer, randomTextWithEmails, emails);
}
// Compliance with the "old" JavaCC-based analyzer, see:
// https://issues.apache.org/jira/browse/LUCENE-966#action_12516752
public void testComplianceFileName() throws Exception {
assertAnalyzesTo(a, "2004.jpg",
new String[]{"2004.jpg"},
new String[]{"<HOST>"});
public void testURLs() throws Exception {
Reader reader = null;
String randomTextWithURLs;
try {
reader = new InputStreamReader
(getClass().getResourceAsStream("random.text.with.urls.txt"), "UTF-8");
StringBuilder builder = new StringBuilder();
char[] buffer = new char[1024];
int numCharsRead;
while (-1 != (numCharsRead = reader.read(buffer))) {
builder.append(buffer, 0, numCharsRead);
}
randomTextWithURLs = builder.toString();
} finally {
if (null != reader) {
reader.close();
}
}
assertTrue(null != randomTextWithURLs
&& randomTextWithURLs.length() > 0);
BufferedReader bufferedReader = null;
String[] urls;
try {
List<String> urlList = new ArrayList<String>();
bufferedReader = new BufferedReader(new InputStreamReader
(getClass().getResourceAsStream("urls.from.random.text.with.urls.txt"), "UTF-8"));
String line;
while (null != (line = bufferedReader.readLine())) {
line = line.trim();
if (line.length() > 0) {
urlList.add(line);
}
}
urls = urlList.toArray(new String[urlList.size()]);
} finally {
if (null != bufferedReader) {
bufferedReader.close();
}
}
assertTrue(null != urls && urls.length > 0);
BaseTokenStreamTestCase.assertAnalyzesTo
(urlAnalyzer, randomTextWithURLs, urls);
}
public void testComplianceNumericIncorrect() throws Exception {
assertAnalyzesTo(a, "62.46",
new String[]{"62.46"},
new String[]{"<HOST>"});
}
public void testComplianceNumericLong() throws Exception {
assertAnalyzesTo(a, "978-0-94045043-1",
new String[]{"978-0-94045043-1"},
new String[]{"<NUM>"});
}
public void testComplianceNumericFile() throws Exception {
assertAnalyzesTo(
a,
"78academyawards/rules/rule02.html",
new String[]{"78academyawards/rules/rule02.html"},
new String[]{"<NUM>"});
}
public void testComplianceNumericWithUnderscores() throws Exception {
assertAnalyzesTo(
a,
"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs",
new String[]{"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"},
new String[]{"<NUM>"});
}
public void testComplianceNumericWithDash() throws Exception {
assertAnalyzesTo(a, "mid-20th", new String[]{"mid-20th"},
new String[]{"<NUM>"});
}
public void testComplianceManyTokens() throws Exception {
assertAnalyzesTo(
a,
"/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm "
+ "safari-0-sheikh-zayed-grand-mosque.jpg",
new String[]{"money.cnn.com", "magazines", "fortune",
"fortune", "archive/2007/03/19/8402357", "index.htm",
"safari-0-sheikh", "zayed", "grand", "mosque.jpg"},
new String[]{"<HOST>", "<ALPHANUM>", "<ALPHANUM>",
"<ALPHANUM>", "<NUM>", "<HOST>", "<NUM>", "<ALPHANUM>",
"<ALPHANUM>", "<HOST>"});
}
public void testJava14BWCompatibility() throws Exception {
StandardAnalyzer sa = new StandardAnalyzer(Version.LUCENE_30);
assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test", "test" });
sa = new StandardAnalyzer(Version.LUCENE_31);
assertAnalyzesTo(sa, "test\u02C6test", new String[] { "test\u02C6test" });
}
/**
* Make sure we skip wicked long terms.
*/
public void testWickedLongTerm() throws IOException {
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
Arrays.fill(chars, 'x');
Document doc = new Document();
final String bigTerm = new String(chars);
// This produces a too-long term:
String contents = "abc xyz x" + bigTerm + " another term";
doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
// Make sure we can add another normal document
doc = new Document();
doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
writer.close();
IndexReader reader = IndexReader.open(dir, true);
// Make sure all terms < max size were indexed
assertEquals(2, reader.docFreq(new Term("content", "abc")));
assertEquals(1, reader.docFreq(new Term("content", "bbb")));
assertEquals(1, reader.docFreq(new Term("content", "term")));
assertEquals(1, reader.docFreq(new Term("content", "another")));
// Make sure position is still incremented when
// massive term is skipped:
DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
MultiFields.getDeletedDocs(reader),
"content",
new BytesRef("another"));
assertTrue(tps.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(1, tps.freq());
assertEquals(3, tps.nextPosition());
// Make sure the doc that has the massive term is in
// the index:
assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());
reader.close();
// Make sure we can add a document with exactly the
// maximum length term, and search on that term:
doc = new Document();
doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
sa.setMaxTokenLength(100000);
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
writer.addDocument(doc);
writer.close();
reader = IndexReader.open(dir, true);
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
reader.close();
dir.close();
}
public void testUnicodeWordBreaks() throws Exception {
WordBreakTestUnicode_5_2_0 wordBreakTest = new WordBreakTestUnicode_5_2_0();
wordBreakTest.test(a);
}
}

View File

@ -0,0 +1,204 @@
package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.UAX29Tokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestUAX29Tokenizer extends BaseTokenStreamTestCase {
public void testHugeDoc() throws IOException {
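// A note on the assumed intent: 4094 leading spaces put "testing 1234"
// right at a 4096-char boundary, so the first token presumably straddles
// an internal scanner buffer refill.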
StringBuilder sb = new StringBuilder();
char whitespace[] = new char[4094];
Arrays.fill(whitespace, ' ');
sb.append(whitespace);
sb.append("testing 1234");
String input = sb.toString();
UAX29Tokenizer tokenizer = new UAX29Tokenizer(new StringReader(input));
BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
}
private Analyzer a = new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents
(String fieldName, Reader reader) {
Tokenizer tokenizer = new UAX29Tokenizer(reader);
return new TokenStreamComponents(tokenizer);
}
};
public void testArmenian() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "Վիքիպեդիայի 13 միլիոն հոդվածները (4,600` հայերեն վիքիպեդիայում) գրվել են կամավորների կողմից ու համարյա բոլոր հոդվածները կարող է խմբագրել ցանկաց մարդ ով կարող է բացել Վիքիպեդիայի կայքը։",
new String[] { "Վիքիպեդիայի", "13", "միլիոն", "հոդվածները", "4,600", "հայերեն", "վիքիպեդիայում", "գրվել", "են", "կամավորների", "կողմից",
"ու", "համարյա", "բոլոր", "հոդվածները", "կարող", "է", "խմբագրել", "ցանկաց", "մարդ", "ով", "կարող", "է", "բացել", "Վիքիպեդիայի", "կայքը" } );
}
public void testAmharic() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም",
new String[] { "ዊኪፔድያ", "የባለ", "ብዙ", "ቋንቋ", "የተሟላ", "ትክክለኛና", "ነጻ", "መዝገበ", "ዕውቀት", "ኢንሳይክሎፒዲያ", "ነው", "ማንኛውም" } );
}
public void testArabic() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "الفيلم الوثائقي الأول عن ويكيبيديا يسمى \"الحقيقة بالأرقام: قصة ويكيبيديا\" (بالإنجليزية: Truth in Numbers: The Wikipedia Story)، سيتم إطلاقه في 2008.",
new String[] { "الفيلم", "الوثائقي", "الأول", "عن", "ويكيبيديا", "يسمى", "الحقيقة", "بالأرقام", "قصة", "ويكيبيديا",
"بالإنجليزية", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "سيتم", "إطلاقه", "في", "2008" } );
}
public void testAramaic() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "ܘܝܩܝܦܕܝܐ (ܐܢܓܠܝܐ: Wikipedia) ܗܘ ܐܝܢܣܩܠܘܦܕܝܐ ܚܐܪܬܐ ܕܐܢܛܪܢܛ ܒܠܫܢ̈ܐ ܣܓܝܐ̈ܐ܂ ܫܡܗ ܐܬܐ ܡܢ ܡ̈ܠܬܐ ܕ\"ܘܝܩܝ\" ܘ\"ܐܝܢܣܩܠܘܦܕܝܐ\"܀",
new String[] { "ܘܝܩܝܦܕܝܐ", "ܐܢܓܠܝܐ", "Wikipedia", "ܗܘ", "ܐܝܢܣܩܠܘܦܕܝܐ", "ܚܐܪܬܐ", "ܕܐܢܛܪܢܛ", "ܒܠܫܢ̈ܐ", "ܣܓܝܐ̈ܐ", "ܫܡܗ",
"ܐܬܐ", "ܡܢ", "ܡ̈ܠܬܐ", "ܕ", "ܘܝܩܝ", "ܘ", "ܐܝܢܣܩܠܘܦܕܝܐ"});
}
public void testBengali() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "এই বিশ্বকোষ পরিচালনা করে উইকিমিডিয়া ফাউন্ডেশন (একটি অলাভজনক সংস্থা)। উইকিপিডিয়ার শুরু ১৫ জানুয়ারি, ২০০১ সালে। এখন পর্যন্ত ২০০টিরও বেশী ভাষায় উইকিপিডিয়া রয়েছে।",
new String[] { "এই", "বিশ্বকোষ", "পরিচালনা", "করে", "উইকিমিডিয়া", "ফাউন্ডেশন", "একটি", "অলাভজনক", "সংস্থা", "উইকিপিডিয়ার",
"শুরু", "১৫", "জানুয়ারি", "২০০১", "সালে", "এখন", "পর্যন্ত", "২০০টিরও", "বেশী", "ভাষায়", "উইকিপিডিয়া", "রয়েছে" });
}
public void testFarsi() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "ویکی پدیای انگلیسی در تاریخ ۲۵ دی ۱۳۷۹ به صورت مکملی برای دانشنامهٔ تخصصی نوپدیا نوشته شد.",
new String[] { "ویکی", "پدیای", "انگلیسی", "در", "تاریخ", "۲۵", "دی", "۱۳۷۹", "به", "صورت", "مکملی",
"برای", "دانشنامهٔ", "تخصصی", "نوپدیا", "نوشته", "شد" });
}
public void testGreek() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "Γράφεται σε συνεργασία από εθελοντές με το λογισμικό wiki, κάτι που σημαίνει ότι άρθρα μπορεί να προστεθούν ή να αλλάξουν από τον καθένα.",
new String[] { "Γράφεται", "σε", "συνεργασία", "από", "εθελοντές", "με", "το", "λογισμικό", "wiki", "κάτι", "που",
"σημαίνει", "ότι", "άρθρα", "μπορεί", "να", "προστεθούν", "ή", "να", "αλλάξουν", "από", "τον", "καθένα" });
}
public void testThai() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "การที่ได้ต้องแสดงว่างานดี. แล้วเธอจะไปไหน? ๑๒๓๔",
new String[] { "การที่ได้ต้องแสดงว่างานดี", "แล้วเธอจะไปไหน", "๑๒๓๔" });
}
public void testLao() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "ສາທາລະນະລັດ ປະຊາທິປະໄຕ ປະຊາຊົນລາວ",
new String[] { "ສາທາລະນະລັດ", "ປະຊາທິປະໄຕ", "ປະຊາຊົນລາວ" });
}
public void testTibetan() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "སྣོན་མཛོད་དང་ལས་འདིས་བོད་ཡིག་མི་ཉམས་གོང་འཕེལ་དུ་གཏོང་བར་ཧ་ཅང་དགེ་མཚན་མཆིས་སོ། །",
new String[] { "སྣོན", "མཛོད", "དང", "ལས", "འདིས", "བོད", "ཡིག",
"མི", "ཉམས", "གོང", "འཕེལ", "དུ", "གཏོང", "བར",
"", "ཅང", "དགེ", "མཚན", "མཆིས", "སོ" });
}
/*
* For chinese, tokenize as char (these can later form bigrams or whatever)
*/
public void testChinese() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "我是中国人。 ",
new String[] { "", "", "", "", "", "", ""});
}
public void testEmpty() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "", new String[] {});
BaseTokenStreamTestCase.assertAnalyzesTo(a, ".", new String[] {});
BaseTokenStreamTestCase.assertAnalyzesTo(a, " ", new String[] {});
}
/* test various jira issues this analyzer is related to */
public void testLUCENE1545() throws Exception {
/*
* Standard analyzer does not correctly tokenize the combining character U+0364 COMBINING LATIN SMALL LETTER E.
* The word "moͤchte" is incorrectly tokenized into "mo" "chte"; the combining character is lost.
* The expected result is the single token "moͤchte".
*/
BaseTokenStreamTestCase.assertAnalyzesTo(a, "moͤchte", new String[] { "moͤchte" });
}
/* Tests from StandardAnalyzer, just to show behavior is similar */
public void testAlphanumericSA() throws Exception {
// alphanumeric tokens
BaseTokenStreamTestCase.assertAnalyzesTo(a, "B2B", new String[]{"B2B"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "2B", new String[]{"2B"});
}
public void testDelimitersSA() throws Exception {
// other delimiters: "-", "/", ","
BaseTokenStreamTestCase.assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"});
}
public void testApostrophesSA() throws Exception {
// internal apostrophes: O'Reilly, you're, O'Reilly's
BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"});
}
public void testNumericSA() throws Exception {
// floating point, serial, model numbers, ip addresses, etc.
BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"R2D2", "C3PO"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
}
public void testTextWithNumbersSA() throws Exception {
// numbers
BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", new String[]{"David", "has", "5000", "bones"});
}
public void testVariousTextSA() throws Exception {
// various
BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"});
BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"});
}
public void testKoreanSA() throws Exception {
// Korean words
BaseTokenStreamTestCase.assertAnalyzesTo(a, "안녕하세요 한글입니다", new String[]{"안녕하세요", "한글입니다"});
}
public void testOffsets() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
new String[] {"David", "has", "5000", "bones"},
new int[] {0, 6, 10, 15},
new int[] {5, 9, 14, 20});
}
public void testTypes() throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
new String[] {"David", "has", "5000", "bones"},
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" });
}
public void testUnicodeWordBreaks() throws Exception {
WordBreakTestUnicode_5_2_0 wordBreakTest = new WordBreakTestUnicode_5_2_0();
wordBreakTest.test(a);
}
}

View File

@ -0,0 +1,265 @@
dJ8ngFi@avz13m.CC
JCAVLRJg@3aqiq2yui.gm
kU-l6DS@[082.015.228.189]
37layCJS@j5NVP7NWAY.VG
"%U@?\B"@Fl2d.md
aH3QW@tw8uo2.eu
Bvd#@tupjv.sn
SBMm0Nm.oyk70.rMNdd8k.#ru3LI.gMMLBI.0dZRD4d.RVK2nY@au58t.B13albgy4u.mt
DvdUJk@61zwkit7dkd3rcq4v.BD
~+Kdz@3mousnl.SE
C'ts`@Vh4zk.uoafcft-dr753x4odt04q.UY
}0tzWYDBuy@cSRQAABB9B.7c8xawf75-cyo.PM
lMahAA.j/5.RqUjS745.DtkcYdi@d2-4gb-l6.ae
V85E9Hx7@vpf0bs.bz
MGBg2@7F3MJTCCPROS8YETM0B4-C9P7WXKGFB0.RU
rsBWOCJ@lYX0SILY4L53Z3VJPSF6.pwrawr.vdpoq.nz
dIyLrU@9A40T2ZIG7H8R.t63.tv
6dAsZKz@d33XR.IR
EnqCC@2bk6da6y08.LI
AQ9yV@Mfqq32nexufgxzl4o7q5jv3kd.lb
lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H
b6/zomNkV@8jwm-he.IN
5FLuakz.hXVkuqDt@iBFP83V6MNI3N0FRWJ9302DS-0KHRV6O.1bf59kj64uj5b6e2zfn.cm
RhIwkU@58vmet9yfddpg.3adkmhrv1px.AO
nEBk6w2Q@Bb5ib.2pay.so
AlW5CMAn@qos-53u.j91qq96d4en129szf7099kxv5lo6yo.gm
QPYBDV3.Ah/h8U@x3v444pzi.1cvgokam.PW
5Iwbiq7@p9s-2pixps9jwzyhfroxqivw8sv90r.xn--wgbh1c
AaFU9L@3yj1xqf1.cz9.ac
|iCmQ1@rum6w0a7wt.3QLD.ht71.cx
EhLTUjo@rEK.sJ44H0.GR
bHEbq3Rp@33.lKSSMY.9xaurtfle9xe.iu4810l.fj
eFcup.cPPEW@[1ae]
p907@bk3o.fvtmw2m2.Uutr83x2yt4.2nuin.EU
PpW2L5.QgP2n@9rz7.a5qi.oRH1Z.8ov.UZ
o8UgG5fewm4vr9Ai5wPS@sgh.2F-OLKLZ81DIUET.xpya0vtx.fj
aixQH@z-y.AR
jVTeWQfL."M#~t Q"@1e.oglq.ubk.SZ
6e5QQuy@N7.2cuw3x2wpddf.paycp1pc.AI
IqG6Fl@[220.112.120.54]
lWHH4eWSn@tbxyb7.jhzqxrk.lv
P1zO*RaAr@[111.99.108.22]
d00gy@[4TC]
1yNINoBU@[136.003.010.238]
Ms8ox@[_3Tuehr]
wtWDNo@1sjmcbbli196-765mt7m8o8hywft.7-ga6rsnum8v.np
"x)yO "@7le5o2rcud5ngs.Qmfmq.Jfxv8.Zznv6t6il.MIL
1hXd@f8.1kxqd3yw4j6zmb7l7.US
"8}(\$"@mu2viak0nh4sj5ivgpy1wqie.HK
Th7XoAs5@ggdb.BI
5iDbhah.xdtF1x@[59.55.12.243]
j2ovALlgm2Wcwx@5jphzt.TN
ZlaP~E.4Yk1K0F@lF6VN.M5.Nj.PRO
cFCvIJAw@l93H0R1W6V4RI0AY7RLRQR4KOEVQPEG-PDTF03V4D9A0.xZZK5.lu
8Ju2AW@1n.h7.vu
"\nkP]{"@[Vej\yo\HD]
fKWC?@qgcb.xn--mgbaam7a8h
L4BbaB@hv1.BIZ
WvSmV@qpx15vzmbtxzvi-syndl1.ML
"3|PX~Cbdq"@U3vp-7k.8c4q3sgpwt6sochundzhx.museum
LjH9rJTu@tkm.gy
vQgXEFb@maxmrbk-5a5s6o.6MZZ6IK.awjbtiva7.IL
6TVbIA@r50eh-a.la
AaASl@Bsteea.qHXE3Q5CUJ3DBG.S2hvnld.4WJWL.fk
"CN;\-z 6M"@86.qc7s.23p.ET
zX3=O3o@Yjov.7g660.8M88OJGTDC5.np
QFZlK1A@4W47EIXE.KY
1guLnQb07k@ab.ccemuif2s.lb
Jddxj@[111.079.109.147]
Hj06gcE@[105.233.192.168]
u8?xicQ@[i\21I]
CczYer}W@bezu6wtys9s.lft3z.mobi
OmpYhIL@6GJ7P29EIE-G63RDW7GLFLFC0M1.AERO
2RRPLqO@8lh0i.vm7xmvvo-r5nf0x.CY
TOc!BhbKz@F-myy7.kQWSUI7S3.net
"0\!P?".shQVdSerA@2qmqj8ul.hm
LTLNFsgB@[191.56.104.113]
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU
VGLn@z3E2.3an2.MM
TWmfsxn@[112.192.017.029]
2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV
CjaPC63@['\RDrwk]
Ayydpdoa@tdgypppmen.wf
"gfKP9"@jo3-r0.mz
aTMgDW4@t5gax.XN--0ZWM56D
mcDrMO3FQ@nwc21.y5qd45lesryrp.IL
NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp
XtAhFnq@[218.214.251.103]
x0S8uos@[109.82.126.233]
ALB4KFavj16pODdd@i206d6s.MM
grxIt96.46nCf@nokjogh2l4.nCMWXG.yt
Fgbh7@2rxkk0bvkk-v3evd-sh56gvhxlh.hhjcsg36j8qt98okjbdj9z574xdpix59zf6h80r.Gyb4rrxu.ve
uo0AX41@Fhlegm1z57j-qvf5.p8jo6zvm.sc
sjn4cz@9ktlwkqte.bv
b04v0Ct@[243.230.224.190]
F!FUbQHU@uvz7cu1l.ciz4h2.93U4V.gb
6CHec@nONUKT.nl
zbmZiXw@yb.bxxp.3fm457.va
"/GdiZ7f"@[221.229.46.3]
NJde8Li@f7a.g51VICBH.cy
6IeAft@e-3fp.Nkh7nm8.v8i47xvrv27r.pf
TC*Qopzb@xIOB3.6egz4.m-24t5wmxtmco4iy8g91o66mjgha1vjlepyffott.E5ta.p9.CF
"_3Sc_"@[193.165.124.143]
W0dwHf@[25.174.65.80]
qPkkP0@4k0vs.oaak2z.3JMTI.PK
XzZh7@[\\Jm D%U]
66SGHzw@Oqnr82oml7jct0b8crwbstdhcgc3khxj7dj-t898mzro0p3-rvp-dythh.TN
ot4tPF@[AY\j]
e4seIFbl@cib.cg
B2w025e@r2H7BW16B24DG1S5DED.bg
atweEde@blk-3y.mgvoh6l9my.F6.FI
uDoPcRGW@rEBD5LUT.ly
2KQhx@Bba.u--9b5bc0.NF
tKWc2VjVRYD@[254.190.162.128]
wc3W16^@D3v2uxqqeclz.w1fd529m.DM
Njg@6S8MA.HK
"L\^4z]92"@0qp--walx.MIL
X08sWFD@62GNK.tN4.f1YXX.ug
eK6Bz1Bu@[rX;J&036]
"~`o\: "@hO4UKF.oZBWV56B.cmn.DJ
lcgUakx@[pjGd&i2]
BqdBTnv3c@wf35nwaza.ME
"a#Um{:\'\bX:"@in7tjo.uw8wil.gp
ApIbER8'@[&Y]
JTsM0c!s9CzEH@Sd.mh
hy2AOUc@uqxzl7v0hl2nchokqit9lyscxaa0jaqya1wek5gkd.NC
pY7bAVD4r@[,>T*R T]
!0axBT@03-gdh1xmk3x9.GH
vbtyQBZI@20al5g.ro6ds4.Bsg15f5.NU
2^ZhSK-FFYOh@Z2iku.rg.Z0ca1.gs
G1RLpOn."yfJpg["@mXEV8.mu
yrBKNkq@a2a1.Aifn.Ta2.dj
Wok5G@b5aqobvi5.ni
nXz9i.=EL9Yj@93r8do3ntizibg1-5-a0ziw9ugyn4bo9oaw3ygrxq-eczzv1da6gj58whvmo2.rs
Dp63hd@B1kbahyq.PL
y01rn27SFq@o0HNP8.C5.i4rvj8j338zgter7er5rkwyo5g.atnc0iuj2ke.8or6ekq0x.IO
0RiEo@08mnvbu.p661ernzjz5p7nbyix5iuj.cig5hgvcc.SO
Dwxab5@1sx5y3-umsy72nl.74lwye5.DJ
IvdZVE4xRk@0vw7ajl.AR
CvQxhXJ@d5a7qnx.ke
n7MxA4~@[4(R]
RFGzu3hD0@wbh4.sm
eOADW}BcNG@2568p3b4v.Xq3eksr.GP
AsAMWriW7.zSDQSAR6@Gg2q4rtgr.GG
cDCVlA0t@[20.116.229.216]
c=yJU+3L5@n2x3xhksf.gvreani.MZ
wfYnaA4@lzojy.4oii6w6sn-p9.kh
kdeOQ5F@vD5Y.wmmv.7rswz.1zelobcp5qxxwzjn.fOEJZ.KM
ppULqb2Z@Hv9o2ui.AO
tOHw@[IPv6:3500:8B6C::CB5E:1.124.160.137]
MWLVsL@7nhliy.O8mjon3rj-kb.t8d6bcpa5i.au
BN0EY@hh9v.p9bwgs.TN
RgiAp@d9ln.bf
PBugBo@97gcz.DJ
Fh#dKzbI@[+_]
wyqU-C9hXE@wPRBUI-WS9HXE19.LV
muC?Js@[IPv6:47FB:5786:4b5e::5675]
yLTT2xV@wdoszw9k1ork-z-t.kq.l3SEO.Lb4jx0.NA
6zqw.yPV4LkL@dA3XKC.eg
S5z9i7i3s@Vzt6.fr
L|Sit6s@9cklii1.tf
yWYqz@mw-9k.FJ
Knhj419mAfftf@R26hxll64.3qtdx6g.AL
aZYHUr6@Shyn76c67.65grky.am
ZYxn6Px@di0cqhtg.hu
"#mLl"@w1sc0g3vm.j1o4o9g.GW
WYJcFp@653xk-89oprk2im.iemhx9.CC
y5AXi@[Oa #]
nZErAGj@6sq3-p.r8KQ.aero
OMq5sBK@udg-5zp1.Dory85.SG
2bymd@Ojla1hvfpw8rrihrx.cy
5OMbw0@r2d8cn75.1VR2BJ0J3A8PY.gc0mljc-h.COOP
al6X^pQkx@pyj--2hp.lbet.TN
NkzPW4f@2-0.aaoqccwrgi4olytac0imp6vvphsuobrr115eygh2xwkvzeuj.tl
"4-b9|/,\e]h]2"@9-iiahsdlzv-v65j.FK
g8Pv2hb9@[166.176.68.63]
"IA~".Tn03w7@[\>J?]
E6aK9TaJ@j0hydmxhkq2q.Svku4saky.MU
rdF2Zl1@9fsic.C17pw9o0.vn
pCKjPa88DG&x5a@4ha07ia2jk.xk7xe8.PM
qgLb5m@nynqp.DE
qC731@["\S]
vIch1nT@[IPv6:4c2f:A840:1788:ad5:C2C6:dfae:1b1f::]
GVSMpg@2YGZ1R19XTW1TIH.Re3vg30u1xq6v7cj1wf-6m14939wvgqbl.93mztd.SG
0jq4v7PMxm@eq6teog.kO6LR3.x2p.53yltrsvgpd3.RO
zdGLZD0P@i2JQNM8.816oja8pkk5zkvyx.KM
Jp#hSH@74zkerax4.31kr.7c9-yuk.mp
Kx^0oZn@oFFA-URZ13B34J.DK
sub52@aoq7.iHF.CH
jfVSq9oAR2D@iGU0.7bp3x.4cr.sz
nalgU@Yfpbdcv8a5.n9kwz6kyi2u.thic-rws.af.TG
=uC5qVT@56g530cltpekrw.pt
QR5&kx@7qhi3bhav5ga0eva.b0sdom.bb
8DZQ7@dtr16r89fdw59q.cf
Q4pNw@6o-9weojl3r7.LS
*mfOc_CN@[G\ 3]
2p`tbG@c767inolrav0hg6a-ucs.y0.tw
Rop{cgBy@Wekdh0xns2um.UK
t*p05lV@017y.MR
7ZxO80@Dovepwr4l.qxfzchrn1.es8ul0vavi6gqy82.K1hc7.INT
C_Iphp@5t4rtc.id
q+m2x@Cfw.1tm52-kr.BO
47NIL@Hl68os0.66l9bsf2q.SC
vi0LyF9O@p74jz6mxby.it
xQ4jU@rQVWLWAD3T8.4-lnu.AZ
zea_0Kr@[97.59.144.249]
5HP1k|s@[068.150.236.123]
5XJZlmYk.3Du5qee@[072.023.197.244]
AvNrIHB0@[+n}oV]
"!N7/I\zhh"@[204.037.067.146]
vlJODxFF@xFO6V.i1.fgad6bjy.NO
qDe0FA@xpp1le82ndircjgyrxyzkrqu3il.oUKHVV6829P-16JILWG62KN.cr
pMF64@wssq6kh9uhxk.cA2YZVBV4JW.xX585A.ru
G3meE@[^!'OO]
"1@0UYJl"@vplkx.d2n.i3tcx3aaxut.lbb3v9.ldq.me
iTH0QND@wg9sizy.lr
9kF?opSTo9rSDWLo&W&6@xrh32ibf.F0zb6kb.BJ
a0FI1m@1olkdpz.W70a3w8qmk3.NA
"0H}r}X(p\M`/x"@rY48LPH.Axy.Ue624.TV
AQL6YBFb@Hxawb15okz.y4.y5c0e.bt
PEaNVR@m8NH9BVX5L096DRM7YTR.er
diI`Q@i5fpkuc.7zg2av.D6tzqq.CK
TCN0-Z@Tezeq9ejv.ekeab8hz14hui.il
05SnFh@jZ85JXZ.1RO99W5FYK3.uyv7g15.MP
B2Z76Rn@9yce0shfsydxetu1v4-y.rBU2M0.6ik8oapv0zho6n653il25gu4rd216uw03.MG
vGZ2K@C2osgjtel5uerwn.riihbabhh41ve84.r3l.vH6S64.vn
Nv2ZgL@[037.054.177.155]
WsdI2W@i1ULFQ1.79qfph2.eg
vJfpTf3@Hh4x2h.25m0idq3.fr
oRqbgftr@l6jg0.TV
NiynsKb@k9BTX4-FV.hc0skm-o.lv
w9uGwf@4hop8.Jb9655is.nr
"NVUW+"@6jbe.KM
QusHU6JMR@0RXKIZNH76C3.Oqwcfr779e.MH
}C5IwKv1S45vlmPaaVHhF@[IPv6:EBF6::]
T7rXlYc@4AI1LM.2o.uk
uuCiDC6c@Maar3.65hlg-wf.t3pt9.FJ
w2mNOvIUh@dx3ep7ew.ru
b#Add@9hpopo.Xg3tbjchdpt.TT
NtrgJjfj."NBwi"@[142.085.096.018]
00lF9UB@2NR2.rs
MPr42ye9@p08lcrzs.4bzxfznsh2bhgsa.CX
awwLoYLn~c2LfTEVT@fwksx.qoj94r11kw19k50k3.gd
gRZ5w9epm@p6adico3auugj5qklec.Sm4bx5.li
zfdZ67Y@1azhq.dl3xxzni2.rrj.lpclc6g4d.sl
vTWwSD4fb@uBSOHD.3g.u3mb.gf
cYFVxcC6E@F9g0b.n1339r.AU
pnuXl@s1alo2.tc
lKy64zp.Cbg8BM@y0S.6uiux8h8.0udipt.ma
|9FDgc@vbrz.3L.av4kmt.rs
skcHAu7@xD715N1.DZ
BfcgHK3@[220.136.9.224]
LCOEag@Gwm.drsa0.GL
qrNZtp3vO@a0gr.8j9cvcgy0p-3.HN
lfW2rei20XWSmpQoPY1Dl@[(N &c]
WFBBEv|@q7R2J.oy48740.pm
6H6rPx@zVJ40.xgyat.cLUX6SVFJWMLF9EZ2PL8QQEU7U1WT0JW3QR8898ALFGKO18CF1DOX89DR.1tfu30mp.CA
ytG@J4auwv4has.PS
"X;+N1A\A "@rc9cln0xyy8wa6axedojj9r0slj0v.Luy9i6ipqrz74lm5-n6f1-2srq5vdo-opef747ubdykv5hc.2lztpe.er
DQTmqL4LVRUvuvoNb8=TT@2up3.PY
NC0OPLz@kcru1s0mu.name
kBoJf{XaGl@[248.166.223.221]
pEjZPm8A@v956Y7GQV.5uu6.Ribgf20u.6e.0do1nki1t.ahy.6iy.sm
pIFWkl2@w9N0Q.MC
p=VTtlpC@w3ttqb.FO

View File

@ -0,0 +1,206 @@
#!/usr/bin/perl
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use warnings;
use strict;
use File::Spec;
use Getopt::Long;
use LWP::UserAgent;
my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
my $version = '';
unless (GetOptions("version=s" => \$version) && $version =~ /\d+\.\d+\.\d+/) {
print STDERR "Usage: $script_name -v <version>\n";
print STDERR "\tversion must be of the form X.Y.Z, e.g. 5.2.0\n"
if ($version);
exit 1;
}
my $url_prefix = "http://www.unicode.org/Public/${version}/ucd";
my $scripts_url = "${url_prefix}/Scripts.txt";
my $line_break_url = "${url_prefix}/LineBreak.txt";
my $word_break_url = "${url_prefix}/auxiliary/WordBreakProperty.txt";
my $word_break_test_url = "${url_prefix}/auxiliary/WordBreakTest.txt";
my $underscore_version = $version;
$underscore_version =~ s/\./_/g;
my $class_name = "WordBreakTestUnicode_${underscore_version}";
my $output_filename = "${class_name}.java";
my $header =<<"__HEADER__";
package org.apache.lucene.analysis.core;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
/**
* This class was automatically generated by ${script_name}
* from: ${url_prefix}/auxiliary/WordBreakTest.txt
*
* WordBreakTest.txt indicates the points in the provided character sequences
* at which conforming implementations must and must not break words. This
* class tests for expected token extraction from each of the test sequences
* in WordBreakTest.txt, where the expected tokens are those character
* sequences bounded by word breaks and containing at least one character
* from one of the following character sets:
*
* \\p{Script = Han} (From $scripts_url)
* \\p{Script = Hiragana}
* \\p{LineBreak = Complex_Context} (From $line_break_url)
* \\p{WordBreak = ALetter} (From $word_break_url)
* \\p{WordBreak = Katakana}
* \\p{WordBreak = Numeric} (Excludes full-width Arabic digits)
* [\\uFF10-\\uFF19] (Full-width Arabic digits)
*/
public class ${class_name} extends BaseTokenStreamTestCase {
public void test(Analyzer analyzer) throws Exception {
__HEADER__
my $codepoints = [];
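# Mark the full-width digits U+FF10..U+FF19 as wanted up front (listed as
# their own character set in the generated class Javadoc above)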
map { $codepoints->[$_] = 1 } (0xFF10..0xFF19);
# Complex_Context is an alias for 'SA', which is used in LineBreak.txt
# Using lowercase versions of property value names to allow for case-
# insensitive comparison with the names in the Unicode data files.
parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
parse_Unicode_data_file($scripts_url, $codepoints,
{'han' => 1, 'hiragana' => 1});
parse_Unicode_data_file($word_break_url, $codepoints,
{'aletter' => 1, 'katakana' => 1, 'numeric' => 1});
my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
open OUT, ">$output_path"
|| die "Error opening '$output_path' for writing: $!";
print STDERR "Writing '$output_path'...";
print OUT $header;
for my $line (@tests) {
next if ($line =~ /^\s*\#/);
# ÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
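# For the sample line above, this loop would emit roughly the following
# (neither U+0001 nor U+0300 is in the wanted set, so no tokens are
# expected):
#
#   // ÷ 0001 × 0300 ÷ # ...
#   assertAnalyzesTo(analyzer, "\u0001\u0300",
#                    new String[] {  });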
my ($sequence) = $line =~ /^(.*?)\s*\#/;
print OUT " // $line\n";
$sequence =~ s/\s*÷\s*$//; # Trim trailing break character
my $test_string = $sequence;
$test_string =~ s/\s*÷\s*/\\u/g;
$test_string =~ s/\s*×\s*/\\u/g;
$test_string =~ s/\\u000A/\\n/g;
$test_string =~ s/\\u000D/\\r/g;
$sequence =~ s/^\s*÷\s*//; # Trim leading break character
my @tokens = ();
for my $candidate (split /\s*÷\s*/, $sequence) {
my @chars = ();
my $has_wanted_char = 0;
while ($candidate =~ /([0-9A-F]+)/gi) {
push @chars, $1;
unless ($has_wanted_char) {
$has_wanted_char = 1 if (defined($codepoints->[hex($1)]));
}
}
if ($has_wanted_char) {
push @tokens, '"'.join('', map { "\\u$_" } @chars).'"';
}
}
print OUT " assertAnalyzesTo(analyzer, \"${test_string}\",\n";
print OUT " new String[] { ";
print OUT join(", ", @tokens), " });\n\n";
}
print OUT " }\n}\n";
close OUT;
print STDERR "done.\n";
# sub parse_Unicode_data_file
#
# Downloads the specified Unicode data file, parses it, and extracts code
# points assigned any of the given property values, defining the
# corresponding array position in the passed-in target array.
#
# Takes in the following parameters:
#
# - URL of the Unicode data file to download and parse
# - Reference to target array
# - Reference to hash of property values to get code points for
#
sub parse_Unicode_data_file {
my $url = shift;
my $target = shift;
my $wanted_property_values = shift;
my $content = get_URL_content($url);
print STDERR "Parsing '$url'...";
my @lines = split /\r?\n/, $content;
for (@lines) {
s/\s*#.*//; # Strip trailing comments
s/\s+$//; # Strip trailing space
next unless (/\S/); # Skip empty lines
my ($start, $end, $property_value);
if (/^([0-9A-F]{4,5})\s*;\s*(.+)/i) {
# 00AA ; LATIN
$start = $end = hex $1;
$property_value = lc $2; # Property value names are case-insensitive
} elsif (/^([0-9A-F]{4,5})\.\.([0-9A-F]{4,5})\s*;\s*(.+)/i) {
# 0AE6..0AEF ; Gujarati
$start = hex $1;
$end = hex $2;
$property_value = lc $3; # Property value names are case-insensitive
} else {
next;
}
if (defined($wanted_property_values->{$property_value})) {
for my $code_point ($start..$end) {
$target->[$code_point] = 1;
}
}
}
print STDERR "done.\n";
}
# sub get_URL_content
#
# Retrieves and returns the content of the given URL.
#
sub get_URL_content {
my $url = shift;
print STDERR "Retrieving '$url'...";
my $user_agent = LWP::UserAgent->new;
my $request = HTTP::Request->new(GET => $url);
my $response = $user_agent->request($request);
unless ($response->is_success) {
print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n";
exit 1;
}
print STDERR "done.\n";
return $response->content;
}

View File

@ -0,0 +1,427 @@
=========
This file was generated in part (i.e. without the email addresses)
by the random text generator at:
<http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-rosalixion-word-2gram&paragraphs=20&length=200&suppress-quotes=on&no-ads=on>
=========
waist and Wintja are relearning how dJ8ngFi@avz13m.CC we spread out, but it
here before, our dimension of story. In Bed and Marys opus in the last thing
actually having difficulties moving, Spiros rises to our hidden on your
<JCAVLRJg@3aqiq2yui.gm> orders, my love: Im seven doors and with gentle
fingers, then disappears? Whats the idea <kU-l6DS@[082.015.228.189]> of
<37layCJS@j5NVP7NWAY.VG> the "%U@?\B"@Fl2d.md pages blowing to appear on Earth
in motion (what rules did we can take a radio changes. A VOICE: Hes a
scoundrel. VOICES: Burn him! Burn him! SPIROS: Want to team of the couple is
the sweetest love aH3QW@tw8uo2.eu of the teaching teaches members to
communicate with time interplaying and linked and you marry it. It will leave
Bvd#@tupjv.sn the logic of it from hereing those people were all
SBMm0Nm.oyk70.rMNdd8k.#ru3LI.gMMLBI.0dZRD4d.RVK2nY@au58t.B13albgy4u.mt the
artist stray? Does a few rose doom the UFO with my dear Sissy says Sissy,
holding hands up a bit of DvdUJk@61zwkit7dkd3rcq4v.BD fate falls asleep. When
an internet age is ~+Kdz@3mousnl.SE currently working with his bedside table,
and brings in a shimmering timeshifty verse vortex, the dream. Victory is
hallucination, my hand for more. Mmm my head,
C'ts`@Vh4zk.uoafcft-dr753x4odt04q.UY in five. (Spiros waves goodbye to tell
you, honeybuns: The poisoning is, but no addresses. A message identical reach
across the script. }0tzWYDBuy@cSRQAABB9B.7c8xawf75-cyo.PM I grasp hold their
flapping wings and when theyre seemingly infallible information? Bookshrine of
a sip of defined the Great Horned Goddess of no feeling.) Meaw. FFIANA: So,
darling. Dont be dry white and teases him back
lMahAA.j/5.RqUjS745.DtkcYdi@d2-4gb-l6.ae in society not speaking, giggling
V85E9Hx7@vpf0bs.bz in MGBg2@7F3MJTCCPROS8YETM0B4-C9P7WXKGFB0.RU the boring
f***s! (She leaves and Him Lover, Outlanders. Plus Universe where better than
they just the land any letters in the gods. Expected, this at the threesome get
even touching myself. rsBWOCJ@lYX0SILY4L53Z3VJPSF6.pwrawr.vdpoq.nz He picks
dIyLrU@9A40T2ZIG7H8R.t63.tv up at our harem world 6dAsZKz@d33XR.IR so pop up
you will be gathered, then Wintjas hair; smells of the manuscript: Contains a
EnqCC@2bk6da6y08.LI common AQ9yV@Mfqq32nexufgxzl4o7q5jv3kd.lb universal within
this lv'p@tqk.vj5s0tgl.0dlu7su3iyiaz.dqso.494.3hb76.XN--MGBAAM7A8H web.
b6/zomNkV@8jwm-he.IN The
5FLuakz.hXVkuqDt@iBFP83V6MNI3N0FRWJ9302DS-0KHRV6O.1bf59kj64uj5b6e2zfn.cm cosmos
is filled with soap bubbles. <RhIwkU@58vmet9yfddpg.3adkmhrv1px.AO> I cant
concentrate with a nearby and he nEBk6w2Q@Bb5ib.2pay.so pours.
<AlW5CMAn@qos-53u.j91qq96d4en129szf7099kxv5lo6yo.gm> Its a wine with the joke
in the only good enough! It hit again the house. He thinks of terrorist, this
water. They were in verbatim rewritable. World by a quick eye shadow beneath
the stairway; we not easily counter weight, is filled with your own perceptions
about it. (Eve, how to talk to you really turns on its physics. The lover on
the sunflower in worship of the? (She smiles.) Greet
<QPYBDV3.Ah/h8U@x3v444pzi.1cvgokam.PW> it makes sense… Not really,
5Iwbiq7@p9s-2pixps9jwzyhfroxqivw8sv90r.xn--wgbh1c from up in the candlelight,
denser <AaFU9L@3yj1xqf1.cz9.ac> medium to say something. Shifting of that
|iCmQ1@rum6w0a7wt.3QLD.ht71.cx the eyes and there came. And now, approaching.
When the thing. What did I woke up the printers! We EhLTUjo@rEK.sJ44H0.GR shall
we are heard like a glimpse of hyperspace. It travels further and kneeled down
bHEbq3Rp@33.lKSSMY.9xaurtfle9xe.iu4810l.fj to you can walk away? FFIANA: I want
to eFcup.cPPEW@[1ae] speak. The Fountain of the background when I extract of
hers, so strange book and a royal destruction of songs of this pearl. Not often
by an incinerator vessel. Spiros, the delivery of alien exists now. Forward.
The rosy guidance of wine. Notices that is partly the pipe
p907@bk3o.fvtmw2m2.Uutr83x2yt4.2nuin.EU of the chance in Old Town. D Strange
music keeps one of the top of myth and smiles.) SPIROS: Nope, cant even
PpW2L5.QgP2n@9rz7.a5qi.oRH1Z.8ov.UZ more! says it doesnt exist! The world in
the cosmos loves us. (Spiros soon
o8UgG5fewm4vr9Ai5wPS@sgh.2F-OLKLZ81DIUET.xpya0vtx.fj here again aixQH@z-y.AR
and again he turns and blinks with you want? says Sissy looks over Wintja and
the fashions of Fit to Spiros continues. Its a situation of the barman says
Spiros. I read the river. SPIROS: Damn I said. 69
<jVTeWQfL."M#~t Q"@1e.oglq.ubk.SZ> he kept locked up into a suitcase along
her body, points a female voice of 6e5QQuy@N7.2cuw3x2wpddf.paycp1pc.AI their
part of flowers, and Marys opus IqG6Fl@[220.112.120.54] in my PROSECUTOR: Hes
<lWHH4eWSn@tbxyb7.jhzqxrk.lv> one is <P1zO*RaAr@[111.99.108.22]> unsafe at a
little <d00gy@[4TC]> secrets, we made to write: And a drink of Eternity,
Speros, <1yNINoBU@[136.003.010.238]> Mr Boore, back to me! Lovers break
Ms8ox@[_3Tuehr] the code so
<8'Hk8a@ksf7qqaa7616xw8dq80h.K6fy89c.3k-8c.g58m48v-18zh8v> recap.29 28 So,
darling. Dont leave each itself, on and devotion to all about time
<wtWDNo@1sjmcbbli196-765mt7m8o8hywft.7-ga6rsnum8v.np> has happened? ANON 4593:
What the tongue Such as she did you back and the whole moment in
<"x)yO "@7le5o2rcud5ngs.Qmfmq.Jfxv8.Zznv6t6il.MIL> your own lens, thank you
1hXd@f8.1kxqd3yw4j6zmb7l7.US arent already. It tastes them have ever come come!
The tomb. Blink to him and flips to it, but the palace. No
"8}(\$"@mu2viak0nh4sj5ivgpy1wqie.HK way$A!-(B Happily: You smell of it
all and yet sure this pool Th7XoAs5@ggdb.BI of the first of his
5iDbhah.xdtF1x@[59.55.12.243] heart j2ovALlgm2Wcwx@5jphzt.TN can take to the
wind, speak to apply perfectly, you say turn toward sexual nature and lays his
ZlaP~E.4Yk1K0F@lF6VN.M5.Nj.PRO pipe. No, landing from
cFCvIJAw@l93H0R1W6V4RI0AY7RLRQR4KOEVQPEG-PDTF03V4D9A0.xZZK5.lu the fruit will
say. Dont talk like the west 8Ju2AW@1n.h7.vu wing of the letter in every
second, <"\nkP]{"@[Vej\yo\HD]> but he slipped in. Yours Spiros and there
when I imagined anything can take returning? <fKWC?@qgcb.xn--mgbaam7a8h> Where?
With? Who? Going toward his body and kisses the notion that has joined odds. A
scattered around <L4BbaB@hv1.BIZ> slowly, moving eyes on and
WvSmV@qpx15vzmbtxzvi-syndl1.ML turns toward her. She sips some way everything
began was finished my wet Earth. Warning
"3|PX~Cbdq"@U3vp-7k.8c4q3sgpwt6sochundzhx.museum for me.-A City Different.
Let your myth LjH9rJTu@tkm.gy settles over it
<8myMO4@hOV209VZ-SHGBIH5FBYLTCQZSBW-U5-1.dv9> means to Our of a book he has
only but <vQgXEFb@maxmrbk-5a5s6o.6MZZ6IK.awjbtiva7.IL> the imagination, master
phreaker, <5ohpA3ww@dcpcotwccy> main railway station. Loses the dreamadoory in
the surprising success.) A note from round is her splendour in them? Mmm my
dear, were 6TVbIA@r50eh-a.la from them keywords. Boy,
AaASl@Bsteea.qHXE3Q5CUJ3DBG.S2hvnld.4WJWL.fk my own imagination, master
"CN;\-z 6M"@86.qc7s.23p.ET is the usual fashion, says to stream and appointed
space-time continuum. Dilutes your zX3=O3o@Yjov.7g660.8M88OJGTDC5.np sleep. Ive
been seen, he says the ringnot we proved? (On the pact. Thanateros is an
internet café where the Queen. Now cmon, lets take to raise the apartment. Like
a limousine and I kiss timelord slides his hand QFZlK1A@4W47EIXE.KY in words
now. Get us in the same time conceptualisation is to bed. STEFANDIS: Dont do
you think Ive put down the green lush. She often by God of a 15 minutes. The
others knew into the 1guLnQb07k@ab.ccemuif2s.lb you-know-what. Youre the luxury
hotel. Diamonds and receive the process of action. We wanted in the nominated
bird. The <Jddxj@[111.079.109.147]> woman undressing. He has him just get at
Hotel California. Its <Hj06gcE@[105.233.192.168]> about all devices. Playlist?
Initiating playlist. Timelock? Timelock on. We have a u8?xicQ@[i\21I] lock of
the apartment. Like a kto, part of Our superhallugram to hook up and
CczYer}W@bezu6wtys9s.lft3z.mobi outs. polish
OmpYhIL@6GJ7P29EIE-G63RDW7GLFLFC0M1.AERO fills the crowd, comes from the music
is impossible. SPIROS: F***. You are your voo goo.
<2RRPLqO@8lh0i.vm7xmvvo-r5nf0x.CY> Daysends burn deeply and will take
TOc!BhbKz@F-myy7.kQWSUI7S3.net this he thinks. For UFO from elsewhere. Bzzz!
Bzzzzzzzz! Bzzzzzzzzzzzzzzz! Tell them "0\!P?".shQVdSerA@2qmqj8ul.hm the leg
of LTLNFsgB@[191.56.104.113] all, until it has read it is
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU there. <VGLn@z3E2.3an2.MM> Once
TWmfsxn@[112.192.017.029] Spiros under the place
2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV as were not a house of the
rosebushes and the whateverend, feel her waist. She changes everything. We had
decided to do you know CjaPC63@['\RDrwk] this, is what did leave, pray; let us
come to, <Ayydpdoa@tdgypppmen.wf> what history as died. Strange, Spiros with
delight: That night "gfKP9"@jo3-r0.mz and gold case
<aTMgDW4@t5gax.XN--0ZWM56D> is spring: the aeon arising, wherein he returned,
retraversing the mcDrMO3FQ@nwc21.y5qd45lesryrp.IL gates, first
<NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp> to reach session. Initiating first
part of the main hall toward his own spurs. Hes an <XtAhFnq@[218.214.251.103]>
Irifix And older ones who wins? ADAM: x0S8uos@[109.82.126.233] The violin and
reality. The hidden set up to come. ROSE WAKINS: No answer. The
ALB4KFavj16pODdd@i206d6s.MM rosy pink cigarette.) Visit the supreme chest and
express in orgasm, my version of clouds contemplating existence, the horizon.
Best grxIt96.46nCf@nokjogh2l4.nCMWXG.yt of sheer emotion. Spiros laughs. Why
did he says Spiros. Ban him, he called for it, sir, says Spiros
Fgbh7@2rxkk0bvkk-v3evd-sh56gvhxlh.hhjcsg36j8qt98okjbdj9z574xdpix59zf6h80r.Gyb4rrxu.ve
laughs. uo0AX41@Fhlegm1z57j-qvf5.p8jo6zvm.sc Can we determined that when I am
Spiros, quoting Jim Morrison. Death. Design patterns, youll hear Spiros says.
They cant G decide if he was your key that we playing? SPIROS: Why wont xxx
would be imagined. Technology so beautiful to fill his diary; I like a match.
Puffs. The Star Eagle. And a person with a play with. sjn4cz@9ktlwkqte.bv
Faberge can change overcome your work, a large-scale coordination, Goddess say
is blasting away to end is <b04v0Ct@[243.230.224.190]> very tricky to stab it
as a turn me to the champagne on your obsession about his nose and
F!FUbQHU@uvz7cu1l.ciz4h2.93U4V.gb somewhere <6CHec@nONUKT.nl> else, then far
stretch. The great outdoors), puffing dried cum on the manuscript I… O
one knee, feeling and sex in igniting <zbmZiXw@yb.bxxp.3fm457.va> bomb. (A
housefly, Musca domestica, lands on into the device. Let me met. Wintja and
victory. <"/GdiZ7f"@[221.229.46.3]> For years in tipsy bliss. SISSY: (Nods.)
Yes. Now you witch. And we must remember, will tell you move but her
NJde8Li@f7a.g51VICBH.cy creation with gentle feet, naked on strange hovering
futuristic vehicles that when retrieved upon a thought, or reflected. The Crew
coming on our gratitude for you address then ventured into a dream, has begun,
she sees a 6IeAft@e-3fp.Nkh7nm8.v8i47xvrv27r.pf golden ball and 4 If you that,
Izz). Lapis, to the return all laugh. Applesfoods maybe, says
TC*Qopzb@xIOB3.6egz4.m-24t5wmxtmco4iy8g91o66mjgha1vjlepyffott.E5ta.p9.CF She.
Cmon I Stefandis.) Count me with a bed sheets, carrying gently away about time
you rather dramatic, which reaches across this day. It brings forth between
suns. How about the white sugar, leaves, sugardusty sugar, drinking of time.
Believe. There "_3Sc_"@[193.165.124.143] is the soul, W0dwHf@[25.174.65.80]
and only Spiros. Love you. Believe in the multi-leveledness of the 21st century
and exchanges a book called Sphinx. Alien Star qPkkP0@4k0vs.oaak2z.3JMTI.PK
initiated. NYKKEL HUMPHRY: Of Make ways over town.) SISSY: …and you can
turn slowly but not yet audible, appears, XzZh7@[\\Jm D%U] in the silver
melt together. This way of vision sees through time). Brewing with a kiss?
<66SGHzw@Oqnr82oml7jct0b8crwbstdhcgc3khxj7dj-t898mzro0p3-rvp-dythh.TN> Her
feathers: streaming water of the wind. I started interacting in a boat, on
ot4tPF@[AY\j] her e4seIFbl@cib.cg thigh as she blinks happily. Here is
<B2w025e@r2H7BW16B24DG1S5DED.bg> what you around him, Magus says the list. Its
about what that atweEde@blk-3y.mgvoh6l9my.F6.FI there is functional. We
vanished into the computer. Up hills and enable entry using his long adventure.
Do we are all detailed trip against decent behaviour and girls. And you
alright? You evil laughter: Muah! Muah! Wont wate you all uDoPcRGW@rEBD5LUT.ly
way that there <2KQhx@Bba.u--9b5bc0.NF> is either both night And our dimension
of a bad joke, says nothing, just after time. It was indeed. Now that will make
the streets. He instable? What shall do. tKWc2VjVRYD@[254.190.162.128] Who
wc3W16^@D3v2uxqqeclz.w1fd529m.DM are heard like our love. Of the stairs too,
usually through the note nearby and you go now. If I remember Njg@6S8MA.HK how
it instead. (She chews the rosy petals, frosty and the land at first part of
waking? That we "L\^4z]92"@0qp--walx.MIL like they meet you.
<X08sWFD@62GNK.tN4.f1YXX.ug> And out into the bed. From the gods have loads of
a dark winding stairs and laughs. Why doth Her devastatingly good eyesalve, to
tell it says the Rosy Dawn. Rising, rosing, the story? (For all the UFO
shimmers from around him, but we look before eK6Bz1Bu@[rX;J&036] the Eternity
we shall never go now, look, he thinks, both go for the words said. 69 people
who live in Thy honor. "~`o\: "@hO4UKF.oZBWV56B.cmn.DJ And
lcgUakx@[pjGd&i2] here and his life has tasted of becoming more clearly. He
is dead. Calculating possible meanings of it instead. BqdBTnv3c@wf35nwaza.ME
(She whispers, smiling.) Theyll be able to help. ELLILIEILIA: You are created
the visible "a#Um{:\'\bX:"@in7tjo.uw8wil.gp world, without it will see now,
says Spiros ApIbER8'@[&Y] thinks. Every time and go to write fiction. Indeed,
love something I pop, from the play? asks JTsM0c!s9CzEH@Sd.mh the taste of the
outrageous wreck of dream, born and there
hy2AOUc@uqxzl7v0hl2nchokqit9lyscxaa0jaqya1wek5gkd.NC was still result. Search
taking <pY7bAVD4r@[,>T*R T]> out into !0axBT@03-gdh1xmk3x9.GH my dear, you
know, of saint? What did come here from the Crowinshield Garden, amongst the
warm kiss. Everything is white marble statue he is tunes faberge intricate.
Spiros, a particular frequency, vbtyQBZI@20al5g.ro6ds4.Bsg15f5.NU spinning,
trying to a trail of the narrative that it while the Queen, giggling: What are
a letter with a web we could 2^ZhSK-FFYOh@Z2iku.rg.Z0ca1.gs not a
G1RLpOn."yfJpg["@mXEV8.mu peculiar yrBKNkq@a2a1.Aifn.Ta2.dj stench of history,
when appearing in the interface as well as follows the secret I am not
teleframe the room, disguised <Wok5G@b5aqobvi5.ni> as the brilliance of the
pressure of the modern world, but
nXz9i.=EL9Yj@93r8do3ntizibg1-5-a0ziw9ugyn4bo9oaw3ygrxq-eczzv1da6gj58whvmo2.rs
whatever. The solid concrete, Dp63hd@B1kbahyq.PL and put it stumbling or why
wont the chalice with communicating with language only she says Spiros,
whispers.) We left from the second birth? The young man is part of the teapot
opens. A man in disbelief.
y01rn27SFq@o0HNP8.C5.i4rvj8j338zgter7er5rkwyo5g.atnc0iuj2ke.8or6ekq0x.IO
Outwords scratch skills against her in fairy gently
<0RiEo@08mnvbu.p661ernzjz5p7nbyix5iuj.cig5hgvcc.SO> bite of death and Wintja,
playing with the name by <Dwxab5@1sx5y3-umsy72nl.74lwye5.DJ> your dreams. He
arrives <IvdZVE4xRk@0vw7ajl.AR> the information. He swallows all the f*** me
tell her wineglass and tangles. Synchronising <CvQxhXJ@d5a7qnx.ke> weeks of a
reason why everything seemed as wet dreamery, remember? Got a purple Ipomoea,
crawls through the first stage has the riddled beginning to her in a butterfly.
You landed smoothly. Preparing to n7MxA4~@[4(R] hit a world is man. How much
in <hEhF@3TV5WQ.fbkx3f> mystery. And RFGzu3hD0@wbh4.sm furthermore, what the
edge of physics, death and eOADW}BcNG@2568p3b4v.Xq3eksr.GP touched smoothly ah?
Fashion feasible technical population resulted distinct produces
AsAMWriW7.zSDQSAR6@Gg2q4rtgr.GG recognize instance the room at the garden.)
PERNELLE FLAMEL: (To Mrs She is basically very drunk. I see you
<cDCVlA0t@[20.116.229.216]> cant I walk down naked on it to bed bed into
c=yJU+3L5@n2x3xhksf.gvreani.MZ the stairway wfYnaA4@lzojy.4oii6w6sn-p9.kh and a
kiss as though the point we see the numbers, the phone set to be displayed,
disincarnate entities can feel my wifey. Spiros empties the answering evening.
That is kdeOQ5F@vD5Y.wmmv.7rswz.1zelobcp5qxxwzjn.fOEJZ.KM simply not but I
could do to the ground, and the decanter ppULqb2Z@Hv9o2ui.AO is my friends and
says: I <tOHw@[IPv6:3500:8B6C::CB5E:1.124.160.137]> see The elves of dream
telepath posts, but makes a gentle people with a redirection is generally said
Tadeja. Its over, or of ages, you excuse us walk off to Talk A never-ending
one. I remember how cute she saw the neat fuse weds sexiness. A thick paperback
book itself continuouslyposition, have heard in the noise We are presently at
the first of the death MWLVsL@7nhliy.O8mjon3rj-kb.t8d6bcpa5i.au mask there is
accurate to meet by to this important worse material in separate directions.
Spiros stands, and arrows and orange from a witch and down the mix? he feels
Wintjas 13th century. arling peach, cosmos loves playing with silver trays with
the <BN0EY@hh9v.p9bwgs.TN> language as RgiAp@d9ln.bf I still result. Search
taking time and time <PBugBo@97gcz.DJ> in time. Spiros, how else or
Fh#dKzbI@[+_] nonexistence. Eros never guarded the horse stops. Move. Stop.
Move. After earlier squads mysterious source. It inscribes in case you are
applause. The world was a. With swiftly cover <wyqU-C9hXE@wPRBUI-WS9HXE19.LV>
it as in yourself! 5 Yes, now comes from half walls of us, my love. I am your
vast operation is all worked out? O how long ago. It glimmers, node of the
voice, the middle of the introducing of utter hell on the car unlocked and mind
around midsummer and not believing in <muC?Js@[IPv6:47FB:5786:4b5e::5675]> his
lower lip. From the wind say I was inspired to live in a crime. I know, and
find people have been reported found a digital electronics. Is the pillow,
touched falls down their part of the computer and our world
<yLTT2xV@wdoszw9k1ork-z-t.kq.l3SEO.Lb4jx0.NA> come walking in
<6zqw.yPV4LkL@dA3XKC.eg> the stuff to help. Websight. Dedicated hosting
wordpress blogger coined Sister <S5z9i7i3s@Vzt6.fr> short Sissy Cogan. She
answers. It is finished his way that includes getawayways. Compiling focused is
this case? Then turn on. ANON 4593: What are pretty kinky a story about the
L|Sit6s@9cklii1.tf strangest child a Syntax of passage and Wintja and
reportedly after demolition, decay, and twists up to tales endwhere. This way
there to born from elsewhere. Bzzz! Bzzzzzzzz! Bzzzzzzzzzzzzzzz! Tell them that
words from sleep but no poet yWYqz@mw-9k.FJ am I woke
Knhj419mAfftf@R26hxll64.3qtdx6g.AL up in a kiss made it is heard on Midsummer
our cards like big fane beneath the secret of the <aZYHUr6@Shyn76c67.65grky.am>
criticising crowd of the gods and here to... TADEJA: (Suddenly appearing in
ZYxn6Px@di0cqhtg.hu your "#mLl"@w1sc0g3vm.j1o4o9g.GW voo goo. Daysends burn
deeply happy, for large bite of his artistic inspiration without feeling as the
season. One within the dreary WYJcFp@653xk-89oprk2im.iemhx9.CC kingdom. (She
steps up with Christine says. The Blooming of y5AXi@[Oa #] The time regularly
we are, she nZErAGj@6sq3-p.r8KQ.aero kisses the gods? I am in his brother I met
years ago. The word <OMq5sBK@udg-5zp1.Dory85.SG> is because we had. But yes
just like a while. Were not matter; W it going? Im sad to
<2bymd@Ojla1hvfpw8rrihrx.cy> where he arrives and information, and smiles
victoriously. 5OMbw0@r2d8cn75.1VR2BJ0J3A8PY.gc0mljc-h.COOP Mmm, you Rudy. And
there and day soon is phone and come <al6X^pQkx@pyj--2hp.lbet.TN> back?
Rephrase that we are good, I leave the gifts of html or center of her right to
him to where the room.) SPIROS: Okay, sure, Ill be a page is to
NkzPW4f@2-0.aaoqccwrgi4olytac0imp6vvphsuobrr115eygh2xwkvzeuj.tl put in a novel.
I want two. "4-b9|/,\e]h]2"@9-iiahsdlzv-v65j.FK Passing
<1AhBt@od77y.s9ZZP531YKW> now. I go identify what we are always win. Anyway. I
know. It is here reaching your script and toward the edge of shortcuts. We came
the Saussiepan and <g8Pv2hb9@[166.176.68.63]> its mysterious ways. I remember
"IA~".Tn03w7@[\>J?] how am waking to, that the secret about it will say the
redpurple wine, Our plan all within this moment you can hear me, I heard on the
clouds. A channel is hidden visible world, without ground turned real, their
every E6aK9TaJ@j0hydmxhkq2q.Svku4saky.MU way to a radius of
rdF2Zl1@9fsic.C17pw9o0.vn apple tree and says Spiros. Here I saw her. He walks
by the landscape of secrets of paper. I love it! But I could call the
<pCKjPa88DG&x5a@4ha07ia2jk.xk7xe8.PM> world with the manuscript I… O
nothing. Im proofreading the most dead branch in qgLb5m@nynqp.DE the screen,
then I did you can remember. qC731@["\S] (If you can it completely insane and
we had expected something our sacrament. We were back. Esc. (Shuffle.
Hallucinate a sip of grandeur, said he suddenly a tree, and ground turned out
the publisher. O about it all. Lets
<vIch1nT@[IPv6:4c2f:A840:1788:ad5:C2C6:dfae:1b1f::]> stay with us. Mooneye
today and thinks and check
GVSMpg@2YGZ1R19XTW1TIH.Re3vg30u1xq6v7cj1wf-6m14939wvgqbl.93mztd.SG the modern
world.) Sissy stands sipping redpurple wine) and you
0jq4v7PMxm@eq6teog.kO6LR3.x2p.53yltrsvgpd3.RO up to be wilds. Spiros 99% dead.
Calculating fastest and chewing she directions!
zdGLZD0P@i2JQNM8.816oja8pkk5zkvyx.KM Take my body and executed with your own
forehead, born from Egypt come back? Rephrase that what is the night. There is
here. Cant you think. And shadows Jp#hSH@74zkerax4.31kr.7c9-yuk.mp keep
dreaming of letting the elves of modern civilisation? Does that fly softly
through the surface. Of the modern world we must Kx^0oZn@oFFA-URZ13B34J.DK find
sub52@aoq7.iHF.CH them, baby. Rosy Dawn. jfVSq9oAR2D@iGU0.7bp3x.4cr.sz You have
become clear edges. And why you told our skin and
nalgU@Yfpbdcv8a5.n9kwz6kyi2u.thic-rws.af.TG places, spread on your air on her
earlier. The effects will be the song by and his eyes are gods. Expected, this
pool of illusions, that makes its golden geisha ball on Clocksmith Alley. Two
female form orbits the two chords on a god, in correct dose to see a book.
JOEL: Spiros thinks as he felt, came out out! We are switched in the matter. I
shall I can imagine the Crowinshield Garden the aeon arising, wherein he once
again. You suddenly changed. And the rose; Will you? Now listen. (She smiles.)
Greet it comes everybody. And what the room, disguised noise We are you in 3D:
you come. ROSE WAKINS: =uC5qVT@56g530cltpekrw.pt I used to read it: Barbapappa
(a gay pirate captain) <QR5&kx@7qhi3bhav5ga0eva.b0sdom.bb> and walks up again,
when you are here; working on to. 8DZQ7@dtr16r89fdw59q.cf Now join you? Im
slowly in white <Q4pNw@6o-9weojl3r7.LS> bed and language whitespace
sensitivity, readability, less punctuation, etcetera. Things had to the Dark
signal has him with gentle blood on to the ages. Stops laughing. Sharpens eyes
from the *mfOc_CN@[G\ 3] starway, Down the uniqueness of the bed
2p`tbG@c767inolrav0hg6a-ucs.y0.tw and Rop{cgBy@Wekdh0xns2um.UK giggles. Spiros
soon here for ignition of the thing Mr and fetches her t*p05lV@017y.MR you hold
their own code. Your brain and Nora in longer. Stay tuned. We
7ZxO80@Dovepwr4l.qxfzchrn1.es8ul0vavi6gqy82.K1hc7.INT must marry me? Eyeglance
is is not hear. He takes a good marijuana. And I had very fluid. It cant G
C_Iphp@5t4rtc.id decide long hair shaved like a while. I have telephones and
waited. He sits there is humanity within its authors and snaps a touch
q+m2x@Cfw.1tm52-kr.BO it candlelight tuning. Just a young man go to the
ad-section.) 47NIL@Hl68os0.66l9bsf2q.SC THE F*** UP. Spiros slowly. Lets rock
on his father and remember: the sea soothe his paternal grandfathers old days.
In to the Honey Queen, xxx 14 hristytio (Ill catch us. Compliments always. Did
you rather unnoticeably. Faster than we got this cosmos. The engineers of
terribly intricate fantasy turned semitransparent, the people have done subtly.
It is THIS bulls***? Count me Rudy... Sissy laughs. Can we are breadcrumbs
vi0LyF9O@p74jz6mxby.it on Clocksmith xQ4jU@rQVWLWAD3T8.4-lnu.AZ Your usage
<zea_0Kr@[97.59.144.249]> of <5HP1k|s@[068.150.236.123]> being a shimmering
green. 5XJZlmYk.3Du5qee@[072.023.197.244] Her feathers: streaming
<fzQlo2R.HSbkNYi@ay8a5so81x2fgkt2rv> rays Wanna take AvNrIHB0@[+n}oV] a marble
from the letter the brink of wheat from the dull ghost of the article atomrss
am I? (He hangs up "!N7/I\zhh"@[204.037.067.146] dreaming? A PEDESTRIAN: I
already told you than the world now, as vlJODxFF@xFO6V.i1.fgad6bjy.NO though he
walks off the flowers. He lifts
<qDe0FA@xpp1le82ndircjgyrxyzkrqu3il.oUKHVV6829P-16JILWG62KN.cr> his head we
passed on a hint of the worldmask of the people we dance, sweet boy, my dear,
matter of bridging millennia, I was it works, and Adam says: And the fathers
pMF64@wssq6kh9uhxk.cA2YZVBV4JW.xX585A.ru that we are in this G3meE@[^!'OO]
stuff!? The wunderdome. I saw "1@0UYJl"@vplkx.d2n.i3tcx3aaxut.lbb3v9.ldq.me
your prophethood of the ones too far! iTH0QND@wg9sizy.lr Further! Into the
planet. He sits on the Other. We came from Egypt to save our dear Sissy slid
her earlier. Ill tell me away with bright asterisms sparkling around
9kF?opSTo9rSDWLo&W&6@xrh32ibf.F0zb6kb.BJ in this young woman in the whispering
wind and hands to speak, but using his <a0FI1m@1olkdpz.W70a3w8qmk3.NA> nose.)
Nevermind. WOMAN TWO: And furthermore, what about the script, says the sun.
Large-scale thinking of a witch? Spiros hears music
<"0H}r}X(p\M`/x"@rY48LPH.Axy.Ue624.TV> and a world as well as a poem
AQL6YBFb@Hxawb15okz.y4.y5c0e.bt ever, indestructible. A newsboy hands
<PEaNVR@m8NH9BVX5L096DRM7YTR.er> Spiros gives the drawing. Looks like to the
<diI`Q@i5fpkuc.7zg2av.D6tzqq.CK> living out TCN0-Z@Tezeq9ejv.ekeab8hz14hui.il
loud from the house. He is disappearance, as I know on the centre of your
section gives rise from 05SnFh@jZ85JXZ.1RO99W5FYK3.uyv7g15.MP which it be close
now, dream once: The stars
<B2Z76Rn@9yce0shfsydxetu1v4-y.rBU2M0.6ik8oapv0zho6n653il25gu4rd216uw03.MG> are
your vGZ2K@C2osgjtel5uerwn.riihbabhh41ve84.r3l.vH6S64.vn presence. UFO. You,
Spiris, are born in Plomari. Steal back door, from his mother: Is it to live in
their doors are like, Nv2ZgL@[037.054.177.155] two weeks with
WsdI2W@i1ULFQ1.79qfph2.eg us across his way to crack matter projected by four
<vJfpTf3@Hh4x2h.25m0idq3.fr> initiated. NYKKEL HUMPHRY: Of <oRqbgftr@l6jg0.TV>
the woman casts a drop of your amulets NiynsKb@k9BTX4-FV.hc0skm-o.lv and the
morning light. Plasticity of the sun bursts can feel it, rises from lands on
w9uGwf@4hop8.Jb9655is.nr the realization of his field of the branded mania.
Spiros says a dream? Something happened. And watching the Other, she says Fast
Eddie. Bandaging the greeter info. The Eagles song by the fragrance of
Timescity Express, is there, by zero. Your star alliance. SPIROS: (Quietly,
smiling faces twitching in an envelope yellowed by It, producing open minds.
This mighty Nile dynamic magnetic strip that sticks). To Ellileilia, two
fingers with the moon undersea settling for "NVUW+"@6jbe.KM insanity! He
rises from the QusHU6JMR@0RXKIZNH76C3.Oqwcfr779e.MH end of wine ride the Logos
and the cosmos loves <}C5IwKv1S45vlmPaaVHhF@[IPv6:EBF6::]> playing with care of
myself up pitch/volume of a violin. The rosy dawn, Adam says: The transforming
magic touch the waist, working transparent, yet its not easily let us
changelings who all across Fountain Square where no telephones ring? Spiros
recently. MARY T7rXlYc@4AI1LM.2o.uk BRISCOLL: What if
uuCiDC6c@Maar3.65hlg-wf.t3pt9.FJ I w2mNOvIUh@dx3ep7ew.ru dreamed of a new
dimension of her in Wintjas direction. Word frequencies, underground river,
announced on your location. Thought b#Add@9hpopo.Xg3tbjchdpt.TT magic. The
violin kept talking to stab it was born from our own life as the dream I was
practically there I want to smalltalk about the station, and so recap.29 28 So,
darling. We are truly is. Its on Crete. On a curtain in a copy of the
<NtrgJjfj."NBwi"@[142.085.096.018]> afterlife, the grass and the lovers pot!
Transistoryness? Radiosyncromatics? Syntax of the modern world The mirror at
<00lF9UB@2NR2.rs> the day soon <MPr42ye9@p08lcrzs.4bzxfznsh2bhgsa.CX> there,
doing it will you will be disclosed, says Saussie. Become the future just
happened? Spiros picks it at the time transfer was
awwLoYLn~c2LfTEVT@fwksx.qoj94r11kw19k50k3.gd successful. Initiating first
somewhere else. Its from gRZ5w9epm@p6adico3auugj5qklec.Sm4bx5.li the
imagination, Spiros saw the words: They cant remember yet? I add to Any time
here, she says. Butterfly as a dark zfdZ67Y@1azhq.dl3xxzni2.rrj.lpclc6g4d.sl
soil run free What do you see, is the natural radiance of death reports,
<vTWwSD4fb@uBSOHD.3g.u3mb.gf> is welcomed. Layer upon layer of Thy angels are
crystal. Red <cYFVxcC6E@F9g0b.n1339r.AU> King and its my opinion. You were
back. Hows it with liquid purple. She looks at pnuXl@s1alo2.tc a man
lKy64zp.Cbg8BM@y0S.6uiux8h8.0udipt.ma on with me. Say the beginning from the
manuscript and |9FDgc@vbrz.3L.av4kmt.rs bare plot. Queen told by the redpurple
wine back where we all be rather dramatic, which they had skcHAu7@xD715N1.DZ
always <BfcgHK3@[220.136.9.224]> include Sir Nykkel Humphry, master of the
inverse confine survey the rosy guidance of her eyes on <LCOEag@Gwm.drsa0.GL> a
river here, to the latest of Sissy. He again set the old Egypt. He returns to
the looser you ready? Y Were ready. Spiros qrNZtp3vO@a0gr.8j9cvcgy0p-3.HN says
Sissy. Wintja sing: Ive put ourselves in him, he has taken a
lfW2rei20XWSmpQoPY1Dl@[(N &c] third <J761x@0IKGVUDNQ.3xpb> person. Whats it
will bring the room on the book in trees and WFBBEv|@q7R2J.oy48740.pm smiles a
pipe he enters the chat room (The church music in comic book aside
<6H6rPx@zVJ40.xgyat.cLUX6SVFJWMLF9EZ2PL8QQEU7U1WT0JW3QR8898ALFGKO18CF1DOX89DR.1tfu30mp.CA>
Rosalias Dawn, pray, Man through ytG@J4auwv4has.PS concrete. Could we? Were
taking over a
<"X;+N1A\A "@rc9cln0xyy8wa6axedojj9r0slj0v.Luy9i6ipqrz74lm5-n6f1-2srq5vdo-opef747ubdykv5hc.2lztpe.er>
hippie up the detail. Rain begins to being married to the designing of love.).
Made myself a funeral. Who are created DQTmqL4LVRUvuvoNb8=TT@2up3.PY (Is that
hyperspace at the merriest of us for that. Christofle is heard
NC0OPLz@kcru1s0mu.name him a huge and wraps if he find? He is or so much more
complex than kBoJf{XaGl@[248.166.223.221] we are heard within the
<pEjZPm8A@v956Y7GQV.5uu6.Ribgf20u.6e.0do1nki1t.ahy.6iy.sm> woman of The
<pIFWkl2@w9N0Q.MC> mirror of p=VTtlpC@w3ttqb.FO dream, born from that we are. A
VOICE:

View File

@ -0,0 +1,643 @@
http://johno.jsmf.net/knowhow/ngrams/index.php?table=en-dickens-word-2gram&paragraphs=50&length=200&no-ads=on
http://c5-3486.bisynxu.FR/aI.YnNms/
ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R
sJ5PY.b5t6.pn/
http://Z%441S6SK7y%30K34@35j.np/RUpp%D1KnJH
[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/
file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7
http://[a42:a7b6::]/qSmxSUU4z/%52qVl4
http://Rcbu6/Oxc%C0IkGSZ8rO9IUpd/BEvkvw3nWNXZ/P%17tp3gjATN/0ZRzs
file:///2CdsP/U2GCLT
Http://Pzw978uzb.ai/yB;mt/o8hVKG/%231Y/Xb1%bb6v1fhjfdkfkBvxed?8mq~=OvF&STpJJk=ws0ZO&0DRA=
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH
Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb
ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J
ftp://213.7.210.47/%e5pFkj6e6Jczc/ypJGG/z%663jYR/37IxLQBPr/Ciq50EUIdueyj
ftp://alv0e-s.88.nJ2B34.ps/s0TgnaY?yOQUt/18CY%16IzNSQu/LaT3dD?io%80LBw%cdXDHU3/ppMyv/DbLDzyceaC/Goa%f3gn/5ebODAP0NAOD/6NkL/uP7CW/gS5TnaS
http://278phvcx21/QGOy%395L/yy5NurSi8S/gMr%553%C9q0S
z156ky.MU/.b%daGKqc/jYZkXK1WE/Abx589H6tADH
Ftp://x68qwf2j7k.nc/qyZfwo%8a/
ftp://yd.ng:40759/L1XAGIuzdMsjUIUwQ%F5/oDjgDsU/&Ze0Wz/ZeWR6cu;type=a#yDMuky
Ftp://Xmswrxn8d-1s.pe.gm/dB6C3xTk%D3x/EKOiTmk%7c/API/0cdgpi;Type=a
FILE:///rKnQkS0MAF#tM%53_2%03%d6ZICH
ftp://R5ecjkf1yx4wpskfh.tv0y3m90ak.0R605.se:51297/zpWcRRcG/1woSqw7ZUko/
file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf
HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND
file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#
http://1qvgjd1.TP/7oq5gWW/Gwqf8fxBXR4/?Br,q=ayMz0&1IO%370N7=;Sl1czc2L+5bRISfD+w&ygP3FhV%E1w36=2Rx
ftp://5SCC6BUYP.Knf1cvlc22z9.1dc3rixt5ugyq4/5OnYTSN/QpCdo/t3zqkI/pn5skT/oJgrGy7
http://2dkbeuwsto3i3e8jaxi6su9wjlmwygtpdp7g65611z-2bbr82uhjqkdv2jrh7.KZ/FiSvI/aaB&dPQ%42kLdM
FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB
ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY
N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/
http://ah-2d4.ASIA/qmp
http://195.139.142.211/%53fk2%90Pj3/V75ySPv@K5ISv/eUiXDAYc#e0%59
dFU69ED1EJ0MLT.G8ef3o.bn:53301/klFVsh/YInBJE/SEIzo5EIoe3
http://[3349:5FBD::213.207.213.043]/k4PbSpylXc%92Qckx/aQfV7X0V/25RN%49ZzvavLgf/re9~I?OP=nXo&oi0mm=f0e5&KK8=9V%13&Wd0%1Ce'0qnS=CFlgRw&4%89V6AON8%53jQhwUvln=r%6edz&W=Pq+T&a%F4H%51p%d9ZIU8l=uyA8S5J%95+Wb&xi3KNa1P-Xwu=&8tCH=BwNWf+%37G16&rsyBG=MnU4S
5pn1q8q0tg.JP/%74XuKtp%F3fqLuGO/CMeC2IRRl./
http://bmm4qto-360l-pbemedo4.SA
sll-9eg.W6pv.rs/WtYGg51Pt%68/R8fsX4a
FTP://r13oym76cysnp77r5sidj8sqgxzpl3ls4xzj.JE/ta%e0PA/5Jwza65o%7D6Uno/RyO%b1B/v6C8yo5K
http://2b4ne4.5ji.oubrfdx24.UZ/%69kMsLF
tv2yy8dnp.tN8DIWG.gr/ladfwSflp/Zr3YKvt/l1QlvEc
file:///eK9K3g%47VnPYStl/GKGHYM6b%23nc
file:///LtZpL/%1CU8lVvcWrTR/
File:///yCPVGaCm/hHqFToHKZw/%29zmDPSQ6183%C8RfpdKQqkCd%51X/lyJABDQymQDL
igth-n.Mcw.ar/LjMApEho5gp825BK/afaST/HWKafQMBv/
https://l89xkmwfh-hprhz.tcay299q.2zruch0/uv/iM/
file:///6yT8LrgRZG%10HsZ/CP1zI%98gHFiT/zAx4%EB/tBv6V8kS
file:///
file:///iYHw2RpUc/9MPLbyq7gTVSx/pYnzm4E
FTP://[9198:015F::]/pU7tr7Zhgt/~cLd7w7.Gb/4MvIKc6iy%58vN/AGZ08o/uT%1e7vtcZD;type=d
ftp://0dfw3ob8y.Jri1p4f-8.NG/DpihVuu3RJ/kEKaPppvl
http://pZRLI6.ma/wAex4MoQ/jUv6Vh%5C2
file:///F8%A5Go9qV/UYzwol/#839W58%4D!
ftp://zo.dz/BSI/enk1F/XjnYRqwHBAyIYdC/rTXmyPP@Smcp:/%E9r7n
nhzbw2.qyevbi.gn/Oxbk%737lUb/OBx7/VX67/%C4fxQxvns/4fNNJ9FjR/7YeGTW/7VOLjOD4/P%89.1Forp&3/wLVBbhK/3GdjIWB
Ftp://4ie4a.fl8g3c5.wjvan5m3j.4sawo3mof.TH/wfcrCzx8%B50W24/ZxqhiPCLDP/SZbReZ4h7
Https://j3bhn0.elhqoer--c.BI/ijN66pIVKxXjOmg/xCHrfc%feFdJPd04IG
ftp://[8F7F:9507:280A:3192:EA30:EBD2:87.9.102.149]:4954/AwLZnTre/8g3Vo%6doz/Uw=dU%70nxbo
6u.vkhga15zezgvdc68uii7dh0svzopjpr3.NG/rXE/6T~KV%06Kq/iO5vG/G2S9YU
HTTP://lZSO.fr/%baWLoH/rsdViX1jMX/jKQg/aWFY%eekWu%17DTY/ASpif739Hht/hHM/oXdG6y/Es2c2Q/UVz6TevIJa
a1JQT907R.ou7o81.al/3Vp@VDZp%9c
http://g746.mhi.xtzovtn01w87au9.tc/%8Dn1XEzK/FsoFQ/xuL0wOc/YNP%53OS3/w5sIf7ox/t%22S9TxaTtK3/K%74%4EabDPe
http://92-uzyzm.pr/UwJkzP/
http://46cda.e92kuq1029.Igb3rjaqtc.Xgpak.T50lamdm4sscw1i8mq1-8.wx6wzqxd92z68sbs43l6.JO/Q7RzRWFz2/
[BD39::62:47.178.113.23]/U4woqa77Wyygc2/cltcO5Xw%EDWZT/%5Fd@GP5vV#wUMoflXqTOsj
Tw95.XN--WGBH1C/CK%fb%EF9/s%F4W7je06JY%49r/Y2L9fzlfd#fprt97Y%72
file:///xjYnAHV2/g%21ZmKfq
file:///JDyfQk8%669N~2L%ecj1/6PySMx8z%19%36/HP5GhmnNinF0p/vavqKxyBLV0a
ftp://v2WJ0E6EX.gw:46170/R1g73Yli4ts/K%09PIdRA/DntZ@
pVRN-P.ky/2UMoA1sYRpmUyd0/fEShDdCyd69Nyh6f/6zP%cevC69rdf0#XaOTpyS%73TQ
http://4u3o/BKdhwRyzG
file:///LdsHfPABFz1vRD1OB6Yl/RS6&1Gmz/mfYul/
ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sgn6&X5EiZdZ0WhTX3T/fa%f3Azz
z3ymb.KM/DdnrqoBz=YtxSB
FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0
nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc
ftp://085.062.055.011/bopfVV/
ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs
file:///vNLDR/Q7QXgZ/6ApHTc6bN4/yihY9ZGy%3BlK
ftp://p2SJ4CE1KFC8CSRL2OY2ALA5TJOCN0FEM-W.biz:51412/
078.085.085.242/kqKkywur6Kv4Qn/-CJv6i1Nxc/
qow6.7RF9YUV12HR9CCFTWUTQRONLAM4PN82GI8E.GQ/oxUj%a6Ch2/bjjphp%34IJ/%65NQDGFab%14B%51M/QtBe
file:///pQ%8CkB8ipZ%2cyZGMf/8USgpQ%54%48e/jCflvdl%3Ec
165.195.223.067/Q3DEaK/58Z29OKkyF/fk9Vl/dKLw%7FR3Fzo1YsTPxmm/XiABg5j23J%1avyv
f1442jv.3w4cg5hy.EE/8hsz%802pLxgSlD%edIt/ESbwLYo/tdn9mrEynmJF~
[dfb9:d316:677E::2B7C]/gsORr%b7gc/?ehIX5=GTM0co5(Dmn91JN&8J=8W7wFuQfZk7sM#vYfk~Km
[11b2::35.78.41.76]/vVfZvUimVO/K9hfOd/4gZUL=j%09PGr#o%23LnBOkk9
https://oL2UQ.yLN-U053DA.bf/CfFIFwe/ZbgHFvLfbEYrStIS2h3r/pqd%14rY/aR5a8hx/aKWFJechP8DT/ypmeBjL7rcbUr
https://[3790:ad57:0B63::e5f7:f6ac:164C]/Obax;zcD/Y%48%9a/Z2xcdar
bl60k0jqkc9.oow84o1.BF/Xly5cTna/BzoQuHi3r8e/o5BDNrvT/=6HRdBjH/Mrp5%02/p%e9pT2Ae
ftp://Bs3ceuxd8ii66gt.X8wwdpt.BB:27095/3BfkvfzcmTS/FTffh&S/gIWvJ5Kd/AlOQ%3EnO
http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w
zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1
ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ
HTTPS://56aderic0knmip9lkqdqag14.uk:45885/lELiK:/vF%4C5Enwqy/P5NGJ2b/dD6sg1yMV
ftp://vlt.3g45k63viz2.tcnm3.UA:60664/AJ9iqYk%c1/uKbohn2/K%D1kequ4z8rxFpJ
Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1
7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb
ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB
ftp://lv56pdepzu0b0fo-04qtxv5tt2jc0nsaukrhtz5-e3u1vcb517y3b135zl.e0r1hson.dk/3TVoqjp6%1FCFSkt/006VZfho/gxrWxgDawM3Uk
Ftp://7n977.Niyt.2fgkzfhj.q7-DJ.Ow7a.it/5zfRi3PO8/1zfKT9%421tP/?SazEijJq%710COQKWeLE/TdUc%b2u/2AxBw9%4BUN6Zp4Z/KfUZd1MTdPv/L4m1tI3/WJvcK1
FILE:///a7kRxh8/h43TYOY6J5%31B/ZfuF%9c3/
[46C8:60FE:7ff2:79cd:69E1::221.191.034.036]/Q2MQ8mttjsMF/UqrKq0W%E6N1#YfB7A8CHYa
https://hnk6fx.2uxg1e9o.pm/I=LKn%a2n4/J&RntX3mUxZ/B1Q.Ilpk3Icq%7fZ/ia:4DLuk8pvsD/mpED3egQJfH/O0es5zrzwWQIC%21K1
ftp://133.195.101.060/U9x99/nrirgTvZnm/QLNzsm
file:///RN%7EGq55Z%D1E/U0BQ1De/o8a@zHbAMS/GOA4KUcR/uaOR6C%f1Y/u5d7
http://[f63f:096e:ee87:792d:CD31:A1B2:83FD:7322]/tnFLqVSRa5h1/%EDX1y4cxiv/GIo.OM0/M4lBr/xgHa=
file:///Td=wh:cuTxKx/4B8%dc%616s&sE/snROY6GQc
ftp://1fcu78n.COOP/eDRJd%82k8FEI/7fbDLiQncgOl
http://obp6jiork.KP/pOedzk/Lo1uNQ796m/hjLXBOr%25AB1/
file:///j3m%a5o5blRxq2/8aDBkHng/OR1ixi5h8kX/nCUz2aDz/
file:///V1tX7rM/7zk
file:///1qw4T%8BKBi3CKv/dxm6%7f8s78R/%83sF6J/K%33qfB
ftp://tyt7r.u6ier1pxipif5.BW/vSq6akPyGUI/wVJ67VXTQeuKM/yB4zYqPh/0RuHq%58G/rBTgdr5F
Ftp://4dx-s0az06e.Su7ir.SA:16277/HWkL7hR1SW/RzpkWipV/LCYQ6/gLpY%807L6/60H1z96%90xdQ/P9jx4DVu/oFa6c#gQo%57wv0vN
FTP://o--B02WG9T7-BXW-RVAJCJN1IALU9EX65WSEXCRHM.Aeh-m.cat:34416/3q9yW%53m/FJ9&U84ik9&e/R.l/ji0sjWb%5edu12nbNSW5c/YMGfLcesN
HTTP://lMxNbKW@tq1imryvi.P7g5o8np1.SK/um4Z2TESWBSrcN/fNehEdgh/sW%6fCP/b2fqBsG
http://Lgwt071.sn/HPn4x/%46zCwYZzy/wzQVoL2sT%E3Yl?974Zu=X+JuSbGjrO&Xu3Fz%a8%19%5159f0r=afHdI3%F7FNrs&Mb0hjV7d=&I43eztc=1k:3+uSz+kdJP5c+bRkUBkF
izojrse33.9WTVFAANL2Y.ly/i3ae/5%0Br%f5yL3/MsnfAk#T6,v%51Ev
ftp://[8714:3F6E:aa8:c8fc:4F41:b8ee:44.74.99.35]/790Ug0mWq/7yBPb/pzh4dTX
ftp://[ACC9::DD55:A45B:7a6b:177.179.158.116]/i1q3SzWTmO%09p%A3/FWDWq8u2Q/7
Nw2m4j4.Br9kvjf-9.3wac-fh0uk.nysyu-emjwy.cat/PGDh:oW%5F/H34QSRwe
6f9f3nny.mq/ai%cb2SZP/qfjOd2mpEH/LUZ.fxv/#3NaTgg
ftp://R1x5yr2ij24e42wlojnp1i-b2bsacd01stfe5-10m0-3z6cwb3aflzrgoo.it:8665/oFbo12T%3Bng=x/%B2FcEUXPHAP/Ni0qL%0bPN4#yhp%5dO6
http://[C794:4d71:ACD4:7AC2::30CE:B0E7]/T8igmbW%6C/DE1%1DyI457M#brpF
HTTPS://rI7HAX2OS.bsajd56xb48.FO/fn9eA4%0A/G96ogw%69SGis/1V0hqVLN6zaQC1
http://toncwiacr.0px.g7pud.MOBI/EdoW/qUMMnH
file:///LkP1%5BcrQ/bnkvBi6F/Q3IRXB7Kt8mvDZ/ZKwDAp%a3/
http://6DAK.8I6FGLS.t5YJHK9GCUVU4EB6NO513HBTWAU0XP5.GL/LDO%8CDB%82p9#
file:///%46f%c5KRhPp/skp1X/OdoS-J1foeE/5H5RIWoip
Http://180.036.254.028/VSiroQpjS
d54n.Agqa6.7e4.JOBS
https://5t33av.5u7.RU/SugrkGKg/FDf6cYm5QdHk%b3z
file:///tGHsUEMaQS/VLn1%6Au#uGnrvY
lm.27.jv4quihwsp.mw/mwCDm0cweP/A8wSZIQcZGV/uKBboAnqevGJEQT5d
ftp://6g4.qe-s9txq3o8vvr5e.5YWZGPDM9Q.820d8wtribsgglbrnkafno126s8vflph9tfmt0mwew/qC0bInpp/fqxKQLzN/hAj/6PsngV;TYPE=I
file:///aR3sSgC/GJu
w26535-k.Ut2.MS/pQP1Rx/NUKUyRSr/21x/CcgOcN4U/Jzw%C6Ft/n5Mu9X
ftp://75.22.51.21/wFDRPO/NLI1ZSecRAfFEAy/kZ4whP%C3A/
ftp://1h3yyf3d8sffjx3rsf3k2y7c459c2gx/%2FfoFDEyWygHgKAuo/KhJZkBlC5r3%99/9I8SMy/25_&y0
Ftp://215.239.176.156/tNfD%09mvdOM%28zx/fc3DTw2nf/#2kySKJ
http://Vyt.4ferfwbkbm.owtk.me/LlUtIjj/BDovC/6vJ4Wbk/ihtBt4d%acVl/ywEBIdg%3dHb/
ftp://Lq.es/%B1ZPdTZgB2mNFW/qre92rM
file:///IZ47ESCtX%aatQab1/V553gjR?Me/#9%68qPw
file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH
ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T
79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO
Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z
ftp://[fd77:4982:C37F:a0a1:7651:E09C:117.093.145.017]/2l91g/s%79lJmUiZ/%A5R2qsJ
[62c0::]/d1lmSzoB/5OBVnzn/kOXW%D23
Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%bed=uY5hO+s+IKk1S&Q=HHXEC+Gof86QIRHy&35QY5=
FILE:///#F9Bgl
jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg
ftp://892f7.oel50j.32.9qj1p-g7lgw.MR:48021/XNKbk2PZQXSvOuGnOAnATDt3/XfHyJtvoC/PW7YrSgf#LmGWJgPw
http://sisas.ua/4CU60ZLK4VgY8AR89
FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2
Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz
file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg
http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG
ftp://tw7d-6yu.im:2055/%66qbqzss/OmPGW;type=d
FTP://zst.tn/QcUpaA/VKvJ2/JN6AKew/iXYIiHm7mfPFmD%21E5/yTQpoiqdbaaS1/LnzOX#VqsobH
eta0q7.2r79g.AC:34736/%abp87fVdPCY/PvO8Uk4WoLF#A*HP1A
https://w9zhko2rttzndzivll92.sbzum.UZ/bgy8l68/Ix72mHu/zlA4CI/IQjc%CD9%255FxJ8A/Dbb%4eTCRu
[2582::]/Mhm%55MWThR4Ne5mZ/xniX3IdG/
ftp://224.3.121.112/G1w1g%1DdRi/T6Eb_NegqJs
ftp://tn.z-o3vn3n4.5wg7.gs/loxilPpcLnsI/topa0Ez/Na%70Dcde
syt7m.TD/2dxrQQvBXC78/Z754hngiYcM/eM%3CaeYeXX/nmUwguwk97VGL/
http://isqogte5i.c-3oixcmy.SY/jlPVRlTs4v/enCZWc3Sl1dJ7/M5GTSZx/Ga%cce%63cLzTJvBodJ
bYIAYQ.9mlnx.OM/t1KK3u/iyQFS4EGHN3uKogL3WGG/6wn5Q5ndq8kHO%734cxgEc
Http://wvfftjk.do/a0%644z/?ATzWOxO1k=%85ulHR
http://fnoY09@bm8xcfjyfiremhz9.sr/E4Rrq2/vQjQKj9fwV6r51/mn3x8he7/W4xCQs%FBvrzb
ftp://vxfr4g5ka.kn/TZSPrYGzv/KzuB%731GA
file:///vjS%f1/ktgHPAL/=v0cZ/WTpVo1/i6XlMCkNI/kukAwc8/thWUblm/c4ICXp/f8AHkj%1C4d%9107v%44hN/
Ftp://t4qxt.hd9ok.aUQ7GIMBGXP.IS/%7ey71ndfLh/m%4A5P%75153tpU0hY73KfO6o/E%7aAkUlK3hX3Fg
FTP://gJ8MRF8UYWFW.iq/cdX7RYOqS/6E6XUh%fcdHS1%dcoDwHgpFId
http://01s0hfwz.TL/C9uEC/K9uWhknP3AxHW/%c56I1zL5Rfdd/sLJeP/2QkQNP/QcW%8aA0A/
Http://gRWSMJ90XZNPAPHL90FB.zfyopzk/hMq%1fD/A5jQ%efiH4Csr/HTFm14uSXf/jW50yvQ6Mb/EJrahj19Y9Y
http://i0.XN--MGBAAM7A8H/Uy6czi/rrAt8esL4/iL2xLka/B3j&7Inmt7g34
file:///aZcnMM/Hnr1PCn/wlTztS7SpL
http://2lv8030.fimc0v081i/cyEUoud6w/gfAlE/iQP:8/dZCue4cKVM3bs/JU%d5ZUA1t
ftp://kF0NLTJGD.HM:44827/Y6CgKRiW/4r7G/Db%bb=7xD/tE/t4ooQHdBsrw/ZvgcX/qTCarGQWa~MKW5nn8NF/dcy%1caO%b8/Di%947%2cB
ftp://4ufofbu/pmLZX%f2wJcQO/B%e0b%64oLObaEx&C/QViF1ohg/Rffvf
dYC57.CI/=G0dg
185.224.223.157/h8BdA%FEv/KLK2f%86LS/gwA4rKKHLarf/b.EyE
FTP://uhw3qgl0bvfp568.e5wkz1l.Dug75a1j.US/R%AE5DNL%C4vMl-TXG/BDSu8PXNYU42aY/MR-hx1/mC2:SJqsCN%d7#smDUT
File:///q3iMCFXfge/Bh%cdvWuy1w%E7Er/Jmmf7DkqSG%35a/VUvFz#8%510SIu
file:///G%E7R44SI/L0Xsc/c15wyz?8Bs4rN7
FTP://eQ23LB4U9CX.vcrnx.2fa.k6rjf8b.pe/8L163hbbt/J%26zcQf/lkieT5x/Efa/A2gUk/o%ef9PIBhPODaAn/p8%55Wsfap/BdTfZ4zm%2fbQt/SY7rMh
file:///7RVk/qIRRZ0b/
FILE:///Rq_/ec93s/HMB24%8esN/%4bO%cayWnOF
File://Yk7ie7.xn--80akhbyknj4f/y4e4%2a0yHu
ftp://4ps9b29prywnt6-1xt9t4cgi8sbwjj6obbw1x-2y-v2tft1eei67i.Hk0u4zwmd7o9z.jp/o4R1sdAnw/Hu408%CB/HdQ6cFhG
ftp://7efqt.LB/EIX~:Q24/b0QhE%751s%F66R7A/IFxxOD2v/uOOPv5jARBJsf
[A645:D622:eb6b:D59B::D48D:f334]/Ulld404y/IM~6P3
FILE:///%16b72yhVw/2BPPCZg/KwHAJ0X3QT/I49wMwmls2j%15xkYc6qFZ
FTP://octvv.2je8.oJRUDE.02y4htgs.es/zwVuzXoFKJ0k9
http://[3A16::]/1rhxoXw9Cv/eWk5gHpYJ/v9gRo/un2Ygo91B%A1f2p/15hJ%A5o%A19TLjzzRrGUT
iG4PTCCG.3zti905z3.ci/42j5.oKj/FZmOBY
Http://pclly.36XVKSPBC/Nja5D
148.020.113.014/ASuvNkg/Zcwt4/PjpwkEUVHbjkeKOgL/%f9hibk/NT9kSmJF%1A/5FaP@BkLf/jTre%balt
tnjbgbiparss2x-xav2mitawqn9ema07kfk6kjck.xC1U6J.hm/scUu%E5D/qZ9K%1CX.d3mWJb/-SdvwN/nFS0ZdZDNQA
http://[3173::]/YHDIJlMkv/oFpVHGs/7Dn%61pqA%23/ZnaIIPD%6cj/
http://i4f8l.sc/WuJNKVuflVGa8/%85hi4B1G/mPs/1KfX%12/WswWA%B3i1OVsF/Z;wC5kkDQ/XIOtrdBl%D9%33
https://v24gyfj.xfrc5dy6xuz3paev4rggl3xeg3vxzw7cz98pbcgum8xlczt-n.SU/Mb=PxgWX/J04ScMxk8u/oH%A08nv/3oXR85tM/
Ftp://c82a3i5u.tf/v%D5/%05QNNYI&ssnoF.
file:///MaIzEiaVY/ssIPwkItF%EBIUy
Ukg.sb/Q24uLBUl
HTTP://Aphi-iog2t.PE/SSwgnY7af/VabUxcEU2i/JI%434fkP%7cO#EWmOFU%5cy
file:///FXYZhobB0jX%5BD7PIt8H8u
Http://asn7b.LA/13Qp3t0dY/Mk0ldhZyJP/rRgIZlOu/hqt1qM9NT5tAGD07T
Http://mb2.NI/eOXXAC0MNiEvJ/ul6ydqIPg/3JhlWx21r~sH/ZemaBb7j17X
ftp://7i27:54542/B3rW/LSNLFJ%74J/%e4NHDP1svTU/Kkpr%C1%6cO/2wWp%f4MiYLhgWGSF/u0wNwK0B
ftp://f8X.cat/L7Gj-OSdF/QBrO%f3okEZ/L%bdvAyxC5
ftp://[6CA9:93a1::]/?y057O5/l9C:/XsBy2so5tX=D%71me/
file:///%33P.AyK6nB/QkN%011K/iicc3HEIE%C0/v_7Wl%fdzMCBnfC
HTTPS://zv21qs.ekofwyy.f1pd7snnae0n2nzfdclk1sf4hybx97u17piaj5-lul89bxrf775koowj.as/BAc33xOV7
ftp://ko%5BM@183.207.071.131/tq~2QxL/d%D397GnaQgKtPMOsCp7fyVobgZ/Nhnp4LAKEvQ1V/1xFn%cbR%7BVU3
https://fiuubt.bc-yrorta.kdn.M8mascygepb0csr.vpifk.G-p35wx.er/4wvko7/Wo9PsbrLI
file:///LRVqPEfRevRI/nHtsA5k4iilQ/22vu%674y
http://jX-U69Z4.3vuws.41h3q22bzs.o3hng9:6629/Qj=CQmh9/%9aCSTfa%0aXvFQ/u0zAICPSGUx/MqP32INW%00mp?ZmIZc=5o1okD&WEDMM6Qnm=0w5T&gajnp=GFwK+Ct8Pds+KRsnyPq+2UFmx+cwnDnvyn+Zf0VFXyk2+Aw67fL
file:///XRDAcY5GGmj3/WoHYehPpF7/HS9LhdHOe%9fS#!SZge2
file:///UIIGOxv6jvF2%c0/%A8J3%677Gmq8im1zklKhqx/HMhCSY2QcyxvL/
http://Qhk9z.zm/cOGBen/mBsDycEI5V7L1s%84WUj7863/p%5f~okuRD51b0M?b%F2d%67ujGr=oh8PWUtK&j6uX7baX=&sg3RUocA9W=m5IaF&JWH9G=fyiOtnC3+7RJA+ippw96rvu+BxtGg&F6f1=jmPS&3PE0xX5=TGV%5c5J&%fc@NSEynhuvb=&MkRIt33=
Http://[98cc:433d:2C25:62dd:54ba:d10b:63d3:4C40]/YlbNrJod/fdjuN/qYqSdqr5/KAbXYHO%F0m7Ws9
file:///ywFY5HK/XAv@v%66o/M2O4Wlny50hypf5%02A8
https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
file:///enqvF%EFLOBsZhl8h2z
ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A
ftp://1xf.ipl4f0y6c4.VA/LHuq~/p2nPbE/0YGGNJB%DEje2psef_B/aKOuMl1Q9
ftp://o6ou6n.N8.yyld.JM:24207/aS15Vk%0eg/M8jcXu%14d/%48odaw
file:///7NToG6xM&SK=k8/wTdaPAFLzqBEJ/zHMDPj/L.fLv57c/z8QYrsKS/CEkA5FEhQXBQi
file:///UWrC%9111nEhh/45FHiTx%98L
http://35.iN13LEQV.z2d.in/%B2GBtdYtQjc4TTr/gLxjU%B3c?3m8B3t%24eK9%b8=kgc0f+ew+uux%7dOI+pbZ+H%9cS&%56mm6=rkQm+dHPh3gGj+1kC
http://nEN5ZN.EG/%0efsf4v30L
file:///19%9947/ksd3Sq7W78%27/2K_Ylzcu2q
r8sht9qzsc1e2wp.ci/8SbPwlW%5ac/qKEqFi0Q
ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg
6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/
file:///gVW/nnRNxPfMXKb%72Aq%4A
file:///Fzza388TQ
file:///
File:///kpiE4WSatjDV/phvv7gyfb%78b
ftp://240.154.225.198/I%39uutdECwM/PViD~qPa
td.KM/0Dkyg/B%65DiABz/wtqGd/i7%cepV%86XkA
077.102.005.039/p53%0bsPeiZaRy/nQHLsKEbNdaX/nT9H%521/Zb7H
https://Pu5aweu-29knkj3k41tw25h7xzm9pck96ey4q0gqzig27u.vLPR1Q4.vg/QANLMxa/gccQ1ekkRDr/?bXRDWO=I%0ap7%f4PB8S&t%a0Uhe1I$j$=Mm
https://J-5ytf.nmp5zuopbj1qbl1ik2c4ihjwu6-q5dhn.ng/GDtBeBZixtl/6sgw9/tmeJ7k3I1hHJfM/2JYRt7towpNjvDWsumYmhu/nBVPkzSo/cBXPb
http://HSZDX$An@ukj35.ve/9dLg7XrzV8g/hXhzX;2/Zw3KKwTP1um2/qej3miaDjj8v
http://sL333Q.Zci48xtb4g6.lu/sQw4ZHF/M%99%1DNl/s58%a2sCxGQ?EgPNZ=qaG'U2CO
file:///W%64hVsq1u9rIuZy/qO8j6EEwj/d48q1%6D/ko0ec%72/pcJo/MZQohRx
Ftp://afq57indwrb0sjhgyczyx.se/%6FKey7AOE/IPWZg3ggMIM6%D48h/XnAuzG
file:///wDwlQVR8i:0/mzefF/D3Pnkoza7Zo5iQdc/ckieGQos4JM#9rqA%DAD4
9gcwbh3vcmfa0xw-k2.MC/66TaJz%FE/SnDRWAknGcI
Ftp://%cdaTNzNPNu@w6H.V9aps/87/w@rPBGa/he%FBu4vpT
le1u.43cdu0n4.bn/Q0i6uNz/9%275%a3dAS/B%2fpPkCW
ftp://131.173.229.062/1IYcY/mJJ894/%89F%45HHRdA/eGlhL2MXm6Q/heBdvWm%3cVs%04/x3JjEB#2%2cQsgeK
rtubvdk3.PF/L4TR1g%5f6/Caov%FC3vK3ofrH/pz33aV%54
urlyuqr.ar/tzJzKM/gutrfWqv/IC%24bbmSS%02P?%24JV=zrJilQ+tH%7bh&hbO7Puq8c=K1Qt&ULqdYq=
Https://pFOROCZ9.dRDP.gq/08VkBBPja8cCXZKLa/rEF28NoX/
https://[5319:CAA9:0242:86EA:8e36:7086:B3E2:ded6]/Jq%C0P@jZ/KoNj84B5AJ=3jGk/7wdasVgHFexe4M/zgEZvK3vh
ftp://Bvc6nmpdhn21400.Vo53pvqm0/u7jz0O3bbFTTegZa
l0q.0b82ck3a.SI/EQf%a6#mhJ%0dfWnfM
http://hr58b8n.bL0/LppkKdZGYdxiHg/2VXeZWR/T4fCmyN579
http://1x6.yc6g6uw6htmwcrb10t4kwc393g29cctmtdxxz1j.KZ/G9lcwKju/UiH4E
7T6OSH.PF/zfYyqdxITCI0
https://2diizsrbfh.PK/t1zBYiDPZG8Kx:/pEN4b8xKu
HTTP://r53fl98bazbqhc19-h-r.qif.AW/8sH0%59j%FF7/QPnw69%17Og9V9l/JAn2c7i/%7Fta3x/P%08HRF/
qvpqmoa.O-0.FI/TDl%E6x1oUoACe/4VUZdMKL8Axud/JEZEF/KOR7Q7?ifYXMx@=&iI'!tR=p&k2Tv=Behew+RFW2c+w8NOK7+?BGH&:TYW.6(=H%B0Jvo9LvAy61V+YjewIUBKHe+lT543+BIss6Rz%25KTjd7+fOp-r+/PvG%fbP9kd4K02Z+IUXHyh&Lb1kab=FDdwA3_Z%81e&iiG=CVrO+1AhtbU1JSvh+Q;ay+Jb8c+%c1L%D4&m?r%0en=8S$wF&5JOA9WI=&kGJ=WjzqGX&Bew@sXE=cl4a+2S8
http://jykpqk6.sc/VBPT/xNRs7JVoZKE/
FTP://2w-y60heg64rnrmpyv43tpfhftxolu-5u.lG0BKW.LY/g%7aPAj5j/qxyE/D79g5vu/
http://Unp.IR/tN;/bCXe/fxSdK%00%CFB5N/D0L1/bjf
[cf65:1F97:24b8:652a:FB12:D0F7:181.134.252.162]/1jXwBjjxpC/0zKR6N%0bhawVF
ftp://090.247.102.174/YZgWR%A1NP/f6YUa8dEOoOk/a7%59Geq
https://Zn.RE:31587/Vam%acYZniEPiY/lBfiLn%F1/dlHe@m0#
FILE:///FojXlCuj/OQXGX/JUHCBAF/TUAe8k7O/fnh8rautFH/e6%C2xGbsfELFVW%df/JKQk/gEO%589e7uMuM/SM%7dz%0chqvt%67/dc4fnbs%F3%5e/4rLtAbS
http://247e/qBmVNrd4AstGuk/JkV%50CBmmp%06/%a5E%34TAY%E7/5WL:W%CB%193Dr=cl9rn&/mA9%651nvah%63hV
qkwlh9jp618.k-x.de/xiraBM/6zj@AcW3NA/%CBeI4RpP5nz/FiWXIm/fy6YJd/n%006lFEE/uT7%284Q;fXK/a52ToS/w6jn4ZU4r8/:B~XHaw?G.cE=osg8k3&iGJ=V4&w1vL=me4QRwj&YFgq=%22zCDTqgmKC
fjrb5z774.SA/PVZsWyA3sMJrb14P%995vIm6/dC5=Hj7?cxCp=bZ(40%15pi
ftp://pd5mz0sw.53t.sent7dh.ki/U%57Qz9g?6/6TOmiq%6F/
Http://g3t2w4.2AB0B.3eq7q.RE/fvvJYyHjd/%34FK%98WeZ/G5Ux06F2BDF/
http://7Z0-0PC.txi2srk55gs1venx.uy
https://i6.kzdyaq-v3.9j78y.oq5r.gpm7oh.x1fnc78-tli.5yu2f.3hfnkcvwoms.hWRAX7TAJ.7ei.tt/Ysy-/sRl/LZa6nw8
Iq7sp.vLK69LN.lr/hjB0EW3t5%36/lSVsKT%3CWsL-%ADA1p%0ffG/M1S;SyAVBO/EvzIxfZpicuo/dOst%DE%E1w
1lg7.sz/X@ENk92CPk/vVYJGN%act
ugk7-paad2cswwq3kd82lp9r7-i93galijy4x4.vatv4ag.va/Eww6Y1XABn/pC3%9BzjH1q:sB%89Mu/WdjiQ32H/LEaekIokSv1%E61s/Y~wQYu9v8yDqSatHO8F
http://Jmury.vc-wuwj.rn0o.ug/EhXMKL%64/CwKXyRnpk
HTTP://V7c6lvas-wtxspcp53z7o-v9dt13mpp7gc9ezt.MG/q986Xs3Fzpo5/6tQRek0/zkdJt%605DYH2j0aVfgcn
[0CFC::]/0611uPvtHJ
file:///viHNVlfm/4BICnFqFz3mXP/1%0dxeFn%AC
file:///ceic16R0Ht/b%AFXzo7oKlnID/v84LSyw/wBfvq3QVf/vuytS9wORE/tYsyN9i/msSNDC4Jt8/nPWzs35yu%ED/zvTeOit/uSVe?PyD
FTP://8GJ0QK.rQ8H0BIQZVFQQHPAWF7EVV12.LU/dLOis5Hvn/YEA%C5Z68E%50hS/Ie1Sx/
FTP://bGCO.apov3z1nrv.ke/cM4fSVF?%ff/tWLPVByl0/ABCz7EZc3/R2b7U8o9JM6p76
file:///2%f5tf%F7dSLdlRwws/qnKbcUOCCP72RTJ/WTc=Xn%B88/
FILE:///n4riCnF
ftp://mQEGW184G.Hv3zhea6.ST/iW6mhdm/G9mpZUib4loe
file:///
https://A0ea6aeynb4z3fsvnh4wg6h7.9bicz2zg2-695lf1uql14i2sjf6pqh1sae2j3k8iptes.57/jzHSQ%ebP5/%e3%9Chd/#VqMzFZrd%ddpe
6wmlp3ipb.cqi.ikf9wdku.arpa/dMq4GciIqW/aL%10jc%d5d%c4v
file:///lT?KC#nXl!iMB3hl
FTP://P9yyxqsh1rz2q-r7gp.h0W9VBZWGP.tk/gvbKQnzs/q1Gb
file:///7KTju7/x2t7Qen83hFitH
iawuqq99.AX/;aTO9WOuOPwl/UAbRoxCcv4
http://h-juvh.3gtf/spUbB%2aq/#%9C2/LWN&
vj021lv-xpcrzcaibfgk0.ad/dVYoNrxc5/NVH90Y7CCv%4E/vITM8z%C4?P9Y6IZlhse=7w1CwndaDA%79PY+r4Wm+esuV
http://%d3fV6o@knpyxaoxorjk0xthy4c56-idtz3.i91eof5.mt/MM0jI8/mviceY%E9KnCQrwqA/xTTC@R/bgzg%6CfrsDT/uN8jUqZIRPdu9a27A/aNc%f4l1h9UUax#t4W~aw
qc6iz4vjp42.9IZ.l87y.4m79dnm6i.tqhva6e.dumzoy.GG/aNgCtk310/ltjBeHJh5uJx/XMIgU=CSzwD3D/
http://p7E5E0.hhvqt56.ug/2p6%2Cb~bL/JIlK:TS/KKKGy
file:///3%aexrb7UdZ5GpR4ZIfoxwL/vQV%4a2zQxki/QRji6gHpMGgBaM/d%71A2CTpZv-kF0tD/Ig6roS8m4/~aA64OxN2yNDZ/fLLcgp%d0/He%98%b6JWoLAm/_aKE52/bcn8%06hs~If/IV9oQt%A1K
f5ms.jp/%A1FpERWwTd%BFG/ExC8V5aqx5l2CLJr0mJb5u/DgMvEzAr2U/py9Vg/igr9PzANtw/FFiN1E7
https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/
Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE
Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9
t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x
ftp://D02-auxxaeqnv9ve-jlmo3.l10vqu.12jl.2mvjwrsqm.BA/r71QLLNu6oGJjG/HbxrX1Grq8/QR%2agZv4hR
file:///XoCg%EDVf/A3ibJYjU
i44X.a8H-WP.zgmnrjxq.NE/oL42aLwl/h1unIUx2m5mhir/ZjNqL;n
file:///KSPSz0d%734OBRur/v2feKz%7aC/SfV1syp
http://29SB.j6/ojVDhx/%A7e34T8%01L%41BNV?6uRxM%DFd=qg9jmHtW5R&EeR=%f9,mnV.cGVNclEM54f+efsLBpEc+3V7mIJi+Dng2-Qk9&t=VWC!+5gUmI&c4c0sX%51=%03?a3mDKm+4rHPsfb%dc
96.79.198.95/8JJUovS/
file:///.LxM7EsLzp%d2/sOKzUh/IVX5Mw-PVormR
5r.uL9CQEBDLX.bn/?3z283zb=k&q%d8u%aeOKQs=s2Ixcyjmlg&%52=Fc68M+%F9JLUS+4XTt7ypy%881+knwx%3CF+CUc1ZNLx)K8Ht&Bks=*woVYK?GE&vv=P+b+W%134Flc6+%2e2w5%cfPu%5BXUS+PAAvb+@e/E
http://ol7ctcj1x.Ugk.na/jnDQG9WhW/r1cIpcqfGNMDWto0/DfPQlP
ftp://ico390kww0.it/g&kOEETBwQ0Xnfaz/pSA4oQJ/nU1WwWgH/u9TK%34Z/x5hXHtQAb
HTTP://iEYF-043APHCKLC7PX.qB28RKI5NNRTNJJ41MVKDI53GHXIMLM.BV/QBykbXcYpFg/zgpKZ/pVe2L5cYl0X1%37bmI2D/NIdWj_%EC6VE56mu%64M1sh%bfvNe/
ftp://vb5vs.P5f5jmxq.sn:10748/gx%54N7WDo@FP%a9/aFd0z2V/6OCUikUdhs/F89CFSH6XHi9Pgt/CzM6Y3s0UZ/u8xukwK;type=d
File:///B5dOvjHOOe/oUJYD5/zgi4jw%54XPx=S4NV8R21Bo3u%d5/Mbd0rcFk/%5cPig5
FTP://ebibm0spm7.cat/aalird/1v6GldpVgXA/9akBrbVRE/FbH97%67/YfhOfgG/gPiGQb%D6?AodiI#nTfAhiF1
http://[9396:d59e:191::f7aa]/isqQk3jC/js7gnxrTJLFX/
HTTP://k5ifny.sa:32595/8XvVVW6Tp37x/IF0IkevEa9jqkw/58g3p/MZB%94sVPjmF7/wZD0BUp?N6P1o=nH:%5840TZNN%37eJ+AJXoM5t7+UhR&%3FCC(O96dC=e2Zqj-YxOMwv
2hr.p5v.6aqidmeffi.flfqfx2znf.cup605.v6ktei.mi6.AQ/ky~LSgBJ/3JZhLix/blFeDQRn
gtf7abvdn9i7cr2e.YE/-1vj3Mw/P%CEXiCFd2a9/vm
http://3rsqw6jt.cv/n5e9YJBevO5c%6e4rW%a8/iKy-raSDu/.j6BTI6/CZR%f7I=Qmfr%dd/#xTHGb9RTWP%c9H31p3
file:///S0Vmb2/JccbhGwccE=w/sgSbbJh/2OjHXikwMAVk/V1l0~FYdw
file:///5fXz1pJg/G%A6MIr2J/6gwHl%1C%55Xx/xHPZg7hEg5BzqAVzK.gM65L
File:///SxZ0jN1/C7FaB/Q63Jxn/QGzG%CEcYzLq7sWLWF/tD%3c1aukYV
file:///T8krlfICzWYr%e6/xGDI6sWJ/jCXF%87zmV6
ftp://csanc.mz:27249/Q4ci9eH/uQLFb8ZVrjYbaCS8/sNzv%8DY1Xapc
file:///P7Ub83hzju
HTTP://q6-aoovoq.j-joev5ivayrom1t474xlqxrfro.xn--wgbh1c/WiS76Kh&O/IDDo916%22Vp4/iZYdp?%66lk%24ke=&OGXRBNTxne-Rc1i9b1=b2DcK&Lyuxv=&%5bF=
file:///
2cc16zv4u31wx-edyjiy.cz/voFy:f8~/9kCAM1/1i8r969t&%53/V;exvHAKlZm5g/J85xEKDBR4yY/@%8dUYyVS%4e%3B%B2m/W5AXsrDE0i/#ivl39=VdW
https://73ll5al.MO:10068/5K%AAf0p/#5deD$x1
FILE:///a0esBQEE/
qnta8.f9284.5pvu.af/tHEFme/OOQl%E9GOt/xuKnPxLGVEf%D8#LfL
File:///Vg9klGYqV%f0f9p
[1112:D95A::f9fa:5258:6AD4:3c08]/tAHstaKl7bvDJ/Hm3zObt/qSQiJ1FD/ff6EP/YLR%71gk/Qm%98XlJqp/B5%31GicO
http://[f34d:a4fc:b932::631B:2C2E]/F8CJ0o2L5/hNITi9
http://fp8bh.zm/R5WFY9BBHOmi3/OyhE6XN/7tZGprtgW#hrKj
mAIE.mXK.qq.3WVWRXC8BASM2NX8GRC-L7O.nz/l%E8SjQ/D8iYe/2Qi&C3RMJppB%88b
https://smj0v/Z8B/%96%A4mzAT/eixQJ/v%D3HDtup
ftp://J-b0a7i1grxbx.gt/MuPMg3Ly/r2iyJo4R4opO1Xj%C6
vbhx1cl9dgl-asht.lDN0ESMI.RO/A474Sw/mcZtSSvta/ZvpyTJ/OFCSmNJ
file:///pedpH/COpc9b/gtm%d0EBmRz
[B91A:258f:095f:5755:86C9:7989:2DC3:B052]/%ecPvKuwpKpSQ9ANsta/%ac=jmcQsb48Rfo/bWIMfqk/dUQF5ms%d7/6Em91E&z78/uGC9e%53/Cleb%23zyGMVzOe/Rg4teS
Http://[725A:9A3E:2F98::9109:5272]/ijhUpBG-1FS%73%D3
gmamwxo2.0z8rwjft28enmc.p-5uyn.u6E6AXVBP.ph/gBkpM4WFysjoV/X591ak/tIRMD.t5y766HT%5EX/RSb0a/Nw
https://mxfwd.gg/uwsX4/vnVUhsd/igwlpT%bahLI4;P0
https://9g5pjef-db.Mq0tfjbmqomp84hi.rf97xmi3834.403gi.TC/sLVqu3UG4/OYh%98SQXVXf7Cp/j%deBNpZoEfAD60RV?wv%90PcN9VQR4g1=H9Q5pv&4C=aZ%a7l&B5hpDGtJ5E=%85NY
Zg2x0pwfg3xo38fwn-5rriv520uccxjuyrxov9cig.fcr1xxh8.cat/hQOVnH-6u03Wc/pqtgVxVOnlza/6I7b3Cv/8L%20%820/2GVQbVTA/FoUjDrsNT
file:///aQa%A8K1SpUF3R/DRHzEQarZC/WpL%4a~dPnH
FILE:///7TVlhAH/kRBTpgn2/HbYFSHYnrazY5Pq
FILE:///wC97%71cxvYq/%16?cNGP/
file:///u%7BQA%909Et%edmf6X/J%44H591v4iAHpgc/qeuedAPm7Moi/dE5xiL8W/%52DLIO%B1vY4h/A%1DIi3
Ftp://3ZBZ/YmeJ68Qq/%E8%74X5e%18/QNyU/
https://R@lyd1.xtccruqswon.GR/oHPO%79jfl1/rFfct/TI4I5pfjn
file://Rcpx7se8pzp4sj8ooxrlfyi.cpj--z.tl/ZQtA5b0%8F%665G/RTr%2BytU/4C.hmyu8/F1hcJ/PiHi4c%16VEN/66dIi
ftp://wDIXDXTT.vg/eCSU%14/7My9QiLZjNwKRh1/pd16vIBrmG/sXqjHnSFyE%03HA65WCMRaJGunYbT
http://[fcf7:4e45:3CD7:4B2B::]/ZbLeVZi/mjJ6/LMTBU/V4%e0nMMUsY#'aLkxlcFi5
ftp://k2.jALPBG.XN--MGBERP4A5D4AR/NyVb%E0rdacdy/KQxWB%0DFc/Ruh62/qApiRp%fcc7NqG5P/FQd6Yw8Hi
ftp://sjfzvidjcj.ae:55965/r7feW9uA/33qU0/BKlBWEwBw/w3nSd
ftp://2k5.lfssxj9iatcd3056j-rq0/Bq8-ZY8byN/Skg1r%290%40%23/X51QAJ7U/H7Ir4nHaQ8?QOW
http://ip0176.JM/LthE/E04n2pcGJV?P8=dCpb%e3q
ftp://072.017.130.122:58513/6P9dqEIAxnvathxK/GHoR0X%5F%8fU/%ffANo7hT%dcKY%dc%B3%75pXy
[3157:621E::]/CmIefnv.v91v/I%E6OmZLafDS/a7JoSqx80BC9/iSPk18UXH/g6xdyYNSlT8/o34wEX?MLP%993E=%1Fao&nRDo=6svN8+d%4Bq%30jky%75psOKb+h
FTP://zbtd.0doxocs/sDrr5d5i/%6cJnyS/5K8mb;TYPE=D
http://1vkic.cmd-efq.st/%937ikPpb/eZh_3dIzXbtNFVxL9nQ1/7bVwDiamdDs;8zgSZ
file:///YTllDP/IhzDW/%00H9e1IWG4%42%93bP/UCdd~o
ftp://ksd4b3w04c5nk5aasoepqdby-9w.sl/pNe8wJ2LkrJZ/XJSanvU/
http://oPYQ.nd-egq1mkgtuwt4ei1ax.GQ/JRpv
ftp://171.235.253.31/gop3Q%bcUoW1/38aPN?
File:///XoULHUnTn/zYp/#SlAGu
0kx1j6uf.QA/lhgydNvB/jU%B4oWUd%842;n/zo%63SywbGAgc/c2LB/wV8n/
FILE:///kcboy@/9goeE7Q
tD6HUNLHK3.u-06.FR/WwW%7f/1HS0pUTG
Http://c82m23a-5oprsol87jurs142tzex3957m9nrufva0sc6gdo3pajic8po.H5m3wt.1RU:11878/Odij%A65n/Am~mzHC/#ArdWk8
Http://cd1.es/w~Uc%455aE_/wVJKfr0/X3vnA/ImG6Z
http://5ect9i8665yca.FJ/ylKD5bCODpHQ/lbunoK/%98004LI_w/HwTFV/4@O9_DiwGb0Ig9#B8z%90jjivO
file:///IDE/mEZee3/1B5W9drK
http://wka3.GM/%95yhyVy9#FFld%0CZGoiP
file:///nAL4tAgn/UK?mpt4IE/.2JW4Ej%28uiG/LulMqnbE5
ftp://973k1fnytm6y9hx87p42k.1whc75.PS:59063/nxryc0E/ooGHQtw3ik5/6fU4vZmZNZ10If#iFXkFxd
File:///YTIL%AADxyn/exqQCc/HrBwtj3/DIOgKT4YUu
http://3ucol3f.lr77xtr.LK/FNsRpDDW=/76bEzBTI/q30mQZ/
9sb.7mct69t.ar/WpXcM8498S4F#k@L:'L
ftp://3qn.XN--P1AI/PdBsWGhCy/QSZ%06xb6atX%7eXtqSy
file:///t%48r6pvw/gTme80:slEt/ciBvu19
File:///8rjryYe
https://[887d:5086:CAA6::DA5B:192.032.127.177]/
File:///v%2CCgt3%32kh5ZJx/~kf8WDLeR3XmmY6ap/.DEZNJ-ylM
file:///KNINXVO67tBU/VWJdbMVH%a7uqRO9%ad/55Wlt5O41e?/YGhF4Fm
file:///zYYquoqz/%240zKPi/@k9J&epm2dka
7JUE8WA7CLBX6ETD8KUU16AFZHHS234NORX.tep69aqao2.int/iZjrUNXtQfBaF/Z%A87tU/XfvTnCVEY%00/FUyeI05%f4#?hZ
file:///1?Msuc%BD1/G1%33Ppp/F2Sv%0EJIBnPzEUu32/81nqxxTk1HPO/7pyYlewH7gyw
HTTPS://hdtgt38onqh18-617otg7tn-ut6f49po3gaajt47.m4O26.rwko060q21o.Am497x0kow-u.TN/nZX955o/JtBhKlvv3r
ftp://28.118.125.16/3j69z80kruR/TXIM6gQFdZTCI/T52CULszlqMQ#%C3OT__%57
ftp://y8K1P5I8E/c2Xa7CmI%d6TWC
225.022.162.113/ZF58s/%CE%56BA5rQPOLU/AUNP8rG/w8SHG%d0FVsZX8dC
X6eygmy.1a-mtt.ki/WC9%a6/GH9mNozOi
94h6rdisa-eh.CH:8242/I8Ik5%42881r/EsVYPHYT/Jw7%3A2%2778ggZ8u%60
Http://89.pa/%65ssgG1L:fKtE/PrmY6WoXW/oYH2AfHjf/uVaFyqn%ee0o%4fAh3
file:///KwM8U1%EBR6J/K.asJbs0/i1vCxd/ZthOZxt0IKQEH/#x:Q8vtaIw
http://rP6.Ewrowee5k83.COM/5CId/KVp%FE
ftp://l8AAQ4XL0X0HO6MF7.9d.tw/%98Vb%117Uy4/KyUMl9
Q293qtnuw.vi/6fi1J47ebQ/d2EC4A5OM%FF9_tUNs/dk=?YyGXS=&El=i&Go%cb=fb8&7W95=Cg49VW7B+B3dDs+f'fhi2+6QLTS%bbuJ+IN8+1PE7QyfjCX7tY%7D+cGm4+JkozC,0y+SEO%ac&V1pkpm0GF=0%46pvcEyU2G+2%F5kBuG
2pu1.mv/3uiG%445F~s/%5CTa0YXuNMsqV/AwE3d
file:///jIjyqNR/CBgOXsf%8fYiqCR/
Voiuuc65jm4ven-9li9.mii5.0h5xt6.KE/qachnQB/nsC%4ai/juYvC3yTiCp%06S8I/LLVvQY#p1jmTyx@W
Ftp://ydhhq20m.MY/%ADNIfcLl66t1fl/v4%a60h/N6My%9AKXUvToMFxY/
14.21M1I.NU/iqlGVazIWPCvV/oelkORYd3Iwsdy%0D/LcdN7U
file:///
https://07zje.j84g-9lx-673h.vwr.km/h2Dv%1BFR%9d/NV05FON%c9/klLPUVUcp/LRlEGREG3H
[836e:5fb9:0cda::D9A5]/n2j/Kjy0BzJ7Cj/GoW1ksyHG%B5A8tw;v/hIg4F;R%2Ax8nL/d1aHG5Vsb/VNMIiMx
[E69:a743:5C18:C43F:780d:FDD0:EBC8:2ce9]/uAWRrcx
ftp://B3fvr.l5GW6REKV.GI/0qT%dbwWVXZ/3kdb0/kBQuFu/R@9WXH0
Ftp://a4gdplaw.TP/zyf2c37ZfY/QaiwZ3l/CUi9.ado/
8L.vg/LjRJZ/z7/Fkg9dwmTDSp
T7wos.u6I.cJP-5HQQCA.9dutej.SG/6McEZ0
jJ0D1X6C5CCNWYGOCI4NNFC5A5NYJZTCW65DHS.d1yxpq.TC/EQ%DBYuIdBv
File:///YGxWV18/%B2bnYvE/COmzr%B0YLEB8/%75L%c5ym2Hw
HTTP://nzhfr.Mlrs1k026k.KN/~bhI#qqgVS5YR
https://z9z6ip.INT/1%1dXkN1P/KI52I/yo%FD13SoZz0?:z'X3xwoS=1y&lmDOOEVzwHn2j=xfbMj%67cy#bKedfyI1
FTP://aysc5.8i8kj7.cu/Ule%55%F0l/HV%7FNXdQfhjf0/
file:///UZg7IFvJd/U%6cAH%59cS/dQjA9gM3RIJ/cW7Kuo/lBGa1%B3Hjf2aN&/
file:///TPkfDWADgMp/9cr6zwO%38cZPtrql/w3GqL/nrvKR6Kq91#s5F4qQMjYx9
http://1co-4k.zzzqb.XN--KGBECHTV/WRGpnKFny/eBiU%BDapp/0cb5bJ5%24J8a#N*cE%e4BmH3Jse?2
n7q2q9b.3-ve593.eb368oe.si/xsA7jCLE%5CRj/gEfwCC/W21RJFHtG7td/fSZIiv/6mJkJcnid/xFjV%DF8pXhf:H/vh4Z3%efgdOJkeT6sTC/wUOxqbX
ftp://[7D66::]/m:wnkiFBKJR/7c8a3te/mQqS6ZDWbfTXtZ9
FILE:///%41PSndZFnAZNuF35izYcj9Jmt/aoJ8K6/nGtfymyBi/
008.245.185.106/0Aq3gb85/6TZk7/PVTk%b1G80
ftp://90.188.10.180/fgsPUVSAEgMuLwrpxg/8QEjGiNEHN/pxjBgdVV/bkiEKy
5yxzap84dz3lccndx3xoj0zcwepy9ujq4bk-ckyo63.si/%E89rzFXG/htVDvVdD11S/SLLVce1/%5bgcDSkD
file:///Mr
dm83f2l.vvlpnpob.7si.cr/RFT%18uMgARxsP/8%61%7cO/eZtPUg%e5FavR0XRe9wZZ?c94ub=63r5
file:///cdgSAblie
http://[5b83::58CE:d882:36F7:8b56:11D4:f42f]/9mbBwV%C4/AI2q64JsNqHO?tZ3=nATs%3CQ&lbSzuIb=/IJtfPRbcu
ftp://gOD0KB6HB8JDGK56.l-V4OW.sj/KqqiLzCu%6a3jexLbLB/%6dBHZb%29z72YF/
http://s65E1E.TR/5sj4rIdUt%CF4F
ftp://[0f52:d55d:5574:ee10::dc96]/dPEbp7/PG0Nfo/MVx3/%5Fzz8%CFXb
bdctmj.vzaax2fe.j8S2.ojfq-b1m454.g7I.uy/o0%28WV/Bv9nDwD
https://k233JLHW6N.cCA13HZAXR.laiu78y.fleptcf.brva6c.osod.GS/OB5inpGTj=gGI/YNi3_gNnIg/J8UObWz6z
ftp://enokmi/r3%690T0H5mfdRq
http://s59w.cg/nJoM7yv/Z2T9Xof0hNGhl/N0%6b5Sbrbtjj/
ftp://qytw0h.hkdt2rm.gd/3a1WJDglP%cfZ
Q-2pgsvifg.yr2ix-c4avrjwva.kn/_zD8ad/%8AVwQwOG/JMC314h/rO0qj%88?w0XEY=JUigA33U&f2=n3tXrMH74ApC&fx%BE0=b%d5mgX%7F&1gjjJpHG=vLHCZ0Z8&sYQBW%FFAIs='&zD=GTnVzkf8Yn%a3L&Xm%b9F%32EcwWl8=GUq
File:///spqq/8F2dG
1Z73HWVULIKOO5WJ.rEJGR9.nsscy.gf/rHEt;i5T/%50ZjYYJ3M%4dR/WlW0C48ocnb/NRA~0M#
078.104.235.053/8KqfxznOtxC/ycYiTG3%11zP2%A1/hhbuX9Z%d403wES6/P0gg5%94
FTP://58vs5.g0.tHI.gq/N4HSp%95jtMMNr/bpH36W/cC3oAe1C/Sp7gxd/XO7JSqE
http://e8CYICG-3GD1Z7A0V121.Ya0j.Wy.CM/BLyz1kmpRF/nb6u%52/GpXGTv19#9?bwz
File:///Mze0xLtXpPFW&x/_%0aYP7o4Fm/5&809/fsvOYyn~zvJbT
file://V-jo70zmqrppoeyva0hm6x10y.UK/#3O9f0OYdx
file:///K4BV8xTq%ccORyFI/8PzAVSZeBNFX%adT
071.247.240.193/%94VOUi%ac
27r2mghslc2b.Dwbpiqi8q.gTYSL3Z.am/RU80/KFcctLv/R8tG8d51EaD&pno5r7pDR#GWY
mdfr2j.1FZFG4.VN/Xn6l%6dLWufM/I4FHTzlnWx%7BoI/ueeKx%03mfSA/%9a3PMEt.iSdeTVFgSnLi%C84m/6dh
http://H4jk06c6mtprgjywnc40mjri05a.VA/7B%C0h%4fCjj80/TrN5HugANCZu/eMVdn4en/QUSLGhe?7yjqzvzv2r%b0I=&p%C32*HvmS%39g=wb8u&lTvA=FCGNF46U+?Ak.vpCAV%ceiK0f
file:///cVjI9Ue/siOD/jynyp9%3FmBx
http://u8ic-x8o.UY/G9pZcTp/JI58N
file:///cCOIlZV8ms/Y%e97nfvexWwxq%00/iPxdyY/snHA2QZT%10
ftp://53.151.134.240/uZqGXLUIu-J/=%0C2pO/PvL0%19MpQBv/
FILE:///Kywof5D5q/0TRS/zayrkrnENB
file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/
mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs
g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
file:///TJa%86AczeCmM5QMhi/Wox~Ajl/WxUF%5eSA:y%0fD%E21/x%cca%d3Qgx/8iWJ5-h%26/fCK%01nQNrK8#ygTTB
file:///~%303cUUVYTEaQU5%5DXbogiPKb/favR2rETEh/9TXM%15u/nYCOZpZgL
file:///mJM%a1/jv5%53QDqE/bFMu0CBp
[a0e6::]/YR5lwpHlG5BPjr2XT/Pq%e4kWAmZ/ucI10P1
File:///8YorWt/#ToazT-v
http://2igfcm3qy.wlcgdxv-xat059qnx15a7qp-p-p5oph1c8.GP/hS4Aqy7SmODbaOH
3s81j.TJ/pS9Jzw8:NWryq/%00Kh1/Y7Rfoo7haw?pYq7Efg=
HTTP://k59s6i5o.my/v9%93qqGOWZ6RN/cdz6V4ly7nM9A/F4EhM0N2%53H/d%C4wWTDspWU/zfpMcIDWp#oO%6fSILRH
lvh-kt.TN/xZghTR/yDiD0a/P5D2%37rFa?rseH*%33ubfv3=%36ntM9MP,+97RbF5&F3Ia3L=%3djrAi%f7E2%65iQ+Uc43&y;Ikw=vdfmJW&sE_%F6xpm=XFIfCsT&k@ctNa=%47KDJKEw&d=am6K&%25!BjLNa=iqs.l
http://Lhe7w4f06qt8tif2af1k6s552hlbk.mfce.cc/DEqiQf/GLpkeKZAxhSO4m
Zy-iit.Cth-tuvx4.au/dl6DMUqP/wAeKXt6
File:///35GJ%C8m6ubg/kpI4iEEx
dbe.gkg.EDU/cJ%fbQ3k7pwp5/arlH%DCD
Ftp://e8ni0.5etxvrjvn491/tP8r:UC/faEdqs4P/v4zJax4
https://4PI.gg/fFtQoVp/b6Jf55/YEc2l7dE%CA
http://gpu16lz.LS/9e%daJrwQfHEpFvsZ3jx/c4STIJ/CmvEGAUx9f/
file://ij9anjtok86ro.uN-BGDQ855IB.sDXAQR.5kr8kz.3J3M8XRM.18r3s0g-6.4rjsmwue0lwao0og17d-5-1.F1h3qgkul29yw2t4p4se5clomncxhmoy.g6c9tbz7.pa/5LMtmbl/1tfIF/pBOV7Hc
HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt
5.Piba4ac.JE/55M1H/AZXdj
m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/
ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/
hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/
Ftp://mez27g2tpmk.MC/%B8AHk%95etDns%46/gXbsCn%6C-/s8_Jmy/DhmfT~Di6KD
file:///NJvRsBjo/IECCGBvb
http://8-6wji0x.tCVT41X.k1PS.15p.SH/e%daVn5b%f6/GpIJ%65e6/VpeXUmg#FRgJm0E
ftp://nx4kcydiztae7fr0y-2kfppteds.gq06u.cr/RITrTqm/VqRIYR/6psgA0%dfpfg/gcLyL1/xa%72QCL;type=i
file:///M0WBSuI2qsMuKSfOzj5S/2N7x7nZg/BLtq%72VxjcR/5%EAn1%c6TYYPGe/Lb5Mtu
http://94MNP6XNH.0mgqklz3t9g2xl89x81-a3hifmff89nahy62jeyhuhe8lhkuafizl.GQ/Ajpa4Z1D0o/aVv748s/NAIWCkWCD2hj/7MZS5c79DmL4/ieQ%21gw?oEPqIN=Pm9nPx54%c1&j1y=C
ftp://rKI.COOP/v0pdu1zj/ir2UM4X/7k04jhOKPVN/7ua%E5y8p/bl~yS
d-IJA.PS/drbtmJGFEbR0OzDD/wMV2C/krWmMUV85/0AFhGe9
[D1BF:D02E:140C:4B9F:c86e:9fdf:077.173.119.180]/A07Ox%86Oae/yhjXUMut
http://A.bi/J1GPah/OT741dJ/Jh3Z0xb3
ftp://6VMV.t680F6.ijsru3.bm/vlJmkK/go28Jr/qUtmHmqhj/ykeAVxYoe
HTTPS://oi%32Yp.@a4mk0.Teyu0lojs62d8l96qiym2v477ixatleasrgft4ttpbfel9r.BW
x37MULG.514yrp5.Vrd68eeufzt.VA/fFMWutSw0d/Gr%BFun3/JH6%DESQV8f#gn+NM2
http://2.88.82.235/6bhV%BFGDy%ABd/g84ly25/;4AeID#
https://a860jcplfoodo0yq401cdf9.1ZE2P/NLArIzMZ%8B/6UiHWMMGS79/?4N=4U%1dM0qA31&faSM=0q2RaEJu5QT+vzNMp+XR%7dI4dQ+x+%0BawIYp%dbcBiOZ*Sc
ftp://lb.NP:46239/xwyAL/m74%9fqj4gttFLg/
s086j1-9.Nowi9s.fm/16zr3s/mvzfyWbB5/&1mzA:X-3
eigz5dhw.jynsrju0t044lcc.3c3bfm.int/%ffoZ_kP%5cO1ls76B/pQbPDb4s%4E6i/bqqrZ%b7j0uhrgIHd/eBdSEwfGrX/PSmYMzg0%6F?Qr%92y11b3=&L;5CV=zJao%31Tmm
65-ihklk4j6m.f3CFA.7kj.qa9rcww7uefzkpxbf87ni28b4a1i9rjqy9a.5texnqlc9.cu/p%CDK%b1%449LH/IiLqpww/HmACJI/r46TA4
133.38.197.20/pbgvKM6W%BCEBN/Cvcu0&#idQDycc
https://4I2GL/cGtyrs/%A8m5%3fekPsTRWlB2?rn=63P,EJu+SQ1W+uPySU8pvA+%f2+m+CwuUokAVfo+3nzWcQ+S+iXvEuhcv+d$h%7fy%cfMB
HTTP://a0br.o0gvxf.kp/zZkWq5hfxy/q0x-g0In#bd%1anKx27
ftp://[1327::117.246.244.220]/%91y4%09/
ktefq.GB/uTzbgV/9nYvIs%8412/ynKYs/YwBOWmj
File:///08bP/cw3Ydr5Cyow%273h:O3Bcok/0hIP@/
[018E:4459:9892:3770:3826:71D8::]/UcHNufii29UtPW%56WQ1%20V/ybjTB/oUWWQ?yUg1%cb4A=wk+hOic7f7Sw
ftp://1o2z/4UWsX/uSzHOw3JTrqy/TqZhkQk%62gZ/FpK/
Http://kZYPZSRN.1m.UA/QN9n3Nw8kPAgkCB/SzdVcxryKou7mMG#p6at77
http://se9g.s7-5qnlmsi0npbr8ouxuey3y66swspkl.y4.st/xfP7%066uXWuOu/clIFhy
ftp://D4j9grnngs4a61b.im/f35gw%53rTeI5/#Ff7A0YMs9RG8t
https://zujspr.cr/zy14P7FG3/Oxznfe/P2zpT%38S%FFVfP95Lh/nJJgzX/kcVuHCzV?Y5vMC=3X4n%9dMqeGjM+OjgETPdf%23b1+6H%47F+waIQ&,ZxQh4G%8AZv=ic+fQWQN+0y%523JTe0Ti#OA0m6iC
http://141.171.118.17/VLnEb4Y
https://sla.aowts.MQ/KbP3AV@wXFSgz/TauvS9f2/zvGpvN.e8a2Kw1ho?jYRUP=L_IAzw&cj0ux=xz&lrA%8bS56%A9=SX7NjQ
file:///
FTP://h6.MG/XPmpsZk1h%0B
http://Dh4mlm:8000/k9TYvw/EWxlz4%97lBf9oK57N=Z#Pm63s
https://8-lno5.KM/Uco2E%dbYPx~/MzKrkZ/rDpXB7OWtD?Wb1W=bKJazR+yRD6c+qwe+H3bo2ACXXzkVX+PdfgOJ1Sqm40+X%3D)%AEgm8I9&inwrA=%FCe+%f9Xo4S+JrcmiNbPwa7P94J&fMCr;NellUf8=K&lhgC1k=%32CPUA6&%dexj,m=l
http://bske9znh5z.mq/rF739Qhneaet/NTfzZn
http://B7z94v/
FTP://p9s.hh313n.6k3.DO/xaRRXPre
File:///Sn7Qzu4cDoJY/6AdR%8ccbeeFmXy/KRXtibcbXtTaLZt-bb/PISQN%777zoI
FILE:///IfZ6yalAm/BoIjbMXLnlo
file:///kFKgAORyDOV
file:///f0l1v94Rmms/zIVjJg%338Fy/5tMPO618wd
FILE:///fpbiT?6/%0B7dUkWR5r%AErqLW/v2n%bet%b3wV8Yzi80OJ.SguK/vBMyQaKiH8/Wy3l7r/D%B8Vp%51GgmqIBUHA/9gn1:46Xok/NcNIZ/FIK%359u%57/%35NvYIQIN/
FTP://22A1D0QMF.cmcve.CC/cvkZF/H%4EkZr%39EjtfIO/LPx46D%5AgqR9
File:///0Lld-DX/&Qmx07f/Zp%21ldGQq
http://rlch.COOP/%bcKE55hwH6/CKHB%2Ak/Qzsn2Rn1p3RUc3H
http://h6d5js.edu/IO%34xTQYL/OtYPRaY5/e0ILXZt/jNP2%07otUg/vGyq3xN/DC8P4ckE/JGfiUR5EfFk/vSlxbi5dKL8d/6JwRI
FTP://Sho0e4ay9e.XN--KGBECHTV:41333/6_5S71YpwTC
file:///HrmxzTn/sozw%db8Jz/x0czCVWgklrbV1Kf@IK/Um%78PuxjtjI/
FTP://9m4b5lf0.Y5dnwnduzx9wha22ayztin-t7hng5b62e07rzsv55325xgdrzwx.gov/pmG%45dhnQZ
ftp://t2ik0rgw.krjz72-l.xn--mgbaam7a8h/I%19KxMhY/FSau72W7/WkW/vYKyDkhzNiu&Bput
FTP://[221d::]/BOKtvhabe/b%78z/piR8RBZb
Http://5zwdz3h27.q9l27mto-5v0i3i1yu8oyl.TN/wk91N/X32rxh/cmM%01iQPnCulto/
FTP://gWUFGOXE8EW.1g9vse.xn--wgbh1c/ncQo%42ihY/Tyk216/;type=d#J4A9HEH
FTP://5wudd.ga:36706/W5a2PQ/%98Oin@%D5hjD/POMMY0b/HhPA4HL;type=i
file:///E01b%6ew/8QW%66%16Un/PWDGTFrQUHJ#dk&o~V40
ftp://p78orte1aiif9.zk-l-n5drgvx2kj6i9e034ck587-utyikjhal.qE5RJ031K2FAN-35.v71jyg8l/wgwpnw5/1WPLlSc8/3RZzlIEZMlC8/ytaOFdSuPKO%72T
tri9.Fyhn.SU/YlvVjSi3M/ylMdK88iRo%d8/cuHyS5Am1oeQ/XM40zgdj/q%9CLKm9Q/IOwvLrlTi?nDUET=e95%a3qf&dSTE=X5aY&pWtb=&AS48RI=71Z91stUL8Oc&z1%B6=fVvMzZUyI+Niwre%5FXyVRF&QtAo=5
Ftp://Kroc.Ls4-tkd7.sg:58219/9tq-FJyL?Qb/e0alokGZ2/MKTHP3Wsw
pmg4ty.m59480p2f69.fV.COM/X98xZ.E/cTleUeS/9P6zeVQjfd30/eVVvE4/Zyxm1SSqe9u/WP%a5hS
6P.BD/du%F8CoA/W0jyU5x6HXyVB/EOpU%0BP%BET/TBlhd%772ObORj/PNPXkVHaEY
http://5BCY.X3.SG/N~63s98IV2/?KuYCn%3160U5h:%BCU%DD='6uk3OyUbosbcu+l7U89Ozt12K+P/VK4+GhwEZ+D7Z5ByEYxG&8=#aa7R7i~K
https://38yyrnu.UY/8Kl08k%157n9p/TEeDKN/qQnmQFd
http://5PXM48/G%9fUxcBwBjXI0/1UJen/MF%30I6/eOsMzFMiM
Http://s8AL.rc94r4iftx7qeg4cbjjv5.za/mYk9UAydyn4q@w/T7K/dd%8aIXPp
Http://130.165.027.114/o8bwef/X%70neu3uGKY/NU%f8xTKW0;hTKK/V;%edBnJYWG0MI/ZlDMtVPK7?k1N:WnR=%3DNffenC%67+sf(z0U!mZFe+6YqpF0Ei4l&kea=&pv=0FrYO&%69j0HYlx=HVIq&sWgaQHZnyxp;=%97SOx&QbgYd=72tO&ugOWlP=TaHT&Zg5o=c,2tzpy&Xr=Nltupn6k&nxkPS%10oJY%74jL8=5c%58%77#E92Lme88eh
sat8a.cc/n:G5Bs4/%92Qx7YH/%933F68jWsdw/mgMLj/b9uFtDS/fCBe=77/LYHeH
file:///8NiXGOZYq
ftp://[14A4::]/6gQ%83ppX66/Fm%0fhsGDdq86c52B2AReDTW/CGafhb/4LAIXfs6vOHd/DHtw5%A1
http://astx.i8o5jdypn1ly.LC
Ftp://7j.N@Ptavog8.gh/%FDJUUJB/nrC6%4as/AM2BxLCU:fGwm
file:///LD3OAKQVR
http://jVVR4GZ.BG/XELY1/P=cusbVv5o
HTTP://4fx.3kt642w.GF/k4Nruf/hyO_xzJ%982n/BhxTVE5LR/VT7cIG%66726zz/YQCAvC/eTYPd%2Af%18tPt6Y
ftp://1py.jhl5-h.53.39PN2C.xN.ps/Q6kM9aOm7
1MRTJ51.mh/OT
file:///RlgHP4tRuBYzCPY/
http://[8F09:703a:5b45:F653:AB26::]/C51LFNl/tS8p/yG8y53@Wb?eBrhL=%f0Rj:Vl#%11Z
FILE:///TmzdtWFH/1WP2R%b3nSKls
http://5o0a8epm-rx6n67ta82256jav-nk4.lb/HbOqUc/TIVeqJ7Ohp/BjDwRDKJ/JZO
File:///AvnO.7k/P0YrByEN2yEm9%1646/QKj7fR2/%1F0JYW0y/qscsiKGeGfPA/1rkuJyne%12/
File:///1Hm4/bcNXO0cG%45XJo4RK4/SQGEP5/ELAGqI
file://4jc3bg.zs/WfjCr2aeWME/Nv4A4B/invk2d1h
Vj1.Ngq.LI/FR2%b7RU_z%a1Tf2vy/rysXmZ0/
Ftp://wkws.yi8srfw.tm/sWvr8nVIPq3lD%16r71KGXZx/zTdcV/N%02%6ER5gChmS/uxEJA26q
Https://cf3-0aw-g8zmm-k.AO/mYGm9AqQW%E4q?6u=&rX=
8vv-rhcodmrr42jd6zmrnl7xa.F1igvm2.RO?rQOIRt=Q&Z8=1WyCZjZv83+lpB%7a
Http://009.130.112.154:65403/z6iLA6cr/%3edXQdq1/yHKzFjDA3nAKTr/Ot4A3f%4DIzccRDaDQcC
hwpmi.upmzdzzhsrz.e469.ee/SXdNeY7NHR6/Vr6%FDr
http://[C7E7:57e7:b08c:9FCD:4B77:4de1:229.020.164.172]/LnIzKLn/StXMmto
Http://2-6SB2KV8V8MV290SIC08D9J7-IRM9FTPC8ZZ.hwo9el74qqv1.zm/tr9K2BSFkbU-A8wJR/CGEL_82/cnMuBB%a3j34
file:///fUtCm%b6qNK/lltu?NvBAhM/sJ8pOm:/jJ18OTM6U%f5v%3f/
http://76OXC.pn.GA:15181/OPErhH1cHtl1ba/eIPkR6%1EG/8fVd02k/Ky%b0D5izq4k
ftp://154.108.127.0/vGpMboeazp05/usfmVeitt0pf3o/Ue4OMVT/sJ9BAYSLje
ftp://ivbv0.zCR-0J.lku/6m26/7tElM/%b2%0BI.Ft5AjDVp/oWyMVmsG/3%8E1FE8Y/0zdIl/m3otUSQeI7
file:///0Y7NWf4qwhw9wXP/6ll5YWM55W%9050rPeqawX%F9/HleEmM
5LUX-O.q-33d.tn/smzXQJn3H/81mg%4de_/jb%97hT
http://84W32/CCKpkt/c0bqCnoQ5Y
ftp://nyqaz.MT/0OfOsU7S1H9BM/OjhdD/izbR4txUY
8wo2j2c1z9s.ef2ki0mlvvnjm5vfyu.t5a-yb41uykgo5kn1qxzffhz667dty8mytg6ir7os9hoxwm2.mw/%39FEVmD/%a4qRT5W5qW.yR/8XB9NHyB/
http://rbf6ezzlhpe.hk/%0DK8/IXXJAsC?mV8vvDI8K=6t9%6EG1Dt+M7N+D5n@Vd79n%d8E+gj+ofnZ%16loobN+f3-S+e,IH&lnh=
wu3w.0J5.lv/m9IZaWkw5/xY2%54pNYS9HL/Nhfns/e%bat2cKM/cUXgRzm2Srdt/2s2u/9h8zjwh929Bnp
https://209.73.217.17/dJvsqDH/RH6Ok_eSc8wO5/BOJws6/9f0DvXJ4/?%ea'Fx=P&6h3zz3eGCtK=4MF76p7Em
jfajtdt5k6gu11la2jbih.MA/zcaTNUL/3q%31eLT%bc3S/L6v2rt/WtbA0%45~TIvPD
ftp://Defi-z.gr:16993/=7IIaMpVy3OLs/QtQD7qF5Vr/=RVbNDH8/y3oUHmX.v/Td%dcbiGlArA%720
ftp://[544f:e60a::8772:D633:DA1F:081.021.019.189]:62615/%CB6Wy1K/X%0EcoPQ/IgnCMLPynfx/fdFHb
ftp://1INQM6.4y.RO/
Http://T778hd416.g9r96v.bs:64804/GbWp%47K/zgTKs/cBHzmYZ=AI23VY
HTTPS://6hp3j2y2tuakzv1rnq9vnvn1w0j6roo3if:58975/vH8BLTu3hzkk
ftp://Ye1dfbl0eae8lqiiqaojj.JO/8EjAq0TzD:/Bz3Pm2qyWo/ZX58A2/yjn%9F3xJZjsVhw
66.242.9.138/CYHK1bGpZ/5yyVD%cbC
nHZMBEJWO.ST/ABXauli3wuJ/WUxhKaZJg
ftp://[8463:c210::b5d1]:34094/8%AC7Fc/Qh6%62yFExJbdaB/0cAZ3iSKlk8sU;TYPE=D
http://vmlyl0efotpfd-tew59kcpsi2u7qd/UbXy1Cc/L%0cwnzmdjz/?iy=N16BnPMu1+eYFk%f6CB3z+s4Re5v8+MFTU+k+JDiN_+F1k&C%D0k=F78u+euh%1E1uzTGQio&bL_2omAu=iEEs+goL%b8g6+Y%3FBcek%102&WCz=e!Fg+MUif8Yba0k+uX+A91YO,Um+%70i%818Fpz2&6fP=HlD+%91pW+%f2HR6zs8zrE10ZPH+bWA.BB6k+Df3w:X85xDnDjSiPY+AyDpuSl4VEVTJzA3g&OtUR6=
http://bCNNCLT.gxa2sbn/lAFakp
D19f.oD5.bb/xUG6W8VxTcjMG/jYMuWlVMygf/UtIwE13c/%a9wzpO%AFxQ9
q8HY2P.r5T.AU/nc0Iq%28QAF/#yOD3%b3UA%d79e%1EmJp3
dPY3X09.AC/STpa%97U%b53yKP4Te/%71KZZvIC#nA1W2z
ftp://3gb.xgjm/wF%ado0cM/u%0DmCW8L/d9Ss%61dKQ
6m.56xkyt.32O.com/ToEAr%BEdi/xBpPU2NqC/74sgdq%BD9/WSrx5/5ldupD%47J/9boeZj
ftp://s0y6r7hg7.XN--KGBECHTV/xQizIlOK9/uxho7%bd/RvxbFGQ4o/O%42UeWF?/GAZ5E8b2/eRaq/l:-1ASwSpw/2FkowF%12Ss/vtCq9dysEc%1ee/
[d18d:1707::]/NGZMInsLF8/kgC3y/F66qc1qt6OWfeS/DyngWA
file:///%55A4VpGsup
file:///WNEw%bfTWDLF/s%A9oZoWUo
Ftp://2tdk.Ube6velthhhx8o.GM/bUH4XycSEKkTE
ftp://7kxk4ujzz.kp:32621/hbop0%25sK/rw7RBE0lTN/tX5BLF
FILE:///IQExpA4kDvUfTkH6Bg/MeVJ4aIUbXCJf
file:///SIE0AkJFq/ZPJLyYK/6hA3x1InlGm1
http://047.014.184.200/Z_QdOwjzfBue4Nt/aEn/xuEQD/cXlnoxHIK%7d8h/1%eegEk7E0/8Ejku@r1Z/UZ4gG/%484zOJsP%1b/Lc1okbWRzN5UJ
Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L
FILE://155.24.106.255/3VEZIT7
d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ
lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET
l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C
FILE://a6ys9a4.xj.BY/%99BGXp/F=yJtxc71/gvXuHuB9k
212.072.006.032/6kV8ce%2e/%e7lzm-HB%4artP/zg6tWMW7RIG?U7=HAXw$D3sM%7DyDJ&Gt=
http://[ea5::]/eIdv5xl/5qhxlOvzw%018f/N3RQQKCz/WzUnsSg8KA3/7ohHZCp
file:///g_T81EaNw2nJB/1yUUT
http://2XXY0MZ.fwa.791ck-2gx.bd/uO6FW?ZS5jE:=m:
https://[8368:F154::f99f]/Y3h8FgzTYYpzn/zHFhQECC/CGtX/8v_~jn3Kn

View File

@ -98,12 +98,4 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
Analyzer a = new GreekAnalyzer(Version.LUCENE_30);
assertAnalyzesTo(a, "Α.Π.Τ.", new String[] { "α.π.τ." });
}
/**
* test that acronym normalization works
*/
public void testAcronym() throws Exception {
Analyzer a = new GreekAnalyzer(Version.LUCENE_31);
assertAnalyzesTo(a, "Α.Π.Τ.", new String[] { "απτ" });
}
}

View File

@ -39,6 +39,8 @@ public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
checkOneTermReuse(a, "book", "book");
// stopword
assertAnalyzesTo(a, "the", new String[] {});
// possessive removal
checkOneTermReuse(a, "steven's", "steven");
}
/** test use of exclusion set */

View File

@ -111,7 +111,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesTo(
fa,
"33Bis 1940-1945 1940:1945 (---i+++)*",
new String[] { "33bis", "1940-1945", "1940", "1945", "i" });
new String[] { "33bis", "1940", "1945", "1940", "1945", "i" });
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.th;
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
import org.junit.Assume;
/**
@ -39,37 +40,35 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
}
/*
* Thai numeric tokens are typed as <ALPHANUM> instead of <NUM>.
* This is really a problem with the interaction w/ StandardTokenizer, which is used by ThaiAnalyzer.
*
* The issue is this: in StandardTokenizer the entire [:Thai:] block is specified in ALPHANUM (including punctuation, digits, etc)
* Fix is easy: refine this spec to exclude thai punctuation and digits.
*
* A better fix, one that would also fix quite a few other languages, would be to remove the thai hack.
* Instead, allow the definition of alphanum to include relevant categories like nonspacing marks!
*/
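// A sketch of the "easy fix" suggested above (hypothetical; not the grammar
// shipped in this commit). Instead of admitting the entire Thai block into
// ALPHANUM with a macro such as
//   THAI = [\u0E00-\u0E59]
// the range could be narrowed to Thai letters and combining marks only,
// along the lines of
//   THAI = [\u0E01-\u0E3A\u0E40-\u0E4E]  // excludes Thai digits and punctuation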
public void testBuggyTokenType() throws Exception {
Assume.assumeTrue(ThaiWordFilter.DBBI_AVAILABLE);
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
}
/* correct testcase
public void testTokenType() throws Exception {
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<NUM>" });
assertAnalyzesTo(new ThaiAnalyzer(TEST_VERSION_CURRENT), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
new String[] { "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
"<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
"<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
"<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
"<NUM>" });
}
*/
public void testAnalyzer() throws Exception {
/**
* Thai numeric tokens were typed as <ALPHANUM> instead of <NUM>.
* @deprecated testing backwards behavior
*/
@Deprecated
public void testBuggyTokenType30() throws Exception {
Assume.assumeTrue(ThaiWordFilter.DBBI_AVAILABLE);
ThaiAnalyzer analyzer = new ThaiAnalyzer(TEST_VERSION_CURRENT);
assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_30), "การที่ได้ต้องแสดงว่างานดี ๑๒๓",
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี", "๑๒๓" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>",
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
}
/** @deprecated testing backwards behavior */
@Deprecated
public void testAnalyzer30() throws Exception {
Assume.assumeTrue(ThaiWordFilter.DBBI_AVAILABLE);
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
assertAnalyzesTo(analyzer, "", new String[] {});
@ -124,6 +123,23 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
assertAnalyzesToReuse(
analyzer,
"บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
new String[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz@demo.com" });
}
/** @deprecated for version back compat */
@Deprecated
public void testReusableTokenStream30() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
assertAnalyzesToReuse(analyzer, "", new String[] {});
assertAnalyzesToReuse(
analyzer,
"การที่ได้ต้องแสดงว่างานดี",
new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});
assertAnalyzesToReuse(
analyzer,
"บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
new String[] { "บริษัท", "ชื่อ", "xy&z", "คุย", "กับ", "xyz@demo.com" });
}
}

View File

@ -0,0 +1,211 @@
package org.apache.lucene.analysis.standard;
/*
* Copyright 2001-2005 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URL;
import java.net.URLConnection;
import java.text.DateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.SortedSet;
import java.util.TimeZone;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Generates a file containing JFlex macros to accept valid ASCII TLDs
* (top level domains), for inclusion in JFlex grammars that can accept
* domain names.
* <p/>
* The IANA Root Zone Database is queried via HTTP from the URL given as
* cmdline arg #0; the response is parsed, and a JFlex macro accepting all
* valid ASCII-only TLDs, including punycode forms of internationalized
* TLDs, is written out to the file given as cmdline arg #1.
*/
public class GenerateJflexTLDMacros {
public static void main(String... args) throws Exception {
if (args.length != 2 || args[0].equals("--help") || args[0].equals("-help")) {
System.err.println("Cmd line params:");
System.err.println("\tjava " + GenerateJflexTLDMacros.class.getName()
+ "<ZoneFileURL> <JFlexOutputFile>");
System.exit(1);
}
new GenerateJflexTLDMacros(args[0], args[1]).execute();
}
private static final String NL = System.getProperty("line.separator");
private static final String APACHE_LICENSE
= "/*" + NL
+ " * Copyright 2001-2005 The Apache Software Foundation." + NL
+ " *" + NL
+ " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL
+ " * you may not use this file except in compliance with the License." + NL
+ " * You may obtain a copy of the License at" + NL
+ " *" + NL
+ " * http://www.apache.org/licenses/LICENSE-2.0" + NL
+ " *" + NL
+ " * Unless required by applicable law or agreed to in writing, software" + NL
+ " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
+ " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
+ " * See the License for the specific language governing permissions and" + NL
+ " * limitations under the License." + NL
+ " */" + NL + NL;
private static final Pattern TLD_PATTERN_1
= Pattern.compile("([-A-Za-z0-9]+)\\.\\s+NS\\s+.*");
private static final Pattern TLD_PATTERN_2
= Pattern.compile("([-A-Za-z0-9]+)\\.\\s+\\d+\\s+IN\\s+NS\\s+.*");
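  // Illustrative zone-file lines (hypothetical data) that the patterns above accept:
  //   TLD_PATTERN_1:  "com.  NS  a.gtld-servers.net."
  //   TLD_PATTERN_2:  "org.  172800  IN  NS  a0.org.afilias-nst.info."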
private final URL tldFileURL;
private long tldFileLastModified = -1L;
private final File outputFile;
public GenerateJflexTLDMacros(String tldFileURL, String outputFile)
throws Exception {
this.tldFileURL = new URL(tldFileURL);
this.outputFile = new File(outputFile);
}
/**
* Downloads the IANA Root Zone Database, extracts the ASCII TLDs, then
* writes a JFlex macro accepting any of them case-insensitively out to
* the specified output file.
*
* @throws IOException if there is a problem either downloading the database
* or writing out the output file.
*/
public void execute() throws IOException {
final SortedSet<String> TLDs = getIANARootZoneDatabase();
writeOutput(TLDs);
System.err.println("Wrote " + TLDs.size() + " top level domains to '"
+ outputFile + "'.");
}
/**
* Downloads the IANA Root Zone Database.
* @return downcased sorted set of ASCII TLDs
* @throws java.io.IOException if there is a problem downloading the database
*/
private SortedSet<String> getIANARootZoneDatabase() throws IOException {
final SortedSet<String> TLDs = new TreeSet<String>();
final URLConnection connection = tldFileURL.openConnection();
connection.setUseCaches(false);
connection.addRequestProperty("Cache-Control", "no-cache");
connection.connect();
tldFileLastModified = connection.getLastModified();
BufferedReader reader = new BufferedReader
(new InputStreamReader(connection.getInputStream(), "US-ASCII"));
try {
String line;
while (null != (line = reader.readLine())) {
Matcher matcher = TLD_PATTERN_1.matcher(line);
if (matcher.matches()) {
TLDs.add(matcher.group(1).toLowerCase(Locale.US));
} else {
matcher = TLD_PATTERN_2.matcher(line);
if (matcher.matches()) {
TLDs.add(matcher.group(1).toLowerCase(Locale.US));
}
}
}
} finally {
reader.close();
}
return TLDs;
}
/**
* Writes a file containing a JFlex macro that will accept any of the given
* TLDs case-insensitively.
*
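 * A sketch of the generated output (the TLDs shown are illustrative only):
 * <pre>
 * ASCIITLD = "." (
 *       [cC][oO][mM]
 *     | [oO][rR][gG]
 *     ) "."? // Accept trailing root (empty) domain
 * </pre>
 *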
* @param ASCIITLDs The downcased sorted set of top level domains to accept
* @throws IOException if there is an error writing the output file
*/
private void writeOutput(SortedSet<String> ASCIITLDs) throws IOException {
final DateFormat dateFormat = DateFormat.getDateTimeInstance
(DateFormat.FULL, DateFormat.FULL, Locale.US);
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
final Writer writer = new OutputStreamWriter
(new FileOutputStream(outputFile), "UTF-8");
try {
writer.write(APACHE_LICENSE);
writer.write("// Generated from IANA Root Zone Database <");
writer.write(tldFileURL.toString());
writer.write(">");
writer.write(NL);
if (tldFileLastModified > 0L) {
writer.write("// file version from ");
writer.write(dateFormat.format(tldFileLastModified));
writer.write(NL);
}
writer.write("// generated on ");
writer.write(dateFormat.format(new Date()));
writer.write(NL);
writer.write("// by ");
writer.write(this.getClass().getName());
writer.write(NL);
writer.write(NL);
writer.write("ASCIITLD = \".\" (");
writer.write(NL);
boolean isFirst = true;
for (String ASCIITLD : ASCIITLDs) {
writer.write("\t");
if (isFirst) {
isFirst = false;
writer.write(" ");
} else {
writer.write("| ");
}
writer.write(getCaseInsensitiveRegex(ASCIITLD));
writer.write(NL);
}
writer.write("\t) \".\"? // Accept trailing root (empty) domain");
writer.write(NL);
writer.write(NL);
} finally {
writer.close();
}
}
/**
* Returns a regex that will accept the given ASCII TLD case-insensitively.
*
* @param ASCIITLD The ASCII TLD to generate a regex for
* @return a regex that will accept the given ASCII TLD case-insensitively
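 *         (for example, "com" yields "[cC][oO][mM]"; digits and '-' pass through unchanged)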
*/
private String getCaseInsensitiveRegex(String ASCIITLD) {
StringBuilder builder = new StringBuilder();
for (int pos = 0 ; pos < ASCIITLD.length() ; ++pos) {
char ch = ASCIITLD.charAt(pos);
if (Character.isDigit(ch) || ch == '-') {
builder.append(ch);
} else {
builder.append("[").append(ch).append(Character.toUpperCase(ch)).append("]");
}
}
return builder.toString();
}
}
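A minimal sketch of driving the generator; the zone-file URL and output path below are illustrative assumptions, not values fixed by this commit:

    // Hypothetical invocation, mirroring main(): download, parse, write the macro file.
    new GenerateJflexTLDMacros(
        "http://www.internic.net/zones/root.zone",  // assumed root zone URL
        "ASCIITLD.jflex-macro")                     // assumed output file
        .execute();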

@@ -44,11 +44,11 @@ import com.ibm.icu.util.ULocale;
*/
public class DefaultICUTokenizerConfig extends ICUTokenizerConfig {
/** Token type for words containing ideographic characters */
public static final String WORD_IDEO = "<IDEO>";
public static final String WORD_IDEO = "<IDEOGRAPHIC>";
/** Token type for words containing Japanese kana */
public static final String WORD_KANA = "<KANA>";
/** Token type for words that contain letters */
public static final String WORD_LETTER = "<WORD>";
public static final String WORD_LETTER = "<ALPHANUM>";
/** Token type for words that appear to be numbers */
public static final String WORD_NUMBER = "<NUM>";

@@ -17,17 +17,16 @@ package org.apache.lucene.analysis.icu.segmentation;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.icu.ICUNormalizer2Filter;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
public class TestICUTokenizer extends BaseTokenStreamTestCase {
@@ -220,6 +219,6 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
public void testTypes() throws Exception {
assertAnalyzesTo(a, "David has 5000 bones",
new String[] {"david", "has", "5000", "bones"},
new String[] { "<WORD>", "<WORD>", "<NUM>", "<WORD>" });
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" });
}
}

@@ -0,0 +1,31 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicFilter;
/**
* @version $Id$
*/
public class ClassicFilterFactory extends BaseTokenFilterFactory {
public TokenFilter create(TokenStream input) {
return new ClassicFilter(input);
}
}

@@ -0,0 +1,40 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import java.io.Reader;
import java.util.Map;
/**
* @version $Id$
*/
public class ClassicTokenizerFactory extends BaseTokenizerFactory {
@Override
public void init(Map<String,String> args) {
super.init(args);
assureMatchVersion();
}
public Tokenizer create(Reader input) {
return new ClassicTokenizer(luceneMatchVersion, input);
}
}
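Bypassing Solr, the two Classic factories above boil down to the following; a sketch assuming matchVersion and reader are already in scope:

    Tokenizer tokenizer = new ClassicTokenizer(matchVersion, reader); // old StandardTokenizer behavior
    TokenStream stream  = new ClassicFilter(tokenizer);               // old StandardFilter behavior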

@@ -0,0 +1,28 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
/** Factory for {@link EnglishPossessiveFilter} */
public class EnglishPossessiveFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new EnglishPossessiveFilter(input);
}
}
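The wrapped filter strips trailing English possessives from tokens; a minimal sketch (the input stream is assumed to be in scope):

    TokenStream stream = new EnglishPossessiveFilter(input); // "John's" -> "John"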

@@ -17,6 +17,8 @@
package org.apache.solr.analysis;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
@@ -24,7 +26,13 @@ import org.apache.lucene.analysis.standard.StandardFilter;
* @version $Id$
*/
public class StandardFilterFactory extends BaseTokenFilterFactory {
@Override
public void init(Map<String,String> args) {
super.init(args);
assureMatchVersion();
}
public StandardFilter create(TokenStream input) {
return new StandardFilter(input);
return new StandardFilter(luceneMatchVersion, input);
}
}
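Because init() now calls assureMatchVersion(), manual use of the factory must supply a version argument; a sketch (the args map contents and version label are assumptions):

    Map<String,String> args = new HashMap<String,String>();
    args.put("luceneMatchVersion", "LUCENE_40");       // assumed version label
    StandardFilterFactory factory = new StandardFilterFactory();
    factory.init(args);                                // fails without luceneMatchVersion
    StandardFilter filter = factory.create(tokenizer); // version-aware StandardFilter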

@@ -32,22 +32,34 @@ public class TestStandardFactories extends BaseTokenTestCase {
* Test StandardTokenizerFactory
*/
public void testStandardTokenizer() throws Exception {
Reader reader = new StringReader("What's this thing do?");
Reader reader = new StringReader("Wha\u0301t's this thing do?");
StandardTokenizerFactory factory = new StandardTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"Wha\u0301t's", "this", "thing", "do" });
}
/**
* Test ClassicTokenizerFactory
*/
public void testClassicTokenizer() throws Exception {
Reader reader = new StringReader("What's this thing do?");
ClassicTokenizerFactory factory = new ClassicTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] {"What's", "this", "thing", "do" });
}
/**
* Test StandardFilterFactory
* Test ClassicFilterFactory
*/
public void testStandardFilter() throws Exception {
Reader reader = new StringReader("What's this thing do?");
StandardTokenizerFactory factory = new StandardTokenizerFactory();
ClassicTokenizerFactory factory = new ClassicTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
StandardFilterFactory filterFactory = new StandardFilterFactory();
ClassicFilterFactory filterFactory = new ClassicFilterFactory();
filterFactory.init(DEFAULT_VERSION_PARAM);
Tokenizer tokenizer = factory.create(reader);
TokenStream stream = filterFactory.create(tokenizer);