LUCENE-2213: rename ArrayUtil.getNextSize -> oversize; tweak how it picks the next size

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@901662 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2010-01-21 11:54:50 +00:00
parent 94826b348c
commit 9b3b890f45
71 changed files with 26485 additions and 59 deletions


@ -35,6 +35,7 @@ package org.tartarus.snowball;
import java.lang.reflect.InvocationTargetException;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
/**
* This is the rev 502 of the Snowball SVN trunk,
@ -432,7 +433,7 @@ public abstract class SnowballProgram {
final int newLength = limit + adjustment;
//resize if necessary
if (newLength > current.length) {
-            char newBuffer[] = new char[ArrayUtil.getNextSize(newLength)];
+            char newBuffer[] = new char[ArrayUtil.oversize(newLength, RamUsageEstimator.NUM_BYTES_CHAR)];
System.arraycopy(current, 0, newBuffer, 0, limit);
current = newBuffer;
}
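For reference, a minimal sketch of the grow-on-demand pattern the renamed ArrayUtil.oversize encourages. It assumes only the ArrayUtil.oversize(int, int) and RamUsageEstimator.NUM_BYTES_CHAR calls visible in the hunk above; the helper class and method names are hypothetical.

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;

final class CharBufferGrowth {
  // Hypothetical helper: returns a buffer with room for at least minLength chars,
  // copying over the first 'used' chars when a reallocation is needed.
  static char[] ensureCapacity(char[] buffer, int used, int minLength) {
    if (minLength <= buffer.length) {
      return buffer; // already large enough, no reallocation
    }
    // oversize() picks the next allocation size from the requested minimum and the
    // per-element width, so repeated small appends amortize to roughly linear cost.
    char[] newBuffer = new char[ArrayUtil.oversize(minLength, RamUsageEstimator.NUM_BYTES_CHAR)];
    System.arraycopy(buffer, 0, newBuffer, 0, used);
    return newBuffer;
  }
}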


@ -0,0 +1,2 @@
build
snowball


@ -0,0 +1,16 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


@ -0,0 +1,25 @@
Lucene Snowball README file
This project provides a pre-compiled version of the Snowball stemmers
based on revision 500 of the Tartarus Snowball repository,
together with classes integrating them with the Lucene search engine.
A few changes have been made to the static Snowball code and the compiled stemmers:
* The SnowballProgram class is made abstract and gains a new abstract method stem(), so the Lucene filter class SnowballFilter can avoid reflection.
* All uses of StringBuffer have been refactored to StringBuilder for speed.
* The Snowball BSD license header has been added to the Java classes to keep RAT from adding new ASL headers.
IMPORTANT NOTICE ON BACKWARDS COMPATIBILITY!
An index created using the Snowball module in Lucene 2.3.2 and below
might not be compatible with the Snowball module in Lucene 2.4 or greater.
For more information about this issue see:
https://issues.apache.org/jira/browse/LUCENE-1142
For more information on Snowball, see:
http://snowball.tartarus.org/
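As a rough sketch of the first change listed above (calling the abstract stem() directly on a generated stemmer, no reflection involved); the demo class name and sample word are arbitrary:

import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.ext.EnglishStemmer;

public class StemOneWord {
  public static void main(String[] args) {
    SnowballProgram stemmer = new EnglishStemmer(); // any generated stemmer works the same way
    stemmer.setCurrent("integrations");             // load the word to stem
    stemmer.stem();                                 // abstract method, resolved statically
    System.out.println(stemmer.getCurrent());       // prints the stemmed form
  }
}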


@ -0,0 +1,26 @@
Copyright (c) 2001, Dr Martin Porter
Copyright (c) 2002, Richard Boulton
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@ -0,0 +1 @@
snowball


@ -0,0 +1,7 @@
#!/bin/csh -f
set infile = $1
set outdir = $2
set name = $infile:h:t:uStemmer
exec $0:h/snowball $infile -o $outdir/$name -n $name -java

contrib/snowball/build.xml Normal file

@ -0,0 +1,155 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="snowball" default="default">
<description>
Snowball Analyzers
</description>
<import file="../contrib-build.xml"/>
<property name="snowball.cvsroot" value=":pserver:cvsuser@cvs.tartarus.org:/home/cvs"/>
<property name="snowball.cvs.password" value="anonymous"/>
<property name="snowball.root" value="snowball/website"/>
<property name="bin.dir" location="bin"/>
<property name="analyzers.jar" location="${common.dir}/build/contrib/analyzers/common/lucene-analyzers-${version}.jar"/>
<available property="analyzers.jar.present" type="file" file="${analyzers.jar}"/>
<path id="classpath">
<pathelement path="${lucene.jar}"/>
<pathelement path="${analyzers.jar}"/>
<pathelement path="${project.classpath}"/>
</path>
<target name="jar" depends="compile" description="Create JAR">
<jarify>
<metainf-includes>
<metainf dir=".">
<include name="SNOWBALL-LICENSE.txt"/>
</metainf>
</metainf-includes>
</jarify>
</target>
<target name="jar-src" depends="init"
description="Packages the sources as JAR file">
<jarify basedir="${src.dir}" destfile="${build.dir}/${final.name}-src.jar">
<metainf-includes>
<metainf dir=".">
<include name="SNOWBALL-LICENSE.txt"/>
</metainf>
</metainf-includes>
</jarify>
</target>
<!-- ====================================================== -->
<!-- Download Snowball code -->
<!-- ====================================================== -->
<target name="download" depends="init">
<cvs cvsRoot="${snowball.cvsroot}"
package="${snowball.root}"
passfile="snowball.cvspass"/>
</target>
<target name="create-passfile">
<cvspass cvsroot="${snowball.cvsroot}"
password="${snowball.cvs.password}"
passfile="snowball.cvspass"
/>
</target>
<!-- ====================================================== -->
<!-- Compile Snowball C code -->
<!-- ====================================================== -->
<target name="compile-compiler" depends="download">
<apply failonerror="true" executable="gcc" parallel="true">
<arg value="-O"/>
<arg value="-o"/>
<arg value="${bin.dir}/snowball"/>
<fileset dir="${snowball.root}/p" includes="*.c"/>
</apply>
</target>
<!-- ====================================================== -->
<!-- Generate Java code -->
<!-- ====================================================== -->
<target name="generate" depends="compile-compiler">
<apply failonerror="true" executable="${bin.dir}/snowball.sh">
<srcfile/>
<arg value="${src.dir}/net/sf/snowball/ext"/>
<fileset dir="${snowball.root}" includes="**/stem.sbl"/>
</apply>
<copy todir="${src.dir}/net">
<fileset dir="${snowball.root}/net">
<include name="**/*.java"/>
</fileset>
</copy>
</target>
<target name="docs">
<taskdef
name="anakia"
classname="org.apache.velocity.anakia.AnakiaTask"
>
<classpath refid="anakia.classpath"/>
</taskdef>
<anakia
basedir="${docs.src}"
destdir="${docs.dest}/"
extension=".html" style="./site.vsl"
projectFile="stylesheets/project.xml"
excludes="**/stylesheets/** empty.xml"
includes="**/*.xml"
lastModifiedCheck="true"
templatePath="${jakarta.site2.home}/xdocs/stylesheets"
>
</anakia>
</target>
<target name="compile-core" depends="build-analyzers, common.compile-core" />
<target name="compile-test" depends="download-vocab-tests, common.compile-test" />
<target name="build-analyzers" unless="analyzers.jar.present">
<echo>Snowball building dependency ${analyzers.jar}</echo>
<ant antfile="../analyzers/build.xml" target="default" inheritall="false" dir="../analyzers" />
</target>
<property name="snowball.vocab.rev" value="500"/>
<property name="snowball.vocab.url"
value="svn://svn.tartarus.org/snowball/trunk/data"/>
<property name="vocab.dir" value="src/test/org/apache/lucene/analysis/snowball"/>
<target name="download-vocab-tests" depends="compile-core"
description="Downloads Snowball vocabulary tests">
<sequential>
<mkdir dir="${vocab.dir}"/>
<exec dir="${vocab.dir}" executable="${svn.exe}" failifexecutionfails="false" failonerror="true">
<arg line="checkout --trust-server-cert --non-interactive -r ${snowball.vocab.rev} ${snowball.vocab.url}"/>
</exec>
</sequential>
</target>
</project>


@ -0,0 +1,148 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- Content Stylesheet for Site -->
<!-- start the processing -->
<!-- ====================================================================== -->
<!-- GENERATED FILE, DO NOT EDIT, EDIT THE XML FILE IN xdocs INSTEAD! -->
<!-- Main Page Section -->
<!-- ====================================================================== -->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"/>
<meta name="author" value="Doug Cutting">
<meta name="email" value="cutting@apache.org">
<title>Snowball Stemmers for Lucene - Overview - Snowball Stemmers for Lucene</title>
</head>
<body bgcolor="#ffffff" text="#000000" link="#525D76">
<table border="0" width="100%" cellspacing="0">
<!-- TOP IMAGE -->
<tr>
<td colspan="2">
<a href="http://jakarta.apache.org"><img src="http://jakarta.apache.org/images/jakarta-logo.gif" align="left" border="0"/></a>
</td>
</tr>
</table>
<table border="0" width="100%" cellspacing="4">
<tr><td colspan="2">
<hr noshade="" size="1"/>
</td></tr>
<tr>
<!-- LEFT SIDE NAVIGATION -->
<td width="20%" valign="top" nowrap="true">
<!-- ============================================================ -->
<p><strong>Documentation</strong></p>
<ul>
<li> <a href="./api/index.html">Javadoc</a>
</li>
</ul>
<p><strong>Download</strong></p>
<ul>
<li> <a href="http://jakarta.apache.org/builds/jakarta-lucene-sandbox/snowball/">Releases</a>
</li>
<li> <a href="http://jakarta.apache.org/site/cvsindex.html">CVS Repository</a>
</li>
</ul>
<p><strong>Links</strong></p>
<ul>
<li> <a href="http://snowball.tartarus.org/">Snowball Home</a>
</li>
<li> <a href="http://jakarta.apache.org/lucene/">Lucene Home</a>
</li>
<li> <a href="http://jakarta.apache.org/lucene/docs/lucene-sandbox/">Lucene Sandbox</a>
</li>
</ul>
<p><strong>Jakarta</strong></p>
<ul>
<li> <a href="http://jakarta.apache.org/site/getinvolved.html">Get Involved</a>
</li>
<li> <a href="http://jakarta.apache.org/site/acknowledgements.html">Acknowledgements</a>
</li>
<li> <a href="http://jakarta.apache.org/site/contact.html">Contact</a>
</li>
<li> <a href="http://jakarta.apache.org/site/legal.html">Legal</a>
</li>
</ul>
</td>
<td width="80%" align="left" valign="top">
<table border="0" cellspacing="0" cellpadding="2" width="100%">
<tr><td bgcolor="#525D76">
<font color="#ffffff" face="arial,helvetica,sanserif">
<a name="Snowball Stemmers for Lucene"><strong>Snowball Stemmers for Lucene</strong></a>
</font>
</td></tr>
<tr><td>
<blockquote>
<p>
This project provides pre-compiled version of the Snowball stemmers
together with classes integrating them with the Lucene search engine.
</p>
</blockquote>
</p>
</td></tr>
<tr><td><br/></td></tr>
</table>
<table border="0" cellspacing="0" cellpadding="2" width="100%">
<tr><td bgcolor="#525D76">
<font color="#ffffff" face="arial,helvetica,sanserif">
<a name="Download"><strong>Download</strong></a>
</font>
</td></tr>
<tr><td>
<blockquote>
<p>
Releases of the stemmers are available
<a href="http://jakarta.apache.org/builds/jakarta-lucene-sandbox/snowball/">
here</a>
</p>
</blockquote>
</p>
</td></tr>
<tr><td><br/></td></tr>
</table>
</td>
</tr>
<!-- FOOTER -->
<tr><td colspan="2">
<hr noshade="" size="1"/>
</td></tr>
<tr><td colspan="2">
<div align="center"><font color="#525D76" size="-1"><em>
Copyright &#169; 1999-2004, The Apache Software Foundation
</em></font></div>
</td></tr>
</table>
</body>
</html>
<!-- end the processing -->


@ -0,0 +1,43 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-contrib</artifactId>
<version>@version@</version>
</parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-snowball</artifactId>
<name>Lucene Snowball</name>
<version>@version@</version>
<description>Snowball Analyzers</description>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers</artifactId>
<version>@version@</version>
</dependency>
</dependencies>
</project>


@ -0,0 +1 @@
:pserver:cvsuser@cvs.tartarus.org:/home/cvs Ay=0=a%0bZ


@ -0,0 +1,121 @@
package org.apache.lucene.analysis.snowball;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
/** Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
* LowerCaseFilter}, {@link StopFilter} and {@link SnowballFilter}.
*
* Available stemmers are listed in org.tartarus.snowball.ext. The name of a
* stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
* {@link org.tartarus.snowball.ext.EnglishStemmer} is named "English".
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}, with the following addition:
* <ul>
* <li> As of 3.1, uses {@link TurkishLowerCaseFilter} for the Turkish language.
* </ul>
* </p>
*/
public final class SnowballAnalyzer extends Analyzer {
private String name;
private Set<?> stopSet;
private final Version matchVersion;
/** Builds the named analyzer with no stop words. */
public SnowballAnalyzer(Version matchVersion, String name) {
this.name = name;
this.matchVersion = matchVersion;
}
/**
* Builds the named analyzer with the given stop words.
* @deprecated Use {@link #SnowballAnalyzer(Version, String, Set)} instead.
*/
@Deprecated
public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
this(matchVersion, name);
stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
}
/** Builds the named analyzer with the given stop words. */
public SnowballAnalyzer(Version matchVersion, String name, Set<?> stopWords) {
this(matchVersion, name);
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion,
stopWords));
}
/** Constructs a {@link StandardTokenizer} filtered by a {@link
StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},
and a {@link SnowballFilter} */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(matchVersion, reader);
result = new StandardFilter(result);
// Use a special lowercase filter for turkish, the stemmer expects it.
if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish"))
result = new TurkishLowerCaseFilter(result);
else
result = new LowerCaseFilter(matchVersion, result);
if (stopSet != null)
result = new StopFilter(matchVersion,
result, stopSet);
result = new SnowballFilter(result, name);
return result;
}
private class SavedStreams {
Tokenizer source;
TokenStream result;
}
/** Returns a (possibly reused) {@link StandardTokenizer} filtered by a
* {@link StandardFilter}, a {@link LowerCaseFilter},
* a {@link StopFilter}, and a {@link SnowballFilter} */
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new StandardTokenizer(matchVersion, reader);
streams.result = new StandardFilter(streams.source);
// Use a special lowercase filter for turkish, the stemmer expects it.
if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish"))
streams.result = new TurkishLowerCaseFilter(streams.result);
else
streams.result = new LowerCaseFilter(matchVersion, streams.result);
if (stopSet != null)
streams.result = new StopFilter(matchVersion,
streams.result, stopSet);
streams.result = new SnowballFilter(streams.result, name);
setPreviousTokenStream(streams);
} else {
streams.source.reset(reader);
}
return streams.result;
}
}
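A hedged usage sketch for this analyzer; the field name and sample text are arbitrary, and it assumes the TermAttribute consumption pattern used elsewhere in this commit:

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class SnowballAnalyzerDemo {
  public static void main(String[] args) throws Exception {
    SnowballAnalyzer analyzer = new SnowballAnalyzer(Version.LUCENE_31, "English");
    TokenStream ts = analyzer.tokenStream("body", new StringReader("stemming searches quickly"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {     // one (possibly stemmed) token per iteration
      System.out.println(term.term());
    }
    ts.close();
  }
}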


@ -0,0 +1,92 @@
package org.apache.lucene.analysis.snowball;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
import org.apache.lucene.analysis.LowerCaseFilter; // javadoc @link
import org.tartarus.snowball.SnowballProgram;
/**
* A filter that stems words using a Snowball-generated stemmer.
*
* Available stemmers are listed in {@link org.tartarus.snowball.ext}.
* <p><b>NOTE</b>: SnowballFilter expects lowercased text.
* <ul>
* <li>For the Turkish language, see {@link TurkishLowerCaseFilter}.
* <li>For other languages, see {@link LowerCaseFilter}.
* </ul>
* </p>
*/
public final class SnowballFilter extends TokenFilter {
private SnowballProgram stemmer;
private TermAttribute termAtt;
public SnowballFilter(TokenStream input, SnowballProgram stemmer) {
super(input);
this.stemmer = stemmer;
termAtt = addAttribute(TermAttribute.class);
}
/**
* Construct the named stemming filter.
*
* Available stemmers are listed in {@link org.tartarus.snowball.ext}.
* The name of a stemmer is the part of the class name before "Stemmer",
* e.g., the stemmer in {@link org.tartarus.snowball.ext.EnglishStemmer} is named "English".
*
* @param in the input tokens to stem
* @param name the name of a stemmer
*/
public SnowballFilter(TokenStream in, String name) {
super(in);
try {
Class<?> stemClass = Class.forName("org.tartarus.snowball.ext." + name + "Stemmer");
stemmer = (SnowballProgram) stemClass.newInstance();
} catch (Exception e) {
throw new RuntimeException(e.toString());
}
termAtt = addAttribute(TermAttribute.class);
}
/** Returns the next input Token, after being stemmed */
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
char termBuffer[] = termAtt.termBuffer();
final int length = termAtt.termLength();
stemmer.setCurrent(termBuffer, length);
stemmer.stem();
final char finalTerm[] = stemmer.getCurrentBuffer();
final int newLength = stemmer.getCurrentBufferLength();
if (finalTerm != termBuffer)
termAtt.setTermBuffer(finalTerm, 0, newLength);
else
termAtt.setTermLength(newLength);
return true;
} else {
return false;
}
}
}
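A short sketch of the non-reflective constructor path, building the chain the class-level NOTE asks for (lowercasing before stemming). It reuses only classes referenced in this commit; the demo class name is hypothetical:

import java.io.StringReader;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.EnglishStemmer;

public class SnowballFilterDemo {
  public static void main(String[] args) throws Exception {
    Version matchVersion = Version.LUCENE_31;
    TokenStream ts = new StandardTokenizer(matchVersion, new StringReader("Stemming Filters"));
    ts = new StandardFilter(ts);
    ts = new LowerCaseFilter(matchVersion, ts);        // SnowballFilter expects lowercased text
    ts = new SnowballFilter(ts, new EnglishStemmer()); // concrete stemmer instance, no reflection
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());
    }
    ts.close();
  }
}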


@ -0,0 +1,24 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<body>
{@link org.apache.lucene.analysis.TokenFilter} and {@link
org.apache.lucene.analysis.Analyzer} implementations that use Snowball
stemmers.
</body>
</html>


@ -0,0 +1,63 @@
/*
Copyright (c) 2001, Dr Martin Porter
Copyright (c) 2002, Richard Boulton
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.tartarus.snowball;
import java.lang.reflect.Method;
public class Among {
public Among (String s, int substring_i, int result,
String methodname, SnowballProgram methodobject) {
this.s_size = s.length();
this.s = s.toCharArray();
this.substring_i = substring_i;
this.result = result;
this.methodobject = methodobject;
if (methodname.length() == 0) {
this.method = null;
} else {
try {
this.method = methodobject.getClass().
getDeclaredMethod(methodname, new Class[0]);
} catch (NoSuchMethodException e) {
throw new RuntimeException(e);
}
}
}
public final int s_size; /* search string */
public final char[] s; /* search string */
public final int substring_i; /* index to longest matching substring */
public final int result; /* result of the lookup */
public final Method method; /* method to use if substring matches */
public final SnowballProgram methodobject; /* object to invoke method on */
};


@ -0,0 +1,566 @@
/*
Copyright (c) 2001, Dr Martin Porter
Copyright (c) 2002, Richard Boulton
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.tartarus.snowball;
import java.lang.reflect.InvocationTargetException;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/**
* This is the rev 502 of the Snowball SVN trunk,
* but modified:
* made abstract and introduced abstract method stem to avoid expensive reflection in filter class.
* refactored StringBuffers to StringBuilder
* uses char[] as buffer instead of StringBuffer/StringBuilder
* eq_s,eq_s_b,insert,replace_s take CharSequence like eq_v and eq_v_b
*/
public abstract class SnowballProgram {
protected SnowballProgram()
{
current = new char[8];
setCurrent("");
}
public abstract boolean stem();
/**
* Set the current string.
*/
public void setCurrent(String value)
{
current = value.toCharArray();
cursor = 0;
limit = value.length();
limit_backward = 0;
bra = cursor;
ket = limit;
}
/**
* Get the current string.
*/
public String getCurrent()
{
return new String(current, 0, limit);
}
/**
* Set the current string.
* @param text character array containing input
* @param length valid length of text.
*/
public void setCurrent(char text[], int length) {
current = text;
cursor = 0;
limit = length;
limit_backward = 0;
bra = cursor;
ket = limit;
}
/**
* Get the current buffer containing the stem.
* <p>
* NOTE: this may be a reference to a different character array than the
* one originally provided with setCurrent, in the exceptional case that
* stemming produced a longer intermediate or result string.
* </p>
* <p>
* It is necessary to use {@link #getCurrentBufferLength()} to determine
* the valid length of the returned buffer. For example, many words are
* stemmed simply by subtracting from the length to remove suffixes.
* </p>
* @see #getCurrentBufferLength()
*/
public char[] getCurrentBuffer() {
return current;
}
/**
* Get the valid length of the character array in
* {@link #getCurrentBuffer()}.
* @return valid length of the array.
*/
public int getCurrentBufferLength() {
return limit;
}
// current string
private char current[];
protected int cursor;
protected int limit;
protected int limit_backward;
protected int bra;
protected int ket;
protected void copy_from(SnowballProgram other)
{
current = other.current;
cursor = other.cursor;
limit = other.limit;
limit_backward = other.limit_backward;
bra = other.bra;
ket = other.ket;
}
protected boolean in_grouping(char [] s, int min, int max)
{
if (cursor >= limit) return false;
char ch = current[cursor];
if (ch > max || ch < min) return false;
ch -= min;
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
cursor++;
return true;
}
protected boolean in_grouping_b(char [] s, int min, int max)
{
if (cursor <= limit_backward) return false;
char ch = current[cursor - 1];
if (ch > max || ch < min) return false;
ch -= min;
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false;
cursor--;
return true;
}
protected boolean out_grouping(char [] s, int min, int max)
{
if (cursor >= limit) return false;
char ch = current[cursor];
if (ch > max || ch < min) {
cursor++;
return true;
}
ch -= min;
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
cursor ++;
return true;
}
return false;
}
protected boolean out_grouping_b(char [] s, int min, int max)
{
if (cursor <= limit_backward) return false;
char ch = current[cursor - 1];
if (ch > max || ch < min) {
cursor--;
return true;
}
ch -= min;
if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) {
cursor--;
return true;
}
return false;
}
protected boolean in_range(int min, int max)
{
if (cursor >= limit) return false;
char ch = current[cursor];
if (ch > max || ch < min) return false;
cursor++;
return true;
}
protected boolean in_range_b(int min, int max)
{
if (cursor <= limit_backward) return false;
char ch = current[cursor - 1];
if (ch > max || ch < min) return false;
cursor--;
return true;
}
protected boolean out_range(int min, int max)
{
if (cursor >= limit) return false;
char ch = current[cursor];
if (!(ch > max || ch < min)) return false;
cursor++;
return true;
}
protected boolean out_range_b(int min, int max)
{
if (cursor <= limit_backward) return false;
char ch = current[cursor - 1];
if(!(ch > max || ch < min)) return false;
cursor--;
return true;
}
protected boolean eq_s(int s_size, CharSequence s)
{
if (limit - cursor < s_size) return false;
int i;
for (i = 0; i != s_size; i++) {
if (current[cursor + i] != s.charAt(i)) return false;
}
cursor += s_size;
return true;
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
@Deprecated
protected boolean eq_s(int s_size, String s)
{
return eq_s(s_size, (CharSequence)s);
}
protected boolean eq_s_b(int s_size, CharSequence s)
{
if (cursor - limit_backward < s_size) return false;
int i;
for (i = 0; i != s_size; i++) {
if (current[cursor - s_size + i] != s.charAt(i)) return false;
}
cursor -= s_size;
return true;
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
@Deprecated
protected boolean eq_s_b(int s_size, String s)
{
return eq_s_b(s_size, (CharSequence)s);
}
protected boolean eq_v(CharSequence s)
{
return eq_s(s.length(), s);
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
@Deprecated
protected boolean eq_v(StringBuilder s)
{
return eq_s(s.length(), (CharSequence)s);
}
protected boolean eq_v_b(CharSequence s)
{ return eq_s_b(s.length(), s);
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
@Deprecated
protected boolean eq_v_b(StringBuilder s)
{ return eq_s_b(s.length(), (CharSequence)s);
}
protected int find_among(Among v[], int v_size)
{
int i = 0;
int j = v_size;
int c = cursor;
int l = limit;
int common_i = 0;
int common_j = 0;
boolean first_key_inspected = false;
while(true) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j; // smaller
Among w = v[k];
int i2;
for (i2 = common; i2 < w.s_size; i2++) {
if (c + common == l) {
diff = -1;
break;
}
diff = current[c + common] - w.s[i2];
if (diff != 0) break;
common++;
}
if (diff < 0) {
j = k;
common_j = common;
} else {
i = k;
common_i = common;
}
if (j - i <= 1) {
if (i > 0) break; // v->s has been inspected
if (j == i) break; // only one item in v
// - but now we need to go round once more to get
// v->s inspected. This looks messy, but is actually
// the optimal approach.
if (first_key_inspected) break;
first_key_inspected = true;
}
}
while(true) {
Among w = v[i];
if (common_i >= w.s_size) {
cursor = c + w.s_size;
if (w.method == null) return w.result;
boolean res;
try {
Object resobj = w.method.invoke(w.methodobject,
new Object[0]);
res = resobj.toString().equals("true");
} catch (InvocationTargetException e) {
res = false;
// FIXME - debug message
} catch (IllegalAccessException e) {
res = false;
// FIXME - debug message
}
cursor = c + w.s_size;
if (res) return w.result;
}
i = w.substring_i;
if (i < 0) return 0;
}
}
// find_among_b is for backwards processing. Same comments apply
protected int find_among_b(Among v[], int v_size)
{
int i = 0;
int j = v_size;
int c = cursor;
int lb = limit_backward;
int common_i = 0;
int common_j = 0;
boolean first_key_inspected = false;
while(true) {
int k = i + ((j - i) >> 1);
int diff = 0;
int common = common_i < common_j ? common_i : common_j;
Among w = v[k];
int i2;
for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) {
if (c - common == lb) {
diff = -1;
break;
}
diff = current[c - 1 - common] - w.s[i2];
if (diff != 0) break;
common++;
}
if (diff < 0) {
j = k;
common_j = common;
} else {
i = k;
common_i = common;
}
if (j - i <= 1) {
if (i > 0) break;
if (j == i) break;
if (first_key_inspected) break;
first_key_inspected = true;
}
}
while(true) {
Among w = v[i];
if (common_i >= w.s_size) {
cursor = c - w.s_size;
if (w.method == null) return w.result;
boolean res;
try {
Object resobj = w.method.invoke(w.methodobject,
new Object[0]);
res = resobj.toString().equals("true");
} catch (InvocationTargetException e) {
res = false;
// FIXME - debug message
} catch (IllegalAccessException e) {
res = false;
// FIXME - debug message
}
cursor = c - w.s_size;
if (res) return w.result;
}
i = w.substring_i;
if (i < 0) return 0;
}
}
/* to replace chars between c_bra and c_ket in current by the
* chars in s.
*/
protected int replace_s(int c_bra, int c_ket, CharSequence s)
{
final int adjustment = s.length() - (c_ket - c_bra);
final int newLength = limit + adjustment;
//resize if necessary
if (newLength > current.length) {
char newBuffer[] = new char[ArrayUtil.oversize(newLength, RamUsageEstimator.NUM_BYTES_CHAR)];
System.arraycopy(current, 0, newBuffer, 0, limit);
current = newBuffer;
}
// if the substring being replaced is longer or shorter than the
// replacement, need to shift things around
if (adjustment != 0 && c_ket < limit) {
System.arraycopy(current, c_ket, current, c_bra + s.length(),
limit - c_ket);
}
// insert the replacement text
// Note, faster is s.getChars(0, s.length(), current, c_bra);
// but would have to duplicate this method for both String and StringBuilder
for (int i = 0; i < s.length(); i++)
current[c_bra + i] = s.charAt(i);
limit += adjustment;
if (cursor >= c_ket) cursor += adjustment;
else if (cursor > c_bra) cursor = c_bra;
return adjustment;
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
@Deprecated
protected int replace_s(int c_bra, int c_ket, String s) {
return replace_s(c_bra, c_ket, (CharSequence)s);
}
protected void slice_check()
{
if (bra < 0 ||
bra > ket ||
ket > limit)
{
System.err.println("faulty slice operation");
// FIXME: report error somehow.
/*
fprintf(stderr, "faulty slice operation:\n");
debug(z, -1, 0);
exit(1);
*/
}
}
protected void slice_from(CharSequence s)
{
slice_check();
replace_s(bra, ket, s);
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
@Deprecated
protected void slice_from(String s)
{
slice_from((CharSequence)s);
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
@Deprecated
protected void slice_from(StringBuilder s)
{
slice_from((CharSequence)s);
}
protected void slice_del()
{
slice_from((CharSequence)"");
}
protected void insert(int c_bra, int c_ket, CharSequence s)
{
int adjustment = replace_s(c_bra, c_ket, s);
if (c_bra <= bra) bra += adjustment;
if (c_bra <= ket) ket += adjustment;
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
@Deprecated
protected void insert(int c_bra, int c_ket, String s)
{
insert(c_bra, c_ket, (CharSequence)s);
}
/** @deprecated for binary back compat. Will be removed in Lucene 4.0 */
@Deprecated
protected void insert(int c_bra, int c_ket, StringBuilder s)
{
insert(c_bra, c_ket, (CharSequence)s);
}
/* Copy the slice into the supplied StringBuffer */
protected StringBuilder slice_to(StringBuilder s)
{
slice_check();
int len = ket - bra;
s.setLength(0);
s.append(current, bra, len);
return s;
}
protected StringBuilder assign_to(StringBuilder s)
{
s.setLength(0);
s.append(current, 0, limit);
return s;
}
/*
extern void debug(struct SN_env * z, int number, int line_count)
{ int i;
int limit = SIZE(z->p);
//if (number >= 0) printf("%3d (line %4d): '", number, line_count);
if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit);
for (i = 0; i <= limit; i++)
{ if (z->lb == i) printf("{");
if (z->bra == i) printf("[");
if (z->c == i) printf("|");
if (z->ket == i) printf("]");
if (z->l == i) printf("}");
if (i < limit)
{ int ch = z->p[i];
if (ch == 0) ch = '#';
printf("%c", ch);
}
}
printf("'\n");
}
*/
};
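A compact sketch of the char[] contract documented at getCurrentBuffer(): the returned array may differ from the one passed to setCurrent, and only the first getCurrentBufferLength() chars are valid. The stemmer choice and class name here are arbitrary:

import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.ext.EnglishStemmer;

public class BufferStemDemo {
  public static void main(String[] args) {
    char[] buffer = "stemming".toCharArray();
    SnowballProgram stemmer = new EnglishStemmer();
    stemmer.setCurrent(buffer, buffer.length);    // hand over the raw buffer and its valid length
    stemmer.stem();
    char[] result = stemmer.getCurrentBuffer();   // may or may not be the same array as 'buffer'
    int length = stemmer.getCurrentBufferLength();
    System.out.println(new String(result, 0, length));
  }
}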


@ -0,0 +1,108 @@
/*
Copyright (c) 2001, Dr Martin Porter
Copyright (c) 2002, Richard Boulton
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.tartarus.snowball;
import java.lang.reflect.Method;
import java.io.Reader;
import java.io.Writer;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.OutputStream;
import java.io.FileOutputStream;
public class TestApp {
private static void usage()
{
System.err.println("Usage: TestApp <algorithm> <input file> [-o <output file>]");
}
public static void main(String [] args) throws Throwable {
if (args.length < 2) {
usage();
return;
}
Class stemClass = Class.forName("org.tartarus.snowball.ext." +
args[0] + "Stemmer");
SnowballProgram stemmer = (SnowballProgram) stemClass.newInstance();
Method stemMethod = stemClass.getMethod("stem", new Class[0]);
Reader reader;
reader = new InputStreamReader(new FileInputStream(args[1]));
reader = new BufferedReader(reader);
StringBuffer input = new StringBuffer();
OutputStream outstream;
if (args.length > 2) {
if (args.length == 4 && args[2].equals("-o")) {
outstream = new FileOutputStream(args[3]);
} else {
usage();
return;
}
} else {
outstream = System.out;
}
Writer output = new OutputStreamWriter(outstream);
output = new BufferedWriter(output);
int repeat = 1;
if (args.length > 4) {
repeat = Integer.parseInt(args[4]);
}
Object [] emptyArgs = new Object[0];
int character;
while ((character = reader.read()) != -1) {
char ch = (char) character;
if (Character.isWhitespace((char) ch)) {
if (input.length() > 0) {
stemmer.setCurrent(input.toString());
for (int i = repeat; i != 0; i--) {
stemMethod.invoke(stemmer, emptyArgs);
}
output.write(stemmer.getCurrent());
output.write('\n');
input.delete(0, input.length());
}
} else {
input.append(Character.toLowerCase(ch));
}
}
output.flush();
}
}


@ -0,0 +1,423 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
public class DanishStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "hed", -1, 1, "", this),
new Among ( "ethed", 0, 1, "", this),
new Among ( "ered", -1, 1, "", this),
new Among ( "e", -1, 1, "", this),
new Among ( "erede", 3, 1, "", this),
new Among ( "ende", 3, 1, "", this),
new Among ( "erende", 5, 1, "", this),
new Among ( "ene", 3, 1, "", this),
new Among ( "erne", 3, 1, "", this),
new Among ( "ere", 3, 1, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "heden", 10, 1, "", this),
new Among ( "eren", 10, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "heder", 13, 1, "", this),
new Among ( "erer", 13, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "heds", 16, 1, "", this),
new Among ( "es", 16, 1, "", this),
new Among ( "endes", 18, 1, "", this),
new Among ( "erendes", 19, 1, "", this),
new Among ( "enes", 18, 1, "", this),
new Among ( "ernes", 18, 1, "", this),
new Among ( "eres", 18, 1, "", this),
new Among ( "ens", 16, 1, "", this),
new Among ( "hedens", 24, 1, "", this),
new Among ( "erens", 24, 1, "", this),
new Among ( "ers", 16, 1, "", this),
new Among ( "ets", 16, 1, "", this),
new Among ( "erets", 28, 1, "", this),
new Among ( "et", -1, 1, "", this),
new Among ( "eret", 30, 1, "", this)
};
private Among a_1[] = {
new Among ( "gd", -1, -1, "", this),
new Among ( "dt", -1, -1, "", this),
new Among ( "gt", -1, -1, "", this),
new Among ( "kt", -1, -1, "", this)
};
private Among a_2[] = {
new Among ( "ig", -1, 1, "", this),
new Among ( "lig", 0, 1, "", this),
new Among ( "elig", 1, 1, "", this),
new Among ( "els", -1, 1, "", this),
new Among ( "l\u00F8st", -1, 2, "", this)
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 };
private int I_x;
private int I_p1;
private StringBuilder S_ch = new StringBuilder();
private void copy_from(DanishStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
S_ch = other.S_ch;
super.copy_from(other);
}
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 29
I_p1 = limit;
// test, line 33
v_1 = cursor;
// (, line 33
// hop, line 33
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 33
I_x = cursor;
cursor = v_1;
// goto, line 34
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
{
break lab1;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 34
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 34
I_p1 = cursor;
// try, line 35
lab4: do {
// (, line 35
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 40
// setlimit, line 41
v_1 = limit - cursor;
// tomark, line 41
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 41
// [, line 41
ket = cursor;
// substring, line 41
among_var = find_among_b(a_0, 32);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 41
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 48
// delete, line 48
slice_del();
break;
case 2:
// (, line 50
if (!(in_grouping_b(g_s_ending, 97, 229)))
{
return false;
}
// delete, line 50
slice_del();
break;
}
return true;
}
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// (, line 54
// test, line 55
v_1 = limit - cursor;
// (, line 55
// setlimit, line 56
v_2 = limit - cursor;
// tomark, line 56
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 56
// [, line 56
ket = cursor;
// substring, line 56
if (find_among_b(a_1, 4) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 56
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 62
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 62
bra = cursor;
// delete, line 62
slice_del();
return true;
}
private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 65
// do, line 66
v_1 = limit - cursor;
lab0: do {
// (, line 66
// [, line 66
ket = cursor;
// literal, line 66
if (!(eq_s_b(2, "st")))
{
break lab0;
}
// ], line 66
bra = cursor;
// literal, line 66
if (!(eq_s_b(2, "ig")))
{
break lab0;
}
// delete, line 66
slice_del();
} while (false);
cursor = limit - v_1;
// setlimit, line 67
v_2 = limit - cursor;
// tomark, line 67
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 67
// [, line 67
ket = cursor;
// substring, line 67
among_var = find_among_b(a_2, 5);
if (among_var == 0)
{
limit_backward = v_3;
return false;
}
// ], line 67
bra = cursor;
limit_backward = v_3;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 70
// delete, line 70
slice_del();
// do, line 70
v_4 = limit - cursor;
lab1: do {
// call consonant_pair, line 70
if (!r_consonant_pair())
{
break lab1;
}
} while (false);
cursor = limit - v_4;
break;
case 2:
// (, line 72
// <-, line 72
slice_from("l\u00F8s");
break;
}
return true;
}
private boolean r_undouble() {
int v_1;
int v_2;
// (, line 75
// setlimit, line 76
v_1 = limit - cursor;
// tomark, line 76
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 76
// [, line 76
ket = cursor;
if (!(out_grouping_b(g_v, 97, 248)))
{
limit_backward = v_2;
return false;
}
// ], line 76
bra = cursor;
// -> ch, line 76
S_ch = slice_to(S_ch);
limit_backward = v_2;
// name ch, line 77
if (!(eq_v_b(S_ch)))
{
return false;
}
// delete, line 78
slice_del();
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
// (, line 82
// do, line 84
v_1 = cursor;
lab0: do {
// call mark_regions, line 84
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 85
limit_backward = cursor; cursor = limit;
// (, line 85
// do, line 86
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 86
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 87
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 87
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 88
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 88
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
// do, line 89
v_5 = limit - cursor;
lab4: do {
// call undouble, line 89
if (!r_undouble())
{
break lab4;
}
} while (false);
cursor = limit - v_5;
cursor = limit_backward; return true;
}
}


@ -0,0 +1,837 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
public class DutchStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "", -1, 6, "", this),
new Among ( "\u00E1", 0, 1, "", this),
new Among ( "\u00E4", 0, 1, "", this),
new Among ( "\u00E9", 0, 2, "", this),
new Among ( "\u00EB", 0, 2, "", this),
new Among ( "\u00ED", 0, 3, "", this),
new Among ( "\u00EF", 0, 3, "", this),
new Among ( "\u00F3", 0, 4, "", this),
new Among ( "\u00F6", 0, 4, "", this),
new Among ( "\u00FA", 0, 5, "", this),
new Among ( "\u00FC", 0, 5, "", this)
};
private Among a_1[] = {
new Among ( "", -1, 3, "", this),
new Among ( "I", 0, 2, "", this),
new Among ( "Y", 0, 1, "", this)
};
private Among a_2[] = {
new Among ( "dd", -1, -1, "", this),
new Among ( "kk", -1, -1, "", this),
new Among ( "tt", -1, -1, "", this)
};
private Among a_3[] = {
new Among ( "ene", -1, 2, "", this),
new Among ( "se", -1, 3, "", this),
new Among ( "en", -1, 2, "", this),
new Among ( "heden", 2, 1, "", this),
new Among ( "s", -1, 3, "", this)
};
private Among a_4[] = {
new Among ( "end", -1, 1, "", this),
new Among ( "ig", -1, 2, "", this),
new Among ( "ing", -1, 1, "", this),
new Among ( "lijk", -1, 3, "", this),
new Among ( "baar", -1, 4, "", this),
new Among ( "bar", -1, 5, "", this)
};
private Among a_5[] = {
new Among ( "aa", -1, -1, "", this),
new Among ( "ee", -1, -1, "", this),
new Among ( "oo", -1, -1, "", this),
new Among ( "uu", -1, -1, "", this)
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
private static final char g_v_I[] = {1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
private static final char g_v_j[] = {17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 };
private int I_p2;
private int I_p1;
private boolean B_e_found;
private void copy_from(DutchStemmer other) {
I_p2 = other.I_p2;
I_p1 = other.I_p1;
B_e_found = other.B_e_found;
super.copy_from(other);
}
private boolean r_prelude() {
int among_var;
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
int v_6;
// (, line 41
// test, line 42
v_1 = cursor;
// repeat, line 42
replab0: while(true)
{
v_2 = cursor;
lab1: do {
// (, line 42
// [, line 43
bra = cursor;
// substring, line 43
among_var = find_among(a_0, 11);
if (among_var == 0)
{
break lab1;
}
// ], line 43
ket = cursor;
switch(among_var) {
case 0:
break lab1;
case 1:
// (, line 45
// <-, line 45
slice_from("a");
break;
case 2:
// (, line 47
// <-, line 47
slice_from("e");
break;
case 3:
// (, line 49
// <-, line 49
slice_from("i");
break;
case 4:
// (, line 51
// <-, line 51
slice_from("o");
break;
case 5:
// (, line 53
// <-, line 53
slice_from("u");
break;
case 6:
// (, line 54
// next, line 54
if (cursor >= limit)
{
break lab1;
}
cursor++;
break;
}
continue replab0;
} while (false);
cursor = v_2;
break replab0;
}
cursor = v_1;
// try, line 57
v_3 = cursor;
lab2: do {
// (, line 57
// [, line 57
bra = cursor;
// literal, line 57
if (!(eq_s(1, "y")))
{
cursor = v_3;
break lab2;
}
// ], line 57
ket = cursor;
// <-, line 57
slice_from("Y");
} while (false);
// repeat, line 58
replab3: while(true)
{
v_4 = cursor;
lab4: do {
// goto, line 58
golab5: while(true)
{
v_5 = cursor;
lab6: do {
// (, line 58
if (!(in_grouping(g_v, 97, 232)))
{
break lab6;
}
// [, line 59
bra = cursor;
// or, line 59
lab7: do {
v_6 = cursor;
lab8: do {
// (, line 59
// literal, line 59
if (!(eq_s(1, "i")))
{
break lab8;
}
// ], line 59
ket = cursor;
if (!(in_grouping(g_v, 97, 232)))
{
break lab8;
}
// <-, line 59
slice_from("I");
break lab7;
} while (false);
cursor = v_6;
// (, line 60
// literal, line 60
if (!(eq_s(1, "y")))
{
break lab6;
}
// ], line 60
ket = cursor;
// <-, line 60
slice_from("Y");
} while (false);
cursor = v_5;
break golab5;
} while (false);
cursor = v_5;
if (cursor >= limit)
{
break lab4;
}
cursor++;
}
continue replab3;
} while (false);
cursor = v_4;
break replab3;
}
return true;
}
private boolean r_mark_regions() {
// (, line 64
I_p1 = limit;
I_p2 = limit;
// gopast, line 69
golab0: while(true)
{
lab1: do {
if (!(in_grouping(g_v, 97, 232)))
{
break lab1;
}
break golab0;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 69
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 232)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 69
I_p1 = cursor;
// try, line 70
lab4: do {
// (, line 70
if (!(I_p1 < 3))
{
break lab4;
}
I_p1 = 3;
} while (false);
// gopast, line 71
golab5: while(true)
{
lab6: do {
if (!(in_grouping(g_v, 97, 232)))
{
break lab6;
}
break golab5;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 71
golab7: while(true)
{
lab8: do {
if (!(out_grouping(g_v, 97, 232)))
{
break lab8;
}
break golab7;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p2, line 71
I_p2 = cursor;
return true;
}
private boolean r_postlude() {
int among_var;
int v_1;
// repeat, line 75
replab0: while(true)
{
v_1 = cursor;
lab1: do {
// (, line 75
// [, line 77
bra = cursor;
// substring, line 77
among_var = find_among(a_1, 3);
if (among_var == 0)
{
break lab1;
}
// ], line 77
ket = cursor;
switch(among_var) {
case 0:
break lab1;
case 1:
// (, line 78
// <-, line 78
slice_from("y");
break;
case 2:
// (, line 79
// <-, line 79
slice_from("i");
break;
case 3:
// (, line 80
// next, line 80
if (cursor >= limit)
{
break lab1;
}
cursor++;
break;
}
continue replab0;
} while (false);
cursor = v_1;
break replab0;
}
return true;
}
private boolean r_R1() {
if (!(I_p1 <= cursor))
{
return false;
}
return true;
}
private boolean r_R2() {
if (!(I_p2 <= cursor))
{
return false;
}
return true;
}
private boolean r_undouble() {
int v_1;
// (, line 90
// test, line 91
v_1 = limit - cursor;
// among, line 91
if (find_among_b(a_2, 3) == 0)
{
return false;
}
cursor = limit - v_1;
// [, line 91
ket = cursor;
// next, line 91
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 91
bra = cursor;
// delete, line 91
slice_del();
return true;
}
private boolean r_e_ending() {
int v_1;
// (, line 94
// unset e_found, line 95
B_e_found = false;
// [, line 96
ket = cursor;
// literal, line 96
if (!(eq_s_b(1, "e")))
{
return false;
}
// ], line 96
bra = cursor;
// call R1, line 96
if (!r_R1())
{
return false;
}
// test, line 96
v_1 = limit - cursor;
if (!(out_grouping_b(g_v, 97, 232)))
{
return false;
}
cursor = limit - v_1;
// delete, line 96
slice_del();
// set e_found, line 97
B_e_found = true;
// call undouble, line 98
if (!r_undouble())
{
return false;
}
return true;
}
private boolean r_en_ending() {
int v_1;
int v_2;
// (, line 101
// call R1, line 102
if (!r_R1())
{
return false;
}
// and, line 102
v_1 = limit - cursor;
if (!(out_grouping_b(g_v, 97, 232)))
{
return false;
}
cursor = limit - v_1;
// not, line 102
{
v_2 = limit - cursor;
lab0: do {
// literal, line 102
if (!(eq_s_b(3, "gem")))
{
break lab0;
}
return false;
} while (false);
cursor = limit - v_2;
}
// delete, line 102
slice_del();
// call undouble, line 103
if (!r_undouble())
{
return false;
}
return true;
}
private boolean r_standard_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
int v_6;
int v_7;
int v_8;
int v_9;
int v_10;
// (, line 106
// do, line 107
v_1 = limit - cursor;
lab0: do {
// (, line 107
// [, line 108
ket = cursor;
// substring, line 108
among_var = find_among_b(a_3, 5);
if (among_var == 0)
{
break lab0;
}
// ], line 108
bra = cursor;
switch(among_var) {
case 0:
break lab0;
case 1:
// (, line 110
// call R1, line 110
if (!r_R1())
{
break lab0;
}
// <-, line 110
slice_from("heid");
break;
case 2:
// (, line 113
// call en_ending, line 113
if (!r_en_ending())
{
break lab0;
}
break;
case 3:
// (, line 116
// call R1, line 116
if (!r_R1())
{
break lab0;
}
if (!(out_grouping_b(g_v_j, 97, 232)))
{
break lab0;
}
// delete, line 116
slice_del();
break;
}
} while (false);
cursor = limit - v_1;
// do, line 120
v_2 = limit - cursor;
lab1: do {
// call e_ending, line 120
if (!r_e_ending())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 122
v_3 = limit - cursor;
lab2: do {
// (, line 122
// [, line 122
ket = cursor;
// literal, line 122
if (!(eq_s_b(4, "heid")))
{
break lab2;
}
// ], line 122
bra = cursor;
// call R2, line 122
if (!r_R2())
{
break lab2;
}
// not, line 122
{
v_4 = limit - cursor;
lab3: do {
// literal, line 122
if (!(eq_s_b(1, "c")))
{
break lab3;
}
break lab2;
} while (false);
cursor = limit - v_4;
}
// delete, line 122
slice_del();
// [, line 123
ket = cursor;
// literal, line 123
if (!(eq_s_b(2, "en")))
{
break lab2;
}
// ], line 123
bra = cursor;
// call en_ending, line 123
if (!r_en_ending())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 126
v_5 = limit - cursor;
lab4: do {
// (, line 126
// [, line 127
ket = cursor;
// substring, line 127
among_var = find_among_b(a_4, 6);
if (among_var == 0)
{
break lab4;
}
// ], line 127
bra = cursor;
switch(among_var) {
case 0:
break lab4;
case 1:
// (, line 129
// call R2, line 129
if (!r_R2())
{
break lab4;
}
// delete, line 129
slice_del();
// or, line 130
lab5: do {
v_6 = limit - cursor;
lab6: do {
// (, line 130
// [, line 130
ket = cursor;
// literal, line 130
if (!(eq_s_b(2, "ig")))
{
break lab6;
}
// ], line 130
bra = cursor;
// call R2, line 130
if (!r_R2())
{
break lab6;
}
// not, line 130
{
v_7 = limit - cursor;
lab7: do {
// literal, line 130
if (!(eq_s_b(1, "e")))
{
break lab7;
}
break lab6;
} while (false);
cursor = limit - v_7;
}
// delete, line 130
slice_del();
break lab5;
} while (false);
cursor = limit - v_6;
// call undouble, line 130
if (!r_undouble())
{
break lab4;
}
} while (false);
break;
case 2:
// (, line 133
// call R2, line 133
if (!r_R2())
{
break lab4;
}
// not, line 133
{
v_8 = limit - cursor;
lab8: do {
// literal, line 133
if (!(eq_s_b(1, "e")))
{
break lab8;
}
break lab4;
} while (false);
cursor = limit - v_8;
}
// delete, line 133
slice_del();
break;
case 3:
// (, line 136
// call R2, line 136
if (!r_R2())
{
break lab4;
}
// delete, line 136
slice_del();
// call e_ending, line 136
if (!r_e_ending())
{
break lab4;
}
break;
case 4:
// (, line 139
// call R2, line 139
if (!r_R2())
{
break lab4;
}
// delete, line 139
slice_del();
break;
case 5:
// (, line 142
// call R2, line 142
if (!r_R2())
{
break lab4;
}
// Boolean test e_found, line 142
if (!(B_e_found))
{
break lab4;
}
// delete, line 142
slice_del();
break;
}
} while (false);
cursor = limit - v_5;
// do, line 146
v_9 = limit - cursor;
lab9: do {
// (, line 146
if (!(out_grouping_b(g_v_I, 73, 232)))
{
break lab9;
}
// test, line 148
v_10 = limit - cursor;
// (, line 148
// among, line 149
if (find_among_b(a_5, 4) == 0)
{
break lab9;
}
if (!(out_grouping_b(g_v, 97, 232)))
{
break lab9;
}
cursor = limit - v_10;
// [, line 152
ket = cursor;
// next, line 152
if (cursor <= limit_backward)
{
break lab9;
}
cursor--;
// ], line 152
bra = cursor;
// delete, line 152
slice_del();
} while (false);
cursor = limit - v_9;
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 157
// do, line 159
v_1 = cursor;
lab0: do {
// call prelude, line 159
if (!r_prelude())
{
break lab0;
}
} while (false);
cursor = v_1;
// do, line 160
v_2 = cursor;
lab1: do {
// call mark_regions, line 160
if (!r_mark_regions())
{
break lab1;
}
} while (false);
cursor = v_2;
// backwards, line 161
limit_backward = cursor; cursor = limit;
// do, line 162
v_3 = limit - cursor;
lab2: do {
// call standard_suffix, line 162
if (!r_standard_suffix())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
cursor = limit_backward; // do, line 163
v_4 = cursor;
lab3: do {
// call postlude, line 163
if (!r_postlude())
{
break lab3;
}
} while (false);
cursor = v_4;
return true;
}
}
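The control flow in these generated classes leans entirely on labeled do { ... } while (false) blocks: a failed test breaks out of the label, and the cursor saved in a v_N variable is restored afterwards, which is how Snowball's do/try/or backtracking is expressed in Java. A minimal, self-contained illustration of that idiom (the word and helper below are made up for the sketch, not taken from this commit):
public class SnowballControlFlowDemo {
    private static int cursor = 0;
    // Hypothetical helper: advance the cursor past `prefix` if it matches at the current position.
    private static boolean tryConsume(String s, String prefix) {
        if (!s.startsWith(prefix, cursor)) {
            return false;
        }
        cursor += prefix.length();
        return true;
    }
    public static void main(String[] args) {
        String word = "gelukkig";
        int v_1 = cursor;              // save the position before the attempt
        lab0: do {
            if (!tryConsume(word, "on")) {
                break lab0;            // attempt failed: skip the rest of this block
            }
            System.out.println("matched optional prefix");
        } while (false);
        cursor = v_1;                  // restore unconditionally, like Snowball's "do"
        System.out.println("cursor after attempt: " + cursor);
    }
}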

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,726 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
public class German2Stemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "", -1, 6, "", this),
new Among ( "ae", 0, 2, "", this),
new Among ( "oe", 0, 3, "", this),
new Among ( "qu", 0, 5, "", this),
new Among ( "ue", 0, 4, "", this),
new Among ( "\u00DF", 0, 1, "", this)
};
private Among a_1[] = {
new Among ( "", -1, 6, "", this),
new Among ( "U", 0, 2, "", this),
new Among ( "Y", 0, 1, "", this),
new Among ( "\u00E4", 0, 3, "", this),
new Among ( "\u00F6", 0, 4, "", this),
new Among ( "\u00FC", 0, 5, "", this)
};
private Among a_2[] = {
new Among ( "e", -1, 1, "", this),
new Among ( "em", -1, 1, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "ern", -1, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "es", 5, 1, "", this)
};
private Among a_3[] = {
new Among ( "en", -1, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "st", -1, 2, "", this),
new Among ( "est", 2, 1, "", this)
};
private Among a_4[] = {
new Among ( "ig", -1, 1, "", this),
new Among ( "lich", -1, 1, "", this)
};
private Among a_5[] = {
new Among ( "end", -1, 1, "", this),
new Among ( "ig", -1, 2, "", this),
new Among ( "ung", -1, 1, "", this),
new Among ( "lich", -1, 3, "", this),
new Among ( "isch", -1, 2, "", this),
new Among ( "ik", -1, 2, "", this),
new Among ( "heit", -1, 3, "", this),
new Among ( "keit", -1, 4, "", this)
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 };
private static final char g_s_ending[] = {117, 30, 5 };
private static final char g_st_ending[] = {117, 30, 4 };
private int I_x;
private int I_p2;
private int I_p1;
private void copy_from(German2Stemmer other) {
I_x = other.I_x;
I_p2 = other.I_p2;
I_p1 = other.I_p1;
super.copy_from(other);
}
private boolean r_prelude() {
int among_var;
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
// (, line 28
// test, line 30
v_1 = cursor;
// repeat, line 30
replab0: while(true)
{
v_2 = cursor;
lab1: do {
// goto, line 30
golab2: while(true)
{
v_3 = cursor;
lab3: do {
// (, line 30
if (!(in_grouping(g_v, 97, 252)))
{
break lab3;
}
// [, line 31
bra = cursor;
// or, line 31
lab4: do {
v_4 = cursor;
lab5: do {
// (, line 31
// literal, line 31
if (!(eq_s(1, "u")))
{
break lab5;
}
// ], line 31
ket = cursor;
if (!(in_grouping(g_v, 97, 252)))
{
break lab5;
}
// <-, line 31
slice_from("U");
break lab4;
} while (false);
cursor = v_4;
// (, line 32
// literal, line 32
if (!(eq_s(1, "y")))
{
break lab3;
}
// ], line 32
ket = cursor;
if (!(in_grouping(g_v, 97, 252)))
{
break lab3;
}
// <-, line 32
slice_from("Y");
} while (false);
cursor = v_3;
break golab2;
} while (false);
cursor = v_3;
if (cursor >= limit)
{
break lab1;
}
cursor++;
}
continue replab0;
} while (false);
cursor = v_2;
break replab0;
}
cursor = v_1;
// repeat, line 35
replab6: while(true)
{
v_5 = cursor;
lab7: do {
// (, line 35
// [, line 36
bra = cursor;
// substring, line 36
among_var = find_among(a_0, 6);
if (among_var == 0)
{
break lab7;
}
// ], line 36
ket = cursor;
switch(among_var) {
case 0:
break lab7;
case 1:
// (, line 37
// <-, line 37
slice_from("ss");
break;
case 2:
// (, line 38
// <-, line 38
slice_from("\u00E4");
break;
case 3:
// (, line 39
// <-, line 39
slice_from("\u00F6");
break;
case 4:
// (, line 40
// <-, line 40
slice_from("\u00FC");
break;
case 5:
// (, line 41
// hop, line 41
{
int c = cursor + 2;
if (0 > c || c > limit)
{
break lab7;
}
cursor = c;
}
break;
case 6:
// (, line 42
// next, line 42
if (cursor >= limit)
{
break lab7;
}
cursor++;
break;
}
continue replab6;
} while (false);
cursor = v_5;
break replab6;
}
return true;
}
private boolean r_mark_regions() {
int v_1;
// (, line 48
I_p1 = limit;
I_p2 = limit;
// test, line 53
v_1 = cursor;
// (, line 53
// hop, line 53
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 53
I_x = cursor;
cursor = v_1;
// gopast, line 55
golab0: while(true)
{
lab1: do {
if (!(in_grouping(g_v, 97, 252)))
{
break lab1;
}
break golab0;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 55
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 252)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 55
I_p1 = cursor;
// try, line 56
lab4: do {
// (, line 56
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
// gopast, line 57
golab5: while(true)
{
lab6: do {
if (!(in_grouping(g_v, 97, 252)))
{
break lab6;
}
break golab5;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 57
golab7: while(true)
{
lab8: do {
if (!(out_grouping(g_v, 97, 252)))
{
break lab8;
}
break golab7;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p2, line 57
I_p2 = cursor;
return true;
}
private boolean r_postlude() {
int among_var;
int v_1;
// repeat, line 61
replab0: while(true)
{
v_1 = cursor;
lab1: do {
// (, line 61
// [, line 63
bra = cursor;
// substring, line 63
among_var = find_among(a_1, 6);
if (among_var == 0)
{
break lab1;
}
// ], line 63
ket = cursor;
switch(among_var) {
case 0:
break lab1;
case 1:
// (, line 64
// <-, line 64
slice_from("y");
break;
case 2:
// (, line 65
// <-, line 65
slice_from("u");
break;
case 3:
// (, line 66
// <-, line 66
slice_from("a");
break;
case 4:
// (, line 67
// <-, line 67
slice_from("o");
break;
case 5:
// (, line 68
// <-, line 68
slice_from("u");
break;
case 6:
// (, line 69
// next, line 69
if (cursor >= limit)
{
break lab1;
}
cursor++;
break;
}
continue replab0;
} while (false);
cursor = v_1;
break replab0;
}
return true;
}
private boolean r_R1() {
if (!(I_p1 <= cursor))
{
return false;
}
return true;
}
private boolean r_R2() {
if (!(I_p2 <= cursor))
{
return false;
}
return true;
}
private boolean r_standard_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
int v_6;
int v_7;
int v_8;
int v_9;
// (, line 79
// do, line 80
v_1 = limit - cursor;
lab0: do {
// (, line 80
// [, line 81
ket = cursor;
// substring, line 81
among_var = find_among_b(a_2, 7);
if (among_var == 0)
{
break lab0;
}
// ], line 81
bra = cursor;
// call R1, line 81
if (!r_R1())
{
break lab0;
}
switch(among_var) {
case 0:
break lab0;
case 1:
// (, line 83
// delete, line 83
slice_del();
break;
case 2:
// (, line 86
if (!(in_grouping_b(g_s_ending, 98, 116)))
{
break lab0;
}
// delete, line 86
slice_del();
break;
}
} while (false);
cursor = limit - v_1;
// do, line 90
v_2 = limit - cursor;
lab1: do {
// (, line 90
// [, line 91
ket = cursor;
// substring, line 91
among_var = find_among_b(a_3, 4);
if (among_var == 0)
{
break lab1;
}
// ], line 91
bra = cursor;
// call R1, line 91
if (!r_R1())
{
break lab1;
}
switch(among_var) {
case 0:
break lab1;
case 1:
// (, line 93
// delete, line 93
slice_del();
break;
case 2:
// (, line 96
if (!(in_grouping_b(g_st_ending, 98, 116)))
{
break lab1;
}
// hop, line 96
{
int c = cursor - 3;
if (limit_backward > c || c > limit)
{
break lab1;
}
cursor = c;
}
// delete, line 96
slice_del();
break;
}
} while (false);
cursor = limit - v_2;
// do, line 100
v_3 = limit - cursor;
lab2: do {
// (, line 100
// [, line 101
ket = cursor;
// substring, line 101
among_var = find_among_b(a_5, 8);
if (among_var == 0)
{
break lab2;
}
// ], line 101
bra = cursor;
// call R2, line 101
if (!r_R2())
{
break lab2;
}
switch(among_var) {
case 0:
break lab2;
case 1:
// (, line 103
// delete, line 103
slice_del();
// try, line 104
v_4 = limit - cursor;
lab3: do {
// (, line 104
// [, line 104
ket = cursor;
// literal, line 104
if (!(eq_s_b(2, "ig")))
{
cursor = limit - v_4;
break lab3;
}
// ], line 104
bra = cursor;
// not, line 104
{
v_5 = limit - cursor;
lab4: do {
// literal, line 104
if (!(eq_s_b(1, "e")))
{
break lab4;
}
cursor = limit - v_4;
break lab3;
} while (false);
cursor = limit - v_5;
}
// call R2, line 104
if (!r_R2())
{
cursor = limit - v_4;
break lab3;
}
// delete, line 104
slice_del();
} while (false);
break;
case 2:
// (, line 107
// not, line 107
{
v_6 = limit - cursor;
lab5: do {
// literal, line 107
if (!(eq_s_b(1, "e")))
{
break lab5;
}
break lab2;
} while (false);
cursor = limit - v_6;
}
// delete, line 107
slice_del();
break;
case 3:
// (, line 110
// delete, line 110
slice_del();
// try, line 111
v_7 = limit - cursor;
lab6: do {
// (, line 111
// [, line 112
ket = cursor;
// or, line 112
lab7: do {
v_8 = limit - cursor;
lab8: do {
// literal, line 112
if (!(eq_s_b(2, "er")))
{
break lab8;
}
break lab7;
} while (false);
cursor = limit - v_8;
// literal, line 112
if (!(eq_s_b(2, "en")))
{
cursor = limit - v_7;
break lab6;
}
} while (false);
// ], line 112
bra = cursor;
// call R1, line 112
if (!r_R1())
{
cursor = limit - v_7;
break lab6;
}
// delete, line 112
slice_del();
} while (false);
break;
case 4:
// (, line 116
// delete, line 116
slice_del();
// try, line 117
v_9 = limit - cursor;
lab9: do {
// (, line 117
// [, line 118
ket = cursor;
// substring, line 118
among_var = find_among_b(a_4, 2);
if (among_var == 0)
{
cursor = limit - v_9;
break lab9;
}
// ], line 118
bra = cursor;
// call R2, line 118
if (!r_R2())
{
cursor = limit - v_9;
break lab9;
}
switch(among_var) {
case 0:
cursor = limit - v_9;
break lab9;
case 1:
// (, line 120
// delete, line 120
slice_del();
break;
}
} while (false);
break;
}
} while (false);
cursor = limit - v_3;
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 130
// do, line 131
v_1 = cursor;
lab0: do {
// call prelude, line 131
if (!r_prelude())
{
break lab0;
}
} while (false);
cursor = v_1;
// do, line 132
v_2 = cursor;
lab1: do {
// call mark_regions, line 132
if (!r_mark_regions())
{
break lab1;
}
} while (false);
cursor = v_2;
// backwards, line 133
limit_backward = cursor; cursor = limit;
// do, line 134
v_3 = limit - cursor;
lab2: do {
// call standard_suffix, line 134
if (!r_standard_suffix())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
cursor = limit_backward; // do, line 135
v_4 = cursor;
lab3: do {
// call postlude, line 135
if (!r_postlude())
{
break lab3;
}
} while (false);
cursor = v_4;
return true;
}
}

View File

@ -0,0 +1,688 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
public class GermanStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "", -1, 6, "", this),
new Among ( "U", 0, 2, "", this),
new Among ( "Y", 0, 1, "", this),
new Among ( "\u00E4", 0, 3, "", this),
new Among ( "\u00F6", 0, 4, "", this),
new Among ( "\u00FC", 0, 5, "", this)
};
private Among a_1[] = {
new Among ( "e", -1, 1, "", this),
new Among ( "em", -1, 1, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "ern", -1, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "es", 5, 1, "", this)
};
private Among a_2[] = {
new Among ( "en", -1, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "st", -1, 2, "", this),
new Among ( "est", 2, 1, "", this)
};
private Among a_3[] = {
new Among ( "ig", -1, 1, "", this),
new Among ( "lich", -1, 1, "", this)
};
private Among a_4[] = {
new Among ( "end", -1, 1, "", this),
new Among ( "ig", -1, 2, "", this),
new Among ( "ung", -1, 1, "", this),
new Among ( "lich", -1, 3, "", this),
new Among ( "isch", -1, 2, "", this),
new Among ( "ik", -1, 2, "", this),
new Among ( "heit", -1, 3, "", this),
new Among ( "keit", -1, 4, "", this)
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 };
private static final char g_s_ending[] = {117, 30, 5 };
private static final char g_st_ending[] = {117, 30, 4 };
private int I_x;
private int I_p2;
private int I_p1;
private void copy_from(GermanStemmer other) {
I_x = other.I_x;
I_p2 = other.I_p2;
I_p1 = other.I_p1;
super.copy_from(other);
}
private boolean r_prelude() {
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
int v_6;
// (, line 28
// test, line 30
v_1 = cursor;
// repeat, line 30
replab0: while(true)
{
v_2 = cursor;
lab1: do {
// (, line 30
// or, line 33
lab2: do {
v_3 = cursor;
lab3: do {
// (, line 31
// [, line 32
bra = cursor;
// literal, line 32
if (!(eq_s(1, "\u00DF")))
{
break lab3;
}
// ], line 32
ket = cursor;
// <-, line 32
slice_from("ss");
break lab2;
} while (false);
cursor = v_3;
// next, line 33
if (cursor >= limit)
{
break lab1;
}
cursor++;
} while (false);
continue replab0;
} while (false);
cursor = v_2;
break replab0;
}
cursor = v_1;
// repeat, line 36
replab4: while(true)
{
v_4 = cursor;
lab5: do {
// goto, line 36
golab6: while(true)
{
v_5 = cursor;
lab7: do {
// (, line 36
if (!(in_grouping(g_v, 97, 252)))
{
break lab7;
}
// [, line 37
bra = cursor;
// or, line 37
lab8: do {
v_6 = cursor;
lab9: do {
// (, line 37
// literal, line 37
if (!(eq_s(1, "u")))
{
break lab9;
}
// ], line 37
ket = cursor;
if (!(in_grouping(g_v, 97, 252)))
{
break lab9;
}
// <-, line 37
slice_from("U");
break lab8;
} while (false);
cursor = v_6;
// (, line 38
// literal, line 38
if (!(eq_s(1, "y")))
{
break lab7;
}
// ], line 38
ket = cursor;
if (!(in_grouping(g_v, 97, 252)))
{
break lab7;
}
// <-, line 38
slice_from("Y");
} while (false);
cursor = v_5;
break golab6;
} while (false);
cursor = v_5;
if (cursor >= limit)
{
break lab5;
}
cursor++;
}
continue replab4;
} while (false);
cursor = v_4;
break replab4;
}
return true;
}
private boolean r_mark_regions() {
int v_1;
// (, line 42
I_p1 = limit;
I_p2 = limit;
// test, line 47
v_1 = cursor;
// (, line 47
// hop, line 47
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 47
I_x = cursor;
cursor = v_1;
// gopast, line 49
golab0: while(true)
{
lab1: do {
if (!(in_grouping(g_v, 97, 252)))
{
break lab1;
}
break golab0;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 49
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 252)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 49
I_p1 = cursor;
// try, line 50
lab4: do {
// (, line 50
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
// gopast, line 51
golab5: while(true)
{
lab6: do {
if (!(in_grouping(g_v, 97, 252)))
{
break lab6;
}
break golab5;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 51
golab7: while(true)
{
lab8: do {
if (!(out_grouping(g_v, 97, 252)))
{
break lab8;
}
break golab7;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p2, line 51
I_p2 = cursor;
return true;
}
private boolean r_postlude() {
int among_var;
int v_1;
// repeat, line 55
replab0: while(true)
{
v_1 = cursor;
lab1: do {
// (, line 55
// [, line 57
bra = cursor;
// substring, line 57
among_var = find_among(a_0, 6);
if (among_var == 0)
{
break lab1;
}
// ], line 57
ket = cursor;
switch(among_var) {
case 0:
break lab1;
case 1:
// (, line 58
// <-, line 58
slice_from("y");
break;
case 2:
// (, line 59
// <-, line 59
slice_from("u");
break;
case 3:
// (, line 60
// <-, line 60
slice_from("a");
break;
case 4:
// (, line 61
// <-, line 61
slice_from("o");
break;
case 5:
// (, line 62
// <-, line 62
slice_from("u");
break;
case 6:
// (, line 63
// next, line 63
if (cursor >= limit)
{
break lab1;
}
cursor++;
break;
}
continue replab0;
} while (false);
cursor = v_1;
break replab0;
}
return true;
}
private boolean r_R1() {
if (!(I_p1 <= cursor))
{
return false;
}
return true;
}
private boolean r_R2() {
if (!(I_p2 <= cursor))
{
return false;
}
return true;
}
private boolean r_standard_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
int v_6;
int v_7;
int v_8;
int v_9;
// (, line 73
// do, line 74
v_1 = limit - cursor;
lab0: do {
// (, line 74
// [, line 75
ket = cursor;
// substring, line 75
among_var = find_among_b(a_1, 7);
if (among_var == 0)
{
break lab0;
}
// ], line 75
bra = cursor;
// call R1, line 75
if (!r_R1())
{
break lab0;
}
switch(among_var) {
case 0:
break lab0;
case 1:
// (, line 77
// delete, line 77
slice_del();
break;
case 2:
// (, line 80
if (!(in_grouping_b(g_s_ending, 98, 116)))
{
break lab0;
}
// delete, line 80
slice_del();
break;
}
} while (false);
cursor = limit - v_1;
// do, line 84
v_2 = limit - cursor;
lab1: do {
// (, line 84
// [, line 85
ket = cursor;
// substring, line 85
among_var = find_among_b(a_2, 4);
if (among_var == 0)
{
break lab1;
}
// ], line 85
bra = cursor;
// call R1, line 85
if (!r_R1())
{
break lab1;
}
switch(among_var) {
case 0:
break lab1;
case 1:
// (, line 87
// delete, line 87
slice_del();
break;
case 2:
// (, line 90
if (!(in_grouping_b(g_st_ending, 98, 116)))
{
break lab1;
}
// hop, line 90
{
int c = cursor - 3;
if (limit_backward > c || c > limit)
{
break lab1;
}
cursor = c;
}
// delete, line 90
slice_del();
break;
}
} while (false);
cursor = limit - v_2;
// do, line 94
v_3 = limit - cursor;
lab2: do {
// (, line 94
// [, line 95
ket = cursor;
// substring, line 95
among_var = find_among_b(a_4, 8);
if (among_var == 0)
{
break lab2;
}
// ], line 95
bra = cursor;
// call R2, line 95
if (!r_R2())
{
break lab2;
}
switch(among_var) {
case 0:
break lab2;
case 1:
// (, line 97
// delete, line 97
slice_del();
// try, line 98
v_4 = limit - cursor;
lab3: do {
// (, line 98
// [, line 98
ket = cursor;
// literal, line 98
if (!(eq_s_b(2, "ig")))
{
cursor = limit - v_4;
break lab3;
}
// ], line 98
bra = cursor;
// not, line 98
{
v_5 = limit - cursor;
lab4: do {
// literal, line 98
if (!(eq_s_b(1, "e")))
{
break lab4;
}
cursor = limit - v_4;
break lab3;
} while (false);
cursor = limit - v_5;
}
// call R2, line 98
if (!r_R2())
{
cursor = limit - v_4;
break lab3;
}
// delete, line 98
slice_del();
} while (false);
break;
case 2:
// (, line 101
// not, line 101
{
v_6 = limit - cursor;
lab5: do {
// literal, line 101
if (!(eq_s_b(1, "e")))
{
break lab5;
}
break lab2;
} while (false);
cursor = limit - v_6;
}
// delete, line 101
slice_del();
break;
case 3:
// (, line 104
// delete, line 104
slice_del();
// try, line 105
v_7 = limit - cursor;
lab6: do {
// (, line 105
// [, line 106
ket = cursor;
// or, line 106
lab7: do {
v_8 = limit - cursor;
lab8: do {
// literal, line 106
if (!(eq_s_b(2, "er")))
{
break lab8;
}
break lab7;
} while (false);
cursor = limit - v_8;
// literal, line 106
if (!(eq_s_b(2, "en")))
{
cursor = limit - v_7;
break lab6;
}
} while (false);
// ], line 106
bra = cursor;
// call R1, line 106
if (!r_R1())
{
cursor = limit - v_7;
break lab6;
}
// delete, line 106
slice_del();
} while (false);
break;
case 4:
// (, line 110
// delete, line 110
slice_del();
// try, line 111
v_9 = limit - cursor;
lab9: do {
// (, line 111
// [, line 112
ket = cursor;
// substring, line 112
among_var = find_among_b(a_3, 2);
if (among_var == 0)
{
cursor = limit - v_9;
break lab9;
}
// ], line 112
bra = cursor;
// call R2, line 112
if (!r_R2())
{
cursor = limit - v_9;
break lab9;
}
switch(among_var) {
case 0:
cursor = limit - v_9;
break lab9;
case 1:
// (, line 114
// delete, line 114
slice_del();
break;
}
} while (false);
break;
}
} while (false);
cursor = limit - v_3;
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 124
// do, line 125
v_1 = cursor;
lab0: do {
// call prelude, line 125
if (!r_prelude())
{
break lab0;
}
} while (false);
cursor = v_1;
// do, line 126
v_2 = cursor;
lab1: do {
// call mark_regions, line 126
if (!r_mark_regions())
{
break lab1;
}
} while (false);
cursor = v_2;
// backwards, line 127
limit_backward = cursor; cursor = limit;
// do, line 128
v_3 = limit - cursor;
lab2: do {
// call standard_suffix, line 128
if (!r_standard_suffix())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
cursor = limit_backward; // do, line 129
v_4 = cursor;
lab3: do {
// call postlude, line 129
if (!r_postlude())
{
break lab3;
}
} while (false);
cursor = v_4;
return true;
}
}
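The char arrays such as g_v, g_s_ending and g_st_ending above are bit sets over a range of code points: each array element covers eight consecutive characters starting at the min argument passed to in_grouping/out_grouping (97 here). The decoder below is a sketch based on the conventional Snowball runtime layout, an assumption rather than code from this tree; run against the German g_v it should print the vowels a e i o u y plus the three umlauts:
public class GroupingDecodeDemo {
    // Same bit set as g_v in the German stemmers above (min = 97, max = 252).
    private static final char[] G_V = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8};
    // Assumed membership test: bit (ch - min) of the array, eight bits per element.
    private static boolean member(char[] bits, int min, int max, int ch) {
        if (ch < min || ch > max) {
            return false;
        }
        int off = ch - min;
        return (bits[off >> 3] & (1 << (off & 7))) != 0;
    }
    public static void main(String[] args) {
        StringBuilder group = new StringBuilder();
        for (int ch = 97; ch <= 252; ch++) {
            if (member(G_V, 97, 252, ch)) {
                group.append((char) ch);
            }
        }
        System.out.println(group);  // expected: aeiouy followed by \u00E4 \u00F6 \u00FC
    }
}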

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,358 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
public class NorwegianStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "a", -1, 1, "", this),
new Among ( "e", -1, 1, "", this),
new Among ( "ede", 1, 1, "", this),
new Among ( "ande", 1, 1, "", this),
new Among ( "ende", 1, 1, "", this),
new Among ( "ane", 1, 1, "", this),
new Among ( "ene", 1, 1, "", this),
new Among ( "hetene", 6, 1, "", this),
new Among ( "erte", 1, 3, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "heten", 9, 1, "", this),
new Among ( "ar", -1, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "heter", 12, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "as", 14, 1, "", this),
new Among ( "es", 14, 1, "", this),
new Among ( "edes", 16, 1, "", this),
new Among ( "endes", 16, 1, "", this),
new Among ( "enes", 16, 1, "", this),
new Among ( "hetenes", 19, 1, "", this),
new Among ( "ens", 14, 1, "", this),
new Among ( "hetens", 21, 1, "", this),
new Among ( "ers", 14, 1, "", this),
new Among ( "ets", 14, 1, "", this),
new Among ( "et", -1, 1, "", this),
new Among ( "het", 25, 1, "", this),
new Among ( "ert", -1, 3, "", this),
new Among ( "ast", -1, 1, "", this)
};
private Among a_1[] = {
new Among ( "dt", -1, -1, "", this),
new Among ( "vt", -1, -1, "", this)
};
private Among a_2[] = {
new Among ( "leg", -1, 1, "", this),
new Among ( "eleg", 0, 1, "", this),
new Among ( "ig", -1, 1, "", this),
new Among ( "eig", 2, 1, "", this),
new Among ( "lig", 2, 1, "", this),
new Among ( "elig", 4, 1, "", this),
new Among ( "els", -1, 1, "", this),
new Among ( "lov", -1, 1, "", this),
new Among ( "elov", 7, 1, "", this),
new Among ( "slov", 7, 1, "", this),
new Among ( "hetslov", 9, 1, "", this)
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 };
private static final char g_s_ending[] = {119, 125, 149, 1 };
private int I_x;
private int I_p1;
private void copy_from(NorwegianStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
super.copy_from(other);
}
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 26
I_p1 = limit;
// test, line 30
v_1 = cursor;
// (, line 30
// hop, line 30
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 30
I_x = cursor;
cursor = v_1;
// goto, line 31
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 248)))
{
break lab1;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 31
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 248)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 31
I_p1 = cursor;
// try, line 32
lab4: do {
// (, line 32
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
int v_3;
// (, line 37
// setlimit, line 38
v_1 = limit - cursor;
// tomark, line 38
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 38
// [, line 38
ket = cursor;
// substring, line 38
among_var = find_among_b(a_0, 29);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 38
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 44
// delete, line 44
slice_del();
break;
case 2:
// (, line 46
// or, line 46
lab0: do {
v_3 = limit - cursor;
lab1: do {
if (!(in_grouping_b(g_s_ending, 98, 122)))
{
break lab1;
}
break lab0;
} while (false);
cursor = limit - v_3;
// (, line 46
// literal, line 46
if (!(eq_s_b(1, "k")))
{
return false;
}
if (!(out_grouping_b(g_v, 97, 248)))
{
return false;
}
} while (false);
// delete, line 46
slice_del();
break;
case 3:
// (, line 48
// <-, line 48
slice_from("er");
break;
}
return true;
}
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// (, line 52
// test, line 53
v_1 = limit - cursor;
// (, line 53
// setlimit, line 54
v_2 = limit - cursor;
// tomark, line 54
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 54
// [, line 54
ket = cursor;
// substring, line 54
if (find_among_b(a_1, 2) == 0)
{
limit_backward = v_3;
return false;
}
// ], line 54
bra = cursor;
limit_backward = v_3;
cursor = limit - v_1;
// next, line 59
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 59
bra = cursor;
// delete, line 59
slice_del();
return true;
}
private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 62
// setlimit, line 63
v_1 = limit - cursor;
// tomark, line 63
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 63
// [, line 63
ket = cursor;
// substring, line 63
among_var = find_among_b(a_2, 11);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 63
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 67
// delete, line 67
slice_del();
break;
}
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 72
// do, line 74
v_1 = cursor;
lab0: do {
// call mark_regions, line 74
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 75
limit_backward = cursor; cursor = limit;
// (, line 75
// do, line 76
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 76
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 77
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 77
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 78
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 78
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
cursor = limit_backward; return true;
}
}
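Tables such as a_0 above feed find_among_b, which matches the longest listed suffix ending at the cursor and hands the entry's result code (the third constructor argument) to the switch that follows. The sketch below only mimics that observable behaviour with a linear scan; the real runtime is assumed to use a sorted table with prefix links, so treat the helper as illustrative rather than the actual implementation:
public class AmongLookupDemo {
    // A few (suffix, result) pairs echoing entries of a_0 in the Norwegian stemmer above.
    private static final String[] SUFFIXES = {"a", "ene", "hetene", "erte", "s", "ast"};
    private static final int[] RESULTS = {1, 1, 1, 3, 2, 1};
    // Return the result code of the longest matching suffix, or 0 when nothing matches.
    private static int findAmongB(String word) {
        int best = 0;
        int bestLen = -1;
        for (int i = 0; i < SUFFIXES.length; i++) {
            if (word.endsWith(SUFFIXES[i]) && SUFFIXES[i].length() > bestLen) {
                best = RESULTS[i];
                bestLen = SUFFIXES[i].length();
            }
        }
        return best;
    }
    public static void main(String[] args) {
        System.out.println(findAmongB("hetene"));  // longest match "hetene" -> 1
        System.out.println(findAmongB("huset"));   // no listed suffix matches -> 0
    }
}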

View File

@ -0,0 +1,906 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
public class PorterStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "s", -1, 3, "", this),
new Among ( "ies", 0, 2, "", this),
new Among ( "sses", 0, 1, "", this),
new Among ( "ss", 0, -1, "", this)
};
private Among a_1[] = {
new Among ( "", -1, 3, "", this),
new Among ( "bb", 0, 2, "", this),
new Among ( "dd", 0, 2, "", this),
new Among ( "ff", 0, 2, "", this),
new Among ( "gg", 0, 2, "", this),
new Among ( "bl", 0, 1, "", this),
new Among ( "mm", 0, 2, "", this),
new Among ( "nn", 0, 2, "", this),
new Among ( "pp", 0, 2, "", this),
new Among ( "rr", 0, 2, "", this),
new Among ( "at", 0, 1, "", this),
new Among ( "tt", 0, 2, "", this),
new Among ( "iz", 0, 1, "", this)
};
private Among a_2[] = {
new Among ( "ed", -1, 2, "", this),
new Among ( "eed", 0, 1, "", this),
new Among ( "ing", -1, 2, "", this)
};
private Among a_3[] = {
new Among ( "anci", -1, 3, "", this),
new Among ( "enci", -1, 2, "", this),
new Among ( "abli", -1, 4, "", this),
new Among ( "eli", -1, 6, "", this),
new Among ( "alli", -1, 9, "", this),
new Among ( "ousli", -1, 12, "", this),
new Among ( "entli", -1, 5, "", this),
new Among ( "aliti", -1, 10, "", this),
new Among ( "biliti", -1, 14, "", this),
new Among ( "iviti", -1, 13, "", this),
new Among ( "tional", -1, 1, "", this),
new Among ( "ational", 10, 8, "", this),
new Among ( "alism", -1, 10, "", this),
new Among ( "ation", -1, 8, "", this),
new Among ( "ization", 13, 7, "", this),
new Among ( "izer", -1, 7, "", this),
new Among ( "ator", -1, 8, "", this),
new Among ( "iveness", -1, 13, "", this),
new Among ( "fulness", -1, 11, "", this),
new Among ( "ousness", -1, 12, "", this)
};
private Among a_4[] = {
new Among ( "icate", -1, 2, "", this),
new Among ( "ative", -1, 3, "", this),
new Among ( "alize", -1, 1, "", this),
new Among ( "iciti", -1, 2, "", this),
new Among ( "ical", -1, 2, "", this),
new Among ( "ful", -1, 3, "", this),
new Among ( "ness", -1, 3, "", this)
};
private Among a_5[] = {
new Among ( "ic", -1, 1, "", this),
new Among ( "ance", -1, 1, "", this),
new Among ( "ence", -1, 1, "", this),
new Among ( "able", -1, 1, "", this),
new Among ( "ible", -1, 1, "", this),
new Among ( "ate", -1, 1, "", this),
new Among ( "ive", -1, 1, "", this),
new Among ( "ize", -1, 1, "", this),
new Among ( "iti", -1, 1, "", this),
new Among ( "al", -1, 1, "", this),
new Among ( "ism", -1, 1, "", this),
new Among ( "ion", -1, 2, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "ous", -1, 1, "", this),
new Among ( "ant", -1, 1, "", this),
new Among ( "ent", -1, 1, "", this),
new Among ( "ment", 15, 1, "", this),
new Among ( "ement", 16, 1, "", this),
new Among ( "ou", -1, 1, "", this)
};
private static final char g_v[] = {17, 65, 16, 1 };
private static final char g_v_WXY[] = {1, 17, 65, 208, 1 };
private boolean B_Y_found;
private int I_p2;
private int I_p1;
private void copy_from(PorterStemmer other) {
B_Y_found = other.B_Y_found;
I_p2 = other.I_p2;
I_p1 = other.I_p1;
super.copy_from(other);
}
private boolean r_shortv() {
// (, line 19
if (!(out_grouping_b(g_v_WXY, 89, 121)))
{
return false;
}
if (!(in_grouping_b(g_v, 97, 121)))
{
return false;
}
if (!(out_grouping_b(g_v, 97, 121)))
{
return false;
}
return true;
}
private boolean r_R1() {
if (!(I_p1 <= cursor))
{
return false;
}
return true;
}
private boolean r_R2() {
if (!(I_p2 <= cursor))
{
return false;
}
return true;
}
private boolean r_Step_1a() {
int among_var;
// (, line 24
// [, line 25
ket = cursor;
// substring, line 25
among_var = find_among_b(a_0, 4);
if (among_var == 0)
{
return false;
}
// ], line 25
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 26
// <-, line 26
slice_from("ss");
break;
case 2:
// (, line 27
// <-, line 27
slice_from("i");
break;
case 3:
// (, line 29
// delete, line 29
slice_del();
break;
}
return true;
}
private boolean r_Step_1b() {
int among_var;
int v_1;
int v_3;
int v_4;
// (, line 33
// [, line 34
ket = cursor;
// substring, line 34
among_var = find_among_b(a_2, 3);
if (among_var == 0)
{
return false;
}
// ], line 34
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 35
// call R1, line 35
if (!r_R1())
{
return false;
}
// <-, line 35
slice_from("ee");
break;
case 2:
// (, line 37
// test, line 38
v_1 = limit - cursor;
// gopast, line 38
golab0: while(true)
{
lab1: do {
if (!(in_grouping_b(g_v, 97, 121)))
{
break lab1;
}
break golab0;
} while (false);
if (cursor <= limit_backward)
{
return false;
}
cursor--;
}
cursor = limit - v_1;
// delete, line 38
slice_del();
// test, line 39
v_3 = limit - cursor;
// substring, line 39
among_var = find_among_b(a_1, 13);
if (among_var == 0)
{
return false;
}
cursor = limit - v_3;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 41
// <+, line 41
{
int c = cursor;
insert(cursor, cursor, "e");
cursor = c;
}
break;
case 2:
// (, line 44
// [, line 44
ket = cursor;
// next, line 44
if (cursor <= limit_backward)
{
return false;
}
cursor--;
// ], line 44
bra = cursor;
// delete, line 44
slice_del();
break;
case 3:
// (, line 45
// atmark, line 45
if (cursor != I_p1)
{
return false;
}
// test, line 45
v_4 = limit - cursor;
// call shortv, line 45
if (!r_shortv())
{
return false;
}
cursor = limit - v_4;
// <+, line 45
{
int c = cursor;
insert(cursor, cursor, "e");
cursor = c;
}
break;
}
break;
}
return true;
}
private boolean r_Step_1c() {
int v_1;
// (, line 51
// [, line 52
ket = cursor;
// or, line 52
lab0: do {
v_1 = limit - cursor;
lab1: do {
// literal, line 52
if (!(eq_s_b(1, "y")))
{
break lab1;
}
break lab0;
} while (false);
cursor = limit - v_1;
// literal, line 52
if (!(eq_s_b(1, "Y")))
{
return false;
}
} while (false);
// ], line 52
bra = cursor;
// gopast, line 53
golab2: while(true)
{
lab3: do {
if (!(in_grouping_b(g_v, 97, 121)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor <= limit_backward)
{
return false;
}
cursor--;
}
// <-, line 54
slice_from("i");
return true;
}
private boolean r_Step_2() {
int among_var;
// (, line 57
// [, line 58
ket = cursor;
// substring, line 58
among_var = find_among_b(a_3, 20);
if (among_var == 0)
{
return false;
}
// ], line 58
bra = cursor;
// call R1, line 58
if (!r_R1())
{
return false;
}
switch(among_var) {
case 0:
return false;
case 1:
// (, line 59
// <-, line 59
slice_from("tion");
break;
case 2:
// (, line 60
// <-, line 60
slice_from("ence");
break;
case 3:
// (, line 61
// <-, line 61
slice_from("ance");
break;
case 4:
// (, line 62
// <-, line 62
slice_from("able");
break;
case 5:
// (, line 63
// <-, line 63
slice_from("ent");
break;
case 6:
// (, line 64
// <-, line 64
slice_from("e");
break;
case 7:
// (, line 66
// <-, line 66
slice_from("ize");
break;
case 8:
// (, line 68
// <-, line 68
slice_from("ate");
break;
case 9:
// (, line 69
// <-, line 69
slice_from("al");
break;
case 10:
// (, line 71
// <-, line 71
slice_from("al");
break;
case 11:
// (, line 72
// <-, line 72
slice_from("ful");
break;
case 12:
// (, line 74
// <-, line 74
slice_from("ous");
break;
case 13:
// (, line 76
// <-, line 76
slice_from("ive");
break;
case 14:
// (, line 77
// <-, line 77
slice_from("ble");
break;
}
return true;
}
private boolean r_Step_3() {
int among_var;
// (, line 81
// [, line 82
ket = cursor;
// substring, line 82
among_var = find_among_b(a_4, 7);
if (among_var == 0)
{
return false;
}
// ], line 82
bra = cursor;
// call R1, line 82
if (!r_R1())
{
return false;
}
switch(among_var) {
case 0:
return false;
case 1:
// (, line 83
// <-, line 83
slice_from("al");
break;
case 2:
// (, line 85
// <-, line 85
slice_from("ic");
break;
case 3:
// (, line 87
// delete, line 87
slice_del();
break;
}
return true;
}
private boolean r_Step_4() {
int among_var;
int v_1;
// (, line 91
// [, line 92
ket = cursor;
// substring, line 92
among_var = find_among_b(a_5, 19);
if (among_var == 0)
{
return false;
}
// ], line 92
bra = cursor;
// call R2, line 92
if (!r_R2())
{
return false;
}
switch(among_var) {
case 0:
return false;
case 1:
// (, line 95
// delete, line 95
slice_del();
break;
case 2:
// (, line 96
// or, line 96
lab0: do {
v_1 = limit - cursor;
lab1: do {
// literal, line 96
if (!(eq_s_b(1, "s")))
{
break lab1;
}
break lab0;
} while (false);
cursor = limit - v_1;
// literal, line 96
if (!(eq_s_b(1, "t")))
{
return false;
}
} while (false);
// delete, line 96
slice_del();
break;
}
return true;
}
private boolean r_Step_5a() {
int v_1;
int v_2;
// (, line 100
// [, line 101
ket = cursor;
// literal, line 101
if (!(eq_s_b(1, "e")))
{
return false;
}
// ], line 101
bra = cursor;
// or, line 102
lab0: do {
v_1 = limit - cursor;
lab1: do {
// call R2, line 102
if (!r_R2())
{
break lab1;
}
break lab0;
} while (false);
cursor = limit - v_1;
// (, line 102
// call R1, line 102
if (!r_R1())
{
return false;
}
// not, line 102
{
v_2 = limit - cursor;
lab2: do {
// call shortv, line 102
if (!r_shortv())
{
break lab2;
}
return false;
} while (false);
cursor = limit - v_2;
}
} while (false);
// delete, line 103
slice_del();
return true;
}
private boolean r_Step_5b() {
// (, line 106
// [, line 107
ket = cursor;
// literal, line 107
if (!(eq_s_b(1, "l")))
{
return false;
}
// ], line 107
bra = cursor;
// call R2, line 108
if (!r_R2())
{
return false;
}
// literal, line 108
if (!(eq_s_b(1, "l")))
{
return false;
}
// delete, line 109
slice_del();
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
int v_10;
int v_11;
int v_12;
int v_13;
int v_14;
int v_15;
int v_16;
int v_17;
int v_18;
int v_19;
int v_20;
// (, line 113
// unset Y_found, line 115
B_Y_found = false;
// do, line 116
v_1 = cursor;
lab0: do {
// (, line 116
// [, line 116
bra = cursor;
// literal, line 116
if (!(eq_s(1, "y")))
{
break lab0;
}
// ], line 116
ket = cursor;
// <-, line 116
slice_from("Y");
// set Y_found, line 116
B_Y_found = true;
} while (false);
cursor = v_1;
// do, line 117
v_2 = cursor;
lab1: do {
// repeat, line 117
replab2: while(true)
{
v_3 = cursor;
lab3: do {
// (, line 117
// goto, line 117
golab4: while(true)
{
v_4 = cursor;
lab5: do {
// (, line 117
if (!(in_grouping(g_v, 97, 121)))
{
break lab5;
}
// [, line 117
bra = cursor;
// literal, line 117
if (!(eq_s(1, "y")))
{
break lab5;
}
// ], line 117
ket = cursor;
cursor = v_4;
break golab4;
} while (false);
cursor = v_4;
if (cursor >= limit)
{
break lab3;
}
cursor++;
}
// <-, line 117
slice_from("Y");
// set Y_found, line 117
B_Y_found = true;
continue replab2;
} while (false);
cursor = v_3;
break replab2;
}
} while (false);
cursor = v_2;
I_p1 = limit;
I_p2 = limit;
// do, line 121
v_5 = cursor;
lab6: do {
// (, line 121
// gopast, line 122
golab7: while(true)
{
lab8: do {
if (!(in_grouping(g_v, 97, 121)))
{
break lab8;
}
break golab7;
} while (false);
if (cursor >= limit)
{
break lab6;
}
cursor++;
}
// gopast, line 122
golab9: while(true)
{
lab10: do {
if (!(out_grouping(g_v, 97, 121)))
{
break lab10;
}
break golab9;
} while (false);
if (cursor >= limit)
{
break lab6;
}
cursor++;
}
// setmark p1, line 122
I_p1 = cursor;
// gopast, line 123
golab11: while(true)
{
lab12: do {
if (!(in_grouping(g_v, 97, 121)))
{
break lab12;
}
break golab11;
} while (false);
if (cursor >= limit)
{
break lab6;
}
cursor++;
}
// gopast, line 123
golab13: while(true)
{
lab14: do {
if (!(out_grouping(g_v, 97, 121)))
{
break lab14;
}
break golab13;
} while (false);
if (cursor >= limit)
{
break lab6;
}
cursor++;
}
// setmark p2, line 123
I_p2 = cursor;
} while (false);
cursor = v_5;
// backwards, line 126
limit_backward = cursor; cursor = limit;
// (, line 126
// do, line 127
v_10 = limit - cursor;
lab15: do {
// call Step_1a, line 127
if (!r_Step_1a())
{
break lab15;
}
} while (false);
cursor = limit - v_10;
// do, line 128
v_11 = limit - cursor;
lab16: do {
// call Step_1b, line 128
if (!r_Step_1b())
{
break lab16;
}
} while (false);
cursor = limit - v_11;
// do, line 129
v_12 = limit - cursor;
lab17: do {
// call Step_1c, line 129
if (!r_Step_1c())
{
break lab17;
}
} while (false);
cursor = limit - v_12;
// do, line 130
v_13 = limit - cursor;
lab18: do {
// call Step_2, line 130
if (!r_Step_2())
{
break lab18;
}
} while (false);
cursor = limit - v_13;
// do, line 131
v_14 = limit - cursor;
lab19: do {
// call Step_3, line 131
if (!r_Step_3())
{
break lab19;
}
} while (false);
cursor = limit - v_14;
// do, line 132
v_15 = limit - cursor;
lab20: do {
// call Step_4, line 132
if (!r_Step_4())
{
break lab20;
}
} while (false);
cursor = limit - v_15;
// do, line 133
v_16 = limit - cursor;
lab21: do {
// call Step_5a, line 133
if (!r_Step_5a())
{
break lab21;
}
} while (false);
cursor = limit - v_16;
// do, line 134
v_17 = limit - cursor;
lab22: do {
// call Step_5b, line 134
if (!r_Step_5b())
{
break lab22;
}
} while (false);
cursor = limit - v_17;
cursor = limit_backward; // do, line 137
v_18 = cursor;
lab23: do {
// (, line 137
// Boolean test Y_found, line 137
if (!(B_Y_found))
{
break lab23;
}
// repeat, line 137
replab24: while(true)
{
v_19 = cursor;
lab25: do {
// (, line 137
// goto, line 137
golab26: while(true)
{
v_20 = cursor;
lab27: do {
// (, line 137
// [, line 137
bra = cursor;
// literal, line 137
if (!(eq_s(1, "Y")))
{
break lab27;
}
// ], line 137
ket = cursor;
cursor = v_20;
break golab26;
} while (false);
cursor = v_20;
if (cursor >= limit)
{
break lab25;
}
cursor++;
}
// <-, line 137
slice_from("y");
continue replab24;
} while (false);
cursor = v_19;
break replab24;
}
} while (false);
cursor = v_18;
return true;
}
}
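Each generated class is driven through its SnowballProgram base: set the current word, call stem(), read the result back. The sketch below assumes the conventional Snowball runtime accessors setCurrent(String) and getCurrent() on the base class (only stem() is visible in the code above), so those method names are an assumption rather than something shown in this commit:
import org.tartarus.snowball.ext.PorterStemmer;
public class PorterStemmerExample {
    public static void main(String[] args) {
        PorterStemmer stemmer = new PorterStemmer();
        for (String word : new String[] {"running", "flies", "happiness"}) {
            stemmer.setCurrent(word);   // assumed SnowballProgram setter
            stemmer.stem();             // stem() as generated above
            System.out.println(word + " -> " + stemmer.getCurrent());  // assumed getter
        }
    }
}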

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,727 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
public class RussianStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "\u0432", -1, 1, "", this),
new Among ( "\u0438\u0432", 0, 2, "", this),
new Among ( "\u044B\u0432", 0, 2, "", this),
new Among ( "\u0432\u0448\u0438", -1, 1, "", this),
new Among ( "\u0438\u0432\u0448\u0438", 3, 2, "", this),
new Among ( "\u044B\u0432\u0448\u0438", 3, 2, "", this),
new Among ( "\u0432\u0448\u0438\u0441\u044C", -1, 1, "", this),
new Among ( "\u0438\u0432\u0448\u0438\u0441\u044C", 6, 2, "", this),
new Among ( "\u044B\u0432\u0448\u0438\u0441\u044C", 6, 2, "", this)
};
private Among a_1[] = {
new Among ( "\u0435\u0435", -1, 1, "", this),
new Among ( "\u0438\u0435", -1, 1, "", this),
new Among ( "\u043E\u0435", -1, 1, "", this),
new Among ( "\u044B\u0435", -1, 1, "", this),
new Among ( "\u0438\u043C\u0438", -1, 1, "", this),
new Among ( "\u044B\u043C\u0438", -1, 1, "", this),
new Among ( "\u0435\u0439", -1, 1, "", this),
new Among ( "\u0438\u0439", -1, 1, "", this),
new Among ( "\u043E\u0439", -1, 1, "", this),
new Among ( "\u044B\u0439", -1, 1, "", this),
new Among ( "\u0435\u043C", -1, 1, "", this),
new Among ( "\u0438\u043C", -1, 1, "", this),
new Among ( "\u043E\u043C", -1, 1, "", this),
new Among ( "\u044B\u043C", -1, 1, "", this),
new Among ( "\u0435\u0433\u043E", -1, 1, "", this),
new Among ( "\u043E\u0433\u043E", -1, 1, "", this),
new Among ( "\u0435\u043C\u0443", -1, 1, "", this),
new Among ( "\u043E\u043C\u0443", -1, 1, "", this),
new Among ( "\u0438\u0445", -1, 1, "", this),
new Among ( "\u044B\u0445", -1, 1, "", this),
new Among ( "\u0435\u044E", -1, 1, "", this),
new Among ( "\u043E\u044E", -1, 1, "", this),
new Among ( "\u0443\u044E", -1, 1, "", this),
new Among ( "\u044E\u044E", -1, 1, "", this),
new Among ( "\u0430\u044F", -1, 1, "", this),
new Among ( "\u044F\u044F", -1, 1, "", this)
};
private Among a_2[] = {
new Among ( "\u0435\u043C", -1, 1, "", this),
new Among ( "\u043D\u043D", -1, 1, "", this),
new Among ( "\u0432\u0448", -1, 1, "", this),
new Among ( "\u0438\u0432\u0448", 2, 2, "", this),
new Among ( "\u044B\u0432\u0448", 2, 2, "", this),
new Among ( "\u0449", -1, 1, "", this),
new Among ( "\u044E\u0449", 5, 1, "", this),
new Among ( "\u0443\u044E\u0449", 6, 2, "", this)
};
private Among a_3[] = {
new Among ( "\u0441\u044C", -1, 1, "", this),
new Among ( "\u0441\u044F", -1, 1, "", this)
};
private Among a_4[] = {
new Among ( "\u043B\u0430", -1, 1, "", this),
new Among ( "\u0438\u043B\u0430", 0, 2, "", this),
new Among ( "\u044B\u043B\u0430", 0, 2, "", this),
new Among ( "\u043D\u0430", -1, 1, "", this),
new Among ( "\u0435\u043D\u0430", 3, 2, "", this),
new Among ( "\u0435\u0442\u0435", -1, 1, "", this),
new Among ( "\u0438\u0442\u0435", -1, 2, "", this),
new Among ( "\u0439\u0442\u0435", -1, 1, "", this),
new Among ( "\u0435\u0439\u0442\u0435", 7, 2, "", this),
new Among ( "\u0443\u0439\u0442\u0435", 7, 2, "", this),
new Among ( "\u043B\u0438", -1, 1, "", this),
new Among ( "\u0438\u043B\u0438", 10, 2, "", this),
new Among ( "\u044B\u043B\u0438", 10, 2, "", this),
new Among ( "\u0439", -1, 1, "", this),
new Among ( "\u0435\u0439", 13, 2, "", this),
new Among ( "\u0443\u0439", 13, 2, "", this),
new Among ( "\u043B", -1, 1, "", this),
new Among ( "\u0438\u043B", 16, 2, "", this),
new Among ( "\u044B\u043B", 16, 2, "", this),
new Among ( "\u0435\u043C", -1, 1, "", this),
new Among ( "\u0438\u043C", -1, 2, "", this),
new Among ( "\u044B\u043C", -1, 2, "", this),
new Among ( "\u043D", -1, 1, "", this),
new Among ( "\u0435\u043D", 22, 2, "", this),
new Among ( "\u043B\u043E", -1, 1, "", this),
new Among ( "\u0438\u043B\u043E", 24, 2, "", this),
new Among ( "\u044B\u043B\u043E", 24, 2, "", this),
new Among ( "\u043D\u043E", -1, 1, "", this),
new Among ( "\u0435\u043D\u043E", 27, 2, "", this),
new Among ( "\u043D\u043D\u043E", 27, 1, "", this),
new Among ( "\u0435\u0442", -1, 1, "", this),
new Among ( "\u0443\u0435\u0442", 30, 2, "", this),
new Among ( "\u0438\u0442", -1, 2, "", this),
new Among ( "\u044B\u0442", -1, 2, "", this),
new Among ( "\u044E\u0442", -1, 1, "", this),
new Among ( "\u0443\u044E\u0442", 34, 2, "", this),
new Among ( "\u044F\u0442", -1, 2, "", this),
new Among ( "\u043D\u044B", -1, 1, "", this),
new Among ( "\u0435\u043D\u044B", 37, 2, "", this),
new Among ( "\u0442\u044C", -1, 1, "", this),
new Among ( "\u0438\u0442\u044C", 39, 2, "", this),
new Among ( "\u044B\u0442\u044C", 39, 2, "", this),
new Among ( "\u0435\u0448\u044C", -1, 1, "", this),
new Among ( "\u0438\u0448\u044C", -1, 2, "", this),
new Among ( "\u044E", -1, 2, "", this),
new Among ( "\u0443\u044E", 44, 2, "", this)
};
private Among a_5[] = {
new Among ( "\u0430", -1, 1, "", this),
new Among ( "\u0435\u0432", -1, 1, "", this),
new Among ( "\u043E\u0432", -1, 1, "", this),
new Among ( "\u0435", -1, 1, "", this),
new Among ( "\u0438\u0435", 3, 1, "", this),
new Among ( "\u044C\u0435", 3, 1, "", this),
new Among ( "\u0438", -1, 1, "", this),
new Among ( "\u0435\u0438", 6, 1, "", this),
new Among ( "\u0438\u0438", 6, 1, "", this),
new Among ( "\u0430\u043C\u0438", 6, 1, "", this),
new Among ( "\u044F\u043C\u0438", 6, 1, "", this),
new Among ( "\u0438\u044F\u043C\u0438", 10, 1, "", this),
new Among ( "\u0439", -1, 1, "", this),
new Among ( "\u0435\u0439", 12, 1, "", this),
new Among ( "\u0438\u0435\u0439", 13, 1, "", this),
new Among ( "\u0438\u0439", 12, 1, "", this),
new Among ( "\u043E\u0439", 12, 1, "", this),
new Among ( "\u0430\u043C", -1, 1, "", this),
new Among ( "\u0435\u043C", -1, 1, "", this),
new Among ( "\u0438\u0435\u043C", 18, 1, "", this),
new Among ( "\u043E\u043C", -1, 1, "", this),
new Among ( "\u044F\u043C", -1, 1, "", this),
new Among ( "\u0438\u044F\u043C", 21, 1, "", this),
new Among ( "\u043E", -1, 1, "", this),
new Among ( "\u0443", -1, 1, "", this),
new Among ( "\u0430\u0445", -1, 1, "", this),
new Among ( "\u044F\u0445", -1, 1, "", this),
new Among ( "\u0438\u044F\u0445", 26, 1, "", this),
new Among ( "\u044B", -1, 1, "", this),
new Among ( "\u044C", -1, 1, "", this),
new Among ( "\u044E", -1, 1, "", this),
new Among ( "\u0438\u044E", 30, 1, "", this),
new Among ( "\u044C\u044E", 30, 1, "", this),
new Among ( "\u044F", -1, 1, "", this),
new Among ( "\u0438\u044F", 33, 1, "", this),
new Among ( "\u044C\u044F", 33, 1, "", this)
};
private Among a_6[] = {
new Among ( "\u043E\u0441\u0442", -1, 1, "", this),
new Among ( "\u043E\u0441\u0442\u044C", -1, 1, "", this)
};
private Among a_7[] = {
new Among ( "\u0435\u0439\u0448\u0435", -1, 1, "", this),
new Among ( "\u043D", -1, 2, "", this),
new Among ( "\u0435\u0439\u0448", -1, 1, "", this),
new Among ( "\u044C", -1, 3, "", this)
};
private static final char g_v[] = {33, 65, 8, 232 };
private int I_p2;
private int I_pV;
private void copy_from(RussianStemmer other) {
I_p2 = other.I_p2;
I_pV = other.I_pV;
super.copy_from(other);
}
private boolean r_mark_regions() {
int v_1;
// (, line 57
I_pV = limit;
I_p2 = limit;
// do, line 61
v_1 = cursor;
lab0: do {
// (, line 61
// gopast, line 62
golab1: while(true)
{
lab2: do {
if (!(in_grouping(g_v, 1072, 1103)))
{
break lab2;
}
break golab1;
} while (false);
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
// setmark pV, line 62
I_pV = cursor;
// gopast, line 62
golab3: while(true)
{
lab4: do {
if (!(out_grouping(g_v, 1072, 1103)))
{
break lab4;
}
break golab3;
} while (false);
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
// gopast, line 63
golab5: while(true)
{
lab6: do {
if (!(in_grouping(g_v, 1072, 1103)))
{
break lab6;
}
break golab5;
} while (false);
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
// gopast, line 63
golab7: while(true)
{
lab8: do {
if (!(out_grouping(g_v, 1072, 1103)))
{
break lab8;
}
break golab7;
} while (false);
if (cursor >= limit)
{
break lab0;
}
cursor++;
}
// setmark p2, line 63
I_p2 = cursor;
} while (false);
cursor = v_1;
return true;
}
private boolean r_R2() {
if (!(I_p2 <= cursor))
{
return false;
}
return true;
}
private boolean r_perfective_gerund() {
int among_var;
int v_1;
// (, line 71
// [, line 72
ket = cursor;
// substring, line 72
among_var = find_among_b(a_0, 9);
if (among_var == 0)
{
return false;
}
// ], line 72
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 76
// or, line 76
lab0: do {
v_1 = limit - cursor;
lab1: do {
// literal, line 76
if (!(eq_s_b(1, "\u0430")))
{
break lab1;
}
break lab0;
} while (false);
cursor = limit - v_1;
// literal, line 76
if (!(eq_s_b(1, "\u044F")))
{
return false;
}
} while (false);
// delete, line 76
slice_del();
break;
case 2:
// (, line 83
// delete, line 83
slice_del();
break;
}
return true;
}
private boolean r_adjective() {
int among_var;
// (, line 87
// [, line 88
ket = cursor;
// substring, line 88
among_var = find_among_b(a_1, 26);
if (among_var == 0)
{
return false;
}
// ], line 88
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 97
// delete, line 97
slice_del();
break;
}
return true;
}
private boolean r_adjectival() {
int among_var;
int v_1;
int v_2;
// (, line 101
// call adjective, line 102
if (!r_adjective())
{
return false;
}
// try, line 109
v_1 = limit - cursor;
lab0: do {
// (, line 109
// [, line 110
ket = cursor;
// substring, line 110
among_var = find_among_b(a_2, 8);
if (among_var == 0)
{
cursor = limit - v_1;
break lab0;
}
// ], line 110
bra = cursor;
switch(among_var) {
case 0:
cursor = limit - v_1;
break lab0;
case 1:
// (, line 115
// or, line 115
lab1: do {
v_2 = limit - cursor;
lab2: do {
// literal, line 115
if (!(eq_s_b(1, "\u0430")))
{
break lab2;
}
break lab1;
} while (false);
cursor = limit - v_2;
// literal, line 115
if (!(eq_s_b(1, "\u044F")))
{
cursor = limit - v_1;
break lab0;
}
} while (false);
// delete, line 115
slice_del();
break;
case 2:
// (, line 122
// delete, line 122
slice_del();
break;
}
} while (false);
return true;
}
private boolean r_reflexive() {
int among_var;
// (, line 128
// [, line 129
ket = cursor;
// substring, line 129
among_var = find_among_b(a_3, 2);
if (among_var == 0)
{
return false;
}
// ], line 129
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 132
// delete, line 132
slice_del();
break;
}
return true;
}
private boolean r_verb() {
int among_var;
int v_1;
// (, line 136
// [, line 137
ket = cursor;
// substring, line 137
among_var = find_among_b(a_4, 46);
if (among_var == 0)
{
return false;
}
// ], line 137
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 143
// or, line 143
lab0: do {
v_1 = limit - cursor;
lab1: do {
// literal, line 143
if (!(eq_s_b(1, "\u0430")))
{
break lab1;
}
break lab0;
} while (false);
cursor = limit - v_1;
// literal, line 143
if (!(eq_s_b(1, "\u044F")))
{
return false;
}
} while (false);
// delete, line 143
slice_del();
break;
case 2:
// (, line 151
// delete, line 151
slice_del();
break;
}
return true;
}
private boolean r_noun() {
int among_var;
// (, line 159
// [, line 160
ket = cursor;
// substring, line 160
among_var = find_among_b(a_5, 36);
if (among_var == 0)
{
return false;
}
// ], line 160
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 167
// delete, line 167
slice_del();
break;
}
return true;
}
private boolean r_derivational() {
int among_var;
// (, line 175
// [, line 176
ket = cursor;
// substring, line 176
among_var = find_among_b(a_6, 2);
if (among_var == 0)
{
return false;
}
// ], line 176
bra = cursor;
// call R2, line 176
if (!r_R2())
{
return false;
}
switch(among_var) {
case 0:
return false;
case 1:
// (, line 179
// delete, line 179
slice_del();
break;
}
return true;
}
private boolean r_tidy_up() {
int among_var;
// (, line 183
// [, line 184
ket = cursor;
// substring, line 184
among_var = find_among_b(a_7, 4);
if (among_var == 0)
{
return false;
}
// ], line 184
bra = cursor;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 188
// delete, line 188
slice_del();
// [, line 189
ket = cursor;
// literal, line 189
if (!(eq_s_b(1, "\u043D")))
{
return false;
}
// ], line 189
bra = cursor;
// literal, line 189
if (!(eq_s_b(1, "\u043D")))
{
return false;
}
// delete, line 189
slice_del();
break;
case 2:
// (, line 192
// literal, line 192
if (!(eq_s_b(1, "\u043D")))
{
return false;
}
// delete, line 192
slice_del();
break;
case 3:
// (, line 194
// delete, line 194
slice_del();
break;
}
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
int v_5;
int v_6;
int v_7;
int v_8;
int v_9;
int v_10;
// (, line 199
// do, line 201
v_1 = cursor;
lab0: do {
// call mark_regions, line 201
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 202
limit_backward = cursor; cursor = limit;
// setlimit, line 202
v_2 = limit - cursor;
// tomark, line 202
if (cursor < I_pV)
{
return false;
}
cursor = I_pV;
v_3 = limit_backward;
limit_backward = cursor;
cursor = limit - v_2;
// (, line 202
// do, line 203
v_4 = limit - cursor;
lab1: do {
// (, line 203
// or, line 204
lab2: do {
v_5 = limit - cursor;
lab3: do {
// call perfective_gerund, line 204
if (!r_perfective_gerund())
{
break lab3;
}
break lab2;
} while (false);
cursor = limit - v_5;
// (, line 205
// try, line 205
v_6 = limit - cursor;
lab4: do {
// call reflexive, line 205
if (!r_reflexive())
{
cursor = limit - v_6;
break lab4;
}
} while (false);
// or, line 206
lab5: do {
v_7 = limit - cursor;
lab6: do {
// call adjectival, line 206
if (!r_adjectival())
{
break lab6;
}
break lab5;
} while (false);
cursor = limit - v_7;
lab7: do {
// call verb, line 206
if (!r_verb())
{
break lab7;
}
break lab5;
} while (false);
cursor = limit - v_7;
// call noun, line 206
if (!r_noun())
{
break lab1;
}
} while (false);
} while (false);
} while (false);
cursor = limit - v_4;
// try, line 209
v_8 = limit - cursor;
lab8: do {
// (, line 209
// [, line 209
ket = cursor;
// literal, line 209
if (!(eq_s_b(1, "\u0438")))
{
cursor = limit - v_8;
break lab8;
}
// ], line 209
bra = cursor;
// delete, line 209
slice_del();
} while (false);
// do, line 212
v_9 = limit - cursor;
lab9: do {
// call derivational, line 212
if (!r_derivational())
{
break lab9;
}
} while (false);
cursor = limit - v_9;
// do, line 213
v_10 = limit - cursor;
lab10: do {
// call tidy_up, line 213
if (!r_tidy_up())
{
break lab10;
}
} while (false);
cursor = limit - v_10;
limit_backward = v_3;
cursor = limit_backward; return true;
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,349 @@
// This file was generated automatically by the Snowball to Java compiler
package org.tartarus.snowball.ext;
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.Among;
/**
* Generated class implementing code defined by a snowball script.
*/
public class SwedishStemmer extends SnowballProgram {
private Among a_0[] = {
new Among ( "a", -1, 1, "", this),
new Among ( "arna", 0, 1, "", this),
new Among ( "erna", 0, 1, "", this),
new Among ( "heterna", 2, 1, "", this),
new Among ( "orna", 0, 1, "", this),
new Among ( "ad", -1, 1, "", this),
new Among ( "e", -1, 1, "", this),
new Among ( "ade", 6, 1, "", this),
new Among ( "ande", 6, 1, "", this),
new Among ( "arne", 6, 1, "", this),
new Among ( "are", 6, 1, "", this),
new Among ( "aste", 6, 1, "", this),
new Among ( "en", -1, 1, "", this),
new Among ( "anden", 12, 1, "", this),
new Among ( "aren", 12, 1, "", this),
new Among ( "heten", 12, 1, "", this),
new Among ( "ern", -1, 1, "", this),
new Among ( "ar", -1, 1, "", this),
new Among ( "er", -1, 1, "", this),
new Among ( "heter", 18, 1, "", this),
new Among ( "or", -1, 1, "", this),
new Among ( "s", -1, 2, "", this),
new Among ( "as", 21, 1, "", this),
new Among ( "arnas", 22, 1, "", this),
new Among ( "ernas", 22, 1, "", this),
new Among ( "ornas", 22, 1, "", this),
new Among ( "es", 21, 1, "", this),
new Among ( "ades", 26, 1, "", this),
new Among ( "andes", 26, 1, "", this),
new Among ( "ens", 21, 1, "", this),
new Among ( "arens", 29, 1, "", this),
new Among ( "hetens", 29, 1, "", this),
new Among ( "erns", 21, 1, "", this),
new Among ( "at", -1, 1, "", this),
new Among ( "andet", -1, 1, "", this),
new Among ( "het", -1, 1, "", this),
new Among ( "ast", -1, 1, "", this)
};
private Among a_1[] = {
new Among ( "dd", -1, -1, "", this),
new Among ( "gd", -1, -1, "", this),
new Among ( "nn", -1, -1, "", this),
new Among ( "dt", -1, -1, "", this),
new Among ( "gt", -1, -1, "", this),
new Among ( "kt", -1, -1, "", this),
new Among ( "tt", -1, -1, "", this)
};
private Among a_2[] = {
new Among ( "ig", -1, 1, "", this),
new Among ( "lig", 0, 1, "", this),
new Among ( "els", -1, 1, "", this),
new Among ( "fullt", -1, 3, "", this),
new Among ( "l\u00F6st", -1, 2, "", this)
};
private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 };
private static final char g_s_ending[] = {119, 127, 149 };
private int I_x;
private int I_p1;
private void copy_from(SwedishStemmer other) {
I_x = other.I_x;
I_p1 = other.I_p1;
super.copy_from(other);
}
private boolean r_mark_regions() {
int v_1;
int v_2;
// (, line 26
I_p1 = limit;
// test, line 29
v_1 = cursor;
// (, line 29
// hop, line 29
{
int c = cursor + 3;
if (0 > c || c > limit)
{
return false;
}
cursor = c;
}
// setmark x, line 29
I_x = cursor;
cursor = v_1;
// goto, line 30
golab0: while(true)
{
v_2 = cursor;
lab1: do {
if (!(in_grouping(g_v, 97, 246)))
{
break lab1;
}
cursor = v_2;
break golab0;
} while (false);
cursor = v_2;
if (cursor >= limit)
{
return false;
}
cursor++;
}
// gopast, line 30
golab2: while(true)
{
lab3: do {
if (!(out_grouping(g_v, 97, 246)))
{
break lab3;
}
break golab2;
} while (false);
if (cursor >= limit)
{
return false;
}
cursor++;
}
// setmark p1, line 30
I_p1 = cursor;
// try, line 31
lab4: do {
// (, line 31
if (!(I_p1 < I_x))
{
break lab4;
}
I_p1 = I_x;
} while (false);
return true;
}
private boolean r_main_suffix() {
int among_var;
int v_1;
int v_2;
// (, line 36
// setlimit, line 37
v_1 = limit - cursor;
// tomark, line 37
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 37
// [, line 37
ket = cursor;
// substring, line 37
among_var = find_among_b(a_0, 37);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 37
bra = cursor;
limit_backward = v_2;
switch(among_var) {
case 0:
return false;
case 1:
// (, line 44
// delete, line 44
slice_del();
break;
case 2:
// (, line 46
if (!(in_grouping_b(g_s_ending, 98, 121)))
{
return false;
}
// delete, line 46
slice_del();
break;
}
return true;
}
private boolean r_consonant_pair() {
int v_1;
int v_2;
int v_3;
// setlimit, line 50
v_1 = limit - cursor;
// tomark, line 50
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 50
// and, line 52
v_3 = limit - cursor;
// among, line 51
if (find_among_b(a_1, 7) == 0)
{
limit_backward = v_2;
return false;
}
cursor = limit - v_3;
// (, line 52
// [, line 52
ket = cursor;
// next, line 52
if (cursor <= limit_backward)
{
limit_backward = v_2;
return false;
}
cursor--;
// ], line 52
bra = cursor;
// delete, line 52
slice_del();
limit_backward = v_2;
return true;
}
private boolean r_other_suffix() {
int among_var;
int v_1;
int v_2;
// setlimit, line 55
v_1 = limit - cursor;
// tomark, line 55
if (cursor < I_p1)
{
return false;
}
cursor = I_p1;
v_2 = limit_backward;
limit_backward = cursor;
cursor = limit - v_1;
// (, line 55
// [, line 56
ket = cursor;
// substring, line 56
among_var = find_among_b(a_2, 5);
if (among_var == 0)
{
limit_backward = v_2;
return false;
}
// ], line 56
bra = cursor;
switch(among_var) {
case 0:
limit_backward = v_2;
return false;
case 1:
// (, line 57
// delete, line 57
slice_del();
break;
case 2:
// (, line 58
// <-, line 58
slice_from("l\u00F6s");
break;
case 3:
// (, line 59
// <-, line 59
slice_from("full");
break;
}
limit_backward = v_2;
return true;
}
public boolean stem() {
int v_1;
int v_2;
int v_3;
int v_4;
// (, line 64
// do, line 66
v_1 = cursor;
lab0: do {
// call mark_regions, line 66
if (!r_mark_regions())
{
break lab0;
}
} while (false);
cursor = v_1;
// backwards, line 67
limit_backward = cursor; cursor = limit;
// (, line 67
// do, line 68
v_2 = limit - cursor;
lab1: do {
// call main_suffix, line 68
if (!r_main_suffix())
{
break lab1;
}
} while (false);
cursor = limit - v_2;
// do, line 69
v_3 = limit - cursor;
lab2: do {
// call consonant_pair, line 69
if (!r_consonant_pair())
{
break lab2;
}
} while (false);
cursor = limit - v_3;
// do, line 70
v_4 = limit - cursor;
lab3: do {
// call other_suffix, line 70
if (!r_other_suffix())
{
break lab3;
}
} while (false);
cursor = limit - v_4;
cursor = limit_backward; return true;
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,53 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<body>
<p>
Lucene Snowball README file
</p>
<p>
This project provides pre-compiled versions of the Snowball stemmers
based on revision 500 of the Tartarus Snowball repository,
together with classes integrating them with the Lucene search engine.
</p>
<p>
A few changes have been made to the static Snowball code and the compiled stemmers:
</p>
<ul>
<li>Class SnowballProgram has been made abstract and now declares the abstract method stem(), so the Lucene filter class SnowballFilter can invoke a stemmer directly instead of through reflection (see the sketch below).</li>
<li>All use of StringBuffer has been replaced with StringBuilder for speed.</li>
<li>The Snowball BSD license header has been added to the Java classes to keep RAT from adding ASL headers.</li>
</ul>
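<p>
Because stem() is declared on the abstract base class, a stemmer can be driven with a plain
virtual call. The sketch below is illustrative only (the class name StemOneWord is made up and
this is not the SnowballFilter source); it assumes the generated EnglishStemmer class from this
package and reuses a sample word from the unit tests in this commit:
</p>
<pre>
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.ext.EnglishStemmer;

// Illustrative class name only, not part of the Snowball package.
public class StemOneWord {
  public static void main(String[] args) {
    // Direct call on the abstract stem() method -- no per-token
    // java.lang.reflect.Method lookup is required.
    SnowballProgram stemmer = new EnglishStemmer();
    stemmer.setCurrent("abhorred");           // feed one term
    stemmer.stem();                           // run the generated algorithm
    System.out.println(stemmer.getCurrent()); // prints "abhor", matching the test expectations
  }
}
</pre>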
<p>
See the Snowball <a href ="http://snowball.tartarus.org/">home page</a> for more information about the algorithms.
</p>
<p>
<b>IMPORTANT NOTICE ON BACKWARDS COMPATIBILITY!</b>
</p>
<p>
An index created using the Snowball module in Lucene 2.3.2 and below
might not be compatible with the Snowball module in Lucene 2.4 or greater.
</p>
<p>
For more information about this issue see:
https://issues.apache.org/jira/browse/LUCENE-1142
</p>
</body>
</html>

View File

@ -0,0 +1,108 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| A Danish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| This is a ranked list (commonest to rarest) of stopwords derived from
| a large text sample.
og | and
i | in
jeg | I
det | that (dem. pronoun)/it (pers. pronoun)
at | that (in front of a sentence)/to (with infinitive)
en | a/an
den | it (pers. pronoun)/that (dem. pronoun)
til | to/at/for/until/against/by/of/into, more
er | present tense of "to be"
som | who, as
på | on/upon/in/on/at/to/after/of/with/for, on
de | they
med | with/by/in, along
han | he
af | of/by/from/off/for/in/with/on, off
for | at/for/to/from/by/of/ago, in front/before, because
ikke | not
der | who/which, there/those
var | past tense of "to be"
mig | me/myself
sig | oneself/himself/herself/itself/themselves
men | but
et | a/an/one, one (number), someone/somebody/one
har | present tense of "to have"
om | round/about/for/in/a, about/around/down, if
vi | we
min | my
havde | past tense of "to have"
ham | him
hun | she
nu | now
over | over/above/across/by/beyond/past/on/about, over/past
da | then, when/as/since
fra | from/off/since, off, since
du | you
ud | out
sin | his/her/its/one's
dem | them
os | us/ourselves
op | up
man | you/one
hans | his
hvor | where
eller | or
hvad | what
skal | must/shall etc.
selv | myself/youself/herself/ourselves etc., even
her | here
alle | all/everyone/everybody etc.
vil | will (verb)
blev | past tense of "to stay/to remain/to get/to become"
kunne | could
ind | in
når | when
være | present tense of "to be"
dog | however/yet/after all
noget | something
ville | would
jo | you know/you see (adv), yes
deres | their/theirs
efter | after/behind/according to/for/by/from, later/afterwards
ned | down
skulle | should
denne | this
end | than
dette | this
mit | my/mine
også | also
under | under/beneath/below/during, below/underneath
have | have
dig | you
anden | other
hende | her
mine | my
alt | everything
meget | much/very, plenty of
sit | his, her, its, one's
sine | his, her, its, one's
vor | our
mod | against
disse | these
hvis | if
din | your/yours
nogle | some
hos | by/at
blive | be/become
mange | many
ad | by/through
bliver | present tense of "to be/to become"
hendes | her/hers
været | be
thi | for (conj)
jer | you
sådan | such, like this/like that

View File

@ -0,0 +1,117 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| A Dutch stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| This is a ranked list (commonest to rarest) of stopwords derived from
| a large sample of Dutch text.
| Dutch stop words frequently exhibit homonym clashes. These are indicated
| clearly below.
de | the
en | and
van | of, from
ik | I, the ego
te | (1) chez, at etc, (2) to, (3) too
dat | that, which
die | that, those, who, which
in | in, inside
een | a, an, one
hij | he
het | the, it
niet | not, nothing, naught
zijn | (1) to be, being, (2) his, one's, its
is | is
was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
op | on, upon, at, in, up, used up
aan | on, upon, to (as dative)
met | with, by
als | like, such as, when
voor | (1) before, in front of, (2) furrow
had | had, past tense all persons sing. of 'hebben' (have)
er | there
maar | but, only
om | round, about, for etc
hem | him
dan | then
zou | should/would, past tense all persons sing. of 'zullen'
of | or, whether, if
wat | what, something, anything
mijn | possessive and noun 'mine'
men | people, 'one'
dit | this
zo | so, thus, in this way
door | through by
over | over, across
ze | she, her, they, them
zich | oneself
bij | (1) a bee, (2) by, near, at
ook | also, too
tot | till, until
je | you
mij | me
uit | out of, from
der | Old Dutch form of 'van der' still found in surnames
daar | (1) there, (2) because
haar | (1) her, their, them, (2) hair
naar | (1) unpleasant, unwell etc, (2) towards, (3) as
heb | present first person sing. of 'to have'
hoe | how, why
heeft | present third person sing. of 'to have'
hebben | 'to have' and various parts thereof
deze | this
u | you
want | (1) for, (2) mitten, (3) rigging
nog | yet, still
zal | 'shall', first and third person sing. of verb 'zullen' (will)
me | me
zij | she, they
nu | now
ge | 'thou', still used in Belgium and south Netherlands
geen | none
omdat | because
iets | something, somewhat
worden | to become, grow, get
toch | yet, still
al | all, every, each
waren | (1) 'were' (2) to wander, (3) wares, (3)
veel | much, many
meer | (1) more, (2) lake
doen | to do, to make
toen | then, when
moet | noun 'spot/mote' and present form of 'to must'
ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
zonder | without
kan | noun 'can' and present form of 'to be able'
hun | their, them
dus | so, consequently
alles | all, everything, anything
onder | under, beneath
ja | yes, of course
eens | once, one day
hier | here
wie | who
werd | imperfect third person sing. of 'become'
altijd | always
doch | yet, but etc
wordt | present third person sing. of 'become'
wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
kunnen | to be able
ons | us/our
zelf | self
tegen | against, towards, at
na | after, near
reeds | already
wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
kon | could; past tense of 'to be able'
niets | nothing
uw | your
iemand | somebody
geweest | been; past participle of 'be'
andere | other

View File

@ -0,0 +1,317 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| An English stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| Many of the forms below are quite rare (e.g. "yourselves") but included for
| completeness.
| PRONOUNS FORMS
| 1st person sing
i | subject, always in upper case of course
me | object
my | possessive adjective
| the possessive pronoun `mine' is best suppressed, because of the
| sense of coal-mine etc.
myself | reflexive
| 1st person plural
we | subject
| us | object
| care is required here because US = United States. It is usually
| safe to remove it if it is in lower case.
our | possessive adjective
ours | possessive pronoun
ourselves | reflexive
| second person (archaic `thou' forms not included)
you | subject and object
your | possessive adjective
yours | possessive pronoun
yourself | reflexive (singular)
yourselves | reflexive (plural)
| third person singular
he | subject
him | object
his | possessive adjective and pronoun
himself | reflexive
she | subject
her | object and possessive adjective
hers | possessive pronoun
herself | reflexive
it | subject and object
its | possessive adjective
itself | reflexive
| third person plural
they | subject
them | object
their | possessive adjective
theirs | possessive pronoun
themselves | reflexive
| other forms (demonstratives, interrogatives)
what
which
who
whom
this
that
these
those
| VERB FORMS (using F.R. Palmer's nomenclature)
| BE
am | 1st person, present
is | -s form (3rd person, present)
are | present
was | 1st person, past
were | past
be | infinitive
been | past participle
being | -ing form
| HAVE
have | simple
has | -s form
had | past
having | -ing form
| DO
do | simple
does | -s form
did | past
doing | -ing form
| The forms below are, I believe, best omitted, because of the significant
| homonym forms:
| He made a WILL
| old tin CAN
| merry month of MAY
| a smell of MUST
| fight the good fight with all thy MIGHT
| would, could, should, ought might however be included
| | AUXILIARIES
| | WILL
|will
would
| | SHALL
|shall
should
| | CAN
|can
could
| | MAY
|may
|might
| | MUST
|must
| | OUGHT
ought
| COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing
| pronoun + verb
i'm
you're
he's
she's
it's
we're
they're
i've
you've
we've
they've
i'd
you'd
he'd
she'd
we'd
they'd
i'll
you'll
he'll
she'll
we'll
they'll
| verb + negation
isn't
aren't
wasn't
weren't
hasn't
haven't
hadn't
doesn't
don't
didn't
| auxiliary + negation
won't
wouldn't
shan't
shouldn't
can't
cannot
couldn't
mustn't
| miscellaneous forms
let's
that's
who's
what's
here's
there's
when's
where's
why's
how's
| rarer forms
| daren't needn't
| doubtful forms
| oughtn't mightn't
| ARTICLES
a
an
the
| THE REST (Overlap among prepositions, conjunctions, adverbs etc is so
| high, that classification is pointless.)
and
but
if
or
because
as
until
while
of
at
by
for
with
about
against
between
into
through
during
before
after
above
below
to
from
up
down
in
out
on
off
over
under
again
further
then
once
here
there
when
where
why
how
all
any
both
each
few
more
most
other
some
such
no
nor
not
only
own
same
so
than
too
very
| Just for the record, the following words are among the commonest in English
| one
| every
| least
| less
| many
| now
| ever
| never
| say
| says
| said
| also
| get
| go
| goes
| just
| made
| make
| put
| see
| seen
| whether
| like
| well
| back
| even
| still
| way
| take
| since
| another
| however
| two
| three
| four
| five
| first
| second
| new
| old
| high
| long

View File

@ -0,0 +1,95 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| forms of BE
olla
olen
olet
on
olemme
olette
ovat
ole | negative form
oli
olisi
olisit
olisin
olisimme
olisitte
olisivat
olit
olin
olimme
olitte
olivat
ollut
olleet
en | negation
et
ei
emme
ette
eivät
|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
minä minun minut minua minussa minusta minuun minulla minulta minulle | I
sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
mitkä | (pl)
joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
| conjunctions
että | that
ja | and
jos | if
koska | because
kuin | than
mutta | but
niin | so
sekä | and
sillä | for
tai | or
vaan | but
vai | or
vaikka | although
| prepositions
kanssa | with
mukaan | according to
noin | about
poikki | across
yli | over, across
| other
kun | when
niin | so
nyt | now
itse | self

View File

@ -0,0 +1,183 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| A French stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
au | a + le
aux | a + les
avec | with
ce | this
ces | these
dans | with
de | of
des | de + les
du | de + le
elle | she
en | `of them' etc
et | and
eux | them
il | he
je | I
la | the
le | the
leur | their
lui | him
ma | my (fem)
mais | but
me | me
même | same; as in moi-même (myself) etc
mes | me (pl)
moi | me
mon | my (masc)
ne | not
nos | our (pl)
notre | our
nous | we
on | one
ou | where
par | by
pas | not
pour | for
qu | que before vowel
que | that
qui | who
sa | his, her (fem)
se | oneself
ses | his (pl)
son | his, her (masc)
sur | on
ta | thy (fem)
te | thee
tes | thy (pl)
toi | thee
ton | thy (masc)
tu | thou
un | a
une | a
vos | your (pl)
votre | your
vous | you
| single letter forms
c | c'
d | d'
j | j'
l | l'
à | to, at
m | m'
n | n'
s | s'
t | t'
y | there
| forms of être (not including the infinitive):
été
étée
étées
étés
étant
suis
es
est
sommes
êtes
sont
serai
seras
sera
serons
serez
seront
serais
serait
serions
seriez
seraient
étais
était
étions
étiez
étaient
fus
fut
fûmes
fûtes
furent
sois
soit
soyons
soyez
soient
fusse
fusses
fût
fussions
fussiez
fussent
| forms of avoir (not including the infinitive):
ayant
eu
eue
eues
eus
ai
as
avons
avez
ont
aurai
auras
aura
aurons
aurez
auront
aurais
aurait
aurions
auriez
auraient
avais
avait
avions
aviez
avaient
eut
eûmes
eûtes
eurent
aie
aies
ait
ayons
ayez
aient
eusse
eusses
eût
eussions
eussiez
eussent
| Later additions (from Jean-Christophe Deschamps)
ceci | this
celà  | that
cet | this
cette | this
ici | here
ils | they
les | the (pl)
leurs | their (pl)
quel | which
quels | which
quelle | which
quelles | which
sans | without
soi | oneself

View File

@ -0,0 +1,292 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| A German stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| The number of forms in this list is reduced significantly by passing it
| through the German stemmer.
aber | but
alle | all
allem
allen
aller
alles
als | than, as
also | so
am | an + dem
an | at
ander | other
andere
anderem
anderen
anderer
anderes
anderm
andern
anderr
anders
auch | also
auf | on
aus | out of
bei | by
bin | am
bis | until
bist | art
da | there
damit | with it
dann | then
der | the
den
des
dem
die
das
daß | that
derselbe | the same
derselben
denselben
desselben
demselben
dieselbe
dieselben
dasselbe
dazu | to that
dein | thy
deine
deinem
deinen
deiner
deines
denn | because
derer | of those
dessen | of him
dich | thee
dir | to thee
du | thou
dies | this
diese
diesem
diesen
dieser
dieses
doch | (several meanings)
dort | (over) there
durch | through
ein | a
eine
einem
einen
einer
eines
einig | some
einige
einigem
einigen
einiger
einiges
einmal | once
er | he
ihn | him
ihm | to him
es | it
etwas | something
euer | your
eure
eurem
euren
eurer
eures
für | for
gegen | towards
gewesen | p.p. of sein
hab | have
habe | have
haben | have
hat | has
hatte | had
hatten | had
hier | here
hin | there
hinter | behind
ich | I
mich | me
mir | to me
ihr | you, to her
ihre
ihrem
ihren
ihrer
ihres
euch | to you
im | in + dem
in | in
indem | while
ins | in + das
ist | is
jede | each, every
jedem
jeden
jeder
jedes
jene | that
jenem
jenen
jener
jenes
jetzt | now
kann | can
kein | no
keine
keinem
keinen
keiner
keines
können | can
könnte | could
machen | do
man | one
manche | some, many a
manchem
manchen
mancher
manches
mein | my
meine
meinem
meinen
meiner
meines
mit | with
muss | must
musste | had to
nach | to(wards)
nicht | not
nichts | nothing
noch | still, yet
nun | now
nur | only
ob | whether
oder | or
ohne | without
sehr | very
sein | his
seine
seinem
seinen
seiner
seines
selbst | self
sich | herself
sie | they, she
ihnen | to them
sind | are
so | so
solche | such
solchem
solchen
solcher
solches
soll | shall
sollte | should
sondern | but
sonst | else
über | over
um | about, around
und | and
uns | us
unse
unsem
unsen
unser
unses
unter | under
viel | much
vom | von + dem
von | from
vor | before
während | while
war | was
waren | were
warst | wast
was | what
weg | away, off
weil | because
weiter | further
welche | which
welchem
welchen
welcher
welches
wenn | when
werde | will
werden | will
wie | how
wieder | again
will | want
wir | we
wird | will
wirst | willst
wo | where
wollen | want
wollte | wanted
würde | would
würden | would
zu | to
zum | zu + dem
zur | zu + der
zwar | indeed
zwischen | between

View File

@ -0,0 +1,209 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| Hungarian stop word list
| prepared by Anna Tordai
a
ahogy
ahol
aki
akik
akkor
alatt
által
általában
amely
amelyek
amelyekben
amelyeket
amelyet
amelynek
ami
amit
amolyan
amíg
amikor
át
abban
ahhoz
annak
arra
arról
az
azok
azon
azt
azzal
azért
aztán
azután
azonban
bár
be
belül
benne
cikk
cikkek
cikkeket
csak
de
e
eddig
egész
egy
egyes
egyetlen
egyéb
egyik
egyre
ekkor
el
elég
ellen
elő
először
előtt
első
én
éppen
ebben
ehhez
emilyen
ennek
erre
ez
ezt
ezek
ezen
ezzel
ezért
és
fel
felé
hanem
hiszen
hogy
hogyan
igen
így
illetve
ill.
ill
ilyen
ilyenkor
ison
ismét
itt
jól
jobban
kell
kellett
keresztül
keressünk
ki
kívül
között
közül
legalább
lehet
lehetett
legyen
lenne
lenni
lesz
lett
maga
magát
majd
majd
már
más
másik
meg
még
mellett
mert
mely
melyek
mi
mit
míg
miért
milyen
mikor
minden
mindent
mindenki
mindig
mint
mintha
mivel
most
nagy
nagyobb
nagyon
ne
néha
nekem
neki
nem
néhány
nélkül
nincs
olyan
ott
össze
ő
ők
őket
pedig
persze
s
saját
sem
semmi
sok
sokat
sokkal
számára
szemben
szerint
szinte
talán
tehát
teljes
tovább
továbbá
több
úgy
ugyanis
új
újabb
újra
után
utána
utolsó
vagy
vagyis
valaki
valami
valamint
való
vagyok
van
vannak
volt
voltam
voltak
voltunk
vissza
vele
viszont
volna

View File

@ -0,0 +1,301 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| An Italian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
ad | a (to) before vowel
al | a + il
allo | a + lo
ai | a + i
agli | a + gli
all | a + l'
agl | a + gl'
alla | a + la
alle | a + le
con | with
col | con + il
coi | con + i (forms collo, cogli etc are now very rare)
da | from
dal | da + il
dallo | da + lo
dai | da + i
dagli | da + gli
dall | da + l'
dagl | da + gll'
dalla | da + la
dalle | da + le
di | of
del | di + il
dello | di + lo
dei | di + i
degli | di + gli
dell | di + l'
degl | di + gl'
della | di + la
delle | di + le
in | in
nel | in + el
nello | in + lo
nei | in + i
negli | in + gli
nell | in + l'
negl | in + gl'
nella | in + la
nelle | in + le
su | on
sul | su + il
sullo | su + lo
sui | su + i
sugli | su + gli
sull | su + l'
sugl | su + gl'
sulla | su + la
sulle | su + le
per | through, by
tra | among
contro | against
io | I
tu | thou
lui | he
lei | she
noi | we
voi | you
loro | they
mio | my
mia |
miei |
mie |
tuo |
tua |
tuoi | thy
tue |
suo |
sua |
suoi | his, her
sue |
nostro | our
nostra |
nostri |
nostre |
vostro | your
vostra |
vostri |
vostre |
mi | me
ti | thee
ci | us, there
vi | you, there
lo | him, the
la | her, the
li | them
le | them, the
gli | to him, the
ne | from there etc
il | the
un | a
uno | a
una | a
ma | but
ed | and
se | if
perché | why, because
anche | also
come | how
dov | where (as dov')
dove | where
che | who, that
chi | who
cui | whom
non | not
più | more
quale | who, that
quanto | how much
quanti |
quanta |
quante |
quello | that
quelli |
quella |
quelle |
questo | this
questi |
questa |
queste |
si | yes
tutto | all
tutti | all
| single letter forms:
a | at
c | as c' for ce or ci
e | and
i | the
l | as l'
o | or
| forms of avere, to have (not including the infinitive):
ho
hai
ha
abbiamo
avete
hanno
abbia
abbiate
abbiano
avrò
avrai
avrà
avremo
avrete
avranno
avrei
avresti
avrebbe
avremmo
avreste
avrebbero
avevo
avevi
aveva
avevamo
avevate
avevano
ebbi
avesti
ebbe
avemmo
aveste
ebbero
avessi
avesse
avessimo
avessero
avendo
avuto
avuta
avuti
avute
| forms of essere, to be (not including the infinitive):
sono
sei
è
siamo
siete
sia
siate
siano
sarò
sarai
sarà
saremo
sarete
saranno
sarei
saresti
sarebbe
saremmo
sareste
sarebbero
ero
eri
era
eravamo
eravate
erano
fui
fosti
fu
fummo
foste
furono
fossi
fosse
fossimo
fossero
essendo
| forms of fare, to do (not including the infinitive, fa, fat-):
faccio
fai
facciamo
fanno
faccia
facciate
facciano
farò
farai
farà
faremo
farete
faranno
farei
faresti
farebbe
faremmo
fareste
farebbero
facevo
facevi
faceva
facevamo
facevate
facevano
feci
facesti
fece
facemmo
faceste
fecero
facessi
facesse
facessimo
facessero
facendo
| forms of stare, to be (not including the infinitive):
sto
stai
sta
stiamo
stanno
stia
stiate
stiano
starò
starai
starà
staremo
starete
staranno
starei
staresti
starebbe
staremmo
stareste
starebbero
stavo
stavi
stava
stavamo
stavate
stavano
stetti
stesti
stette
stemmo
steste
stettero
stessi
stesse
stessimo
stessero
stando

View File

@ -0,0 +1,192 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| A Norwegian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| This stop word list is for the dominant bokmål dialect. Words unique
| to nynorsk are marked *.
| Revised by Jan Bruusgaard <Jan.Bruusgaard@ssb.no>, Jan 2005
og | and
i | in
jeg | I
det | it/this/that
at | to (w. inf.)
en | a/an
et | a/an
den | it/this/that
til | to
er | is/am/are
som | who/that
på | on
de | they / you(formal)
med | with
han | he
av | of
ikke | not
ikkje | not *
der | there
så | so
var | was/were
meg | me
seg | you
men | but
ett | one
har | have
om | about
vi | we
min | my
mitt | my
ha | have
hadde | had
hun | she
nå | now
over | over
da | when/as
ved | by/know
fra | from
du | you
ut | out
sin | your
dem | them
oss | us
opp | up
man | you/one
kan | can
hans | his
hvor | where
eller | or
hva | what
skal | shall/must
selv | self (reflective)
sjøl | self (reflective)
her | here
alle | all
vil | will
bli | become
ble | became
blei | became *
blitt | have become
kunne | could
inn | in
når | when
være | be
kom | come
noen | some
noe | some
ville | would
dere | you
som | who/which/that
deres | their/theirs
kun | only/just
ja | yes
etter | after
ned | down
skulle | should
denne | this
for | for/because
deg | you
si | hers/his
sine | hers/his
sitt | hers/his
mot | against
å | to
meget | much
hvorfor | why
dette | this
disse | these/those
uten | without
hvordan | how
ingen | none
din | your
ditt | your
blir | become
samme | same
hvilken | which
hvilke | which (plural)
sånn | such a
inni | inside/within
mellom | between
vår | our
hver | each
hvem | who
vors | us/ours
hvis | whose
både | both
bare | only/just
enn | than
fordi | as/because
før | before
mange | many
også | also
slik | just
vært | been
være | to be
båe | both *
begge | both
siden | since
dykk | your *
dykkar | yours *
dei | they *
deira | them *
deires | theirs *
deim | them *
di | your (fem.) *
då | as/when *
eg | I *
ein | a/an *
eit | a/an *
eitt | a/an *
elles | or *
honom | he *
hjå | at *
ho | she *
hoe | she *
henne | her
hennar | her/hers
hennes | hers
hoss | how *
hossen | how *
ikkje | not *
ingi | noone *
inkje | noone *
korleis | how *
korso | how *
kva | what/which *
kvar | where *
kvarhelst | where *
kven | who/whom *
kvi | why *
kvifor | why *
me | we *
medan | while *
mi | my *
mine | my *
mykje | much *
no | now *
nokon | some (masc./neut.) *
noka | some (fem.) *
nokor | some *
noko | some *
nokre | some *
si | his/hers *
sia | since *
sidan | since *
so | so *
somt | some *
somme | some *
um | about*
upp | up *
vere | be *
vore | was *
verte | become *
vort | become *
varte | became *
vart | became *

View File

@ -0,0 +1,251 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| A Portuguese stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| The following is a ranked list (commonest to rarest) of stopwords
| deriving from a large sample of text.
| Extra words have been added at the end.
de | of, from
a | the; to, at; her
o | the; him
que | who, that
e | and
do | de + o
da | de + a
em | in
um | a
para | for
| é from SER
com | with
não | not, no
uma | a
os | the; them
no | em + o
se | himself etc
na | em + a
por | for
mais | more
as | the; them
dos | de + os
como | as, like
mas | but
| foi from SER
ao | a + o
ele | he
das | de + as
| tem from TER
à | a + a
seu | his
sua | her
ou | or
| ser from SER
quando | when
muito | much
| há from HAV
nos | em + os; us
já | already, now
| está from EST
eu | I
também | also
só | only, just
pelo | per + o
pela | per + a
até | up to
isso | that
ela | he
entre | between
| era from SER
depois | after
sem | without
mesmo | same
aos | a + os
| ter from TER
seus | his
quem | whom
nas | em + as
me | me
esse | that
eles | they
| estão from EST
você | you
| tinha from TER
| foram from SER
essa | that
num | em + um
nem | nor
suas | her
meu | my
às | a + as
minha | my
| têm from TER
numa | em + uma
pelos | per + os
elas | they
| havia from HAV
| seja from SER
qual | which
| será from SER
nós | we
| tenho from TER
lhe | to him, her
deles | of them
essas | those
esses | those
pelas | per + as
este | this
| fosse from SER
dele | of him
| other words. There are many contractions such as naquele = em+aquele,
| mo = me+o, but they are rare.
| Indefinite article plural forms are also rare.
tu | thou
te | thee
vocês | you (plural)
vos | you
lhes | to them
meus | my
minhas
teu | thy
tua
teus
tuas
nosso | our
nossa
nossos
nossas
dela | of her
delas | of them
esta | this
estes | these
estas | these
aquele | that
aquela | that
aqueles | those
aquelas | those
isto | this
aquilo | that
| forms of estar, to be (not including the infinitive):
estou
está
estamos
estão
estive
esteve
estivemos
estiveram
estava
estávamos
estavam
estivera
estivéramos
esteja
estejamos
estejam
estivesse
estivéssemos
estivessem
estiver
estivermos
estiverem
| forms of haver, to have (not including the infinitive):
hei
havemos
hão
houve
houvemos
houveram
houvera
houvéramos
haja
hajamos
hajam
houvesse
houvéssemos
houvessem
houver
houvermos
houverem
houverei
houverá
houveremos
houverão
houveria
houveríamos
houveriam
| forms of ser, to be (not including the infinitive):
sou
somos
são
era
éramos
eram
fui
foi
fomos
foram
fora
fôramos
seja
sejamos
sejam
fosse
fôssemos
fossem
for
formos
forem
serei
será
seremos
serão
seria
seríamos
seriam
| forms of ter, to have (not including the infinitive):
tenho
tem
temos
tém
tinha
tínhamos
tinham
tive
teve
tivemos
tiveram
tivera
tivéramos
tenha
tenhamos
tenham
tivesse
tivéssemos
tivessem
tiver
tivermos
tiverem
terei
terá
teremos
terão
teria
teríamos
teriam

View File

@ -0,0 +1,241 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| a russian stop word list. comments begin with vertical bar. each stop
| word is at the start of a line.
| this is a ranked list (commonest to rarest) of stopwords derived from
| a large text sample.
| letter `ё' is translated to `е'.
и | and
в | in/into
во | alternative form
не | not
что | what/that
он | he
на | on/onto
я | i
с | from
со | alternative form
как | how
а | milder form of `no' (but)
то | conjunction and form of `that'
все | all
она | she
так | so, thus
его | him
но | but
да | yes/and
ты | thou
к | towards, by
у | around, chez
же | intensifier particle
вы | you
за | beyond, behind
бы | conditional/subj. particle
по | up to, along
только | only
ее | her
мне | to me
было | it was
вот | here is/are, particle
от | away from
меня | me
еще | still, yet, more
нет | no, there isnt/arent
о | about
из | out of
ему | to him
теперь | now
когда | when
даже | even
ну | so, well
вдруг | suddenly
ли | interrogative particle
если | if
уже | already, but homonym of `narrower'
или | or
ни | neither
быть | to be
был | he was
него | prepositional form of его
до | up to
вас | you accusative
нибудь | indef. suffix preceded by hyphen
опять | again
уж | already, but homonym of `adder'
вам | to you
сказал | he said
ведь | particle `after all'
там | there
потом | then
себя | oneself
ничего | nothing
ей | to her
может | usually with `быть' as `maybe'
они | they
тут | here
где | where
есть | there is/are
надо | got to, must
ней | prepositional form of ей
для | for
мы | we
тебя | thee
их | them, their
чем | than
была | she was
сам | self
чтоб | in order to
без | without
будто | as if
человек | man, person, one
чего | genitive form of `what'
раз | once
тоже | also
себе | to oneself
под | beneath
жизнь | life
будет | will be
ж | short form of intensifer particle `же'
тогда | then
кто | who
этот | this
говорил | was saying
того | genitive form of `that'
потому | for that reason
этого | genitive form of `this'
какой | which
совсем | altogether
ним | prepositional form of `его', `они'
здесь | here
этом | prepositional form of `этот'
один | one
почти | almost
мой | my
тем | instrumental/dative plural of `тот', `то'
чтобы | full form of `in order that'
нее | her (acc.)
кажется | it seems
сейчас | now
были | they were
куда | where to
зачем | why
сказать | to say
всех | all (acc., gen. preposn. plural)
никогда | never
сегодня | today
можно | possible, one can
при | by
наконец | finally
два | two
об | alternative form of `о', about
другой | another
хоть | even
после | after
над | above
больше | more
тот | that one (masc.)
через | across, in
эти | these
нас | us
про | about
всего | in all, only, of all
них | prepositional form of `они' (they)
какая | which, feminine
много | lots
разве | interrogative particle
сказала | she said
три | three
эту | this, acc. fem. sing.
моя | my, feminine
впрочем | moreover, besides
хорошо | good
свою | ones own, acc. fem. sing.
этой | oblique form of `эта', fem. `this'
перед | in front of
иногда | sometimes
лучше | better
чуть | a little
том | preposn. form of `that one'
нельзя | one must not
такой | such a one
им | to them
более | more
всегда | always
конечно | of course
всю | acc. fem. sing of `all'
между | between
| b: some paradigms
|
| personal pronouns
|
| я меня мне мной [мною]
| ты тебя тебе тобой [тобою]
| он его ему им [него, нему, ним]
| она ее эи ею [нее, нэи, нею]
| оно его ему им [него, нему, ним]
|
| мы нас нам нами
| вы вас вам вами
| они их им ими [них, ним, ними]
|
| себя себе собой [собою]
|
| demonstrative pronouns: этот (this), тот (that)
|
| этот эта это эти
| этого эты это эти
| этого этой этого этих
| этому этой этому этим
| этим этой этим [этою] этими
| этом этой этом этих
|
| тот та то те
| того ту то те
| того той того тех
| тому той тому тем
| тем той тем [тою] теми
| том той том тех
|
| determinative pronouns
|
| (a) весь (all)
|
| весь вся все все
| всего всю все все
| всего всей всего всех
| всему всей всему всем
| всем всей всем [всею] всеми
| всем всей всем всех
|
| (b) сам (himself etc)
|
| сам сама само сами
| самого саму само самих
| самого самой самого самих
| самому самой самому самим
| самим самой самим [самою] самими
| самом самой самом самих
|
| stems of verbs `to be', `to have', `to do' and modal
|
| быть бы буд быв есть суть
| име
| дел
| мог мож мочь
| уме
| хоч хот
| долж
| можн
| нужн
| нельзя

View File

@ -0,0 +1,354 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| A Spanish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| The following is a ranked list (commonest to rarest) of stopwords
| deriving from a large sample of text.
| Extra words have been added at the end.
de | from, of
la | the, her
que | who, that
el | the
en | in
y | and
a | to
los | the, them
del | de + el
se | himself, from him etc
las | the, them
por | for, by, etc
un | a
para | for
con | with
no | no
una | a
su | his, her
al | a + el
| es from SER
lo | him
como | how
más | more
pero | pero
sus | su plural
le | to him, her
ya | already
o | or
| fue from SER
este | this
| ha from HABER
sí | himself etc
porque | because
esta | this
| son from SER
entre | between
| está from ESTAR
cuando | when
muy | very
sin | without
sobre | on
| ser from SER
| tiene from TENER
también | also
me | me
hasta | until
hay | there is/are
donde | where
| han from HABER
quien | whom, that
| están from ESTAR
| estado from ESTAR
desde | from
todo | all
nos | us
durante | during
| estados from ESTAR
todos | all
uno | a
les | to them
ni | nor
contra | against
otros | other
| fueron from SER
ese | that
eso | that
| había from HABER
ante | before
ellos | they
e | and (variant of y)
esto | this
mí | me
antes | before
algunos | some
qué | what?
unos | a
yo | I
otro | other
otras | other
otra | other
él | he
tanto | so much, many
esa | that
estos | these
mucho | much, many
quienes | who
nada | nothing
muchos | many
cual | who
| sea from SER
poco | few
ella | she
estar | to be
| haber from HABER
estas | these
| estaba from ESTAR
| estamos from ESTAR
algunas | some
algo | something
nosotros | we
| other forms
mi | me
mis | mi plural
tú | thou
te | thee
ti | thee
tu | thy
tus | tu plural
ellas | they
nosotras | we
vosotros | you
vosotras | you
os | you
mío | mine
mía |
míos |
mías |
tuyo | thine
tuya |
tuyos |
tuyas |
suyo | his, hers, theirs
suya |
suyos |
suyas |
nuestro | ours
nuestra |
nuestros |
nuestras |
vuestro | yours
vuestra |
vuestros |
vuestras |
esos | those
esas | those
| forms of estar, to be (not including the infinitive):
estoy
estás
está
estamos
estáis
están
esté
estés
estemos
estéis
estén
estaré
estarás
estará
estaremos
estaréis
estarán
estaría
estarías
estaríamos
estaríais
estarían
estaba
estabas
estábamos
estabais
estaban
estuve
estuviste
estuvo
estuvimos
estuvisteis
estuvieron
estuviera
estuvieras
estuviéramos
estuvierais
estuvieran
estuviese
estuvieses
estuviésemos
estuvieseis
estuviesen
estando
estado
estada
estados
estadas
estad
| forms of haber, to have (not including the infinitive):
he
has
ha
hemos
habéis
han
haya
hayas
hayamos
hayáis
hayan
habré
habrás
habrá
habremos
habréis
habrán
habría
habrías
habríamos
habríais
habrían
había
habías
habíamos
habíais
habían
hube
hubiste
hubo
hubimos
hubisteis
hubieron
hubiera
hubieras
hubiéramos
hubierais
hubieran
hubiese
hubieses
hubiésemos
hubieseis
hubiesen
habiendo
habido
habida
habidos
habidas
| forms of ser, to be (not including the infinitive):
soy
eres
es
somos
sois
son
sea
seas
seamos
seáis
sean
seré
serás
será
seremos
seréis
serán
sería
serías
seríamos
seríais
serían
era
eras
éramos
erais
eran
fui
fuiste
fue
fuimos
fuisteis
fueron
fuera
fueras
fuéramos
fuerais
fueran
fuese
fueses
fuésemos
fueseis
fuesen
siendo
sido
| sed also means 'thirst'
| forms of tener, to have (not including the infinitive):
tengo
tienes
tiene
tenemos
tenéis
tienen
tenga
tengas
tengamos
tengáis
tengan
tendré
tendrás
tendrá
tendremos
tendréis
tendrán
tendría
tendrías
tendríamos
tendríais
tendrían
tenía
tenías
teníamos
teníais
tenían
tuve
tuviste
tuvo
tuvimos
tuvisteis
tuvieron
tuviera
tuvieras
tuviéramos
tuvierais
tuvieran
tuviese
tuvieses
tuviésemos
tuvieseis
tuviesen
teniendo
tenido
tenida
tenidos
tenidas
tened

View File

@ -0,0 +1,131 @@
| From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
| A Swedish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| This is a ranked list (commonest to rarest) of stopwords derived from
| a large text sample.
| Swedish stop words occasionally exhibit homonym clashes. For example
| så = so, but also seed. These are indicated clearly below.
och | and
det | it, this/that
att | to (with infinitive)
i | in, at
en | a
jag | I
hon | she
som | who, that
han | he
på | on
den | it, this/that
med | with
var | where, each
sig | him(self) etc
för | for
så | so (also: seed)
till | to
är | is
men | but
ett | a
om | if; around, about
hade | had
de | they, these/those
av | of
icke | not, no
mig | me
du | you
henne | her
då | then, when
sin | his
nu | now
har | have
inte | inte någon = no one
hans | his
honom | him
skulle | 'sake'
hennes | her
där | there
min | my
man | one (pronoun)
ej | nor
vid | at, by, on (also: vast)
kunde | could
något | some etc
från | from, off
ut | out
när | when
efter | after, behind
upp | up
vi | we
dem | them
vara | be
vad | what
över | over
än | than
dig | you
kan | can
sina | his
här | here
ha | have
mot | towards
alla | all
under | under (also: wonder)
någon | some etc
eller | or (else)
allt | all
mycket | much
sedan | since
ju | why
denna | this/that
själv | myself, yourself etc
detta | this/that
åt | to
utan | without
varit | was
hur | how
ingen | no
mitt | my
ni | you
bli | to be, become
blev | from bli
oss | us
din | thy
dessa | these/those
några | some etc
deras | their
blir | from bli
mina | my
samma | (the) same
vilken | who, that
er | you, your
sådan | such a
vår | our
blivit | from bli
dess | its
inom | within
mellan | between
sådant | such a
varför | why
varje | each
vilka | who, that
ditt | thy
vem | who
vilket | who, that
sitta | his
sådana | such a
vart | each
dina | thy
vars | whose
vårt | our
våra | our
ert | your
era | your
vilkas | whose

View File

@ -0,0 +1,144 @@
package org.apache.lucene.analysis.snowball;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Payload;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
public class TestSnowball extends BaseTokenStreamTestCase {
public void testEnglish() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
assertAnalyzesTo(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"});
}
public void testStopwords() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English",
StandardAnalyzer.STOP_WORDS_SET);
assertAnalyzesTo(a, "the quick brown fox jumped",
new String[]{"quick", "brown", "fox", "jump"});
}
/**
* Test English lowercasing. Test both cases (pre-3.1 and post-3.1) to ensure
* we lowercase I correctly for non-Turkish languages in either case.
*/
public void testEnglishLowerCase() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
assertAnalyzesTo(a, "cryogenic", new String[] { "cryogen" });
assertAnalyzesTo(a, "CRYOGENIC", new String[] { "cryogen" });
Analyzer b = new SnowballAnalyzer(Version.LUCENE_30, "English");
assertAnalyzesTo(b, "cryogenic", new String[] { "cryogen" });
assertAnalyzesTo(b, "CRYOGENIC", new String[] { "cryogen" });
}
/**
* Test turkish lowercasing
*/
public void testTurkish() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "Turkish");
assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" });
assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" });
}
/**
* Test turkish lowercasing (old buggy behavior)
* @deprecated Remove this when support for 3.0 indexes is no longer required
*/
public void testTurkishBWComp() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_30, "Turkish");
// AĞACI in turkish lowercases to ağacı, but with lowercase filter ağaci.
// this fails due to wrong casing, because the stemmer
// will only remove -ı, not -i
assertAnalyzesTo(a, "ağacı", new String[] { "ağaç" });
assertAnalyzesTo(a, "AĞACI", new String[] { "ağaci" });
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
assertAnalyzesToReuse(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"});
assertAnalyzesToReuse(a, "she abhorred him",
new String[]{"she", "abhor", "him"});
}
public void testFilterTokens() throws Exception {
SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);
filter.incrementToken();
assertEquals("accent", termAtt.term());
assertEquals(2, offsetAtt.startOffset());
assertEquals(7, offsetAtt.endOffset());
assertEquals("wrd", typeAtt.type());
assertEquals(3, posIncAtt.getPositionIncrement());
assertEquals(77, flagsAtt.getFlags());
assertEquals(new Payload(new byte[]{0,1,2,3}), payloadAtt.getPayload());
}
private final class TestTokenStream extends TokenStream {
private TermAttribute termAtt;
private OffsetAttribute offsetAtt;
private TypeAttribute typeAtt;
private PayloadAttribute payloadAtt;
private PositionIncrementAttribute posIncAtt;
private FlagsAttribute flagsAtt;
TestTokenStream() {
super();
termAtt = addAttribute(TermAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
payloadAtt = addAttribute(PayloadAttribute.class);
posIncAtt = addAttribute(PositionIncrementAttribute.class);
flagsAtt = addAttribute(FlagsAttribute.class);
}
@Override
public boolean incrementToken() {
clearAttributes();
termAtt.setTermBuffer("accents");
offsetAtt.setOffset(2, 7);
typeAtt.setType("wrd");
posIncAtt.setPositionIncrement(3);
payloadAtt.setPayload(new Payload(new byte[]{0,1,2,3}));
flagsAtt.setFlags(77);
return true;
}
}
}

View File

@ -0,0 +1,102 @@
package org.apache.lucene.analysis.snowball;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
/**
* Test the snowball filters against the snowball data tests
*/
public class TestSnowballVocab extends BaseTokenStreamTestCase {
private Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
static final File dataDir = new File(System.getProperty("dataDir", "./bin"));
static final File dataRoot = new File(dataDir,
"org/apache/lucene/analysis/snowball/data");
/**
* Run all languages against their snowball vocabulary tests.
*/
public void testStemmers() throws IOException {
if (!dataRoot.exists()) {
System.err.println("WARN: This test was disabled, as the svn checkout of snowball test files is not supported on your system!");
return;
}
assertCorrectOutput("Danish", "danish");
assertCorrectOutput("Dutch", "dutch");
assertCorrectOutput("English", "english");
// disabled due to snowball java code generation bug:
// see http://article.gmane.org/gmane.comp.search.snowball/1139
// assertCorrectOutput("Finnish", "finnish");
assertCorrectOutput("French", "french");
assertCorrectOutput("German", "german");
assertCorrectOutput("German2", "german2");
assertCorrectOutput("Hungarian", "hungarian");
assertCorrectOutput("Italian", "italian");
assertCorrectOutput("Kp", "kraaij_pohlmann");
// disabled due to snowball java code generation bug:
// see http://article.gmane.org/gmane.comp.search.snowball/1139
// assertCorrectOutput("Lovins", "lovins");
assertCorrectOutput("Norwegian", "norwegian");
assertCorrectOutput("Porter", "porter");
assertCorrectOutput("Portuguese", "portuguese");
assertCorrectOutput("Romanian", "romanian");
assertCorrectOutput("Russian", "russian");
assertCorrectOutput("Spanish", "spanish");
assertCorrectOutput("Swedish", "swedish");
assertCorrectOutput("Turkish", "turkish");
}
/**
* For the supplied language, run the stemmer against all strings in voc.txt
* The output should be the same as the string in output.txt
*/
private void assertCorrectOutput(String snowballLanguage, String dataDirectory)
throws IOException {
System.err.println("checking snowball language: " + snowballLanguage);
TokenStream filter = new SnowballFilter(tokenizer, snowballLanguage);
InputStream vocFile = new FileInputStream(new File(dataRoot,
dataDirectory + "/voc.txt"));
InputStream outputFile = new FileInputStream(new File(dataRoot,
dataDirectory + "/output.txt"));
BufferedReader vocReader = new BufferedReader(new InputStreamReader(
vocFile, "UTF-8"));
BufferedReader outputReader = new BufferedReader(new InputStreamReader(
outputFile, "UTF-8"));
String inputWord = null;
while ((inputWord = vocReader.readLine()) != null) {
String expectedWord = outputReader.readLine();
assertNotNull(expectedWord);
tokenizer.reset(new StringReader(inputWord));
filter.reset();
assertTokenStreamContents(filter, new String[] {expectedWord});
}
vocReader.close();
outputReader.close();
}
}
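For reference, the Snowball test data pairs files line by line: each line of voc.txt is an input word and the same line of output.txt is its expected stem, so the English data would, for instance, pair a voc.txt entry like "running" with an output.txt entry "run" (illustrative values, not taken from this commit).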

View File

@ -0,0 +1,17 @@
<?xml version="1.0"?>
<document>
<properties>
<title>Overview - Snowball Stemmers for Lucene</title>
</properties>
<body>
<section name="Snowball Stemmers for Lucene">
<p>
This project provides pre-compiled versions of the Snowball stemmers
together with classes integrating them with the Lucene search engine.
</p>
</section>
</body>
</document>
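As a minimal sketch of that integration (not part of this commit; the input word and the printed result are only illustrative, chosen to match the English analyzer tests above), a token stream can be stemmed by wrapping it in the SnowballFilter:

import java.io.StringReader;

import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class SnowballSketch {
  public static void main(String[] args) throws Exception {
    // Stem one keyword token with the English Snowball stemmer.
    TokenStream ts = new SnowballFilter(
        new KeywordTokenizer(new StringReader("abhorred")), "English");
    TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(termAtt.term()); // expected: "abhor", as in the reuse test above
    }
    ts.close();
  }
}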

View File

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<project name="Snowball Stemers for Lucene"
href="http://jakarta.apache.org/lucene-sandbox/snowball/">
<title>Snowball Stemmers for Lucene</title>
<body>
<menu name="Documentation">
<item name="Javadoc" href="/api/index.html"/>
</menu>
<menu name="Download">
<item name="Releases"
href="http://jakarta.apache.org/builds/jakarta-lucene-sandbox/snowball/"/>
<item name="CVS Repository" href="/site/cvsindex.html"/>
</menu>
<menu name="Links">
<item name="Snowball Home" href="http://snowball.tartarus.org/"/>
<item name="Lucene Home" href="http://jakarta.apache.org/lucene/"/>
<item name="Lucene Sandbox"
href="http://jakarta.apache.org/lucene/docs/lucene-sandbox/"/>
</menu>
<menu name="Jakarta">
<item name="Get Involved" href="/site/getinvolved.html"/>
<item name="Acknowledgements" href="/site/acknowledgements.html"/>
<item name="Contact" href="/site/contact.html"/>
<item name="Legal" href="/site/legal.html"/>
</menu>
</body>
</project>

View File

@ -1,10 +1,5 @@
package org.apache.lucene.analysis;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.ArrayUtil;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -22,6 +17,12 @@ import org.apache.lucene.util.ArrayUtil;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/**
* This class converts alphabetic, numeric, and symbolic Unicode characters
* which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
@ -101,7 +102,7 @@ public final class ASCIIFoldingFilter extends TokenFilter {
// Worst-case length required:
final int maxSizeNeeded = 4 * length;
if (output.length < maxSizeNeeded) {
output = new char[ArrayUtil.getNextSize(maxSizeNeeded)];
output = new char[ArrayUtil.oversize(maxSizeNeeded, RamUsageEstimator.NUM_BYTES_CHAR)];
}
outputPos = 0;

View File

@ -29,6 +29,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.RamUsageEstimator;
/**
A Token is an occurrence of a term from the text of a field. It consists of
@ -347,12 +348,12 @@ public class Token extends AttributeImpl
public char[] resizeTermBuffer(int newSize) {
if (termBuffer == null) {
// The buffer is always at least MIN_BUFFER_SIZE
termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
termBuffer = new char[ArrayUtil.oversize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
} else {
if(termBuffer.length < newSize){
// Not big enough; create a new array with slight
// over allocation and preserve content
final char[] newCharBuffer = new char[ArrayUtil.getNextSize(newSize)];
final char[] newCharBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
termBuffer = newCharBuffer;
}
@ -367,19 +368,19 @@ public class Token extends AttributeImpl
private void growTermBuffer(int newSize) {
if (termBuffer == null) {
// The buffer is always at least MIN_BUFFER_SIZE
termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
termBuffer = new char[ArrayUtil.oversize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
} else {
if(termBuffer.length < newSize){
// Not big enough; create a new array with slight
// over allocation:
termBuffer = new char[ArrayUtil.getNextSize(newSize)];
termBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
}
}
}
private void initTermBuffer() {
if (termBuffer == null) {
termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)];
termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
termLength = 0;
}
}

View File

@ -21,6 +21,7 @@ import java.io.Serializable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.RamUsageEstimator;
/**
* The term text of a Token.
@ -106,12 +107,12 @@ public class TermAttributeImpl extends AttributeImpl implements TermAttribute, C
public char[] resizeTermBuffer(int newSize) {
if (termBuffer == null) {
// The buffer is always at least MIN_BUFFER_SIZE
termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
termBuffer = new char[ArrayUtil.oversize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
} else {
if(termBuffer.length < newSize){
// Not big enough; create a new array with slight
// over allocation and preserve content
final char[] newCharBuffer = new char[ArrayUtil.getNextSize(newSize)];
final char[] newCharBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
termBuffer = newCharBuffer;
}
@ -127,19 +128,19 @@ public class TermAttributeImpl extends AttributeImpl implements TermAttribute, C
private void growTermBuffer(int newSize) {
if (termBuffer == null) {
// The buffer is always at least MIN_BUFFER_SIZE
termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
termBuffer = new char[ArrayUtil.oversize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
} else {
if(termBuffer.length < newSize){
// Not big enough; create a new array with slight
// over allocation:
termBuffer = new char[ArrayUtil.getNextSize(newSize)];
termBuffer = new char[ArrayUtil.oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)];
}
}
}
private void initTermBuffer() {
if (termBuffer == null) {
termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)];
termBuffer = new char[ArrayUtil.oversize(MIN_BUFFER_SIZE, RamUsageEstimator.NUM_BYTES_CHAR)];
termLength = 0;
}
}

View File

@ -25,6 +25,7 @@ import java.util.HashSet;
import java.io.IOException;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/** This is just a "splitter" class: it lets you wrap two
* DocFieldConsumer instances as a single consumer. */
@ -117,7 +118,7 @@ final class DocFieldConsumers extends DocFieldConsumer {
// enough space to recycle all outstanding PerDoc
// instances
assert allocCount == 1+docFreeList.length;
docFreeList = new PerDoc[ArrayUtil.getNextSize(allocCount)];
docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
}
return new PerDoc();
} else

View File

@ -24,6 +24,7 @@ import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Gathers all Fieldables for a document under the same
@ -340,7 +341,7 @@ final class DocFieldProcessorPerThread extends DocConsumerPerThread {
// enough space to recycle all outstanding PerDoc
// instances
assert allocCount == 1+docFreeList.length;
docFreeList = new PerDoc[ArrayUtil.getNextSize(allocCount)];
docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
}
return new PerDoc();
} else

View File

@ -40,6 +40,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.RamUsageEstimator;
/**
* This class accepts multiple added documents and directly
@ -1503,7 +1504,7 @@ final class DocumentsWriter {
int gap = doc.docID - nextWriteDocID;
if (gap >= waiting.length) {
// Grow queue
DocWriter[] newArray = new DocWriter[ArrayUtil.getNextSize(gap)];
DocWriter[] newArray = new DocWriter[ArrayUtil.oversize(gap, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
assert nextWriteLoc >= 0;
System.arraycopy(waiting, nextWriteLoc, newArray, 0, waiting.length-nextWriteLoc);
System.arraycopy(waiting, 0, newArray, waiting.length-nextWriteLoc, nextWriteLoc);

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/**
* NOTE: this API is experimental and will likely change
@ -35,7 +36,7 @@ abstract class FormatPostingsTermsConsumer {
FormatPostingsDocsConsumer addTerm(String text) throws IOException {
final int len = text.length();
if (termBuffer == null || termBuffer.length < 1+len)
termBuffer = new char[ArrayUtil.getNextSize(1+len)];
termBuffer = new char[ArrayUtil.oversize(1+len, RamUsageEstimator.NUM_BYTES_CHAR)];
text.getChars(0, len, termBuffer, 0);
termBuffer[len] = 0xffff;
return addTerm(termBuffer, 0);

View File

@ -63,11 +63,13 @@ final class NormsWriterPerField extends InvertedDocEndConsumerPerField implement
@Override
void finish() {
assert docIDs.length == norms.length;
if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
if (docIDs.length <= upto) {
assert docIDs.length == upto;
docIDs = ArrayUtil.grow(docIDs, 1+upto);
}
if (norms.length <= upto) {
assert norms.length == upto;
norms = ArrayUtil.grow(norms, 1+upto);
}
final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/** This is a DocFieldConsumer that writes stored fields. */
final class StoredFieldsWriter {
@ -108,7 +109,7 @@ final class StoredFieldsWriter {
// enough space to recycle all outstanding PerDoc
// instances
assert allocCount == 1+docFreeList.length;
docFreeList = new PerDoc[ArrayUtil.getNextSize(allocCount)];
docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
}
return new PerDoc();
} else

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
import java.util.Collection;
@ -117,7 +118,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
// enough space to recycle all outstanding PerDoc
// instances
assert allocCount == 1+docFreeList.length;
docFreeList = new PerDoc[ArrayUtil.getNextSize(allocCount)];
docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
}
return new PerDoc();
} else
@ -266,6 +267,8 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
void addField(final int fieldNumber) {
if (numVectorFields == fieldNumbers.length) {
fieldNumbers = ArrayUtil.grow(fieldNumbers);
}
if (numVectorFields == fieldPointers.length) {
fieldPointers = ArrayUtil.grow(fieldPointers);
}
fieldNumbers[numVectorFields] = fieldNumber;

View File

@ -26,6 +26,7 @@ import java.util.Arrays;
import java.io.IOException;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/** This class implements {@link InvertedDocConsumer}, which
* is passed each token produced by the analyzer on each
@ -89,7 +90,7 @@ final class TermsHash extends InvertedDocConsumer {
assert postingsFreeCount == postingsAllocCount: Thread.currentThread().getName() + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer;
final int newSize = ArrayUtil.getShrinkSize(postingsFreeList.length, postingsAllocCount);
final int newSize = ArrayUtil.getShrinkSize(postingsFreeList.length, postingsAllocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
if (newSize != postingsFreeList.length) {
RawPostingList[] newArray = new RawPostingList[newSize];
System.arraycopy(postingsFreeList, 0, newArray, 0, postingsFreeCount);
@ -222,7 +223,7 @@ final class TermsHash extends InvertedDocConsumer {
if (newPostingsAllocCount > postingsFreeList.length)
// Pre-allocate the postingsFreeList so it's large
// enough to hold all postings we've given out
postingsFreeList = new RawPostingList[ArrayUtil.getNextSize(newPostingsAllocCount)];
postingsFreeList = new RawPostingList[ArrayUtil.oversize(newPostingsAllocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
}
postingsFreeCount -= numToCopy;

View File

@ -7,9 +7,9 @@ package org.apache.lucene.util;
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -122,20 +122,95 @@ public final class ArrayUtil {
END APACHE HARMONY CODE
*/
/** Returns an array size >= minTargetSize, generally
* over-allocating exponentially to achieve amortized
* linear-time cost as the array grows.
*
* NOTE: this was originally borrowed from Python 2.4.2
* listobject.c sources (attribution in LICENSE.txt), but
* has now been substantially changed based on
* discussions from java-dev thread with subject "Dynamic
* array reallocation algorithms", started on Jan 12
* 2010.
*
* @param minTargetSize Minimum required value to be returned.
* @param bytesPerElement Bytes used by each element of
* the array. See constants in {@link RamUsageEstimator}.
*
* @lucene.internal
*/
public static int getNextSize(int targetSize) {
/* This over-allocates proportional to the list size, making room
* for additional growth. The over-allocation is mild, but is
* enough to give linear-time amortized behavior over a long
* sequence of appends() in the presence of a poorly-performing
* system realloc().
* The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
*/
return (targetSize >> 3) + (targetSize < 9 ? 3 : 6) + targetSize;
public static int oversize(int minTargetSize, int bytesPerElement) {
if (minTargetSize < 0) {
// catch usage that accidentally overflows int
throw new IllegalArgumentException("invalid array size " + minTargetSize);
}
if (minTargetSize == 0) {
// wait until at least one element is requested
return 0;
}
// asymptotic exponential growth by 1/8th, favors
// spending a bit more CPU to not tie up too much wasted
// RAM:
int extra = minTargetSize >> 3;
if (extra < 3) {
// for very small arrays, where constant overhead of
// realloc is presumably relatively high, we grow
// faster
extra = 3;
}
int newSize = minTargetSize + extra;
// add 7 to allow for worst case byte alignment addition below:
if (newSize+7 < 0) {
// int overflowed -- return max allowed array size
return Integer.MAX_VALUE;
}
if (Constants.JRE_IS_64BIT) {
// round up to 8 byte alignment in 64bit env
switch(bytesPerElement) {
case 4:
// round up to multiple of 2
return (newSize + 1) & 0x7ffffffe;
case 2:
// round up to multiple of 4
return (newSize + 3) & 0x7ffffffc;
case 1:
// round up to multiple of 8
return (newSize + 7) & 0x7ffffff8;
case 8:
// no rounding
default:
// odd (invalid?) size
return newSize;
}
} else {
// round up to 4 byte alignment in 32bit env
switch(bytesPerElement) {
case 2:
// round up to multiple of 2
return (newSize + 1) & 0x7ffffffe;
case 1:
// round up to multiple of 4
return (newSize + 3) & 0x7ffffffc;
case 4:
case 8:
// no rounding
default:
// odd (invalid?) size
return newSize;
}
}
}
public static int getShrinkSize(int currentSize, int targetSize) {
final int newSize = getNextSize(targetSize);
public static int getShrinkSize(int currentSize, int targetSize, int bytesPerElement) {
final int newSize = oversize(targetSize, bytesPerElement);
// Only reallocate if we are "substantially" smaller.
// This saves us from "running hot" (constantly making a
// bit bigger then a bit smaller, over and over):
@ -147,7 +222,7 @@ public final class ArrayUtil {
public static int[] grow(int[] array, int minSize) {
if (array.length < minSize) {
int[] newArray = new int[getNextSize(minSize)];
int[] newArray = new int[oversize(minSize, RamUsageEstimator.NUM_BYTES_INT)];
System.arraycopy(array, 0, newArray, 0, array.length);
return newArray;
} else
@ -159,7 +234,7 @@ public final class ArrayUtil {
}
public static int[] shrink(int[] array, int targetSize) {
final int newSize = getShrinkSize(array.length, targetSize);
final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_INT);
if (newSize != array.length) {
int[] newArray = new int[newSize];
System.arraycopy(array, 0, newArray, 0, newSize);
@ -170,7 +245,7 @@ public final class ArrayUtil {
public static long[] grow(long[] array, int minSize) {
if (array.length < minSize) {
long[] newArray = new long[getNextSize(minSize)];
long[] newArray = new long[oversize(minSize, RamUsageEstimator.NUM_BYTES_LONG)];
System.arraycopy(array, 0, newArray, 0, array.length);
return newArray;
} else
@ -182,7 +257,7 @@ public final class ArrayUtil {
}
public static long[] shrink(long[] array, int targetSize) {
final int newSize = getShrinkSize(array.length, targetSize);
final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_LONG);
if (newSize != array.length) {
long[] newArray = new long[newSize];
System.arraycopy(array, 0, newArray, 0, newSize);
@ -193,7 +268,7 @@ public final class ArrayUtil {
public static byte[] grow(byte[] array, int minSize) {
if (array.length < minSize) {
byte[] newArray = new byte[getNextSize(minSize)];
byte[] newArray = new byte[oversize(minSize, 1)];
System.arraycopy(array, 0, newArray, 0, array.length);
return newArray;
} else
@ -205,7 +280,7 @@ public final class ArrayUtil {
}
public static byte[] shrink(byte[] array, int targetSize) {
final int newSize = getShrinkSize(array.length, targetSize);
final int newSize = getShrinkSize(array.length, targetSize, 1);
if (newSize != array.length) {
byte[] newArray = new byte[newSize];
System.arraycopy(array, 0, newArray, 0, newSize);
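A minimal sketch of the caller pattern the rest of this commit migrates to (the helper class and method names below are hypothetical; ArrayUtil.oversize and the RamUsageEstimator constants are the ones introduced in this change):

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;

class OversizeSketch {
  // Hypothetical grow-on-demand helper mirroring the pattern used throughout this commit.
  static char[] ensureCapacity(char[] current, int minLength) {
    if (current.length < minLength) {
      // Over-allocate so a long run of appends stays amortized linear-time; the
      // bytesPerElement argument lets oversize() round the new length to the JVM's alignment.
      char[] next = new char[ArrayUtil.oversize(minLength, RamUsageEstimator.NUM_BYTES_CHAR)];
      System.arraycopy(current, 0, next, 0, current.length);
      return next;
    }
    return current;
  }
}

On a 64-bit JRE, for example, oversize(100, NUM_BYTES_CHAR) adds 100 >> 3 = 12 elements and then rounds 112 up to a multiple of four chars (eight bytes), returning 112; oversize(0, ...) returns 0, and requests near Integer.MAX_VALUE are capped at Integer.MAX_VALUE, as the new TestArrayUtil below verifies.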

View File

@ -43,6 +43,14 @@ public final class RamUsageEstimator {
private int arraySize;
private int classSize;
public final static int NUM_BYTES_OBJECT_REF = Constants.JRE_IS_64BIT ? 8 : 4;
public final static int NUM_BYTES_CHAR = 2;
public final static int NUM_BYTES_SHORT = 2;
public final static int NUM_BYTES_INT = 4;
public final static int NUM_BYTES_LONG = 8;
public final static int NUM_BYTES_FLOAT = 4;
public final static int NUM_BYTES_DOUBLE = 8;
private boolean checkInterned;
/**

View File

@ -85,7 +85,6 @@ public class TestToken extends LuceneTestCase {
buf.append(buf.toString());
}
assertEquals(1048576, t.termLength());
assertEquals(1179654, t.termBuffer().length);
// now as a string, first variant
t = new Token();
@ -99,7 +98,6 @@ public class TestToken extends LuceneTestCase {
buf.append(content);
}
assertEquals(1048576, t.termLength());
assertEquals(1179654, t.termBuffer().length);
// now as a string, second variant
t = new Token();
@ -113,7 +111,6 @@ public class TestToken extends LuceneTestCase {
buf.append(content);
}
assertEquals(1048576, t.termLength());
assertEquals(1179654, t.termBuffer().length);
// Test for slow growth to a long term
t = new Token();
@ -127,7 +124,6 @@ public class TestToken extends LuceneTestCase {
buf.append("a");
}
assertEquals(20000, t.termLength());
assertEquals(20167, t.termBuffer().length);
// Test for slow growth to a long term
t = new Token();
@ -141,7 +137,6 @@ public class TestToken extends LuceneTestCase {
buf.append("a");
}
assertEquals(20000, t.termLength());
assertEquals(20167, t.termBuffer().length);
}
public void testToString() throws Exception {

View File

@ -49,7 +49,6 @@ public class TestTermAttributeImpl extends LuceneTestCase {
buf.append(buf.toString());
}
assertEquals(1048576, t.termLength());
assertEquals(1179654, t.termBuffer().length);
// now as a string, first variant
t = new TermAttributeImpl();
@ -63,7 +62,6 @@ public class TestTermAttributeImpl extends LuceneTestCase {
buf.append(content);
}
assertEquals(1048576, t.termLength());
assertEquals(1179654, t.termBuffer().length);
// now as a string, second variant
t = new TermAttributeImpl();
@ -77,7 +75,6 @@ public class TestTermAttributeImpl extends LuceneTestCase {
buf.append(content);
}
assertEquals(1048576, t.termLength());
assertEquals(1179654, t.termBuffer().length);
// Test for slow growth to a long term
t = new TermAttributeImpl();
@ -91,7 +88,6 @@ public class TestTermAttributeImpl extends LuceneTestCase {
buf.append("a");
}
assertEquals(20000, t.termLength());
assertEquals(20167, t.termBuffer().length);
// Test for slow growth to a long term
t = new TermAttributeImpl();
@ -105,7 +101,6 @@ public class TestTermAttributeImpl extends LuceneTestCase {
buf.append("a");
}
assertEquals(20000, t.termLength());
assertEquals(20167, t.termBuffer().length);
}
public void testToString() throws Exception {

View File

@ -0,0 +1,59 @@
package org.apache.lucene.util;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Random;
public class TestArrayUtil extends LuceneTestCase {
// Ensure ArrayUtil.oversize gives linear amortized cost of realloc/copy
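// With each reallocation at least 1/8th larger, the previous sizes form a
// geometric series summing to roughly 8x the current size, which keeps the
// per-element copy cost asserted below well under the 10.0 bound.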
public void testGrowth() {
int currentSize = 0;
long copyCost = 0;
// Make sure ArrayUtil hits Integer.MAX_VALUE, if we insist:
while(currentSize != Integer.MAX_VALUE) {
int nextSize = ArrayUtil.oversize(1+currentSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
assertTrue(nextSize > currentSize);
if (currentSize > 0) {
copyCost += currentSize;
double copyCostPerElement = ((double) copyCost)/currentSize;
assertTrue("cost " + copyCostPerElement, copyCostPerElement < 10.0);
}
currentSize = nextSize;
}
}
public void testMaxSize() {
// intentionally pass invalid elemSizes:
for(int elemSize=0;elemSize<10;elemSize++) {
assertEquals(Integer.MAX_VALUE, ArrayUtil.oversize(Integer.MAX_VALUE, elemSize));
assertEquals(Integer.MAX_VALUE, ArrayUtil.oversize(Integer.MAX_VALUE-1, elemSize));
}
}
public void testInvalidElementSizes() {
final Random r = newRandom();
for(int iter=0;iter<10000;iter++) {
final int minTargetSize = r.nextInt(Integer.MAX_VALUE);
final int elemSize = r.nextInt(11);
final int v = ArrayUtil.oversize(minTargetSize, elemSize);
assertTrue(v >= minTargetSize);
}
}
}

View File

@ -164,14 +164,14 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
int encodedLen1 = IndexableBinaryStringTools.getEncodedLength(
originalArray1, 0, numBytes1);
if (encodedLen1 > encoded1.length)
encoded1 = new char[ArrayUtil.getNextSize(encodedLen1)];
encoded1 = new char[ArrayUtil.oversize(encodedLen1, RamUsageEstimator.NUM_BYTES_CHAR)];
IndexableBinaryStringTools.encode(originalArray1, 0, numBytes1, encoded1,
0, encodedLen1);
int encodedLen2 = IndexableBinaryStringTools.getEncodedLength(original2,
0, numBytes2);
if (encodedLen2 > encoded2.length)
encoded2 = new char[ArrayUtil.getNextSize(encodedLen2)];
encoded2 = new char[ArrayUtil.oversize(encodedLen2, RamUsageEstimator.NUM_BYTES_CHAR)];
IndexableBinaryStringTools.encode(original2, 0, numBytes2, encoded2, 0,
encodedLen2);
@ -308,7 +308,7 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
numBytes);
if (encoded.length < encodedLen)
encoded = new char[ArrayUtil.getNextSize(encodedLen)];
encoded = new char[ArrayUtil.oversize(encodedLen, RamUsageEstimator.NUM_BYTES_CHAR)];
IndexableBinaryStringTools.encode(binary, 0, numBytes, encoded, 0,
encodedLen);