- Fixed some funky indentation that I found while testing the contributed

Portuguese stemmer. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149835 13f79535-47bb-0310-9956-ffa450edef68
2002-08-18 17:33:16 +00:00 · 2002-08-18 17:33:16 +00:00 · 63f7272924
parent bbbc192097
commit 63f7272924
4 changed files with 407 additions and 375 deletions
--- a/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
+++ b/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
@ -74,94 +74,104 @@ import java.util.Hashtable;
 * @author    Gerhard Schwarz
 * @version   $Id$
 */
-public class GermanAnalyzer extends Analyzer {
+public class GermanAnalyzer extends Analyzer
 {
    /**
     * List of typical German stopwords.
     */
    private String[] GERMAN_STOP_WORDS = {
 	"einer", "eine", "eines", "einem", "einen",
 	"der", "die", "das", "dass", "daß",
 	"du", "er", "sie", "es",
 	"was", "wer", "wie", "wir",
 	"und", "oder", "ohne", "mit",
 	"am", "im", "in", "aus", "auf",
 	"ist", "sein", "war", "wird",
 	"ihr", "ihre", "ihres",
 	"als", "für", "von", "mit",
 	"dich", "dir", "mich", "mir",
 	"mein", "sein", "kein",
 	"durch", "wegen", "wird"
    };
-	/**
+    /**
-	 * List of typical german stopwords.
+     * Contains the stopwords used with the StopFilter.
-	 */
+     */
-	private String[] GERMAN_STOP_WORDS = {
+    private Hashtable stoptable = new Hashtable();
 		"einer", "eine", "eines", "einem", "einen",
 		"der", "die", "das", "dass", "daß",
 		"du", "er", "sie", "es",
 		"was", "wer", "wie", "wir",
 		"und", "oder", "ohne", "mit",
 		"am", "im", "in", "aus", "auf",
 		"ist", "sein", "war", "wird",
 		"ihr", "ihre", "ihres",
 		"als", "für", "von", "mit",
 		"dich", "dir", "mich", "mir",
 		"mein", "sein", "kein",
 		"durch", "wegen", "wird"
 	};
-	/**
+    /**
-	 * Contains the stopwords used with the StopFilter.
+     * Contains words that should be indexed but not stemmed.
-	 */
+     */
-	private Hashtable stoptable = new Hashtable();
+    private Hashtable excltable = new Hashtable();
 	/**
 	 * Contains words that should be indexed but not stemmed.
 	 */
 	private Hashtable excltable = new Hashtable();
-	/**
+    /**
-	 * Builds an analyzer.
+     * Builds an analyzer.
-	 */
+     */
-	public GermanAnalyzer() {
+    public GermanAnalyzer()
-		stoptable = StopFilter.makeStopTable( GERMAN_STOP_WORDS );
+    {
-	}
+	stoptable = StopFilter.makeStopTable( GERMAN_STOP_WORDS );
    }
-	/**
+    /**
-	 * Builds an analyzer with the given stop words.
+     * Builds an analyzer with the given stop words.
-	 */
+     */
-	public GermanAnalyzer( String[] stopwords ) {
+    public GermanAnalyzer( String[] stopwords )
-		stoptable = StopFilter.makeStopTable( stopwords );
+    {
-	}
+	stoptable = StopFilter.makeStopTable( stopwords );
    }
-	/**
+    /**
-	 * Builds an analyzer with the given stop words.
+     * Builds an analyzer with the given stop words.
-	 */
+     */
-	public GermanAnalyzer( Hashtable stopwords ) {
+    public GermanAnalyzer( Hashtable stopwords )
-		stoptable = stopwords;
+    {
-	}
+	stoptable = stopwords;
    }
-	/**
+    /**
-	 * Builds an analyzer with the given stop words.
+     * Builds an analyzer with the given stop words.
-	 */
+     */
-	public GermanAnalyzer( File stopwords ) {
+    public GermanAnalyzer( File stopwords )
-		stoptable = WordlistLoader.getWordtable( stopwords );
+    {
-	}
+	stoptable = WordlistLoader.getWordtable( stopwords );
    }
-	/**
+    /**
-	 * Builds an exclusionlist from an array of Strings.
+     * Builds an exclusionlist from an array of Strings.
-	 */
+     */
-	public void setStemExclusionTable( String[] exclusionlist ) {
+    public void setStemExclusionTable( String[] exclusionlist )
-		excltable = StopFilter.makeStopTable( exclusionlist );
+    {
-	}
+	excltable = StopFilter.makeStopTable( exclusionlist );
-	/**
+    }
 	 * Builds an exclusionlist from a Hashtable.
 	 */
 	public void setStemExclusionTable( Hashtable exclusionlist ) {
 		excltable = exclusionlist;
 	}
 	/**
 	 * Builds an exclusionlist from the words contained in the given file.
 	 */
 	public void setStemExclusionTable( File exclusionlist ) {
 		excltable = WordlistLoader.getWordtable( exclusionlist );
 	}
-	/**
+    /**
-	 * Creates a TokenStream which tokenizes all the text in the provided Reader.
+     * Builds an exclusionlist from a Hashtable.
-	 *
+     */
-	 * @return  A TokenStream build from a StandardTokenizer filtered with
+    public void setStemExclusionTable( Hashtable exclusionlist )
-	 * 			StandardFilter, StopFilter, GermanStemFilter and LowerCaseFilter
+    {
-	 */
+	excltable = exclusionlist;
-	public TokenStream tokenStream( String fieldName, Reader reader ) {
+    }
-		TokenStream result = new StandardTokenizer( reader );
+
-		result = new StandardFilter( result );
+    /**
-		result = new StopFilter( result, stoptable );
+     * Builds an exclusionlist from the words contained in the given file.
-		result = new GermanStemFilter( result, excltable );
+     */
-		return result;
+    public void setStemExclusionTable( File exclusionlist )
-	}
+    {
 	excltable = WordlistLoader.getWordtable( exclusionlist );
    }
    /**
     * Creates a TokenStream which tokenizes all the text in the provided Reader.
     *
     * @param fieldName  Name of the field being analyzed; not used by this analyzer.
     * @param reader     Source of the text to tokenize.
     * @return  A TokenStream built from a StandardTokenizer and passed, in
     *		order, through StandardFilter, StopFilter (with the stop table)
     *		and GermanStemFilter (with the stem exclusion table).
     */
    public TokenStream tokenStream( String fieldName, Reader reader )
    {
 	TokenStream tokens = new StandardTokenizer( reader );
 	TokenStream normalized = new StandardFilter( tokens );
 	TokenStream withoutStopWords = new StopFilter( normalized, stoptable );
 	// Stemming comes last so that stop words are never stemmed.
 	return new GermanStemFilter( withoutStopWords, excltable );
    }
 }
--- a/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
+++ b/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
@ -68,62 +68,69 @@ import java.util.Hashtable;
 * @author    Gerhard Schwarz
 * @version   $Id$
 */
-public final class GermanStemFilter extends TokenFilter {
+public final class GermanStemFilter extends TokenFilter
 {
    /**
     * The actual token in the input stream.
     */
    private Token token = null;
    private GermanStemmer stemmer = null;
    private Hashtable exclusions = null;
-	/**
+    public GermanStemFilter( TokenStream in )
-	 * The actual token in the input stream.
+    {
-	 */
+	stemmer = new GermanStemmer();
-	private Token token = null;
+	input = in;
-	private GermanStemmer stemmer = null;
+    }
 	private Hashtable exclusions = null;
-	public GermanStemFilter( TokenStream in ) {
+    /**
-		stemmer = new GermanStemmer();
+     * Builds a GermanStemFilter that uses an exclusiontable.
-		input = in;
+     */
-	}
+    public GermanStemFilter( TokenStream in, Hashtable exclusiontable )
    {
 	this( in );
 	exclusions = exclusiontable;
    }
-	/**
+    /**
-	 * Builds a GermanStemFilter that uses an exclusiontable.
+     * @return  Returns the next token in the stream, or null at EOS
-	 */
+     */
-	public GermanStemFilter( TokenStream in, Hashtable exclusiontable ) {
+    public final Token next()
-		this( in );
+	throws IOException
-		exclusions = exclusiontable;
+    {
 	if ( ( token = input.next() ) == null ) {
 	    return null;
 	}
 	// Check the exclusiontable
 	else if ( exclusions != null && exclusions.contains( token.termText() ) ) {
 	    return token;
 	}
 	else {
 	    String s = stemmer.stem( token.termText() );
 	    // If not stemmed, dont waste the time creating a new token
 	    if ( !s.equals( token.termText() ) ) {
 		return new Token( s, token.startOffset(),
 		    token.endOffset(), token.type() );
 	    }
 	    return token;
 	}
    }
-	/**
+    /**
-	 * @return  Returns the next token in the stream, or null at EOS
+     * Set an alternative/custom GermanStemmer for this filter.
-	 */
+     */
-	public final Token next()
+    public void setStemmer( GermanStemmer stemmer )
-		throws IOException {
+    {
-		if ( ( token = input.next() ) == null ) {
+	if ( stemmer != null ) {
-			return null;
+	    this.stemmer = stemmer;
 		}
 		// Check the exclusiontable
 		else if ( exclusions != null && exclusions.contains( token.termText() ) ) {
 			return token;
 		}
 		else {
 			String s = stemmer.stem( token.termText() );
 			// If not stemmed, dont waste the time creating a new token
 			if ( !s.equals( token.termText() ) ) {
 				return new Token( s, token.startOffset(),
 				    token.endOffset(), token.type() );
 			}
 			return token;
 		}
 	}
 	/**
 	 * Set a alternative/custom GermanStemmer for this filter.
 	 */
 	public void setStemmer( GermanStemmer stemmer ) {
 		if ( stemmer != null ) {
 			this.stemmer = stemmer;
 		}
 	}
 	/**
 	 * Set an alternative exclusion list for this filter.
 	 */
 	public void setExclusionTable( Hashtable exclusiontable ) {
 		exclusions = exclusiontable;
 	}
    }
    /**
     * Replaces the exclusion list of this filter.
     *
     * @param exclusiontable  Table of terms that are passed through unstemmed;
     *			      stored as given, without copying.
     */
    public void setExclusionTable( Hashtable exclusiontable )
    {
 	this.exclusions = exclusiontable;
    }
 }
--- a/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
+++ b/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
@ -62,17 +62,18 @@ package org.apache.lucene.analysis.de;
 * @author    Gerhard Schwarz
 * @version   $Id$
 */
-
+public class GermanStemmer
-public class GermanStemmer {
+{
    /**
     * Buffer for the terms while stemming them.
     */
    private StringBuffer sb = new StringBuffer();
-	/**
+
-	 * Indicates if a term is handled as a noun.
+    /**
     * Indicates if a term is handled as a noun.
     */
    private boolean uppercase = false;
    /**
     * Amount of characters that are removed with <tt>substitute()</tt> while stemming.
     */
@ -84,22 +85,24 @@ public class GermanStemmer {
     * @param term  The term that should be stemmed.
     * @return      Discriminator for <tt>term</tt>
     */
-    protected String stem( String term ) {
+    protected String stem( String term )
-		// Mark a possible noun.
+    {
-		uppercase = Character.isUpperCase( term.charAt( 0 ) );
+	// Mark a possible noun.
-		// Use lowercase for medium stemming.
+	uppercase = Character.isUpperCase( term.charAt( 0 ) );
-		term = term.toLowerCase();
+	// Use lowercase for medium stemming.
-		if ( !isStemmable( term ) ) return term;
+	term = term.toLowerCase();
-		// Reset the StringBuffer.
+	if ( !isStemmable( term ) )
-		sb.delete( 0, sb.length() );
+	    return term;
-		sb.insert( 0, term );
+	// Reset the StringBuffer.
-		// Stemming starts here...
+	sb.delete( 0, sb.length() );
-		substitute( sb );
+	sb.insert( 0, term );
-		strip( sb );
+	// Stemming starts here...
-		optimize( sb );
+	substitute( sb );
-		resubstitute( sb );
+	strip( sb );
-		removeParticleDenotion( sb );
+	optimize( sb );
-		return sb.toString();
+	resubstitute( sb );
 	removeParticleDenotion( sb );
 	return sb.toString();
    }
    /**
@ -107,82 +110,90 @@ public class GermanStemmer {
     *
     * @return  true if, and only if, the given term consists in letters.
     */
-    private boolean isStemmable( String term ) {
+    private boolean isStemmable( String term )
-		for ( int c = 0; c < term.length(); c++ ) {
+    {
-			if ( !Character.isLetter( term.charAt( c ) ) ) return false;
+	for ( int c = 0; c < term.length(); c++ ) {
-		}
+	    if ( !Character.isLetter( term.charAt( c ) ) ) return false;
-		return true;
+	}
 	return true;
    }
-	/**
+    /**
-	 * suffix stripping (stemming) on the current term. The stripping is reduced
+     * suffix stripping (stemming) on the current term. The stripping is reduced
-	 * to the seven "base" suffixes "e", "s", "n", "t", "em", "er" and * "nd",
+     * to the seven "base" suffixes "e", "s", "n", "t", "em", "er" and * "nd",
-	 * from which all regular suffixes are build of. The simplification causes
+     * from which all regular suffixes are build of. The simplification causes
-	 * some overstemming, and way more irregular stems, but still provides unique.
+     * some overstemming, and way more irregular stems, but still provides unique.
-	 * discriminators in the most of those cases.
+     * discriminators in the most of those cases.
-	 * The algorithm is context free, except of the length restrictions.
+     * The algorithm is context free, except of the length restrictions.
-	 */
+     */
-	private void strip( StringBuffer buffer ) {
+    private void strip( StringBuffer buffer )
-		boolean doMore = true;
+    {
-		while ( doMore && buffer.length() > 3 ) {
+	boolean doMore = true;
-			if ( ( buffer.length() + substCount > 5 ) && buffer.substring( buffer.length() - 2, buffer.length() ).equals( "nd" ) ) {
+	while ( doMore && buffer.length() > 3 ) {
-				buffer.delete( buffer.length() - 2, buffer.length() );
+	    if ( ( buffer.length() + substCount > 5 ) &&
-			}
+		buffer.substring( buffer.length() - 2, buffer.length() ).equals( "nd" ) )
-			else if ( ( buffer.length() + substCount > 4 ) && buffer.substring( buffer.length() - 2, buffer.length() ).equals( "em" ) ) {
+	    {
-				buffer.delete( buffer.length() - 2, buffer.length() );
+		buffer.delete( buffer.length() - 2, buffer.length() );
-			}
+	    }
-			else if ( ( buffer.length() + substCount > 4 ) && buffer.substring( buffer.length() - 2, buffer.length() ).equals( "er" ) ) {
+	    else if ( ( buffer.length() + substCount > 4 ) &&
-				buffer.delete( buffer.length() - 2, buffer.length() );
+		buffer.substring( buffer.length() - 2, buffer.length() ).equals( "em" ) ) {
-			}
+		buffer.delete( buffer.length() - 2, buffer.length() );
-			else if ( buffer.charAt( buffer.length() - 1 ) == 'e' ) {
+	    }
-				buffer.deleteCharAt( buffer.length() - 1 );
+	    else if ( ( buffer.length() + substCount > 4 ) &&
-			}
+		buffer.substring( buffer.length() - 2, buffer.length() ).equals( "er" ) ) {
-			else if ( buffer.charAt( buffer.length() - 1 ) == 's' ) {
+		buffer.delete( buffer.length() - 2, buffer.length() );
-				buffer.deleteCharAt( buffer.length() - 1 );
+	    }
-			}
+	    else if ( buffer.charAt( buffer.length() - 1 ) == 'e' ) {
-			else if ( buffer.charAt( buffer.length() - 1 ) == 'n' ) {
+		buffer.deleteCharAt( buffer.length() - 1 );
-				buffer.deleteCharAt( buffer.length() - 1 );
+	    }
-			}
+	    else if ( buffer.charAt( buffer.length() - 1 ) == 's' ) {
-			// "t" occurs only as suffix of verbs.
+		buffer.deleteCharAt( buffer.length() - 1 );
-			else if ( buffer.charAt( buffer.length() - 1 ) == 't' && !uppercase ) {
+	    }
-				buffer.deleteCharAt( buffer.length() - 1 );
+	    else if ( buffer.charAt( buffer.length() - 1 ) == 'n' ) {
-			}
+		buffer.deleteCharAt( buffer.length() - 1 );
-			else {
+	    }
-				doMore = false;
+	    // "t" occurs only as suffix of verbs.
-			}
+	    else if ( buffer.charAt( buffer.length() - 1 ) == 't' && !uppercase ) {
-		}
+		buffer.deleteCharAt( buffer.length() - 1 );
 	    }
 	    else {
 		doMore = false;
 	    }
 	}
    }
-	/**
+    /**
-	 * Does some optimizations on the term. This optimisations are
+     * Does some optimizations on the term. This optimisations are
-	 * contextual.
+     * contextual.
-	 *
+     *
-	 * @return  The term with the optimizations applied.
+     * @return  The term with the optimizations applied.
-	 */
+     */
-	private void optimize( StringBuffer buffer ) {
+    private void optimize( StringBuffer buffer )
-		// Additional step for female plurals of professions and inhabitants.
+    {
-		if ( buffer.length() > 5 && buffer.substring( buffer.length() - 5, buffer.length() ).equals( "erin*" ) ) {
+	// Additional step for female plurals of professions and inhabitants.
-			buffer.deleteCharAt( buffer.length() -1 );
+	if ( buffer.length() > 5 && buffer.substring( buffer.length() - 5, buffer.length() ).equals( "erin*" ) ) {
-			strip( buffer );
+	    buffer.deleteCharAt( buffer.length() -1 );
-		}
+	    strip( buffer );
 		// Additional step for irregular plural nouns like "Matrizen -> Matrix".
 		if ( buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
 			buffer.setCharAt( buffer.length() - 1, 'x' );
 		}
 	}
 	// Additional step for irregular plural nouns like "Matrizen -> Matrix".
 	if ( buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
 	    buffer.setCharAt( buffer.length() - 1, 'x' );
 	}
    }
    /**
     * Removes a particle denotion ("ge") from a term.
     */
-    private void removeParticleDenotion( StringBuffer buffer ) {
+    private void removeParticleDenotion( StringBuffer buffer )
-		if ( buffer.length() > 4 ) {
+    {
-			for ( int c = 0; c < buffer.length() - 3; c++ ) {
+	if ( buffer.length() > 4 ) {
-				if ( buffer.substring( c, c + 4 ).equals( "gege" ) ) {
+	    for ( int c = 0; c < buffer.length() - 3; c++ ) {
-					buffer.delete( c, c + 2 );
+		if ( buffer.substring( c, c + 4 ).equals( "gege" ) ) {
-					return;
+		    buffer.delete( c, c + 2 );
-				}
+		    return;
 			}
 		}
 	    }
 	}
    }
    /**
@ -195,63 +206,66 @@ public class GermanStemmer {
     * - Substitute some common character combinations with a token:
     *   sch/ch/ei/ie/ig/st -> $/§/%/&/#/!
     */
-    private void substitute( StringBuffer buffer ) {
+    private void substitute( StringBuffer buffer )
-		substCount = 0;
+    {
-		for ( int c = 0; c < buffer.length(); c++ ) {
+	substCount = 0;
-			// Replace the second char of a pair of the equal characters with an asterisk
+	for ( int c = 0; c < buffer.length(); c++ ) {
-			if ( c > 0 && buffer.charAt( c ) == buffer.charAt ( c - 1 )  ) {
+	    // Replace the second char of a pair of the equal characters with an asterisk
-				buffer.setCharAt( c, '*' );
+	    if ( c > 0 && buffer.charAt( c ) == buffer.charAt ( c - 1 )  ) {
-			}
+		buffer.setCharAt( c, '*' );
-			// Substitute Umlauts.
+	    }
-			else if ( buffer.charAt( c ) == 'ä' ) {
+	    // Substitute Umlauts.
-				buffer.setCharAt( c, 'a' );
+	    else if ( buffer.charAt( c ) == 'ä' ) {
-			}
+		buffer.setCharAt( c, 'a' );
-			else if ( buffer.charAt( c ) == 'ö' ) {
+	    }
-				buffer.setCharAt( c, 'o' );
+	    else if ( buffer.charAt( c ) == 'ö' ) {
-			}
+		buffer.setCharAt( c, 'o' );
-			else if ( buffer.charAt( c ) == 'ü' ) {
+	    }
-				buffer.setCharAt( c, 'u' );
+	    else if ( buffer.charAt( c ) == 'ü' ) {
-			}
+		buffer.setCharAt( c, 'u' );
-			// Take care that at least one character is left left side from the current one
+	    }
-			if ( c < buffer.length() - 1 ) {
+	    // Take care that at least one character is left left side from the current one
-				if ( buffer.charAt( c ) == 'ß' ) {
+	    if ( c < buffer.length() - 1 ) {
-					buffer.setCharAt( c, 's' );
+		if ( buffer.charAt( c ) == 'ß' ) {
-					buffer.insert( c + 1, 's' );
+		    buffer.setCharAt( c, 's' );
-					substCount++;
+		    buffer.insert( c + 1, 's' );
-				}
+		    substCount++;
 				// Masking several common character combinations with an token
 				else if ( ( c < buffer.length() - 2 ) && buffer.charAt( c ) == 's' && buffer.charAt( c + 1 ) == 'c' && buffer.charAt( c + 2 ) == 'h' ) {
 					buffer.setCharAt( c, '$' );
 					buffer.delete( c + 1, c + 3 );
 					substCount =+ 2;
 				}
 				else if ( buffer.charAt( c ) == 'c' && buffer.charAt( c + 1 ) == 'h' ) {
 					buffer.setCharAt( c, '§' );
 					buffer.deleteCharAt( c + 1 );
 					substCount++;
 				}
 				else if ( buffer.charAt( c ) == 'e' && buffer.charAt( c + 1 ) == 'i' ) {
 					buffer.setCharAt( c, '%' );
 					buffer.deleteCharAt( c + 1 );
 					substCount++;
 				}
 				else if ( buffer.charAt( c ) == 'i' && buffer.charAt( c + 1 ) == 'e' ) {
 					buffer.setCharAt( c, '&' );
 					buffer.deleteCharAt( c + 1 );
 					substCount++;
 				}
 				else if ( buffer.charAt( c ) == 'i' && buffer.charAt( c + 1 ) == 'g' ) {
 					buffer.setCharAt( c, '#' );
 					buffer.deleteCharAt( c + 1 );
 					substCount++;
 				}
 				else if ( buffer.charAt( c ) == 's' && buffer.charAt( c + 1 ) == 't' ) {
 					buffer.setCharAt( c, '!' );
 					buffer.deleteCharAt( c + 1 );
 					substCount++;
 				}
 			}
 		}
 		// Masking several common character combinations with an token
 		else if ( ( c < buffer.length() - 2 ) && buffer.charAt( c ) == 's' &&
 		    buffer.charAt( c + 1 ) == 'c' && buffer.charAt( c + 2 ) == 'h' )
 		{
 		    buffer.setCharAt( c, '$' );
 		    buffer.delete( c + 1, c + 3 );
 		    substCount =+ 2;
 		}
 		else if ( buffer.charAt( c ) == 'c' && buffer.charAt( c + 1 ) == 'h' ) {
 		    buffer.setCharAt( c, '§' );
 		    buffer.deleteCharAt( c + 1 );
 		    substCount++;
 		}
 		else if ( buffer.charAt( c ) == 'e' && buffer.charAt( c + 1 ) == 'i' ) {
 		    buffer.setCharAt( c, '%' );
 		    buffer.deleteCharAt( c + 1 );
 		    substCount++;
 		}
 		else if ( buffer.charAt( c ) == 'i' && buffer.charAt( c + 1 ) == 'e' ) {
 		    buffer.setCharAt( c, '&' );
 		    buffer.deleteCharAt( c + 1 );
 		    substCount++;
 		}
 		else if ( buffer.charAt( c ) == 'i' && buffer.charAt( c + 1 ) == 'g' ) {
 		    buffer.setCharAt( c, '#' );
 		    buffer.deleteCharAt( c + 1 );
 		    substCount++;
 		}
 		else if ( buffer.charAt( c ) == 's' && buffer.charAt( c + 1 ) == 't' ) {
 		    buffer.setCharAt( c, '!' );
 		    buffer.deleteCharAt( c + 1 );
 		    substCount++;
 		}
 	    }
 	}
    }
    /**
@ -259,36 +273,37 @@ public class GermanStemmer {
     * character combinations. Umlauts will remain as their corresponding vowel,
     * as "ß" remains as "ss".
     */
-    private void resubstitute( StringBuffer buffer ) {
+    private void resubstitute( StringBuffer buffer )
-		for ( int c = 0; c < buffer.length(); c++ ) {
+    {
-			if ( buffer.charAt( c ) == '*' ) {
+	for ( int c = 0; c < buffer.length(); c++ ) {
-				char x = buffer.charAt( c - 1 );
+	    if ( buffer.charAt( c ) == '*' ) {
-				buffer.setCharAt( c, x );
+		char x = buffer.charAt( c - 1 );
-			}
+		buffer.setCharAt( c, x );
-			else if ( buffer.charAt( c ) == '$' ) {
+	    }
-				buffer.setCharAt( c, 's' );
+	    else if ( buffer.charAt( c ) == '$' ) {
-				buffer.insert( c + 1, new char[]{'c', 'h'}, 0, 2 );
+		buffer.setCharAt( c, 's' );
-			}
+		buffer.insert( c + 1, new char[]{'c', 'h'}, 0, 2 );
-			else if ( buffer.charAt( c ) == '§' ) {
+	    }
-				buffer.setCharAt( c, 'c' );
+	    else if ( buffer.charAt( c ) == '§' ) {
-				buffer.insert( c + 1, 'h' );
+		buffer.setCharAt( c, 'c' );
-			}
+		buffer.insert( c + 1, 'h' );
-			else if ( buffer.charAt( c ) == '%' ) {
+	    }
-				buffer.setCharAt( c, 'e' );
+	    else if ( buffer.charAt( c ) == '%' ) {
-				buffer.insert( c + 1, 'i' );
+		buffer.setCharAt( c, 'e' );
-			}
+		buffer.insert( c + 1, 'i' );
-			else if ( buffer.charAt( c ) == '&' ) {
+	    }
-				buffer.setCharAt( c, 'i' );
+	    else if ( buffer.charAt( c ) == '&' ) {
-				buffer.insert( c + 1, 'e' );
+		buffer.setCharAt( c, 'i' );
-			}
+		buffer.insert( c + 1, 'e' );
-			else if ( buffer.charAt( c ) == '#' ) {
+	    }
-				buffer.setCharAt( c, 'i' );
+	    else if ( buffer.charAt( c ) == '#' ) {
-				buffer.insert( c + 1, 'g' );
+		buffer.setCharAt( c, 'i' );
-			}
+		buffer.insert( c + 1, 'g' );
-			else if ( buffer.charAt( c ) == '!' ) {
+	    }
-				buffer.setCharAt( c, 's' );
+	    else if ( buffer.charAt( c ) == '!' ) {
-				buffer.insert( c + 1, 't' );
+		buffer.setCharAt( c, 's' );
-			}
+		buffer.insert( c + 1, 't' );
-		}
+	    }
 	}
    }
 }
--- a/src/java/org/apache/lucene/analysis/de/WordlistLoader.java
+++ b/src/java/org/apache/lucene/analysis/de/WordlistLoader.java
@ -68,71 +68,71 @@ import java.util.Hashtable;
 * @author    Gerhard Schwarz
 * @version   $Id$
 */
-public class WordlistLoader {
+public class WordlistLoader
 {
    /**
     * Loads the wordlist stored in the file <tt>wordfile</tt> below the
     * directory <tt>path</tt>.
     *
     * @param path      Path to the wordlist
     * @param wordfile  Name of the wordlist
     * @return  The table loaded from that file, or an empty table when either
     *		argument is null
     */
    public static Hashtable getWordtable( String path, String wordfile ) {
 	boolean locationKnown = path != null && wordfile != null;
 	return locationKnown
 	    ? getWordtable( new File( path, wordfile ) )
 	    : new Hashtable();
    }
-	/**
+    /**
-	 * @param path      Path to the wordlist
+     * @param wordfile  Complete path to the wordlist
-	 * @param wordfile  Name of the wordlist
+     */
-	 */
+    public static Hashtable getWordtable( String wordfile ) {
-	public static Hashtable getWordtable( String path, String wordfile ) {
+	if ( wordfile == null ) {
-		if ( path == null || wordfile == null ) {
+	    return new Hashtable();
 			return new Hashtable();
 		}
 		return getWordtable( new File( path, wordfile ) );
 	}
 	/**
 	 * @param wordfile  Complete path to the wordlist
 	 */
 	public static Hashtable getWordtable( String wordfile ) {
 		if ( wordfile == null ) {
 			return new Hashtable();
 		}
 		return getWordtable( new File( wordfile ) );
 	}
 	return getWordtable( new File( wordfile ) );
    }
-	/**
+    /**
-	 * @param wordfile  File containing the wordlist
+     * @param wordfile  File containing the wordlist
-	 */
+     */
-	public static Hashtable getWordtable( File wordfile ) {
+    public static Hashtable getWordtable( File wordfile ) {
-		if ( wordfile == null ) {
+	if ( wordfile == null ) {
-			return new Hashtable();
+	    return new Hashtable();
 		}
 		Hashtable result = null;
 		try {
 			LineNumberReader lnr = new LineNumberReader( new FileReader( wordfile ) );
 			String word = null;
 			String[] stopwords = new String[100];
 			int wordcount = 0;
 			while ( ( word = lnr.readLine() ) != null ) {
 				wordcount++;
 				if ( wordcount == stopwords.length ) {
 					String[] tmp = new String[stopwords.length + 50];
 					System.arraycopy( stopwords, 0, tmp, 0, wordcount );
 					stopwords = tmp;
 				}
 				stopwords[wordcount-1] = word;
 			}
 			result = makeWordTable( stopwords, wordcount );
 		}
 		// On error, use an empty table
 		catch ( IOException e ) {
 			result = new Hashtable();
 		}
 		return result;
 	}
 	Hashtable result = null;
 	try {
 	    LineNumberReader lnr = new LineNumberReader( new FileReader( wordfile ) );
 	    String word = null;
 	    String[] stopwords = new String[100];
 	    int wordcount = 0;
 	    while ( ( word = lnr.readLine() ) != null ) {
 		wordcount++;
 		if ( wordcount == stopwords.length ) {
 		    String[] tmp = new String[stopwords.length + 50];
 		    System.arraycopy( stopwords, 0, tmp, 0, wordcount );
 		    stopwords = tmp;
 		}
 		stopwords[wordcount-1] = word;
 	    }
 	    result = makeWordTable( stopwords, wordcount );
 	}
 	// On error, use an empty table
 	catch ( IOException e ) {
 	    result = new Hashtable();
 	}
 	return result;
    }
-	/**
+    /**
-	 * Builds the wordlist table.
+     * Builds the wordlist table.
-	 *
+     *
-	 * @param words   Word that where read
+     * @param words   Words that were read
-	 * @param length  Amount of words that where read into <tt>words</tt>
+     * @param length  Number of words that were read into <tt>words</tt>
-	 */
+     */
-	private static Hashtable makeWordTable( String[] words, int length ) {
+    private static Hashtable makeWordTable( String[] words, int length ) {
-		Hashtable table = new Hashtable( length );
+	Hashtable table = new Hashtable( length );
-		for ( int i = 0; i < length; i++ ) {
+	for ( int i = 0; i < length; i++ ) {
-			table.put( words[i], words[i] );
+	    table.put( words[i], words[i] );
 		}
 		return table;
 	}
 	return table;
    }
 }