- Fixed some funky indentation that I found while testing the contributed
  Portuguese stemmer.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149835 13f79535-47bb-0310-9956-ffa450edef68
2002-08-18 17:33:16 +00:00 · 2002-08-18 17:33:16 +00:00 · 63f7272924
parent bbbc192097
commit 63f7272924
4 changed files with 407 additions and 375 deletions
--- a/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
+++ b/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
@ -74,8 +74,8 @@ import java.util.Hashtable;
 * @author    Gerhard Schwarz
 * @version   $Id$
 */
-public class GermanAnalyzer extends Analyzer {
-
+public class GermanAnalyzer extends Analyzer
+{
    /**
     * List of typical german stopwords.
     */
@ -98,6 +98,7 @@ public class GermanAnalyzer extends Analyzer {
     * Contains the stopwords used with the StopFilter.
     */
    private Hashtable stoptable = new Hashtable();
+
    /**
     * Contains words that should be indexed but not stemmed.
     */
@ -106,47 +107,56 @@ public class GermanAnalyzer extends Analyzer {
    /**
     * Builds an analyzer.
     */
-	public GermanAnalyzer() {
+    public GermanAnalyzer()
+    {
 	stoptable = StopFilter.makeStopTable( GERMAN_STOP_WORDS );
    }

    /**
     * Builds an analyzer with the given stop words.
     */
-	public GermanAnalyzer( String[] stopwords ) {
+    public GermanAnalyzer( String[] stopwords )
+    {
 	stoptable = StopFilter.makeStopTable( stopwords );
    }

    /**
     * Builds an analyzer with the given stop words.
     */
-	public GermanAnalyzer( Hashtable stopwords ) {
+    public GermanAnalyzer( Hashtable stopwords )
+    {
 	stoptable = stopwords;
    }

    /**
     * Builds an analyzer with the given stop words.
     */
-	public GermanAnalyzer( File stopwords ) {
+    public GermanAnalyzer( File stopwords )
+    {
 	stoptable = WordlistLoader.getWordtable( stopwords );
    }

    /**
     * Builds an exclusionlist from an array of Strings.
     */
-	public void setStemExclusionTable( String[] exclusionlist ) {
+    public void setStemExclusionTable( String[] exclusionlist )
+    {
 	excltable = StopFilter.makeStopTable( exclusionlist );
    }
+
    /**
     * Builds an exclusionlist from a Hashtable.
     */
-	public void setStemExclusionTable( Hashtable exclusionlist ) {
+    public void setStemExclusionTable( Hashtable exclusionlist )
+    {
 	excltable = exclusionlist;
    }
+
    /**
     * Builds an exclusionlist from the words contained in the given file.
     */
-	public void setStemExclusionTable( File exclusionlist ) {
+    public void setStemExclusionTable( File exclusionlist )
+    {
 	excltable = WordlistLoader.getWordtable( exclusionlist );
    }

@ -156,7 +166,8 @@ public class GermanAnalyzer extends Analyzer {
     * @return  A TokenStream build from a StandardTokenizer filtered with
     *		StandardFilter, StopFilter, GermanStemFilter and LowerCaseFilter
     */
-	public TokenStream tokenStream( String fieldName, Reader reader ) {
+    public TokenStream tokenStream( String fieldName, Reader reader )
+    {
 	TokenStream result = new StandardTokenizer( reader );
 	result = new StandardFilter( result );
 	result = new StopFilter( result, stoptable );
@ -164,4 +175,3 @@ public class GermanAnalyzer extends Analyzer {
 	return result;
    }
 }
-
--- a/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
+++ b/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
@ -68,8 +68,8 @@ import java.util.Hashtable;
 * @author    Gerhard Schwarz
 * @version   $Id$
 */
-public final class GermanStemFilter extends TokenFilter {
-
+public final class GermanStemFilter extends TokenFilter
+{
    /**
     * The actual token in the input stream.
     */
@ -77,7 +77,8 @@ public final class GermanStemFilter extends TokenFilter {
    private GermanStemmer stemmer = null;
    private Hashtable exclusions = null;
    
-	public GermanStemFilter( TokenStream in ) {
+    public GermanStemFilter( TokenStream in )
+    {
 	stemmer = new GermanStemmer();
 	input = in;
    }
@ -85,7 +86,8 @@ public final class GermanStemFilter extends TokenFilter {
    /**
     * Builds a GermanStemFilter that uses an exclusiontable.
     */
-	public GermanStemFilter( TokenStream in, Hashtable exclusiontable ) {
+    public GermanStemFilter( TokenStream in, Hashtable exclusiontable )
+    {
 	this( in );
 	exclusions = exclusiontable;
    }
@ -94,7 +96,8 @@ public final class GermanStemFilter extends TokenFilter {
     * @return  Returns the next token in the stream, or null at EOS
     */
    public final Token next()
-		throws IOException {
+	throws IOException
+    {
 	if ( ( token = input.next() ) == null ) {
 	    return null;
 	}
@ -112,18 +115,22 @@ public final class GermanStemFilter extends TokenFilter {
 	    return token;
 	}
    }
+
    /**
     * Set a alternative/custom GermanStemmer for this filter.
     */
-	public void setStemmer( GermanStemmer stemmer ) {
+    public void setStemmer( GermanStemmer stemmer )
+    {
 	if ( stemmer != null ) {
 	    this.stemmer = stemmer;
 	}
    }
+
    /**
     * Set an alternative exclusion list for this filter.
     */
-	public void setExclusionTable( Hashtable exclusiontable ) {
+    public void setExclusionTable( Hashtable exclusiontable )
+    {
 	exclusions = exclusiontable;
    }
 }
--- a/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
+++ b/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
@ -62,17 +62,18 @@ package org.apache.lucene.analysis.de;
 * @author    Gerhard Schwarz
 * @version   $Id$
 */
-
-public class GermanStemmer {
-
+public class GermanStemmer
+{
    /**
     * Buffer for the terms while stemming them.
     */
    private StringBuffer sb = new StringBuffer();
+
    /**
     * Indicates if a term is handled as a noun.
     */
    private boolean uppercase = false;
+
    /**
     * Amount of characters that are removed with <tt>substitute()</tt> while stemming.
     */
@ -84,12 +85,14 @@ public class GermanStemmer {
     * @param term  The term that should be stemmed.
     * @return      Discriminator for <tt>term</tt>
     */
-    protected String stem( String term ) {
+    protected String stem( String term )
+    {
 	// Mark a possible noun.
 	uppercase = Character.isUpperCase( term.charAt( 0 ) );
 	// Use lowercase for medium stemming.
 	term = term.toLowerCase();
-		if ( !isStemmable( term ) ) return term;
+	if ( !isStemmable( term ) )
+	    return term;
 	// Reset the StringBuffer.
 	sb.delete( 0, sb.length() );
 	sb.insert( 0, term );
@ -107,7 +110,8 @@ public class GermanStemmer {
     *
     * @return  true if, and only if, the given term consists in letters.
     */
-    private boolean isStemmable( String term ) {
+    private boolean isStemmable( String term )
+    {
 	for ( int c = 0; c < term.length(); c++ ) {
 	    if ( !Character.isLetter( term.charAt( c ) ) ) return false;
 	}
@ -122,16 +126,21 @@ public class GermanStemmer {
     * discriminators in the most of those cases.
     * The algorithm is context free, except of the length restrictions.
     */
-	private void strip( StringBuffer buffer ) {
+    private void strip( StringBuffer buffer )
+    {
 	boolean doMore = true;
 	while ( doMore && buffer.length() > 3 ) {
-			if ( ( buffer.length() + substCount > 5 ) && buffer.substring( buffer.length() - 2, buffer.length() ).equals( "nd" ) ) {
+	    if ( ( buffer.length() + substCount > 5 ) &&
+		buffer.substring( buffer.length() - 2, buffer.length() ).equals( "nd" ) )
+	    {
 		buffer.delete( buffer.length() - 2, buffer.length() );
 	    }
-			else if ( ( buffer.length() + substCount > 4 ) && buffer.substring( buffer.length() - 2, buffer.length() ).equals( "em" ) ) {
+	    else if ( ( buffer.length() + substCount > 4 ) &&
+		buffer.substring( buffer.length() - 2, buffer.length() ).equals( "em" ) ) {
 		buffer.delete( buffer.length() - 2, buffer.length() );
 	    }
-			else if ( ( buffer.length() + substCount > 4 ) && buffer.substring( buffer.length() - 2, buffer.length() ).equals( "er" ) ) {
+	    else if ( ( buffer.length() + substCount > 4 ) &&
+		buffer.substring( buffer.length() - 2, buffer.length() ).equals( "er" ) ) {
 		buffer.delete( buffer.length() - 2, buffer.length() );
 	    }
 	    else if ( buffer.charAt( buffer.length() - 1 ) == 'e' ) {
@ -159,7 +168,8 @@ public class GermanStemmer {
     *
     * @return  The term with the optimizations applied.
     */
-	private void optimize( StringBuffer buffer ) {
+    private void optimize( StringBuffer buffer )
+    {
 	// Additional step for female plurals of professions and inhabitants.
 	if ( buffer.length() > 5 && buffer.substring( buffer.length() - 5, buffer.length() ).equals( "erin*" ) ) {
 	    buffer.deleteCharAt( buffer.length() -1 );
@ -174,7 +184,8 @@ public class GermanStemmer {
    /**
     * Removes a particle denotion ("ge") from a term.
     */
-    private void removeParticleDenotion( StringBuffer buffer ) {
+    private void removeParticleDenotion( StringBuffer buffer )
+    {
 	if ( buffer.length() > 4 ) {
 	    for ( int c = 0; c < buffer.length() - 3; c++ ) {
 		if ( buffer.substring( c, c + 4 ).equals( "gege" ) ) {
@ -195,7 +206,8 @@ public class GermanStemmer {
     * - Substitute some common character combinations with a token:
     *   sch/ch/ei/ie/ig/st -> $/§/%/&/#/!
     */
-    private void substitute( StringBuffer buffer ) {
+    private void substitute( StringBuffer buffer )
+    {
 	substCount = 0;
 	for ( int c = 0; c < buffer.length(); c++ ) {
 	    // Replace the second char of a pair of the equal characters with an asterisk
@ -220,7 +232,9 @@ public class GermanStemmer {
 		    substCount++;
 		}
 		// Masking several common character combinations with an token
-				else if ( ( c < buffer.length() - 2 ) && buffer.charAt( c ) == 's' && buffer.charAt( c + 1 ) == 'c' && buffer.charAt( c + 2 ) == 'h' ) {
+		else if ( ( c < buffer.length() - 2 ) && buffer.charAt( c ) == 's' &&
+		    buffer.charAt( c + 1 ) == 'c' && buffer.charAt( c + 2 ) == 'h' )
+		{
 		    buffer.setCharAt( c, '$' );
 		    buffer.delete( c + 1, c + 3 );
 		    substCount =+ 2;
@ -259,7 +273,8 @@ public class GermanStemmer {
     * character combinations. Umlauts will remain as their corresponding vowel,
     * as "ß" remains as "ss".
     */
-    private void resubstitute( StringBuffer buffer ) {
+    private void resubstitute( StringBuffer buffer )
+    {
 	for ( int c = 0; c < buffer.length(); c++ ) {
 	    if ( buffer.charAt( c ) == '*' ) {
 		char x = buffer.charAt( c - 1 );
--- a/src/java/org/apache/lucene/analysis/de/WordlistLoader.java
+++ b/src/java/org/apache/lucene/analysis/de/WordlistLoader.java
@ -68,8 +68,8 @@ import java.util.Hashtable;
 * @author    Gerhard Schwarz
 * @version   $Id$
 */
-public class WordlistLoader {
-
+public class WordlistLoader
+{
    /**
     * @param path      Path to the wordlist
     * @param wordfile  Name of the wordlist
@ -80,6 +80,7 @@ public class WordlistLoader {
 	}
 	return getWordtable( new File( path, wordfile ) );
    }
+
    /**
     * @param wordfile  Complete path to the wordlist
     */
@ -135,4 +136,3 @@ public class WordlistLoader {
 	return table;
    }
 }
-