mirror of https://github.com/apache/lucene.git
copy the Russian and German analyzers plus their test cases to the sandbox
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150998 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
726ddaeb5a
commit
87bcdf6f25
|
@ -0,0 +1,135 @@
|
||||||
|
package org.apache.lucene.analysis.de;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Hashtable;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyzer for German language. Supports an external list of stopwords (words that
|
||||||
|
* will not be indexed at all) and an external list of exclusions (word that will
|
||||||
|
* not be stemmed, but indexed).
|
||||||
|
* A default set of stopwords is used unless an alternative list is specified, the
|
||||||
|
* exclusion list is empty by default.
|
||||||
|
*
|
||||||
|
* @author Gerhard Schwarz
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class GermanAnalyzer extends Analyzer {
|
||||||
|
/**
|
||||||
|
* List of typical german stopwords.
|
||||||
|
*/
|
||||||
|
private String[] GERMAN_STOP_WORDS = {
|
||||||
|
"einer", "eine", "eines", "einem", "einen",
|
||||||
|
"der", "die", "das", "dass", "daß",
|
||||||
|
"du", "er", "sie", "es",
|
||||||
|
"was", "wer", "wie", "wir",
|
||||||
|
"und", "oder", "ohne", "mit",
|
||||||
|
"am", "im", "in", "aus", "auf",
|
||||||
|
"ist", "sein", "war", "wird",
|
||||||
|
"ihr", "ihre", "ihres",
|
||||||
|
"als", "für", "von", "mit",
|
||||||
|
"dich", "dir", "mich", "mir",
|
||||||
|
"mein", "sein", "kein",
|
||||||
|
"durch", "wegen", "wird"
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Contains the stopwords used with the StopFilter.
|
||||||
|
*/
|
||||||
|
private Set stopSet = new HashSet();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Contains words that should be indexed but not stemmed.
|
||||||
|
*/
|
||||||
|
private Set exclusionSet = new HashSet();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds an analyzer.
|
||||||
|
*/
|
||||||
|
public GermanAnalyzer() {
|
||||||
|
stopSet = StopFilter.makeStopSet(GERMAN_STOP_WORDS);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds an analyzer with the given stop words.
|
||||||
|
*/
|
||||||
|
public GermanAnalyzer(String[] stopwords) {
|
||||||
|
stopSet = StopFilter.makeStopSet(stopwords);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds an analyzer with the given stop words.
|
||||||
|
*/
|
||||||
|
public GermanAnalyzer(Hashtable stopwords) {
|
||||||
|
stopSet = new HashSet(stopwords.keySet());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds an analyzer with the given stop words.
|
||||||
|
*/
|
||||||
|
public GermanAnalyzer(File stopwords) throws IOException {
|
||||||
|
stopSet = WordlistLoader.getWordSet(stopwords);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds an exclusionlist from an array of Strings.
|
||||||
|
*/
|
||||||
|
public void setStemExclusionTable(String[] exclusionlist) {
|
||||||
|
exclusionSet = StopFilter.makeStopSet(exclusionlist);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds an exclusionlist from a Hashtable.
|
||||||
|
*/
|
||||||
|
public void setStemExclusionTable(Hashtable exclusionlist) {
|
||||||
|
exclusionSet = new HashSet(exclusionlist.keySet());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds an exclusionlist from the words contained in the given file.
|
||||||
|
*/
|
||||||
|
public void setStemExclusionTable(File exclusionlist) throws IOException {
|
||||||
|
exclusionSet = WordlistLoader.getWordSet(exclusionlist);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a TokenStream which tokenizes all the text in the provided Reader.
|
||||||
|
*
|
||||||
|
* @return A TokenStream build from a StandardTokenizer filtered with
|
||||||
|
* StandardFilter, LowerCaseFilter, StopFilter, GermanStemFilter
|
||||||
|
*/
|
||||||
|
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||||
|
TokenStream result = new StandardTokenizer(reader);
|
||||||
|
result = new StandardFilter(result);
|
||||||
|
result = new LowerCaseFilter(result);
|
||||||
|
result = new StopFilter(result, stopSet);
|
||||||
|
result = new GermanStemFilter(result, exclusionSet);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,119 @@
|
||||||
|
package org.apache.lucene.analysis.de;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Hashtable;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A filter that stems German words. It supports a table of words that should
|
||||||
|
* not be stemmed at all. The stemmer used can be changed at runtime after the
|
||||||
|
* filter object is created (as long as it is a GermanStemmer).
|
||||||
|
*
|
||||||
|
* @author Gerhard Schwarz
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public final class GermanStemFilter extends TokenFilter
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* The actual token in the input stream.
|
||||||
|
*/
|
||||||
|
private Token token = null;
|
||||||
|
private GermanStemmer stemmer = null;
|
||||||
|
private Set exclusionSet = null;
|
||||||
|
|
||||||
|
public GermanStemFilter( TokenStream in )
|
||||||
|
{
|
||||||
|
super(in);
|
||||||
|
stemmer = new GermanStemmer();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds a GermanStemFilter that uses an exclusiontable.
|
||||||
|
* @deprecated Use {@link #GermanStemFilter(org.apache.lucene.analysis.TokenStream, java.util.Set)} instead.
|
||||||
|
*/
|
||||||
|
public GermanStemFilter( TokenStream in, Hashtable exclusiontable )
|
||||||
|
{
|
||||||
|
this( in );
|
||||||
|
exclusionSet = new HashSet(exclusiontable.keySet());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds a GermanStemFilter that uses an exclusiontable.
|
||||||
|
*/
|
||||||
|
public GermanStemFilter( TokenStream in, Set exclusionSet )
|
||||||
|
{
|
||||||
|
this( in );
|
||||||
|
this.exclusionSet = exclusionSet;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Returns the next token in the stream, or null at EOS
|
||||||
|
*/
|
||||||
|
public final Token next()
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
if ( ( token = input.next() ) == null ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// Check the exclusiontable
|
||||||
|
else if ( exclusionSet != null && exclusionSet.contains( token.termText() ) ) {
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
String s = stemmer.stem( token.termText() );
|
||||||
|
// If not stemmed, dont waste the time creating a new token
|
||||||
|
if ( !s.equals( token.termText() ) ) {
|
||||||
|
return new Token( s, token.startOffset(),
|
||||||
|
token.endOffset(), token.type() );
|
||||||
|
}
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set a alternative/custom GermanStemmer for this filter.
|
||||||
|
*/
|
||||||
|
public void setStemmer( GermanStemmer stemmer )
|
||||||
|
{
|
||||||
|
if ( stemmer != null ) {
|
||||||
|
this.stemmer = stemmer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set an alternative exclusion list for this filter.
|
||||||
|
* @deprecated Use {@link #setExclusionSet(java.util.Set)} instead.
|
||||||
|
*/
|
||||||
|
public void setExclusionTable( Hashtable exclusiontable )
|
||||||
|
{
|
||||||
|
exclusionSet = new HashSet(exclusiontable.keySet());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set an alternative exclusion list for this filter.
|
||||||
|
*/
|
||||||
|
public void setExclusionSet( Set exclusionSet )
|
||||||
|
{
|
||||||
|
this.exclusionSet = exclusionSet;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,265 @@
|
||||||
|
package org.apache.lucene.analysis.de;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A stemmer for German words. The algorithm is based on the report
|
||||||
|
* "A Fast and Simple Stemming Algorithm for German Words" by Jörg
|
||||||
|
* Caumanns (joerg.caumanns@isst.fhg.de).
|
||||||
|
*
|
||||||
|
* @author Gerhard Schwarz
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class GermanStemmer
{
    /**
     * Buffer for the terms while stemming them. Reused across calls, which
     * makes a single GermanStemmer instance NOT thread-safe.
     */
    private StringBuffer sb = new StringBuffer();

    /**
     * Amount of characters that are removed with <tt>substitute()</tt> while stemming.
     * Used by strip() to compensate length checks for removed characters.
     */
    private int substCount = 0;

    /**
     * Stemms the given term to an unique <tt>discriminator</tt>.
     *
     * @param term The term that should be stemmed.
     * @return Discriminator for <tt>term</tt>; the term itself (lowercased)
     *         if it is not stemmable.
     */
    protected String stem( String term )
    {
        // Use lowercase for medium stemming.
        term = term.toLowerCase();
        if ( !isStemmable( term ) )
            return term;
        // Reset the StringBuffer.
        sb.delete( 0, sb.length() );
        sb.insert( 0, term );
        // Stemming starts here...
        substitute( sb );
        strip( sb );
        optimize( sb );
        resubstitute( sb );
        removeParticleDenotion( sb );
        return sb.toString();
    }

    /**
     * Checks if a term could be stemmed.
     *
     * @return true if, and only if, the given term consists in letters.
     */
    private boolean isStemmable( String term )
    {
        for ( int c = 0; c < term.length(); c++ ) {
            if ( !Character.isLetter( term.charAt( c ) ) )
                return false;
        }
        return true;
    }

    /**
     * suffix stripping (stemming) on the current term. The stripping is reduced
     * to the seven "base" suffixes "e", "s", "n", "t", "em", "er" and * "nd",
     * from which all regular suffixes are build of. The simplification causes
     * some overstemming, and way more irregular stems, but still provides unique.
     * discriminators in the most of those cases.
     * The algorithm is context free, except of the length restrictions.
     */
    private void strip( StringBuffer buffer )
    {
        boolean doMore = true;
        while ( doMore && buffer.length() > 3 ) {
            if ( ( buffer.length() + substCount > 5 ) &&
                buffer.substring( buffer.length() - 2, buffer.length() ).equals( "nd" ) )
            {
                buffer.delete( buffer.length() - 2, buffer.length() );
            }
            else if ( ( buffer.length() + substCount > 4 ) &&
                buffer.substring( buffer.length() - 2, buffer.length() ).equals( "em" ) ) {
                buffer.delete( buffer.length() - 2, buffer.length() );
            }
            else if ( ( buffer.length() + substCount > 4 ) &&
                buffer.substring( buffer.length() - 2, buffer.length() ).equals( "er" ) ) {
                buffer.delete( buffer.length() - 2, buffer.length() );
            }
            else if ( buffer.charAt( buffer.length() - 1 ) == 'e' ) {
                buffer.deleteCharAt( buffer.length() - 1 );
            }
            else if ( buffer.charAt( buffer.length() - 1 ) == 's' ) {
                buffer.deleteCharAt( buffer.length() - 1 );
            }
            else if ( buffer.charAt( buffer.length() - 1 ) == 'n' ) {
                buffer.deleteCharAt( buffer.length() - 1 );
            }
            // "t" occurs only as suffix of verbs.
            else if ( buffer.charAt( buffer.length() - 1 ) == 't' ) {
                buffer.deleteCharAt( buffer.length() - 1 );
            }
            else {
                doMore = false;
            }
        }
    }

    /**
     * Does some optimizations on the term. This optimisations are
     * contextual.
     */
    private void optimize( StringBuffer buffer )
    {
        // Additional step for female plurals of professions and inhabitants.
        if ( buffer.length() > 5 && buffer.substring( buffer.length() - 5, buffer.length() ).equals( "erin*" ) ) {
            buffer.deleteCharAt( buffer.length() -1 );
            strip( buffer );
        }
        // Additional step for irregular plural nouns like "Matrizen -> Matrix".
        if ( buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
            buffer.setCharAt( buffer.length() - 1, 'x' );
        }
    }

    /**
     * Removes a particle denotion ("ge") from a term.
     */
    private void removeParticleDenotion( StringBuffer buffer )
    {
        if ( buffer.length() > 4 ) {
            for ( int c = 0; c < buffer.length() - 3; c++ ) {
                if ( buffer.substring( c, c + 4 ).equals( "gege" ) ) {
                    buffer.delete( c, c + 2 );
                    return;
                }
            }
        }
    }

    /**
     * Do some substitutions for the term to reduce overstemming:
     *
     * - Substitute Umlauts with their corresponding vowel: äöü -> aou,
     *   "ß" is substituted by "ss"
     * - Substitute a second char of a pair of equal characters with
     *   an asterisk: ?? -> ?*
     * - Substitute some common character combinations with a token:
     *   sch/ch/ei/ie/ig/st -> $/§/%/&/#/!
     */
    private void substitute( StringBuffer buffer )
    {
        substCount = 0;
        for ( int c = 0; c < buffer.length(); c++ ) {
            // Replace the second char of a pair of the equal characters with an asterisk
            if ( c > 0 && buffer.charAt( c ) == buffer.charAt ( c - 1 ) ) {
                buffer.setCharAt( c, '*' );
            }
            // Substitute Umlauts.
            else if ( buffer.charAt( c ) == 'ä' ) {
                buffer.setCharAt( c, 'a' );
            }
            else if ( buffer.charAt( c ) == 'ö' ) {
                buffer.setCharAt( c, 'o' );
            }
            else if ( buffer.charAt( c ) == 'ü' ) {
                buffer.setCharAt( c, 'u' );
            }
            // Fix bug so that 'ß' at the end of a word is replaced.
            else if ( buffer.charAt( c ) == 'ß' ) {
                buffer.setCharAt( c, 's' );
                buffer.insert( c + 1, 's' );
                substCount++;
            }
            // Take care that at least one character is left left side from the current one
            if ( c < buffer.length() - 1 ) {
                // Masking several common character combinations with an token
                if ( ( c < buffer.length() - 2 ) && buffer.charAt( c ) == 's' &&
                    buffer.charAt( c + 1 ) == 'c' && buffer.charAt( c + 2 ) == 'h' )
                {
                    buffer.setCharAt( c, '$' );
                    buffer.delete( c + 1, c + 3 );
                    // BUG FIX: was "substCount =+ 2", which assigns +2 and
                    // discards previously counted substitutions; "+=" accumulates.
                    substCount += 2;
                }
                else if ( buffer.charAt( c ) == 'c' && buffer.charAt( c + 1 ) == 'h' ) {
                    buffer.setCharAt( c, '§' );
                    buffer.deleteCharAt( c + 1 );
                    substCount++;
                }
                else if ( buffer.charAt( c ) == 'e' && buffer.charAt( c + 1 ) == 'i' ) {
                    buffer.setCharAt( c, '%' );
                    buffer.deleteCharAt( c + 1 );
                    substCount++;
                }
                else if ( buffer.charAt( c ) == 'i' && buffer.charAt( c + 1 ) == 'e' ) {
                    buffer.setCharAt( c, '&' );
                    buffer.deleteCharAt( c + 1 );
                    substCount++;
                }
                else if ( buffer.charAt( c ) == 'i' && buffer.charAt( c + 1 ) == 'g' ) {
                    buffer.setCharAt( c, '#' );
                    buffer.deleteCharAt( c + 1 );
                    substCount++;
                }
                else if ( buffer.charAt( c ) == 's' && buffer.charAt( c + 1 ) == 't' ) {
                    buffer.setCharAt( c, '!' );
                    buffer.deleteCharAt( c + 1 );
                    substCount++;
                }
            }
        }
    }

    /**
     * Undoes the changes made by substitute(). That are character pairs and
     * character combinations. Umlauts will remain as their corresponding vowel,
     * as "ß" remains as "ss".
     */
    private void resubstitute( StringBuffer buffer )
    {
        for ( int c = 0; c < buffer.length(); c++ ) {
            if ( buffer.charAt( c ) == '*' ) {
                char x = buffer.charAt( c - 1 );
                buffer.setCharAt( c, x );
            }
            else if ( buffer.charAt( c ) == '$' ) {
                buffer.setCharAt( c, 's' );
                buffer.insert( c + 1, new char[]{'c', 'h'}, 0, 2 );
            }
            else if ( buffer.charAt( c ) == '§' ) {
                buffer.setCharAt( c, 'c' );
                buffer.insert( c + 1, 'h' );
            }
            else if ( buffer.charAt( c ) == '%' ) {
                buffer.setCharAt( c, 'e' );
                buffer.insert( c + 1, 'i' );
            }
            else if ( buffer.charAt( c ) == '&' ) {
                buffer.setCharAt( c, 'i' );
                buffer.insert( c + 1, 'e' );
            }
            else if ( buffer.charAt( c ) == '#' ) {
                buffer.setCharAt( c, 'i' );
                buffer.insert( c + 1, 'g' );
            }
            else if ( buffer.charAt( c ) == '!' ) {
                buffer.setCharAt( c, 's' );
                buffer.insert( c + 1, 't' );
            }
        }
    }

}
|
|
@ -0,0 +1,111 @@
|
||||||
|
package org.apache.lucene.analysis.de;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.LineNumberReader;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Hashtable;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loader for text files that represent a list of stopwords.
|
||||||
|
*
|
||||||
|
* @author Gerhard Schwarz
|
||||||
|
* @version $Id$
|
||||||
|
*
|
||||||
|
* @todo this is not specific to German, it should be moved up
|
||||||
|
*/
|
||||||
|
public class WordlistLoader {

  /**
   * Loads a text file and adds every line as an entry to a HashSet (omitting
   * leading and trailing whitespace). Every line of the file should contain only
   * one word. The words need to be in lowercase if you make use of an
   * Analyzer which uses LowerCaseFilter (like GermanAnalyzer).
   *
   * @param wordfile File containing the wordlist
   * @return A HashSet with the file's words
   * @throws IOException if the file cannot be read
   */
  public static HashSet getWordSet(File wordfile) throws IOException {
    HashSet result = new HashSet();
    FileReader freader = null;
    LineNumberReader lnr = null;
    try {
      // NOTE(review): FileReader uses the platform default charset;
      // callers should ensure the wordlist is encoded accordingly.
      freader = new FileReader(wordfile);
      lnr = new LineNumberReader(freader);
      String word = null;
      while ((word = lnr.readLine()) != null) {
        result.add(word.trim());
      }
    }
    finally {
      // Closing lnr also closes the wrapped freader; the second close is a
      // harmless no-op kept for the case where the FileReader was opened but
      // the LineNumberReader constructor was never reached.
      if (lnr != null)
        lnr.close();
      if (freader != null)
        freader.close();
    }
    return result;
  }

  /**
   * Loads a wordlist located at path/wordfile.
   *
   * @param path Path to the wordlist
   * @param wordfile Name of the wordlist
   *
   * @deprecated Use {@link #getWordSet(File)} instead
   */
  public static Hashtable getWordtable(String path, String wordfile) throws IOException {
    return getWordtable(new File(path, wordfile));
  }

  /**
   * Loads a wordlist by file name.
   *
   * @param wordfile Complete path to the wordlist
   *
   * @deprecated Use {@link #getWordSet(File)} instead
   */
  public static Hashtable getWordtable(String wordfile) throws IOException {
    return getWordtable(new File(wordfile));
  }

  /**
   * Loads a wordlist as a Hashtable.
   *
   * @param wordfile File object that points to the wordlist
   *
   * @deprecated Use {@link #getWordSet(File)} instead
   */
  public static Hashtable getWordtable(File wordfile) throws IOException {
    // getWordSet already returns a HashSet; the original redundant cast was removed.
    HashSet wordSet = getWordSet(wordfile);
    Hashtable result = makeWordTable(wordSet);
    return result;
  }

  /**
   * Builds a wordlist table, using words as both keys and values
   * for backward compatibility.
   *
   * @param wordSet stopword set
   */
  private static Hashtable makeWordTable(HashSet wordSet) {
    Hashtable table = new Hashtable();
    for (Iterator iter = wordSet.iterator(); iter.hasNext();) {
      String word = (String)iter.next();
      table.put(word, word);
    }
    return table;
  }
}
|
|
@ -0,0 +1,5 @@
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
Support for indexing and searching of German text.
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -0,0 +1,259 @@
|
||||||
|
package org.apache.lucene.analysis.ru;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.StopFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.util.Hashtable;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyzer for Russian language. Supports an external list of stopwords (words that
|
||||||
|
* will not be indexed at all).
|
||||||
|
* A default set of stopwords is used unless an alternative list is specified.
|
||||||
|
*
|
||||||
|
* @author Boris Okner, b.okner@rogers.com
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public final class RussianAnalyzer extends Analyzer
|
||||||
|
{
|
||||||
|
// letters (currently unused letters are commented out)
|
||||||
|
private final static char A = 0;
|
||||||
|
private final static char B = 1;
|
||||||
|
private final static char V = 2;
|
||||||
|
private final static char G = 3;
|
||||||
|
private final static char D = 4;
|
||||||
|
private final static char E = 5;
|
||||||
|
private final static char ZH = 6;
|
||||||
|
private final static char Z = 7;
|
||||||
|
private final static char I = 8;
|
||||||
|
private final static char I_ = 9;
|
||||||
|
private final static char K = 10;
|
||||||
|
private final static char L = 11;
|
||||||
|
private final static char M = 12;
|
||||||
|
private final static char N = 13;
|
||||||
|
private final static char O = 14;
|
||||||
|
private final static char P = 15;
|
||||||
|
private final static char R = 16;
|
||||||
|
private final static char S = 17;
|
||||||
|
private final static char T = 18;
|
||||||
|
private final static char U = 19;
|
||||||
|
//private final static char F = 20;
|
||||||
|
private final static char X = 21;
|
||||||
|
//private final static char TS = 22;
|
||||||
|
private final static char CH = 23;
|
||||||
|
private final static char SH = 24;
|
||||||
|
private final static char SHCH = 25;
|
||||||
|
//private final static char HARD = 26;
|
||||||
|
private final static char Y = 27;
|
||||||
|
private final static char SOFT = 28;
|
||||||
|
private final static char AE = 29;
|
||||||
|
private final static char IU = 30;
|
||||||
|
private final static char IA = 31;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List of typical Russian stopwords.
|
||||||
|
*/
|
||||||
|
private static char[][] RUSSIAN_STOP_WORDS = {
|
||||||
|
{A},
|
||||||
|
{B, E, Z},
|
||||||
|
{B, O, L, E, E},
|
||||||
|
{B, Y},
|
||||||
|
{B, Y, L},
|
||||||
|
{B, Y, L, A},
|
||||||
|
{B, Y, L, I},
|
||||||
|
{B, Y, L, O},
|
||||||
|
{B, Y, T, SOFT},
|
||||||
|
{V},
|
||||||
|
{V, A, M},
|
||||||
|
{V, A, S},
|
||||||
|
{V, E, S, SOFT},
|
||||||
|
{V, O},
|
||||||
|
{V, O, T},
|
||||||
|
{V, S, E},
|
||||||
|
{V, S, E, G, O},
|
||||||
|
{V, S, E, X},
|
||||||
|
{V, Y},
|
||||||
|
{G, D, E},
|
||||||
|
{D, A},
|
||||||
|
{D, A, ZH, E},
|
||||||
|
{D, L, IA},
|
||||||
|
{D, O},
|
||||||
|
{E, G, O},
|
||||||
|
{E, E},
|
||||||
|
{E, I_,},
|
||||||
|
{E, IU},
|
||||||
|
{E, S, L, I},
|
||||||
|
{E, S, T, SOFT},
|
||||||
|
{E, SHCH, E},
|
||||||
|
{ZH, E},
|
||||||
|
{Z, A},
|
||||||
|
{Z, D, E, S, SOFT},
|
||||||
|
{I},
|
||||||
|
{I, Z},
|
||||||
|
{I, L, I},
|
||||||
|
{I, M},
|
||||||
|
{I, X},
|
||||||
|
{K},
|
||||||
|
{K, A, K},
|
||||||
|
{K, O},
|
||||||
|
{K, O, G, D, A},
|
||||||
|
{K, T, O},
|
||||||
|
{L, I},
|
||||||
|
{L, I, B, O},
|
||||||
|
{M, N, E},
|
||||||
|
{M, O, ZH, E, T},
|
||||||
|
{M, Y},
|
||||||
|
{N, A},
|
||||||
|
{N, A, D, O},
|
||||||
|
{N, A, SH},
|
||||||
|
{N, E},
|
||||||
|
{N, E, G, O},
|
||||||
|
{N, E, E},
|
||||||
|
{N, E, T},
|
||||||
|
{N, I},
|
||||||
|
{N, I, X},
|
||||||
|
{N, O},
|
||||||
|
{N, U},
|
||||||
|
{O},
|
||||||
|
{O, B},
|
||||||
|
{O, D, N, A, K, O},
|
||||||
|
{O, N},
|
||||||
|
{O, N, A},
|
||||||
|
{O, N, I},
|
||||||
|
{O, N, O},
|
||||||
|
{O, T},
|
||||||
|
{O, CH, E, N, SOFT},
|
||||||
|
{P, O},
|
||||||
|
{P, O, D},
|
||||||
|
{P, R, I},
|
||||||
|
{S},
|
||||||
|
{S, O},
|
||||||
|
{T, A, K},
|
||||||
|
{T, A, K, ZH, E},
|
||||||
|
{T, A, K, O, I_},
|
||||||
|
{T, A, M},
|
||||||
|
{T, E},
|
||||||
|
{T, E, M},
|
||||||
|
{T, O},
|
||||||
|
{T, O, G, O},
|
||||||
|
{T, O, ZH, E},
|
||||||
|
{T, O, I_},
|
||||||
|
{T, O, L, SOFT, K, O},
|
||||||
|
{T, O, M},
|
||||||
|
{T, Y},
|
||||||
|
{U},
|
||||||
|
{U, ZH, E},
|
||||||
|
{X, O, T, IA},
|
||||||
|
{CH, E, G, O},
|
||||||
|
{CH, E, I_},
|
||||||
|
{CH, E, M},
|
||||||
|
{CH, T, O},
|
||||||
|
{CH, T, O, B, Y},
|
||||||
|
{CH, SOFT, E},
|
||||||
|
{CH, SOFT, IA},
|
||||||
|
{AE, T, A},
|
||||||
|
{AE, T, I},
|
||||||
|
{AE, T, O},
|
||||||
|
{IA}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Contains the stopwords used with the StopFilter.
|
||||||
|
*/
|
||||||
|
private Set stopSet = new HashSet();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Charset for Russian letters.
|
||||||
|
* Represents encoding for 32 lowercase Russian letters.
|
||||||
|
* Predefined charsets can be taken from RussianCharSets class
|
||||||
|
*/
|
||||||
|
private char[] charset;
|
||||||
|
|
||||||
|
|
||||||
|
public RussianAnalyzer() {
|
||||||
|
charset = RussianCharsets.UnicodeRussian;
|
||||||
|
stopSet = StopFilter.makeStopSet(
|
||||||
|
makeStopWords(RussianCharsets.UnicodeRussian));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds an analyzer.
|
||||||
|
*/
|
||||||
|
public RussianAnalyzer(char[] charset)
|
||||||
|
{
|
||||||
|
this.charset = charset;
|
||||||
|
stopSet = StopFilter.makeStopSet(makeStopWords(charset));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builds an analyzer with the given stop words.
|
||||||
|
*/
|
||||||
|
public RussianAnalyzer(char[] charset, String[] stopwords)
|
||||||
|
{
|
||||||
|
this.charset = charset;
|
||||||
|
stopSet = StopFilter.makeStopSet(stopwords);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Takes russian stop words and translates them to a String array, using
|
||||||
|
// the given charset
|
||||||
|
private static String[] makeStopWords(char[] charset)
|
||||||
|
{
|
||||||
|
String[] res = new String[RUSSIAN_STOP_WORDS.length];
|
||||||
|
for (int i = 0; i < res.length; i++)
|
||||||
|
{
|
||||||
|
char[] theStopWord = RUSSIAN_STOP_WORDS[i];
|
||||||
|
// translate the word, using the charset
|
||||||
|
StringBuffer theWord = new StringBuffer();
|
||||||
|
for (int j = 0; j < theStopWord.length; j++)
|
||||||
|
{
|
||||||
|
theWord.append(charset[theStopWord[j]]);
|
||||||
|
}
|
||||||
|
res[i] = theWord.toString();
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds an analyzer for the given charset with the given stop words.
 * @todo create a Set version of this ctor
 */
public RussianAnalyzer(char[] charset, Hashtable stopwords) {
    this.charset = charset;
    this.stopSet = new HashSet(stopwords.keySet());
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a TokenStream which tokenizes all the text in the provided Reader.
|
||||||
|
*
|
||||||
|
* @return A TokenStream build from a RussianLetterTokenizer filtered with
|
||||||
|
* RussianLowerCaseFilter, StopFilter, and RussianStemFilter
|
||||||
|
*/
|
||||||
|
public TokenStream tokenStream(String fieldName, Reader reader)
|
||||||
|
{
|
||||||
|
TokenStream result = new RussianLetterTokenizer(reader, charset);
|
||||||
|
result = new RussianLowerCaseFilter(result, charset);
|
||||||
|
result = new StopFilter(result, stopSet);
|
||||||
|
result = new RussianStemFilter(result, charset);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,279 @@
|
||||||
|
package org.apache.lucene.analysis.ru;
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * RussianCharsets class contains encoding schemes (charsets) and toLowerCase() method implementation
 * for russian characters in Unicode, KOI8 and CP1251.
 * Each encoding scheme contains lowercase (positions 0-31) and uppercase (position 32-63) characters.
 * One should be able to add other encoding schemes (like ISO-8859-5 or customized) by adding a new charset
 * and adding logic to toLowerCase() method for that charset.
 *
 * @author Boris Okner, b.okner@rogers.com
 * @version $Id$
 */
public class RussianCharsets
{
    // Unicode Russian charset (lowercase letters only)
    public static char[] UnicodeRussian = {
        '\u0430', '\u0431', '\u0432', '\u0433', '\u0434', '\u0435', '\u0436', '\u0437',
        '\u0438', '\u0439', '\u043A', '\u043B', '\u043C', '\u043D', '\u043E', '\u043F',
        '\u0440', '\u0441', '\u0442', '\u0443', '\u0444', '\u0445', '\u0446', '\u0447',
        '\u0448', '\u0449', '\u044A', '\u044B', '\u044C', '\u044D', '\u044E', '\u044F',
        // upper case
        '\u0410', '\u0411', '\u0412', '\u0413', '\u0414', '\u0415', '\u0416', '\u0417',
        '\u0418', '\u0419', '\u041A', '\u041B', '\u041C', '\u041D', '\u041E', '\u041F',
        '\u0420', '\u0421', '\u0422', '\u0423', '\u0424', '\u0425', '\u0426', '\u0427',
        '\u0428', '\u0429', '\u042A', '\u042B', '\u042C', '\u042D', '\u042E', '\u042F'
    };

    // KOI8 charset
    public static char[] KOI8 = {
        0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
        0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
        0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
        0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
        // upper case
        0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
        0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
        0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
        0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1
    };

    // CP1251 charset
    public static char[] CP1251 = {
        0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
        0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
        0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
        0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
        // upper case
        0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
        0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
        0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
        0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF
    };

    /**
     * Lower-cases a single letter for the given charset.
     * Uppercase letters from the charset's Russian range are mapped to their
     * lowercase counterparts; lowercase Russian letters are returned as-is;
     * anything else falls back to Character.toLowerCase().
     */
    public static char toLowerCase(char letter, char[] charset)
    {
        if (charset == UnicodeRussian)
        {
            if (letter >= '\u0430' && letter <= '\u044F')
            {
                return letter;
            }
            if (letter >= '\u0410' && letter <= '\u042F')
            {
                return (char) (letter + 32);
            }
        }

        if (charset == KOI8)
        {
            // KOI8 stores uppercase 32 above lowercase
            if (letter >= 0xe0 && letter <= 0xff)
            {
                return (char) (letter - 32);
            }
            if (letter >= 0xc0 && letter <= 0xdf)
            {
                return letter;
            }

        }

        if (charset == CP1251)
        {
            // CP1251 stores uppercase 32 below lowercase
            if (letter >= 0xC0 && letter <= 0xDF)
            {
                return (char) (letter + 32);
            }
            if (letter >= 0xE0 && letter <= 0xFF)
            {
                return letter;
            }

        }

        return Character.toLowerCase(letter);
    }
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
package org.apache.lucene.analysis.ru;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.Reader;
|
||||||
|
import org.apache.lucene.analysis.CharTokenizer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A RussianLetterTokenizer is a tokenizer that extends LetterTokenizer by additionally looking up letters
|
||||||
|
* in a given "russian charset". The problem with LeterTokenizer is that it uses Character.isLetter() method,
|
||||||
|
* which doesn't know how to detect letters in encodings like CP1252 and KOI8
|
||||||
|
* (well-known problems with 0xD7 and 0xF7 chars)
|
||||||
|
*
|
||||||
|
* @author Boris Okner, b.okner@rogers.com
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class RussianLetterTokenizer extends CharTokenizer
|
||||||
|
{
|
||||||
|
/** Construct a new LetterTokenizer. */
|
||||||
|
private char[] charset;
|
||||||
|
|
||||||
|
public RussianLetterTokenizer(Reader in, char[] charset)
|
||||||
|
{
|
||||||
|
super(in);
|
||||||
|
this.charset = charset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Collects only characters which satisfy
|
||||||
|
* {@link Character#isLetter(char)}.
|
||||||
|
*/
|
||||||
|
protected boolean isTokenChar(char c)
|
||||||
|
{
|
||||||
|
if (Character.isLetter(c))
|
||||||
|
return true;
|
||||||
|
for (int i = 0; i < charset.length; i++)
|
||||||
|
{
|
||||||
|
if (c == charset[i])
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,60 @@
|
||||||
|
package org.apache.lucene.analysis.ru;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Normalizes token text to lower case, analyzing given ("russian") charset.
|
||||||
|
*
|
||||||
|
* @author Boris Okner, b.okner@rogers.com
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public final class RussianLowerCaseFilter extends TokenFilter
|
||||||
|
{
|
||||||
|
char[] charset;
|
||||||
|
|
||||||
|
public RussianLowerCaseFilter(TokenStream in, char[] charset)
|
||||||
|
{
|
||||||
|
super(in);
|
||||||
|
this.charset = charset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final Token next() throws java.io.IOException
|
||||||
|
{
|
||||||
|
Token t = input.next();
|
||||||
|
|
||||||
|
if (t == null)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
String txt = t.termText();
|
||||||
|
|
||||||
|
char[] chArray = txt.toCharArray();
|
||||||
|
for (int i = 0; i < chArray.length; i++)
|
||||||
|
{
|
||||||
|
chArray[i] = RussianCharsets.toLowerCase(chArray[i], charset);
|
||||||
|
}
|
||||||
|
|
||||||
|
String newTxt = new String(chArray);
|
||||||
|
// create new token
|
||||||
|
Token newToken = new Token(newTxt, t.startOffset(), t.endOffset());
|
||||||
|
|
||||||
|
return newToken;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
package org.apache.lucene.analysis.ru;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A filter that stems Russian words. The implementation was inspired by GermanStemFilter.
|
||||||
|
* The input should be filtered by RussianLowerCaseFilter before passing it to RussianStemFilter ,
|
||||||
|
* because RussianStemFilter only works with lowercase part of any "russian" charset.
|
||||||
|
*
|
||||||
|
* @author Boris Okner, b.okner@rogers.com
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public final class RussianStemFilter extends TokenFilter
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* The actual token in the input stream.
|
||||||
|
*/
|
||||||
|
private Token token = null;
|
||||||
|
private RussianStemmer stemmer = null;
|
||||||
|
|
||||||
|
public RussianStemFilter(TokenStream in, char[] charset)
|
||||||
|
{
|
||||||
|
super(in);
|
||||||
|
stemmer = new RussianStemmer(charset);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return Returns the next token in the stream, or null at EOS
|
||||||
|
*/
|
||||||
|
public final Token next() throws IOException
|
||||||
|
{
|
||||||
|
if ((token = input.next()) == null)
|
||||||
|
{
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
String s = stemmer.stem(token.termText());
|
||||||
|
if (!s.equals(token.termText()))
|
||||||
|
{
|
||||||
|
return new Token(s, token.startOffset(), token.endOffset(),
|
||||||
|
token.type());
|
||||||
|
}
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set a alternative/custom RussianStemmer for this filter.
|
||||||
|
*/
|
||||||
|
public void setStemmer(RussianStemmer stemmer)
|
||||||
|
{
|
||||||
|
if (stemmer != null)
|
||||||
|
{
|
||||||
|
this.stemmer = stemmer;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,629 @@
|
||||||
|
package org.apache.lucene.analysis.ru;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Russian stemming algorithm implementation (see http://snowball.sourceforge.net for detailed description).
|
||||||
|
*
|
||||||
|
* @author Boris Okner, b.okner@rogers.com
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
class RussianStemmer
|
||||||
|
{
|
||||||
|
// letter-code -> character table for the active encoding (see RussianCharsets)
private char[] charset;

// positions of RV, R1 and R2 respectively; set by markPositions(),
// 0 means the region is empty
private int RV, R1, R2;
|
||||||
|
|
||||||
|
// letters (currently unused letters are commented out)
|
||||||
|
private final static char A = 0;
|
||||||
|
//private final static char B = 1;
|
||||||
|
private final static char V = 2;
|
||||||
|
private final static char G = 3;
|
||||||
|
//private final static char D = 4;
|
||||||
|
private final static char E = 5;
|
||||||
|
//private final static char ZH = 6;
|
||||||
|
//private final static char Z = 7;
|
||||||
|
private final static char I = 8;
|
||||||
|
private final static char I_ = 9;
|
||||||
|
//private final static char K = 10;
|
||||||
|
private final static char L = 11;
|
||||||
|
private final static char M = 12;
|
||||||
|
private final static char N = 13;
|
||||||
|
private final static char O = 14;
|
||||||
|
//private final static char P = 15;
|
||||||
|
//private final static char R = 16;
|
||||||
|
private final static char S = 17;
|
||||||
|
private final static char T = 18;
|
||||||
|
private final static char U = 19;
|
||||||
|
//private final static char F = 20;
|
||||||
|
private final static char X = 21;
|
||||||
|
//private final static char TS = 22;
|
||||||
|
//private final static char CH = 23;
|
||||||
|
private final static char SH = 24;
|
||||||
|
private final static char SHCH = 25;
|
||||||
|
//private final static char HARD = 26;
|
||||||
|
private final static char Y = 27;
|
||||||
|
private final static char SOFT = 28;
|
||||||
|
private final static char AE = 29;
|
||||||
|
private final static char IU = 30;
|
||||||
|
private final static char IA = 31;
|
||||||
|
|
||||||
|
// stem definitions
|
||||||
|
private static char[] vowels = { A, E, I, O, U, Y, AE, IU, IA };
|
||||||
|
|
||||||
|
private static char[][] perfectiveGerundEndings1 = {
|
||||||
|
{ V },
|
||||||
|
{ V, SH, I },
|
||||||
|
{ V, SH, I, S, SOFT }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] perfectiveGerund1Predessors = {
|
||||||
|
{ A },
|
||||||
|
{ IA }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] perfectiveGerundEndings2 = { { I, V }, {
|
||||||
|
Y, V }, {
|
||||||
|
I, V, SH, I }, {
|
||||||
|
Y, V, SH, I }, {
|
||||||
|
I, V, SH, I, S, SOFT }, {
|
||||||
|
Y, V, SH, I, S, SOFT }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] adjectiveEndings = {
|
||||||
|
{ E, E },
|
||||||
|
{ I, E },
|
||||||
|
{ Y, E },
|
||||||
|
{ O, E },
|
||||||
|
{ E, I_ },
|
||||||
|
{ I, I_ },
|
||||||
|
{ Y, I_ },
|
||||||
|
{ O, I_ },
|
||||||
|
{ E, M },
|
||||||
|
{ I, M },
|
||||||
|
{ Y, M },
|
||||||
|
{ O, M },
|
||||||
|
{ I, X },
|
||||||
|
{ Y, X },
|
||||||
|
{ U, IU },
|
||||||
|
{ IU, IU },
|
||||||
|
{ A, IA },
|
||||||
|
{ IA, IA },
|
||||||
|
{ O, IU },
|
||||||
|
{ E, IU },
|
||||||
|
{ I, M, I },
|
||||||
|
{ Y, M, I },
|
||||||
|
{ E, G, O },
|
||||||
|
{ O, G, O },
|
||||||
|
{ E, M, U },
|
||||||
|
{O, M, U }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] participleEndings1 = {
|
||||||
|
{ SHCH },
|
||||||
|
{ E, M },
|
||||||
|
{ N, N },
|
||||||
|
{ V, SH },
|
||||||
|
{ IU, SHCH }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] participleEndings2 = {
|
||||||
|
{ I, V, SH },
|
||||||
|
{ Y, V, SH },
|
||||||
|
{ U, IU, SHCH }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] participle1Predessors = {
|
||||||
|
{ A },
|
||||||
|
{ IA }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] reflexiveEndings = {
|
||||||
|
{ S, IA },
|
||||||
|
{ S, SOFT }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] verbEndings1 = {
|
||||||
|
{ I_ },
|
||||||
|
{ L },
|
||||||
|
{ N },
|
||||||
|
{ L, O },
|
||||||
|
{ N, O },
|
||||||
|
{ E, T },
|
||||||
|
{ IU, T },
|
||||||
|
{ L, A },
|
||||||
|
{ N, A },
|
||||||
|
{ L, I },
|
||||||
|
{ E, M },
|
||||||
|
{ N, Y },
|
||||||
|
{ E, T, E },
|
||||||
|
{ I_, T, E },
|
||||||
|
{ T, SOFT },
|
||||||
|
{ E, SH, SOFT },
|
||||||
|
{ N, N, O }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] verbEndings2 = {
|
||||||
|
{ IU },
|
||||||
|
{ U, IU },
|
||||||
|
{ E, N },
|
||||||
|
{ E, I_ },
|
||||||
|
{ IA, T },
|
||||||
|
{ U, I_ },
|
||||||
|
{ I, L },
|
||||||
|
{ Y, L },
|
||||||
|
{ I, M },
|
||||||
|
{ Y, M },
|
||||||
|
{ I, T },
|
||||||
|
{ Y, T },
|
||||||
|
{ I, L, A },
|
||||||
|
{ Y, L, A },
|
||||||
|
{ E, N, A },
|
||||||
|
{ I, T, E },
|
||||||
|
{ I, L, I },
|
||||||
|
{ Y, L, I },
|
||||||
|
{ I, L, O },
|
||||||
|
{ Y, L, O },
|
||||||
|
{ E, N, O },
|
||||||
|
{ U, E, T },
|
||||||
|
{ U, IU, T },
|
||||||
|
{ E, N, Y },
|
||||||
|
{ I, T, SOFT },
|
||||||
|
{ Y, T, SOFT },
|
||||||
|
{ I, SH, SOFT },
|
||||||
|
{ E, I_, T, E },
|
||||||
|
{ U, I_, T, E }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] verb1Predessors = {
|
||||||
|
{ A },
|
||||||
|
{ IA }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] nounEndings = {
|
||||||
|
{ A },
|
||||||
|
{ U },
|
||||||
|
{ I_ },
|
||||||
|
{ O },
|
||||||
|
{ U },
|
||||||
|
{ E },
|
||||||
|
{ Y },
|
||||||
|
{ I },
|
||||||
|
{ SOFT },
|
||||||
|
{ IA },
|
||||||
|
{ E, V },
|
||||||
|
{ O, V },
|
||||||
|
{ I, E },
|
||||||
|
{ SOFT, E },
|
||||||
|
{ IA, X },
|
||||||
|
{ I, IU },
|
||||||
|
{ E, I },
|
||||||
|
{ I, I },
|
||||||
|
{ E, I_ },
|
||||||
|
{ O, I_ },
|
||||||
|
{ E, M },
|
||||||
|
{ A, M },
|
||||||
|
{ O, M },
|
||||||
|
{ A, X },
|
||||||
|
{ SOFT, IU },
|
||||||
|
{ I, IA },
|
||||||
|
{ SOFT, IA },
|
||||||
|
{ I, I_ },
|
||||||
|
{ IA, M },
|
||||||
|
{ IA, M, I },
|
||||||
|
{ A, M, I },
|
||||||
|
{ I, E, I_ },
|
||||||
|
{ I, IA, M },
|
||||||
|
{ I, E, M },
|
||||||
|
{ I, IA, X },
|
||||||
|
{ I, IA, M, I }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] superlativeEndings = {
|
||||||
|
{ E, I_, SH },
|
||||||
|
{ E, I_, SH, E }
|
||||||
|
};
|
||||||
|
|
||||||
|
private static char[][] derivationalEndings = {
|
||||||
|
{ O, S, T },
|
||||||
|
{ O, S, T, SOFT }
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Builds a stemmer with no charset; setCharset() must be called
 * before stem() is used.
 */
public RussianStemmer()
{
    super();
}
|
||||||
|
|
||||||
|
/**
 * Builds a stemmer for the given charset (see RussianCharsets).
 */
public RussianStemmer(char[] charset)
{
    super();
    this.charset = charset;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adjectival ending is an adjective ending,
|
||||||
|
* optionally preceded by participle ending.
|
||||||
|
* Creation date: (17/03/2002 12:14:58 AM)
|
||||||
|
* @param stemmingZone java.lang.StringBuffer
|
||||||
|
*/
|
||||||
|
private boolean adjectival(StringBuffer stemmingZone)
|
||||||
|
{
|
||||||
|
// look for adjective ending in a stemming zone
|
||||||
|
if (!findAndRemoveEnding(stemmingZone, adjectiveEndings))
|
||||||
|
return false;
|
||||||
|
// if adjective ending was found, try for participle ending.
|
||||||
|
// variable r is unused, we are just interested in the side effect of
|
||||||
|
// findAndRemoveEnding():
|
||||||
|
boolean r =
|
||||||
|
findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors)
|
||||||
|
||
|
||||||
|
findAndRemoveEnding(stemmingZone, participleEndings2);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Removes a derivational ending, but only when it lies inside the R2 region.
 * Creation date: (17/03/2002 12:14:58 AM)
 * @param stemmingZone java.lang.StringBuffer
 * @return true if a derivational ending was removed
 */
private boolean derivational(StringBuffer stemmingZone)
{
    int endingLength = findEnding(stemmingZone, derivationalEndings);
    if (endingLength == 0)
        // no derivational ending found
        return false;
    else
    {
        // Ensure that the ending locates in R2.
        // The stemming zone starts at RV, so the ending's absolute start is
        // RV + length() - endingLength; requiring that to be >= R2 is
        // equivalent to the comparison below.
        if (R2 - RV <= stemmingZone.length() - endingLength)
        {
            stemmingZone.setLength(stemmingZone.length() - endingLength);
            return true;
        }
        else
        {
            return false;
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Finds ending among given ending class and returns the length of ending found(0, if not found).
 * Endings are matched right-to-left against the stemming zone, with the last
 * matched character at startIndex. Tables list shorter endings first, so the
 * backwards iteration tries longer endings before shorter ones.
 * Creation date: (17/03/2002 8:18:34 PM)
 */
private int findEnding(StringBuffer stemmingZone, int startIndex, char[][] theEndingClass)
{
    boolean match = false;
    for (int i = theEndingClass.length - 1; i >= 0; i--)
    {
        char[] theEnding = theEndingClass[i];
        // check if the ending is bigger than stemming zone
        if (startIndex < theEnding.length - 1)
        {
            match = false;
            continue;
        }
        match = true;
        int stemmingIndex = startIndex;
        // compare letter codes (translated through the charset) right-to-left
        for (int j = theEnding.length - 1; j >= 0; j--)
        {
            if (stemmingZone.charAt(stemmingIndex--) != charset[theEnding[j]])
            {
                match = false;
                break;
            }
        }
        // check if ending was found
        if (match)
        {
            return theEndingClass[i].length; // cut ending
        }
    }
    return 0;
}
|
||||||
|
|
||||||
|
/**
 * Convenience overload: searches for an ending at the very end of the
 * stemming zone.
 */
private int findEnding(StringBuffer stemmingZone, char[][] theEndingClass)
{
    return findEnding(stemmingZone, stemmingZone.length() - 1, theEndingClass);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the ending among the given class of endings and removes it from stemming zone.
|
||||||
|
* Creation date: (17/03/2002 8:18:34 PM)
|
||||||
|
*/
|
||||||
|
private boolean findAndRemoveEnding(StringBuffer stemmingZone, char[][] theEndingClass)
|
||||||
|
{
|
||||||
|
int endingLength = findEnding(stemmingZone, theEndingClass);
|
||||||
|
if (endingLength == 0)
|
||||||
|
// not found
|
||||||
|
return false;
|
||||||
|
else {
|
||||||
|
stemmingZone.setLength(stemmingZone.length() - endingLength);
|
||||||
|
// cut the ending found
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Finds the ending among the given class of endings, then checks if this ending was
 * preceded by any of given predessors, and if so, removes it from stemming zone.
 * Note that only the ending itself is removed; the predecessor letters stay.
 * Creation date: (17/03/2002 8:18:34 PM)
 */
private boolean findAndRemoveEnding(StringBuffer stemmingZone,
    char[][] theEndingClass, char[][] thePredessors)
{
    int endingLength = findEnding(stemmingZone, theEndingClass);
    if (endingLength == 0)
        // not found
        return false;
    else
    {
        // look for a predecessor immediately before the matched ending
        int predessorLength =
            findEnding(stemmingZone,
                stemmingZone.length() - endingLength - 1,
                thePredessors);
        if (predessorLength == 0)
            return false;
        else {
            stemmingZone.setLength(stemmingZone.length() - endingLength);
            // cut the ending found
            return true;
        }
    }

}
|
||||||
|
|
||||||
|
/**
 * Marks positions of RV, R1 and R2 in a given word.
 * Regions that turn out empty keep their 0 value. Note that the ++i inside
 * each boundary check both tests and advances past the character just
 * scanned, so every region starts immediately after its delimiting letter.
 * Creation date: (16/03/2002 3:40:11 PM)
 */
private void markPositions(String word)
{
    RV = 0;
    R1 = 0;
    R2 = 0;
    int i = 0;
    // find RV: skip leading consonants; RV starts right after the first vowel
    while (word.length() > i && !isVowel(word.charAt(i)))
    {
        i++;
    }
    if (word.length() - 1 < ++i)
        return; // RV zone is empty
    RV = i;
    // find R1: skip the vowel run; R1 starts after the following consonant
    while (word.length() > i && isVowel(word.charAt(i)))
    {
        i++;
    }
    if (word.length() - 1 < ++i)
        return; // R1 zone is empty
    R1 = i;
    // find R2: repeat the consonant/vowel scan within R1
    while (word.length() > i && !isVowel(word.charAt(i)))
    {
        i++;
    }
    if (word.length() - 1 < ++i)
        return; // R2 zone is empty
    while (word.length() > i && isVowel(word.charAt(i)))
    {
        i++;
    }
    if (word.length() - 1 < ++i)
        return; // R2 zone is empty
    R2 = i;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if character is a vowel..
|
||||||
|
* Creation date: (16/03/2002 10:47:03 PM)
|
||||||
|
* @return boolean
|
||||||
|
* @param letter char
|
||||||
|
*/
|
||||||
|
private boolean isVowel(char letter)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < vowels.length; i++)
|
||||||
|
{
|
||||||
|
if (letter == charset[vowels[i]])
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Removes a noun ending, if present.
 * Creation date: (17/03/2002 12:14:58 AM)
 * @param stemmingZone java.lang.StringBuffer
 * @return true if a noun ending was removed
 */
private boolean noun(StringBuffer stemmingZone)
{
    return findAndRemoveEnding(stemmingZone, nounEndings);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perfective gerund endings.
|
||||||
|
* Creation date: (17/03/2002 12:14:58 AM)
|
||||||
|
* @param stemmingZone java.lang.StringBuffer
|
||||||
|
*/
|
||||||
|
private boolean perfectiveGerund(StringBuffer stemmingZone)
|
||||||
|
{
|
||||||
|
return findAndRemoveEnding(
|
||||||
|
stemmingZone,
|
||||||
|
perfectiveGerundEndings1,
|
||||||
|
perfectiveGerund1Predessors)
|
||||||
|
|| findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Removes a reflexive ending, if present.
 * Creation date: (17/03/2002 12:14:58 AM)
 * @param stemmingZone java.lang.StringBuffer
 * @return true if a reflexive ending was removed
 */
private boolean reflexive(StringBuffer stemmingZone)
{
    return findAndRemoveEnding(stemmingZone, reflexiveEndings);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Insert the method's description here.
|
||||||
|
* Creation date: (17/03/2002 12:14:58 AM)
|
||||||
|
* @param stemmingZone java.lang.StringBuffer
|
||||||
|
*/
|
||||||
|
private boolean removeI(StringBuffer stemmingZone)
|
||||||
|
{
|
||||||
|
if (stemmingZone.length() > 0
|
||||||
|
&& stemmingZone.charAt(stemmingZone.length() - 1) == charset[I])
|
||||||
|
{
|
||||||
|
stemmingZone.setLength(stemmingZone.length() - 1);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Insert the method's description here.
|
||||||
|
* Creation date: (17/03/2002 12:14:58 AM)
|
||||||
|
* @param stemmingZone java.lang.StringBuffer
|
||||||
|
*/
|
||||||
|
private boolean removeSoft(StringBuffer stemmingZone)
|
||||||
|
{
|
||||||
|
if (stemmingZone.length() > 0
|
||||||
|
&& stemmingZone.charAt(stemmingZone.length() - 1) == charset[SOFT])
|
||||||
|
{
|
||||||
|
stemmingZone.setLength(stemmingZone.length() - 1);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Replaces the charset used to translate letter codes.
 * Creation date: (16/03/2002 10:58:42 PM)
 * @param newCharset char[]
 */
public void setCharset(char[] newCharset)
{
    charset = newCharset;
}
|
||||||
|
|
||||||
|
/**
 * Finds the stem for given Russian word.
 * The input is expected to be lower-cased in the active charset already
 * (see RussianStemFilter).
 * Creation date: (16/03/2002 3:36:48 PM)
 * @return java.lang.String
 * @param input java.lang.String
 */
public String stem(String input)
{
    markPositions(input);
    if (RV == 0)
        return input; //RV wasn't detected, nothing to stem
    StringBuffer stemmingZone = new StringBuffer(input.substring(RV));
    // stemming goes on in RV
    // Step 1
    if (!perfectiveGerund(stemmingZone))
    {
        reflexive(stemmingZone);
        // variable r is unused, we are just interested in the flow that gets
        // created by logical expression: apply adjectival(); if that fails,
        // apply verb() etc
        boolean r =
            adjectival(stemmingZone)
            || verb(stemmingZone)
            || noun(stemmingZone);
    }
    // Step 2
    removeI(stemmingZone);
    // Step 3
    derivational(stemmingZone);
    // Step 4
    superlative(stemmingZone);
    undoubleN(stemmingZone);
    removeSoft(stemmingZone);
    // return result: the untouched prefix plus the stemmed zone
    return input.substring(0, RV) + stemmingZone.toString();
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Superlative endings.
|
||||||
|
* Creation date: (17/03/2002 12:14:58 AM)
|
||||||
|
* @param stemmingZone java.lang.StringBuffer
|
||||||
|
*/
|
||||||
|
private boolean superlative(StringBuffer stemmingZone)
|
||||||
|
{
|
||||||
|
return findAndRemoveEnding(stemmingZone, superlativeEndings);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Undoubles N.
|
||||||
|
* Creation date: (17/03/2002 12:14:58 AM)
|
||||||
|
* @param stemmingZone java.lang.StringBuffer
|
||||||
|
*/
|
||||||
|
private boolean undoubleN(StringBuffer stemmingZone)
|
||||||
|
{
|
||||||
|
char[][] doubleN = {
|
||||||
|
{ N, N }
|
||||||
|
};
|
||||||
|
if (findEnding(stemmingZone, doubleN) != 0)
|
||||||
|
{
|
||||||
|
stemmingZone.setLength(stemmingZone.length() - 1);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verb endings.
|
||||||
|
* Creation date: (17/03/2002 12:14:58 AM)
|
||||||
|
* @param stemmingZone java.lang.StringBuffer
|
||||||
|
*/
|
||||||
|
private boolean verb(StringBuffer stemmingZone)
|
||||||
|
{
|
||||||
|
return findAndRemoveEnding(
|
||||||
|
stemmingZone,
|
||||||
|
verbEndings1,
|
||||||
|
verb1Predessors)
|
||||||
|
|| findAndRemoveEnding(stemmingZone, verbEndings2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Static method for stemming with different charsets
|
||||||
|
*/
|
||||||
|
public static String stem(String theWord, char[] charset)
|
||||||
|
{
|
||||||
|
RussianStemmer stemmer = new RussianStemmer();
|
||||||
|
stemmer.setCharset(charset);
|
||||||
|
return stemmer.stem(theWord);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,5 @@
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
Support for indexing and searching Russian text.
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -0,0 +1,78 @@
|
||||||
|
package org.apache.lucene.analysis.de;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the German stemmer. The stemming algorithm is known to work less
|
||||||
|
* than perfect, as it doesn't use any word lists with exceptions. We
|
||||||
|
* also check some of the cases where the algorithm is wrong.
|
||||||
|
*
|
||||||
|
* @author Daniel Naber
|
||||||
|
*/
|
||||||
|
public class TestGermanStemFilter extends TestCase {
|
||||||
|
|
||||||
|
public void testStemming() {
|
||||||
|
try {
|
||||||
|
// read test cases from external file:
|
||||||
|
File dataDir = new File(System.getProperty("dataDir", "./bin"));
|
||||||
|
File testFile = new File(dataDir, "org/apache/lucene/analysis/de/data.txt");
|
||||||
|
FileInputStream fis = new FileInputStream(testFile);
|
||||||
|
InputStreamReader isr = new InputStreamReader(fis, "iso-8859-1");
|
||||||
|
BufferedReader breader = new BufferedReader(isr);
|
||||||
|
while(true) {
|
||||||
|
String line = breader.readLine();
|
||||||
|
if (line == null)
|
||||||
|
break;
|
||||||
|
line = line.trim();
|
||||||
|
if (line.startsWith("#") || line.equals(""))
|
||||||
|
continue; // ignore comments and empty lines
|
||||||
|
String[] parts = line.split(";");
|
||||||
|
//System.out.println(parts[0] + " -- " + parts[1]);
|
||||||
|
check(parts[0], parts[1]);
|
||||||
|
}
|
||||||
|
breader.close();
|
||||||
|
isr.close();
|
||||||
|
fis.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
fail();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void check(final String input, final String expected) throws IOException {
|
||||||
|
StandardTokenizer tokenStream = new StandardTokenizer(new StringReader(input));
|
||||||
|
GermanStemFilter filter = new GermanStemFilter(tokenStream);
|
||||||
|
Token t = filter.next();
|
||||||
|
if (t == null)
|
||||||
|
fail();
|
||||||
|
assertEquals(expected, t.termText());
|
||||||
|
filter.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
# German special characters are replaced:
|
||||||
|
häufig;haufig
|
||||||
|
|
||||||
|
# here the stemmer works okay, it maps related words to the same stem:
|
||||||
|
abschließen;abschliess
|
||||||
|
abschließender;abschliess
|
||||||
|
abschließendes;abschliess
|
||||||
|
abschließenden;abschliess
|
||||||
|
|
||||||
|
Tisch;tisch
|
||||||
|
Tische;tisch
|
||||||
|
Tischen;tisch
|
||||||
|
|
||||||
|
Haus;hau
|
||||||
|
Hauses;hau
|
||||||
|
Häuser;hau
|
||||||
|
Häusern;hau
|
||||||
|
# here's a case where overstemming occurs, i.e. a word is
|
||||||
|
# mapped to the same stem as unrelated words:
|
||||||
|
hauen;hau
|
||||||
|
|
||||||
|
# here's a case where understemming occurs, i.e. two related words
|
||||||
|
# are not mapped to the same stem. This is the case with basically
|
||||||
|
# all irregular forms:
|
||||||
|
Drama;drama
|
||||||
|
Dramen;dram
|
||||||
|
|
||||||
|
# replace "ß" with 'ss':
|
||||||
|
Ausmaß;ausmass
|
||||||
|
|
||||||
|
# fake words to test if suffixes are cut off:
|
||||||
|
xxxxxe;xxxxx
|
||||||
|
xxxxxs;xxxxx
|
||||||
|
xxxxxn;xxxxx
|
||||||
|
xxxxxt;xxxxx
|
||||||
|
xxxxxem;xxxxx
|
||||||
|
xxxxxer;xxxxx
|
||||||
|
xxxxxnd;xxxxx
|
||||||
|
# the suffixes are also removed when combined:
|
||||||
|
xxxxxetende;xxxxx
|
||||||
|
|
||||||
|
# words that are shorter than four characters are not changed:
|
||||||
|
xxe;xxe
|
||||||
|
# -em and -er are not removed from words shorter than five characters:
|
||||||
|
xxem;xxem
|
||||||
|
xxer;xxer
|
||||||
|
# -nd is not removed from words shorter than six characters:
|
||||||
|
xxxnd;xxxnd
|
|
@ -0,0 +1,170 @@
|
||||||
|
package org.apache.lucene.analysis.ru;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test case for RussianAnalyzer.
|
||||||
|
*
|
||||||
|
* @author Boris Okner
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class TestRussianAnalyzer extends TestCase
|
||||||
|
{
|
||||||
|
private InputStreamReader inWords;
|
||||||
|
|
||||||
|
private InputStreamReader sampleUnicode;
|
||||||
|
|
||||||
|
private Reader inWordsKOI8;
|
||||||
|
|
||||||
|
private Reader sampleKOI8;
|
||||||
|
|
||||||
|
private Reader inWords1251;
|
||||||
|
|
||||||
|
private Reader sample1251;
|
||||||
|
|
||||||
|
private File dataDir;
|
||||||
|
|
||||||
|
protected void setUp() throws Exception
|
||||||
|
{
|
||||||
|
dataDir = new File(System.getProperty("dataDir", "./bin"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testUnicode() throws IOException
|
||||||
|
{
|
||||||
|
RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.UnicodeRussian);
|
||||||
|
inWords =
|
||||||
|
new InputStreamReader(
|
||||||
|
new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUnicode.txt")),
|
||||||
|
"Unicode");
|
||||||
|
|
||||||
|
sampleUnicode =
|
||||||
|
new InputStreamReader(
|
||||||
|
new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resUnicode.htm")),
|
||||||
|
"Unicode");
|
||||||
|
|
||||||
|
TokenStream in = ra.tokenStream("all", inWords);
|
||||||
|
|
||||||
|
RussianLetterTokenizer sample =
|
||||||
|
new RussianLetterTokenizer(
|
||||||
|
sampleUnicode,
|
||||||
|
RussianCharsets.UnicodeRussian);
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
Token token = in.next();
|
||||||
|
|
||||||
|
if (token == null)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
Token sampleToken = sample.next();
|
||||||
|
assertEquals(
|
||||||
|
"Unicode",
|
||||||
|
token.termText(),
|
||||||
|
sampleToken == null
|
||||||
|
? null
|
||||||
|
: sampleToken.termText());
|
||||||
|
}
|
||||||
|
|
||||||
|
inWords.close();
|
||||||
|
sampleUnicode.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testKOI8() throws IOException
|
||||||
|
{
|
||||||
|
//System.out.println(new java.util.Date());
|
||||||
|
RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.KOI8);
|
||||||
|
// KOI8
|
||||||
|
inWordsKOI8 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testKOI8.txt")), "iso-8859-1");
|
||||||
|
|
||||||
|
sampleKOI8 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resKOI8.htm")), "iso-8859-1");
|
||||||
|
|
||||||
|
TokenStream in = ra.tokenStream("all", inWordsKOI8);
|
||||||
|
RussianLetterTokenizer sample =
|
||||||
|
new RussianLetterTokenizer(
|
||||||
|
sampleKOI8,
|
||||||
|
RussianCharsets.KOI8);
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
Token token = in.next();
|
||||||
|
|
||||||
|
if (token == null)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
Token sampleToken = sample.next();
|
||||||
|
assertEquals(
|
||||||
|
"KOI8",
|
||||||
|
token.termText(),
|
||||||
|
sampleToken == null
|
||||||
|
? null
|
||||||
|
: sampleToken.termText());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
inWordsKOI8.close();
|
||||||
|
sampleKOI8.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test1251() throws IOException
|
||||||
|
{
|
||||||
|
// 1251
|
||||||
|
inWords1251 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/test1251.txt")), "iso-8859-1");
|
||||||
|
|
||||||
|
sample1251 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/res1251.htm")), "iso-8859-1");
|
||||||
|
|
||||||
|
RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.CP1251);
|
||||||
|
TokenStream in = ra.tokenStream("", inWords1251);
|
||||||
|
RussianLetterTokenizer sample =
|
||||||
|
new RussianLetterTokenizer(
|
||||||
|
sample1251,
|
||||||
|
RussianCharsets.CP1251);
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
Token token = in.next();
|
||||||
|
|
||||||
|
if (token == null)
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
Token sampleToken = sample.next();
|
||||||
|
assertEquals(
|
||||||
|
"1251",
|
||||||
|
token.termText(),
|
||||||
|
sampleToken == null
|
||||||
|
? null
|
||||||
|
: sampleToken.termText());
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
inWords1251.close();
|
||||||
|
sample1251.close();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,94 @@
|
||||||
|
package org.apache.lucene.analysis.ru;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copyright 2004 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
public class TestRussianStem extends TestCase
|
||||||
|
{
|
||||||
|
private ArrayList words = new ArrayList();
|
||||||
|
private ArrayList stems = new ArrayList();
|
||||||
|
|
||||||
|
public TestRussianStem(String name)
|
||||||
|
{
|
||||||
|
super(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see TestCase#setUp()
|
||||||
|
*/
|
||||||
|
protected void setUp() throws Exception
|
||||||
|
{
|
||||||
|
super.setUp();
|
||||||
|
//System.out.println(new java.util.Date());
|
||||||
|
String str;
|
||||||
|
|
||||||
|
File dataDir = new File(System.getProperty("dataDir", "./bin"));
|
||||||
|
|
||||||
|
// open and read words into an array list
|
||||||
|
BufferedReader inWords =
|
||||||
|
new BufferedReader(
|
||||||
|
new InputStreamReader(
|
||||||
|
new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/wordsUnicode.txt")),
|
||||||
|
"Unicode"));
|
||||||
|
while ((str = inWords.readLine()) != null)
|
||||||
|
{
|
||||||
|
words.add(str);
|
||||||
|
}
|
||||||
|
inWords.close();
|
||||||
|
|
||||||
|
// open and read stems into an array list
|
||||||
|
BufferedReader inStems =
|
||||||
|
new BufferedReader(
|
||||||
|
new InputStreamReader(
|
||||||
|
new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/stemsUnicode.txt")),
|
||||||
|
"Unicode"));
|
||||||
|
while ((str = inStems.readLine()) != null)
|
||||||
|
{
|
||||||
|
stems.add(str);
|
||||||
|
}
|
||||||
|
inStems.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see TestCase#tearDown()
|
||||||
|
*/
|
||||||
|
protected void tearDown() throws Exception
|
||||||
|
{
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testStem()
|
||||||
|
{
|
||||||
|
for (int i = 0; i < words.size(); i++)
|
||||||
|
{
|
||||||
|
//if ( (i % 100) == 0 ) System.err.println(i);
|
||||||
|
String realStem =
|
||||||
|
RussianStemmer.stem(
|
||||||
|
(String) words.get(i),
|
||||||
|
RussianCharsets.UnicodeRussian);
|
||||||
|
assertEquals("unicode", stems.get(i), realStem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
[вмест][сил][электромагнитн][энерг][имел][представлен][скаж][жрец][древн][египт][знан][хран][тайн][узк][круг][посвящен][всяк][времен][виток][прин][соб][нов][технолог][сам][дел][раскрыва][потаен][знан][прежн][век][говор][нов][информац][станов][доступн][широк][круг][пользовател][тех][случа][сознан][обществ][готов][восприня][воспользова]
|
|
@ -0,0 +1 @@
|
||||||
|
[淄庞註[由蘛[芴潘砸贤燎紊晕][芪乓荾[赏盘][幸拍釉磷膛蝅[铀林][忠琶][囊抛蝅[徘尚註[谖廖][纫廖][粤饰][遮薦[艘涨][邢幼演盼][子阉][滓磐盼][咨韵薦[幸晌][酉耛[蜗譣[耘任咸锨][恿蚞[呐蘛[伊铀屹琢][邢粤盼][谖廖][幸胖蝅[着薦[窍紫襗[蜗譣[晌葡彝撩][釉廖献][南釉招蝅[凵蚁薦[艘涨][邢特谙琢耘蘛[耘萞[犹辙羃[酉谖廖][下菖釉譣[窍韵譣[紫有疑窝][紫有咸刳献羃
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,2 @@
|
||||||
|
Вместе с тем о силе электромагнитной энергии имели представление еще, скажем, жрецы Древнего Египта. Но знание это хранилось в тайне, в
|
||||||
|
узком кругу посвященных. Всякий временной виток, принося с собой новые технологии, на самом деле раскрывает потаенное знание прежних веков. Мы уже говорили, что новая информация становится доступной широкому кругу пользователей только в тех случаях, когда сознание общества готово ее воспринять и воспользоваться ею.
|
|
@ -0,0 +1,2 @@
|
||||||
|
Вместе с тем о силе электромагнитной энергии имели представление еще, скажем, жрецы Древнего Египта. Но знание это хранилось в тайне, в
|
||||||
|
узком кругу посвященных. Всякий временной виток, принося с собой новые технологии, на самом деле раскрывает потаенное знание прежних веков. Мы уже говорили, что новая информация становится доступной широкому кругу пользователей только в тех случаях, когда сознание общества готово ее воспринять и воспользоваться ею.
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue