From 2b9effb894dba18b05972a0bca6e94614d691d63 Mon Sep 17 00:00:00 2001
From: Daniel Naber
Date: Fri, 9 Jun 2006 22:15:47 +0000
Subject: [PATCH] deprecate the analysis.nl.WordlistLoader class because it's
 not robust (fails silently) and use analysis.WordlistLoader instead

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@413180 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                |  4 ++
 .../lucene/analysis/nl/DutchAnalyzer.java  | 43 +++++++++++++------
 .../lucene/analysis/nl/WordlistLoader.java |  8 +++-
 .../lucene/analysis/WordlistLoader.java    | 32 ++++++++++++++
 4 files changed, 73 insertions(+), 14 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 4d32f77d5c2..16775619299 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -14,6 +14,10 @@ API Changes
  1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow
     changing of termText via setTermText().  (Yonik Seeley)
 
+ 2. org.apache.lucene.analysis.nl.WordlistLoader has been deprecated
+    and should be replaced with the WordlistLoader class in
+    package org.apache.lucene.analysis (Daniel Naber)
+
 Bug fixes
 
  1. Fixed the web application demo (built with "ant war-demo") which
diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
index f1194f30f28..a035f1c6974 100644
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@@ -23,6 +23,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 import java.io.File;
+import java.io.IOException;
 import java.io.Reader;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -68,18 +69,20 @@ public class DutchAnalyzer extends Analyzer {
    */
   private Set excltable = new HashSet();
 
-  private Map _stemdict = new HashMap();
+  private Map stemdict = new HashMap();
 
   /**
-   * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS}).
+   * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS})
+   * and a few default entries for the stem dictionary.
+   *
    */
   public DutchAnalyzer() {
     stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS);
-    _stemdict.put("fiets", "fiets"); //otherwise fiet
-    _stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet
-    _stemdict.put("ei", "eier");
-    _stemdict.put("kind", "kinder");
+    stemdict.put("fiets", "fiets"); //otherwise fiet
+    stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet
+    stemdict.put("ei", "eier");
+    stemdict.put("kind", "kinder");
   }
 
   /**
@@ -106,7 +109,12 @@ public class DutchAnalyzer extends Analyzer {
    * @param stopwords
    */
   public DutchAnalyzer(File stopwords) {
-    stoptable = new HashSet(WordlistLoader.getWordtable(stopwords).keySet());
+    try {
+      stoptable = org.apache.lucene.analysis.WordlistLoader.getWordSet(stopwords);
+    } catch (IOException e) {
+      // TODO: throw IOException
+      throw new RuntimeException(e);
+    }
   }
 
   /**
@@ -129,17 +137,26 @@ public class DutchAnalyzer extends Analyzer {
    * Builds an exclusionlist from the words contained in the given file.
    */
   public void setStemExclusionTable(File exclusionlist) {
-    excltable = new HashSet(WordlistLoader.getWordtable(exclusionlist).keySet());
+    try {
+      excltable = org.apache.lucene.analysis.WordlistLoader.getWordSet(exclusionlist);
+    } catch (IOException e) {
+      // TODO: throw IOException
+      throw new RuntimeException(e);
+    }
   }
 
   /**
    * Reads a stemdictionary file , that overrules the stemming algorithm
    * This is a textfile that contains per line
-   * word\tstem
-   * i.e: tabseperated
+   * word\tstem, i.e. two tab separated words
    */
-  public void setStemDictionary(File stemdict) {
-    _stemdict = WordlistLoader.getStemDict(stemdict);
+  public void setStemDictionary(File stemdictFile) {
+    try {
+      stemdict = org.apache.lucene.analysis.WordlistLoader.getStemDict(stemdictFile);
+    } catch (IOException e) {
+      // TODO: throw IOException
+      throw new RuntimeException(e);
+    }
   }
 
   /**
@@ -152,7 +169,7 @@ public class DutchAnalyzer extends Analyzer {
     TokenStream result = new StandardTokenizer(reader);
     result = new StandardFilter(result);
     result = new StopFilter(result, stoptable);
-    result = new DutchStemFilter(result, excltable, _stemdict);
+    result = new DutchStemFilter(result, excltable, stemdict);
     return result;
   }
 }
diff --git a/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/WordlistLoader.java b/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/WordlistLoader.java
index 7fc3d26ba46..285defbd537 100644
--- a/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/WordlistLoader.java
+++ b/contrib/analyzers/src/java/org/apache/lucene/analysis/nl/WordlistLoader.java
@@ -23,16 +23,19 @@ import java.io.LineNumberReader;
 import java.util.HashMap;
 
 /**
- * @author Gerhard Schwarz
  *
  * Loads a text file and adds every line as an entry to a Hashtable. Every line
  * should contain only one word. If the file is not found or on any error, an
  * empty table is returned.
+ *
+ * @author Gerhard Schwarz
+ * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader} instead
  */
 public class WordlistLoader {
 
   /**
    * @param path Path to the wordlist
    * @param wordfile Name of the wordlist
+   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getWordSet(File)} instead
    */
   public static HashMap getWordtable(String path, String wordfile) {
     if (path == null || wordfile == null) {
@@ -43,6 +46,7 @@ public class WordlistLoader {
 
   /**
    * @param wordfile Complete path to the wordlist
+   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getWordSet(File)} instead
    */
   public static HashMap getWordtable(String wordfile) {
     if (wordfile == null) {
@@ -57,6 +61,7 @@ public class WordlistLoader {
    * i.e. tab seperated)
    *
    * @return Stem dictionary that overrules, the stemming algorithm
+   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getStemDict(File)} instead
    */
   public static HashMap getStemDict(File wordstemfile) {
     if (wordstemfile == null) {
@@ -78,6 +83,7 @@ public class WordlistLoader {
 
   /**
    * @param wordfile File containing the wordlist
+   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getWordSet(File)} instead
    */
   public static HashMap getWordtable(File wordfile) {
     if (wordfile == null) {
diff --git a/src/java/org/apache/lucene/analysis/WordlistLoader.java b/src/java/org/apache/lucene/analysis/WordlistLoader.java
index 02ddc02ea44..6d20eaa61b6 100644
--- a/src/java/org/apache/lucene/analysis/WordlistLoader.java
+++ b/src/java/org/apache/lucene/analysis/WordlistLoader.java
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.Reader;
+import java.util.HashMap;
 import java.util.HashSet;
 
 /**
@@ -84,4 +85,35 @@ public class WordlistLoader {
     return result;
   }
 
+  /**
+   * Reads a stem dictionary. Each line contains:
+   * word\tstem
+   * (i.e. two tab separated words)
+   *
+   * @return stem dictionary that overrules the stemming algorithm
+   * @throws IOException
+   */
+  public static HashMap getStemDict(File wordstemfile) throws IOException {
+    if (wordstemfile == null)
+      throw new NullPointerException("wordstemfile may not be null");
+    HashMap result = new HashMap();
+    BufferedReader br = null;
+    FileReader fr = null;
+    try {
+      fr = new FileReader(wordstemfile);
+      br = new BufferedReader(fr);
+      String line;
+      while ((line = br.readLine()) != null) {
+        String[] wordstem = line.split("\t", 2);
+        result.put(wordstem[0], wordstem[1]);
+      }
+    } finally {
+      if (fr != null)
+        fr.close();
+      if (br != null)
+        br.close();
+    }
+    return result;
+  }
+
 }
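
For callers migrating their own code along the same lines as the DutchAnalyzer changes in this patch, a minimal, illustrative sketch follows (not part of the patch; the WordlistMigrationExample class and the file names are hypothetical). It contrasts the deprecated loader, which silently returns an empty table on any error, with the replacement in org.apache.lucene.analysis, which reports problems as an IOException:

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

public class WordlistMigrationExample {
  public static void main(String[] args) throws IOException {
    File stopwordFile = new File("dutch-stopwords.txt");  // hypothetical word list, one word per line
    File stemDictFile = new File("dutch-stemdict.txt");   // hypothetical stem overrides, one word\tstem pair per line

    // Old, deprecated loader: any I/O problem yields an empty table.
    // HashMap table = org.apache.lucene.analysis.nl.WordlistLoader.getWordtable(stopwordFile);
    // Set stopSet = new HashSet(table.keySet());

    // New loader: I/O problems surface as an IOException instead of being swallowed.
    Set stopSet = org.apache.lucene.analysis.WordlistLoader.getWordSet(stopwordFile);
    HashMap stemOverrides = org.apache.lucene.analysis.WordlistLoader.getStemDict(stemDictFile);

    System.out.println(stopSet.size() + " stop words, " + stemOverrides.size() + " stem overrides loaded");
  }
}

Within DutchAnalyzer itself the patch wraps the IOException in a RuntimeException for now (see the TODO comments above), so the existing constructor and setter signatures remain unchanged.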
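
The DutchAnalyzer changes also alter its failure mode: a missing or unreadable stop word, exclusion, or stem dictionary file now results in a RuntimeException wrapping the IOException rather than a silently empty table. A small usage sketch under that assumption (the file name is hypothetical, and the token iteration assumes the TokenStream.next()/Token.termText() API of this Lucene era):

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.nl.DutchAnalyzer;

public class DutchAnalyzerDemo {
  public static void main(String[] args) throws IOException {
    DutchAnalyzer analyzer = new DutchAnalyzer();

    // Optional: override the stemmer for individual words from a word\tstem file.
    // The path is hypothetical; an unreadable file now triggers a RuntimeException.
    analyzer.setStemDictionary(new File("dutch-stemdict.txt"));

    TokenStream stream = analyzer.tokenStream("content", new StringReader("fietsen en bromfietsen"));
    Token token;
    while ((token = stream.next()) != null) {
      System.out.println(token.termText());
    }
  }
}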