mirror of https://github.com/apache/lucene.git
convert Hashtable to Set, to avoid unnecessary synchronization issues
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150225 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c436cd8914
commit
c230388a92
|
@ -55,31 +55,55 @@ package org.apache.lucene.analysis;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Hashtable;
|
import java.util.Hashtable;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
/** Removes stop words from a token stream. */
|
/**
|
||||||
|
* Removes stop words from a token stream.
|
||||||
|
*/
|
||||||
|
|
||||||
public final class StopFilter extends TokenFilter {
|
public final class StopFilter extends TokenFilter {
|
||||||
|
|
||||||
private Hashtable table;
|
private Set table;
|
||||||
|
|
||||||
/** Constructs a filter which removes words from the input
|
/**
|
||||||
TokenStream that are named in the array of words. */
|
* Constructs a filter which removes words from the input
|
||||||
|
* TokenStream that are named in the array of words.
|
||||||
|
*/
|
||||||
public StopFilter(TokenStream in, String[] stopWords) {
|
public StopFilter(TokenStream in, String[] stopWords) {
|
||||||
super(in);
|
super(in);
|
||||||
table = makeStopTable(stopWords);
|
table = makeStopSet(stopWords);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Constructs a filter which removes words from the input
|
/**
|
||||||
TokenStream that are named in the Hashtable. */
|
* Constructs a filter which removes words from the input
|
||||||
|
* TokenStream that are named in the Hashtable.
|
||||||
|
*
|
||||||
|
* @deprecated Use {@link #StopFilter(TokenStream, Set)} StopFilter(TokenStream,Map)} instead
|
||||||
|
*/
|
||||||
public StopFilter(TokenStream in, Hashtable stopTable) {
|
public StopFilter(TokenStream in, Hashtable stopTable) {
|
||||||
|
super(in);
|
||||||
|
table = stopTable.keySet();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a filter which removes words from the input
|
||||||
|
* TokenStream that are named in the Set.
|
||||||
|
*/
|
||||||
|
public StopFilter(TokenStream in, Set stopTable) {
|
||||||
super(in);
|
super(in);
|
||||||
table = stopTable;
|
table = stopTable;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Builds a Hashtable from an array of stop words, appropriate for passing
|
/**
|
||||||
into the StopFilter constructor. This permits this table construction to
|
* Builds a Hashtable from an array of stop words,
|
||||||
be cached once when an Analyzer is constructed. */
|
* appropriate for passing into the StopFilter constructor.
|
||||||
|
* This permits this table construction to be cached once when
|
||||||
|
* an Analyzer is constructed.
|
||||||
|
*
|
||||||
|
* @deprecated Use {@link #makeStopSet(String[] makeStopSet) instead.
|
||||||
|
*/
|
||||||
public static final Hashtable makeStopTable(String[] stopWords) {
|
public static final Hashtable makeStopTable(String[] stopWords) {
|
||||||
Hashtable stopTable = new Hashtable(stopWords.length);
|
Hashtable stopTable = new Hashtable(stopWords.length);
|
||||||
for (int i = 0; i < stopWords.length; i++)
|
for (int i = 0; i < stopWords.length; i++)
|
||||||
|
@ -87,11 +111,26 @@ public final class StopFilter extends TokenFilter {
|
||||||
return stopTable;
|
return stopTable;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the next input Token whose termText() is not a stop word. */
|
/**
|
||||||
|
* Builds a Set from an array of stop words,
|
||||||
|
* appropriate for passing into the StopFilter constructor.
|
||||||
|
* This permits this table construction to be cached once when
|
||||||
|
* an Analyzer is constructed.
|
||||||
|
*/
|
||||||
|
public static final Set makeStopSet(String[] stopWords) {
|
||||||
|
Set stopTable = new HashSet(stopWords.length);
|
||||||
|
for (int i = 0; i < stopWords.length; i++)
|
||||||
|
stopTable.add(stopWords[i]);
|
||||||
|
return stopTable;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next input Token whose termText() is not a stop word.
|
||||||
|
*/
|
||||||
public final Token next() throws IOException {
|
public final Token next() throws IOException {
|
||||||
// return the first non-stop word found
|
// return the first non-stop word found
|
||||||
for (Token token = input.next(); token != null; token = input.next())
|
for (Token token = input.next(); token != null; token = input.next())
|
||||||
if (table.get(token.termText) == null)
|
if (!table.contains(token.termText))
|
||||||
return token;
|
return token;
|
||||||
// reached EOS -- return null
|
// reached EOS -- return null
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -68,71 +68,70 @@ import java.util.Hashtable;
|
||||||
* @author Gerhard Schwarz
|
* @author Gerhard Schwarz
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
*/
|
*/
|
||||||
public class WordlistLoader
|
public class WordlistLoader {
|
||||||
{
|
/**
|
||||||
/**
|
* @param path Path to the wordlist
|
||||||
* @param path Path to the wordlist
|
* @param wordfile Name of the wordlist
|
||||||
* @param wordfile Name of the wordlist
|
*/
|
||||||
*/
|
public static Hashtable getWordtable(String path, String wordfile) {
|
||||||
public static Hashtable getWordtable( String path, String wordfile ) {
|
if (path == null || wordfile == null) {
|
||||||
if ( path == null || wordfile == null ) {
|
return new Hashtable();
|
||||||
return new Hashtable();
|
|
||||||
}
|
|
||||||
return getWordtable( new File( path, wordfile ) );
|
|
||||||
}
|
}
|
||||||
|
return getWordtable(new File(path, wordfile));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param wordfile Complete path to the wordlist
|
* @param wordfile Complete path to the wordlist
|
||||||
*/
|
*/
|
||||||
public static Hashtable getWordtable( String wordfile ) {
|
public static Hashtable getWordtable(String wordfile) {
|
||||||
if ( wordfile == null ) {
|
if (wordfile == null) {
|
||||||
return new Hashtable();
|
return new Hashtable();
|
||||||
}
|
|
||||||
return getWordtable( new File( wordfile ) );
|
|
||||||
}
|
}
|
||||||
|
return getWordtable(new File(wordfile));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param wordfile File containing the wordlist
|
* @param wordfile File containing the wordlist
|
||||||
*/
|
*/
|
||||||
public static Hashtable getWordtable( File wordfile ) {
|
public static Hashtable getWordtable(File wordfile) {
|
||||||
if ( wordfile == null ) {
|
if (wordfile == null) {
|
||||||
return new Hashtable();
|
return new Hashtable();
|
||||||
}
|
|
||||||
Hashtable result = null;
|
|
||||||
try {
|
|
||||||
LineNumberReader lnr = new LineNumberReader( new FileReader( wordfile ) );
|
|
||||||
String word = null;
|
|
||||||
String[] stopwords = new String[100];
|
|
||||||
int wordcount = 0;
|
|
||||||
while ( ( word = lnr.readLine() ) != null ) {
|
|
||||||
wordcount++;
|
|
||||||
if ( wordcount == stopwords.length ) {
|
|
||||||
String[] tmp = new String[stopwords.length + 50];
|
|
||||||
System.arraycopy( stopwords, 0, tmp, 0, wordcount );
|
|
||||||
stopwords = tmp;
|
|
||||||
}
|
|
||||||
stopwords[wordcount-1] = word;
|
|
||||||
}
|
|
||||||
result = makeWordTable( stopwords, wordcount );
|
|
||||||
}
|
|
||||||
// On error, use an empty table
|
|
||||||
catch ( IOException e ) {
|
|
||||||
result = new Hashtable();
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
Hashtable result = null;
|
||||||
|
try {
|
||||||
|
LineNumberReader lnr = new LineNumberReader(new FileReader(wordfile));
|
||||||
|
String word = null;
|
||||||
|
String[] stopwords = new String[100];
|
||||||
|
int wordcount = 0;
|
||||||
|
while ((word = lnr.readLine()) != null) {
|
||||||
|
wordcount++;
|
||||||
|
if (wordcount == stopwords.length) {
|
||||||
|
String[] tmp = new String[stopwords.length + 50];
|
||||||
|
System.arraycopy(stopwords, 0, tmp, 0, wordcount);
|
||||||
|
stopwords = tmp;
|
||||||
|
}
|
||||||
|
stopwords[wordcount - 1] = word;
|
||||||
|
}
|
||||||
|
result = makeWordTable(stopwords, wordcount);
|
||||||
|
}
|
||||||
|
// On error, use an empty table
|
||||||
|
catch (IOException e) {
|
||||||
|
result = new Hashtable();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds the wordlist table.
|
* Builds the wordlist table.
|
||||||
*
|
*
|
||||||
* @param words Word that where read
|
* @param words Word that where read
|
||||||
* @param length Amount of words that where read into <tt>words</tt>
|
* @param length Amount of words that where read into <tt>words</tt>
|
||||||
*/
|
*/
|
||||||
private static Hashtable makeWordTable( String[] words, int length ) {
|
private static Hashtable makeWordTable(String[] words, int length) {
|
||||||
Hashtable table = new Hashtable( length );
|
Hashtable table = new Hashtable(length);
|
||||||
for ( int i = 0; i < length; i++ ) {
|
for (int i = 0; i < length; i++) {
|
||||||
table.put( words[i], words[i] );
|
table.put(words[i], words[i]);
|
||||||
}
|
|
||||||
return table;
|
|
||||||
}
|
}
|
||||||
|
return table;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue