mirror of
https://github.com/apache/lucene.git
synced 2025-02-28 05:19:17 +00:00
LUCENE-2564: Cut over WordlistLoader to CharArrayMap/Set and use CharsetDecoder to detect encoding problems early
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1200091 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
dc6b4b6533
commit
c0a7abbec0
@ -33,6 +33,7 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||||||
import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
|
import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
|
||||||
import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
|
import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
|
||||||
import org.apache.lucene.analysis.core.StopFilter;
|
import org.apache.lucene.analysis.core.StopFilter;
|
||||||
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -89,16 +90,11 @@ public final class SmartChineseAnalyzer extends Analyzer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static CharArraySet loadDefaultStopWordSet() throws IOException {
|
static CharArraySet loadDefaultStopWordSet() throws IOException {
|
||||||
InputStream stream = SmartChineseAnalyzer.class
|
// make sure it is unmodifiable as we expose it in the outer class
|
||||||
.getResourceAsStream(DEFAULT_STOPWORD_FILE);
|
return CharArraySet.unmodifiableSet(WordlistLoader.getWordSet(IOUtils
|
||||||
try {
|
.getDecodingReader(SmartChineseAnalyzer.class, DEFAULT_STOPWORD_FILE,
|
||||||
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
|
IOUtils.CHARSET_UTF_8), STOPWORD_FILE_COMMENT,
|
||||||
// make sure it is unmodifiable as we expose it in the outer class
|
Version.LUCENE_CURRENT));
|
||||||
return CharArraySet.unmodifiableSet(WordlistLoader.getWordSet(reader,
|
|
||||||
STOPWORD_FILE_COMMENT, Version.LUCENE_CURRENT));
|
|
||||||
} finally {
|
|
||||||
stream.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user