完善注释,规范流实例化时使用StandardCharsets常量;
This commit is contained in:
parent
43e3ba1c9d
commit
07d1c132a7
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* IK 中文分词 版本 7.0
|
||||
* IK Analyzer release 7.0
|
||||
* update by 高志成(magese@live.cn)
|
||||
* IK 中文分词 版本 7.4
|
||||
* IK Analyzer release 7.4
|
||||
* update by Magese(magese@live.cn)
|
||||
*/
|
||||
package org.wltea.analyzer.dic;
|
||||
|
||||
@ -9,6 +9,7 @@ import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
@ -16,7 +17,7 @@ import org.wltea.analyzer.cfg.Configuration;
|
||||
import org.wltea.analyzer.cfg.DefaultConfig;
|
||||
|
||||
/**
|
||||
* 词典管理类,单子模式
|
||||
* 词典管理类,单例模式
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public class Dictionary {
|
||||
@ -45,6 +46,11 @@ public class Dictionary {
|
||||
*/
|
||||
private Configuration cfg;
|
||||
|
||||
/**
|
||||
* 私有构造方法,阻止外部直接实例化本类
|
||||
*
|
||||
* @param cfg ik分词器配置实例
|
||||
*/
|
||||
private Dictionary(Configuration cfg) {
|
||||
this.cfg = cfg;
|
||||
this.loadMainDict();
|
||||
@ -84,15 +90,19 @@ public class Dictionary {
|
||||
/**
|
||||
* 重新更新词典
|
||||
* 由于停用词等不经常变也不建议常增加,故这里只修改动态扩展词库
|
||||
*
|
||||
* @param inputStreamList 词典文件IO流集合
|
||||
*/
|
||||
public static void reloadDic(List<InputStream> inputStreamList) {
|
||||
// 如果本类单例尚未实例化,则先进行初始化操作
|
||||
if (singleton == null) {
|
||||
Configuration cfg = DefaultConfig.getInstance();
|
||||
initial(cfg);
|
||||
}
|
||||
// 对词典流集合进行循环读取,将读取到的词语加载到主词典中
|
||||
for (InputStream is : inputStreamList) {
|
||||
try {
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
@ -124,7 +134,7 @@ public class Dictionary {
|
||||
if (words != null) {
|
||||
for (String word : words) {
|
||||
if (word != null) {
|
||||
//批量加载词条到主内存词典中
|
||||
// 批量加载词条到主内存词典中
|
||||
singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
}
|
||||
@ -138,7 +148,7 @@ public class Dictionary {
|
||||
if (words != null) {
|
||||
for (String word : words) {
|
||||
if (word != null) {
|
||||
//批量屏蔽词条
|
||||
// 批量屏蔽词条
|
||||
singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
}
|
||||
@ -197,16 +207,16 @@ public class Dictionary {
|
||||
* 加载主词典及扩展词典
|
||||
*/
|
||||
private void loadMainDict() {
|
||||
//建立一个主词典实例
|
||||
// 建立一个主词典实例
|
||||
_MainDict = new DictSegment((char) 0);
|
||||
//读取主词典文件
|
||||
// 读取主词典文件
|
||||
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary());
|
||||
if (is == null) {
|
||||
throw new RuntimeException("Main Dictionary not found!!!");
|
||||
}
|
||||
|
||||
try {
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
@ -226,7 +236,7 @@ public class Dictionary {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
//加载扩展词典
|
||||
// 加载扩展词典
|
||||
this.loadExtDict();
|
||||
}
|
||||
|
||||
@ -234,26 +244,26 @@ public class Dictionary {
|
||||
* 加载用户配置的扩展词典到主词库表
|
||||
*/
|
||||
private void loadExtDict() {
|
||||
//加载扩展词典配置
|
||||
// 加载扩展词典配置
|
||||
List<String> extDictFiles = cfg.getExtDictionarys();
|
||||
if (extDictFiles != null) {
|
||||
InputStream is;
|
||||
for (String extDictName : extDictFiles) {
|
||||
//读取扩展词典文件
|
||||
// 读取扩展词典文件
|
||||
System.out.println("加载扩展词典:" + extDictName);
|
||||
is = this.getClass().getClassLoader().getResourceAsStream(extDictName);
|
||||
//如果找不到扩展的字典,则忽略
|
||||
// 如果找不到扩展的字典,则忽略
|
||||
if (is == null) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
//加载扩展词典数据到主内存词典中
|
||||
//System.out.println(theWord);
|
||||
// 加载扩展词典数据到主内存词典中
|
||||
// System.out.println(theWord);
|
||||
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
@ -277,28 +287,28 @@ public class Dictionary {
|
||||
* 加载用户扩展的停止词词典
|
||||
*/
|
||||
private void loadStopWordDict() {
|
||||
//建立一个主词典实例
|
||||
// 建立一个主词典实例
|
||||
_StopWordDict = new DictSegment((char) 0);
|
||||
//加载扩展停止词典
|
||||
// 加载扩展停止词典
|
||||
List<String> extStopWordDictFiles = cfg.getExtStopWordDictionarys();
|
||||
if (extStopWordDictFiles != null) {
|
||||
InputStream is;
|
||||
for (String extStopWordDictName : extStopWordDictFiles) {
|
||||
System.out.println("加载扩展停止词典:" + extStopWordDictName);
|
||||
//读取扩展词典文件
|
||||
// 读取扩展词典文件
|
||||
is = this.getClass().getClassLoader().getResourceAsStream(extStopWordDictName);
|
||||
//如果找不到扩展的字典,则忽略
|
||||
// 如果找不到扩展的字典,则忽略
|
||||
if (is == null) {
|
||||
continue;
|
||||
}
|
||||
try {
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
if (theWord != null && !"".equals(theWord.trim())) {
|
||||
//System.out.println(theWord);
|
||||
//加载扩展停止词典数据到内存中
|
||||
// System.out.println(theWord);
|
||||
// 加载扩展停止词典数据到内存中
|
||||
_StopWordDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||
}
|
||||
} while (theWord != null);
|
||||
@ -322,15 +332,15 @@ public class Dictionary {
|
||||
* 加载量词词典
|
||||
*/
|
||||
private void loadQuantifierDict() {
|
||||
//建立一个量词典实例
|
||||
// 建立一个量词典实例
|
||||
_QuantifierDict = new DictSegment((char) 0);
|
||||
//读取量词词典文件
|
||||
// 读取量词词典文件
|
||||
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getQuantifierDicionary());
|
||||
if (is == null) {
|
||||
throw new RuntimeException("Quantifier Dictionary not found!!!");
|
||||
}
|
||||
try {
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
|
||||
String theWord;
|
||||
do {
|
||||
theWord = br.readLine();
|
||||
|
Loading…
x
Reference in New Issue
Block a user