完善注释,规范流实例化时使用StandardCharsets常量;

This commit is contained in:
magese 2018-08-23 09:44:34 +08:00
parent 43e3ba1c9d
commit 07d1c132a7

View File

@ -1,7 +1,7 @@
/*
* IK 中文分词 版本 7.0
* IK Analyzer release 7.0
* update by 高志成(magese@live.cn)
* IK 中文分词 版本 7.4
* IK Analyzer release 7.4
* update by Magese(magese@live.cn)
*/
package org.wltea.analyzer.dic;
@ -9,6 +9,7 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.List;
@ -16,7 +17,7 @@ import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.cfg.DefaultConfig;
/**
* 词典管理类,单子模式
* 词典管理类,单例模式
*/
@SuppressWarnings("unused")
public class Dictionary {
@ -45,6 +46,11 @@ public class Dictionary {
*/
private Configuration cfg;
/**
* 私有构造方法,阻止外部直接实例化本类
*
* @param cfg ik分词器配置实例
*/
private Dictionary(Configuration cfg) {
this.cfg = cfg;
this.loadMainDict();
@ -84,15 +90,19 @@ public class Dictionary {
/**
* 重新更新词典
* 由于停用词等不经常变动,也不建议经常增加,故这里只修改动态扩展词库
*
* @param inputStreamList 词典文件IO流集合
*/
public static void reloadDic(List<InputStream> inputStreamList) {
// 如果本类单例尚未实例化,则先进行初始化操作
if (singleton == null) {
Configuration cfg = DefaultConfig.getInstance();
initial(cfg);
}
// 对词典流集合进行循环读取,将读取到的词语加载到主词典中
for (InputStream is : inputStreamList) {
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
String theWord;
do {
theWord = br.readLine();
@ -124,7 +134,7 @@ public class Dictionary {
if (words != null) {
for (String word : words) {
if (word != null) {
//批量加载词条到主内存词典中
// 批量加载词条到主内存词典中
singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray());
}
}
@ -138,7 +148,7 @@ public class Dictionary {
if (words != null) {
for (String word : words) {
if (word != null) {
//批量屏蔽词条
// 批量屏蔽词条
singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray());
}
}
@ -197,16 +207,16 @@ public class Dictionary {
* 加载主词典及扩展词典
*/
private void loadMainDict() {
//建立一个主词典实例
// 建立一个主词典实例
_MainDict = new DictSegment((char) 0);
//读取主词典文件
// 读取主词典文件
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary());
if (is == null) {
throw new RuntimeException("Main Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
String theWord;
do {
theWord = br.readLine();
@ -226,7 +236,7 @@ public class Dictionary {
e.printStackTrace();
}
}
//加载扩展词典
// 加载扩展词典
this.loadExtDict();
}
@ -234,26 +244,26 @@ public class Dictionary {
* 加载用户配置的扩展词典到主词库表
*/
private void loadExtDict() {
//加载扩展词典配置
// 加载扩展词典配置
List<String> extDictFiles = cfg.getExtDictionarys();
if (extDictFiles != null) {
InputStream is;
for (String extDictName : extDictFiles) {
//读取扩展词典文件
// 读取扩展词典文件
System.out.println("加载扩展词典:" + extDictName);
is = this.getClass().getClassLoader().getResourceAsStream(extDictName);
//如果找不到扩展的字典则忽略
// 如果找不到扩展的字典则忽略
if (is == null) {
continue;
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展词典数据到主内存词典中
//System.out.println(theWord);
// 加载扩展词典数据到主内存词典中
// System.out.println(theWord);
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
}
} while (theWord != null);
@ -277,28 +287,28 @@ public class Dictionary {
* 加载用户扩展的停止词词典
*/
private void loadStopWordDict() {
//建立一个主词典实例
// 建立一个停止词典实例
_StopWordDict = new DictSegment((char) 0);
//加载扩展停止词典
// 加载扩展停止词典
List<String> extStopWordDictFiles = cfg.getExtStopWordDictionarys();
if (extStopWordDictFiles != null) {
InputStream is;
for (String extStopWordDictName : extStopWordDictFiles) {
System.out.println("加载扩展停止词典:" + extStopWordDictName);
//读取扩展词典文件
// 读取扩展词典文件
is = this.getClass().getClassLoader().getResourceAsStream(extStopWordDictName);
//如果找不到扩展的字典则忽略
// 如果找不到扩展的字典则忽略
if (is == null) {
continue;
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
//System.out.println(theWord);
//加载扩展停止词典数据到内存中
// System.out.println(theWord);
// 加载扩展停止词典数据到内存中
_StopWordDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
}
} while (theWord != null);
@ -322,15 +332,15 @@ public class Dictionary {
* 加载量词词典
*/
private void loadQuantifierDict() {
//建立一个量词典实例
// 建立一个量词典实例
_QuantifierDict = new DictSegment((char) 0);
//读取量词词典文件
// 读取量词词典文件
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getQuantifierDicionary());
if (is == null) {
throw new RuntimeException("Quantifier Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
String theWord;
do {
theWord = br.readLine();