完善注释,规范流实例化时使用StandardCharsets常量;

This commit is contained in:
magese 2018-08-23 09:44:34 +08:00
parent 43e3ba1c9d
commit 07d1c132a7

View File

@ -1,7 +1,7 @@
/* /*
* IK 中文分词 版本 7.0 * IK 中文分词 版本 7.4
* IK Analyzer release 7.0 * IK Analyzer release 7.4
* update by 高志成(magese@live.cn) * update by Magese(magese@live.cn)
*/ */
package org.wltea.analyzer.dic; package org.wltea.analyzer.dic;
@ -9,6 +9,7 @@ import java.io.BufferedReader;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
@ -16,7 +17,7 @@ import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.cfg.DefaultConfig; import org.wltea.analyzer.cfg.DefaultConfig;
/** /**
* 词典管理类,单子模式 * 词典管理类单例模式
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
public class Dictionary { public class Dictionary {
@ -45,6 +46,11 @@ public class Dictionary {
*/ */
private Configuration cfg; private Configuration cfg;
/**
* 私有构造方法阻止外部直接实例化本类
*
* @param cfg ik分词器配置实例
*/
private Dictionary(Configuration cfg) { private Dictionary(Configuration cfg) {
this.cfg = cfg; this.cfg = cfg;
this.loadMainDict(); this.loadMainDict();
@ -84,15 +90,19 @@ public class Dictionary {
/** /**
* 重新更新词典 * 重新更新词典
* 由于停用词等不经常变也不建议常增加故这里只修改动态扩展词库 * 由于停用词等不经常变也不建议常增加故这里只修改动态扩展词库
*
* @param inputStreamList 词典文件IO流集合
*/ */
public static void reloadDic(List<InputStream> inputStreamList) { public static void reloadDic(List<InputStream> inputStreamList) {
// 如果本类单例尚未实例化则先进行初始化操作
if (singleton == null) { if (singleton == null) {
Configuration cfg = DefaultConfig.getInstance(); Configuration cfg = DefaultConfig.getInstance();
initial(cfg); initial(cfg);
} }
// 对词典流集合进行循环读取将读取到的词语加载到主词典中
for (InputStream is : inputStreamList) { for (InputStream is : inputStreamList) {
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
String theWord; String theWord;
do { do {
theWord = br.readLine(); theWord = br.readLine();
@ -124,7 +134,7 @@ public class Dictionary {
if (words != null) { if (words != null) {
for (String word : words) { for (String word : words) {
if (word != null) { if (word != null) {
//批量加载词条到主内存词典中 // 批量加载词条到主内存词典中
singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray()); singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray());
} }
} }
@ -138,7 +148,7 @@ public class Dictionary {
if (words != null) { if (words != null) {
for (String word : words) { for (String word : words) {
if (word != null) { if (word != null) {
//批量屏蔽词条 // 批量屏蔽词条
singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray()); singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray());
} }
} }
@ -197,16 +207,16 @@ public class Dictionary {
* 加载主词典及扩展词典 * 加载主词典及扩展词典
*/ */
private void loadMainDict() { private void loadMainDict() {
//建立一个主词典实例 // 建立一个主词典实例
_MainDict = new DictSegment((char) 0); _MainDict = new DictSegment((char) 0);
//读取主词典文件 // 读取主词典文件
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary()); InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary());
if (is == null) { if (is == null) {
throw new RuntimeException("Main Dictionary not found!!!"); throw new RuntimeException("Main Dictionary not found!!!");
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
String theWord; String theWord;
do { do {
theWord = br.readLine(); theWord = br.readLine();
@ -226,7 +236,7 @@ public class Dictionary {
e.printStackTrace(); e.printStackTrace();
} }
} }
//加载扩展词典 // 加载扩展词典
this.loadExtDict(); this.loadExtDict();
} }
@ -234,26 +244,26 @@ public class Dictionary {
* 加载用户配置的扩展词典到主词库表 * 加载用户配置的扩展词典到主词库表
*/ */
private void loadExtDict() { private void loadExtDict() {
//加载扩展词典配置 // 加载扩展词典配置
List<String> extDictFiles = cfg.getExtDictionarys(); List<String> extDictFiles = cfg.getExtDictionarys();
if (extDictFiles != null) { if (extDictFiles != null) {
InputStream is; InputStream is;
for (String extDictName : extDictFiles) { for (String extDictName : extDictFiles) {
//读取扩展词典文件 // 读取扩展词典文件
System.out.println("加载扩展词典:" + extDictName); System.out.println("加载扩展词典:" + extDictName);
is = this.getClass().getClassLoader().getResourceAsStream(extDictName); is = this.getClass().getClassLoader().getResourceAsStream(extDictName);
//如果找不到扩展的字典则忽略 // 如果找不到扩展的字典则忽略
if (is == null) { if (is == null) {
continue; continue;
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
String theWord; String theWord;
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展词典数据到主内存词典中 // 加载扩展词典数据到主内存词典中
//System.out.println(theWord); // System.out.println(theWord);
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
@ -277,28 +287,28 @@ public class Dictionary {
* 加载用户扩展的停止词词典 * 加载用户扩展的停止词词典
*/ */
private void loadStopWordDict() { private void loadStopWordDict() {
//建立一个主词典实例 // 建立一个主词典实例
_StopWordDict = new DictSegment((char) 0); _StopWordDict = new DictSegment((char) 0);
//加载扩展停止词典 // 加载扩展停止词典
List<String> extStopWordDictFiles = cfg.getExtStopWordDictionarys(); List<String> extStopWordDictFiles = cfg.getExtStopWordDictionarys();
if (extStopWordDictFiles != null) { if (extStopWordDictFiles != null) {
InputStream is; InputStream is;
for (String extStopWordDictName : extStopWordDictFiles) { for (String extStopWordDictName : extStopWordDictFiles) {
System.out.println("加载扩展停止词典:" + extStopWordDictName); System.out.println("加载扩展停止词典:" + extStopWordDictName);
//读取扩展词典文件 // 读取扩展词典文件
is = this.getClass().getClassLoader().getResourceAsStream(extStopWordDictName); is = this.getClass().getClassLoader().getResourceAsStream(extStopWordDictName);
//如果找不到扩展的字典则忽略 // 如果找不到扩展的字典则忽略
if (is == null) { if (is == null) {
continue; continue;
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
String theWord; String theWord;
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//System.out.println(theWord); // System.out.println(theWord);
//加载扩展停止词典数据到内存中 // 加载扩展停止词典数据到内存中
_StopWordDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); _StopWordDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
@ -322,15 +332,15 @@ public class Dictionary {
* 加载量词词典 * 加载量词词典
*/ */
private void loadQuantifierDict() { private void loadQuantifierDict() {
//建立一个量词典实例 // 建立一个量词典实例
_QuantifierDict = new DictSegment((char) 0); _QuantifierDict = new DictSegment((char) 0);
//读取量词词典文件 // 读取量词词典文件
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getQuantifierDicionary()); InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getQuantifierDicionary());
if (is == null) { if (is == null) {
throw new RuntimeException("Quantifier Dictionary not found!!!"); throw new RuntimeException("Quantifier Dictionary not found!!!");
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8), 512);
String theWord; String theWord;
do { do {
theWord = br.readLine(); theWord = br.readLine();