文档注释修改;
This commit is contained in:
parent
07d1c132a7
commit
7b9e8ae31e
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.cfg;
|
package org.wltea.analyzer.cfg;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.cfg;
|
package org.wltea.analyzer.cfg;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
@ -152,7 +152,7 @@ class QuickSortSet {
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.0
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.0
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
class Cell implements Comparable<Cell>{
|
class Cell implements Comparable<Cell>{
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.dic;
|
package org.wltea.analyzer.dic;
|
||||||
|
|
||||||
@ -80,27 +80,27 @@ class DictSegment implements Comparable<DictSegment> {
|
|||||||
Hit match(char[] charArray, int begin, int length, Hit searchHit) {
|
Hit match(char[] charArray, int begin, int length, Hit searchHit) {
|
||||||
|
|
||||||
if (searchHit == null) {
|
if (searchHit == null) {
|
||||||
//如果hit为空,新建
|
// 如果hit为空,新建
|
||||||
searchHit = new Hit();
|
searchHit = new Hit();
|
||||||
//设置hit的其实文本位置
|
// 设置hit的其实文本位置
|
||||||
searchHit.setBegin(begin);
|
searchHit.setBegin(begin);
|
||||||
} else {
|
} else {
|
||||||
//否则要将HIT状态重置
|
// 否则要将HIT状态重置
|
||||||
searchHit.setUnmatch();
|
searchHit.setUnmatch();
|
||||||
}
|
}
|
||||||
//设置hit的当前处理位置
|
// 设置hit的当前处理位置
|
||||||
searchHit.setEnd(begin);
|
searchHit.setEnd(begin);
|
||||||
|
|
||||||
Character keyChar = charArray[begin];
|
Character keyChar = charArray[begin];
|
||||||
DictSegment ds = null;
|
DictSegment ds = null;
|
||||||
|
|
||||||
//引用实例变量为本地变量,避免查询时遇到更新的同步问题
|
// 引用实例变量为本地变量,避免查询时遇到更新的同步问题
|
||||||
DictSegment[] segmentArray = this.childrenArray;
|
DictSegment[] segmentArray = this.childrenArray;
|
||||||
Map<Character, DictSegment> segmentMap = this.childrenMap;
|
Map<Character, DictSegment> segmentMap = this.childrenMap;
|
||||||
|
|
||||||
//STEP1 在节点中查找keyChar对应的DictSegment
|
// STEP1 在节点中查找keyChar对应的DictSegment
|
||||||
if (segmentArray != null) {
|
if (segmentArray != null) {
|
||||||
//在数组中查找
|
// 在数组中查找
|
||||||
DictSegment keySegment = new DictSegment(keyChar);
|
DictSegment keySegment = new DictSegment(keyChar);
|
||||||
int position = Arrays.binarySearch(segmentArray, 0, this.storeSize, keySegment);
|
int position = Arrays.binarySearch(segmentArray, 0, this.storeSize, keySegment);
|
||||||
if (position >= 0) {
|
if (position >= 0) {
|
||||||
@ -108,33 +108,33 @@ class DictSegment implements Comparable<DictSegment> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
} else if (segmentMap != null) {
|
} else if (segmentMap != null) {
|
||||||
//在map中查找
|
// 在map中查找
|
||||||
ds = segmentMap.get(keyChar);
|
ds = segmentMap.get(keyChar);
|
||||||
}
|
}
|
||||||
|
|
||||||
//STEP2 找到DictSegment,判断词的匹配状态,是否继续递归,还是返回结果
|
// STEP2 找到DictSegment,判断词的匹配状态,是否继续递归,还是返回结果
|
||||||
if (ds != null) {
|
if (ds != null) {
|
||||||
if (length > 1) {
|
if (length > 1) {
|
||||||
//词未匹配完,继续往下搜索
|
// 词未匹配完,继续往下搜索
|
||||||
return ds.match(charArray, begin + 1, length - 1, searchHit);
|
return ds.match(charArray, begin + 1, length - 1, searchHit);
|
||||||
} else if (length == 1) {
|
} else if (length == 1) {
|
||||||
|
|
||||||
//搜索最后一个char
|
// 搜索最后一个char
|
||||||
if (ds.nodeState == 1) {
|
if (ds.nodeState == 1) {
|
||||||
//添加HIT状态为完全匹配
|
// 添加HIT状态为完全匹配
|
||||||
searchHit.setMatch();
|
searchHit.setMatch();
|
||||||
}
|
}
|
||||||
if (ds.hasNextNode()) {
|
if (ds.hasNextNode()) {
|
||||||
//添加HIT状态为前缀匹配
|
// 添加HIT状态为前缀匹配
|
||||||
searchHit.setPrefix();
|
searchHit.setPrefix();
|
||||||
//记录当前位置的DictSegment
|
// 记录当前位置的DictSegment
|
||||||
searchHit.setMatchedDictSegment(ds);
|
searchHit.setMatchedDictSegment(ds);
|
||||||
}
|
}
|
||||||
return searchHit;
|
return searchHit;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
//STEP3 没有找到DictSegment, 将HIT设置为不匹配
|
// STEP3 没有找到DictSegment, 将HIT设置为不匹配
|
||||||
return searchHit;
|
return searchHit;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -156,25 +156,25 @@ class DictSegment implements Comparable<DictSegment> {
|
|||||||
* 加载填充词典片段
|
* 加载填充词典片段
|
||||||
*/
|
*/
|
||||||
private synchronized void fillSegment(char[] charArray, int begin, int length, int enabled) {
|
private synchronized void fillSegment(char[] charArray, int begin, int length, int enabled) {
|
||||||
//获取字典表中的汉字对象
|
// 获取字典表中的汉字对象
|
||||||
Character beginChar = charArray[begin];
|
Character beginChar = charArray[begin];
|
||||||
Character keyChar = charMap.get(beginChar);
|
Character keyChar = charMap.get(beginChar);
|
||||||
//字典中没有该字,则将其添加入字典
|
// 字典中没有该字,则将其添加入字典
|
||||||
if (keyChar == null) {
|
if (keyChar == null) {
|
||||||
charMap.put(beginChar, beginChar);
|
charMap.put(beginChar, beginChar);
|
||||||
keyChar = beginChar;
|
keyChar = beginChar;
|
||||||
}
|
}
|
||||||
|
|
||||||
//搜索当前节点的存储,查询对应keyChar的keyChar,如果没有则创建
|
// 搜索当前节点的存储,查询对应keyChar的keyChar,如果没有则创建
|
||||||
DictSegment ds = lookforSegment(keyChar, enabled);
|
DictSegment ds = lookforSegment(keyChar, enabled);
|
||||||
if (ds != null) {
|
if (ds != null) {
|
||||||
//处理keyChar对应的segment
|
// 处理keyChar对应的segment
|
||||||
if (length > 1) {
|
if (length > 1) {
|
||||||
//词元还没有完全加入词典树
|
// 词元还没有完全加入词典树
|
||||||
ds.fillSegment(charArray, begin + 1, length - 1, enabled);
|
ds.fillSegment(charArray, begin + 1, length - 1, enabled);
|
||||||
} else if (length == 1) {
|
} else if (length == 1) {
|
||||||
//已经是词元的最后一个char,设置当前节点状态为enabled,
|
// 已经是词元的最后一个char,设置当前节点状态为enabled,
|
||||||
//enabled=1表明一个完整的词,enabled=0表示从词典中屏蔽当前词
|
// enabled=1表明一个完整的词,enabled=0表示从词典中屏蔽当前词
|
||||||
ds.nodeState = enabled;
|
ds.nodeState = enabled;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -191,51 +191,51 @@ class DictSegment implements Comparable<DictSegment> {
|
|||||||
DictSegment ds = null;
|
DictSegment ds = null;
|
||||||
|
|
||||||
if (this.storeSize <= ARRAY_LENGTH_LIMIT) {
|
if (this.storeSize <= ARRAY_LENGTH_LIMIT) {
|
||||||
//获取数组容器,如果数组未创建则创建数组
|
// 获取数组容器,如果数组未创建则创建数组
|
||||||
DictSegment[] segmentArray = getChildrenArray();
|
DictSegment[] segmentArray = getChildrenArray();
|
||||||
//搜寻数组
|
// 搜寻数组
|
||||||
DictSegment keySegment = new DictSegment(keyChar);
|
DictSegment keySegment = new DictSegment(keyChar);
|
||||||
int position = Arrays.binarySearch(segmentArray, 0, this.storeSize, keySegment);
|
int position = Arrays.binarySearch(segmentArray, 0, this.storeSize, keySegment);
|
||||||
if (position >= 0) {
|
if (position >= 0) {
|
||||||
ds = segmentArray[position];
|
ds = segmentArray[position];
|
||||||
}
|
}
|
||||||
|
|
||||||
//遍历数组后没有找到对应的segment
|
// 遍历数组后没有找到对应的segment
|
||||||
if (ds == null && create == 1) {
|
if (ds == null && create == 1) {
|
||||||
ds = keySegment;
|
ds = keySegment;
|
||||||
if (this.storeSize < ARRAY_LENGTH_LIMIT) {
|
if (this.storeSize < ARRAY_LENGTH_LIMIT) {
|
||||||
//数组容量未满,使用数组存储
|
// 数组容量未满,使用数组存储
|
||||||
segmentArray[this.storeSize] = ds;
|
segmentArray[this.storeSize] = ds;
|
||||||
//segment数目+1
|
// segment数目+1
|
||||||
this.storeSize++;
|
this.storeSize++;
|
||||||
Arrays.sort(segmentArray, 0, this.storeSize);
|
Arrays.sort(segmentArray, 0, this.storeSize);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//数组容量已满,切换Map存储
|
// 数组容量已满,切换Map存储
|
||||||
//获取Map容器,如果Map未创建,则创建Map
|
// 获取Map容器,如果Map未创建,则创建Map
|
||||||
Map<Character, DictSegment> segmentMap = getChildrenMap();
|
Map<Character, DictSegment> segmentMap = getChildrenMap();
|
||||||
//将数组中的segment迁移到Map中
|
// 将数组中的segment迁移到Map中
|
||||||
migrate(segmentArray, segmentMap);
|
migrate(segmentArray, segmentMap);
|
||||||
//存储新的segment
|
// 存储新的segment
|
||||||
segmentMap.put(keyChar, ds);
|
segmentMap.put(keyChar, ds);
|
||||||
//segment数目+1 , 必须在释放数组前执行storeSize++ , 确保极端情况下,不会取到空的数组
|
// segment数目+1 , 必须在释放数组前执行storeSize++ , 确保极端情况下,不会取到空的数组
|
||||||
this.storeSize++;
|
this.storeSize++;
|
||||||
//释放当前的数组引用
|
// 释放当前的数组引用
|
||||||
this.childrenArray = null;
|
this.childrenArray = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//获取Map容器,如果Map未创建,则创建Map
|
// 获取Map容器,如果Map未创建,则创建Map
|
||||||
Map<Character, DictSegment> segmentMap = getChildrenMap();
|
Map<Character, DictSegment> segmentMap = getChildrenMap();
|
||||||
//搜索Map
|
// 搜索Map
|
||||||
ds = segmentMap.get(keyChar);
|
ds = segmentMap.get(keyChar);
|
||||||
if (ds == null && create == 1) {
|
if (ds == null && create == 1) {
|
||||||
//构造新的segment
|
// 构造新的segment
|
||||||
ds = new DictSegment(keyChar);
|
ds = new DictSegment(keyChar);
|
||||||
segmentMap.put(keyChar, ds);
|
segmentMap.put(keyChar, ds);
|
||||||
//当前节点存储segment数目+1
|
// 当前节点存储segment数目+1
|
||||||
this.storeSize++;
|
this.storeSize++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -291,7 +291,7 @@ class DictSegment implements Comparable<DictSegment> {
|
|||||||
* @return int
|
* @return int
|
||||||
*/
|
*/
|
||||||
public int compareTo(DictSegment o) {
|
public int compareTo(DictSegment o) {
|
||||||
//对当前节点存储的char进行比较
|
// 对当前节点存储的char进行比较
|
||||||
return this.nodeChar.compareTo(o.nodeChar);
|
return this.nodeChar.compareTo(o.nodeChar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.dic;
|
package org.wltea.analyzer.dic;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.lucene;
|
package org.wltea.analyzer.lucene;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.lucene;
|
package org.wltea.analyzer.lucene;
|
||||||
|
|
||||||
@ -35,7 +35,7 @@ public final class IKTokenizer extends Tokenizer {
|
|||||||
private int endPosition;
|
private int endPosition;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lucene 4.0 Tokenizer适配器类构造函数
|
* Lucene 7.4 Tokenizer适配器类构造函数
|
||||||
*/
|
*/
|
||||||
public IKTokenizer() {
|
public IKTokenizer() {
|
||||||
this(false);
|
this(false);
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.query;
|
package org.wltea.analyzer.query;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.query;
|
package org.wltea.analyzer.query;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.sample;
|
package org.wltea.analyzer.sample;
|
||||||
|
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 7.0
|
* IK 中文分词 版本 7.4
|
||||||
* IK Analyzer release 7.0
|
* IK Analyzer release 7.4
|
||||||
* update by 高志成(magese@live.cn)
|
* update by Magese(magese@live.cn)
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.sample;
|
package org.wltea.analyzer.sample;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user