更新Lucene版本为8.3.1;
This commit is contained in:
parent
4dd4a86b4d
commit
8b04070253
12
README.md
12
README.md
@ -22,15 +22,16 @@ ik-analyzer for solr 7.x-8.x
|
||||
| word | 64.2万 | 2014年 |
|
||||
| jieba | 58.4万 | 2012年 |
|
||||
| jcesg | 16.6万 | 2018年 |
|
||||
| sougou词库 | 115.2万 | 2019年 |
|
||||
| sougou词库 | 115.2万 | 2020年 |
|
||||
#### 将以上词库进行整理后约187.1万条词汇;
|
||||
#### 添加动态加载词典表功能,在不需要重启solr服务的情况下加载新增的词典。
|
||||
> <small>关闭默认主词典请在`IKAnalyzer.cfg.xml`配置文件中设置`use_main_dict`为`false`。</small>
|
||||
* IKAnalyzer的原作者为林良益<linliangyi2007@gmail.com>,项目网站为<http://code.google.com/p/ik-analyzer>
|
||||
* 该项目动态加载功能根据博主[@星火燎原智勇](http://www.cnblogs.com/liang1101/articles/6395016.html)的博客进行修改,其GITHUB地址为[@liang68](https://github.com/liang68)
|
||||
|
||||
|
||||
## 使用说明
|
||||
* jar包下载地址:[ik-analyzer-8.3.0.jar](https://search.maven.org/remotecontent?filepath=com/github/magese/ik-analyzer/8.3.0/ik-analyzer-8.3.0.jar)
|
||||
* jar包下载地址:[ik-analyzer-8.3.1.jar](https://search.maven.org/remotecontent?filepath=com/github/magese/ik-analyzer/8.3.1/ik-analyzer-8.3.1.jar)
|
||||
* 历史版本:[Maven Central](https://search.maven.org/search?q=g:com.github.magese%20AND%20a:ik-analyzer&core=gav)
|
||||
|
||||
```console
|
||||
@ -38,7 +39,7 @@ ik-analyzer for solr 7.x-8.x
|
||||
<dependency>
|
||||
<groupId>com.github.magese</groupId>
|
||||
<artifactId>ik-analyzer</artifactId>
|
||||
<version>8.3.0</version>
|
||||
<version>8.3.1</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
@ -79,7 +80,7 @@ ik-analyzer for solr 7.x-8.x
|
||||
5. `IKAnalyzer.cfg.xml`配置文件说明:
|
||||
|
||||
| 名称 | 类型 | 描述 | 默认 |
|
||||
| :------: | :------: | :------: | :------: |
|
||||
| ------ | ------ | ------ | ------ |
|
||||
| use_main_dict | boolean | 是否使用默认主词典 | true |
|
||||
| ext_dict | String | 扩展词典文件名称,多个用分号隔开 | ext.dic; |
|
||||
| ext_stopwords | String | 停用词典文件名称,多个用分号隔开 | stopword.dic; |
|
||||
@ -100,6 +101,9 @@ ik-analyzer for solr 7.x-8.x
|
||||
|
||||
|
||||
## 更新说明
|
||||
- `2020-12-30:`
|
||||
- 升级lucene版本为`8.3.1`
|
||||
- 更新词库
|
||||
- `2019-11-12:`
|
||||
- 升级lucene版本为`8.3.0`
|
||||
- `IKAnalyzer.cfg.xml`增加配置项`use_main_dict`,用于配置是否启用默认主词典
|
||||
|
5
pom.xml
5
pom.xml
@ -4,7 +4,7 @@
|
||||
|
||||
<groupId>com.github.magese</groupId>
|
||||
<artifactId>ik-analyzer</artifactId>
|
||||
<version>8.3.0</version>
|
||||
<version>8.3.1</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>ik-analyzer-solr</name>
|
||||
@ -13,7 +13,7 @@
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<lucene.version>8.3.0</lucene.version>
|
||||
<lucene.version>8.3.1</lucene.version>
|
||||
<javac.src.version>1.8</javac.src.version>
|
||||
<javac.target.version>1.8</javac.target.version>
|
||||
<maven.compiler.plugin.version>3.3</maven.compiler.plugin.version>
|
||||
@ -152,4 +152,3 @@
|
||||
</profile>
|
||||
</profiles>
|
||||
</project>
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.cfg;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.cfg;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,23 +21,19 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.wltea.analyzer.cfg.Configuration;
|
||||
import org.wltea.analyzer.dic.Dictionary;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* 分词器上下文状态
|
||||
*/
|
||||
@ -66,17 +62,17 @@ class AnalyzeContext {
|
||||
|
||||
//子分词器锁
|
||||
//该集合非空,说明有子分词器在占用segmentBuff
|
||||
private Set<String> buffLocker;
|
||||
private final Set<String> buffLocker;
|
||||
|
||||
//原始分词结果集合,未经歧义处理
|
||||
private QuickSortSet orgLexemes;
|
||||
//LexemePath位置索引表
|
||||
private Map<Integer, LexemePath> pathMap;
|
||||
private final Map<Integer, LexemePath> pathMap;
|
||||
//最终分词结果集
|
||||
private LinkedList<Lexeme> results;
|
||||
private final LinkedList<Lexeme> results;
|
||||
|
||||
//分词器配置项
|
||||
private Configuration cfg;
|
||||
private final Configuration cfg;
|
||||
|
||||
AnalyzeContext(Configuration cfg) {
|
||||
this.cfg = cfg;
|
||||
@ -254,7 +250,7 @@ class AnalyzeContext {
|
||||
*/
|
||||
void outputToResult() {
|
||||
int index = 0;
|
||||
for (; index <= this.cursor; ) {
|
||||
while (index <= this.cursor) {
|
||||
//跳过非CJK字符
|
||||
if (CharacterUtil.CHAR_USELESS == this.charTypes[index]) {
|
||||
index++;
|
||||
@ -353,6 +349,7 @@ class AnalyzeContext {
|
||||
if (Lexeme.TYPE_ARABIC == result.getLexemeType()) {
|
||||
Lexeme nextLexeme = this.results.peekFirst();
|
||||
boolean appendOk = false;
|
||||
if (nextLexeme != null) {
|
||||
if (Lexeme.TYPE_CNUM == nextLexeme.getLexemeType()) {
|
||||
//合并英文数词+中文数词
|
||||
appendOk = result.append(nextLexeme, Lexeme.TYPE_CNUM);
|
||||
@ -360,6 +357,7 @@ class AnalyzeContext {
|
||||
//合并英文数词+中文量词
|
||||
appendOk = result.append(nextLexeme, Lexeme.TYPE_CQUAN);
|
||||
}
|
||||
}
|
||||
if (appendOk) {
|
||||
//弹出
|
||||
this.results.pollFirst();
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -38,44 +38,46 @@ class QuickSortSet {
|
||||
//链表的实际大小
|
||||
private int size;
|
||||
|
||||
QuickSortSet(){
|
||||
QuickSortSet() {
|
||||
this.size = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 向链表集合添加词元
|
||||
*/
|
||||
void addLexeme(Lexeme lexeme){
|
||||
void addLexeme(Lexeme lexeme) {
|
||||
Cell newCell = new Cell(lexeme);
|
||||
if(this.size == 0){
|
||||
if (this.size == 0) {
|
||||
this.head = newCell;
|
||||
this.tail = newCell;
|
||||
this.size++;
|
||||
|
||||
}else{
|
||||
} else {
|
||||
/*if(this.tail.compareTo(newCell) == 0){//词元与尾部词元相同,不放入集合
|
||||
|
||||
}else */if(this.tail.compareTo(newCell) < 0){//词元接入链表尾部
|
||||
}else */
|
||||
if (this.tail.compareTo(newCell) < 0) {//词元接入链表尾部
|
||||
this.tail.next = newCell;
|
||||
newCell.prev = this.tail;
|
||||
this.tail = newCell;
|
||||
this.size++;
|
||||
|
||||
}else if(this.head.compareTo(newCell) > 0){//词元接入链表头部
|
||||
} else if (this.head.compareTo(newCell) > 0) {//词元接入链表头部
|
||||
this.head.prev = newCell;
|
||||
newCell.next = this.head;
|
||||
this.head = newCell;
|
||||
this.size++;
|
||||
|
||||
}else{
|
||||
} else {
|
||||
//从尾部上逆
|
||||
Cell index = this.tail;
|
||||
while(index != null && index.compareTo(newCell) > 0){
|
||||
while (index != null && index.compareTo(newCell) > 0) {
|
||||
index = index.prev;
|
||||
}
|
||||
/*if(index.compareTo(newCell) == 0){//词元与集合中的词元重复,不放入集合
|
||||
|
||||
}else */if((index != null ? index.compareTo(newCell) : 1) < 0){//词元插入链表中的某个位置
|
||||
}else */
|
||||
if ((index != null ? index.compareTo(newCell) : 1) < 0) {//词元插入链表中的某个位置
|
||||
newCell.prev = index;
|
||||
newCell.next = index.next;
|
||||
index.next.prev = newCell;
|
||||
@ -89,8 +91,8 @@ class QuickSortSet {
|
||||
/**
|
||||
* 返回链表头部元素
|
||||
*/
|
||||
Lexeme peekFirst(){
|
||||
if(this.head != null){
|
||||
Lexeme peekFirst() {
|
||||
if (this.head != null) {
|
||||
return this.head.lexeme;
|
||||
}
|
||||
return null;
|
||||
@ -98,21 +100,22 @@ class QuickSortSet {
|
||||
|
||||
/**
|
||||
* 取出链表集合的第一个元素
|
||||
*
|
||||
* @return Lexeme
|
||||
*/
|
||||
Lexeme pollFirst(){
|
||||
if(this.size == 1){
|
||||
Lexeme pollFirst() {
|
||||
if (this.size == 1) {
|
||||
Lexeme first = this.head.lexeme;
|
||||
this.head = null;
|
||||
this.tail = null;
|
||||
this.size--;
|
||||
return first;
|
||||
}else if(this.size > 1){
|
||||
} else if (this.size > 1) {
|
||||
Lexeme first = this.head.lexeme;
|
||||
this.head = this.head.next;
|
||||
this.size --;
|
||||
this.size--;
|
||||
return first;
|
||||
}else{
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@ -120,8 +123,8 @@ class QuickSortSet {
|
||||
/**
|
||||
* 返回链表尾部元素
|
||||
*/
|
||||
Lexeme peekLast(){
|
||||
if(this.tail != null){
|
||||
Lexeme peekLast() {
|
||||
if (this.tail != null) {
|
||||
return this.tail.lexeme;
|
||||
}
|
||||
return null;
|
||||
@ -129,23 +132,24 @@ class QuickSortSet {
|
||||
|
||||
/**
|
||||
* 取出链表集合的最后一个元素
|
||||
*
|
||||
* @return Lexeme
|
||||
*/
|
||||
Lexeme pollLast(){
|
||||
if(this.size == 1){
|
||||
Lexeme pollLast() {
|
||||
if (this.size == 1) {
|
||||
Lexeme last = this.head.lexeme;
|
||||
this.head = null;
|
||||
this.tail = null;
|
||||
this.size--;
|
||||
return last;
|
||||
|
||||
}else if(this.size > 1){
|
||||
} else if (this.size > 1) {
|
||||
Lexeme last = this.tail.lexeme;
|
||||
this.tail = this.tail.prev;
|
||||
this.size--;
|
||||
return last;
|
||||
|
||||
}else{
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@ -153,37 +157,37 @@ class QuickSortSet {
|
||||
/**
|
||||
* 返回集合大小
|
||||
*/
|
||||
int size(){
|
||||
int size() {
|
||||
return this.size;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断集合是否为空
|
||||
*/
|
||||
boolean isEmpty(){
|
||||
boolean isEmpty() {
|
||||
return this.size == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 返回lexeme链的头部
|
||||
*/
|
||||
Cell getHead(){
|
||||
Cell getHead() {
|
||||
return this.head;
|
||||
}
|
||||
|
||||
/*
|
||||
* IK 中文分词 版本 7.0
|
||||
* IK Analyzer release 7.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
* update by Magese(magese@live.cn)
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
class Cell implements Comparable<Cell>{
|
||||
static class Cell implements Comparable<Cell> {
|
||||
private Cell prev;
|
||||
private Cell next;
|
||||
private Lexeme lexeme;
|
||||
private final Lexeme lexeme;
|
||||
|
||||
Cell(Lexeme lexeme){
|
||||
if(lexeme == null){
|
||||
Cell(Lexeme lexeme) {
|
||||
if (lexeme == null) {
|
||||
throw new IllegalArgumentException("lexeme must not be null");
|
||||
}
|
||||
this.lexeme = lexeme;
|
||||
@ -193,15 +197,15 @@ class QuickSortSet {
|
||||
return this.lexeme.compareTo(o.lexeme);
|
||||
}
|
||||
|
||||
public Cell getPrev(){
|
||||
public Cell getPrev() {
|
||||
return this.prev;
|
||||
}
|
||||
|
||||
Cell getNext(){
|
||||
Cell getNext() {
|
||||
return this.next;
|
||||
}
|
||||
|
||||
public Lexeme getLexeme(){
|
||||
public Lexeme getLexeme() {
|
||||
return this.lexeme;
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.dic;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.dic;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.dic;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.lucene;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.lucene;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.lucene;
|
||||
@ -74,7 +74,7 @@ public class IKTokenizerFactory extends TokenizerFactory implements ResourceLoad
|
||||
*/
|
||||
@Override
|
||||
public void inform(ResourceLoader resourceLoader) throws IOException {
|
||||
System.out.println(String.format("IKTokenizerFactory " + this.hashCode() + " inform conf: %s", getConf()));
|
||||
System.out.printf("IKTokenizerFactory " + this.hashCode() + " inform conf: %s%n", getConf());
|
||||
this.loader = resourceLoader;
|
||||
update();
|
||||
if ((getConf() != null) && (!getConf().trim().isEmpty())) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.lucene;
|
||||
@ -35,7 +35,7 @@ import java.util.Vector;
|
||||
*/
|
||||
public class UpdateThread implements Runnable {
|
||||
private static final long INTERVAL = 30000L; // 循环等待时间
|
||||
private Vector<UpdateJob> filterFactorys; // 更新任务集合
|
||||
private final Vector<UpdateJob> filterFactorys; // 更新任务集合
|
||||
|
||||
/**
|
||||
* 私有化构造器,阻止外部进行实例化
|
||||
@ -51,7 +51,7 @@ public class UpdateThread implements Runnable {
|
||||
* 静态内部类,实现线程安全单例模式
|
||||
*/
|
||||
private static class Builder {
|
||||
private static UpdateThread singleton = new UpdateThread();
|
||||
private static final UpdateThread singleton = new UpdateThread();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -81,6 +81,7 @@ public class UpdateThread implements Runnable {
|
||||
//noinspection InfiniteLoopStatement
|
||||
while (true) {
|
||||
try {
|
||||
//noinspection BusyWait
|
||||
Thread.sleep(INTERVAL);
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.query;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.query;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.sample;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
* IK 中文分词 版本 8.3.1
|
||||
* IK Analyzer release 8.3.1
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
* 8.3.1版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.1 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.sample;
|
||||
@ -76,6 +76,7 @@ public class LuceneIndexAndSearchDemo {
|
||||
IndexSearcher isearcher;
|
||||
try {
|
||||
//建立内存索引对象
|
||||
//noinspection deprecation
|
||||
directory = new RAMDirectory();
|
||||
|
||||
//配置IndexWriterConfig
|
||||
|
Loading…
x
Reference in New Issue
Block a user