Update lucene version to 8.3.0
This commit is contained in:
parent
356d9d9ae9
commit
0c8992fd80
@ -30,7 +30,7 @@ ik-analyzer for solr 7.x-8.x
|
|||||||
|
|
||||||
|
|
||||||
## 使用说明
|
## 使用说明
|
||||||
* jar包下载地址:[](https://search.maven.org/remotecontent?filepath=com/github/magese/ik-analyzer/8.2.0/ik-analyzer-8.2.0.jar)
|
* jar包下载地址:[ik-analyzer-8.3.0.jar](https://search.maven.org/remotecontent?filepath=com/github/magese/ik-analyzer/8.3.0/ik-analyzer-8.3.0.jar)
|
||||||
* 历史版本:[](https://search.maven.org/search?q=g:com.github.magese%20AND%20a:ik-analyzer&core=gav)
|
* 历史版本:[](https://search.maven.org/search?q=g:com.github.magese%20AND%20a:ik-analyzer&core=gav)
|
||||||
|
|
||||||
```console
|
```console
|
||||||
@ -38,7 +38,7 @@ ik-analyzer for solr 7.x-8.x
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>com.github.magese</groupId>
|
<groupId>com.github.magese</groupId>
|
||||||
<artifactId>ik-analyzer</artifactId>
|
<artifactId>ik-analyzer</artifactId>
|
||||||
<version>8.2.0</version>
|
<version>8.3.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
4
pom.xml
4
pom.xml
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
<groupId>com.github.magese</groupId>
|
<groupId>com.github.magese</groupId>
|
||||||
<artifactId>ik-analyzer</artifactId>
|
<artifactId>ik-analyzer</artifactId>
|
||||||
<version>8.2.0</version>
|
<version>8.3.0</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
<name>ik-analyzer-solr</name>
|
<name>ik-analyzer-solr</name>
|
||||||
@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<lucene.version>8.2.0</lucene.version>
|
<lucene.version>8.3.0</lucene.version>
|
||||||
<javac.src.version>1.8</javac.src.version>
|
<javac.src.version>1.8</javac.src.version>
|
||||||
<javac.target.version>1.8</javac.target.version>
|
<javac.target.version>1.8</javac.target.version>
|
||||||
<maven.compiler.plugin.version>3.3</maven.compiler.plugin.version>
|
<maven.compiler.plugin.version>3.3</maven.compiler.plugin.version>
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,21 +21,21 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
|
||||||
|
import org.wltea.analyzer.cfg.Configuration;
|
||||||
|
import org.wltea.analyzer.cfg.DefaultConfig;
|
||||||
|
import org.wltea.analyzer.dic.Dictionary;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.wltea.analyzer.cfg.Configuration;
|
|
||||||
import org.wltea.analyzer.cfg.DefaultConfig;
|
|
||||||
import org.wltea.analyzer.dic.Dictionary;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IK分词器主类
|
* IK分词器主类
|
||||||
*/
|
*/
|
||||||
@ -58,7 +58,6 @@ public final class IKSegmenter {
|
|||||||
*
|
*
|
||||||
* @param input 读取流
|
* @param input 读取流
|
||||||
* @param useSmart 为true,使用智能分词策略
|
* @param useSmart 为true,使用智能分词策略
|
||||||
* <p>
|
|
||||||
* 非智能分词:细粒度输出所有可能的切分结果
|
* 非智能分词:细粒度输出所有可能的切分结果
|
||||||
* 智能分词: 合并数词和量词,对分词结果进行歧义判断
|
* 智能分词: 合并数词和量词,对分词结果进行歧义判断
|
||||||
*/
|
*/
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.core;
|
package org.wltea.analyzer.core;
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.dic;
|
package org.wltea.analyzer.dic;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.dic;
|
package org.wltea.analyzer.dic;
|
||||||
@ -226,23 +226,25 @@ public class Dictionary {
|
|||||||
private void loadMainDict() {
|
private void loadMainDict() {
|
||||||
// 建立一个主词典实例
|
// 建立一个主词典实例
|
||||||
_MainDict = new DictSegment((char) 0);
|
_MainDict = new DictSegment((char) 0);
|
||||||
// 读取主词典文件
|
// 获取是否加载主词典
|
||||||
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary());
|
if (cfg.useMainDict()) {
|
||||||
if (is == null) {
|
// 读取主词典文件
|
||||||
throw new RuntimeException("Main Dictionary not found!!!");
|
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary());
|
||||||
}
|
if (is == null) {
|
||||||
|
throw new RuntimeException("Main Dictionary not found!!!");
|
||||||
try {
|
}
|
||||||
readDict(is, _MainDict);
|
|
||||||
} catch (IOException ioe) {
|
|
||||||
System.err.println("Main Dictionary loading exception.");
|
|
||||||
ioe.printStackTrace();
|
|
||||||
|
|
||||||
} finally {
|
|
||||||
try {
|
try {
|
||||||
is.close();
|
readDict(is, _MainDict);
|
||||||
} catch (IOException e) {
|
} catch (IOException ioe) {
|
||||||
e.printStackTrace();
|
System.err.println("Main Dictionary loading exception.");
|
||||||
|
ioe.printStackTrace();
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
try {
|
||||||
|
is.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// 加载扩展词典
|
// 加载扩展词典
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.dic;
|
package org.wltea.analyzer.dic;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.lucene;
|
package org.wltea.analyzer.lucene;
|
||||||
@ -34,44 +34,40 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||||||
* IK分词器,Lucene Analyzer接口实现
|
* IK分词器,Lucene Analyzer接口实现
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
public final class IKAnalyzer extends Analyzer{
|
public final class IKAnalyzer extends Analyzer {
|
||||||
|
|
||||||
private boolean useSmart;
|
|
||||||
|
|
||||||
private boolean useSmart() {
|
|
||||||
return useSmart;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setUseSmart(boolean useSmart) {
|
private boolean useSmart;
|
||||||
this.useSmart = useSmart;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
private boolean useSmart() {
|
||||||
* IK分词器Lucene Analyzer接口实现类
|
return useSmart;
|
||||||
*
|
}
|
||||||
* 默认细粒度切分算法
|
|
||||||
*/
|
|
||||||
public IKAnalyzer(){
|
|
||||||
this(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* IK分词器Lucene Analyzer接口实现类
|
|
||||||
*
|
|
||||||
* @param useSmart 当为true时,分词器进行智能切分
|
|
||||||
*/
|
|
||||||
public IKAnalyzer(boolean useSmart){
|
|
||||||
super();
|
|
||||||
this.useSmart = useSmart;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 重载Analyzer接口,构造分词组件
|
/**
|
||||||
*/
|
* IK分词器Lucene Analyzer接口实现类
|
||||||
@Override
|
* 默认细粒度切分算法
|
||||||
protected TokenStreamComponents createComponents(String fieldName) {
|
*/
|
||||||
Tokenizer _IKTokenizer = new IKTokenizer(this.useSmart());
|
public IKAnalyzer() {
|
||||||
return new TokenStreamComponents(_IKTokenizer);
|
this(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IK分词器Lucene Analyzer接口实现类
|
||||||
|
*
|
||||||
|
* @param useSmart 当为true时,分词器进行智能切分
|
||||||
|
*/
|
||||||
|
public IKAnalyzer(boolean useSmart) {
|
||||||
|
super();
|
||||||
|
this.useSmart = useSmart;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 重载Analyzer接口,构造分词组件
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected TokenStreamComponents createComponents(String fieldName) {
|
||||||
|
Tokenizer _IKTokenizer = new IKTokenizer(this.useSmart());
|
||||||
|
return new TokenStreamComponents(_IKTokenizer);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.lucene;
|
package org.wltea.analyzer.lucene;
|
||||||
@ -43,88 +43,88 @@ import java.io.IOException;
|
|||||||
*/
|
*/
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
public final class IKTokenizer extends Tokenizer {
|
public final class IKTokenizer extends Tokenizer {
|
||||||
|
|
||||||
//IK分词器实现
|
|
||||||
private IKSegmenter _IKImplement;
|
|
||||||
|
|
||||||
//词元文本属性
|
|
||||||
private CharTermAttribute termAtt;
|
|
||||||
//词元位移属性
|
|
||||||
private OffsetAttribute offsetAtt;
|
|
||||||
//词元分类属性(该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量)
|
|
||||||
private TypeAttribute typeAtt;
|
|
||||||
//记录最后一个词元的结束位置
|
|
||||||
private int endPosition;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Lucene 7.6 Tokenizer适配器类构造函数
|
|
||||||
*/
|
|
||||||
public IKTokenizer() {
|
|
||||||
this(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
IKTokenizer(boolean useSmart) {
|
//IK分词器实现
|
||||||
super();
|
private IKSegmenter _IKImplement;
|
||||||
init(useSmart);
|
|
||||||
}
|
|
||||||
|
|
||||||
public IKTokenizer(AttributeFactory factory) {
|
//词元文本属性
|
||||||
this(factory, false);
|
private CharTermAttribute termAtt;
|
||||||
}
|
//词元位移属性
|
||||||
|
private OffsetAttribute offsetAtt;
|
||||||
|
//词元分类属性(该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量)
|
||||||
|
private TypeAttribute typeAtt;
|
||||||
|
//记录最后一个词元的结束位置
|
||||||
|
private int endPosition;
|
||||||
|
|
||||||
IKTokenizer(AttributeFactory factory, boolean useSmart) {
|
/**
|
||||||
super(factory);
|
* Lucene 8.3.0 Tokenizer适配器类构造函数
|
||||||
init(useSmart);
|
*/
|
||||||
}
|
public IKTokenizer() {
|
||||||
|
this(false);
|
||||||
|
}
|
||||||
|
|
||||||
private void init(boolean useSmart) {
|
IKTokenizer(boolean useSmart) {
|
||||||
offsetAtt = addAttribute(OffsetAttribute.class);
|
super();
|
||||||
termAtt = addAttribute(CharTermAttribute.class);
|
init(useSmart);
|
||||||
typeAtt = addAttribute(TypeAttribute.class);
|
}
|
||||||
_IKImplement = new IKSegmenter(input , useSmart);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* (non-Javadoc)
|
public IKTokenizer(AttributeFactory factory) {
|
||||||
* @see org.apache.lucene.analysis.TokenStream#incrementToken()
|
this(factory, false);
|
||||||
*/
|
}
|
||||||
@Override
|
|
||||||
public boolean incrementToken() throws IOException {
|
IKTokenizer(AttributeFactory factory, boolean useSmart) {
|
||||||
//清除所有的词元属性
|
super(factory);
|
||||||
clearAttributes();
|
init(useSmart);
|
||||||
Lexeme nextLexeme = _IKImplement.next();
|
}
|
||||||
if(nextLexeme != null){
|
|
||||||
//将Lexeme转成Attributes
|
private void init(boolean useSmart) {
|
||||||
//设置词元文本
|
offsetAtt = addAttribute(OffsetAttribute.class);
|
||||||
termAtt.append(nextLexeme.getLexemeText());
|
termAtt = addAttribute(CharTermAttribute.class);
|
||||||
//设置词元长度
|
typeAtt = addAttribute(TypeAttribute.class);
|
||||||
termAtt.setLength(nextLexeme.getLength());
|
_IKImplement = new IKSegmenter(input, useSmart);
|
||||||
//设置词元位移
|
}
|
||||||
offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
|
|
||||||
//记录分词的最后位置
|
/* (non-Javadoc)
|
||||||
endPosition = nextLexeme.getEndPosition();
|
* @see org.apache.lucene.analysis.TokenStream#incrementToken()
|
||||||
//记录词元分类
|
*/
|
||||||
typeAtt.setType(nextLexeme.getLexemeTypeString());
|
@Override
|
||||||
//返会true告知还有下个词元
|
public boolean incrementToken() throws IOException {
|
||||||
return true;
|
//清除所有的词元属性
|
||||||
}
|
clearAttributes();
|
||||||
//返会false告知词元输出完毕
|
Lexeme nextLexeme = _IKImplement.next();
|
||||||
return false;
|
if (nextLexeme != null) {
|
||||||
}
|
//将Lexeme转成Attributes
|
||||||
|
//设置词元文本
|
||||||
/*
|
termAtt.append(nextLexeme.getLexemeText());
|
||||||
* (non-Javadoc)
|
//设置词元长度
|
||||||
* @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader)
|
termAtt.setLength(nextLexeme.getLength());
|
||||||
*/
|
//设置词元位移
|
||||||
@Override
|
offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
|
||||||
public void reset() throws IOException {
|
//记录分词的最后位置
|
||||||
super.reset();
|
endPosition = nextLexeme.getEndPosition();
|
||||||
_IKImplement.reset(input);
|
//记录词元分类
|
||||||
}
|
typeAtt.setType(nextLexeme.getLexemeTypeString());
|
||||||
|
//返回true告知还有下个词元
|
||||||
@Override
|
return true;
|
||||||
public final void end() {
|
}
|
||||||
// set final offset
|
//返回false告知词元输出完毕
|
||||||
int finalOffset = correctOffset(this.endPosition);
|
return false;
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
}
|
||||||
}
|
|
||||||
|
/*
|
||||||
|
* (non-Javadoc)
|
||||||
|
* @see org.apache.lucene.analysis.Tokenizer#reset()
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
_IKImplement.reset(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final void end() {
|
||||||
|
// set final offset
|
||||||
|
int finalOffset = correctOffset(this.endPosition);
|
||||||
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.lucene;
|
package org.wltea.analyzer.lucene;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.lucene;
|
package org.wltea.analyzer.lucene;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.query;
|
package org.wltea.analyzer.query;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.query;
|
package org.wltea.analyzer.query;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.sample;
|
package org.wltea.analyzer.sample;
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
/*
|
/*
|
||||||
* IK 中文分词 版本 8.2.0
|
* IK 中文分词 版本 8.3.0
|
||||||
* IK Analyzer release 8.2.0
|
* IK Analyzer release 8.3.0
|
||||||
*
|
*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
@ -21,8 +21,8 @@
|
|||||||
* 版权声明 2012,乌龙茶工作室
|
* 版权声明 2012,乌龙茶工作室
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||||
*
|
*
|
||||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||||
* release 8.2.0 update by Magese(magese@live.cn)
|
* release 8.3.0 update by Magese(magese@live.cn)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.sample;
|
package org.wltea.analyzer.sample;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user