Update lucene version to 8.3.0
This commit is contained in:
parent
356d9d9ae9
commit
0c8992fd80
@ -30,7 +30,7 @@ ik-analyzer for solr 7.x-8.x
|
||||
|
||||
|
||||
## 使用说明
|
||||
* jar包下载地址:[ik-analyzer-8.2.0.jar](https://search.maven.org/remotecontent?filepath=com/github/magese/ik-analyzer/8.2.0/ik-analyzer-8.2.0.jar)
|
||||
* jar包下载地址:[ik-analyzer-8.3.0.jar](https://search.maven.org/remotecontent?filepath=com/github/magese/ik-analyzer/8.3.0/ik-analyzer-8.3.0.jar)
|
||||
* 历史版本:[Maven Central](https://search.maven.org/search?q=g:com.github.magese%20AND%20a:ik-analyzer&core=gav)
|
||||
|
||||
```console
|
||||
@ -38,7 +38,7 @@ ik-analyzer for solr 7.x-8.x
|
||||
<dependency>
|
||||
<groupId>com.github.magese</groupId>
|
||||
<artifactId>ik-analyzer</artifactId>
|
||||
<version>8.2.0</version>
|
||||
<version>8.3.0</version>
|
||||
</dependency>
|
||||
```
|
||||
|
||||
|
4
pom.xml
4
pom.xml
@ -4,7 +4,7 @@
|
||||
|
||||
<groupId>com.github.magese</groupId>
|
||||
<artifactId>ik-analyzer</artifactId>
|
||||
<version>8.2.0</version>
|
||||
<version>8.3.0</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>ik-analyzer-solr</name>
|
||||
@ -13,7 +13,7 @@
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<lucene.version>8.2.0</lucene.version>
|
||||
<lucene.version>8.3.0</lucene.version>
|
||||
<javac.src.version>1.8</javac.src.version>
|
||||
<javac.target.version>1.8</javac.target.version>
|
||||
<maven.compiler.plugin.version>3.3</maven.compiler.plugin.version>
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,21 +21,21 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
||||
import org.wltea.analyzer.cfg.Configuration;
|
||||
import org.wltea.analyzer.cfg.DefaultConfig;
|
||||
import org.wltea.analyzer.dic.Dictionary;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.wltea.analyzer.cfg.Configuration;
|
||||
import org.wltea.analyzer.cfg.DefaultConfig;
|
||||
import org.wltea.analyzer.dic.Dictionary;
|
||||
|
||||
/**
|
||||
* IK分词器主类
|
||||
*/
|
||||
@ -58,7 +58,6 @@ public final class IKSegmenter {
|
||||
*
|
||||
* @param input 读取流
|
||||
* @param useSmart 为true,使用智能分词策略
|
||||
* <p>
|
||||
* 非智能分词:细粒度输出所有可能的切分结果
|
||||
* 智能分词: 合并数词和量词,对分词结果进行歧义判断
|
||||
*/
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.core;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.2.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.dic;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.dic;
|
||||
@ -226,23 +226,25 @@ public class Dictionary {
|
||||
private void loadMainDict() {
|
||||
// 建立一个主词典实例
|
||||
_MainDict = new DictSegment((char) 0);
|
||||
// 读取主词典文件
|
||||
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary());
|
||||
if (is == null) {
|
||||
throw new RuntimeException("Main Dictionary not found!!!");
|
||||
}
|
||||
|
||||
try {
|
||||
readDict(is, _MainDict);
|
||||
} catch (IOException ioe) {
|
||||
System.err.println("Main Dictionary loading exception.");
|
||||
ioe.printStackTrace();
|
||||
|
||||
} finally {
|
||||
// 获取是否加载主词典
|
||||
if (cfg.useMainDict()) {
|
||||
// 读取主词典文件
|
||||
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary());
|
||||
if (is == null) {
|
||||
throw new RuntimeException("Main Dictionary not found!!!");
|
||||
}
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
readDict(is, _MainDict);
|
||||
} catch (IOException ioe) {
|
||||
System.err.println("Main Dictionary loading exception.");
|
||||
ioe.printStackTrace();
|
||||
|
||||
} finally {
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
// 加载扩展词典
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.dic;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.lucene;
|
||||
@ -34,44 +34,40 @@ import org.apache.lucene.analysis.Tokenizer;
|
||||
* IK分词器,Lucene Analyzer接口实现
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public final class IKAnalyzer extends Analyzer{
|
||||
public final class IKAnalyzer extends Analyzer {
|
||||
|
||||
private boolean useSmart;
|
||||
private boolean useSmart;
|
||||
|
||||
private boolean useSmart() {
|
||||
return useSmart;
|
||||
}
|
||||
private boolean useSmart() {
|
||||
return useSmart;
|
||||
}
|
||||
|
||||
public void setUseSmart(boolean useSmart) {
|
||||
this.useSmart = useSmart;
|
||||
}
|
||||
|
||||
/**
|
||||
* IK分词器Lucene Analyzer接口实现类
|
||||
*
|
||||
* 默认细粒度切分算法
|
||||
*/
|
||||
public IKAnalyzer(){
|
||||
this(false);
|
||||
}
|
||||
/**
|
||||
* IK分词器Lucene Analyzer接口实现类
|
||||
* 默认细粒度切分算法
|
||||
*/
|
||||
public IKAnalyzer() {
|
||||
this(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* IK分词器Lucene Analyzer接口实现类
|
||||
*
|
||||
* @param useSmart 当为true时,分词器进行智能切分
|
||||
*/
|
||||
public IKAnalyzer(boolean useSmart){
|
||||
super();
|
||||
this.useSmart = useSmart;
|
||||
}
|
||||
/**
|
||||
* IK分词器Lucene Analyzer接口实现类
|
||||
*
|
||||
* @param useSmart 当为true时,分词器进行智能切分
|
||||
*/
|
||||
public IKAnalyzer(boolean useSmart) {
|
||||
super();
|
||||
this.useSmart = useSmart;
|
||||
}
|
||||
|
||||
/**
|
||||
* 重载Analyzer接口,构造分词组件
|
||||
*/
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer _IKTokenizer = new IKTokenizer(this.useSmart());
|
||||
return new TokenStreamComponents(_IKTokenizer);
|
||||
}
|
||||
/**
|
||||
* 重载Analyzer接口,构造分词组件
|
||||
*/
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer _IKTokenizer = new IKTokenizer(this.useSmart());
|
||||
return new TokenStreamComponents(_IKTokenizer);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.lucene;
|
||||
@ -44,87 +44,87 @@ import java.io.IOException;
|
||||
@SuppressWarnings("unused")
|
||||
public final class IKTokenizer extends Tokenizer {
|
||||
|
||||
//IK分词器实现
|
||||
private IKSegmenter _IKImplement;
|
||||
//IK分词器实现
|
||||
private IKSegmenter _IKImplement;
|
||||
|
||||
//词元文本属性
|
||||
private CharTermAttribute termAtt;
|
||||
//词元位移属性
|
||||
private OffsetAttribute offsetAtt;
|
||||
//词元分类属性(该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量)
|
||||
private TypeAttribute typeAtt;
|
||||
//记录最后一个词元的结束位置
|
||||
private int endPosition;
|
||||
//词元文本属性
|
||||
private CharTermAttribute termAtt;
|
||||
//词元位移属性
|
||||
private OffsetAttribute offsetAtt;
|
||||
//词元分类属性(该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量)
|
||||
private TypeAttribute typeAtt;
|
||||
//记录最后一个词元的结束位置
|
||||
private int endPosition;
|
||||
|
||||
/**
|
||||
* Lucene 7.6 Tokenizer适配器类构造函数
|
||||
*/
|
||||
public IKTokenizer() {
|
||||
this(false);
|
||||
}
|
||||
/**
|
||||
* Lucene 8.3.0 Tokenizer适配器类构造函数
|
||||
*/
|
||||
public IKTokenizer() {
|
||||
this(false);
|
||||
}
|
||||
|
||||
IKTokenizer(boolean useSmart) {
|
||||
super();
|
||||
init(useSmart);
|
||||
}
|
||||
IKTokenizer(boolean useSmart) {
|
||||
super();
|
||||
init(useSmart);
|
||||
}
|
||||
|
||||
public IKTokenizer(AttributeFactory factory) {
|
||||
this(factory, false);
|
||||
}
|
||||
public IKTokenizer(AttributeFactory factory) {
|
||||
this(factory, false);
|
||||
}
|
||||
|
||||
IKTokenizer(AttributeFactory factory, boolean useSmart) {
|
||||
super(factory);
|
||||
init(useSmart);
|
||||
}
|
||||
IKTokenizer(AttributeFactory factory, boolean useSmart) {
|
||||
super(factory);
|
||||
init(useSmart);
|
||||
}
|
||||
|
||||
private void init(boolean useSmart) {
|
||||
offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
termAtt = addAttribute(CharTermAttribute.class);
|
||||
typeAtt = addAttribute(TypeAttribute.class);
|
||||
_IKImplement = new IKSegmenter(input , useSmart);
|
||||
}
|
||||
private void init(boolean useSmart) {
|
||||
offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
termAtt = addAttribute(CharTermAttribute.class);
|
||||
typeAtt = addAttribute(TypeAttribute.class);
|
||||
_IKImplement = new IKSegmenter(input, useSmart);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.analysis.TokenStream#incrementToken()
|
||||
*/
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
//清除所有的词元属性
|
||||
clearAttributes();
|
||||
Lexeme nextLexeme = _IKImplement.next();
|
||||
if(nextLexeme != null){
|
||||
//将Lexeme转成Attributes
|
||||
//设置词元文本
|
||||
termAtt.append(nextLexeme.getLexemeText());
|
||||
//设置词元长度
|
||||
termAtt.setLength(nextLexeme.getLength());
|
||||
//设置词元位移
|
||||
offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
|
||||
//记录分词的最后位置
|
||||
endPosition = nextLexeme.getEndPosition();
|
||||
//记录词元分类
|
||||
typeAtt.setType(nextLexeme.getLexemeTypeString());
|
||||
//返回true告知还有下个词元
|
||||
return true;
|
||||
}
|
||||
//返回false告知词元输出完毕
|
||||
return false;
|
||||
}
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.analysis.TokenStream#incrementToken()
|
||||
*/
|
||||
@Override
|
||||
public boolean incrementToken() throws IOException {
|
||||
//清除所有的词元属性
|
||||
clearAttributes();
|
||||
Lexeme nextLexeme = _IKImplement.next();
|
||||
if (nextLexeme != null) {
|
||||
//将Lexeme转成Attributes
|
||||
//设置词元文本
|
||||
termAtt.append(nextLexeme.getLexemeText());
|
||||
//设置词元长度
|
||||
termAtt.setLength(nextLexeme.getLength());
|
||||
//设置词元位移
|
||||
offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
|
||||
//记录分词的最后位置
|
||||
endPosition = nextLexeme.getEndPosition();
|
||||
//记录词元分类
|
||||
typeAtt.setType(nextLexeme.getLexemeTypeString());
|
||||
//返回true告知还有下个词元
|
||||
return true;
|
||||
}
|
||||
//返回false告知词元输出完毕
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
* @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader)
|
||||
*/
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
_IKImplement.reset(input);
|
||||
}
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
* @see org.apache.lucene.analysis.Tokenizer#reset()
|
||||
*/
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
_IKImplement.reset(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void end() {
|
||||
// set final offset
|
||||
int finalOffset = correctOffset(this.endPosition);
|
||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||
}
|
||||
@Override
|
||||
public final void end() {
|
||||
// set final offset
|
||||
int finalOffset = correctOffset(this.endPosition);
|
||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.lucene;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.lucene;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.query;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.query;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.sample;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* IK 中文分词 版本 8.2.0
|
||||
* IK Analyzer release 8.2.0
|
||||
* IK 中文分词 版本 8.3.0
|
||||
* IK Analyzer release 8.3.0
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
@ -21,8 +21,8 @@
|
||||
* 版权声明 2012,乌龙茶工作室
|
||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
||||
*
|
||||
* 8.2.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.2.0 update by Magese(magese@live.cn)
|
||||
* 8.3.0版本 由 Magese (magese@live.cn) 更新
|
||||
* release 8.3.0 update by Magese(magese@live.cn)
|
||||
*
|
||||
*/
|
||||
package org.wltea.analyzer.sample;
|
||||
|
Loading…
x
Reference in New Issue
Block a user