Update lucene version to 8.3.0

This commit is contained in:
magese 2019-11-12 11:30:57 +08:00
parent 356d9d9ae9
commit 0c8992fd80
24 changed files with 222 additions and 225 deletions

View File

@ -30,7 +30,7 @@ ik-analyzer for solr 7.x-8.x
## 使用说明 ## 使用说明
* jar包下载地址[![GitHub version](https://img.shields.io/badge/version-8.2.0-519dd9.svg)](https://search.maven.org/remotecontent?filepath=com/github/magese/ik-analyzer/8.2.0/ik-analyzer-8.2.0.jar) * jar包下载地址[![GitHub version](https://img.shields.io/badge/version-8.3.0-519dd9.svg)](https://search.maven.org/remotecontent?filepath=com/github/magese/ik-analyzer/8.3.0/ik-analyzer-8.3.0.jar)
* 历史版本:[![GitHub version](https://img.shields.io/maven-central/v/com.github.magese/ik-analyzer.svg?style=flat-square)](https://search.maven.org/search?q=g:com.github.magese%20AND%20a:ik-analyzer&core=gav) * 历史版本:[![GitHub version](https://img.shields.io/maven-central/v/com.github.magese/ik-analyzer.svg?style=flat-square)](https://search.maven.org/search?q=g:com.github.magese%20AND%20a:ik-analyzer&core=gav)
```console ```console
@ -38,7 +38,7 @@ ik-analyzer for solr 7.x-8.x
<dependency> <dependency>
<groupId>com.github.magese</groupId> <groupId>com.github.magese</groupId>
<artifactId>ik-analyzer</artifactId> <artifactId>ik-analyzer</artifactId>
<version>8.2.0</version> <version>8.3.0</version>
</dependency> </dependency>
``` ```

View File

@ -4,7 +4,7 @@
<groupId>com.github.magese</groupId> <groupId>com.github.magese</groupId>
<artifactId>ik-analyzer</artifactId> <artifactId>ik-analyzer</artifactId>
<version>8.2.0</version> <version>8.3.0</version>
<packaging>jar</packaging> <packaging>jar</packaging>
<name>ik-analyzer-solr</name> <name>ik-analyzer-solr</name>
@ -13,7 +13,7 @@
<properties> <properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<lucene.version>8.2.0</lucene.version> <lucene.version>8.3.0</lucene.version>
<javac.src.version>1.8</javac.src.version> <javac.src.version>1.8</javac.src.version>
<javac.target.version>1.8</javac.target.version> <javac.target.version>1.8</javac.target.version>
<maven.compiler.plugin.version>3.3</maven.compiler.plugin.version> <maven.compiler.plugin.version>3.3</maven.compiler.plugin.version>

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,21 +21,21 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;
import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.cfg.DefaultConfig;
import org.wltea.analyzer.dic.Dictionary;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.cfg.DefaultConfig;
import org.wltea.analyzer.dic.Dictionary;
/** /**
* IK分词器主类 * IK分词器主类
*/ */
@ -58,7 +58,6 @@ public final class IKSegmenter {
* *
* @param input 读取流 * @param input 读取流
* @param useSmart 为true使用智能分词策略 * @param useSmart 为true使用智能分词策略
* <p>
* 非智能分词细粒度输出所有可能的切分结果 * 非智能分词细粒度输出所有可能的切分结果
* 智能分词 合并数词和量词对分词结果进行歧义判断 * 智能分词 合并数词和量词对分词结果进行歧义判断
*/ */

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;

View File

@ -1,5 +1,5 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.dic; package org.wltea.analyzer.dic;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.dic; package org.wltea.analyzer.dic;
@ -226,23 +226,25 @@ public class Dictionary {
private void loadMainDict() { private void loadMainDict() {
// 建立一个主词典实例 // 建立一个主词典实例
_MainDict = new DictSegment((char) 0); _MainDict = new DictSegment((char) 0);
// 读取主词典文件 // 获取是否加载主词典
InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary()); if (cfg.useMainDict()) {
if (is == null) { // 读取主词典文件
throw new RuntimeException("Main Dictionary not found!!!"); InputStream is = this.getClass().getClassLoader().getResourceAsStream(cfg.getMainDictionary());
} if (is == null) {
throw new RuntimeException("Main Dictionary not found!!!");
try { }
readDict(is, _MainDict);
} catch (IOException ioe) {
System.err.println("Main Dictionary loading exception.");
ioe.printStackTrace();
} finally {
try { try {
is.close(); readDict(is, _MainDict);
} catch (IOException e) { } catch (IOException ioe) {
e.printStackTrace(); System.err.println("Main Dictionary loading exception.");
ioe.printStackTrace();
} finally {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
} }
} }
// 加载扩展词典 // 加载扩展词典

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.dic; package org.wltea.analyzer.dic;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.lucene; package org.wltea.analyzer.lucene;
@ -34,44 +34,40 @@ import org.apache.lucene.analysis.Tokenizer;
* IK分词器Lucene Analyzer接口实现 * IK分词器Lucene Analyzer接口实现
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
public final class IKAnalyzer extends Analyzer{ public final class IKAnalyzer extends Analyzer {
private boolean useSmart;
private boolean useSmart() {
return useSmart;
}
public void setUseSmart(boolean useSmart) { private boolean useSmart;
this.useSmart = useSmart;
}
/** private boolean useSmart() {
* IK分词器Lucene Analyzer接口实现类 return useSmart;
* }
* 默认细粒度切分算法
*/
public IKAnalyzer(){
this(false);
}
/**
* IK分词器Lucene Analyzer接口实现类
*
* @param useSmart 当为true时分词器进行智能切分
*/
public IKAnalyzer(boolean useSmart){
super();
this.useSmart = useSmart;
}
/**
* 重载Analyzer接口构造分词组件 /**
*/ * IK分词器Lucene Analyzer接口实现类
@Override * 默认细粒度切分算法
protected TokenStreamComponents createComponents(String fieldName) { */
Tokenizer _IKTokenizer = new IKTokenizer(this.useSmart()); public IKAnalyzer() {
return new TokenStreamComponents(_IKTokenizer); this(false);
} }
/**
* IK分词器Lucene Analyzer接口实现类
*
* @param useSmart 当为true时分词器进行智能切分
*/
public IKAnalyzer(boolean useSmart) {
super();
this.useSmart = useSmart;
}
/**
* 重载Analyzer接口构造分词组件
*/
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer _IKTokenizer = new IKTokenizer(this.useSmart());
return new TokenStreamComponents(_IKTokenizer);
}
} }

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.lucene; package org.wltea.analyzer.lucene;
@ -43,88 +43,88 @@ import java.io.IOException;
*/ */
@SuppressWarnings("unused") @SuppressWarnings("unused")
public final class IKTokenizer extends Tokenizer { public final class IKTokenizer extends Tokenizer {
//IK分词器实现
private IKSegmenter _IKImplement;
//词元文本属性
private CharTermAttribute termAtt;
//词元位移属性
private OffsetAttribute offsetAtt;
//词元分类属性该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量
private TypeAttribute typeAtt;
//记录最后一个词元的结束位置
private int endPosition;
/**
* Lucene 7.6 Tokenizer适配器类构造函数
*/
public IKTokenizer() {
this(false);
}
IKTokenizer(boolean useSmart) { //IK分词器实现
super(); private IKSegmenter _IKImplement;
init(useSmart);
}
public IKTokenizer(AttributeFactory factory) { //词元文本属性
this(factory, false); private CharTermAttribute termAtt;
} //词元位移属性
private OffsetAttribute offsetAtt;
//词元分类属性该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量
private TypeAttribute typeAtt;
//记录最后一个词元的结束位置
private int endPosition;
IKTokenizer(AttributeFactory factory, boolean useSmart) { /**
super(factory); * Lucene 7.6 Tokenizer适配器类构造函数
init(useSmart); */
} public IKTokenizer() {
this(false);
}
private void init(boolean useSmart) { IKTokenizer(boolean useSmart) {
offsetAtt = addAttribute(OffsetAttribute.class); super();
termAtt = addAttribute(CharTermAttribute.class); init(useSmart);
typeAtt = addAttribute(TypeAttribute.class); }
_IKImplement = new IKSegmenter(input , useSmart);
}
/* (non-Javadoc) public IKTokenizer(AttributeFactory factory) {
* @see org.apache.lucene.analysis.TokenStream#incrementToken() this(factory, false);
*/ }
@Override
public boolean incrementToken() throws IOException { IKTokenizer(AttributeFactory factory, boolean useSmart) {
//清除所有的词元属性 super(factory);
clearAttributes(); init(useSmart);
Lexeme nextLexeme = _IKImplement.next(); }
if(nextLexeme != null){
//将Lexeme转成Attributes private void init(boolean useSmart) {
//设置词元文本 offsetAtt = addAttribute(OffsetAttribute.class);
termAtt.append(nextLexeme.getLexemeText()); termAtt = addAttribute(CharTermAttribute.class);
//设置词元长度 typeAtt = addAttribute(TypeAttribute.class);
termAtt.setLength(nextLexeme.getLength()); _IKImplement = new IKSegmenter(input, useSmart);
//设置词元位移 }
offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
//记录分词的最后位置 /* (non-Javadoc)
endPosition = nextLexeme.getEndPosition(); * @see org.apache.lucene.analysis.TokenStream#incrementToken()
//记录词元分类 */
typeAtt.setType(nextLexeme.getLexemeTypeString()); @Override
//返回true告知还有下个词元 public boolean incrementToken() throws IOException {
return true; //清除所有的词元属性
} clearAttributes();
//返回false告知词元输出完毕 Lexeme nextLexeme = _IKImplement.next();
return false; if (nextLexeme != null) {
} //将Lexeme转成Attributes
//设置词元文本
/* termAtt.append(nextLexeme.getLexemeText());
* (non-Javadoc) //设置词元长度
* @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader) termAtt.setLength(nextLexeme.getLength());
*/ //设置词元位移
@Override offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
public void reset() throws IOException { //记录分词的最后位置
super.reset(); endPosition = nextLexeme.getEndPosition();
_IKImplement.reset(input); //记录词元分类
} typeAtt.setType(nextLexeme.getLexemeTypeString());
//返回true告知还有下个词元
@Override return true;
public final void end() { }
// set final offset //返回false告知词元输出完毕
int finalOffset = correctOffset(this.endPosition); return false;
offsetAtt.setOffset(finalOffset, finalOffset); }
}
/*
* (non-Javadoc)
* @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader)
*/
@Override
public void reset() throws IOException {
super.reset();
_IKImplement.reset(input);
}
@Override
public final void end() {
// set final offset
int finalOffset = correctOffset(this.endPosition);
offsetAtt.setOffset(finalOffset, finalOffset);
}
} }

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.lucene; package org.wltea.analyzer.lucene;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.lucene; package org.wltea.analyzer.lucene;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.query; package org.wltea.analyzer.query;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.query; package org.wltea.analyzer.query;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.sample; package org.wltea.analyzer.sample;

View File

@ -1,6 +1,6 @@
/* /*
* IK 中文分词 版本 8.2.0 * IK 中文分词 版本 8.3.0
* IK Analyzer release 8.2.0 * IK Analyzer release 8.3.0
* *
* Licensed to the Apache Software Foundation (ASF) under one or more * Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with * contributor license agreements. See the NOTICE file distributed with
@ -21,8 +21,8 @@
* 版权声明 2012乌龙茶工作室 * 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio * provided by Linliangyi and copyright 2012 by Oolong studio
* *
* 8.2.0版本 Magese (magese@live.cn) 更新 * 8.3.0版本 Magese (magese@live.cn) 更新
* release 8.2.0 update by Magese(magese@live.cn) * release 8.3.0 update by Magese(magese@live.cn)
* *
*/ */
package org.wltea.analyzer.sample; package org.wltea.analyzer.sample;