移除demo,修改配置
This commit is contained in:
parent
4180900415
commit
bf3df0e58c
7
pom.xml
7
pom.xml
@ -20,13 +20,6 @@
|
|||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
|
||||||
<groupId>junit</groupId>
|
|
||||||
<artifactId>junit</artifactId>
|
|
||||||
<version>4.13.1</version>
|
|
||||||
<scope>test</scope>
|
|
||||||
</dependency>
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.lucene</groupId>
|
<groupId>org.apache.lucene</groupId>
|
||||||
<artifactId>lucene-core</artifactId>
|
<artifactId>lucene-core</artifactId>
|
||||||
|
@ -1,86 +0,0 @@
|
|||||||
/*
|
|
||||||
* IK 中文分词 版本 8.4.0
|
|
||||||
* IK Analyzer release 8.4.0
|
|
||||||
*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*
|
|
||||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
|
||||||
* 版权声明 2012,乌龙茶工作室
|
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
|
||||||
*
|
|
||||||
* 8.4.0版本 由 Magese (magese@live.cn) 更新
|
|
||||||
* release 8.4.0 update by Magese(magese@live.cn)
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
package org.wltea.analyzer.sample;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
|
||||||
import org.wltea.analyzer.lucene.IKAnalyzer;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.StringReader;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 使用IKAnalyzer进行分词的演示
|
|
||||||
* 2012-10-22
|
|
||||||
*/
|
|
||||||
public class IKAnalzyerDemo {
|
|
||||||
|
|
||||||
public static void main(String[] args) {
|
|
||||||
//构建IK分词器,使用smart分词模式
|
|
||||||
Analyzer analyzer = new IKAnalyzer(true);
|
|
||||||
|
|
||||||
//获取Lucene的TokenStream对象
|
|
||||||
TokenStream ts = null;
|
|
||||||
try {
|
|
||||||
ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子,你可以直接运行它!IKAnalyer can analysis english text too"));
|
|
||||||
//获取词元位置属性
|
|
||||||
OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
|
|
||||||
//获取词元文本属性
|
|
||||||
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
|
|
||||||
//获取词元文本属性
|
|
||||||
TypeAttribute type = ts.addAttribute(TypeAttribute.class);
|
|
||||||
|
|
||||||
|
|
||||||
//重置TokenStream(重置StringReader)
|
|
||||||
ts.reset();
|
|
||||||
//迭代获取分词结果
|
|
||||||
while (ts.incrementToken()) {
|
|
||||||
System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type());
|
|
||||||
}
|
|
||||||
//关闭TokenStream(关闭StringReader)
|
|
||||||
ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
|
|
||||||
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
} finally {
|
|
||||||
//释放TokenStream的所有资源
|
|
||||||
if (ts != null) {
|
|
||||||
try {
|
|
||||||
ts.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,136 +0,0 @@
|
|||||||
/*
|
|
||||||
* IK 中文分词 版本 8.4.0
|
|
||||||
* IK Analyzer release 8.4.0
|
|
||||||
*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*
|
|
||||||
* 源代码由林良益(linliangyi2005@gmail.com)提供
|
|
||||||
* 版权声明 2012,乌龙茶工作室
|
|
||||||
* provided by Linliangyi and copyright 2012 by Oolong studio
|
|
||||||
*
|
|
||||||
* 8.4.0版本 由 Magese (magese@live.cn) 更新
|
|
||||||
* release 8.4.0 update by Magese(magese@live.cn)
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
package org.wltea.analyzer.sample;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.document.StringField;
|
|
||||||
import org.apache.lucene.document.TextField;
|
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.IndexWriter;
|
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
|
||||||
import org.apache.lucene.queryparser.classic.ParseException;
|
|
||||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
|
||||||
import org.apache.lucene.search.*;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
|
||||||
import org.wltea.analyzer.lucene.IKAnalyzer;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 使用IKAnalyzer进行Lucene索引和查询的演示
|
|
||||||
* 2012-3-2
|
|
||||||
* <p>
|
|
||||||
* 以下是结合Lucene4.0 API的写法
|
|
||||||
*/
|
|
||||||
public class LuceneIndexAndSearchDemo {
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 模拟:
|
|
||||||
* 创建一个单条记录的索引,并对其进行搜索
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public static void main(String[] args) {
|
|
||||||
//Lucene Document的域名
|
|
||||||
String fieldName = "text";
|
|
||||||
//检索内容
|
|
||||||
String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";
|
|
||||||
|
|
||||||
//实例化IKAnalyzer分词器
|
|
||||||
Analyzer analyzer = new IKAnalyzer(true);
|
|
||||||
|
|
||||||
Directory directory = null;
|
|
||||||
IndexWriter iwriter;
|
|
||||||
IndexReader ireader = null;
|
|
||||||
IndexSearcher isearcher;
|
|
||||||
try {
|
|
||||||
//建立内存索引对象
|
|
||||||
//noinspection deprecation
|
|
||||||
directory = new RAMDirectory();
|
|
||||||
|
|
||||||
//配置IndexWriterConfig
|
|
||||||
IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
|
|
||||||
iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
|
|
||||||
iwriter = new IndexWriter(directory, iwConfig);
|
|
||||||
//写入索引
|
|
||||||
Document doc = new Document();
|
|
||||||
doc.add(new StringField("ID", "10000", Field.Store.YES));
|
|
||||||
doc.add(new TextField(fieldName, text, Field.Store.YES));
|
|
||||||
iwriter.addDocument(doc);
|
|
||||||
iwriter.close();
|
|
||||||
|
|
||||||
|
|
||||||
//搜索过程**********************************
|
|
||||||
//实例化搜索器
|
|
||||||
ireader = DirectoryReader.open(directory);
|
|
||||||
isearcher = new IndexSearcher(ireader);
|
|
||||||
|
|
||||||
String keyword = "中文分词工具包";
|
|
||||||
//使用QueryParser查询分析器构造Query对象
|
|
||||||
QueryParser qp = new QueryParser(fieldName, analyzer);
|
|
||||||
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
|
||||||
Query query = qp.parse(keyword);
|
|
||||||
System.out.println("Query = " + query);
|
|
||||||
|
|
||||||
//搜索相似度最高的5条记录
|
|
||||||
TopDocs topDocs = isearcher.search(query, 5);
|
|
||||||
long totalHits = topDocs.totalHits.value;
|
|
||||||
System.out.println("命中:" + totalHits);
|
|
||||||
//输出结果
|
|
||||||
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
|
||||||
for (int i = 0; i < totalHits; i++) {
|
|
||||||
Document targetDoc = isearcher.doc(scoreDocs[i].doc);
|
|
||||||
System.out.println("内容:" + targetDoc.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (ParseException | IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
} finally {
|
|
||||||
if (ireader != null) {
|
|
||||||
try {
|
|
||||||
ireader.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (directory != null) {
|
|
||||||
try {
|
|
||||||
directory.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,3 +1,3 @@
|
|||||||
Wed Aug 01 11:21:30 CST 2018
|
Wed Aug 01 00:00:00 CST 2021
|
||||||
files=dynamicdic.txt
|
files=dynamicdic.txt
|
||||||
lastupdate=0
|
lastupdate=0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user