diff --git a/pom.xml b/pom.xml index e93f79c..1c34182 100644 --- a/pom.xml +++ b/pom.xml @@ -20,13 +20,6 @@ - - junit - junit - 4.13.1 - test - - org.apache.lucene lucene-core diff --git a/src/main/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java b/src/main/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java deleted file mode 100644 index 906997b..0000000 --- a/src/main/java/org/wltea/analyzer/sample/IKAnalzyerDemo.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * IK 中文分词 版本 8.4.0 - * IK Analyzer release 8.4.0 - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * 源代码由林良益(linliangyi2005@gmail.com)提供 - * 版权声明 2012,乌龙茶工作室 - * provided by Linliangyi and copyright 2012 by Oolong studio - * - * 8.4.0版本 由 Magese (magese@live.cn) 更新 - * release 8.4.0 update by Magese(magese@live.cn) - * - */ -package org.wltea.analyzer.sample; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.wltea.analyzer.lucene.IKAnalyzer; - -import java.io.IOException; -import java.io.StringReader; - -/** - * 使用IKAnalyzer进行分词的演示 - * 2012-10-22 - */ -public class IKAnalzyerDemo { - - public static void main(String[] args) { - //构建IK分词器,使用smart分词模式 - Analyzer analyzer = new IKAnalyzer(true); - - //获取Lucene的TokenStream对象 - TokenStream ts = null; - try { - ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子,你可以直接运行它!IKAnalyer can analysis english text too")); - //获取词元位置属性 - OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); - //获取词元文本属性 - CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); - //获取词元文本属性 - TypeAttribute type = ts.addAttribute(TypeAttribute.class); - - - //重置TokenStream(重置StringReader) - ts.reset(); - //迭代获取分词结果 - while (ts.incrementToken()) { - System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type()); - } - //关闭TokenStream(关闭StringReader) - ts.end(); // Perform end-of-stream operations, e.g. set the final offset. - - } catch (IOException e) { - e.printStackTrace(); - } finally { - //释放TokenStream的所有资源 - if (ts != null) { - try { - ts.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - - } - -} diff --git a/src/main/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java b/src/main/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java deleted file mode 100644 index 9ad1d8f..0000000 --- a/src/main/java/org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * IK 中文分词 版本 8.4.0 - * IK Analyzer release 8.4.0 - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * 源代码由林良益(linliangyi2005@gmail.com)提供 - * 版权声明 2012,乌龙茶工作室 - * provided by Linliangyi and copyright 2012 by Oolong studio - * - * 8.4.0版本 由 Magese (magese@live.cn) 更新 - * release 8.4.0 update by Magese(magese@live.cn) - * - */ -package org.wltea.analyzer.sample; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.queryparser.classic.QueryParser; -import org.apache.lucene.search.*; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.RAMDirectory; -import org.wltea.analyzer.lucene.IKAnalyzer; - -import java.io.IOException; - - -/** - * 使用IKAnalyzer进行Lucene索引和查询的演示 - * 2012-3-2 - *

- * 以下是结合Lucene4.0 API的写法 - */ -public class LuceneIndexAndSearchDemo { - - - /** - * 模拟: - * 创建一个单条记录的索引,并对其进行搜索 - * - */ - public static void main(String[] args) { - //Lucene Document的域名 - String fieldName = "text"; - //检索内容 - String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。"; - - //实例化IKAnalyzer分词器 - Analyzer analyzer = new IKAnalyzer(true); - - Directory directory = null; - IndexWriter iwriter; - IndexReader ireader = null; - IndexSearcher isearcher; - try { - //建立内存索引对象 - //noinspection deprecation - directory = new RAMDirectory(); - - //配置IndexWriterConfig - IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer); - iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); - iwriter = new IndexWriter(directory, iwConfig); - //写入索引 - Document doc = new Document(); - doc.add(new StringField("ID", "10000", Field.Store.YES)); - doc.add(new TextField(fieldName, text, Field.Store.YES)); - iwriter.addDocument(doc); - iwriter.close(); - - - //搜索过程********************************** - //实例化搜索器 - ireader = DirectoryReader.open(directory); - isearcher = new IndexSearcher(ireader); - - String keyword = "中文分词工具包"; - //使用QueryParser查询分析器构造Query对象 - QueryParser qp = new QueryParser(fieldName, analyzer); - qp.setDefaultOperator(QueryParser.AND_OPERATOR); - Query query = qp.parse(keyword); - System.out.println("Query = " + query); - - //搜索相似度最高的5条记录 - TopDocs topDocs = isearcher.search(query, 5); - long totalHits = topDocs.totalHits.value; - System.out.println("命中:" + totalHits); - //输出结果 - ScoreDoc[] scoreDocs = topDocs.scoreDocs; - for (int i = 0; i < totalHits; i++) { - Document targetDoc = isearcher.doc(scoreDocs[i].doc); - System.out.println("内容:" + targetDoc.toString()); - } - - } catch (ParseException | IOException e) { - e.printStackTrace(); - } finally { - if (ireader != null) { - try { - ireader.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - if (directory != null) { - try { - directory.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - } -} diff --git a/src/main/resources/ik.conf b/src/main/resources/ik.conf index 34d9f60..2a467ff 100644 --- a/src/main/resources/ik.conf +++ b/src/main/resources/ik.conf @@ -1,3 +1,3 @@ -Wed Aug 01 11:21:30 CST 2018 +Wed Aug 01 00:00:00 CST 2021 files=dynamicdic.txt lastupdate=0