commit 30a4fe263c6a8a651820a59688a722328870abf2 Author: Shay Banon Date: Mon Apr 30 13:42:59 2012 +0300 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..dcd5458de92 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +/data +/work +/logs +/.idea +/target +.DS_Store +*.iml +/.project +/.settings +/.classpath diff --git a/README.md b/README.md new file mode 100644 index 00000000000..8f19fe8be82 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +Japanese (kuromoji) Analysis for ElasticSearch +================================== + +The Japanese (kuromoji) Analysis plugin integrates the Lucene kuromoji analysis module into elasticsearch. + +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.0.0`. + + -------------------------------------------------- + | Kuromoji Analysis Plugin | ElasticSearch | + -------------------------------------------------- + | master | 0.19 -> master | + -------------------------------------------------- + | 1.0.0 | 0.19 -> master | + -------------------------------------------------- + +The plugin includes the `kuromoji` analyzer. 
diff --git a/pom.xml b/pom.xml new file mode 100644 index 00000000000..6fb6126ed72 --- /dev/null +++ b/pom.xml @@ -0,0 +1,139 @@ + + + elasticsearch-analysis-kuromoji + 4.0.0 + org.elasticsearch + elasticsearch-analysis-kuromoji + 1.0.0-SNAPSHOT + jar + Japanese (kuromoji) Analysis for ElasticSearch + 2009 + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + scm:git:git@github.com:elasticsearch/elasticsearch-analysis-kuromoji.git + scm:git:git@github.com:elasticsearch/elasticsearch-analysis-kuromoji.git + + http://github.com/elasticsearch/elasticsearch-analysis-kuromoji + + + + org.sonatype.oss + oss-parent + 7 + + + + 0.19.3 + + + + + + + + org.elasticsearch + elasticsearch + ${elasticsearch.version} + compile + + + + org.apache.lucene + lucene-kuromoji + 3.6.0 + compile + + + + log4j + log4j + 1.2.16 + runtime + + + + org.testng + testng + 6.3.1 + test + + + + org.hamcrest + hamcrest-core + 1.3.RC2 + test + + + + org.hamcrest + hamcrest-library + 1.3.RC2 + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + 1.6 + 1.6 + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.11 + + + **/*Tests.java + + + + + org.apache.maven.plugins + maven-source-plugin + 2.1.2 + + + attach-sources + + jar + + + + + + maven-assembly-plugin + 2.3 + + false + ${project.build.directory}/releases/ + + ${basedir}/src/main/assemblies/plugin.xml + + + + + package + + single + + + + + + + \ No newline at end of file diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml new file mode 100644 index 00000000000..03c6bb0ac1b --- /dev/null +++ b/src/main/assemblies/plugin.xml @@ -0,0 +1,26 @@ + + + plugin + + zip + + false + + + / + true + true + + org.elasticsearch:elasticsearch + + + + / + true + true + + org.apache.lucene:lucene-kuromoji + + + + \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java 
b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java new file mode 100644 index 00000000000..857e3f89dd8 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java @@ -0,0 +1,63 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; +import org.apache.lucene.analysis.ja.JapaneseTokenizer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +import java.util.Set; + +/** Analyzer provider that builds a single Lucene JapaneseAnalyzer in its constructor and returns that same instance from get(). Stop words come from Analysis.parseStopWords, which is given JapaneseAnalyzer.getDefaultStopSet() as the default set. The "mode" setting selects the tokenizer mode ("search", "normal" or "extended", compared case-insensitively); a missing or unrecognized value leaves JapaneseTokenizer.DEFAULT_MODE in place. + */ +public class KuromojiAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final JapaneseAnalyzer analyzer; + + @Inject + public KuromojiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + Set stopWords = Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet(), version); + JapaneseTokenizer.Mode mode = JapaneseTokenizer.DEFAULT_MODE; + String modeSetting = settings.get("mode", null); + if (modeSetting != null) { + if ("search".equalsIgnoreCase(modeSetting)) { + mode = JapaneseTokenizer.Mode.SEARCH; + } else if ("normal".equalsIgnoreCase(modeSetting)) { + mode = JapaneseTokenizer.Mode.NORMAL; + } else if ("extended".equalsIgnoreCase(modeSetting)) { + mode = JapaneseTokenizer.Mode.EXTENDED; + } + } + + analyzer = new JapaneseAnalyzer(version, null /* user dictionary argument: none supplied */, mode, CharArraySet.copy(version, stopWords), JapaneseAnalyzer.getDefaultStopTags()); + } + + @Override + public JapaneseAnalyzer get() { + return this.analyzer; + } +} diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java new file mode 100644 index 00000000000..a3ba70ad5cf --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ 
-0,0 +1,44 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.plugin.analysis.kuromoji; + +import org.elasticsearch.index.analysis.KuromojiAnalyzerProvider; +import org.elasticsearch.index.analysis.AnalysisModule; +import org.elasticsearch.plugins.AbstractPlugin; + +/** + * Plugin named "analysis-kuromoji" whose onModule(AnalysisModule) hook registers KuromojiAnalyzerProvider under the analyzer name "kuromoji". + */ +public class AnalysisKuromojiPlugin extends AbstractPlugin { + + @Override + public String name() { + return "analysis-kuromoji"; + } + + @Override + public String description() { + return "Kuromoji analysis support"; + } + + public void onModule(AnalysisModule module) { + module.addAnalyzer("kuromoji", KuromojiAnalyzerProvider.class); + } +} diff --git a/src/main/resources/es-plugin.properties b/src/main/resources/es-plugin.properties new file mode 100644 index 00000000000..c35abda2c62 --- /dev/null +++ b/src/main/resources/es-plugin.properties @@ -0,0 +1 @@ +plugin=org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin