From 30a4fe263c6a8a651820a59688a722328870abf2 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 30 Apr 2012 13:42:59 +0300 Subject: [PATCH 001/115] initial commit --- .gitignore | 10 ++ README.md | 16 ++ pom.xml | 139 ++++++++++++++++++ src/main/assemblies/plugin.xml | 26 ++++ .../analysis/KuromojiAnalyzerProvider.java | 63 ++++++++ .../kuromoji/AnalysisKuromojiPlugin.java | 44 ++++++ src/main/resources/es-plugin.properties | 1 + 7 files changed, 299 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 pom.xml create mode 100644 src/main/assemblies/plugin.xml create mode 100644 src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java create mode 100644 src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java create mode 100644 src/main/resources/es-plugin.properties diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..dcd5458de92 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +/data +/work +/logs +/.idea +/target +.DS_Store +*.iml +/.project +/.settings +/.classpath diff --git a/README.md b/README.md new file mode 100644 index 00000000000..8f19fe8be82 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +Japanese (kuromoji) Analysis for ElasticSearch +================================== + +The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. + +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.0.0`. + + -------------------------------------------------- + | Smart Chinese Analysis Plugin | ElasticSearch | + -------------------------------------------------- + | master | 0.19 -> master | + -------------------------------------------------- + | 1.0.0 | 0.19 -> master | + -------------------------------------------------- + +The plugin includes the `kuromoji` analyzer. 
diff --git a/pom.xml b/pom.xml new file mode 100644 index 00000000000..6fb6126ed72 --- /dev/null +++ b/pom.xml @@ -0,0 +1,139 @@ + + + elasticsearch-analysis-kuromoji + 4.0.0 + org.elasticsearch + elasticsearch-analysis-kuromoji + 1.0.0-SNAPSHOT + jar + Japanese (kuromoji) Analysis for ElasticSearch + 2009 + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + scm:git:git@github.com:elasticsearch/elasticsearch-analysis-kuromoji.git + scm:git:git@github.com:elasticsearch/elasticsearch-analysis-kuromoji.git + + http://github.com/elasticsearch/elasticsearch-analysis-kuromoji + + + + org.sonatype.oss + oss-parent + 7 + + + + 0.19.3 + + + + + + + + org.elasticsearch + elasticsearch + ${elasticsearch.version} + compile + + + + org.apache.lucene + lucene-kuromoji + 3.6.0 + compile + + + + log4j + log4j + 1.2.16 + runtime + + + + org.testng + testng + 6.3.1 + test + + + + org.hamcrest + hamcrest-core + 1.3.RC2 + test + + + + org.hamcrest + hamcrest-library + 1.3.RC2 + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + 1.6 + 1.6 + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.11 + + + **/*Tests.java + + + + + org.apache.maven.plugins + maven-source-plugin + 2.1.2 + + + attach-sources + + jar + + + + + + maven-assembly-plugin + 2.3 + + false + ${project.build.directory}/releases/ + + ${basedir}/src/main/assemblies/plugin.xml + + + + + package + + single + + + + + + + \ No newline at end of file diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml new file mode 100644 index 00000000000..03c6bb0ac1b --- /dev/null +++ b/src/main/assemblies/plugin.xml @@ -0,0 +1,26 @@ + + + plugin + + zip + + false + + + / + true + true + + org.elasticsearch:elasticsearch + + + + / + true + true + + org.apache.lucene:lucene-kuromoji + + + + \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java 
b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java new file mode 100644 index 00000000000..857e3f89dd8 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java @@ -0,0 +1,63 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; +import org.apache.lucene.analysis.ja.JapaneseTokenizer; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +import java.util.Set; + +/** + */ +public class KuromojiAnalyzerProvider extends AbstractIndexAnalyzerProvider { + + private final JapaneseAnalyzer analyzer; + + @Inject + public KuromojiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + Set stopWords = Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet(), version); + JapaneseTokenizer.Mode mode = JapaneseTokenizer.DEFAULT_MODE; + String modeSetting = settings.get("mode", null); + if (modeSetting != null) { + if ("search".equalsIgnoreCase(modeSetting)) { + mode = JapaneseTokenizer.Mode.SEARCH; + } else if ("normal".equalsIgnoreCase(modeSetting)) { + mode = JapaneseTokenizer.Mode.NORMAL; + } else if ("extended".equalsIgnoreCase(modeSetting)) { + mode = JapaneseTokenizer.Mode.EXTENDED; + } + } + + analyzer = new JapaneseAnalyzer(version, null, mode, CharArraySet.copy(version, stopWords), JapaneseAnalyzer.getDefaultStopTags()); + } + + @Override + public JapaneseAnalyzer get() { + return this.analyzer; + } +} diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java new file mode 100644 index 00000000000..a3ba70ad5cf --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ 
-0,0 +1,44 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.plugin.analysis.kuromoji; + +import org.elasticsearch.index.analysis.KuromojiAnalyzerProvider; +import org.elasticsearch.index.analysis.AnalysisModule; +import org.elasticsearch.plugins.AbstractPlugin; + +/** + * + */ +public class AnalysisKuromojiPlugin extends AbstractPlugin { + + @Override + public String name() { + return "analysis-kuromoji"; + } + + @Override + public String description() { + return "Kuromoji analysis support"; + } + + public void onModule(AnalysisModule module) { + module.addAnalyzer("kuromoji", KuromojiAnalyzerProvider.class); + } +} diff --git a/src/main/resources/es-plugin.properties b/src/main/resources/es-plugin.properties new file mode 100644 index 00000000000..c35abda2c62 --- /dev/null +++ b/src/main/resources/es-plugin.properties @@ -0,0 +1 @@ +plugin=org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin From fcd0d3991f4886b76982b8363e54b25af4508033 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 30 Apr 2012 13:50:14 +0300 Subject: [PATCH 002/115] release 1.0.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 
6fb6126ed72..d489afc12b5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.0.0-SNAPSHOT + 1.0.0 jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From 9356f463f272e9e320b8b844c669da45240cd1b7 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 30 Apr 2012 13:50:47 +0300 Subject: [PATCH 003/115] move to 1.1.0 snap --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d489afc12b5..6129fdb4d05 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.0.0 + 1.1.0-SNAPSHOT jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From 801f621b5274f9ebdd2bf93f4abca5862d0eae48 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 10 Jun 2012 16:04:42 +0200 Subject: [PATCH 004/115] expose individual kuromoji token fitlers & tokenizers in elasticsearch in addition to the japanese analyzer --- README.md | 2 +- .../KuromojiAnalysisBinderProcessor.java | 39 ++++ .../analysis/KuromojiAnalyzerProvider.java | 21 +- .../KuromojiBaseFormFilterFactory.java | 41 ++++ .../KuromojiKatakanaStemmerFactory.java | 47 ++++ .../KuromojiPartOfSpeechFilterFactory.java | 60 +++++ .../KuromojiReadingFormFilterFactory.java | 46 ++++ .../analysis/KuromojiTokenizerFactory.java | 98 ++++++++ .../analysis/KuromojiIndicesAnalysis.java | 121 ++++++++++ .../KuromojiIndicesAnalysisModule.java | 32 +++ .../kuromoji/AnalysisKuromojiPlugin.java | 3 + .../index/analysis/KuromojiAnalysisTests.java | 218 ++++++++++++++++++ .../index/analysis/kuromoji_analysis.json | 31 +++ src/test/resources/es-plugin.properties | 1 + 14 files changed, 745 insertions(+), 15 deletions(-) create mode 100644 src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java create mode 100644 src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java create mode 100644 
src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java create mode 100644 src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java create mode 100644 src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java create mode 100644 src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java create mode 100644 src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java create mode 100644 src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java create mode 100644 src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java create mode 100644 src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json create mode 100644 src/test/resources/es-plugin.properties diff --git a/README.md b/README.md index 8f19fe8be82..fe9e72e3d76 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.0.0`. -------------------------------------------------- - | Smart Chinese Analysis Plugin | ElasticSearch | + | Kuromoji Analysis Plugin | ElasticSearch | -------------------------------------------------- | master | 0.19 -> master | -------------------------------------------------- diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java new file mode 100644 index 00000000000..a761b92c950 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java @@ -0,0 +1,39 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + + +/** + */ +public class KuromojiAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { + + @Override + public void processTokenizers(TokenizersBindings tokenizersBindings) { + tokenizersBindings.processTokenizer("kuromoji_tokenizer", KuromojiTokenizerFactory.class); + } + + @Override + public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) { + tokenFiltersBindings.processTokenFilter("kuromoji_baseform", KuromojiBaseFormFilterFactory.class); + tokenFiltersBindings.processTokenFilter("kuromoji_part_of_speech", KuromojiPartOfSpeechFilterFactory.class); + tokenFiltersBindings.processTokenFilter("kuromoji_readingform", KuromojiReadingFormFilterFactory.class); + tokenFiltersBindings.processTokenFilter("kuromoji_stemmer", KuromojiKatakanaStemmerFactory.class); + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java index 857e3f89dd8..1e59f46709f 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java @@ -22,6 +22,7 @@ package 
org.elasticsearch.index.analysis; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ja.JapaneseAnalyzer; import org.apache.lucene.analysis.ja.JapaneseTokenizer; +import org.apache.lucene.analysis.ja.dict.UserDictionary; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,24 +41,16 @@ public class KuromojiAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet(), version); - JapaneseTokenizer.Mode mode = JapaneseTokenizer.DEFAULT_MODE; - String modeSetting = settings.get("mode", null); - if (modeSetting != null) { - if ("search".equalsIgnoreCase(modeSetting)) { - mode = JapaneseTokenizer.Mode.SEARCH; - } else if ("normal".equalsIgnoreCase(modeSetting)) { - mode = JapaneseTokenizer.Mode.NORMAL; - } else if ("extended".equalsIgnoreCase(modeSetting)) { - mode = JapaneseTokenizer.Mode.EXTENDED; - } - } - - analyzer = new JapaneseAnalyzer(version, null, mode, CharArraySet.copy(version, stopWords), JapaneseAnalyzer.getDefaultStopTags()); + final Set stopWords = Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet(), version); + final JapaneseTokenizer.Mode mode = KuromojiTokenizerFactory.getMode(settings); + final UserDictionary userDictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings); + analyzer = new JapaneseAnalyzer(version, userDictionary, mode, CharArraySet.copy(version, stopWords), JapaneseAnalyzer.getDefaultStopTags()); } @Override public JapaneseAnalyzer get() { return this.analyzer; } + + } diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java new file mode 100644 index 00000000000..d9a1bcfd879 --- /dev/null +++ 
b/src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java @@ -0,0 +1,41 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ja.JapaneseBaseFormFilter; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +public class KuromojiBaseFormFilterFactory extends AbstractTokenFilterFactory { + + @Inject + public KuromojiBaseFormFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new JapaneseBaseFormFilter(tokenStream); + } +} diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java new file mode 100644 index 00000000000..32064031232 --- /dev/null 
+++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java @@ -0,0 +1,47 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ja.JapaneseKatakanaStemFilter; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +public class KuromojiKatakanaStemmerFactory extends AbstractTokenFilterFactory { + + private final int minimumLength; + + @Inject + public KuromojiKatakanaStemmerFactory(Index index, + @IndexSettings Settings indexSettings, @Assisted String name, + @Assisted Settings settings) { + super(index, indexSettings, name, settings); + minimumLength = settings.getAsInt("minimum_length", + JapaneseKatakanaStemFilter.DEFAULT_MINIMUM_LENGTH).intValue(); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new JapaneseKatakanaStemFilter(tokenStream, minimumLength); + } +} diff --git 
a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java new file mode 100644 index 00000000000..9ccf3f96f22 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java @@ -0,0 +1,60 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +public class KuromojiPartOfSpeechFilterFactory extends + AbstractTokenFilterFactory { + + private final boolean enablePositionIncrements; + private final Set stopTags = new HashSet(); + + @Inject + public KuromojiPartOfSpeechFilterFactory(Index index, + @IndexSettings Settings indexSettings, Environment env, + @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + List wordList = Analysis.getWordList(env, settings, "stoptags"); + if (wordList != null) { + stopTags.addAll(wordList); + } + this.enablePositionIncrements = settings.getAsBoolean( + "enable_position_increments", true); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new JapanesePartOfSpeechStopFilter(enablePositionIncrements, + tokenStream, stopTags); + } + +} diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java new file mode 100644 index 00000000000..ffe0e75b917 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java @@ -0,0 +1,46 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ja.JapaneseReadingFormFilter; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +public class KuromojiReadingFormFilterFactory extends + AbstractTokenFilterFactory { + private final boolean useRomaji; + + @Inject + public KuromojiReadingFormFilterFactory(Index index, + @IndexSettings Settings indexSettings, @Assisted String name, + @Assisted Settings settings) { + super(index, indexSettings, name, settings); + useRomaji = settings.getAsBoolean("use_romaji", false); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new JapaneseReadingFormFilter(tokenStream, useRomaji); + } +} diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java new file mode 100644 index 00000000000..10e9946942a --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java @@ -0,0 +1,98 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.analysis; + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.ja.JapaneseTokenizer; +import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; +import org.apache.lucene.analysis.ja.dict.UserDictionary; +import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** + */ +public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { + + private static final String USER_DICT_OPTION = "user_dictionary"; + + private final UserDictionary userDictionary; + private final Mode mode; + + private boolean discartPunctuation; + + @Inject + public KuromojiTokenizerFactory(Index index, + @IndexSettings Settings indexSettings, Environment env, + @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + mode = getMode(settings); + userDictionary = getUserDictionary(env, settings); + discartPunctuation = 
settings.getAsBoolean("discard_punctuation", true); + } + + public static UserDictionary getUserDictionary(Environment env, + Settings settings) { + try { + final Reader reader = Analysis.getReaderFromFile(env, settings, + USER_DICT_OPTION); + if (reader == null) { + return null; + } else { + try { + return new UserDictionary(reader); + } finally { + reader.close(); + } + } + } catch (IOException e) { + throw new ElasticSearchException( + "failed to load kuromoji user dictionary", e); + } + } + + public static JapaneseTokenizer.Mode getMode(Settings settings) { + JapaneseTokenizer.Mode mode = JapaneseTokenizer.DEFAULT_MODE; + String modeSetting = settings.get("mode", null); + if (modeSetting != null) { + if ("search".equalsIgnoreCase(modeSetting)) { + mode = JapaneseTokenizer.Mode.SEARCH; + } else if ("normal".equalsIgnoreCase(modeSetting)) { + mode = JapaneseTokenizer.Mode.NORMAL; + } else if ("extended".equalsIgnoreCase(modeSetting)) { + mode = JapaneseTokenizer.Mode.EXTENDED; + } + } + return mode; + } + + @Override + public Tokenizer create(Reader reader) { + return new JapaneseTokenizer(reader, userDictionary, + discartPunctuation, mode); + } + +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java new file mode 100644 index 00000000000..06d6e16b309 --- /dev/null +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -0,0 +1,121 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.indices.analysis; + +import java.io.Reader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; +import org.apache.lucene.analysis.ja.JapaneseBaseFormFilter; +import org.apache.lucene.analysis.ja.JapaneseKatakanaStemFilter; +import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter; +import org.apache.lucene.analysis.ja.JapaneseReadingFormFilter; +import org.apache.lucene.analysis.ja.JapaneseTokenizer; +import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; +import org.elasticsearch.common.component.AbstractComponent; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory; +import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; + +/** + * Registers indices level analysis components so, if not explicitly configured, + * will be shared among all indices. 
+ */ +public class KuromojiIndicesAnalysis extends AbstractComponent { + + @Inject + public KuromojiIndicesAnalysis(Settings settings, + IndicesAnalysisService indicesAnalysisService) { + super(settings); + + indicesAnalysisService.tokenizerFactories().put("kuromoji_tokenizer", + new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() { + @Override + public String name() { + return "kuromoji_tokenizer"; + } + + @Override + public Tokenizer create(Reader reader) { + return new JapaneseTokenizer(reader, null, true, + Mode.SEARCH); + } + })); + + indicesAnalysisService.tokenFilterFactories().put("kuromoji_baseform", + new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { + @Override + public String name() { + return "kuromoji_baseform"; + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new JapaneseBaseFormFilter(tokenStream); + } + })); + + indicesAnalysisService.tokenFilterFactories().put( + "kuromoji_part_of_speech", + new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { + @Override + public String name() { + return "kuromoji_part_of_speech"; + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new JapanesePartOfSpeechStopFilter(false, + tokenStream, JapaneseAnalyzer + .getDefaultStopTags()); + } + })); + + indicesAnalysisService.tokenFilterFactories().put( + "kuromoji_readingform", + new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { + @Override + public String name() { + return "kuromoji_readingform"; + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new JapaneseReadingFormFilter(tokenStream, true); + } + })); + + indicesAnalysisService.tokenFilterFactories().put("kuromoji_stemmer", + new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { + @Override + public String name() { + return "kuromoji_stemmer"; + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new 
JapaneseKatakanaStemFilter(tokenStream); + } + })); + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java new file mode 100644 index 00000000000..1c44342057a --- /dev/null +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java @@ -0,0 +1,32 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.indices.analysis; + +import org.elasticsearch.common.inject.AbstractModule; + +/** + */ +public class KuromojiIndicesAnalysisModule extends AbstractModule { + + @Override + protected void configure() { + bind(KuromojiIndicesAnalysis.class).asEagerSingleton(); + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java index a3ba70ad5cf..7a0fcb5d890 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ -19,6 +19,7 @@ package org.elasticsearch.plugin.analysis.kuromoji; +import org.elasticsearch.index.analysis.KuromojiAnalysisBinderProcessor; import org.elasticsearch.index.analysis.KuromojiAnalyzerProvider; import org.elasticsearch.index.analysis.AnalysisModule; import org.elasticsearch.plugins.AbstractPlugin; @@ -40,5 +41,7 @@ public class AnalysisKuromojiPlugin extends AbstractPlugin { public void onModule(AnalysisModule module) { module.addAnalyzer("kuromoji", KuromojiAnalyzerProvider.class); + module.addProcessor(new KuromojiAnalysisBinderProcessor()); + } } diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java new file mode 100644 index 00000000000..bd82192b7d6 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -0,0 +1,218 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; +import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; +import static org.hamcrest.Matchers.instanceOf; + +import java.io.IOException; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; +import org.apache.lucene.analysis.ja.JapaneseTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.elasticsearch.common.inject.Injector; +import org.elasticsearch.common.inject.ModulesBuilder; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.settings.SettingsModule; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.EnvironmentModule; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexNameModule; +import org.elasticsearch.index.settings.IndexSettingsModule; +import org.elasticsearch.indices.analysis.IndicesAnalysisModule; +import org.elasticsearch.indices.analysis.IndicesAnalysisService; +import org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin; +import org.elasticsearch.plugins.PluginsModule; +import org.elasticsearch.plugins.PluginsService; +import 
org.hamcrest.MatcherAssert; +import org.testng.Assert; +import org.testng.annotations.Test; + +/** + */ +public class KuromojiAnalysisTests { + + @Test + public void testDefaultsKuromojiAnalysis() { + Index index = new Index("test"); + + Injector parentInjector = new ModulesBuilder().add( + new SettingsModule(EMPTY_SETTINGS), + new EnvironmentModule(new Environment(EMPTY_SETTINGS)), + new IndicesAnalysisModule()).createInjector(); + AnalysisModule analysisModule = new AnalysisModule(EMPTY_SETTINGS, + parentInjector.getInstance(IndicesAnalysisService.class)); + new AnalysisKuromojiPlugin().onModule(analysisModule); + Injector injector = new ModulesBuilder().add( + new IndexSettingsModule(index, EMPTY_SETTINGS), + new IndexNameModule(index), analysisModule) + .createChildInjector(parentInjector); + + AnalysisService analysisService = injector + .getInstance(AnalysisService.class); + + TokenizerFactory tokenizerFactory = analysisService + .tokenizer("kuromoji_tokenizer"); + MatcherAssert.assertThat(tokenizerFactory, + instanceOf(KuromojiTokenizerFactory.class)); + + TokenFilterFactory filterFactory = analysisService + .tokenFilter("kuromoji_part_of_speech"); + MatcherAssert.assertThat(filterFactory, + instanceOf(KuromojiPartOfSpeechFilterFactory.class)); + + filterFactory = analysisService.tokenFilter("kuromoji_readingform"); + MatcherAssert.assertThat(filterFactory, + instanceOf(KuromojiReadingFormFilterFactory.class)); + + filterFactory = analysisService.tokenFilter("kuromoji_baseform"); + MatcherAssert.assertThat(filterFactory, + instanceOf(KuromojiBaseFormFilterFactory.class)); + + filterFactory = analysisService.tokenFilter("kuromoji_stemmer"); + MatcherAssert.assertThat(filterFactory, + instanceOf(KuromojiKatakanaStemmerFactory.class)); + + NamedAnalyzer analyzer = analysisService.analyzer("kuromoji"); + MatcherAssert.assertThat(analyzer.analyzer(), + instanceOf(JapaneseAnalyzer.class)); + } + + @Test + public void testBaseFormFilterFactory() throws IOException 
{ + AnalysisService analysisService = createAnalysisService(); + TokenFilterFactory tokenFilter = analysisService + .tokenFilter("kuromoji_pos"); + MatcherAssert.assertThat(tokenFilter, + instanceOf(KuromojiPartOfSpeechFilterFactory.class)); + String source = "私は制限スピードを超える。"; + String[] expected = new String[] { "私", "は", "制限", "スピード", "を" }; + Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), + null, true, JapaneseTokenizer.Mode.SEARCH); + assertSimpleTSOutput(tokenFilter.create(tokenizer), expected); + + } + + @Test + public void testReadingFormFilterFactory() throws IOException { + AnalysisService analysisService = createAnalysisService(); + TokenFilterFactory tokenFilter = analysisService + .tokenFilter("kuromoji_rf"); + MatcherAssert.assertThat(tokenFilter, + instanceOf(KuromojiReadingFormFilterFactory.class)); + String source = "今夜はロバート先生と話した"; + String[] expected_tokens_romanji = new String[] { "kon'ya", "ha", + "robato", "sensei", "to", "hanashi", "ta" }; + + Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), + null, true, JapaneseTokenizer.Mode.SEARCH); + + assertSimpleTSOutput(tokenFilter.create(tokenizer), + expected_tokens_romanji); + + tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, + JapaneseTokenizer.Mode.SEARCH); + String[] expected_tokens_katakana = new String[] { "コンヤ", "ハ", "ロバート", + "センセイ", "ト", "ハナシ", "タ" }; + tokenFilter = analysisService.tokenFilter("kuromoji_readingform"); + MatcherAssert.assertThat(tokenFilter, + instanceOf(KuromojiReadingFormFilterFactory.class)); + assertSimpleTSOutput(tokenFilter.create(tokenizer), + expected_tokens_katakana); + } + + @Test + public void testKatakanaStemFilter() throws IOException { + AnalysisService analysisService = createAnalysisService(); + TokenFilterFactory tokenFilter = analysisService + .tokenFilter("kuromoji_stemmer"); + MatcherAssert.assertThat(tokenFilter, + instanceOf(KuromojiKatakanaStemmerFactory.class)); + String source = 
"明後日パーティーに行く予定がある。図書館で資料をコピーしました。"; + ; + Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), + null, true, JapaneseTokenizer.Mode.SEARCH); + + // パーティー should be stemmed by default + // (min len) コピー should not be stemmed + String[] expected_tokens_katakana = new String[] { "明後日", "パーティ", "に", + "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", + "た" }; + assertSimpleTSOutput(tokenFilter.create(tokenizer), + expected_tokens_katakana); + + tokenFilter = analysisService.tokenFilter("kuromoji_ks"); + MatcherAssert.assertThat(tokenFilter, + instanceOf(KuromojiKatakanaStemmerFactory.class)); + tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, + JapaneseTokenizer.Mode.SEARCH); + + // パーティー should not be stemmed since min len == 6 + // コピー should not be stemmed + expected_tokens_katakana = new String[] { "明後日", "パーティー", "に", "行く", + "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た" }; + assertSimpleTSOutput(tokenFilter.create(tokenizer), + expected_tokens_katakana); + + } + + public AnalysisService createAnalysisService() { + Index index = new Index("test"); + Settings settings = settingsBuilder().loadFromClasspath( + "org/elasticsearch/index/analysis/kuromoji_analysis.json") + .build(); + Injector parentInjector = new ModulesBuilder().add( + new SettingsModule(settings), + new EnvironmentModule(new Environment(settings)), + new IndicesAnalysisModule()).createInjector(); + AnalysisModule analysisModule = new AnalysisModule(settings, + parentInjector.getInstance(IndicesAnalysisService.class)); + Injector injector = new ModulesBuilder().add( + new IndexSettingsModule(index, settings), + new PluginsModule(settings, parentInjector + .getInstance(PluginsService.class)), + new IndexNameModule(index), analysisModule) + .createChildInjector(parentInjector); + + AnalysisService analysisService = injector + .getInstance(AnalysisService.class); + return analysisService; + } + + public static void 
assertSimpleTSOutput(TokenStream stream, + String[] expected) throws IOException { + stream.reset(); + CharTermAttribute termAttr = stream + .getAttribute(CharTermAttribute.class); + Assert.assertNotNull(termAttr); + int i = 0; + while (stream.incrementToken()) { + Assert.assertTrue(i < expected.length); + Assert.assertEquals(expected[i++], termAttr.toString(), + "expected different term at index " + i); + } + Assert.assertEquals(i, expected.length, "not all tokens produced"); + } + +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json new file mode 100644 index 00000000000..de8dd28770f --- /dev/null +++ b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json @@ -0,0 +1,31 @@ +{ + "index":{ + "analysis":{ + "filter":{ + "kuromoji_rf":{ + "type":"kuromoji_readingform", + "use_romaji" : "true" + }, + "kuromoji_pos" : { + "type": "kuromoji_part_of_speech", + "enable_position_increment" : "false", + "stoptags" : ["# verb-main:", "動詞-自立"] + }, + "kuromoji_ks" : { + "type": "kuromoji_stemmer", + "minimum_length" : 6 + } + + + }, + + "tokenizer" : { + "kuromoji" : { + "type":"kuromoji_tokenizer" + } + + } + + } + } +} \ No newline at end of file diff --git a/src/test/resources/es-plugin.properties b/src/test/resources/es-plugin.properties new file mode 100644 index 00000000000..c35abda2c62 --- /dev/null +++ b/src/test/resources/es-plugin.properties @@ -0,0 +1 @@ +plugin=org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin From 32a1503b94fb13282d6e372e3104bf20d2260fea Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sun, 10 Jun 2012 21:51:56 +0200 Subject: [PATCH 005/115] cleanup the additional analysis components --- LICENSE.txt | 202 ++++++++++++++++++ README.md | 20 ++ pom.xml | 4 + .../KuromojiAnalysisBinderProcessor.java | 39 ---- .../analysis/KuromojiAnalyzerProvider.java | 4 +- 
.../KuromojiBaseFormFilterFactory.java | 16 +- .../KuromojiKatakanaStemmerFactory.java | 7 +- .../KuromojiPartOfSpeechFilterFactory.java | 21 +- .../KuromojiReadingFormFilterFactory.java | 8 +- .../analysis/KuromojiTokenizerFactory.java | 19 +- .../kuromoji/AnalysisKuromojiPlugin.java | 11 +- .../index/analysis/KuromojiAnalysisTests.java | 164 +++++--------- src/test/resources/es-plugin.properties | 1 - 13 files changed, 317 insertions(+), 199 deletions(-) create mode 100644 LICENSE.txt delete mode 100644 src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java delete mode 100644 src/test/resources/es-plugin.properties diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index fe9e72e3d76..ac74adea3bf 100644 --- a/README.md +++ b/README.md @@ -14,3 +14,23 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e -------------------------------------------------- The plugin includes the `kuromoji` analyzer. + +h1. License + +
+This software is licensed under the Apache 2 license, quoted below.
+
+Copyright 2009-2011 Shay Banon and ElasticSearch 
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not
+use this file except in compliance with the License. You may obtain a copy of
+the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+License for the specific language governing permissions and limitations under
+the License.
+
\ No newline at end of file diff --git a/pom.xml b/pom.xml index 6129fdb4d05..d76016fd63c 100644 --- a/pom.xml +++ b/pom.xml @@ -35,6 +35,10 @@ + + sonatype + http://oss.sonatype.org/content/repositories/releases/ + diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java deleted file mode 100644 index a761b92c950..00000000000 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.analysis; - - -/** - */ -public class KuromojiAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { - - @Override - public void processTokenizers(TokenizersBindings tokenizersBindings) { - tokenizersBindings.processTokenizer("kuromoji_tokenizer", KuromojiTokenizerFactory.class); - } - - @Override - public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) { - tokenFiltersBindings.processTokenFilter("kuromoji_baseform", KuromojiBaseFormFilterFactory.class); - tokenFiltersBindings.processTokenFilter("kuromoji_part_of_speech", KuromojiPartOfSpeechFilterFactory.class); - tokenFiltersBindings.processTokenFilter("kuromoji_readingform", KuromojiReadingFormFilterFactory.class); - tokenFiltersBindings.processTokenFilter("kuromoji_stemmer", KuromojiKatakanaStemmerFactory.class); - } -} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java index 1e59f46709f..cb96830f926 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java @@ -51,6 +51,6 @@ public class KuromojiAnalyzerProvider extends AbstractIndexAnalyzerProvider stopTags = new HashSet(); @Inject - public KuromojiPartOfSpeechFilterFactory(Index index, - @IndexSettings Settings indexSettings, Environment env, - @Assisted String name, @Assisted Settings settings) { + public KuromojiPartOfSpeechFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); List wordList = Analysis.getWordList(env, settings, "stoptags"); if (wordList != null) { stopTags.addAll(wordList); } - this.enablePositionIncrements = settings.getAsBoolean( - "enable_position_increments", true); + 
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", true); } @Override public TokenStream create(TokenStream tokenStream) { - return new JapanesePartOfSpeechStopFilter(enablePositionIncrements, - tokenStream, stopTags); + return new JapanesePartOfSpeechStopFilter(enablePositionIncrements, tokenStream, stopTags); } } diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java index ffe0e75b917..b9af0641613 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java @@ -27,14 +27,12 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -public class KuromojiReadingFormFilterFactory extends - AbstractTokenFilterFactory { +public class KuromojiReadingFormFilterFactory extends AbstractTokenFilterFactory { + private final boolean useRomaji; @Inject - public KuromojiReadingFormFilterFactory(Index index, - @IndexSettings Settings indexSettings, @Assisted String name, - @Assisted Settings settings) { + public KuromojiReadingFormFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); useRomaji = settings.getAsBoolean("use_romaji", false); } diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java index 10e9946942a..583e8b74c8f 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java @@ -18,9 +18,6 @@ */ package org.elasticsearch.index.analysis; -import java.io.IOException; 
-import java.io.Reader; - import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ja.JapaneseTokenizer; import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; @@ -33,6 +30,9 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; +import java.io.IOException; +import java.io.Reader; + /** */ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { @@ -45,20 +45,16 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { private boolean discartPunctuation; @Inject - public KuromojiTokenizerFactory(Index index, - @IndexSettings Settings indexSettings, Environment env, - @Assisted String name, @Assisted Settings settings) { + public KuromojiTokenizerFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); mode = getMode(settings); userDictionary = getUserDictionary(env, settings); discartPunctuation = settings.getAsBoolean("discard_punctuation", true); } - public static UserDictionary getUserDictionary(Environment env, - Settings settings) { + public static UserDictionary getUserDictionary(Environment env, Settings settings) { try { - final Reader reader = Analysis.getReaderFromFile(env, settings, - USER_DICT_OPTION); + final Reader reader = Analysis.getReaderFromFile(env, settings, USER_DICT_OPTION); if (reader == null) { return null; } else { @@ -69,8 +65,7 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { } } } catch (IOException e) { - throw new ElasticSearchException( - "failed to load kuromoji user dictionary", e); + throw new ElasticSearchException("failed to load kuromoji user dictionary", e); } } diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java 
b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java index 7a0fcb5d890..c816a43fd5a 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ -19,9 +19,7 @@ package org.elasticsearch.plugin.analysis.kuromoji; -import org.elasticsearch.index.analysis.KuromojiAnalysisBinderProcessor; -import org.elasticsearch.index.analysis.KuromojiAnalyzerProvider; -import org.elasticsearch.index.analysis.AnalysisModule; +import org.elasticsearch.index.analysis.*; import org.elasticsearch.plugins.AbstractPlugin; /** @@ -41,7 +39,10 @@ public class AnalysisKuromojiPlugin extends AbstractPlugin { public void onModule(AnalysisModule module) { module.addAnalyzer("kuromoji", KuromojiAnalyzerProvider.class); - module.addProcessor(new KuromojiAnalysisBinderProcessor()); - + module.addTokenizer("kuromoji_tokenizer", KuromojiTokenizerFactory.class); + module.addTokenFilter("kuromoji_baseform", KuromojiBaseFormFilterFactory.class); + module.addTokenFilter("kuromoji_part_of_speech", KuromojiPartOfSpeechFilterFactory.class); + module.addTokenFilter("kuromoji_readingform", KuromojiReadingFormFilterFactory.class); + module.addTokenFilter("kuromoji_stemmer", KuromojiKatakanaStemmerFactory.class); } } diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index bd82192b7d6..0bfcb53a00d 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -19,13 +19,6 @@ package org.elasticsearch.index.analysis; -import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; -import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; -import static 
org.hamcrest.Matchers.instanceOf; - -import java.io.IOException; -import java.io.StringReader; - import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ja.JapaneseAnalyzer; @@ -33,6 +26,7 @@ import org.apache.lucene.analysis.ja.JapaneseTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.inject.ModulesBuilder; +import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.SettingsModule; import org.elasticsearch.env.Environment; @@ -43,176 +37,128 @@ import org.elasticsearch.index.settings.IndexSettingsModule; import org.elasticsearch.indices.analysis.IndicesAnalysisModule; import org.elasticsearch.indices.analysis.IndicesAnalysisService; import org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin; -import org.elasticsearch.plugins.PluginsModule; -import org.elasticsearch.plugins.PluginsService; -import org.hamcrest.MatcherAssert; import org.testng.Assert; import org.testng.annotations.Test; +import java.io.IOException; +import java.io.StringReader; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.instanceOf; + /** */ public class KuromojiAnalysisTests { @Test public void testDefaultsKuromojiAnalysis() { - Index index = new Index("test"); + AnalysisService analysisService = createAnalysisService(); - Injector parentInjector = new ModulesBuilder().add( - new SettingsModule(EMPTY_SETTINGS), - new EnvironmentModule(new Environment(EMPTY_SETTINGS)), - new IndicesAnalysisModule()).createInjector(); - AnalysisModule analysisModule = new AnalysisModule(EMPTY_SETTINGS, - parentInjector.getInstance(IndicesAnalysisService.class)); - new AnalysisKuromojiPlugin().onModule(analysisModule); - Injector injector = new ModulesBuilder().add( - new 
IndexSettingsModule(index, EMPTY_SETTINGS), - new IndexNameModule(index), analysisModule) - .createChildInjector(parentInjector); + TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_tokenizer"); + assertThat(tokenizerFactory, instanceOf(KuromojiTokenizerFactory.class)); - AnalysisService analysisService = injector - .getInstance(AnalysisService.class); - - TokenizerFactory tokenizerFactory = analysisService - .tokenizer("kuromoji_tokenizer"); - MatcherAssert.assertThat(tokenizerFactory, - instanceOf(KuromojiTokenizerFactory.class)); - - TokenFilterFactory filterFactory = analysisService - .tokenFilter("kuromoji_part_of_speech"); - MatcherAssert.assertThat(filterFactory, - instanceOf(KuromojiPartOfSpeechFilterFactory.class)); + TokenFilterFactory filterFactory = analysisService.tokenFilter("kuromoji_part_of_speech"); + assertThat(filterFactory, instanceOf(KuromojiPartOfSpeechFilterFactory.class)); filterFactory = analysisService.tokenFilter("kuromoji_readingform"); - MatcherAssert.assertThat(filterFactory, - instanceOf(KuromojiReadingFormFilterFactory.class)); + assertThat(filterFactory, instanceOf(KuromojiReadingFormFilterFactory.class)); filterFactory = analysisService.tokenFilter("kuromoji_baseform"); - MatcherAssert.assertThat(filterFactory, - instanceOf(KuromojiBaseFormFilterFactory.class)); + assertThat(filterFactory, instanceOf(KuromojiBaseFormFilterFactory.class)); filterFactory = analysisService.tokenFilter("kuromoji_stemmer"); - MatcherAssert.assertThat(filterFactory, - instanceOf(KuromojiKatakanaStemmerFactory.class)); + assertThat(filterFactory, instanceOf(KuromojiKatakanaStemmerFactory.class)); NamedAnalyzer analyzer = analysisService.analyzer("kuromoji"); - MatcherAssert.assertThat(analyzer.analyzer(), - instanceOf(JapaneseAnalyzer.class)); + assertThat(analyzer.analyzer(), instanceOf(JapaneseAnalyzer.class)); } @Test public void testBaseFormFilterFactory() throws IOException { AnalysisService analysisService = 
createAnalysisService(); - TokenFilterFactory tokenFilter = analysisService - .tokenFilter("kuromoji_pos"); - MatcherAssert.assertThat(tokenFilter, - instanceOf(KuromojiPartOfSpeechFilterFactory.class)); + TokenFilterFactory tokenFilter = analysisService.tokenFilter("kuromoji_pos"); + assertThat(tokenFilter, instanceOf(KuromojiPartOfSpeechFilterFactory.class)); String source = "私は制限スピードを超える。"; - String[] expected = new String[] { "私", "は", "制限", "スピード", "を" }; - Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), - null, true, JapaneseTokenizer.Mode.SEARCH); + String[] expected = new String[]{"私", "は", "制限", "スピード", "を"}; + Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, JapaneseTokenizer.Mode.SEARCH); assertSimpleTSOutput(tokenFilter.create(tokenizer), expected); - } @Test public void testReadingFormFilterFactory() throws IOException { AnalysisService analysisService = createAnalysisService(); - TokenFilterFactory tokenFilter = analysisService - .tokenFilter("kuromoji_rf"); - MatcherAssert.assertThat(tokenFilter, - instanceOf(KuromojiReadingFormFilterFactory.class)); + TokenFilterFactory tokenFilter = analysisService.tokenFilter("kuromoji_rf"); + assertThat(tokenFilter, instanceOf(KuromojiReadingFormFilterFactory.class)); String source = "今夜はロバート先生と話した"; - String[] expected_tokens_romanji = new String[] { "kon'ya", "ha", - "robato", "sensei", "to", "hanashi", "ta" }; + String[] expected_tokens_romanji = new String[]{"kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta"}; - Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), - null, true, JapaneseTokenizer.Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, JapaneseTokenizer.Mode.SEARCH); - assertSimpleTSOutput(tokenFilter.create(tokenizer), - expected_tokens_romanji); + assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_romanji); - tokenizer = new JapaneseTokenizer(new 
StringReader(source), null, true, - JapaneseTokenizer.Mode.SEARCH); - String[] expected_tokens_katakana = new String[] { "コンヤ", "ハ", "ロバート", - "センセイ", "ト", "ハナシ", "タ" }; + tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, JapaneseTokenizer.Mode.SEARCH); + String[] expected_tokens_katakana = new String[]{"コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ"}; tokenFilter = analysisService.tokenFilter("kuromoji_readingform"); - MatcherAssert.assertThat(tokenFilter, - instanceOf(KuromojiReadingFormFilterFactory.class)); - assertSimpleTSOutput(tokenFilter.create(tokenizer), - expected_tokens_katakana); + assertThat(tokenFilter, instanceOf(KuromojiReadingFormFilterFactory.class)); + assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana); } @Test public void testKatakanaStemFilter() throws IOException { AnalysisService analysisService = createAnalysisService(); - TokenFilterFactory tokenFilter = analysisService - .tokenFilter("kuromoji_stemmer"); - MatcherAssert.assertThat(tokenFilter, - instanceOf(KuromojiKatakanaStemmerFactory.class)); + TokenFilterFactory tokenFilter = analysisService.tokenFilter("kuromoji_stemmer"); + assertThat(tokenFilter, instanceOf(KuromojiKatakanaStemmerFactory.class)); String source = "明後日パーティーに行く予定がある。図書館で資料をコピーしました。"; - ; - Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), - null, true, JapaneseTokenizer.Mode.SEARCH); + + Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, JapaneseTokenizer.Mode.SEARCH); // パーティー should be stemmed by default // (min len) コピー should not be stemmed - String[] expected_tokens_katakana = new String[] { "明後日", "パーティ", "に", - "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", - "た" }; - assertSimpleTSOutput(tokenFilter.create(tokenizer), - expected_tokens_katakana); + String[] expected_tokens_katakana = new String[]{"明後日", "パーティ", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"}; + 
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana); tokenFilter = analysisService.tokenFilter("kuromoji_ks"); - MatcherAssert.assertThat(tokenFilter, - instanceOf(KuromojiKatakanaStemmerFactory.class)); - tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, - JapaneseTokenizer.Mode.SEARCH); + assertThat(tokenFilter, instanceOf(KuromojiKatakanaStemmerFactory.class)); + tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, JapaneseTokenizer.Mode.SEARCH); // パーティー should not be stemmed since min len == 6 // コピー should not be stemmed - expected_tokens_katakana = new String[] { "明後日", "パーティー", "に", "行く", - "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た" }; - assertSimpleTSOutput(tokenFilter.create(tokenizer), - expected_tokens_katakana); - + expected_tokens_katakana = new String[]{"明後日", "パーティー", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"}; + assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana); } public AnalysisService createAnalysisService() { + Settings settings = ImmutableSettings.settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/kuromoji_analysis.json").build(); + Index index = new Index("test"); - Settings settings = settingsBuilder().loadFromClasspath( - "org/elasticsearch/index/analysis/kuromoji_analysis.json") - .build(); - Injector parentInjector = new ModulesBuilder().add( - new SettingsModule(settings), + + Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), - new IndicesAnalysisModule()).createInjector(); - AnalysisModule analysisModule = new AnalysisModule(settings, - parentInjector.getInstance(IndicesAnalysisService.class)); + new IndicesAnalysisModule()) + .createInjector(); + + AnalysisModule analysisModule = new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)); + new 
AnalysisKuromojiPlugin().onModule(analysisModule); + Injector injector = new ModulesBuilder().add( new IndexSettingsModule(index, settings), - new PluginsModule(settings, parentInjector - .getInstance(PluginsService.class)), - new IndexNameModule(index), analysisModule) + new IndexNameModule(index), + analysisModule) .createChildInjector(parentInjector); - AnalysisService analysisService = injector - .getInstance(AnalysisService.class); - return analysisService; + return injector.getInstance(AnalysisService.class); } public static void assertSimpleTSOutput(TokenStream stream, - String[] expected) throws IOException { + String[] expected) throws IOException { stream.reset(); - CharTermAttribute termAttr = stream - .getAttribute(CharTermAttribute.class); + CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class); Assert.assertNotNull(termAttr); int i = 0; while (stream.incrementToken()) { Assert.assertTrue(i < expected.length); - Assert.assertEquals(expected[i++], termAttr.toString(), - "expected different term at index " + i); + Assert.assertEquals(expected[i++], termAttr.toString(), "expected different term at index " + i); } Assert.assertEquals(i, expected.length, "not all tokens produced"); } - } \ No newline at end of file diff --git a/src/test/resources/es-plugin.properties b/src/test/resources/es-plugin.properties deleted file mode 100644 index c35abda2c62..00000000000 --- a/src/test/resources/es-plugin.properties +++ /dev/null @@ -1 +0,0 @@ -plugin=org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin From 2667e8f6acf6f35591e2e6dc57cc5aa9c6b78c52 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sun, 10 Jun 2012 21:56:19 +0200 Subject: [PATCH 006/115] md format.... 
--- README.md | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index ac74adea3bf..6bad470c4a8 100644 --- a/README.md +++ b/README.md @@ -15,22 +15,21 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e The plugin includes the `kuromoji` analyzer. -h1. License +License +------- -
-This software is licensed under the Apache 2 license, quoted below.
+    This software is licensed under the Apache 2 license, quoted below.
 
-Copyright 2009-2011 Shay Banon and ElasticSearch 
+    Copyright 2009-2012 Shay Banon and ElasticSearch 
 
-Licensed under the Apache License, Version 2.0 (the "License"); you may not
-use this file except in compliance with the License. You may obtain a copy of
-the License at
+    Licensed under the Apache License, Version 2.0 (the "License"); you may not
+    use this file except in compliance with the License. You may obtain a copy of
+    the License at
 
-    http://www.apache.org/licenses/LICENSE-2.0
+        http://www.apache.org/licenses/LICENSE-2.0
 
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
-License for the specific language governing permissions and limitations under
-the License.
-
\ No newline at end of file + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations under + the License. From e0def771f8c7eaf976f88b5cfceb668477de4e47 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 13 Nov 2012 13:27:15 +0100 Subject: [PATCH 007/115] Fixed tests. --- pom.xml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pom.xml b/pom.xml index d76016fd63c..edb79033d32 100644 --- a/pom.xml +++ b/pom.xml @@ -86,6 +86,24 @@
+ + + + ${basedir}/src/test/java + + **/*.json + **/*.yml + **/*.txt + + + + ${basedir}/src/test/resources + + **/*.* + + + + org.apache.maven.plugins From 47b424f4723eb43e0851ac65ad6111307ee4cd92 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 13 Nov 2012 13:30:45 +0100 Subject: [PATCH 008/115] Updated libraries --- pom.xml | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index edb79033d32..599633e2843 100644 --- a/pom.xml +++ b/pom.xml @@ -52,41 +52,57 @@ org.apache.lucene lucene-kuromoji - 3.6.0 + 3.6.1 compile log4j log4j - 1.2.16 + 1.2.17 runtime org.testng testng - 6.3.1 + 6.8 test org.hamcrest hamcrest-core - 1.3.RC2 + 1.3 test org.hamcrest hamcrest-library - 1.3.RC2 + 1.3 test + + + ${basedir}/src/main/java + + **/*.json + **/*.yml + + + + ${basedir}/src/main/resources + + **/*.* + + + + ${basedir}/src/test/java From f7fb05117914d6dd39534b5723d1843aeee8c10f Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 13 Nov 2012 22:18:21 +0100 Subject: [PATCH 009/115] - The KuromojiIndicesAnalysis class get loaded during plugin loading. - TokenFilters and Tokenizers are now loaded via a binding processor. 
--- .../KuromojiAnalysisBinderProcessor.java | 44 +++++++++++++++++++ .../kuromoji/AnalysisKuromojiPlugin.java | 17 ++++--- .../index/analysis/KuromojiAnalysisTests.java | 4 ++ .../index/analysis/kuromoji_analysis.json | 6 +++ 4 files changed, 65 insertions(+), 6 deletions(-) create mode 100644 src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java new file mode 100644 index 00000000000..65b39320d36 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java @@ -0,0 +1,44 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +/** + */ +public class KuromojiAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { + + @Override + public void processAnalyzers(AnalyzersBindings analyzersBindings) { + analyzersBindings.processAnalyzer("kuromoji", KuromojiAnalyzerProvider.class); + } + + @Override + public void processTokenizers(TokenizersBindings tokenizersBindings) { + tokenizersBindings.processTokenizer("kuromoji_tokenizer", KuromojiTokenizerFactory.class); + } + + @Override + public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) { + tokenFiltersBindings.processTokenFilter("kuromoji_baseform", KuromojiBaseFormFilterFactory.class); + tokenFiltersBindings.processTokenFilter("kuromoji_part_of_speech", KuromojiPartOfSpeechFilterFactory.class); + tokenFiltersBindings.processTokenFilter("kuromoji_readingform", KuromojiReadingFormFilterFactory.class); + tokenFiltersBindings.processTokenFilter("kuromoji_stemmer", KuromojiKatakanaStemmerFactory.class); + } + +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java index c816a43fd5a..053d2ca8415 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ -19,9 +19,14 @@ package org.elasticsearch.plugin.analysis.kuromoji; +import org.elasticsearch.common.collect.ImmutableList; +import org.elasticsearch.common.inject.Module; import org.elasticsearch.index.analysis.*; +import org.elasticsearch.indices.analysis.KuromojiIndicesAnalysisModule; import org.elasticsearch.plugins.AbstractPlugin; +import java.util.Collection; + /** * */ @@ -37,12 +42,12 @@ public class AnalysisKuromojiPlugin extends AbstractPlugin { return "Kuromoji analysis support"; } + @Override + public Collection> 
modules() { + return ImmutableList.>of(KuromojiIndicesAnalysisModule.class); + } + public void onModule(AnalysisModule module) { - module.addAnalyzer("kuromoji", KuromojiAnalyzerProvider.class); - module.addTokenizer("kuromoji_tokenizer", KuromojiTokenizerFactory.class); - module.addTokenFilter("kuromoji_baseform", KuromojiBaseFormFilterFactory.class); - module.addTokenFilter("kuromoji_part_of_speech", KuromojiPartOfSpeechFilterFactory.class); - module.addTokenFilter("kuromoji_readingform", KuromojiReadingFormFilterFactory.class); - module.addTokenFilter("kuromoji_stemmer", KuromojiKatakanaStemmerFactory.class); + module.addProcessor(new KuromojiAnalysisBinderProcessor()); } } diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index 0bfcb53a00d..b4f06e76167 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -71,6 +71,10 @@ public class KuromojiAnalysisTests { NamedAnalyzer analyzer = analysisService.analyzer("kuromoji"); assertThat(analyzer.analyzer(), instanceOf(JapaneseAnalyzer.class)); + + analyzer = analysisService.analyzer("my_analyzer"); + assertThat(analyzer.analyzer(), instanceOf(CustomAnalyzer.class)); + assertThat(analyzer.analyzer().tokenStream(null, null), instanceOf(JapaneseTokenizer.class)); } @Test diff --git a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json index de8dd28770f..46731d60db5 100644 --- a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json +++ b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json @@ -24,6 +24,12 @@ "type":"kuromoji_tokenizer" } + }, + "analyzer" : { + "my_analyzer" : { + "type" : "custom", + "tokenizer" : "kuromoji_tokenizer" + } } } From 
75b6af56d2fb155d8473c9b93cad511f136f8adb Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 19 Nov 2012 15:08:23 +0100 Subject: [PATCH 010/115] - TokenFilters and Tokenizers are loaded via the original (more maintainable) way as was in the previous commit. --- .../KuromojiAnalysisBinderProcessor.java | 44 ------------------- .../kuromoji/AnalysisKuromojiPlugin.java | 7 ++- 2 files changed, 6 insertions(+), 45 deletions(-) delete mode 100644 src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java deleted file mode 100644 index 65b39320d36..00000000000 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalysisBinderProcessor.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.analysis; - -/** - */ -public class KuromojiAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { - - @Override - public void processAnalyzers(AnalyzersBindings analyzersBindings) { - analyzersBindings.processAnalyzer("kuromoji", KuromojiAnalyzerProvider.class); - } - - @Override - public void processTokenizers(TokenizersBindings tokenizersBindings) { - tokenizersBindings.processTokenizer("kuromoji_tokenizer", KuromojiTokenizerFactory.class); - } - - @Override - public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) { - tokenFiltersBindings.processTokenFilter("kuromoji_baseform", KuromojiBaseFormFilterFactory.class); - tokenFiltersBindings.processTokenFilter("kuromoji_part_of_speech", KuromojiPartOfSpeechFilterFactory.class); - tokenFiltersBindings.processTokenFilter("kuromoji_readingform", KuromojiReadingFormFilterFactory.class); - tokenFiltersBindings.processTokenFilter("kuromoji_stemmer", KuromojiKatakanaStemmerFactory.class); - } - -} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java index 053d2ca8415..039a2716e57 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ -48,6 +48,11 @@ public class AnalysisKuromojiPlugin extends AbstractPlugin { } public void onModule(AnalysisModule module) { - module.addProcessor(new KuromojiAnalysisBinderProcessor()); + module.addAnalyzer("kuromoji", KuromojiAnalyzerProvider.class); + module.addTokenizer("kuromoji_tokenizer", KuromojiTokenizerFactory.class); + module.addTokenFilter("kuromoji_baseform", KuromojiBaseFormFilterFactory.class); + module.addTokenFilter("kuromoji_part_of_speech", KuromojiPartOfSpeechFilterFactory.class); + 
module.addTokenFilter("kuromoji_readingform", KuromojiReadingFormFilterFactory.class); + module.addTokenFilter("kuromoji_stemmer", KuromojiKatakanaStemmerFactory.class); } } From 3b814905c8fef036dc31834fe73d616f08749b71 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 21 Nov 2012 11:03:09 +0100 Subject: [PATCH 011/115] Release 1.1.0 --- README.md | 6 ++++-- pom.xml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6bad470c4a8..ade25cacb18 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,16 @@ Japanese (kuromoji) Analysis for ElasticSearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.0.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.1.0`. -------------------------------------------------- | Kuromoji Analysis Plugin | ElasticSearch | -------------------------------------------------- | master | 0.19 -> master | -------------------------------------------------- - | 1.0.0 | 0.19 -> master | + | 1.1.0 | 0.19 | + -------------------------------------------------- + | 1.0.0 | 0.19 | -------------------------------------------------- The plugin includes the `kuromoji` analyzer. diff --git a/pom.xml b/pom.xml index 599633e2843..0e5f62aeb05 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.1.0-SNAPSHOT + 1.1.0 jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From fc040db280b5a9eb21c30da1a1ff7fa9ac5eff69 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 21 Nov 2012 11:48:24 +0100 Subject: [PATCH 012/115] Moved to 1.2.0-SNAPSHOT for next development. 
--- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0e5f62aeb05..6030cc2c349 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.1.0 + 1.2.0-SNAPSHOT jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From 52ca251276c69e84a34f7e22a3c03a5cacbe1a5b Mon Sep 17 00:00:00 2001 From: David Pilato Date: Sun, 24 Feb 2013 21:46:34 +0100 Subject: [PATCH 013/115] Move to Elasticsearch 0.21.0.Beta1 Due to refactoring in 0.21.x we have to update this plugin Closes #2. --- pom.xml | 9 +++++---- .../index/analysis/KuromojiAnalyzerProvider.java | 2 +- .../index/analysis/KuromojiAnalysisTests.java | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pom.xml b/pom.xml index 6030cc2c349..7805d070c75 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,8 @@ - 0.19.3 + 0.21.0.Beta1-SNAPSHOT + 4.1.0 @@ -51,8 +52,8 @@ org.apache.lucene - lucene-kuromoji - 3.6.1 + lucene-analyzers-kuromoji + ${lucene.version} compile @@ -174,4 +175,4 @@ - \ No newline at end of file + diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java index cb96830f926..e1d0deac39f 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java @@ -19,10 +19,10 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ja.JapaneseAnalyzer; import org.apache.lucene.analysis.ja.JapaneseTokenizer; import org.apache.lucene.analysis.ja.dict.UserDictionary; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; diff --git 
a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index b4f06e76167..6bd94d7d5a1 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -51,7 +51,7 @@ import static org.hamcrest.Matchers.instanceOf; public class KuromojiAnalysisTests { @Test - public void testDefaultsKuromojiAnalysis() { + public void testDefaultsKuromojiAnalysis() throws IOException { AnalysisService analysisService = createAnalysisService(); TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_tokenizer"); @@ -74,7 +74,7 @@ public class KuromojiAnalysisTests { analyzer = analysisService.analyzer("my_analyzer"); assertThat(analyzer.analyzer(), instanceOf(CustomAnalyzer.class)); - assertThat(analyzer.analyzer().tokenStream(null, null), instanceOf(JapaneseTokenizer.class)); + assertThat(analyzer.analyzer().tokenStream(null, new StringReader("")), instanceOf(JapaneseTokenizer.class)); } @Test @@ -165,4 +165,4 @@ public class KuromojiAnalysisTests { } Assert.assertEquals(i, expected.length, "not all tokens produced"); } -} \ No newline at end of file +} From a418dcf8087dd6fa8b5dc3e08768961876e7e81d Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 26 Feb 2013 15:30:13 +0100 Subject: [PATCH 014/115] move to 0.90 --- README.md | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ade25cacb18..c92f162f7dd 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e -------------------------------------------------- | Kuromoji Analysis Plugin | ElasticSearch | -------------------------------------------------- - | master | 0.19 -> master | + | master | 0.90 -> master | -------------------------------------------------- | 1.1.0 | 0.19 | 
-------------------------------------------------- diff --git a/pom.xml b/pom.xml index 7805d070c75..84ef49c1df4 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,7 @@ - 0.21.0.Beta1-SNAPSHOT + 0.90.0.Beta1 4.1.0 From 624eedeef13a9e4709aa1d50450b4dcc25c86fcd Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 26 Feb 2013 15:36:34 +0100 Subject: [PATCH 015/115] release 1.2.0 --- README.md | 4 +++- pom.xml | 2 +- src/main/assemblies/plugin.xml | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c92f162f7dd..c0705143129 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,15 @@ Japanese (kuromoji) Analysis for ElasticSearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.1.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.2.0`. 
-------------------------------------------------- | Kuromoji Analysis Plugin | ElasticSearch | -------------------------------------------------- | master | 0.90 -> master | -------------------------------------------------- + | 1.2.0 | 0.90 -> master | + -------------------------------------------------- | 1.1.0 | 0.19 | -------------------------------------------------- | 1.0.0 | 0.19 | diff --git a/pom.xml b/pom.xml index 84ef49c1df4..37ac6c9e2f8 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.2.0-SNAPSHOT + 1.2.0 jar Japanese (kuromoji) Analysis for ElasticSearch 2009 diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml index 03c6bb0ac1b..7f051f2e6d5 100644 --- a/src/main/assemblies/plugin.xml +++ b/src/main/assemblies/plugin.xml @@ -19,7 +19,7 @@ true true - org.apache.lucene:lucene-kuromoji + org.apache.lucene:lucene-analyzers-kuromoji From 5f4f3be33d0be71d5809a576039bdc374f17570e Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 26 Feb 2013 15:36:58 +0100 Subject: [PATCH 016/115] move to 1.3 snap --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 37ac6c9e2f8..a2932dce0b5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.2.0 + 1.3.0-SNAPSHOT jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From bdec4cda93a8377fdc46638033d7223fb388eb1e Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 29 Apr 2013 21:51:50 +0200 Subject: [PATCH 017/115] move to lucene 4.2.1 and ES 0.90 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index a2932dce0b5..a6cf195fcd7 100644 --- a/pom.xml +++ b/pom.xml @@ -31,8 +31,8 @@ - 0.90.0.Beta1 - 4.1.0 + 0.90.0 + 4.2.1 From 4a1108a7fab4e092a023fed8b05fdb87a0a5ac7a Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 29 Apr 2013 21:52:56 +0200 Subject: [PATCH 018/115] release 1.3.0 
--- README.md | 4 +++- pom.xml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c0705143129..6919175054f 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,15 @@ Japanese (kuromoji) Analysis for ElasticSearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.2.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.3.0`. -------------------------------------------------- | Kuromoji Analysis Plugin | ElasticSearch | -------------------------------------------------- | master | 0.90 -> master | -------------------------------------------------- + | 1.3.0 | 0.90 -> master | + -------------------------------------------------- | 1.2.0 | 0.90 -> master | -------------------------------------------------- | 1.1.0 | 0.19 | diff --git a/pom.xml b/pom.xml index a6cf195fcd7..32904c47f95 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.3.0-SNAPSHOT + 1.3.0 jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From 858596b61374fb1a343339252be94720d0f53a2c Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 29 Apr 2013 21:53:12 +0200 Subject: [PATCH 019/115] move to 1.4 snap --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 32904c47f95..626cf9f0ca5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.3.0 + 1.4.0-SNAPSHOT jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From 4282fdf8e1a8b2c5aec8d86bf885935cd6dc9323 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 May 2013 21:13:03 +0200 Subject: [PATCH 020/115] Update to Elasticsearch 0.90.1 / Lucene 4.3.0 Closes #4. 
--- README.md | 2 +- pom.xml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6919175054f..52bfbcb055a 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ License This software is licensed under the Apache 2 license, quoted below. - Copyright 2009-2012 Shay Banon and ElasticSearch + Copyright 2009-2013 Shay Banon and ElasticSearch Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of diff --git a/pom.xml b/pom.xml index 626cf9f0ca5..c7e4ad00f5f 100644 --- a/pom.xml +++ b/pom.xml @@ -31,8 +31,8 @@ - 0.90.0 - 4.2.1 + 0.90.1 + 4.3.0 From 8115b687d8339d5bc13357929b30a114521cf3c9 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 May 2013 21:16:22 +0200 Subject: [PATCH 021/115] prepare release elasticsearch-analysis-kuromoji-1.4.0 --- README.md | 8 +++++--- pom.xml | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 52bfbcb055a..5f33a341b33 100644 --- a/README.md +++ b/README.md @@ -3,16 +3,18 @@ Japanese (kuromoji) Analysis for ElasticSearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.3.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.4.0`. 
-------------------------------------------------- | Kuromoji Analysis Plugin | ElasticSearch | -------------------------------------------------- | master | 0.90 -> master | -------------------------------------------------- - | 1.3.0 | 0.90 -> master | + | 1.4.0 | 0.90.1 | -------------------------------------------------- - | 1.2.0 | 0.90 -> master | + | 1.3.0 | 0.90.0 | + -------------------------------------------------- + | 1.2.0 | 0.20.6 | -------------------------------------------------- | 1.1.0 | 0.19 | -------------------------------------------------- diff --git a/pom.xml b/pom.xml index c7e4ad00f5f..1aa253884a8 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.4.0-SNAPSHOT + 1.4.0 jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From e7a2f7d7b3e5f1d9ffb52afcacfd411455cf75ac Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 May 2013 21:22:05 +0200 Subject: [PATCH 022/115] prepare for next development iteration --- README.md | 30 +++++++++++++++--------------- pom.xml | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 5f33a341b33..b1598ef98de 100644 --- a/README.md +++ b/README.md @@ -5,21 +5,21 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.4.0`. 
- -------------------------------------------------- - | Kuromoji Analysis Plugin | ElasticSearch | - -------------------------------------------------- - | master | 0.90 -> master | - -------------------------------------------------- - | 1.4.0 | 0.90.1 | - -------------------------------------------------- - | 1.3.0 | 0.90.0 | - -------------------------------------------------- - | 1.2.0 | 0.20.6 | - -------------------------------------------------- - | 1.1.0 | 0.19 | - -------------------------------------------------- - | 1.0.0 | 0.19 | - -------------------------------------------------- + ---------------------------------------------------- + | Kuromoji Analysis Plugin | ElasticSearch | + ---------------------------------------------------- + | 1.5.0-SNAPSHOT (master) | 0.90.1 -> master | + ---------------------------------------------------- + | 1.4.0 | 0.90.1 | + ---------------------------------------------------- + | 1.3.0 | 0.90.0 | + ---------------------------------------------------- + | 1.2.0 | 0.20.6 | + ---------------------------------------------------- + | 1.1.0 | 0.19 | + ---------------------------------------------------- + | 1.0.0 | 0.19 | + ---------------------------------------------------- The plugin includes the `kuromoji` analyzer. diff --git a/pom.xml b/pom.xml index 1aa253884a8..fcd502c51b8 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.4.0 + 1.5.0-SNAPSHOT jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From fc23bfd8f2fc66b32bec0ab292c2cb9a50ef1783 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 8 Aug 2013 15:43:00 +0200 Subject: [PATCH 023/115] Update to Elasticsearch 0.90.3 / Lucene 4.4.0 Closes #6. 
--- README.md | 4 ++-- pom.xml | 4 ++-- .../KuromojiPartOfSpeechFilterFactory.java | 5 ++--- .../analysis/KuromojiIndicesAnalysis.java | 16 ++++++---------- .../index/analysis/kuromoji_analysis.json | 3 +-- 5 files changed, 13 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index b1598ef98de..a34aa1078b1 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e ---------------------------------------------------- | Kuromoji Analysis Plugin | ElasticSearch | ---------------------------------------------------- - | 1.5.0-SNAPSHOT (master) | 0.90.1 -> master | + | 1.5.0-SNAPSHOT (master) | 0.90.3 -> master | ---------------------------------------------------- - | 1.4.0 | 0.90.1 | + | 1.4.0 | 0.90.1 -> 0.90.2 | ---------------------------------------------------- | 1.3.0 | 0.90.0 | ---------------------------------------------------- diff --git a/pom.xml b/pom.xml index fcd502c51b8..3631f291f70 100644 --- a/pom.xml +++ b/pom.xml @@ -31,8 +31,8 @@ - 0.90.1 - 4.3.0 + 0.90.3 + 4.4.0 diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java index 2497595dce5..0459eec739c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter; +import org.apache.lucene.util.Version; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -34,7 +35,6 @@ import java.util.Set; public class KuromojiPartOfSpeechFilterFactory extends AbstractTokenFilterFactory { 
- private final boolean enablePositionIncrements; private final Set stopTags = new HashSet(); @Inject @@ -44,12 +44,11 @@ public class KuromojiPartOfSpeechFilterFactory extends AbstractTokenFilterFactor if (wordList != null) { stopTags.addAll(wordList); } - this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", true); } @Override public TokenStream create(TokenStream tokenStream) { - return new JapanesePartOfSpeechStopFilter(enablePositionIncrements, tokenStream, stopTags); + return new JapanesePartOfSpeechStopFilter(Version.LUCENE_44, tokenStream, stopTags); } } diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java index 06d6e16b309..613c8b3b232 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -18,17 +18,11 @@ */ package org.elasticsearch.indices.analysis; -import java.io.Reader; - import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.ja.JapaneseAnalyzer; -import org.apache.lucene.analysis.ja.JapaneseBaseFormFilter; -import org.apache.lucene.analysis.ja.JapaneseKatakanaStemFilter; -import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter; -import org.apache.lucene.analysis.ja.JapaneseReadingFormFilter; -import org.apache.lucene.analysis.ja.JapaneseTokenizer; +import org.apache.lucene.analysis.ja.*; import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; +import org.apache.lucene.util.Version; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; @@ -37,6 +31,8 @@ import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; import 
org.elasticsearch.index.analysis.TokenizerFactory; +import java.io.Reader; + /** * Registers indices level analysis components so, if not explicitly configured, * will be shared among all indices. @@ -85,7 +81,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { @Override public TokenStream create(TokenStream tokenStream) { - return new JapanesePartOfSpeechStopFilter(false, + return new JapanesePartOfSpeechStopFilter(Version.LUCENE_44, tokenStream, JapaneseAnalyzer .getDefaultStopTags()); } @@ -118,4 +114,4 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { } })); } -} \ No newline at end of file +} diff --git a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json index 46731d60db5..b0960850e85 100644 --- a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json +++ b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json @@ -8,7 +8,6 @@ }, "kuromoji_pos" : { "type": "kuromoji_part_of_speech", - "enable_position_increment" : "false", "stoptags" : ["# verb-main:", "動詞-自立"] }, "kuromoji_ks" : { @@ -34,4 +33,4 @@ } } -} \ No newline at end of file +} From 7b68a59de333b81da189cc7598396c687799a5ad Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 8 Aug 2013 15:44:02 +0200 Subject: [PATCH 024/115] prepare release elasticsearch-analysis-kuromoji-1.5.0 --- README.md | 6 ++++-- pom.xml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a34aa1078b1..3d21e98152e 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,14 @@ Japanese (kuromoji) Analysis for ElasticSearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.4.0`. 
+In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.5.0`. ---------------------------------------------------- | Kuromoji Analysis Plugin | ElasticSearch | ---------------------------------------------------- - | 1.5.0-SNAPSHOT (master) | 0.90.3 -> master | + | 1.6.0-SNAPSHOT (master) | 0.90.3 -> master | + ---------------------------------------------------- + | 1.5.0 | 0.90.3 -> master | ---------------------------------------------------- | 1.4.0 | 0.90.1 -> 0.90.2 | ---------------------------------------------------- diff --git a/pom.xml b/pom.xml index 3631f291f70..0cd8ea672ad 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.5.0-SNAPSHOT + 1.5.0 jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From 235b83443115fff983abb71ef4170ad20698daa1 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 8 Aug 2013 15:54:09 +0200 Subject: [PATCH 025/115] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0cd8ea672ad..05bb81fc4bd 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.5.0 + 1.6.0-SNAPSHOT jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From a4b2409f33c34b0960a5619d2cd25118296462e8 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 8 Aug 2013 15:58:02 +0200 Subject: [PATCH 026/115] Documentation: Version table is incorrect Closes #5. 
--- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3d21e98152e..3a7ab8758b9 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,9 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e ---------------------------------------------------- | 1.3.0 | 0.90.0 | ---------------------------------------------------- - | 1.2.0 | 0.20.6 | + | 1.2.0 | 0.90.0 | ---------------------------------------------------- - | 1.1.0 | 0.19 | + | 1.1.0 | 0.19 -> 0.20 | ---------------------------------------------------- | 1.0.0 | 0.19 | ---------------------------------------------------- From 578c5acbb3513e6508fe8bfec59232471eb8705a Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 21 Aug 2013 11:54:57 +0200 Subject: [PATCH 027/115] Create CONTRIBUTING.md --- CONTRIBUTING.md | 98 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..6afaf89b28f --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,98 @@ +Contributing to elasticsearch +============================= + +Elasticsearch is an open source project and we love to receive contributions from our community — you! There are many ways to contribute, from writing tutorials or blog posts, improving the documentation, submitting bug reports and feature requests or writing code which can be incorporated into Elasticsearch itself. + +Bug reports +----------- + +If you think you have found a bug in Elasticsearch, first make sure that you are testing against the [latest version of Elasticsearch](http://www.elasticsearch.org/download/) - your issue may already have been fixed. If not, search our [issues list](https://github.com/elasticsearch/elasticsearch/issues) on GitHub in case a similar issue has already been opened. + +It is very helpful if you can prepare a reproduction of the bug. 
In other words, provide a small test case which we can run to confirm your bug. It makes it easier to find the problem and to fix it. Test cases should be provided as `curl` commands which we can copy and paste into a terminal to run it locally, for example: + +```sh +# delete the index +curl -XDELETE localhost:9200/test + +# insert a document +curl -XPUT localhost:9200/test/test/1 -d '{ + "title": "test document" +}' + +# this should return XXXX but instead returns YYY +curl .... +``` + +Provide as much information as you can. You may think that the problem lies with your query, when actually it depends on how your data is indexed. The easier it is for us to recreate your problem, the faster it is likely to be fixed. + +Feature requests +---------------- + +If you find yourself wishing for a feature that doesn't exist in Elasticsearch, you are probably not alone. There are bound to be others out there with similar needs. Many of the features that Elasticsearch has today have been added because our users saw the need. +Open an issue on our [issues list](https://github.com/elasticsearch/elasticsearch/issues) on GitHub which describes the feature you would like to see, why you need it, and how it should work. + +Contributing code and documentation changes +------------------------------------------- + +If you have a bugfix or new feature that you would like to contribute to Elasticsearch, please find or open an issue about it first. Talk about what you would like to do. It may be that somebody is already working on it, or that there are particular issues that you should know about before implementing the change. + +We enjoy working with contributors to get their code accepted. There are many approaches to fixing a problem and it is important to find the best approach before writing too much code. + +The process for contributing to any of the [Elasticsearch repositories](https://github.com/elasticsearch/) is similar. Details for individual projects can be found below. 
+ +### Fork and clone the repository + +You will need to fork the main Elasticsearch code or documentation repository and clone it to your local machine. See +[github help page](https://help.github.com/articles/fork-a-repo) for help. + +Further instructions for specific projects are given below. + +### Submitting your changes + +Once your changes and tests are ready to submit for review: + +1. Test your changes +Run the test suite to make sure that nothing is broken. + +2. Sign the Contributor License Agreement +Please make sure you have signed our [Contributor License Agreement](http://www.elasticsearch.org/contributor-agreement/). We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction. We ask this of all contributors in order to assure our users of the origin and continuing existence of the code. You only need to sign the CLA once. + +3. Rebase your changes +Update your local repository with the most recent code from the main Elasticsearch repository, and rebase your branch on top of the latest master branch. We prefer your changes to be squashed into a single commit. + +4. Submit a pull request +Push your local changes to your forked copy of the repository and [submit a pull request](https://help.github.com/articles/using-pull-requests). In the pull request, describe what your changes do and mention the number of the issue where discussion has taken place, eg "Closes #123". + +Then sit back and wait. There will probably be discussion about the pull request and, if any changes are needed, we would love to work with you to get your pull request merged into Elasticsearch. 
+ + +Contributing to the Elasticsearch plugin +---------------------------------------- + +**Repository:** [https://github.com/elasticsearch/elasticsearch-analysis-kuromoji](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji) + +Make sure you have [Maven](http://maven.apache.org) installed, as Elasticsearch uses it as its build system. Integration with IntelliJ and Eclipse should work out of the box. Eclipse users can automatically configure their IDE by running `mvn eclipse:eclipse` and then importing the project into their workspace: `File > Import > Existing project into workspace`. + +Please follow these formatting guidelines: + +* Java indent is 4 spaces +* Line width is 140 characters +* The rest is left to Java coding standards +* Disable “auto-format on save” to prevent unnecessary format changes. This makes reviews much harder as it generates unnecessary formatting changes. If your IDE supports formatting only modified chunks that is fine to do. + +To create a distribution from the source, simply run: + +```sh +cd elasticsearch-analysis-kuromoji/ +mvn clean package -DskipTests +``` + +You will find the newly built packages under: `./target/releases/`. + +Before submitting your changes, run the test suite to make sure that nothing is broken, with: + +```sh +mvn clean test +``` + +Source: [Contributing to elasticsearch](http://www.elasticsearch.org/contributing-to-elasticsearch/) From 4c31dfc37ec54779b6ef9bd5c458d35add68ffdd Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Sun, 20 Oct 2013 06:05:29 +0900 Subject: [PATCH 028/115] Add description and example --- README.md | 348 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 348 insertions(+) diff --git a/README.md b/README.md index 3a7ab8758b9..b04f11078a5 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,354 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e The plugin includes the `kuromoji` analyzer. 
+Includes Analyzer, Tokenizer, TokenFilter +---------------------------------------- + +The plugin includes the following analyzer, tokenizer, and token filters. + +| name | type | +|-------------------------|-------------| +| kuromoji | analyzer | +| kuromoji_tokenizer | tokenizer | +| kuromoji_baseform | tokenfilter | +| kuromoji_part_of_speech | tokenfilter | +| kuromoji_readingform | tokenfilter | +| kuromoji_stemmer | tokenfilter | + + +Usage +----- + +## Analyzer : kuromoji + +An analyzer of type `kuromoji`. +This analyzer is a combination of the following tokenizer and token filters. + +* `kuromoji_tokenizer` : Kuromoji Tokenizer +* `kuromoji_baseform` : Kuromoji BaseFormFilter (TokenFilter) +* `kuromoji_part_of_speech` : Kuromoji Part of Speech Stop Filter (TokenFilter) +* `cjk_width` : CJK Width Filter (TokenFilter) +* `stop` : Stop Filter (TokenFilter) +* `kuromoji_stemmer` : Kuromoji Katakana Stemmer Filter (TokenFilter) +* `lowercase` : LowerCase Filter (TokenFilter) + +## Tokenizer : kuromoji_tokenizer + +A tokenizer of type `kuromoji_tokenizer`. + +The following are settings that can be set for a `kuromoji_tokenizer` tokenizer type: + +| **Setting** | **Description** | **Default value** | +|:--------------------|:--------------------------------------------------------------------------------------------------------------------------|:------------------| +| mode | Tokenization mode: this determines how the tokenizer handles compound and unknown words. One of `normal`, `search`, or `extended`. | `search` | +| discard_punctuation | `true` if punctuation tokens should be dropped from the output. | `true` | +| user_dictionary | Path to the User Dictionary file | | + +### Tokenization mode + +There are three tokenization modes. + +* `normal` : Ordinary segmentation: no decomposition for compounds + +* `search` : Segmentation geared towards search: this includes a decompounding process for long nouns, also including the full compound token as a synonym.
+ +* `extended` : Extended mode outputs unigrams for unknown words. + +#### Differences in tokenization mode output + +Input text is `関西国際空港` and `アブラカダブラ`. + +| **mode** | `関西国際空港` | `アブラカダブラ` | +|:-----------|:-------------|:-------| +| `normal` | `関西国際空港` | `アブラカダブラ` | +| `search` | `関西` `関西国際空港` `国際` `空港` | `アブラカダブラ` | +| `extended` | `関西` `国際` `空港` | `ア` `ブ` `ラ` `カ` `ダ` `ブ` `ラ` | + +### User Dictionary + +The Kuromoji tokenizer uses the MeCab-IPADIC dictionary by default. +Users can also add their own entries to the dictionary; this is the User Dictionary. +User Dictionary entries are defined using the following CSV format: + +``` +<text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag> +``` + +Dictionary Example + +``` +東京スカイツリー,東京 スカイツリー,トウキョウ スカイツリー,カスタム名詞 +``` + +To use a User Dictionary, set its file path in the `user_dictionary` attribute. +The User Dictionary file is placed in the `ES_HOME/config` directory. + +### example + +``` +curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' +{ + "index":{ + "analysis":{ + "tokenizer" : { + "kuromoji_user_dict" : { + "type" : "kuromoji_tokenizer", + "mode" : "extended", + "discard_punctuation" : "false", + "user_dictionary" : "userdict_ja.txt" + } + }, + "analyzer" : { + "my_analyzer" : { + "type" : "custom", + "tokenizer" : "kuromoji_user_dict" + } + } + + } + } +} +' + +curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer&pretty' -d '東京スカイツリー' +{ + "tokens" : [ { + "token" : "東京", + "start_offset" : 0, + "end_offset" : 2, + "type" : "word", + "position" : 1 + }, { + "token" : "スカイツリー", + "start_offset" : 2, + "end_offset" : 8, + "type" : "word", + "position" : 2 + } ] +} +``` + +## TokenFilter : kuromoji_baseform + +A token filter of type `kuromoji_baseform` that replaces term text with BaseFormAttribute. +This acts as a lemmatizer for verbs and adjectives.
+ +### example + +``` +curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' +{ + "index":{ + "analysis":{ + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["kuromoji_baseform"] + } + } + } + } +} +' + +curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer&pretty' -d '飲み' +{ + "tokens" : [ { + "token" : "飲む", + "start_offset" : 0, + "end_offset" : 2, + "type" : "word", + "position" : 1 + } ] +} +``` + +## TokenFilter : kuromoji_part_of_speech + +A token filter of type `kuromoji_part_of_speech` that removes tokens that match a set of part-of-speech tags. + +The following are settings that can be set for a `kuromoji_part_of_speech` token filter type: + +| **Setting** | **Description** | +|:------------|:-----------------------------------------------------| +| stoptags | A list of part-of-speech tags that should be removed | + +Note that the default setting is the stoptags.txt file included in lucene-analyzers-kuromoji.jar. + +### example + +``` +curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' +{ + "index":{ + "analysis":{ + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["my_posfilter"] + } + }, + "filter" : { + "my_posfilter" : { + "type" : "kuromoji_part_of_speech", + "stoptags" : [ + "助詞-格助詞-一般", + "助詞-終助詞" + ] + } + } + } + } +} +' + +curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer&pretty' -d '寿司がおいしいね' +{ + "tokens" : [ { + "token" : "寿司", + "start_offset" : 0, + "end_offset" : 2, + "type" : "word", + "position" : 1 + }, { + "token" : "おいしい", + "start_offset" : 3, + "end_offset" : 7, + "type" : "word", + "position" : 3 + } ] +} +``` + +## TokenFilter : kuromoji_readingform + +A token filter of type `kuromoji_readingform` that replaces the term attribute with the reading of a token in either katakana or romaji form. +The default reading form is katakana.
+ +The following are settings that can be set for a `kuromoji_readingform` token filter type: + +| **Setting** | **Description** | **Default value** | +|:------------|:----------------------------------------------------------|:------------------| +| use_romaji | `true` to output the reading in romaji instead of katakana. | `false` | + +Note that the built-in `kuromoji_readingform` filter of elasticsearch-analysis-kuromoji sets `use_romaji` to `true` by default. + +### example + +``` +curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' +{ + "index":{ + "analysis":{ + "analyzer" : { + "romaji_analyzer" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["romaji_readingform"] + }, + "katakana_analyzer" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["katakana_readingform"] + } + }, + "filter" : { + "romaji_readingform" : { + "type" : "kuromoji_readingform", + "use_romaji" : true + }, + "katakana_readingform" : { + "type" : "kuromoji_readingform", + "use_romaji" : false + } + } + } + } +} +' + +curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=katakana_analyzer&pretty' -d '寿司' +{ + "tokens" : [ { + "token" : "スシ", + "start_offset" : 0, + "end_offset" : 2, + "type" : "word", + "position" : 1 + } ] +} + +curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=romaji_analyzer&pretty' -d '寿司' +{ + "tokens" : [ { + "token" : "sushi", + "start_offset" : 0, + "end_offset" : 2, + "type" : "word", + "position" : 1 + } ] +} +``` + +## TokenFilter : kuromoji_stemmer + +A token filter of type `kuromoji_stemmer` that normalizes common katakana spelling variations ending in a long sound character by removing this character (U+30FC). +Only katakana words longer than a minimum length are stemmed (default is four). + +Note that only full-width katakana characters are supported.
+ +The following are settings that can be set for a `kuromoji_stemmer` token filter type: + +| **Setting** | **Description** | **Default value** | +|:----------------|:---------------------------|:------------------| +| minimum_length | The minimum length to stem | `4` | + +### example + +``` +curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' +{ + "index":{ + "analysis":{ + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["my_katakana_stemmer"] + } + }, + "filter" : { + "my_katakana_stemmer" : { + "type" : "kuromoji_stemmer", + "minimum_length" : 4 + } + } + } + } +} +' + +curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer&pretty' -d 'コピー' +{ + "tokens" : [ { + "token" : "コピー", + "start_offset" : 0, + "end_offset" : 3, + "type" : "word", + "position" : 1 + } ] +} + +curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer&pretty' -d 'サーバー' +{ + "tokens" : [ { + "token" : "サーバ", + "start_offset" : 0, + "end_offset" : 4, + "type" : "word", + "position" : 1 + } ] +} +``` + + License ------- From d98014c942ee98c2844bb50f4121cfe7831e9cc1 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 6 Nov 2013 12:09:11 +0100 Subject: [PATCH 029/115] Update to Elasticsearch 0.90.6 / Lucene 4.5.1 Closes #10. --- README.md | 63 ++++++++++++++++++++++++++++++++++++++++--------------- pom.xml | 4 ++-- 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index b04f11078a5..ccfa90a4f10 100644 --- a/README.md +++ b/README.md @@ -5,23 +5,52 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.5.0`. 
- ---------------------------------------------------- - | Kuromoji Analysis Plugin | ElasticSearch | - ---------------------------------------------------- - | 1.6.0-SNAPSHOT (master) | 0.90.3 -> master | - ---------------------------------------------------- - | 1.5.0 | 0.90.3 -> master | - ---------------------------------------------------- - | 1.4.0 | 0.90.1 -> 0.90.2 | - ---------------------------------------------------- - | 1.3.0 | 0.90.0 | - ---------------------------------------------------- - | 1.2.0 | 0.90.0 | - ---------------------------------------------------- - | 1.1.0 | 0.19 -> 0.20 | - ---------------------------------------------------- - | 1.0.0 | 0.19 | - ---------------------------------------------------- + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Kuromoji Analysis PluginElasticsearchRelease date
1.6.0-SNAPSHOT (master)0.90.6 -> master
1.5.00.90.3 -> 0.90.52013-08-08
1.4.00.90.1 -> 0.90.22013-05-30
1.3.00.90.02013-04-29
1.2.00.90.02013-02-26
1.1.00.19.2 -> 0.202012-11-21
1.0.00.19.0 -> 0.19.12012-04-30
The plugin includes the `kuromoji` analyzer. diff --git a/pom.xml b/pom.xml index 05bb81fc4bd..3f46fdb5268 100644 --- a/pom.xml +++ b/pom.xml @@ -31,8 +31,8 @@ - 0.90.3 - 4.4.0 + 0.90.6 + 4.5.1 From 7381e1e72f95dce4d79498e1ff8186fdf48e3820 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 6 Nov 2013 12:10:37 +0100 Subject: [PATCH 030/115] prepare release elasticsearch-analysis-kuromoji-1.6.0 --- README.md | 9 +++++++-- pom.xml | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ccfa90a4f10..a746cdaf628 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Japanese (kuromoji) Analysis for ElasticSearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.5.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.6.0`. 
@@ -15,10 +15,15 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e - + + + + + + diff --git a/pom.xml b/pom.xml index 3f46fdb5268..a7cc1853984 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0org.elasticsearchelasticsearch-analysis-kuromoji - 1.6.0-SNAPSHOT + 1.6.0jarJapanese (kuromoji) Analysis for ElasticSearch2009 From 939db5852644a7256f0562ac2209827485ec9a44 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 6 Nov 2013 12:23:01 +0100 Subject: [PATCH 031/115] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a7cc1853984..8ef2b94b1f5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.6.0 + 1.7.0-SNAPSHOT jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From 4c95a3ef7086c4f177da0bb3d4bb404585f29b6b Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 12 Nov 2013 13:14:34 +0100 Subject: [PATCH 032/115] Update to Elasticsearch 0.90.8 / Lucene 4.6.0 Move tests to JUnit Closes #11. Closes #14. --- .gitignore | 1 + README.md | 61 ++------ pom.xml | 136 +++++++++++++++--- .../index/analysis/KuromojiAnalysisTests.java | 16 +-- 4 files changed, 138 insertions(+), 76 deletions(-) diff --git a/.gitignore b/.gitignore index dcd5458de92..a1596ca81e8 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ /.project /.settings /.classpath +/.local-execution-hints.log diff --git a/README.md b/README.md index a746cdaf628..d45c9c8208f 100644 --- a/README.md +++ b/README.md @@ -5,57 +5,16 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.6.0`. -
1.6.0-SNAPSHOT (master)1.7.0-SNAPSHOT (master) 0.90.6 -> master
1.6.00.90.6 -> master2013-11-06
1.5.0 0.90.3 -> 0.90.5
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Kuromoji Analysis PluginElasticsearchRelease date
1.7.0-SNAPSHOT (master)0.90.6 -> master
1.6.00.90.6 -> master2013-11-06
1.5.00.90.3 -> 0.90.52013-08-08
1.4.00.90.1 -> 0.90.22013-05-30
1.3.00.90.02013-04-29
1.2.00.90.02013-02-26
1.1.00.19.2 -> 0.202012-11-21
1.0.00.19.0 -> 0.19.12012-04-30
+| Kuromoji Analysis Plugin | elasticsearch | Release date | +|--------------------------|------------------|:------------:| +| 1.7.0-SNAPSHOT (master) | 0.90.8 -> master | 2013-12-19 | +| 1.6.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | +| 1.5.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | +| 1.4.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | +| 1.3.0 | 0.90.0 | 2013-04-29 | +| 1.2.0 | 0.90.0 | 2013-02-26 | +| 1.1.0 | 0.19.2 -> 0.20 | 2012-11-21 | +| 1.0.0 | 0.19.0 -> 0.19.1 | 2012-04-30 | The plugin includes the `kuromoji` analyzer. diff --git a/pom.xml b/pom.xml index 8ef2b94b1f5..6b608e4c5a6 100644 --- a/pom.xml +++ b/pom.xml @@ -31,8 +31,13 @@ - 0.90.6 - 4.5.1 + 0.90.8 + 4.6.0 + 1 + true + onerror + + INFO @@ -43,6 +48,18 @@ + + org.hamcrest + hamcrest-all + 1.3 + test + + + org.apache.lucene + lucene-test-framework + ${lucene.version} + test + org.elasticsearch elasticsearch @@ -65,9 +82,10 @@ - org.testng - testng - 6.8 + org.elasticsearch + elasticsearch + ${elasticsearch.version} + test-jar test @@ -77,13 +95,6 @@ 1.3 test - - - org.hamcrest - hamcrest-library - 1.3 - test - @@ -133,12 +144,103 @@ org.apache.maven.plugins - maven-surefire-plugin - 2.11 + maven-compiler-plugin + 2.3.2 - - **/*Tests.java - + 1.6 + 1.6 + + + + com.carrotsearch.randomizedtesting + junit4-maven-plugin + 2.0.12 + + + tests + test + + junit4 + + + 20 + pipe,warn + true + + + + + + + + + ${tests.jvms} + + + + + + + **/*Tests.class + **/*Test.class + + + **/Abstract*.class + **/*StressTest.class + + + -Xmx512m + -XX:MaxDirectMemorySize=512m + -Des.logger.prefix= + + ${tests.shuffle} + ${tests.verbose} + ${tests.seed} + ${tests.failfast} + + + ${tests.iters} + ${tests.maxfailures} + ${tests.failfast} + ${tests.class} + ${tests.method} + ${tests.nightly} + ${tests.badapples} + ${tests.weekly} + ${tests.slow} + ${tests.awaitsfix} + ${tests.slow} + ${tests.timeoutSuite} + ${tests.showSuccess} + ${tests.integration} + ${tests.cluster_seed} + ${tests.client.ratio} + ${env.ES_TEST_LOCAL} + ${es.node.mode} + 
${es.logger.level} + true + + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.15 + + true diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index 6bd94d7d5a1..0861911cd50 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -37,18 +37,18 @@ import org.elasticsearch.index.settings.IndexSettingsModule; import org.elasticsearch.indices.analysis.IndicesAnalysisModule; import org.elasticsearch.indices.analysis.IndicesAnalysisService; import org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin; -import org.testng.Assert; -import org.testng.annotations.Test; +import org.elasticsearch.test.ElasticsearchTestCase; +import org.junit.Test; import java.io.IOException; import java.io.StringReader; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.*; /** */ -public class KuromojiAnalysisTests { +public class KuromojiAnalysisTests extends ElasticsearchTestCase { @Test public void testDefaultsKuromojiAnalysis() throws IOException { @@ -157,12 +157,12 @@ public class KuromojiAnalysisTests { String[] expected) throws IOException { stream.reset(); CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class); - Assert.assertNotNull(termAttr); + assertThat(termAttr, notNullValue()); int i = 0; while (stream.incrementToken()) { - Assert.assertTrue(i < expected.length); - Assert.assertEquals(expected[i++], termAttr.toString(), "expected different term at index " + i); + assertThat(expected.length, greaterThan(i)); + assertThat( "expected different term at index " + i, expected[i++], equalTo(termAttr.toString())); } - Assert.assertEquals(i, expected.length, "not all tokens produced"); + assertThat("not all tokens produced", i, 
equalTo(expected.length)); } } From 98b2544f2a6e0ed356f329e446ea1da4689329db Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Thu, 7 Nov 2013 11:22:50 +0900 Subject: [PATCH 033/115] Add JapaneseIterationMarkCharFilter support Currently, Kuromoji have JapaneseIterationMarkCharFilter. Add IterationMarkCharFilter to analysis-kuromoji. Closes #7. --- README.md | 13 +++++ ...uromojiIterationMarkCharFilterFactory.java | 29 ++++++++++ .../analysis/KuromojiIndicesAnalysis.java | 24 ++++++-- .../kuromoji/AnalysisKuromojiPlugin.java | 1 + .../index/analysis/KuromojiAnalysisTests.java | 56 ++++++++++++++++++- .../index/analysis/kuromoji_analysis.json | 18 +++++- 6 files changed, 133 insertions(+), 8 deletions(-) create mode 100644 src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java diff --git a/README.md b/README.md index d45c9c8208f..7df37ec4675 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ The plugin includes these analyzer and tokenizer, tokenfilter. | name | type | |-------------------------|-------------| +| kuromoji_iteration_mark | charfilter | | kuromoji | analyzer | | kuromoji_tokenizer | tokenizer | | kuromoji_baseform | tokenfilter | @@ -49,6 +50,18 @@ This analyzer is the following tokenizer and tokenfilter combination. * `kuromoji_stemmer` : Kuromiji Katakana Stemmer Filter(TokenFilter) * `lowercase` : LowerCase Filter (TokenFilter) +## CharFilter : kuromoji_iteration_mark + +A charfilter of type `kuromoji_iteration_mark`. +This charfilter is Normalizes Japanese horizontal iteration marks (odoriji) to their expanded form. 
+ +The following are settings that can be set for a `kuromoji_iteration_mark` charfilter type: + +| **Setting** | **Description** | **Default value** | +|:----------------|:-------------------------------------------------------------|:------------------| +| normalize_kanji | indicates whether kanji iteration marks should be normalized | `true` | +| normalize_kana | indicates whether kana iteration marks should be normalized | `true` | + ## Tokenizer : kuromoji_tokenizer A tokenizer of type `kuromoji_tokenizer`. diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java new file mode 100644 index 00000000000..8b693793801 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java @@ -0,0 +1,29 @@ +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.ja.JapaneseIterationMarkCharFilter; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +import java.io.Reader; + +public class KuromojiIterationMarkCharFilterFactory extends AbstractCharFilterFactory { + + private final boolean normalizeKanji; + private final boolean normalizeKana; + + @Inject + public KuromojiIterationMarkCharFilterFactory(Index index, @IndexSettings Settings indexSettings, + @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name); + normalizeKanji = settings.getAsBoolean("normalize_kanji", JapaneseIterationMarkCharFilter.NORMALIZE_KANJI_DEFAULT); + normalizeKana = settings.getAsBoolean("normalize_kana", JapaneseIterationMarkCharFilter.NORMALIZE_KANA_DEFAULT); + } + + @Override + public Reader create(Reader reader) { + return new 
JapaneseIterationMarkCharFilter(reader, normalizeKanji, normalizeKana); + } +} diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java index 613c8b3b232..ea6d2a67077 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -26,10 +26,7 @@ import org.apache.lucene.util.Version; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory; -import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory; -import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; +import org.elasticsearch.index.analysis.*; import java.io.Reader; @@ -41,9 +38,24 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { @Inject public KuromojiIndicesAnalysis(Settings settings, - IndicesAnalysisService indicesAnalysisService) { + IndicesAnalysisService indicesAnalysisService) { super(settings); + indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark", + new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() { + @Override + public String name() { + return "kuromoji_iteration_mark"; + } + + @Override + public Reader create(Reader reader) { + return new JapaneseIterationMarkCharFilter(reader, + JapaneseIterationMarkCharFilter.NORMALIZE_KANJI_DEFAULT, + JapaneseIterationMarkCharFilter.NORMALIZE_KANA_DEFAULT); + } + })); + indicesAnalysisService.tokenizerFactories().put("kuromoji_tokenizer", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() { @Override @@ -83,7 +95,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { public TokenStream create(TokenStream 
tokenStream) { return new JapanesePartOfSpeechStopFilter(Version.LUCENE_44, tokenStream, JapaneseAnalyzer - .getDefaultStopTags()); + .getDefaultStopTags()); } })); diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java index 039a2716e57..f3bceaeb350 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ -48,6 +48,7 @@ public class AnalysisKuromojiPlugin extends AbstractPlugin { } public void onModule(AnalysisModule module) { + module.addCharFilter("kuromoji_iteration_mark", KuromojiIterationMarkCharFilterFactory.class); module.addAnalyzer("kuromoji", KuromojiAnalyzerProvider.class); module.addTokenizer("kuromoji_tokenizer", KuromojiTokenizerFactory.class); module.addTokenFilter("kuromoji_baseform", KuromojiBaseFormFilterFactory.class); diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index 0861911cd50..e49d8ce9301 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -41,9 +41,9 @@ import org.elasticsearch.test.ElasticsearchTestCase; import org.junit.Test; import java.io.IOException; +import java.io.Reader; import java.io.StringReader; -import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; /** @@ -75,6 +75,9 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { analyzer = analysisService.analyzer("my_analyzer"); assertThat(analyzer.analyzer(), instanceOf(CustomAnalyzer.class)); assertThat(analyzer.analyzer().tokenStream(null, new StringReader("")), instanceOf(JapaneseTokenizer.class)); + + CharFilterFactory 
charFilterFactory = analysisService.charFilter("kuromoji_iteration_mark"); + assertThat(charFilterFactory, instanceOf(KuromojiIterationMarkCharFilterFactory.class)); } @Test @@ -130,6 +133,41 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { expected_tokens_katakana = new String[]{"明後日", "パーティー", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"}; assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana); } + @Test + public void testIterationMarkCharFilter() throws IOException { + AnalysisService analysisService = createAnalysisService(); + // test only kanji + CharFilterFactory charFilterFactory = analysisService.charFilter("kuromoji_im_only_kanji"); + assertNotNull(charFilterFactory); + assertThat(charFilterFactory, instanceOf(KuromojiIterationMarkCharFilterFactory.class)); + + String source = "ところゞゝゝ、ジヾが、時々、馬鹿々々しい"; + String expected = "ところゞゝゝ、ジヾが、時時、馬鹿馬鹿しい"; + + assertCharFilterEquals(charFilterFactory.create(new StringReader(source)), expected); + + // test only kana + + charFilterFactory = analysisService.charFilter("kuromoji_im_only_kana"); + assertNotNull(charFilterFactory); + assertThat(charFilterFactory, instanceOf(KuromojiIterationMarkCharFilterFactory.class)); + + expected = "ところどころ、ジジが、時々、馬鹿々々しい"; + + assertCharFilterEquals(charFilterFactory.create(new StringReader(source)), expected); + + // test default + + charFilterFactory = analysisService.charFilter("kuromoji_im_default"); + assertNotNull(charFilterFactory); + assertThat(charFilterFactory, instanceOf(KuromojiIterationMarkCharFilterFactory.class)); + + expected = "ところどころ、ジジが、時時、馬鹿馬鹿しい"; + + assertCharFilterEquals(charFilterFactory.create(new StringReader(source)), expected); + + + } public AnalysisService createAnalysisService() { Settings settings = ImmutableSettings.settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/kuromoji_analysis.json").build(); @@ -165,4 +203,20 @@ public class KuromojiAnalysisTests extends 
ElasticsearchTestCase { } assertThat("not all tokens produced", i, equalTo(expected.length)); } + + private void assertCharFilterEquals(Reader filtered, + String expected) throws IOException { + String actual = readFully(filtered); + assertThat(actual, equalTo(expected)); + } + + private String readFully(Reader reader) throws IOException { + StringBuilder buffer = new StringBuilder(); + int ch; + while((ch = reader.read()) != -1){ + buffer.append((char)ch); + } + return buffer.toString(); + } + } diff --git a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json index b0960850e85..478a6c1d293 100644 --- a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json +++ b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json @@ -17,7 +17,23 @@ }, - + + "char_filter":{ + "kuromoji_im_only_kanji":{ + "type":"kuromoji_iteration_mark", + "normalize_kanji":true, + "normalize_kana":false + }, + "kuromoji_im_only_kana":{ + "type":"kuromoji_iteration_mark", + "normalize_kanji":false, + "normalize_kana":true + }, + "kuromoji_im_default":{ + "type":"kuromoji_iteration_mark" + } + }, + "tokenizer" : { "kuromoji" : { "type":"kuromoji_tokenizer" From 938401bffc3966a536848b467f4caa8d1fae4623 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 20 Dec 2013 08:10:04 +0100 Subject: [PATCH 034/115] prepare release elasticsearch-analysis-kuromoji-1.7.0 --- README.md | 15 ++++++++------- pom.xml | 2 +- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 7df37ec4675..3c7cd15209d 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,12 @@ Japanese (kuromoji) Analysis for ElasticSearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.6.0`. 
+In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.7.0`. | Kuromoji Analysis Plugin | elasticsearch | Release date | |--------------------------|------------------|:------------:| -| 1.7.0-SNAPSHOT (master) | 0.90.8 -> master | 2013-12-19 | +| 1.8.0-SNAPSHOT (master) | 0.90.8 -> master | | +| 1.7.0 | 0.90.8 -> master | 2013-12-20 | | 1.6.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | | 1.5.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | | 1.4.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | @@ -115,7 +116,7 @@ User Dictionary file is placed `ES_HOME/config` directory. ### example -``` +```sh curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' { "index":{ @@ -165,7 +166,7 @@ This acts as a lemmatizer for verbs and adjectives. ### example -``` +```sh curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' { "index":{ @@ -207,7 +208,7 @@ Note that default setting is stoptags.txt include lucene-analyzer-kuromji.jar. ### example -``` +```sh curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' { "index":{ @@ -265,7 +266,7 @@ Note that elasticsearch-analysis-kuromoji built-in `kuromoji_readingform` set de ### example -``` +```sh curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' { "index":{ @@ -333,7 +334,7 @@ The following are settings that can be set for a `kuromoji_stemmer` token filter ### example -``` +```sh curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' { "index":{ diff --git a/pom.xml b/pom.xml index 6b608e4c5a6..08582eb6fe9 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.7.0-SNAPSHOT + 1.7.0 jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From 5a9395f126cb40f61ddcbaf1dbd0127003eec7e3 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 20 Dec 2013 08:13:46 +0100 Subject: [PATCH 035/115] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 
08582eb6fe9..b0bebf5f177 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.7.0 + 1.8.0-SNAPSHOT jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From b0ea86a635404056620dd1051731dc7188c813c5 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 10 Jan 2014 23:00:25 +0100 Subject: [PATCH 036/115] Update headers --- README.md | 8 ++++---- .../analysis/KuromojiAnalyzerProvider.java | 14 +++++++------- .../KuromojiBaseFormFilterFactory.java | 14 +++++++------- ...uromojiIterationMarkCharFilterFactory.java | 19 +++++++++++++++++++ .../KuromojiKatakanaStemmerFactory.java | 14 +++++++------- .../KuromojiPartOfSpeechFilterFactory.java | 14 +++++++------- .../KuromojiReadingFormFilterFactory.java | 14 +++++++------- .../analysis/KuromojiTokenizerFactory.java | 17 +++++++++-------- .../analysis/KuromojiIndicesAnalysis.java | 15 ++++++++------- .../KuromojiIndicesAnalysisModule.java | 16 ++++++++-------- .../kuromoji/AnalysisKuromojiPlugin.java | 14 +++++++------- .../index/analysis/KuromojiAnalysisTests.java | 14 +++++++------- 12 files changed, 97 insertions(+), 76 deletions(-) diff --git a/README.md b/README.md index 3c7cd15209d..8404a8840bb 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Japanese (kuromoji) Analysis for ElasticSearch +Japanese (kuromoji) Analysis for Elasticsearch ================================== The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. 
@@ -7,8 +7,8 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e | Kuromoji Analysis Plugin | elasticsearch | Release date | |--------------------------|------------------|:------------:| -| 1.8.0-SNAPSHOT (master) | 0.90.8 -> master | | -| 1.7.0 | 0.90.8 -> master | 2013-12-20 | +| 1.8.0-SNAPSHOT (master) | 0.90.8 -> 0.90 | | +| 1.7.0 | 0.90.8 -> 0.90 | 2013-12-20 | | 1.6.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | | 1.5.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | | 1.4.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | @@ -385,7 +385,7 @@ License This software is licensed under the Apache 2 license, quoted below. - Copyright 2009-2013 Shay Banon and ElasticSearch + Copyright 2009-2014 Elasticsearch Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java index e1d0deac39f..ab46b481cf0 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java index e210057aa19..0db43bd429a 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java index 8b693793801..74b63c32afd 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java @@ -1,3 +1,22 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.ja.JapaneseIterationMarkCharFilter; diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java index 077d8ccf1ac..93db459b865 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java index 0459eec739c..222cd05f25a 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java index b9af0641613..a87ac8c8256 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java index 583e8b74c8f..e051f861ffe 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ + package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.Tokenizer; @@ -90,4 +91,4 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { discartPunctuation, mode); } -} \ No newline at end of file +} diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java index ea6d2a67077..9264acd9426 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. 
*/ + package org.elasticsearch.indices.analysis; import org.apache.lucene.analysis.TokenStream; diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java index 1c44342057a..8046aece373 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -29,4 +29,4 @@ public class KuromojiIndicesAnalysisModule extends AbstractModule { protected void configure() { bind(KuromojiIndicesAnalysis.class).asEagerSingleton(); } -} \ No newline at end of file +} diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java index f3bceaeb350..27d8a3e5b9e 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index e49d8ce9301..116652bf84e 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * From 1c4b33078857df1f24a628c19985c28bcaa31cff Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 10 Jan 2014 23:03:21 +0100 Subject: [PATCH 037/115] Preparing branch 1.x --- README.md | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 8404a8840bb..f60e919a00b 100644 --- a/README.md +++ b/README.md @@ -5,17 +5,18 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.7.0`. 
-| Kuromoji Analysis Plugin | elasticsearch | Release date | -|--------------------------|------------------|:------------:| -| 1.8.0-SNAPSHOT (master) | 0.90.8 -> 0.90 | | -| 1.7.0 | 0.90.8 -> 0.90 | 2013-12-20 | -| 1.6.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | -| 1.5.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | -| 1.4.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | -| 1.3.0 | 0.90.0 | 2013-04-29 | -| 1.2.0 | 0.90.0 | 2013-02-26 | -| 1.1.0 | 0.19.2 -> 0.20 | 2012-11-21 | -| 1.0.0 | 0.19.0 -> 0.19.1 | 2012-04-30 | +| Kuromoji Analysis Plugin | elasticsearch | Release date | +|-----------------------------|------------------|:------------:| +| 2.0.0.RC1-SNAPSHOT (master) | 0.90.8 -> 0.90 | | +| 1.8.0-SNAPSHOT (1.x) | 0.90.8 -> 0.90 | | +| 1.7.0 | 0.90.8 -> 0.90 | 2013-12-20 | +| 1.6.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | +| 1.5.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | +| 1.4.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | +| 1.3.0 | 0.90.0 | 2013-04-29 | +| 1.2.0 | 0.90.0 | 2013-02-26 | +| 1.1.0 | 0.19.2 -> 0.20 | 2012-11-21 | +| 1.0.0 | 0.19.0 -> 0.19.1 | 2012-04-30 | The plugin includes the `kuromoji` analyzer. From 77701b95ba119d3a26787465eb0b58742ffd8c67 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 10 Jan 2014 23:08:23 +0100 Subject: [PATCH 038/115] Update to elasticsearch 1.0.0.RC1 Closes #16. 
--- pom.xml | 4 ++-- .../index/analysis/KuromojiTokenizerFactory.java | 4 ++-- src/test/resources/log4j.properties | 5 +++++ 3 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 src/test/resources/log4j.properties diff --git a/pom.xml b/pom.xml index b0bebf5f177..a69328813af 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 1.8.0-SNAPSHOT + 2.0.0.RC1-SNAPSHOT jar Japanese (kuromoji) Analysis for ElasticSearch 2009 @@ -31,7 +31,7 @@ - 0.90.8 + 1.0.0.RC1-SNAPSHOT 4.6.0 1 true diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java index e051f861ffe..99175aa1e5c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ja.JapaneseTokenizer; import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; import org.apache.lucene.analysis.ja.dict.UserDictionary; -import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -66,7 +66,7 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { } } } catch (IOException e) { - throw new ElasticSearchException("failed to load kuromoji user dictionary", e); + throw new ElasticsearchException("failed to load kuromoji user dictionary", e); } } diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties new file mode 100644 index 00000000000..497c97f9959 --- /dev/null +++ b/src/test/resources/log4j.properties @@ -0,0 +1,5 @@ +log4j.rootLogger=INFO, out + 
+log4j.appender.out=org.apache.log4j.ConsoleAppender +log4j.appender.out.layout=org.apache.log4j.PatternLayout +log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c] %m%n From e488cdfb4179cce99fcd388abb6a99cccb19580f Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 10 Jan 2014 23:11:53 +0100 Subject: [PATCH 039/115] Update version 2.0.0.RC1 --- README.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index f60e919a00b..1d10067696f 100644 --- a/README.md +++ b/README.md @@ -5,18 +5,18 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.7.0`. -| Kuromoji Analysis Plugin | elasticsearch | Release date | -|-----------------------------|------------------|:------------:| -| 2.0.0.RC1-SNAPSHOT (master) | 0.90.8 -> 0.90 | | -| 1.8.0-SNAPSHOT (1.x) | 0.90.8 -> 0.90 | | -| 1.7.0 | 0.90.8 -> 0.90 | 2013-12-20 | -| 1.6.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | -| 1.5.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | -| 1.4.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | -| 1.3.0 | 0.90.0 | 2013-04-29 | -| 1.2.0 | 0.90.0 | 2013-02-26 | -| 1.1.0 | 0.19.2 -> 0.20 | 2012-11-21 | -| 1.0.0 | 0.19.0 -> 0.19.1 | 2012-04-30 | +| Kuromoji Analysis Plugin | elasticsearch | Release date | +|-----------------------------|---------------------|:------------:| +| 2.0.0.RC1-SNAPSHOT (master) | 1.0.0.RC1 -> master | | +| 1.8.0-SNAPSHOT (1.x) | 0.90.8 -> 0.90 | | +| 1.7.0 | 0.90.8 -> 0.90 | 2013-12-20 | +| 1.6.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | +| 1.5.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | +| 1.4.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | +| 1.3.0 | 0.90.0 | 2013-04-29 | +| 1.2.0 | 0.90.0 | 2013-02-26 | +| 1.1.0 | 0.19.2 -> 0.20 | 2012-11-21 | +| 1.0.0 | 0.19.0 -> 0.19.1 | 2012-04-30 | The plugin includes the `kuromoji` analyzer. 
From 51d39d37a2b7d74e9c9b30842c1ca019ddc076ce Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 15 Jan 2014 18:17:33 +0100 Subject: [PATCH 040/115] prepare release elasticsearch-analysis-kuromoji-2.0.0.RC1 --- README.md | 5 +++-- pom.xml | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1d10067696f..5bc5898cc9d 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,12 @@ Japanese (kuromoji) Analysis for Elasticsearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/1.7.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.0.0.RC1`. | Kuromoji Analysis Plugin | elasticsearch | Release date | |-----------------------------|---------------------|:------------:| -| 2.0.0.RC1-SNAPSHOT (master) | 1.0.0.RC1 -> master | | +| 2.0.0-SNAPSHOT (master) | 1.0.0.RC1 -> master | | +| 2.0.0.RC1 | 1.0.0.RC1 -> master | 2014-01-15 | | 1.8.0-SNAPSHOT (1.x) | 0.90.8 -> 0.90 | | | 1.7.0 | 0.90.8 -> 0.90 | 2013-12-20 | | 1.6.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | diff --git a/pom.xml b/pom.xml index a69328813af..5b7bacc34b3 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 2.0.0.RC1-SNAPSHOT + 2.0.0.RC1 jar Japanese (kuromoji) Analysis for ElasticSearch 2009 @@ -31,7 +31,7 @@ - 1.0.0.RC1-SNAPSHOT + 1.0.0.RC1 4.6.0 1 true From 69e1cb8e46cf19ea5a873afa477e25cdb4ec0fe9 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 15 Jan 2014 19:52:10 +0100 Subject: [PATCH 041/115] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 5b7bacc34b3..5abace4c643 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 2.0.0.RC1 + 
2.0.0-SNAPSHOT jar Japanese (kuromoji) Analysis for ElasticSearch 2009 From 53bfb4f96d57e79a12b77dfb27b370a2746492ed Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 28 Feb 2014 23:27:37 +0100 Subject: [PATCH 042/115] Add plugin release semi-automatic script Closes #22 --- README.md | 15 +- dev-tools/build_release.py | 708 +++++++++++++++++++++++++++++++++++++ dev-tools/upload-s3.py | 67 ++++ pom.xml | 5 +- 4 files changed, 783 insertions(+), 12 deletions(-) create mode 100755 dev-tools/build_release.py create mode 100644 dev-tools/upload-s3.py diff --git a/README.md b/README.md index 5bc5898cc9d..a3de0db745e 100644 --- a/README.md +++ b/README.md @@ -5,19 +5,14 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.0.0.RC1`. +* For 1.0.x elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/master). +* For 0.90.x elasticsearch versions, look at [1.x branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/1.x). + + | Kuromoji Analysis Plugin | elasticsearch | Release date | |-----------------------------|---------------------|:------------:| -| 2.0.0-SNAPSHOT (master) | 1.0.0.RC1 -> master | | +| 2.0.0-SNAPSHOT | 1.0.0.RC1 -> master | XXXX-XX-XX | | 2.0.0.RC1 | 1.0.0.RC1 -> master | 2014-01-15 | -| 1.8.0-SNAPSHOT (1.x) | 0.90.8 -> 0.90 | | -| 1.7.0 | 0.90.8 -> 0.90 | 2013-12-20 | -| 1.6.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | -| 1.5.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | -| 1.4.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | -| 1.3.0 | 0.90.0 | 2013-04-29 | -| 1.2.0 | 0.90.0 | 2013-02-26 | -| 1.1.0 | 0.19.2 -> 0.20 | 2012-11-21 | -| 1.0.0 | 0.19.0 -> 0.19.1 | 2012-04-30 | The plugin includes the `kuromoji` analyzer. 
diff --git a/dev-tools/build_release.py b/dev-tools/build_release.py new file mode 100755 index 00000000000..9166b09e7e3 --- /dev/null +++ b/dev-tools/build_release.py @@ -0,0 +1,708 @@ +# Licensed to Elasticsearch under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on +# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. See the License for the specific +# language governing permissions and limitations under the License. + +import re +import tempfile +import shutil +import os +import datetime +import argparse +import github3 +import smtplib + +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText + +from os.path import dirname, abspath + +""" + This tool builds a release from a given elasticsearch plugin branch. + In order to execute it go in the top level directory and run: + $ python3 dev-tools/build_release.py --branch master --publish --remote origin + + By default this script runs in 'dry' mode which essentially simulates a release. If the + '--publish' option is set the actual release is done. + If not in 'dry' mode, a mail will be automatically sent to the mailing list. + You can disable it with the option '--disable_mail' + + $ python3 dev-tools/build_release.py --publish --remote origin --disable_mail + + The script takes over almost all + steps necessary for a release. From a high level point of view it does the following things: + + - run prerequisite checks ie.
check for Java 1.6 being present or S3 credentials available as env variables + - detect the version to release from the specified branch (--branch) or the current branch + - creates a release branch & updates pom.xml and README.md to point to a release version rather than a snapshot + - builds the artifacts + - commits the new version and merges the release branch into the source branch + - creates a tag and pushes the commit to the specified origin (--remote) + - publishes the releases to sonatype and S3 + - send a mail based on github issues fixed by this version + +Once it's done it will print all the remaining steps. + + Prerequisites: + - Python 3k for script execution + - Boto for S3 Upload ($ apt-get install python-boto or pip-3.3 install boto) + - github3 module (pip-3.3 install github3.py) + - S3 keys exported via ENV Variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) + - GITHUB (login/password) or key exported via ENV Variables (GITHUB_LOGIN, GITHUB_PASSWORD or GITHUB_KEY) + (see https://github.com/settings/applications#personal-access-tokens) - Optional: default to no authentication + - SMTP_HOST - Optional: default to localhost + - MAIL_SENDER - Optional: default to 'david@pilato.fr': must be authorized to send emails to elasticsearch mailing list + - MAIL_TO - Optional: default to 'elasticsearch@googlegroups.com' +""" +env = os.environ + +LOG = env.get('ES_RELEASE_LOG', '/tmp/elasticsearch_release.log') +ROOT_DIR = os.path.join(abspath(dirname(__file__)), '../') +README_FILE = ROOT_DIR + 'README.md' +POM_FILE = ROOT_DIR + 'pom.xml' + +def log(msg): + log_plain('\n%s' % msg) + +def log_plain(msg): + f = open(LOG, mode='ab') + f.write(msg.encode('utf-8')) + f.close() + +def run(command, quiet=False): + log('%s: RUN: %s\n' % (datetime.datetime.now(), command)) + if os.system('%s >> %s 2>&1' % (command, LOG)): + msg = ' FAILED: %s [see log %s]' % (command, LOG) + if not quiet: + print(msg) + raise RuntimeError(msg) + +try: + JAVA_HOME = env['JAVA_HOME'] 
+except KeyError: + raise RuntimeError(""" + Please set JAVA_HOME in the env before running release tool + On OSX use: export JAVA_HOME=`/usr/libexec/java_home -v '1.6*'`""") + +try: + MVN='mvn' + # make sure mvn3 is used if mvn3 is available + # some systems use maven 2 as default + run('mvn3 --version', quiet=True) + MVN='mvn3' +except RuntimeError: + pass + + +def java_exe(): + path = JAVA_HOME + return 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % (path, path, path) + +def verify_java_version(version): + s = os.popen('%s; java -version 2>&1' % java_exe()).read() + if s.find(' version "%s.' % version) == -1: + raise RuntimeError('got wrong version for java %s:\n%s' % (version, s)) + +# Verifies the java version. We guarantee that we run with Java 1.6 +# If 1.6 is not available fail the build! +def verify_mvn_java_version(version, mvn): + s = os.popen('%s; %s --version 2>&1' % (java_exe(), mvn)).read() + if s.find('Java version: %s' % version) == -1: + raise RuntimeError('got wrong java version for %s %s:\n%s' % (mvn, version, s)) + +# Returns the hash of the current git HEAD revision +def get_head_hash(): + return os.popen(' git rev-parse --verify HEAD 2>&1').read().strip() + +# Returns the hash of the given tag revision +def get_tag_hash(tag): + return os.popen('git show-ref --tags %s --hash 2>&1' % (tag)).read().strip() + +# Returns the name of the current branch +def get_current_branch(): + return os.popen('git rev-parse --abbrev-ref HEAD 2>&1').read().strip() + +verify_java_version('1.6') # we require to build with 1.6 +verify_mvn_java_version('1.6', MVN) + +# Utility that returns the name of the release branch for a given version +def release_branch(version): + return 'release_branch_%s' % version + +# runs git fetch on the given remote +def fetch(remote): + run('git fetch %s' % remote) + +# Creates a new release branch from the given source branch +# and rebases the source branch from the remote before creating +# the release branch.
Note: This fails if the source branch +# doesn't exist on the provided remote. +def create_release_branch(remote, src_branch, release): + run('git checkout %s' % src_branch) + run('git pull --rebase %s %s' % (remote, src_branch)) + run('git checkout -b %s' % (release_branch(release))) + + +# Reads the given file and applies the +# callback to it. If the callback changed +# a line the given file is replaced with +# the modified input. +def process_file(file_path, line_callback): + fh, abs_path = tempfile.mkstemp() + modified = False + with open(abs_path,'w', encoding='utf-8') as new_file: + with open(file_path, encoding='utf-8') as old_file: + for line in old_file: + new_line = line_callback(line) + modified = modified or (new_line != line) + new_file.write(new_line) + os.close(fh) + if modified: + #Remove original file + os.remove(file_path) + #Move new file + shutil.move(abs_path, file_path) + return True + else: + # nothing to do - just remove the tmp file + os.remove(abs_path) + return False + +# Guess the next snapshot version number (increment second digit) +def guess_snapshot(version): + digits=list(map(int, re.findall(r'\d+', version))) + source='%s.%s' % (digits[0], digits[1]) + destination='%s.%s' % (digits[0], digits[1]+1) + return version.replace(source, destination) + +# Moves the pom.xml file from a snapshot to a release +def remove_maven_snapshot(pom, release): + pattern = '%s-SNAPSHOT' % release + replacement = '%s' % release + def callback(line): + return line.replace(pattern, replacement) + process_file(pom, callback) + +# Moves the README.md file from a snapshot to a release +def remove_version_snapshot(readme_file, release): + pattern = '%s-SNAPSHOT' % release + replacement = '%s ' % release + def callback(line): + return line.replace(pattern, replacement) + process_file(readme_file, callback) + +# Moves the pom.xml file to the next snapshot +def add_maven_snapshot(pom, release, snapshot): + pattern = '%s' % release + replacement = '%s-SNAPSHOT' 
% snapshot + def callback(line): + return line.replace(pattern, replacement) + process_file(pom, callback) + +# Add in README.md file the next snapshot +def add_version_snapshot(readme_file, release, snapshot): + pattern = '| %s ' % release + replacement = '| %s-SNAPSHOT' % snapshot + def callback(line): + # If we find pattern, we copy the line and replace its content + if line.find(pattern) >= 0: + return line.replace(pattern, replacement).replace('%s' % (datetime.datetime.now().strftime("%Y-%m-%d")), + 'XXXX-XX-XX')+line + else: + return line + process_file(readme_file, callback) + + +# Set release date in README.md file +def set_date(readme_file): + pattern = 'XXXX-XX-XX' + replacement = '%s' % (datetime.datetime.now().strftime("%Y-%m-%d")) + def callback(line): + return line.replace(pattern, replacement) + process_file(readme_file, callback) + +# Update installation instructions in README.md file +def set_install_instructions(readme_file, artifact_name, release): + pattern = '`bin/plugin -install elasticsearch/%s/.+`' % artifact_name + replacement = '`bin/plugin -install elasticsearch/%s/%s`' % (artifact_name, release) + def callback(line): + return re.sub(pattern, replacement, line) + process_file(readme_file, callback) + + +# Stages the given files for the next git commit +def add_pending_files(*files): + for file in files: + run('git add %s' % file) + +# Executes a git commit with 'release [version]' as the commit message +def commit_release(artifact_id, release): + run('git commit -m "prepare release %s-%s"' % (artifact_id, release)) + +def commit_snapshot(): + run('git commit -m "prepare for next development iteration"') + +def tag_release(release): + run('git tag -a v%s -m "Tag release version %s"' % (release, release)) + +def run_mvn(*cmd): + for c in cmd: + run('%s; %s -f %s %s' % (java_exe(), MVN, POM_FILE, c)) + +def build_release(run_tests=False, dry_run=True): + target = 'deploy' + if dry_run: + target = 'package' + if run_tests: + run_mvn('clean 
test') + run_mvn('clean %s -DskipTests' %(target)) + +# Checks the pom.xml for the release version. 2.0.0-SNAPSHOT +# This method fails if the pom file has no SNAPSHOT version set ie. +# if the version is already on a release version we fail. +# Returns the next version string ie. 0.90.7 +def find_release_version(src_branch): + run('git checkout %s' % src_branch) + with open(POM_FILE, encoding='utf-8') as file: + for line in file: + match = re.search(r'(.+)-SNAPSHOT', line) + if match: + return match.group(1) + raise RuntimeError('Could not find release version in branch %s' % src_branch) + +# extract a value from pom.xml +def find_from_pom(tag): + with open(POM_FILE, encoding='utf-8') as file: + for line in file: + match = re.search(r'<%s>(.+)' % (tag, tag), line) + if match: + return match.group(1) + raise RuntimeError('Could not find <%s> in pom.xml file' % (tag)) + +def get_artifacts(artifact_id, release): + artifact_path = ROOT_DIR + 'target/releases/%s-%s.zip' % (artifact_id, release) + print(' Path %s' % (artifact_path)) + if not os.path.isfile(artifact_path): + raise RuntimeError('Could not find required artifact at %s' % (artifact_path)) + return artifact_path + +# Generates sha1 for a file +# and returns the checksum files as well +# as the given files in a list +def generate_checksums(release_file): + res = [] + directory = os.path.dirname(release_file) + file = os.path.basename(release_file) + checksum_file = '%s.sha1.txt' % file + + if os.system('cd %s; shasum %s > %s' % (directory, file, checksum_file)): + raise RuntimeError('Failed to generate checksum for file %s' % release_file) + res = res + [os.path.join(directory, checksum_file), release_file] + return res + +def git_merge(src_branch, release_version): + run('git checkout %s' % src_branch) + run('git merge %s' % release_branch(release_version)) + +def git_push(remote, src_branch, release_version, dry_run): + if not dry_run: + run('git push %s %s' % (remote, src_branch)) # push the commit + 
run('git push %s v%s' % (remote, release_version)) # push the tag + else: + print(' dryrun [True] -- skipping push to remote %s' % remote) + +def publish_artifacts(artifacts, base='elasticsearch/elasticsearch', dry_run=True): + location = os.path.dirname(os.path.realpath(__file__)) + for artifact in artifacts: + if dry_run: + print('Skip Uploading %s to Amazon S3 in %s' % (artifact, base)) + else: + print('Uploading %s to Amazon S3' % artifact) + # requires boto to be installed but it is not available on python3k yet so we use a dedicated tool + run('python %s/upload-s3.py --file %s --path %s' % (location, os.path.abspath(artifact), base)) + + +################# +## +## +## Email and Github Management +## +## +################# +def format_issues_plain(issues, title='Fix'): + response = "" + + if len(issues) > 0: + response += '%s:\n' % title + for issue in issues: + response += ' * [%s] - %s (%s)\n' % (issue.number, issue.title, issue.html_url) + + return response + +def format_issues_html(issues, title='Fix'): + response = "" + + if len(issues) > 0: + response += '

%s

\n
    \n' % title + for issue in issues: + response += '[%s] - %s\n' % (issue.html_url, issue.number, issue.title) + response += '
\n' + + return response + +def get_github_repository(reponame, + login=env.get('GITHUB_LOGIN', None), + password=env.get('GITHUB_PASSWORD', None), + key=env.get('GITHUB_KEY', None)): + if login: + g = github3.login(login, password) + elif key: + g = github3.login(token=key) + else: + g = github3.GitHub() + + return g.repository("elasticsearch", reponame) + +# Check if there are some remaining open issues and fails +def check_opened_issues(version, repository, reponame): + opened_issues = [i for i in repository.iter_issues(state='open', labels='%s' % version)] + if len(opened_issues)>0: + raise NameError('Some issues [%s] are still opened. Check https://github.com/elasticsearch/%s/issues?labels=%s&state=open' + % (len(opened_issues), reponame, version)) + +# List issues from github: can be done anonymously if you don't +# exceed a given number of github API calls per day +# Check if there are some remaining open issues and fails +def list_issues(version, + repository, + severity='bug'): + issues = [i for i in repository.iter_issues(state='closed', labels='%s,%s' % (severity, version))] + return issues + +# Get issues from github and generates a Plain/HTML Multipart email +# And send it if dry_run=False +def prepare_email(artifact_id, release_version, repository, + artifact_name, artifact_description, project_url, + severity_labels_bug='bug', + severity_labels_update='update', + severity_labels_new='new', + severity_labels_doc='doc'): + + ## Get bugs from github + issues_bug = list_issues(release_version, repository, severity=severity_labels_bug) + issues_update = list_issues(release_version, repository, severity=severity_labels_update) + issues_new = list_issues(release_version, repository, severity=severity_labels_new) + issues_doc = list_issues(release_version, repository, severity=severity_labels_doc) + + ## Format content to plain text + plain_issues_bug = format_issues_plain(issues_bug, 'Fix') + plain_issues_update = format_issues_plain(issues_update, 'Update') 
+ plain_issues_new = format_issues_plain(issues_new, 'New') + plain_issues_doc = format_issues_plain(issues_doc, 'Doc') + + ## Format content to html + html_issues_bug = format_issues_html(issues_bug, 'Fix') + html_issues_update = format_issues_html(issues_update, 'Update') + html_issues_new = format_issues_html(issues_new, 'New') + html_issues_doc = format_issues_html(issues_doc, 'Doc') + + if len(issues_bug)+len(issues_update)+len(issues_new)+len(issues_doc) > 0: + plain_empty_message = "" + html_empty_message = "" + + else: + plain_empty_message = "No issue listed for this release" + html_empty_message = "

No issue listed for this release

" + + msg = MIMEMultipart('alternative') + msg['Subject'] = '[ANN] %s %s released' % (artifact_name, release_version) + text = """ +Heya, + + +We are pleased to announce the release of the %(artifact_name)s, version %(release_version)s. + +%(artifact_description)s. + +%(project_url)s + +Release Notes - %(artifact_id)s - Version %(release_version)s + +%(empty_message)s +%(issues_bug)s +%(issues_update)s +%(issues_new)s +%(issues_doc)s + +Issues, Pull requests, Feature requests are warmly welcome on %(artifact_id)s project repository: %(project_url)s +For questions or comments around this plugin, feel free to use elasticsearch mailing list: https://groups.google.com/forum/#!forum/elasticsearch + +Enjoy, + +-The Elasticsearch team +""" % {'release_version': release_version, + 'artifact_id': artifact_id, + 'artifact_name': artifact_name, + 'artifact_description': artifact_description, + 'project_url': project_url, + 'empty_message': plain_empty_message, + 'issues_bug': plain_issues_bug, + 'issues_update': plain_issues_update, + 'issues_new': plain_issues_new, + 'issues_doc': plain_issues_doc} + + html = """ + + +

Heya,

+ +

We are pleased to announce the release of the %(artifact_name)s, version %(release_version)s

+ +
%(artifact_description)s.
+ +

Release Notes - Version %(release_version)s

+%(empty_message)s +%(issues_bug)s +%(issues_update)s +%(issues_new)s +%(issues_doc)s + +

Issues, Pull requests, Feature requests are warmly welcome on +%(artifact_id)s project repository!

+

For questions or comments around this plugin, feel free to use elasticsearch +mailing list!

+ +

Enjoy,

+ +

- The Elasticsearch team

+ +""" % {'release_version': release_version, + 'artifact_id': artifact_id, + 'artifact_name': artifact_name, + 'artifact_description': artifact_description, + 'project_url': project_url, + 'empty_message': html_empty_message, + 'issues_bug': html_issues_bug, + 'issues_update': html_issues_update, + 'issues_new': html_issues_new, + 'issues_doc': html_issues_doc} + + # Record the MIME types of both parts - text/plain and text/html. + part1 = MIMEText(text, 'plain') + part2 = MIMEText(html, 'html') + + # Attach parts into message container. + # According to RFC 2046, the last part of a multipart message, in this case + # the HTML message, is best and preferred. + msg.attach(part1) + msg.attach(part2) + + return msg + +def send_email(msg, + dry_run=True, + mail=True, + sender=env.get('MAIL_SENDER'), + to=env.get('MAIL_TO', 'elasticsearch@googlegroups.com'), + smtp_server=env.get('SMTP_SERVER', 'localhost')): + msg['From'] = 'Elasticsearch Team <%s>' % sender + msg['To'] = 'Elasticsearch Mailing List <%s>' % to + # save mail on disk + with open(ROOT_DIR+'target/email.txt', 'w') as email_file: + email_file.write(msg.as_string()) + if mail and not dry_run: + s = smtplib.SMTP(smtp_server, 25) + s.sendmail(sender, to, msg.as_string()) + s.quit() + else: + print('generated email: open %starget/email.txt' % ROOT_DIR) + +def print_sonatype_notice(): + settings = os.path.join(os.path.expanduser('~'), '.m2/settings.xml') + if os.path.isfile(settings): + with open(settings, encoding='utf-8') as settings_file: + for line in settings_file: + if line.strip() == 'sonatype-nexus-snapshots': + # moving out - we found the indicator no need to print the warning + return + print(""" + NOTE: No sonatype settings detected, make sure you have configured + your sonatype credentials in '~/.m2/settings.xml': + + + ... + + + sonatype-nexus-snapshots + your-jira-id + your-jira-pwd + + + sonatype-nexus-staging + your-jira-id + your-jira-pwd + + + ... 
+ + """) + +def check_s3_credentials(): + if not env.get('AWS_ACCESS_KEY_ID', None) or not env.get('AWS_SECRET_ACCESS_KEY', None): + raise RuntimeError('Could not find "AWS_ACCESS_KEY_ID" / "AWS_SECRET_ACCESS_KEY" in the env variables please export in order to upload to S3') + +def check_github_credentials(): + if not env.get('GITHUB_KEY', None) and not env.get('GITHUB_LOGIN', None): + log('WARN: Could not find "GITHUB_LOGIN" / "GITHUB_PASSWORD" or "GITHUB_KEY" in the env variables. You could need it.') + +def check_email_settings(): + if not env.get('MAIL_SENDER', None): + raise RuntimeError('Could not find "MAIL_SENDER"') + +# we print a notice if we can not find the relevant infos in the ~/.m2/settings.xml +print_sonatype_notice() + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Builds and publishes a Elasticsearch Plugin Release') + parser.add_argument('--branch', '-b', metavar='master', default=get_current_branch(), + help='The branch to release from. Defaults to the current branch.') + parser.add_argument('--skiptests', '-t', dest='tests', action='store_false', + help='Skips tests before release. Tests are run by default.') + parser.set_defaults(tests=True) + parser.add_argument('--remote', '-r', metavar='origin', default='origin', + help='The remote to push the release commit and tag to. Default is [origin]') + parser.add_argument('--publish', '-p', dest='dryrun', action='store_false', + help='Publishes the release. Disable by default.') + parser.add_argument('--disable_mail', '-dm', dest='mail', action='store_false', + help='Do not send a release email. 
Email is sent by default.') + + parser.set_defaults(dryrun=True) + parser.set_defaults(mail=True) + args = parser.parse_args() + + src_branch = args.branch + remote = args.remote + run_tests = args.tests + dry_run = args.dryrun + mail = args.mail + + if not dry_run: + check_s3_credentials() + print('WARNING: dryrun is set to "false" - this will push and publish the release') + if mail: + check_email_settings() + print('An email to %s will be sent after the release' + % env.get('MAIL_TO', 'elasticsearch@googlegroups.com')) + input('Press Enter to continue...') + + check_github_credentials() + + print(''.join(['-' for _ in range(80)])) + print('Preparing Release from branch [%s] running tests: [%s] dryrun: [%s]' % (src_branch, run_tests, dry_run)) + print(' JAVA_HOME is [%s]' % JAVA_HOME) + print(' Running with maven command: [%s] ' % (MVN)) + + release_version = find_release_version(src_branch) + artifact_id = find_from_pom('artifactId') + artifact_name = find_from_pom('name') + artifact_description = find_from_pom('description') + project_url = find_from_pom('url') + print(' Artifact Id: [%s]' % artifact_id) + print(' Release version: [%s]' % release_version) + + # extract snapshot + default_snapshot_version = guess_snapshot(release_version) + snapshot_version = input('Enter next snapshot version [%s]:' % default_snapshot_version) + snapshot_version = snapshot_version or default_snapshot_version + + print(' Next version: [%s-SNAPSHOT]' % snapshot_version) + print(' Artifact Name: [%s]' % artifact_name) + print(' Artifact Description: [%s]' % artifact_description) + print(' Project URL: [%s]' % project_url) + + if not dry_run: + smoke_test_version = release_version + head_hash = get_head_hash() + run_mvn('clean') # clean the env! 
+ create_release_branch(remote, src_branch, release_version) + print(' Created release branch [%s]' % (release_branch(release_version))) + success = False + try: + pending_files = [POM_FILE, README_FILE] + remove_maven_snapshot(POM_FILE, release_version) + remove_version_snapshot(README_FILE, release_version) + set_date(README_FILE) + set_install_instructions(README_FILE, artifact_id, release_version) + print(' Done removing snapshot version') + add_pending_files(*pending_files) # expects var args use * to expand + commit_release(artifact_id, release_version) + print(' Committed release version [%s]' % release_version) + print(''.join(['-' for _ in range(80)])) + print('Building Release candidate') + input('Press Enter to continue...') + print(' Checking github issues') + repository = get_github_repository(artifact_id) + check_opened_issues(release_version, repository, artifact_id) + if not dry_run: + print(' Running maven builds now and publish to sonatype - run-tests [%s]' % run_tests) + else: + print(' Running maven builds now run-tests [%s]' % run_tests) + build_release(run_tests=run_tests, dry_run=dry_run) + artifact = get_artifacts(artifact_id, release_version) + artifact_and_checksums = generate_checksums(artifact) + print(''.join(['-' for _ in range(80)])) + + print('Finish Release -- dry_run: %s' % dry_run) + input('Press Enter to continue...') + print(' merge release branch') + git_merge(src_branch, release_version) + print(' tag') + tag_release(release_version) + + add_maven_snapshot(POM_FILE, release_version, snapshot_version) + add_version_snapshot(README_FILE, release_version, snapshot_version) + add_pending_files(*pending_files) + commit_snapshot() + + print(' push to %s %s -- dry_run: %s' % (remote, src_branch, dry_run)) + git_push(remote, src_branch, release_version, dry_run) + print(' publish artifacts to S3 -- dry_run: %s' % dry_run) + publish_artifacts(artifact_and_checksums, base='elasticsearch/%s' % (artifact_id) , dry_run=dry_run) + print(' 
preparing email (from github issues)') + msg = prepare_email(artifact_id, release_version, repository, artifact_name, artifact_description, project_url) + print(' sending email -- dry_run: %s, mail: %s' % (dry_run, mail)) + send_email(msg, dry_run=dry_run, mail=mail) + + pending_msg = """ +Release successful pending steps: + * close and release sonatype repo: https://oss.sonatype.org/ + * check if the release is there https://oss.sonatype.org/content/repositories/releases/org/elasticsearch/%(artifact_id)s/%(version)s + * tweet about the release +""" + print(pending_msg % {'version': release_version, + 'artifact_id': artifact_id, + 'project_url': project_url}) + success = True + finally: + if not success: + run('git reset --hard HEAD') + run('git checkout %s' % src_branch) + elif dry_run: + print('End of dry_run') + input('Press Enter to reset changes...') + + run('git reset --hard %s' % head_hash) + run('git tag -d v%s' % release_version) + # we delete this one anyways + run('git branch -D %s' % (release_branch(release_version))) diff --git a/dev-tools/upload-s3.py b/dev-tools/upload-s3.py new file mode 100644 index 00000000000..95ea576e65c --- /dev/null +++ b/dev-tools/upload-s3.py @@ -0,0 +1,67 @@ +# Licensed to Elasticsearch under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on +# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+ +import os +import sys +import argparse +try: + import boto.s3 +except: + raise RuntimeError(""" + S3 upload requires boto to be installed + Use one of: + 'pip install -U boto' + 'apt-get install python-boto' + 'easy_install boto' + """) + +import boto.s3 + + +def list_buckets(conn): + return conn.get_all_buckets() + + +def upload_s3(conn, path, key, file, bucket): + print 'Uploading %s to Amazon S3 bucket %s/%s' % \ + (file, bucket, os.path.join(path, key)) + def percent_cb(complete, total): + sys.stdout.write('.') + sys.stdout.flush() + bucket = conn.create_bucket(bucket) + k = bucket.new_key(os.path.join(path, key)) + k.set_contents_from_filename(file, cb=percent_cb, num_cb=100) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Uploads files to Amazon S3') + parser.add_argument('--file', '-f', metavar='path to file', + help='the branch to release from', required=True) + parser.add_argument('--bucket', '-b', metavar='B42', default='download.elasticsearch.org', + help='The S3 Bucket to upload to') + parser.add_argument('--path', '-p', metavar='elasticsearch/elasticsearch', default='elasticsearch/elasticsearch', + help='The key path to use') + parser.add_argument('--key', '-k', metavar='key', default=None, + help='The key - uses the file name as default key') + args = parser.parse_args() + if args.key: + key = args.key + else: + key = os.path.basename(args.file) + + connection = boto.connect_s3() + upload_s3(connection, args.path, key, args.file, args.bucket); + diff --git a/pom.xml b/pom.xml index 5abace4c643..d956a99d046 100644 --- a/pom.xml +++ b/pom.xml @@ -2,13 +2,14 @@ - elasticsearch-analysis-kuromoji 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji 2.0.0-SNAPSHOT jar - Japanese (kuromoji) Analysis for ElasticSearch + Japanese (kuromoji) Analysis plugin for elasticsearch + The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. 
+ https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/ 2009 From bcbd107daf0fb082259e08d5327d2838423ef5e8 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 28 Feb 2014 23:37:00 +0100 Subject: [PATCH 043/115] Update to Lucene 4.6.1 / Elasticsearch 1.0.0 Closes #18 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index d956a99d046..db6dad2b5ad 100644 --- a/pom.xml +++ b/pom.xml @@ -32,8 +32,8 @@ - 1.0.0.RC1 - 4.6.0 + 1.0.0 + 4.6.1 1 true onerror From dfcfe7e89476f2b6f056cc762b177b49c3029731 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 28 Feb 2014 23:47:50 +0100 Subject: [PATCH 044/115] Can not upgrade kuromoji plugin from elasticsearch 0.90.5 Due to fix [3790](https://github.com/elasticsearch/elasticsearch/issues/3790) in core, upgrading an analyzer provided as a plugin now fails. See https://github.com/elasticsearch/elasticsearch/issues/5030 for details. Issue is in elasticsearch core code but can be fixed in plugins by overloading `PreBuiltAnalyzerProviderFactory`, `PreBuiltTokenFilterFactoryFactory`, `PreBuiltTokenizerFactoryFactory` or `PreBuiltCharFilterFactoryFactory ` when used. 
Closes #21 (cherry picked from commit 3401c21) --- .../analysis/KuromojiIndicesAnalysis.java | 12 +++--- .../KurumojiCharFilterFactoryFactory.java | 38 +++++++++++++++++++ .../KurumojiTokenFilterFactoryFactory.java | 38 +++++++++++++++++++ .../KurumojiTokenizerFactoryFactory.java | 38 +++++++++++++++++++ 4 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 src/main/java/org/elasticsearch/indices/analysis/KurumojiCharFilterFactoryFactory.java create mode 100644 src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenFilterFactoryFactory.java create mode 100644 src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenizerFactoryFactory.java diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java index 9264acd9426..7074fcadac0 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -43,7 +43,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { super(settings); indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark", - new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() { + new KurumojiCharFilterFactoryFactory(new CharFilterFactory() { @Override public String name() { return "kuromoji_iteration_mark"; @@ -58,7 +58,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { })); indicesAnalysisService.tokenizerFactories().put("kuromoji_tokenizer", - new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() { + new KurumojiTokenizerFactoryFactory(new TokenizerFactory() { @Override public String name() { return "kuromoji_tokenizer"; @@ -72,7 +72,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { })); indicesAnalysisService.tokenFilterFactories().put("kuromoji_baseform", - new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { + 
new KurumojiTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "kuromoji_baseform"; @@ -86,7 +86,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { indicesAnalysisService.tokenFilterFactories().put( "kuromoji_part_of_speech", - new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { + new KurumojiTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "kuromoji_part_of_speech"; @@ -102,7 +102,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { indicesAnalysisService.tokenFilterFactories().put( "kuromoji_readingform", - new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { + new KurumojiTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "kuromoji_readingform"; @@ -115,7 +115,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { })); indicesAnalysisService.tokenFilterFactories().put("kuromoji_stemmer", - new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { + new KurumojiTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "kuromoji_stemmer"; diff --git a/src/main/java/org/elasticsearch/indices/analysis/KurumojiCharFilterFactoryFactory.java b/src/main/java/org/elasticsearch/indices/analysis/KurumojiCharFilterFactoryFactory.java new file mode 100644 index 00000000000..3737d81f1a7 --- /dev/null +++ b/src/main/java/org/elasticsearch/indices/analysis/KurumojiCharFilterFactoryFactory.java @@ -0,0 +1,38 @@ +/* + * Licensed to Elasticsearch (the "Author") under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Author licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.indices.analysis; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.PreBuiltCharFilterFactoryFactory; + +public class KurumojiCharFilterFactoryFactory extends PreBuiltCharFilterFactoryFactory { + private final CharFilterFactory charFilterFactory; + + public KurumojiCharFilterFactoryFactory(CharFilterFactory charFilterFactory) { + super(charFilterFactory); + this.charFilterFactory = charFilterFactory; + } + + @Override + public CharFilterFactory create(String name, Settings settings) { + return charFilterFactory; + } +} diff --git a/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenFilterFactoryFactory.java b/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenFilterFactoryFactory.java new file mode 100644 index 00000000000..2efaa91baa0 --- /dev/null +++ b/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenFilterFactoryFactory.java @@ -0,0 +1,38 @@ +/* + * Licensed to Elasticsearch (the "Author") under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Author licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.indices.analysis; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory; +import org.elasticsearch.index.analysis.TokenFilterFactory; + +public class KurumojiTokenFilterFactoryFactory extends PreBuiltTokenFilterFactoryFactory { + private final TokenFilterFactory tokenFilterFactory; + + public KurumojiTokenFilterFactoryFactory(TokenFilterFactory tokenFilterFactory) { + super(tokenFilterFactory); + this.tokenFilterFactory = tokenFilterFactory; + } + + @Override + public TokenFilterFactory create(String name, Settings settings) { + return tokenFilterFactory; + } +} diff --git a/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenizerFactoryFactory.java b/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenizerFactoryFactory.java new file mode 100644 index 00000000000..6e5525762ff --- /dev/null +++ b/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenizerFactoryFactory.java @@ -0,0 +1,38 @@ +/* + * Licensed to Elasticsearch (the "Author") under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Author licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.indices.analysis; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; + +public class KurumojiTokenizerFactoryFactory extends PreBuiltTokenizerFactoryFactory { + private final TokenizerFactory tokenizerFactory; + + public KurumojiTokenizerFactoryFactory(TokenizerFactory tokenizerFactory) { + super(tokenizerFactory); + this.tokenizerFactory = tokenizerFactory; + } + + @Override + public TokenizerFactory create(String name, Settings settings) { + return tokenizerFactory; + } +} From 8ae662da7cea8b27c0680b3c7987e85263e2e615 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 28 Feb 2014 23:55:36 +0100 Subject: [PATCH 045/115] prepare release elasticsearch-analysis-kuromoji-2.0.0 --- README.md | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a3de0db745e..39c34b43354 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Japanese (kuromoji) Analysis for Elasticsearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.0.0.RC1`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.0.0`. 
* For 1.0.x elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/master). * For 0.90.x elasticsearch versions, look at [1.x branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/1.x). @@ -11,7 +11,7 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e | Kuromoji Analysis Plugin | elasticsearch | Release date | |-----------------------------|---------------------|:------------:| -| 2.0.0-SNAPSHOT | 1.0.0.RC1 -> master | XXXX-XX-XX | +| 2.0.0 | 1.0.0.RC1 -> master | 2014-02-28 | | 2.0.0.RC1 | 1.0.0.RC1 -> master | 2014-01-15 | The plugin includes the `kuromoji` analyzer. diff --git a/pom.xml b/pom.xml index db6dad2b5ad..9ff6ac612b5 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 2.0.0-SNAPSHOT + 2.0.0 jar Japanese (kuromoji) Analysis plugin for elasticsearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. 
From f01363ec37247bf30f2ec9d1bff6cb30e8937458 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 28 Feb 2014 23:57:04 +0100 Subject: [PATCH 046/115] prepare for next development iteration --- README.md | 1 + pom.xml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 39c34b43354..bf1d48b5f38 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e | Kuromoji Analysis Plugin | elasticsearch | Release date | |-----------------------------|---------------------|:------------:| +| 2.1.0-SNAPSHOT | 1.0.0.RC1 -> master | XXXX-XX-XX | | 2.0.0 | 1.0.0.RC1 -> master | 2014-02-28 | | 2.0.0.RC1 | 1.0.0.RC1 -> master | 2014-01-15 | diff --git a/pom.xml b/pom.xml index 9ff6ac612b5..38dc01f2e98 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 2.0.0 + 2.1.0-SNAPSHOT jar Japanese (kuromoji) Analysis plugin for elasticsearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. From 207420cf6a4b4f99df38a3901e78c973c76e4a72 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 3 Mar 2014 10:36:55 +0100 Subject: [PATCH 047/115] Update naming for better mailing list announcement By now, when we release the plugin, users can be confused about version naming. For example, email title could be: ``` [ANN] Japanese (kuromoji) Analysis plugin for elasticsearch 1.8.0 released ``` We prefer to have that form: ``` [ANN] Elasticsearch Japanese (kuromoji) Analysis plugin 1.8.0 released ``` Thanks to @spinscale for reporting this. 
--- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 38dc01f2e98..d448682c3de 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ elasticsearch-analysis-kuromoji 2.1.0-SNAPSHOT jar - Japanese (kuromoji) Analysis plugin for elasticsearch + Elasticsearch Japanese (kuromoji) Analysis plugin The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/ 2009 From 15cfa67ed994de41860018805381116f6edc98f5 Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Mon, 3 Mar 2014 16:27:15 +0900 Subject: [PATCH 048/115] fix typos in README.md Closes #23 --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bf1d48b5f38..f14ef21d87b 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ This analyzer is the following tokenizer and tokenfilter combination. * `kuromoji_part_of_speech` : Kuromoji Part of Speech Stop Filter (TokenFilter) * `cjk_width` : CJK Width Filter (TokenFilter) * `stop` : Stop Filter (TokenFilter) -* `kuromoji_stemmer` : Kuromiji Katakana Stemmer Filter(TokenFilter) +* `kuromoji_stemmer` : Kuromoji Katakana Stemmer Filter(TokenFilter) * `lowercase` : LowerCase Filter (TokenFilter) ## CharFilter : kuromoji_iteration_mark @@ -79,7 +79,7 @@ The mode is three types. * `normal` : Ordinary segmentation: no decomposition for compounds -* `search` : Segmentation geared towards search: this includes a decompounding process for long nouns, also includeing the full compound token as a synonym. +* `search` : Segmentation geared towards search: this includes a decompounding process for long nouns, also including the full compound token as a synonym. * `extended` : Extended mode outputs unigrams for unknown words. 
@@ -159,7 +159,7 @@ curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer ## TokenFilter : kuromoji_baseform -A token filter of type `kuromoji_baseform` that replcaes term text with BaseFormAttribute. +A token filter of type `kuromoji_baseform` that replaces term text with BaseFormAttribute. This acts as a lemmatizer for verbs and adjectives. ### example @@ -202,7 +202,7 @@ The following are settings that can be set for a stop token filter type: |:------------|:-----------------------------------------------------| | stoptags | A list of part-of-speech tags that should be removed | -Note that default setting is stoptags.txt include lucene-analyzer-kuromji.jar. +Note that default setting is stoptags.txt include lucene-analyzer-kuromoji.jar. ### example @@ -260,7 +260,7 @@ The following are settings that can be set for a `kuromoji_readingform` token fi |:------------|:----------------------------------------------------------|:------------------| | use_romaji | `true` if romaji reading form output instead of katakana. | `false` | -Note that elasticsearch-analysis-kuromoji built-in `kuromoji_readingform` set default `ture` to `use_romaji` attribute. +Note that elasticsearch-analysis-kuromoji built-in `kuromoji_readingform` set default `true` to `use_romaji` attribute. ### example From d56619c4c24a3fc8ec9b43a8e8171836eacd155e Mon Sep 17 00:00:00 2001 From: Bill Hwang Date: Thu, 6 Mar 2014 14:46:12 -0800 Subject: [PATCH 049/115] Added plug-in test hooks --- pom.xml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pom.xml b/pom.xml index d448682c3de..44ad1c308a3 100644 --- a/pom.xml +++ b/pom.xml @@ -278,4 +278,34 @@
+ + + + default + + true + + + + + plugin-test + + + tests.plugin + + + + + ${basedir}/elasticsearch/target + + + + + ElasticsearchRepo + ElasticsearchRepo + file://${elasticsearch.lib} + + + + From 132ba63bc3938bbda6be6b206b0885b110572650 Mon Sep 17 00:00:00 2001 From: Bill Hwang Date: Fri, 7 Mar 2014 14:59:20 -0800 Subject: [PATCH 050/115] Adjusted local repository policy When running plugin test, set local repository update policy to always. --- pom.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pom.xml b/pom.xml index 44ad1c308a3..c482c30bfb9 100644 --- a/pom.xml +++ b/pom.xml @@ -304,6 +304,8 @@ ElasticsearchRepo ElasticsearchRepo file://${elasticsearch.lib} + truealways + truealways
From ef0cddbeb3cfaac43b3ef9cbaad91163df19b228 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Sun, 9 Mar 2014 10:38:43 +0100 Subject: [PATCH 051/115] Add plugin version in es-plugin.properties With https://github.com/elasticsearch/elasticsearch/issues/2784, we can now add plugin version in `es-plugin.properties` file. It will only be used with elasticsearch 1.0.0 and upper. No need to push it in 1.x branch. Closes #26. --- pom.xml | 18 +++++------------- src/main/resources/es-plugin.properties | 1 + 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/pom.xml b/pom.xml index c482c30bfb9..92726d6f33e 100644 --- a/pom.xml +++ b/pom.xml @@ -102,31 +102,23 @@ - ${basedir}/src/main/java + src/main/resources + true - **/*.json - **/*.yml - - - - ${basedir}/src/main/resources - - **/*.* + **/*.properties - ${basedir}/src/test/java + src/test/java **/*.json - **/*.yml - **/*.txt - ${basedir}/src/test/resources + src/test/resources **/*.* diff --git a/src/main/resources/es-plugin.properties b/src/main/resources/es-plugin.properties index c35abda2c62..aa2beeb4780 100644 --- a/src/main/resources/es-plugin.properties +++ b/src/main/resources/es-plugin.properties @@ -1 +1,2 @@ plugin=org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin +version=${project.version} From 003a1dc1056f769c74726ebc32ec6356ccdf6733 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 19 Mar 2014 22:38:17 +0100 Subject: [PATCH 052/115] Disable java and maven version checking And fix typo in email html --- dev-tools/build_release.py | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/dev-tools/build_release.py b/dev-tools/build_release.py index 9166b09e7e3..74acd8c5f4e 100755 --- a/dev-tools/build_release.py +++ b/dev-tools/build_release.py @@ -43,7 +43,7 @@ from os.path import dirname, abspath The script takes over almost all steps necessary for a release from a high level point of view it does the following things: - - run 
prerequisite checks ie. check for Java 1.6 being present or S3 credentials available as env variables + - run prerequisite checks ie. check for S3 credentials available as env variables - detect the version to release from the specified branch (--branch) or the current branch - creates a release branch & updates pom.xml and README.md to point to a release version rather than a snapshot - builds the artifacts @@ -109,18 +109,6 @@ def java_exe(): path = JAVA_HOME return 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % (path, path, path) -def verify_java_version(version): - s = os.popen('%s; java -version 2>&1' % java_exe()).read() - if s.find(' version "%s.' % version) == -1: - raise RuntimeError('got wrong version for java %s:\n%s' % (version, s)) - -# Verifies the java version. We guarantee that we run with Java 1.6 -# If 1.6 is not available fail the build! -def verify_mvn_java_version(version, mvn): - s = os.popen('%s; %s --version 2>&1' % (java_exe(), mvn)).read() - if s.find('Java version: %s' % version) == -1: - raise RuntimeError('got wrong java version for %s %s:\n%s' % (mvn, version, s)) - # Returns the hash of the current git HEAD revision def get_head_hash(): return os.popen(' git rev-parse --verify HEAD 2>&1').read().strip() @@ -133,9 +121,6 @@ def get_tag_hash(tag): def get_current_branch(): return os.popen('git rev-parse --abbrev-ref HEAD 2>&1').read().strip() -verify_java_version('1.6') # we require to build with 1.6 -verify_mvn_java_version('1.6', MVN) - # Utility that returns the name of the release branch for a given version def release_branch(version): return 'release_branch_%s' % version @@ -218,7 +203,7 @@ def add_version_snapshot(readme_file, release, snapshot): # If we find pattern, we copy the line and replace its content if line.find(pattern) >= 0: return line.replace(pattern, replacement).replace('%s' % (datetime.datetime.now().strftime("%Y-%m-%d")), - 'XXXX-XX-XX')+line + 'XXXX-XX-XX')+line else: return line 
process_file(readme_file, callback) @@ -356,15 +341,15 @@ def format_issues_html(issues, title='Fix'): if len(issues) > 0: response += '

%s

\n
    \n' % title for issue in issues: - response += '[%s] - %s\n' % (issue.html_url, issue.number, issue.title) + response += '
  • [%s] - %s\n' % (issue.html_url, issue.number, issue.title) response += '
\n' return response def get_github_repository(reponame, - login=env.get('GITHUB_LOGIN', None), - password=env.get('GITHUB_PASSWORD', None), - key=env.get('GITHUB_KEY', None)): + login=env.get('GITHUB_LOGIN', None), + password=env.get('GITHUB_PASSWORD', None), + key=env.get('GITHUB_KEY', None)): if login: g = github3.login(login, password) elif key: From 874c31646026dbc3651c0df8a94f0feb26768820 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 25 Mar 2014 19:16:54 +0100 Subject: [PATCH 053/115] Update master to elasticsearch master SNAPSHOT --- README.md | 10 +++++----- pom.xml | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index f14ef21d87b..d0f6e5938a9 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,15 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.0.0`. -* For 1.0.x elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/master). -* For 0.90.x elasticsearch versions, look at [1.x branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/1.x). +* For master elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/master). +* For 1.1.x elasticsearch versions, look at [es-1.1 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.1). +* For 1.0.x elasticsearch versions, look at [es-1.0 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.0). +* For 0.90.x elasticsearch versions, look at [es-0.90 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-0.90). 
| Kuromoji Analysis Plugin | elasticsearch | Release date | |-----------------------------|---------------------|:------------:| -| 2.1.0-SNAPSHOT | 1.0.0.RC1 -> master | XXXX-XX-XX | -| 2.0.0 | 1.0.0.RC1 -> master | 2014-02-28 | -| 2.0.0.RC1 | 1.0.0.RC1 -> master | 2014-01-15 | +| 3.0.0-SNAPSHOT | master (2.x) | XXXX-XX-XX | The plugin includes the `kuromoji` analyzer. diff --git a/pom.xml b/pom.xml index 92726d6f33e..f9b4b31542f 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-kuromoji - 2.1.0-SNAPSHOT + 3.0.0-SNAPSHOT jar Elasticsearch Japanese (kuromoji) Analysis plugin The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. @@ -32,8 +32,8 @@ - 1.0.0 - 4.6.1 + 2.0.0-SNAPSHOT + 4.7.0 1 true onerror From 20a83fece47561247669c0ba1c1a6294fcfe5d12 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 26 Mar 2014 09:41:24 +0100 Subject: [PATCH 054/115] Check elasticsearch version SNAPSHOT dependency --- dev-tools/build_release.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dev-tools/build_release.py b/dev-tools/build_release.py index 74acd8c5f4e..529ff94a5e8 100755 --- a/dev-tools/build_release.py +++ b/dev-tools/build_release.py @@ -603,8 +603,12 @@ if __name__ == '__main__': artifact_name = find_from_pom('name') artifact_description = find_from_pom('description') project_url = find_from_pom('url') + elasticsearch_version = find_from_pom('elasticsearch.version') print(' Artifact Id: [%s]' % artifact_id) print(' Release version: [%s]' % release_version) + print(' Elasticsearch: [%s]' % elasticsearch_version) + if elasticsearch_version.find('-SNAPSHOT') != -1: + raise RuntimeError('Can not release with a SNAPSHOT elasticsearch dependency: %s' % elasticsearch_version) # extract snapshot default_snapshot_version = guess_snapshot(release_version) From a69cdeeadabede85a71a6522540d13579edcbf6f Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 26 Mar 2014 12:11:12 
+0100 Subject: [PATCH 055/115] Manage per version documentation links --- README.md | 5 +++++ dev-tools/build_release.py | 25 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/README.md b/README.md index d0f6e5938a9..d5c1f06e4ae 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,11 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e |-----------------------------|---------------------|:------------:| | 3.0.0-SNAPSHOT | master (2.x) | XXXX-XX-XX | +Please read documentation relative to the version you are using: + +* [3.0.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/blob/master/README.md) + + The plugin includes the `kuromoji` analyzer. Includes Analyzer, Tokenizer, TokenFilter diff --git a/dev-tools/build_release.py b/dev-tools/build_release.py index 529ff94a5e8..db8345440c7 100755 --- a/dev-tools/build_release.py +++ b/dev-tools/build_release.py @@ -208,6 +208,29 @@ def add_version_snapshot(readme_file, release, snapshot): return line process_file(readme_file, callback) +# Moves the README.md file from a snapshot to a release (documentation link) +def remove_documentation_snapshot(readme_file, repo_url, release, branch): + pattern = '* [%s-SNAPSHOT](%sblob/%s/README.md)' % (release, repo_url, branch) + replacement = '* [%s](%sblob/v%s/README.md)' % (release, repo_url, release) + def callback(line): + # If we find pattern, we replace its content + if line.find(pattern) >= 0: + return line.replace(pattern, replacement) + else: + return line + process_file(readme_file, callback) + +# Add in README.markdown file the documentation for the next version +def add_documentation_snapshot(readme_file, repo_url, release, snapshot, branch): + pattern = '* [%s](%sblob/v%s/README.md)' % (release, repo_url, release) + replacement = '* [%s-SNAPSHOT](%sblob/%s/README.md)' % (snapshot, repo_url, branch) + def callback(line): + # If we find pattern, we copy the line and replace its content + if 
line.find(pattern) >= 0: + return line.replace(pattern, replacement)+line + else: + return line + process_file(readme_file, callback) # Set release date in README.md file def set_date(readme_file): @@ -630,6 +653,7 @@ if __name__ == '__main__': try: pending_files = [POM_FILE, README_FILE] remove_maven_snapshot(POM_FILE, release_version) + remove_documentation_snapshot(README_FILE, project_url, release_version, src_branch) remove_version_snapshot(README_FILE, release_version) set_date(README_FILE) set_install_instructions(README_FILE, artifact_id, release_version) @@ -661,6 +685,7 @@ if __name__ == '__main__': add_maven_snapshot(POM_FILE, release_version, snapshot_version) add_version_snapshot(README_FILE, release_version, snapshot_version) + add_documentation_snapshot(README_FILE, project_url, release_version, snapshot_version, src_branch) add_pending_files(*pending_files) commit_snapshot() From 2f5e8ef0eccc67b858d4287cb8666d4121a9e84a Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 29 Apr 2014 09:37:17 +0200 Subject: [PATCH 056/115] Update to Lucene 4.8. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index f9b4b31542f..c23a0942d5f 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ 2.0.0-SNAPSHOT - 4.7.0 + 4.8.0 1 true onerror From 0add553832ced9ba31234995de48f5447106b973 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 29 Apr 2014 11:23:04 +0200 Subject: [PATCH 057/115] Create branch es-1.2 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d5c1f06e4ae..ac9d5d4fb3a 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.0.0`. * For master elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/master). 
+* For 1.2.x elasticsearch versions, look at [es-1.2 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.2). * For 1.1.x elasticsearch versions, look at [es-1.1 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.1). * For 1.0.x elasticsearch versions, look at [es-1.0 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.0). * For 0.90.x elasticsearch versions, look at [es-0.90 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-0.90). From d0b89c227cca35977e2828541ac4577d01519a34 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 29 Apr 2014 11:26:55 +0200 Subject: [PATCH 058/115] Update to Lucene 4.8 Closes #30. (cherry picked from commit bf7cc95) --- .../index/analysis/KuromojiPartOfSpeechFilterFactory.java | 2 +- .../elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java index 222cd05f25a..3c4cbaeea5f 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java @@ -48,7 +48,7 @@ public class KuromojiPartOfSpeechFilterFactory extends AbstractTokenFilterFactor @Override public TokenStream create(TokenStream tokenStream) { - return new JapanesePartOfSpeechStopFilter(Version.LUCENE_44, tokenStream, stopTags); + return new JapanesePartOfSpeechStopFilter(Version.LUCENE_48, tokenStream, stopTags); } } diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java index 7074fcadac0..8a15426cd3a 100644 --- 
a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -94,7 +94,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { @Override public TokenStream create(TokenStream tokenStream) { - return new JapanesePartOfSpeechStopFilter(Version.LUCENE_44, + return new JapanesePartOfSpeechStopFilter(Version.LUCENE_48, tokenStream, JapaneseAnalyzer .getDefaultStopTags()); } From c2582130621f8fbda4ee8efc64d724e0f608e5ac Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 29 Apr 2014 11:27:34 +0200 Subject: [PATCH 059/115] Add Lucene version in plugin.properties file Closes #33. (cherry picked from commit fd508b8) --- src/main/resources/es-plugin.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/resources/es-plugin.properties b/src/main/resources/es-plugin.properties index aa2beeb4780..ac98c3db5d1 100644 --- a/src/main/resources/es-plugin.properties +++ b/src/main/resources/es-plugin.properties @@ -1,2 +1,3 @@ plugin=org.elasticsearch.plugin.analysis.kuromoji.AnalysisKuromojiPlugin version=${project.version} +lucene=${lucene.version} From a8ad05143548fd3051d40d1b62325fcf7ac5b6bf Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Mon, 28 Apr 2014 12:56:58 +0900 Subject: [PATCH 060/115] Not registered as a global analyzer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `kuromoji` analyzer is not registered as a global analyzer, so this fails: POST /_analyze?analyzer=kuromoji&text=J R 新宿駅の近くにビールを飲みに行こうか { "error": "ElasticsearchIllegalArgumentException[failed to find analyzer [kuromoji]]", "status": 400 } But this succeeds: PUT /t POST /t/_analyze?analyzer=kuromoji&text=J R 新宿駅の近くにビールを飲みに行こうか Closes #31. 
--- .../KuromojiPartOfSpeechFilterFactory.java | 4 +- .../analysis/KuromojiIndicesAnalysis.java | 8 +++- .../analysis/KuromojiIntegrationTests.java | 48 +++++++++++++++++++ 3 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java index 3c4cbaeea5f..020cd93b6a5 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java @@ -21,9 +21,9 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter; -import org.apache.lucene.util.Version; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; @@ -48,7 +48,7 @@ public class KuromojiPartOfSpeechFilterFactory extends AbstractTokenFilterFactor @Override public TokenStream create(TokenStream tokenStream) { - return new JapanesePartOfSpeechStopFilter(Version.LUCENE_48, tokenStream, stopTags); + return new JapanesePartOfSpeechStopFilter(Lucene.ANALYZER_VERSION, tokenStream, stopTags); } } diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java index 8a15426cd3a..04c34a5f081 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -23,9 +23,9 @@ import 
org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ja.*; import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; -import org.apache.lucene.util.Version; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.analysis.*; @@ -42,6 +42,10 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { IndicesAnalysisService indicesAnalysisService) { super(settings); + indicesAnalysisService.analyzerProviderFactories().put("kuromoji", + new PreBuiltAnalyzerProviderFactory("kuromoji", AnalyzerScope.INDICES, + new JapaneseAnalyzer(Lucene.ANALYZER_VERSION))); + indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark", new KurumojiCharFilterFactoryFactory(new CharFilterFactory() { @Override @@ -94,7 +98,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { @Override public TokenStream create(TokenStream tokenStream) { - return new JapanesePartOfSpeechStopFilter(Version.LUCENE_48, + return new JapanesePartOfSpeechStopFilter(Lucene.ANALYZER_VERSION, tokenStream, JapaneseAnalyzer .getDefaultStopTags()); } diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java new file mode 100644 index 00000000000..5d909959db3 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.analysis; + +import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; +import org.elasticsearch.test.ElasticsearchIntegrationTest; +import org.junit.Test; + +import java.util.concurrent.ExecutionException; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.notNullValue; + +@ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.SUITE) +public class KuromojiIntegrationTests extends ElasticsearchIntegrationTest { + + @Test + public void testKuromojiAnalyzer() throws ExecutionException, InterruptedException { + AnalyzeResponse response = client().admin().indices() + .prepareAnalyze("JR新宿駅の近くにビールを飲みに行こうか").setAnalyzer("kuromoji") + .execute().get(); + + String[] expectedTokens = {"jr", "新宿", "駅", "近く", "ビール", "飲む", "行く"}; + + assertThat(response, notNullValue()); + assertThat(response.getTokens().size(), is(7)); + + for (int i = 0; i < expectedTokens.length; i++) { + assertThat(response.getTokens().get(i).getTerm(), is(expectedTokens[i])); + } + } +} From 8e560c072b34a5a9eef9daed32ca85bf11410124 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 23 May 2014 17:03:43 +0200 Subject: [PATCH 061/115] Update to elasticsearch 1.3 Closes #35. 
--- README.md | 3 ++- pom.xml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ac9d5d4fb3a..3eeabbd9507 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,10 @@ Japanese (kuromoji) Analysis for Elasticsearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.0.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.1.0`. * For master elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/master). +* For 1.3.x elasticsearch versions, look at [es-1.3 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.3). * For 1.2.x elasticsearch versions, look at [es-1.2 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.2). * For 1.1.x elasticsearch versions, look at [es-1.1 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.1). * For 1.0.x elasticsearch versions, look at [es-1.0 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.0). diff --git a/pom.xml b/pom.xml index c23a0942d5f..0dec138f7ea 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ 2.0.0-SNAPSHOT - 4.8.0 + 4.8.1 1 true onerror From dcfbeeb48b0b862dd3c1429f77fa60e925a6fa22 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 23 May 2014 16:57:46 +0200 Subject: [PATCH 062/115] Fix integration tests Due to a change in the test infrastructure, automatic loading of classpath plugins is now disabled by default (see https://github.com/elasticsearch/elasticsearch/commit/75efa47d5ad89da8d51bbdf82e0e4c3c67108139), so we need to explicitly enable it again. 
--- .../index/analysis/KuromojiIntegrationTests.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java index 5d909959db3..9475f1ca069 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java @@ -19,6 +19,9 @@ package org.elasticsearch.index.analysis; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.junit.Test; @@ -30,6 +33,14 @@ import static org.hamcrest.CoreMatchers.notNullValue; @ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.SUITE) public class KuromojiIntegrationTests extends ElasticsearchIntegrationTest { + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return ImmutableSettings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true) + .build(); + } + @Test public void testKuromojiAnalyzer() throws ExecutionException, InterruptedException { AnalyzeResponse response = client().admin().indices() From 5a06757096439409c23dcfc668bd4e5eb943bdc2 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 28 May 2014 15:07:29 +0200 Subject: [PATCH 063/115] Add integration tests Closes #36. 
(cherry picked from commit f2c83df) --- .../analysis/KuromojiIntegrationTests.java | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java index 9475f1ca069..a24acdf6b37 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java @@ -19,14 +19,19 @@ package org.elasticsearch.index.analysis; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; +import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.junit.Test; +import java.io.IOException; import java.util.concurrent.ExecutionException; +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.CoreMatchers.is; import static org.hamcrest.CoreMatchers.notNullValue; @@ -56,4 +61,31 @@ public class KuromojiIntegrationTests extends ElasticsearchIntegrationTest { assertThat(response.getTokens().get(i).getTerm(), is(expectedTokens[i])); } } + + @Test + public void testKuromojiAnalyzerInMapping() throws ExecutionException, InterruptedException, IOException { + createIndex("test"); + ensureGreen("test"); + final XContentBuilder mapping = jsonBuilder().startObject() + .startObject("type") + .startObject("properties") + .startObject("foo") + .field("type", "string") + .field("analyzer", "kuromoji") + .endObject() + .endObject() + .endObject() + .endObject(); + + 
client().admin().indices().preparePutMapping("test").setType("type").setSource(mapping).get(); + + index("test", "type", "1", "foo", "JR新宿駅の近くにビールを飲みに行こうか"); + refresh(); + + SearchResponse response = client().prepareSearch("test").setQuery( + QueryBuilders.matchQuery("foo", "jr") + ).execute().actionGet(); + + assertThat(response.getHits().getTotalHits(), is(1L)); + } } From 19697b2597421b6cb91fbea8e1ce62c623b29d5f Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 27 Jun 2014 11:46:54 +0200 Subject: [PATCH 064/115] Update to Lucene 4.9.0 Closes #37. (cherry picked from commit 5243bdc) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0dec138f7ea..4922a06042c 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ 2.0.0-SNAPSHOT - 4.8.1 + 4.9.0 1 true onerror From d794dcf1c49f7ed4dd1cb5a2324487f4cd06ba1a Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 16 Jul 2014 09:28:18 +0200 Subject: [PATCH 065/115] Update to elasticsearch 1.4.0 Closes #38. (cherry picked from commit 7ee03e7) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 3eeabbd9507..aab9956f65c 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.1.0`. * For master elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/master). +* For 1.4.x elasticsearch versions, look at [es-1.4 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.4). * For 1.3.x elasticsearch versions, look at [es-1.3 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.3). * For 1.2.x elasticsearch versions, look at [es-1.2 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.2). 
* For 1.1.x elasticsearch versions, look at [es-1.1 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.1). From 4526b8fd4d2829f6871633875ea22bf80f088194 Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Fri, 22 Aug 2014 13:54:19 +0900 Subject: [PATCH 066/115] Docs: make the welcome page more obvious Closes #39. --- .gitignore | 1 + README.md | 40 +- dev-tools/build_release.py | 722 ------------------------------------- dev-tools/release.py | 134 +++++++ dev-tools/upload-s3.py | 67 ---- 5 files changed, 157 insertions(+), 807 deletions(-) delete mode 100755 dev-tools/build_release.py create mode 100644 dev-tools/release.py delete mode 100644 dev-tools/upload-s3.py diff --git a/.gitignore b/.gitignore index a1596ca81e8..2660128de5e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ /.settings /.classpath /.local-execution-hints.log +/plugin_tools diff --git a/README.md b/README.md index aab9956f65c..2371af2c46f 100644 --- a/README.md +++ b/README.md @@ -3,30 +3,34 @@ Japanese (kuromoji) Analysis for Elasticsearch The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.1.0`. +In order to install the plugin, run: -* For master elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/master). -* For 1.4.x elasticsearch versions, look at [es-1.4 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.4). -* For 1.3.x elasticsearch versions, look at [es-1.3 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.3). -* For 1.2.x elasticsearch versions, look at [es-1.2 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.2). 
-* For 1.1.x elasticsearch versions, look at [es-1.1 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.1). -* For 1.0.x elasticsearch versions, look at [es-1.0 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.0). -* For 0.90.x elasticsearch versions, look at [es-0.90 branch](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-0.90). +```sh +bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.3.0 +``` +You need to install a version matching your Elasticsearch version: -| Kuromoji Analysis Plugin | elasticsearch | Release date | -|-----------------------------|---------------------|:------------:| -| 3.0.0-SNAPSHOT | master (2.x) | XXXX-XX-XX | +| elasticsearch | Kuromoji Analysis Plugin | Docs | +|---------------|-----------------------------|------------| +| master | Build from source | See below | +| es-1.x | Build from source | [2.4.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-240-snapshot-for-elasticsearch-1x) | +| es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.3.0/#japanese-kuromoji-analysis-for-elasticsearch) | +| es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.2.0/#japanese-kuromoji-analysis-for-elasticsearch) | +| es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.1.0/#japanese-kuromoji-analysis-for-elasticsearch) | +| es-1.0 | 2.0.0 | [2.0.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.0.0/#japanese-kuromoji-analysis-for-elasticsearch) | +| es-0.90 | 1.8.0 | [1.8.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v1.8.0/#japanese-kuromoji-analysis-for-elasticsearch) | -Please read documentation relative to the version you are using: +To build a `SNAPSHOT` version, you need to build it with Maven: -* 
[3.0.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/blob/master/README.md) +```bash +mvn clean install +plugin --install analysis-kuromoji \ + --url file:target/releases/elasticsearch-analysis-kuromoji-X.X.X-SNAPSHOT.zip +``` - -The plugin includes the `kuromoji` analyzer. - -Includes Analyzer, Tokenizer, TokenFilter ----------------------------------------- +Includes Analyzer, Tokenizer, TokenFilter, CharFilter +----------------------------------------------- The plugin includes these analyzer and tokenizer, tokenfilter. diff --git a/dev-tools/build_release.py b/dev-tools/build_release.py deleted file mode 100755 index db8345440c7..00000000000 --- a/dev-tools/build_release.py +++ /dev/null @@ -1,722 +0,0 @@ -# Licensed to Elasticsearch under one or more contributor -# license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright -# ownership. Elasticsearch licenses this file to you under -# the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on -# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -import re -import tempfile -import shutil -import os -import datetime -import argparse -import github3 -import smtplib - -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText - -from os.path import dirname, abspath - -""" - This tool builds a release from the a given elasticsearch plugin branch. 
- In order to execute it go in the top level directory and run: - $ python3 dev_tools/build_release.py --branch master --publish --remote origin - - By default this script runs in 'dry' mode which essentially simulates a release. If the - '--publish' option is set the actual release is done. - If not in 'dry' mode, a mail will be automatically sent to the mailing list. - You can disable it with the option '--disable_mail' - - $ python3 dev_tools/build_release.py --publish --remote origin --disable_mail - - The script takes over almost all - steps necessary for a release from a high level point of view it does the following things: - - - run prerequisite checks ie. check for S3 credentials available as env variables - - detect the version to release from the specified branch (--branch) or the current branch - - creates a release branch & updates pom.xml and README.md to point to a release version rather than a snapshot - - builds the artifacts - - commits the new version and merges the release branch into the source branch - - creates a tag and pushes the commit to the specified origin (--remote) - - publishes the releases to sonatype and S3 - - send a mail based on github issues fixed by this version - -Once it's done it will print all the remaining steps. 
- - Prerequisites: - - Python 3k for script execution - - Boto for S3 Upload ($ apt-get install python-boto or pip-3.3 install boto) - - github3 module (pip-3.3 install github3.py) - - S3 keys exported via ENV Variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) - - GITHUB (login/password) or key exported via ENV Variables (GITHUB_LOGIN, GITHUB_PASSWORD or GITHUB_KEY) - (see https://github.com/settings/applications#personal-access-tokens) - Optional: default to no authentication - - SMTP_HOST - Optional: default to localhost - - MAIL_SENDER - Optional: default to 'david@pilato.fr': must be authorized to send emails to elasticsearch mailing list - - MAIL_TO - Optional: default to 'elasticsearch@googlegroups.com' -""" -env = os.environ - -LOG = env.get('ES_RELEASE_LOG', '/tmp/elasticsearch_release.log') -ROOT_DIR = os.path.join(abspath(dirname(__file__)), '../') -README_FILE = ROOT_DIR + 'README.md' -POM_FILE = ROOT_DIR + 'pom.xml' - -def log(msg): - log_plain('\n%s' % msg) - -def log_plain(msg): - f = open(LOG, mode='ab') - f.write(msg.encode('utf-8')) - f.close() - -def run(command, quiet=False): - log('%s: RUN: %s\n' % (datetime.datetime.now(), command)) - if os.system('%s >> %s 2>&1' % (command, LOG)): - msg = ' FAILED: %s [see log %s]' % (command, LOG) - if not quiet: - print(msg) - raise RuntimeError(msg) - -try: - JAVA_HOME = env['JAVA_HOME'] -except KeyError: - raise RuntimeError(""" - Please set JAVA_HOME in the env before running release tool - On OSX use: export JAVA_HOME=`/usr/libexec/java_home -v '1.6*'`""") - -try: - MVN='mvn' - # make sure mvn3 is used if mvn3 is available - # some systems use maven 2 as default - run('mvn3 --version', quiet=True) - MVN='mvn3' -except RuntimeError: - pass - - -def java_exe(): - path = JAVA_HOME - return 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % (path, path, path) - -# Returns the hash of the current git HEAD revision -def get_head_hash(): - return os.popen(' git rev-parse --verify HEAD 
2>&1').read().strip() - -# Returns the hash of the given tag revision -def get_tag_hash(tag): - return os.popen('git show-ref --tags %s --hash 2>&1' % (tag)).read().strip() - -# Returns the name of the current branch -def get_current_branch(): - return os.popen('git rev-parse --abbrev-ref HEAD 2>&1').read().strip() - -# Utility that returns the name of the release branch for a given version -def release_branch(version): - return 'release_branch_%s' % version - -# runs get fetch on the given remote -def fetch(remote): - run('git fetch %s' % remote) - -# Creates a new release branch from the given source branch -# and rebases the source branch from the remote before creating -# the release branch. Note: This fails if the source branch -# doesn't exist on the provided remote. -def create_release_branch(remote, src_branch, release): - run('git checkout %s' % src_branch) - run('git pull --rebase %s %s' % (remote, src_branch)) - run('git checkout -b %s' % (release_branch(release))) - - -# Reads the given file and applies the -# callback to it. If the callback changed -# a line the given file is replaced with -# the modified input. 
-def process_file(file_path, line_callback): - fh, abs_path = tempfile.mkstemp() - modified = False - with open(abs_path,'w', encoding='utf-8') as new_file: - with open(file_path, encoding='utf-8') as old_file: - for line in old_file: - new_line = line_callback(line) - modified = modified or (new_line != line) - new_file.write(new_line) - os.close(fh) - if modified: - #Remove original file - os.remove(file_path) - #Move new file - shutil.move(abs_path, file_path) - return True - else: - # nothing to do - just remove the tmp file - os.remove(abs_path) - return False - -# Guess the next snapshot version number (increment second digit) -def guess_snapshot(version): - digits=list(map(int, re.findall(r'\d+', version))) - source='%s.%s' % (digits[0], digits[1]) - destination='%s.%s' % (digits[0], digits[1]+1) - return version.replace(source, destination) - -# Moves the pom.xml file from a snapshot to a release -def remove_maven_snapshot(pom, release): - pattern = '%s-SNAPSHOT' % release - replacement = '%s' % release - def callback(line): - return line.replace(pattern, replacement) - process_file(pom, callback) - -# Moves the README.md file from a snapshot to a release -def remove_version_snapshot(readme_file, release): - pattern = '%s-SNAPSHOT' % release - replacement = '%s ' % release - def callback(line): - return line.replace(pattern, replacement) - process_file(readme_file, callback) - -# Moves the pom.xml file to the next snapshot -def add_maven_snapshot(pom, release, snapshot): - pattern = '%s' % release - replacement = '%s-SNAPSHOT' % snapshot - def callback(line): - return line.replace(pattern, replacement) - process_file(pom, callback) - -# Add in README.md file the next snapshot -def add_version_snapshot(readme_file, release, snapshot): - pattern = '| %s ' % release - replacement = '| %s-SNAPSHOT' % snapshot - def callback(line): - # If we find pattern, we copy the line and replace its content - if line.find(pattern) >= 0: - return line.replace(pattern, 
replacement).replace('%s' % (datetime.datetime.now().strftime("%Y-%m-%d")), - 'XXXX-XX-XX')+line - else: - return line - process_file(readme_file, callback) - -# Moves the README.md file from a snapshot to a release (documentation link) -def remove_documentation_snapshot(readme_file, repo_url, release, branch): - pattern = '* [%s-SNAPSHOT](%sblob/%s/README.md)' % (release, repo_url, branch) - replacement = '* [%s](%sblob/v%s/README.md)' % (release, repo_url, release) - def callback(line): - # If we find pattern, we replace its content - if line.find(pattern) >= 0: - return line.replace(pattern, replacement) - else: - return line - process_file(readme_file, callback) - -# Add in README.markdown file the documentation for the next version -def add_documentation_snapshot(readme_file, repo_url, release, snapshot, branch): - pattern = '* [%s](%sblob/v%s/README.md)' % (release, repo_url, release) - replacement = '* [%s-SNAPSHOT](%sblob/%s/README.md)' % (snapshot, repo_url, branch) - def callback(line): - # If we find pattern, we copy the line and replace its content - if line.find(pattern) >= 0: - return line.replace(pattern, replacement)+line - else: - return line - process_file(readme_file, callback) - -# Set release date in README.md file -def set_date(readme_file): - pattern = 'XXXX-XX-XX' - replacement = '%s' % (datetime.datetime.now().strftime("%Y-%m-%d")) - def callback(line): - return line.replace(pattern, replacement) - process_file(readme_file, callback) - -# Update installation instructions in README.md file -def set_install_instructions(readme_file, artifact_name, release): - pattern = '`bin/plugin -install elasticsearch/%s/.+`' % artifact_name - replacement = '`bin/plugin -install elasticsearch/%s/%s`' % (artifact_name, release) - def callback(line): - return re.sub(pattern, replacement, line) - process_file(readme_file, callback) - - -# Stages the given files for the next git commit -def add_pending_files(*files): - for file in files: - run('git add %s' % 
file) - -# Executes a git commit with 'release [version]' as the commit message -def commit_release(artifact_id, release): - run('git commit -m "prepare release %s-%s"' % (artifact_id, release)) - -def commit_snapshot(): - run('git commit -m "prepare for next development iteration"') - -def tag_release(release): - run('git tag -a v%s -m "Tag release version %s"' % (release, release)) - -def run_mvn(*cmd): - for c in cmd: - run('%s; %s -f %s %s' % (java_exe(), MVN, POM_FILE, c)) - -def build_release(run_tests=False, dry_run=True): - target = 'deploy' - if dry_run: - target = 'package' - if run_tests: - run_mvn('clean test') - run_mvn('clean %s -DskipTests' %(target)) - -# Checks the pom.xml for the release version. 2.0.0-SNAPSHOT -# This method fails if the pom file has no SNAPSHOT version set ie. -# if the version is already on a release version we fail. -# Returns the next version string ie. 0.90.7 -def find_release_version(src_branch): - run('git checkout %s' % src_branch) - with open(POM_FILE, encoding='utf-8') as file: - for line in file: - match = re.search(r'(.+)-SNAPSHOT', line) - if match: - return match.group(1) - raise RuntimeError('Could not find release version in branch %s' % src_branch) - -# extract a value from pom.xml -def find_from_pom(tag): - with open(POM_FILE, encoding='utf-8') as file: - for line in file: - match = re.search(r'<%s>(.+)' % (tag, tag), line) - if match: - return match.group(1) - raise RuntimeError('Could not find <%s> in pom.xml file' % (tag)) - -def get_artifacts(artifact_id, release): - artifact_path = ROOT_DIR + 'target/releases/%s-%s.zip' % (artifact_id, release) - print(' Path %s' % (artifact_path)) - if not os.path.isfile(artifact_path): - raise RuntimeError('Could not find required artifact at %s' % (artifact_path)) - return artifact_path - -# Generates sha1 for a file -# and returns the checksum files as well -# as the given files in a list -def generate_checksums(release_file): - res = [] - directory = 
os.path.dirname(release_file) - file = os.path.basename(release_file) - checksum_file = '%s.sha1.txt' % file - - if os.system('cd %s; shasum %s > %s' % (directory, file, checksum_file)): - raise RuntimeError('Failed to generate checksum for file %s' % release_file) - res = res + [os.path.join(directory, checksum_file), release_file] - return res - -def git_merge(src_branch, release_version): - run('git checkout %s' % src_branch) - run('git merge %s' % release_branch(release_version)) - -def git_push(remote, src_branch, release_version, dry_run): - if not dry_run: - run('git push %s %s' % (remote, src_branch)) # push the commit - run('git push %s v%s' % (remote, release_version)) # push the tag - else: - print(' dryrun [True] -- skipping push to remote %s' % remote) - -def publish_artifacts(artifacts, base='elasticsearch/elasticsearch', dry_run=True): - location = os.path.dirname(os.path.realpath(__file__)) - for artifact in artifacts: - if dry_run: - print('Skip Uploading %s to Amazon S3 in %s' % (artifact, base)) - else: - print('Uploading %s to Amazon S3' % artifact) - # requires boto to be installed but it is not available on python3k yet so we use a dedicated tool - run('python %s/upload-s3.py --file %s --path %s' % (location, os.path.abspath(artifact), base)) - - -################# -## -## -## Email and Github Management -## -## -################# -def format_issues_plain(issues, title='Fix'): - response = "" - - if len(issues) > 0: - response += '%s:\n' % title - for issue in issues: - response += ' * [%s] - %s (%s)\n' % (issue.number, issue.title, issue.html_url) - - return response - -def format_issues_html(issues, title='Fix'): - response = "" - - if len(issues) > 0: - response += '

%s

\n
    \n' % title - for issue in issues: - response += '
  • [%s] - %s\n' % (issue.html_url, issue.number, issue.title) - response += '
\n' - - return response - -def get_github_repository(reponame, - login=env.get('GITHUB_LOGIN', None), - password=env.get('GITHUB_PASSWORD', None), - key=env.get('GITHUB_KEY', None)): - if login: - g = github3.login(login, password) - elif key: - g = github3.login(token=key) - else: - g = github3.GitHub() - - return g.repository("elasticsearch", reponame) - -# Check if there are some remaining open issues and fails -def check_opened_issues(version, repository, reponame): - opened_issues = [i for i in repository.iter_issues(state='open', labels='%s' % version)] - if len(opened_issues)>0: - raise NameError('Some issues [%s] are still opened. Check https://github.com/elasticsearch/%s/issues?labels=%s&state=open' - % (len(opened_issues), reponame, version)) - -# List issues from github: can be done anonymously if you don't -# exceed a given number of github API calls per day -# Check if there are some remaining open issues and fails -def list_issues(version, - repository, - severity='bug'): - issues = [i for i in repository.iter_issues(state='closed', labels='%s,%s' % (severity, version))] - return issues - -# Get issues from github and generates a Plain/HTML Multipart email -# And send it if dry_run=False -def prepare_email(artifact_id, release_version, repository, - artifact_name, artifact_description, project_url, - severity_labels_bug='bug', - severity_labels_update='update', - severity_labels_new='new', - severity_labels_doc='doc'): - - ## Get bugs from github - issues_bug = list_issues(release_version, repository, severity=severity_labels_bug) - issues_update = list_issues(release_version, repository, severity=severity_labels_update) - issues_new = list_issues(release_version, repository, severity=severity_labels_new) - issues_doc = list_issues(release_version, repository, severity=severity_labels_doc) - - ## Format content to plain text - plain_issues_bug = format_issues_plain(issues_bug, 'Fix') - plain_issues_update = format_issues_plain(issues_update, 'Update') 
- plain_issues_new = format_issues_plain(issues_new, 'New') - plain_issues_doc = format_issues_plain(issues_doc, 'Doc') - - ## Format content to html - html_issues_bug = format_issues_html(issues_bug, 'Fix') - html_issues_update = format_issues_html(issues_update, 'Update') - html_issues_new = format_issues_html(issues_new, 'New') - html_issues_doc = format_issues_html(issues_doc, 'Doc') - - if len(issues_bug)+len(issues_update)+len(issues_new)+len(issues_doc) > 0: - plain_empty_message = "" - html_empty_message = "" - - else: - plain_empty_message = "No issue listed for this release" - html_empty_message = "

No issue listed for this release

" - - msg = MIMEMultipart('alternative') - msg['Subject'] = '[ANN] %s %s released' % (artifact_name, release_version) - text = """ -Heya, - - -We are pleased to announce the release of the %(artifact_name)s, version %(release_version)s. - -%(artifact_description)s. - -%(project_url)s - -Release Notes - %(artifact_id)s - Version %(release_version)s - -%(empty_message)s -%(issues_bug)s -%(issues_update)s -%(issues_new)s -%(issues_doc)s - -Issues, Pull requests, Feature requests are warmly welcome on %(artifact_id)s project repository: %(project_url)s -For questions or comments around this plugin, feel free to use elasticsearch mailing list: https://groups.google.com/forum/#!forum/elasticsearch - -Enjoy, - --The Elasticsearch team -""" % {'release_version': release_version, - 'artifact_id': artifact_id, - 'artifact_name': artifact_name, - 'artifact_description': artifact_description, - 'project_url': project_url, - 'empty_message': plain_empty_message, - 'issues_bug': plain_issues_bug, - 'issues_update': plain_issues_update, - 'issues_new': plain_issues_new, - 'issues_doc': plain_issues_doc} - - html = """ - - -

Heya,

- -

We are pleased to announce the release of the %(artifact_name)s, version %(release_version)s

- -
%(artifact_description)s.
- -

Release Notes - Version %(release_version)s

-%(empty_message)s -%(issues_bug)s -%(issues_update)s -%(issues_new)s -%(issues_doc)s - -

Issues, Pull requests, Feature requests are warmly welcome on -%(artifact_id)s project repository!

-

For questions or comments around this plugin, feel free to use elasticsearch -mailing list!

- -

Enjoy,

- -

- The Elasticsearch team

- -""" % {'release_version': release_version, - 'artifact_id': artifact_id, - 'artifact_name': artifact_name, - 'artifact_description': artifact_description, - 'project_url': project_url, - 'empty_message': html_empty_message, - 'issues_bug': html_issues_bug, - 'issues_update': html_issues_update, - 'issues_new': html_issues_new, - 'issues_doc': html_issues_doc} - - # Record the MIME types of both parts - text/plain and text/html. - part1 = MIMEText(text, 'plain') - part2 = MIMEText(html, 'html') - - # Attach parts into message container. - # According to RFC 2046, the last part of a multipart message, in this case - # the HTML message, is best and preferred. - msg.attach(part1) - msg.attach(part2) - - return msg - -def send_email(msg, - dry_run=True, - mail=True, - sender=env.get('MAIL_SENDER'), - to=env.get('MAIL_TO', 'elasticsearch@googlegroups.com'), - smtp_server=env.get('SMTP_SERVER', 'localhost')): - msg['From'] = 'Elasticsearch Team <%s>' % sender - msg['To'] = 'Elasticsearch Mailing List <%s>' % to - # save mail on disk - with open(ROOT_DIR+'target/email.txt', 'w') as email_file: - email_file.write(msg.as_string()) - if mail and not dry_run: - s = smtplib.SMTP(smtp_server, 25) - s.sendmail(sender, to, msg.as_string()) - s.quit() - else: - print('generated email: open %starget/email.txt' % ROOT_DIR) - -def print_sonatype_notice(): - settings = os.path.join(os.path.expanduser('~'), '.m2/settings.xml') - if os.path.isfile(settings): - with open(settings, encoding='utf-8') as settings_file: - for line in settings_file: - if line.strip() == 'sonatype-nexus-snapshots': - # moving out - we found the indicator no need to print the warning - return - print(""" - NOTE: No sonatype settings detected, make sure you have configured - your sonatype credentials in '~/.m2/settings.xml': - - - ... - - - sonatype-nexus-snapshots - your-jira-id - your-jira-pwd - - - sonatype-nexus-staging - your-jira-id - your-jira-pwd - - - ... 
- - """) - -def check_s3_credentials(): - if not env.get('AWS_ACCESS_KEY_ID', None) or not env.get('AWS_SECRET_ACCESS_KEY', None): - raise RuntimeError('Could not find "AWS_ACCESS_KEY_ID" / "AWS_SECRET_ACCESS_KEY" in the env variables please export in order to upload to S3') - -def check_github_credentials(): - if not env.get('GITHUB_KEY', None) and not env.get('GITHUB_LOGIN', None): - log('WARN: Could not find "GITHUB_LOGIN" / "GITHUB_PASSWORD" or "GITHUB_KEY" in the env variables. You could need it.') - -def check_email_settings(): - if not env.get('MAIL_SENDER', None): - raise RuntimeError('Could not find "MAIL_SENDER"') - -# we print a notice if we can not find the relevant infos in the ~/.m2/settings.xml -print_sonatype_notice() - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Builds and publishes a Elasticsearch Plugin Release') - parser.add_argument('--branch', '-b', metavar='master', default=get_current_branch(), - help='The branch to release from. Defaults to the current branch.') - parser.add_argument('--skiptests', '-t', dest='tests', action='store_false', - help='Skips tests before release. Tests are run by default.') - parser.set_defaults(tests=True) - parser.add_argument('--remote', '-r', metavar='origin', default='origin', - help='The remote to push the release commit and tag to. Default is [origin]') - parser.add_argument('--publish', '-p', dest='dryrun', action='store_false', - help='Publishes the release. Disable by default.') - parser.add_argument('--disable_mail', '-dm', dest='mail', action='store_false', - help='Do not send a release email. 
Email is sent by default.') - - parser.set_defaults(dryrun=True) - parser.set_defaults(mail=True) - args = parser.parse_args() - - src_branch = args.branch - remote = args.remote - run_tests = args.tests - dry_run = args.dryrun - mail = args.mail - - if not dry_run: - check_s3_credentials() - print('WARNING: dryrun is set to "false" - this will push and publish the release') - if mail: - check_email_settings() - print('An email to %s will be sent after the release' - % env.get('MAIL_TO', 'elasticsearch@googlegroups.com')) - input('Press Enter to continue...') - - check_github_credentials() - - print(''.join(['-' for _ in range(80)])) - print('Preparing Release from branch [%s] running tests: [%s] dryrun: [%s]' % (src_branch, run_tests, dry_run)) - print(' JAVA_HOME is [%s]' % JAVA_HOME) - print(' Running with maven command: [%s] ' % (MVN)) - - release_version = find_release_version(src_branch) - artifact_id = find_from_pom('artifactId') - artifact_name = find_from_pom('name') - artifact_description = find_from_pom('description') - project_url = find_from_pom('url') - elasticsearch_version = find_from_pom('elasticsearch.version') - print(' Artifact Id: [%s]' % artifact_id) - print(' Release version: [%s]' % release_version) - print(' Elasticsearch: [%s]' % elasticsearch_version) - if elasticsearch_version.find('-SNAPSHOT') != -1: - raise RuntimeError('Can not release with a SNAPSHOT elasticsearch dependency: %s' % elasticsearch_version) - - # extract snapshot - default_snapshot_version = guess_snapshot(release_version) - snapshot_version = input('Enter next snapshot version [%s]:' % default_snapshot_version) - snapshot_version = snapshot_version or default_snapshot_version - - print(' Next version: [%s-SNAPSHOT]' % snapshot_version) - print(' Artifact Name: [%s]' % artifact_name) - print(' Artifact Description: [%s]' % artifact_description) - print(' Project URL: [%s]' % project_url) - - if not dry_run: - smoke_test_version = release_version - head_hash = 
get_head_hash() - run_mvn('clean') # clean the env! - create_release_branch(remote, src_branch, release_version) - print(' Created release branch [%s]' % (release_branch(release_version))) - success = False - try: - pending_files = [POM_FILE, README_FILE] - remove_maven_snapshot(POM_FILE, release_version) - remove_documentation_snapshot(README_FILE, project_url, release_version, src_branch) - remove_version_snapshot(README_FILE, release_version) - set_date(README_FILE) - set_install_instructions(README_FILE, artifact_id, release_version) - print(' Done removing snapshot version') - add_pending_files(*pending_files) # expects var args use * to expand - commit_release(artifact_id, release_version) - print(' Committed release version [%s]' % release_version) - print(''.join(['-' for _ in range(80)])) - print('Building Release candidate') - input('Press Enter to continue...') - print(' Checking github issues') - repository = get_github_repository(artifact_id) - check_opened_issues(release_version, repository, artifact_id) - if not dry_run: - print(' Running maven builds now and publish to sonatype - run-tests [%s]' % run_tests) - else: - print(' Running maven builds now run-tests [%s]' % run_tests) - build_release(run_tests=run_tests, dry_run=dry_run) - artifact = get_artifacts(artifact_id, release_version) - artifact_and_checksums = generate_checksums(artifact) - print(''.join(['-' for _ in range(80)])) - - print('Finish Release -- dry_run: %s' % dry_run) - input('Press Enter to continue...') - print(' merge release branch') - git_merge(src_branch, release_version) - print(' tag') - tag_release(release_version) - - add_maven_snapshot(POM_FILE, release_version, snapshot_version) - add_version_snapshot(README_FILE, release_version, snapshot_version) - add_documentation_snapshot(README_FILE, project_url, release_version, snapshot_version, src_branch) - add_pending_files(*pending_files) - commit_snapshot() - - print(' push to %s %s -- dry_run: %s' % (remote, src_branch, 
dry_run)) - git_push(remote, src_branch, release_version, dry_run) - print(' publish artifacts to S3 -- dry_run: %s' % dry_run) - publish_artifacts(artifact_and_checksums, base='elasticsearch/%s' % (artifact_id) , dry_run=dry_run) - print(' preparing email (from github issues)') - msg = prepare_email(artifact_id, release_version, repository, artifact_name, artifact_description, project_url) - print(' sending email -- dry_run: %s, mail: %s' % (dry_run, mail)) - send_email(msg, dry_run=dry_run, mail=mail) - - pending_msg = """ -Release successful pending steps: - * close and release sonatype repo: https://oss.sonatype.org/ - * check if the release is there https://oss.sonatype.org/content/repositories/releases/org/elasticsearch/%(artifact_id)s/%(version)s - * tweet about the release -""" - print(pending_msg % {'version': release_version, - 'artifact_id': artifact_id, - 'project_url': project_url}) - success = True - finally: - if not success: - run('git reset --hard HEAD') - run('git checkout %s' % src_branch) - elif dry_run: - print('End of dry_run') - input('Press Enter to reset changes...') - - run('git reset --hard %s' % head_hash) - run('git tag -d v%s' % release_version) - # we delete this one anyways - run('git branch -D %s' % (release_branch(release_version))) diff --git a/dev-tools/release.py b/dev-tools/release.py new file mode 100644 index 00000000000..edcc637d068 --- /dev/null +++ b/dev-tools/release.py @@ -0,0 +1,134 @@ +# Licensed to Elasticsearch under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on +# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. See the License for the specific +# language governing permissions and limitations under the License. + +import datetime +import os +import shutil +import sys +import time +import urllib +import urllib.request +import zipfile + +from os.path import dirname, abspath + +""" + This tool builds a release from the a given elasticsearch plugin branch. + + It is basically a wrapper on top of launch_release.py which: + + - tries to get a more recent version of launch_release.py in ... + - download it if needed + - launch it passing all arguments to it, like: + + $ python3 dev_tools/release.py --branch master --publish --remote origin + + Important options: + + # Dry run + $ python3 dev_tools/release.py + + # Dry run without tests + python3 dev_tools/release.py --skiptests + + # Release, publish artifacts and announce + $ python3 dev_tools/release.py --publish + + See full documentation in launch_release.py +""" +env = os.environ + +# Change this if the source repository for your scripts is at a different location +SOURCE_REPO = 'elasticsearch/elasticsearch-plugins-script' +# We define that we should download again the script after 1 days +SCRIPT_OBSOLETE_DAYS = 1 +# We ignore in master.zip file the following files +IGNORED_FILES = ['.gitignore', 'README.md'] + + +ROOT_DIR = abspath(os.path.join(abspath(dirname(__file__)), '../')) +TARGET_TOOLS_DIR = ROOT_DIR + '/plugin_tools' +DEV_TOOLS_DIR = ROOT_DIR + '/dev-tools' +BUILD_RELEASE_FILENAME = 'release.zip' +BUILD_RELEASE_FILE = TARGET_TOOLS_DIR + '/' + BUILD_RELEASE_FILENAME +SOURCE_URL = 'https://github.com/%s/archive/master.zip' % SOURCE_REPO + +# Download a recent version of the release plugin tool +try: + 
os.mkdir(TARGET_TOOLS_DIR) + print('directory %s created' % TARGET_TOOLS_DIR) +except FileExistsError: + pass + + +try: + # we check latest update. If we ran an update recently, we + # are not going to check it again + download = True + + try: + last_download_time = datetime.datetime.fromtimestamp(os.path.getmtime(BUILD_RELEASE_FILE)) + if (datetime.datetime.now()-last_download_time).days < SCRIPT_OBSOLETE_DAYS: + download = False + except FileNotFoundError: + pass + + if download: + urllib.request.urlretrieve(SOURCE_URL, BUILD_RELEASE_FILE) + with zipfile.ZipFile(BUILD_RELEASE_FILE) as myzip: + for member in myzip.infolist(): + filename = os.path.basename(member.filename) + # skip directories + if not filename: + continue + if filename in IGNORED_FILES: + continue + + # copy file (taken from zipfile's extract) + source = myzip.open(member.filename) + target = open(os.path.join(TARGET_TOOLS_DIR, filename), "wb") + with source, target: + shutil.copyfileobj(source, target) + # We keep the original date + date_time = time.mktime(member.date_time + (0, 0, -1)) + os.utime(os.path.join(TARGET_TOOLS_DIR, filename), (date_time, date_time)) + print('plugin-tools updated from %s' % SOURCE_URL) +except urllib.error.HTTPError: + pass + + +# Let see if we need to update the release.py script itself +source_time = os.path.getmtime(TARGET_TOOLS_DIR + '/release.py') +repo_time = os.path.getmtime(DEV_TOOLS_DIR + '/release.py') +if source_time > repo_time: + input('release.py needs an update. 
Press a key to update it...') + shutil.copyfile(TARGET_TOOLS_DIR + '/release.py', DEV_TOOLS_DIR + '/release.py') + +# We can launch the build process +try: + PYTHON = 'python' + # make sure python3 is used if python3 is available + # some systems use python 2 as default + os.system('python3 --version > /dev/null 2>&1') + PYTHON = 'python3' +except RuntimeError: + pass + +release_args = '' +for x in range(1, len(sys.argv)): + release_args += ' ' + sys.argv[x] + +os.system('%s %s/build_release.py %s' % (PYTHON, TARGET_TOOLS_DIR, release_args)) diff --git a/dev-tools/upload-s3.py b/dev-tools/upload-s3.py deleted file mode 100644 index 95ea576e65c..00000000000 --- a/dev-tools/upload-s3.py +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to Elasticsearch under one or more contributor -# license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright -# ownership. Elasticsearch licenses this file to you under -# the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on -# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
- -import os -import sys -import argparse -try: - import boto.s3 -except: - raise RuntimeError(""" - S3 upload requires boto to be installed - Use one of: - 'pip install -U boto' - 'apt-get install python-boto' - 'easy_install boto' - """) - -import boto.s3 - - -def list_buckets(conn): - return conn.get_all_buckets() - - -def upload_s3(conn, path, key, file, bucket): - print 'Uploading %s to Amazon S3 bucket %s/%s' % \ - (file, bucket, os.path.join(path, key)) - def percent_cb(complete, total): - sys.stdout.write('.') - sys.stdout.flush() - bucket = conn.create_bucket(bucket) - k = bucket.new_key(os.path.join(path, key)) - k.set_contents_from_filename(file, cb=percent_cb, num_cb=100) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Uploads files to Amazon S3') - parser.add_argument('--file', '-f', metavar='path to file', - help='the branch to release from', required=True) - parser.add_argument('--bucket', '-b', metavar='B42', default='download.elasticsearch.org', - help='The S3 Bucket to upload to') - parser.add_argument('--path', '-p', metavar='elasticsearch/elasticsearch', default='elasticsearch/elasticsearch', - help='The key path to use') - parser.add_argument('--key', '-k', metavar='key', default=None, - help='The key - uses the file name as default key') - args = parser.parse_args() - if args.key: - key = args.key - else: - key = os.path.basename(args.file) - - connection = boto.connect_s3() - upload_s3(connection, args.path, key, args.file, args.bucket); - From 6e48989cd078d7fb64fbf83d2b60b66545de6a05 Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Fri, 22 Aug 2014 13:58:46 +0900 Subject: [PATCH 067/115] Docs: fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2371af2c46f..f7f36559811 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, run: ```sh 
-bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.3.0` +bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.3.0 ``` You need to install a version matching your Elasticsearch version: From 677211ad938212def319746df810a8eb3427035b Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 8 Sep 2014 18:18:48 +0200 Subject: [PATCH 068/115] Update to Lucene 4.10.0 Closes #44. --- pom.xml | 2 +- .../index/analysis/KuromojiAnalyzerProvider.java | 2 +- .../index/analysis/KuromojiPartOfSpeechFilterFactory.java | 3 +-- .../indices/analysis/KuromojiIndicesAnalysis.java | 6 ++---- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pom.xml b/pom.xml index 4922a06042c..4410b65a95b 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ 2.0.0-SNAPSHOT - 4.9.0 + 4.10.0 1 true onerror diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java index ab46b481cf0..beaeac311d6 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java @@ -44,7 +44,7 @@ public class KuromojiAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet(), version); final JapaneseTokenizer.Mode mode = KuromojiTokenizerFactory.getMode(settings); final UserDictionary userDictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings); - analyzer = new JapaneseAnalyzer(version, userDictionary, mode, CharArraySet.copy(version, stopWords), JapaneseAnalyzer.getDefaultStopTags()); + analyzer = new JapaneseAnalyzer(userDictionary, mode, CharArraySet.copy(stopWords), JapaneseAnalyzer.getDefaultStopTags()); } @Override diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java 
b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java index 020cd93b6a5..12a29a0741a 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java @@ -23,7 +23,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; -import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; @@ -48,7 +47,7 @@ public class KuromojiPartOfSpeechFilterFactory extends AbstractTokenFilterFactor @Override public TokenStream create(TokenStream tokenStream) { - return new JapanesePartOfSpeechStopFilter(Lucene.ANALYZER_VERSION, tokenStream, stopTags); + return new JapanesePartOfSpeechStopFilter(tokenStream, stopTags); } } diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java index 04c34a5f081..e0bec4354ff 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -25,7 +25,6 @@ import org.apache.lucene.analysis.ja.*; import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.analysis.*; @@ -44,7 +43,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { indicesAnalysisService.analyzerProviderFactories().put("kuromoji", new 
PreBuiltAnalyzerProviderFactory("kuromoji", AnalyzerScope.INDICES, - new JapaneseAnalyzer(Lucene.ANALYZER_VERSION))); + new JapaneseAnalyzer())); indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark", new KurumojiCharFilterFactoryFactory(new CharFilterFactory() { @@ -98,8 +97,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { @Override public TokenStream create(TokenStream tokenStream) { - return new JapanesePartOfSpeechStopFilter(Lucene.ANALYZER_VERSION, - tokenStream, JapaneseAnalyzer + return new JapanesePartOfSpeechStopFilter(tokenStream, JapaneseAnalyzer .getDefaultStopTags()); } })); From c2cf90657acbfd85fe290ebd69632ced9fb900ba Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 15 Sep 2014 13:52:16 +0200 Subject: [PATCH 069/115] Create branch es-1.4 for elasticsearch 1.4.0 --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f7f36559811..fbeaba5e6f2 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,8 @@ You need to install a version matching your Elasticsearch version: | elasticsearch | Kuromoji Analysis Plugin | Docs | |---------------|-----------------------------|------------| | master | Build from source | See below | -| es-1.x | Build from source | [2.4.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-240-snapshot-for-elasticsearch-1x) | +| es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | +| es-1.4 | Build from source | [2.4.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.4/#version-240-snapshot-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.3.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | 
[2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.2.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.1.0/#japanese-kuromoji-analysis-for-elasticsearch) | From d90f1e76c63a263d14fbf6aec5750a994a4dda1f Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Wed, 24 Sep 2014 17:18:13 -0400 Subject: [PATCH 070/115] Upgrade to Lucene 4.10.1 snapshot --- pom.xml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 4410b65a95b..17d22ed73e2 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,8 @@ 2.0.0-SNAPSHOT - 4.10.0 + 4.10.1 + 4.10.1-snapshot-1627368 1 true onerror @@ -42,6 +43,10 @@ + + Lucene snapshots + https://download.elasticsearch.org/lucenesnapshots/ + sonatype http://oss.sonatype.org/content/repositories/releases/ @@ -58,7 +63,7 @@ org.apache.lucene lucene-test-framework - ${lucene.version} + ${lucene.maven.version} test @@ -71,7 +76,7 @@ org.apache.lucene lucene-analyzers-kuromoji - ${lucene.version} + ${lucene.maven.version} compile From 773b26837039364ad213ea02d9c1cf3ebdad0986 Mon Sep 17 00:00:00 2001 From: mikemccand Date: Sun, 28 Sep 2014 17:55:18 -0400 Subject: [PATCH 071/115] Upgrade to Lucene 4.10.1 --- pom.xml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 17d22ed73e2..40cbc390612 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ 2.0.0-SNAPSHOT 4.10.1 - 4.10.1-snapshot-1627368 + 4.10.1 1 true onerror @@ -43,10 +43,6 @@ - - Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/ - sonatype http://oss.sonatype.org/content/repositories/releases/ From cb1577e2d2f16cf4952135d83d0de817f9dda192 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 2 Oct 2014 09:35:19 +0200 Subject: [PATCH 072/115] Update to elasticsearch 1.4.0.Beta1 (cherry picked from commit f47babb) (cherry picked from commit a515501) --- pom.xml | 
13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/pom.xml b/pom.xml index 40cbc390612..65d834c3006 100644 --- a/pom.xml +++ b/pom.xml @@ -132,17 +132,8 @@ maven-compiler-plugin 2.3.2 - 1.6 - 1.6 - - - - org.apache.maven.plugins - maven-compiler-plugin - 2.3.2 - - 1.6 - 1.6 + 1.7 + 1.7 From cc60d9aa3ce33c0fac8e19839ea514ac764833d7 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 2 Oct 2014 09:43:31 +0200 Subject: [PATCH 073/115] update documentation with release 2.4.0 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fbeaba5e6f2..a47b5c18a98 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, run: ```sh -bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.3.0 +bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.4.0 ``` You need to install a version matching your Elasticsearch version: @@ -15,7 +15,7 @@ You need to install a version matching your Elasticsearch version: |---------------|-----------------------------|------------| | master | Build from source | See below | | es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | -| es-1.4 | Build from source | [2.4.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.4/#version-240-snapshot-for-elasticsearch-14) | +| es-1.4 | 2.4.0 | [2.4.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.0/#version-240-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.3.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | 
[2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.2.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.1.0/#japanese-kuromoji-analysis-for-elasticsearch) | From ff686ac06a3627ed81d76265c1ac4083fbd85676 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 15 Oct 2014 16:02:11 +0200 Subject: [PATCH 074/115] Tests: index.version.created must be set Due to this [change](https://github.com/elasticsearch/elasticsearch/pull/8018), we need to fix our tests for elasticsearch 1.4.0 and above. Closes #47. (cherry picked from commit 3a90982) --- .../index/analysis/KuromojiAnalysisTests.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index 116652bf84e..412179587ab 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -24,6 +24,8 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ja.JapaneseAnalyzer; import org.apache.lucene.analysis.ja.JapaneseTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.inject.ModulesBuilder; import org.elasticsearch.common.settings.ImmutableSettings; @@ -170,7 +172,10 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { } public AnalysisService createAnalysisService() { - Settings settings = ImmutableSettings.settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/kuromoji_analysis.json").build(); + Settings settings = ImmutableSettings.settingsBuilder() + 
.loadFromClasspath("org/elasticsearch/index/analysis/kuromoji_analysis.json") + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .build(); Index index = new Index("test"); From d847959618d959827a39748c9a91d95c5c978f4f Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Thu, 30 Oct 2014 13:45:42 +0900 Subject: [PATCH 075/115] Tests: Fix randomizedtest fail Closes #49 --- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index 65d834c3006..3e71613d3d9 100644 --- a/pom.xml +++ b/pom.xml @@ -56,6 +56,12 @@ 1.3 test + + com.carrotsearch.randomizedtesting + randomizedtesting-runner + 2.1.10 + test + org.apache.lucene lucene-test-framework From 89d14f9cfea52a34740cc72d05bc995eb05adb82 Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Thu, 30 Oct 2014 14:14:56 +0900 Subject: [PATCH 076/115] Update to Lucene 4.10.2 Closes #50 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 3e71613d3d9..1844a85201d 100644 --- a/pom.xml +++ b/pom.xml @@ -33,8 +33,8 @@ 2.0.0-SNAPSHOT - 4.10.1 - 4.10.1 + 4.10.2 + 4.10.2 1 true onerror From 5ce7e32c1eec93d26a6ad740186486d771aa5773 Mon Sep 17 00:00:00 2001 From: tlrx Date: Wed, 5 Nov 2014 20:22:48 +0100 Subject: [PATCH 077/115] update documentation with release 2.4.1 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index a47b5c18a98..fb26c9bcf0c 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, run: ```sh -bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.4.0 +bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.4.1 ``` You need to install a version matching your Elasticsearch version: @@ -15,7 +15,7 @@ You need to install a version matching your Elasticsearch version: |---------------|-----------------------------|------------| | master | Build 
from source | See below | | es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | -| es-1.4 | 2.4.0 | [2.4.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.0/#version-240-for-elasticsearch-14) | +| es-1.4 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.3.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.2.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.1.0/#japanese-kuromoji-analysis-for-elasticsearch) | From 99b2b82c748b75800085dabd91a217d6f143cf50 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 5 Nov 2014 16:06:43 -0500 Subject: [PATCH 078/115] upgrade to lucene 5 snapshot --- pom.xml | 8 ++++++-- .../index/analysis/KuromojiAnalyzerProvider.java | 2 +- .../index/analysis/KuromojiTokenizerFactory.java | 5 ++--- .../indices/analysis/KuromojiIndicesAnalysis.java | 5 ++--- .../index/analysis/KuromojiAnalysisTests.java | 15 ++++++++++----- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/pom.xml b/pom.xml index 1844a85201d..4a0e5aa5aa7 100644 --- a/pom.xml +++ b/pom.xml @@ -33,8 +33,8 @@ 2.0.0-SNAPSHOT - 4.10.2 - 4.10.2 + 5.0.0 + 5.0.0-snapshot-1636426 1 true onerror @@ -47,6 +47,10 @@ sonatype http://oss.sonatype.org/content/repositories/releases/ + + Lucene snapshots + https://download.elasticsearch.org/lucenesnapshots/maven/ + diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java index beaeac311d6..43fd2f75a73 
100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java @@ -41,7 +41,7 @@ public class KuromojiAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet(), version); + final Set stopWords = Analysis.parseStopWords(env, settings, JapaneseAnalyzer.getDefaultStopSet()); final JapaneseTokenizer.Mode mode = KuromojiTokenizerFactory.getMode(settings); final UserDictionary userDictionary = KuromojiTokenizerFactory.getUserDictionary(env, settings); analyzer = new JapaneseAnalyzer(userDictionary, mode, CharArraySet.copy(stopWords), JapaneseAnalyzer.getDefaultStopTags()); diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java index 99175aa1e5c..6d021c5e748 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java @@ -86,9 +86,8 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { } @Override - public Tokenizer create(Reader reader) { - return new JapaneseTokenizer(reader, userDictionary, - discartPunctuation, mode); + public Tokenizer create() { + return new JapaneseTokenizer(userDictionary, discartPunctuation, mode); } } diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java index e0bec4354ff..222b6657ff7 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -68,9 +68,8 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { } @Override - public Tokenizer create(Reader reader) { - 
return new JapaneseTokenizer(reader, null, true, - Mode.SEARCH); + public Tokenizer create() { + return new JapaneseTokenizer(null, true, Mode.SEARCH); } })); diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index 412179587ab..7d309f7a659 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -89,7 +89,8 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { assertThat(tokenFilter, instanceOf(KuromojiPartOfSpeechFilterFactory.class)); String source = "私は制限スピードを超える。"; String[] expected = new String[]{"私", "は", "制限", "スピード", "を"}; - Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, JapaneseTokenizer.Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer.setReader(new StringReader(source)); assertSimpleTSOutput(tokenFilter.create(tokenizer), expected); } @@ -101,11 +102,13 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { String source = "今夜はロバート先生と話した"; String[] expected_tokens_romanji = new String[]{"kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta"}; - Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, JapaneseTokenizer.Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer.setReader(new StringReader(source)); assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_romanji); - tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer.setReader(new StringReader(source)); String[] expected_tokens_katakana = new String[]{"コンヤ", "ハ", "ロバート", "センセイ", "ト", "ハナシ", "タ"}; 
tokenFilter = analysisService.tokenFilter("kuromoji_readingform"); assertThat(tokenFilter, instanceOf(KuromojiReadingFormFilterFactory.class)); @@ -119,7 +122,8 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { assertThat(tokenFilter, instanceOf(KuromojiKatakanaStemmerFactory.class)); String source = "明後日パーティーに行く予定がある。図書館で資料をコピーしました。"; - Tokenizer tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, JapaneseTokenizer.Mode.SEARCH); + Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer.setReader(new StringReader(source)); // パーティー should be stemmed by default // (min len) コピー should not be stemmed @@ -128,7 +132,8 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { tokenFilter = analysisService.tokenFilter("kuromoji_ks"); assertThat(tokenFilter, instanceOf(KuromojiKatakanaStemmerFactory.class)); - tokenizer = new JapaneseTokenizer(new StringReader(source), null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer.setReader(new StringReader(source)); // パーティー should not be stemmed since min len == 6 // コピー should not be stemmed From c17708660cc923097db95a76e7ddf51f570106f8 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 10 Nov 2014 16:45:08 -0500 Subject: [PATCH 079/115] Upgrade to Lucene 5.0.0-snapshot-1637347 --- pom.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 4a0e5aa5aa7..e63931d8f8d 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ 2.0.0-SNAPSHOT 5.0.0 - 5.0.0-snapshot-1636426 + 5.0.0-snapshot-1637347 1 true onerror @@ -44,12 +44,12 @@ - sonatype - http://oss.sonatype.org/content/repositories/releases/ + Lucene snapshots + https://download.elasticsearch.org/lucenesnapshots/1637347/ - Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/maven/ + sonatype + 
http://oss.sonatype.org/content/repositories/releases/ From 9a4502fdba324d412d49049dae0c084dd0d7e86f Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 24 Nov 2014 05:51:11 -0500 Subject: [PATCH 080/115] Upgrade to Lucene 5.0.0-snapshot-1641343 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index e63931d8f8d..d71be39da84 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ 2.0.0-SNAPSHOT 5.0.0 - 5.0.0-snapshot-1637347 + 5.0.0-snapshot-1641343 1 true onerror @@ -45,7 +45,7 @@ Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/1637347/ + https://download.elasticsearch.org/lucenesnapshots/1641343/ sonatype From b1caeee8fd8a3737a754e0b53a9453b71f10808a Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 2 Dec 2014 18:14:42 +0100 Subject: [PATCH 081/115] Upgrade to Lucene 5.0.0-snapshot-1642891 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index d71be39da84..e38be0672da 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ 2.0.0-SNAPSHOT 5.0.0 - 5.0.0-snapshot-1641343 + 5.0.0-snapshot-1642891 1 true onerror @@ -45,7 +45,7 @@ Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/1641343/ + https://download.elasticsearch.org/lucenesnapshots/1642891/ sonatype From d01d7fa09f404fb1eb99f06d0301bea5b81ca272 Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Mon, 8 Dec 2014 00:23:44 +0900 Subject: [PATCH 082/115] Analysis: Use PreBuiltXXXFactory Closes #34 --- .../analysis/KuromojiIndicesAnalysis.java | 12 +++--- .../KurumojiCharFilterFactoryFactory.java | 38 ------------------- .../KurumojiTokenFilterFactoryFactory.java | 38 ------------------- .../KurumojiTokenizerFactoryFactory.java | 38 ------------------- 4 files changed, 6 insertions(+), 120 deletions(-) delete mode 100644 src/main/java/org/elasticsearch/indices/analysis/KurumojiCharFilterFactoryFactory.java delete mode 100644 
src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenFilterFactoryFactory.java delete mode 100644 src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenizerFactoryFactory.java diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java index 222b6657ff7..ba5d58073a3 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java +++ b/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java @@ -46,7 +46,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { new JapaneseAnalyzer())); indicesAnalysisService.charFilterFactories().put("kuromoji_iteration_mark", - new KurumojiCharFilterFactoryFactory(new CharFilterFactory() { + new PreBuiltCharFilterFactoryFactory(new CharFilterFactory() { @Override public String name() { return "kuromoji_iteration_mark"; @@ -61,7 +61,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { })); indicesAnalysisService.tokenizerFactories().put("kuromoji_tokenizer", - new KurumojiTokenizerFactoryFactory(new TokenizerFactory() { + new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() { @Override public String name() { return "kuromoji_tokenizer"; @@ -74,7 +74,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { })); indicesAnalysisService.tokenFilterFactories().put("kuromoji_baseform", - new KurumojiTokenFilterFactoryFactory(new TokenFilterFactory() { + new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "kuromoji_baseform"; @@ -88,7 +88,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { indicesAnalysisService.tokenFilterFactories().put( "kuromoji_part_of_speech", - new KurumojiTokenFilterFactoryFactory(new TokenFilterFactory() { + new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return 
"kuromoji_part_of_speech"; @@ -103,7 +103,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { indicesAnalysisService.tokenFilterFactories().put( "kuromoji_readingform", - new KurumojiTokenFilterFactoryFactory(new TokenFilterFactory() { + new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "kuromoji_readingform"; @@ -116,7 +116,7 @@ public class KuromojiIndicesAnalysis extends AbstractComponent { })); indicesAnalysisService.tokenFilterFactories().put("kuromoji_stemmer", - new KurumojiTokenFilterFactoryFactory(new TokenFilterFactory() { + new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "kuromoji_stemmer"; diff --git a/src/main/java/org/elasticsearch/indices/analysis/KurumojiCharFilterFactoryFactory.java b/src/main/java/org/elasticsearch/indices/analysis/KurumojiCharFilterFactoryFactory.java deleted file mode 100644 index 3737d81f1a7..00000000000 --- a/src/main/java/org/elasticsearch/indices/analysis/KurumojiCharFilterFactoryFactory.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to Elasticsearch (the "Author") under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Author licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.indices.analysis; - -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.analysis.CharFilterFactory; -import org.elasticsearch.index.analysis.PreBuiltCharFilterFactoryFactory; - -public class KurumojiCharFilterFactoryFactory extends PreBuiltCharFilterFactoryFactory { - private final CharFilterFactory charFilterFactory; - - public KurumojiCharFilterFactoryFactory(CharFilterFactory charFilterFactory) { - super(charFilterFactory); - this.charFilterFactory = charFilterFactory; - } - - @Override - public CharFilterFactory create(String name, Settings settings) { - return charFilterFactory; - } -} diff --git a/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenFilterFactoryFactory.java b/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenFilterFactoryFactory.java deleted file mode 100644 index 2efaa91baa0..00000000000 --- a/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenFilterFactoryFactory.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to Elasticsearch (the "Author") under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Author licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.indices.analysis; - -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory; -import org.elasticsearch.index.analysis.TokenFilterFactory; - -public class KurumojiTokenFilterFactoryFactory extends PreBuiltTokenFilterFactoryFactory { - private final TokenFilterFactory tokenFilterFactory; - - public KurumojiTokenFilterFactoryFactory(TokenFilterFactory tokenFilterFactory) { - super(tokenFilterFactory); - this.tokenFilterFactory = tokenFilterFactory; - } - - @Override - public TokenFilterFactory create(String name, Settings settings) { - return tokenFilterFactory; - } -} diff --git a/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenizerFactoryFactory.java b/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenizerFactoryFactory.java deleted file mode 100644 index 6e5525762ff..00000000000 --- a/src/main/java/org/elasticsearch/indices/analysis/KurumojiTokenizerFactoryFactory.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to Elasticsearch (the "Author") under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Author licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.indices.analysis; - -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; - -public class KurumojiTokenizerFactoryFactory extends PreBuiltTokenizerFactoryFactory { - private final TokenizerFactory tokenizerFactory; - - public KurumojiTokenizerFactoryFactory(TokenizerFactory tokenizerFactory) { - super(tokenizerFactory); - this.tokenizerFactory = tokenizerFactory; - } - - @Override - public TokenizerFactory create(String name, Settings settings) { - return tokenizerFactory; - } -} From 33db1aebcd6df05d6edaf363f4855035142cd72b Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 12 Dec 2014 18:34:32 +0100 Subject: [PATCH 083/115] Depend on elasticsearch-parent To simplify plugins maintenance and provide more value in the future, we are starting to build an `elasticsearch-parent` project. This commit is the first step for this plugin to depend on this new `pom` maven project. --- .gitignore | 3 +- dev-tools/tests.policy | 54 +++++++++++++++ pom.xml | 154 +++-------------------------------------- 3 files changed, 65 insertions(+), 146 deletions(-) create mode 100644 dev-tools/tests.policy diff --git a/.gitignore b/.gitignore index 2660128de5e..9533848e238 100644 --- a/.gitignore +++ b/.gitignore @@ -8,5 +8,6 @@ /.project /.settings /.classpath -/.local-execution-hints.log /plugin_tools +/.local-execution-hints.log +/.local-*-execution-hints.log diff --git a/dev-tools/tests.policy b/dev-tools/tests.policy new file mode 100644 index 00000000000..6afb5025840 --- /dev/null +++ b/dev-tools/tests.policy @@ -0,0 +1,54 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// Policy file to prevent tests from writing outside the test sandbox directory +// PLEASE NOTE: You may need to enable other permissions when new tests are added, +// everything not allowed here is forbidden! + +grant { + // permissions for file access, write access only to sandbox: + permission java.io.FilePermission "<<ALL FILES>>", "read,execute"; + permission java.io.FilePermission "${junit4.childvm.cwd}", "read,execute,write"; + permission java.io.FilePermission "${junit4.childvm.cwd}${/}-", "read,execute,write,delete"; + permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete"; + permission groovy.security.GroovyCodeSourcePermission "/groovy/script"; + + // Allow connecting to the internet anywhere + permission java.net.SocketPermission "*", "accept,listen,connect,resolve"; + + // Basic permissions needed for Lucene / Elasticsearch to work: + permission java.util.PropertyPermission "*", "read,write"; + permission java.lang.reflect.ReflectPermission "*"; + permission java.lang.RuntimePermission "*"; + + // These two *have* to be spelled out separately + permission java.lang.management.ManagementPermission "control"; + permission java.lang.management.ManagementPermission "monitor"; + + permission java.net.NetPermission "*"; + permission java.util.logging.LoggingPermission "control"; + permission javax.management.MBeanPermission "*", "*"; +
permission javax.management.MBeanServerPermission "*"; + permission javax.management.MBeanTrustPermission "*"; + + // Needed for some things in DNS caching in the JVM + permission java.security.SecurityPermission "getProperty.networkaddress.cache.ttl"; + permission java.security.SecurityPermission "getProperty.networkaddress.cache.negative.ttl"; + +}; diff --git a/pom.xml b/pom.xml index e38be0672da..fc203918ab2 100644 --- a/pom.xml +++ b/pom.xml @@ -3,6 +3,13 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 + + + org.elasticsearch + elasticsearch-parent + 2.0.0-SNAPSHOT + + org.elasticsearch elasticsearch-analysis-kuromoji 3.0.0-SNAPSHOT @@ -20,92 +27,46 @@ scm:git:git@github.com:elasticsearch/elasticsearch-analysis-kuromoji.git - scm:git:git@github.com:elasticsearch/elasticsearch-analysis-kuromoji.git - + scm:git:git@github.com:elasticsearch/elasticsearch-analysis-kuromoji.git http://github.com/elasticsearch/elasticsearch-analysis-kuromoji - - org.sonatype.oss - oss-parent - 7 - - - 2.0.0-SNAPSHOT - 5.0.0 - 5.0.0-snapshot-1642891 - 1 - true - onerror - - INFO + - - - Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/1642891/ - - - sonatype - http://oss.sonatype.org/content/repositories/releases/ - - - org.hamcrest hamcrest-all - 1.3 - test com.carrotsearch.randomizedtesting randomizedtesting-runner - 2.1.10 - test org.apache.lucene lucene-test-framework - ${lucene.maven.version} - test org.elasticsearch elasticsearch - ${elasticsearch.version} - compile org.apache.lucene lucene-analyzers-kuromoji - ${lucene.maven.version} - compile log4j log4j - 1.2.17 - runtime org.elasticsearch elasticsearch - ${elasticsearch.version} test-jar - test - - - - org.hamcrest - hamcrest-core - 1.3 - test @@ -140,116 +101,19 @@ org.apache.maven.plugins maven-compiler-plugin - 2.3.2 - - 1.7 - 1.7 - com.carrotsearch.randomizedtesting junit4-maven-plugin 
- 2.0.12 - - - tests - test - - junit4 - - - 20 - pipe,warn - true - - - - - - - - - ${tests.jvms} - - - - - - - **/*Tests.class - **/*Test.class - - - **/Abstract*.class - **/*StressTest.class - - - -Xmx512m - -XX:MaxDirectMemorySize=512m - -Des.logger.prefix= - - ${tests.shuffle} - ${tests.verbose} - ${tests.seed} - ${tests.failfast} - - - ${tests.iters} - ${tests.maxfailures} - ${tests.failfast} - ${tests.class} - ${tests.method} - ${tests.nightly} - ${tests.badapples} - ${tests.weekly} - ${tests.slow} - ${tests.awaitsfix} - ${tests.slow} - ${tests.timeoutSuite} - ${tests.showSuccess} - ${tests.integration} - ${tests.cluster_seed} - ${tests.client.ratio} - ${env.ES_TEST_LOCAL} - ${es.node.mode} - ${es.logger.level} - true - - - - org.apache.maven.plugins maven-surefire-plugin - 2.15 - - true - org.apache.maven.plugins maven-source-plugin - 2.1.2 - - - attach-sources - - jar - - - maven-assembly-plugin From 2d39e415472150807407e65429bd729797da0ef7 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 12 Dec 2014 18:49:53 +0100 Subject: [PATCH 084/115] =?UTF-8?q?[Maven]=C2=A0best=20practice:=20add=20a?= =?UTF-8?q?lways=20groupId?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index fc203918ab2..98994e1f6c8 100644 --- a/pom.xml +++ b/pom.xml @@ -116,6 +116,7 @@ maven-source-plugin + org.apache.maven.plugins maven-assembly-plugin 2.3 From 6a19e38f68b8d3bc87485b1a9c250cacd20be145 Mon Sep 17 00:00:00 2001 From: Satoshi Kimura Date: Mon, 15 Dec 2014 19:08:29 +0900 Subject: [PATCH 085/115] romanji -> romaji --- .../elasticsearch/index/analysis/KuromojiAnalysisTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index 7d309f7a659..fb9f1119fc7 100644 --- 
a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -100,12 +100,12 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { TokenFilterFactory tokenFilter = analysisService.tokenFilter("kuromoji_rf"); assertThat(tokenFilter, instanceOf(KuromojiReadingFormFilterFactory.class)); String source = "今夜はロバート先生と話した"; - String[] expected_tokens_romanji = new String[]{"kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta"}; + String[] expected_tokens_romaji = new String[]{"kon'ya", "ha", "robato", "sensei", "to", "hanashi", "ta"}; Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); tokenizer.setReader(new StringReader(source)); - assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_romanji); + assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_romaji); tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); tokenizer.setReader(new StringReader(source)); From 5ced1949d626c915fca6ac095e414906433d4233 Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Fri, 19 Dec 2014 16:25:51 +0900 Subject: [PATCH 086/115] Docs: revise examples (cherry picked from commit 14ac3b0) --- README.md | 217 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 150 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index fb26c9bcf0c..9de9f7c76fb 100644 --- a/README.md +++ b/README.md @@ -127,32 +127,45 @@ User Dictionary file is placed `ES_HOME/config` directory. 
### example +_Example Settings:_ + ```sh curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' { - "index":{ - "analysis":{ - "tokenizer" : { - "kuromoji_user_dict" : { - "type" : "kuromoji_tokenizer", - "mode" : "extended", - "discard_punctuation" : "false", - "user_dictionary" : "userdict_ja.txt" + "settings": { + "index":{ + "analysis":{ + "tokenizer" : { + "kuromoji_user_dict" : { + "type" : "kuromoji_tokenizer", + "mode" : "extended", + "discard_punctuation" : "false", + "user_dictionary" : "userdict_ja.txt" + } + }, + "analyzer" : { + "my_analyzer" : { + "type" : "custom", + "tokenizer" : "kuromoji_user_dict" + } } - }, - "analyzer" : { - "my_analyzer" : { - "type" : "custom", - "tokenizer" : "kuromoji_user_dict" - } - } + } } } } ' +``` +_Example Request using `_analyze` API :_ + +```sh curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer&pretty' -d '東京スカイツリー' +``` + +_Response :_ + +```json { "tokens" : [ { "token" : "東京", @@ -177,23 +190,36 @@ This acts as a lemmatizer for verbs and adjectives. ### example +_Example Settings:_ + ```sh curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' { - "index":{ - "analysis":{ - "analyzer" : { - "my_analyzer" : { - "tokenizer" : "kuromoji_tokenizer", - "filter" : ["kuromoji_baseform"] + "settings": { + "index":{ + "analysis":{ + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["kuromoji_baseform"] + } } } } } } ' +``` +_Example Request using `_analyze` API :_ + +```sh curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer&pretty' -d '飲み' +``` + +_Response :_ + +```json { "tokens" : [ { "token" : "飲む", @@ -219,32 +245,45 @@ Note that default setting is stoptags.txt include lucene-analyzer-kuromoji.jar. 
### example +_Example Settings:_ + ```sh curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' { - "index":{ - "analysis":{ - "analyzer" : { - "my_analyzer" : { - "tokenizer" : "kuromoji_tokenizer", - "filter" : ["my_posfilter"] - } - }, - "filter" : { - "my_posfilter" : { - "type" : "kuromoji_part_of_speech", - "stoptags" : [ - "助詞-格助詞-一般", - "助詞-終助詞" - ] + "settings": { + "index":{ + "analysis":{ + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["my_posfilter"] + } + }, + "filter" : { + "my_posfilter" : { + "type" : "kuromoji_part_of_speech", + "stoptags" : [ + "助詞-格助詞-一般", + "助詞-終助詞" + ] + } } } } } } ' +``` +_Example Request using `_analyze` API :_ + +```sh curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer&pretty' -d '寿司がおいしいね' +``` + +_Response :_ + +```json { "tokens" : [ { "token" : "寿司", @@ -277,37 +316,50 @@ Note that elasticsearch-analysis-kuromoji built-in `kuromoji_readingform` set de ### example +_Example Settings:_ + ```sh curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' { - "index":{ - "analysis":{ - "analyzer" : { - "romaji_analyzer" : { - "tokenizer" : "kuromoji_tokenizer", - "filter" : ["romaji_readingform"] + "settings": { + "index":{ + "analysis":{ + "analyzer" : { + "romaji_analyzer" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["romaji_readingform"] + }, + "katakana_analyzer" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["katakana_readingform"] + } }, - "katakana_analyzer" : { - "tokenizer" : "kuromoji_tokenizer", - "filter" : ["katakana_readingform"] - } - }, - "filter" : { - "romaji_readingform" : { - "type" : "kuromoji_readingform", - "use_romaji" : true - }, - "katakana_readingform" : { - "type" : "kuromoji_readingform", - "use_romaji" : false + "filter" : { + "romaji_readingform" : { + "type" : "kuromoji_readingform", + "use_romaji" : true + }, + "katakana_readingform" : { + "type" : "kuromoji_readingform", + "use_romaji" : false + } } } } } 
} ' +``` +_Example Request using `_analyze` API :_ + +```sh curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=katakana_analyzer&pretty' -d '寿司' +``` + +_Response :_ + +```json { "tokens" : [ { "token" : "スシ", @@ -317,8 +369,17 @@ curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=katakana_an "position" : 1 } ] } +``` +_Example Request using `_analyze` API :_ + +```sh curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=romaji_analyzer&pretty' -d '寿司' +``` + +_Response :_ + +```json { "tokens" : [ { "token" : "sushi", @@ -345,29 +406,42 @@ The following are settings that can be set for a `kuromoji_stemmer` token filter ### example +_Example Settings:_ + ```sh curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' { - "index":{ - "analysis":{ - "analyzer" : { - "my_analyzer" : { - "tokenizer" : "kuromoji_tokenizer", - "filter" : ["my_katakana_stemmer"] - } - }, - "filter" : { - "my_katakana_stemmer" : { - "type" : "kuromoji_stemmer", - "minimum_length" : 4 + "settings": { + "index":{ + "analysis":{ + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["my_katakana_stemmer"] + } + }, + "filter" : { + "my_katakana_stemmer" : { + "type" : "kuromoji_stemmer", + "minimum_length" : 4 + } } } } } } ' +``` +_Example Request using `_analyze` API :_ + +```sh curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer&pretty' -d 'コピー' +``` + +_Response :_ + +```json { "tokens" : [ { "token" : "コピー", @@ -377,8 +451,17 @@ curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer "position" : 1 } ] } +``` +_Example Request using `_analyze` API :_ + +```sh curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=my_analyzer&pretty' -d 'サーバー' +``` + +_Response :_ + +```json { "tokens" : [ { "token" : "サーバ", From a5e57690855189aaea15f528122e88d8dbc20b8e Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 2 Jan 2015 20:58:21 +0100 Subject: 
[PATCH 087/115] Add sonatype snapshot repository --- pom.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pom.xml b/pom.xml index 98994e1f6c8..c197eb26a96 100644 --- a/pom.xml +++ b/pom.xml @@ -169,4 +169,12 @@ + + + + oss-snapshots + Sonatype OSS Snapshots + https://oss.sonatype.org/content/repositories/snapshots/ + + From b0b7ffe12df288df8e24cb9f72c71e7ea0571c5e Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 2 Jan 2015 21:33:52 +0100 Subject: [PATCH 088/115] Remove old maven profiles (unused) --- pom.xml | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/pom.xml b/pom.xml index c197eb26a96..d820d7d3f26 100644 --- a/pom.xml +++ b/pom.xml @@ -138,38 +138,6 @@ - - - default - - true - - - - - plugin-test - - - tests.plugin - - - - - ${basedir}/elasticsearch/target - - - - - ElasticsearchRepo - ElasticsearchRepo - file://${elasticsearch.lib} - truealways - truealways - - - - - oss-snapshots From aa651a87472b62abef280655b8bbb022975d0dca Mon Sep 17 00:00:00 2001 From: Kenta Okamoto Date: Mon, 9 Feb 2015 16:13:21 +0900 Subject: [PATCH 089/115] Fix typo: MecCab -> MeCab --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9de9f7c76fb..e69e262dfe9 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ Input text is `関西国際空港` and `アブラカダブラ`. ### User Dictionary -Kuromoji tokenizer use MecCab-IPADIC dictionary by default. +Kuromoji tokenizer use MeCab-IPADIC dictionary by default. And Kuromoji is added an entry of dictionary to define by user; this is User Dictionary. 
User Dictionary entries are defined using the following CSV format: From 5d36b3fadb11e2ff09d61716fffc9d1ba9be0650 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 11 Feb 2015 21:49:29 +0100 Subject: [PATCH 090/115] update documentation with release 2.4.2 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e69e262dfe9..1e4c7ca2af7 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, run: ```sh -bin/plugin -install elasticsearch/elasticsearch-analysis-kuromoji/2.4.1 +bin/plugin install elasticsearch/elasticsearch-analysis-kuromoji/2.4.2 ``` You need to install a version matching your Elasticsearch version: @@ -15,7 +15,7 @@ You need to install a version matching your Elasticsearch version: |---------------|-----------------------------|------------| | master | Build from source | See below | | es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | -| es-1.4 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.1/#version-241-for-elasticsearch-14) | +| es-1.4 | 2.4.2 | [2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.2/#version-242-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.3.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.2.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.1.0/#japanese-kuromoji-analysis-for-elasticsearch) | From d0f629b0f54545a9986c1fb7c1fbb8c7abcc793b Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 13 
Feb 2015 16:44:31 +0100 Subject: [PATCH 091/115] Fix doc for es version < 1.4.3 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 1e4c7ca2af7..d6963d14508 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ You need to install a version matching your Elasticsearch version: | master | Build from source | See below | | es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | | es-1.4 | 2.4.2 | [2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.2/#version-242-for-elasticsearch-14) | +| < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.3.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.2.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.1.0/#japanese-kuromoji-analysis-for-elasticsearch) | From 0a0d6fd644fca8b626d96d1e7e4516acd38f9f4f Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Tue, 21 Oct 2014 18:07:00 +0900 Subject: [PATCH 092/115] Add "ja_stop" filter * can use a predefined "_japanese_" stop words * can not use other predefined stop words * upgrade to lucene 5 * add ja_stop to README Closes #45 --- README.md | 45 +++++++++++ .../JapaneseStopTokenFilterFactory.java | 76 +++++++++++++++++++ .../kuromoji/AnalysisKuromojiPlugin.java | 1 + .../index/analysis/KuromojiAnalysisTests.java | 19 ++++- .../index/analysis/kuromoji_analysis.json | 5 +- 5 files changed, 143 insertions(+), 3 deletions(-) create mode 100644 src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java 
diff --git a/README.md b/README.md index d6963d14508..e728be4abe3 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ The plugin includes these analyzer and tokenizer, tokenfilter. | kuromoji_part_of_speech | tokenfilter | | kuromoji_readingform | tokenfilter | | kuromoji_stemmer | tokenfilter | +| ja_stop | tokenfilter | Usage @@ -475,6 +476,50 @@ _Response :_ ``` +## TokenFilter : ja_stop + + +A token filter of type `ja_stop` that provides the predefined "_japanese_" stop words. +*Note: It only provides "_japanese_". If you want to use other predefined stop words, you can use the `stop` token filter.* + +### example + +```sh +curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' +{ + "settings": { + "index":{ + "analysis":{ + "analyzer" : { + "analyzer_with_ja_stop" : { + "tokenizer" : "kuromoji_tokenizer", + "filter" : ["ja_stop"] + } + }, + "filter" : { + "ja_stop" : { + "type" : "ja_stop", + "stopwords" : ["_japanese_", "ストップ"] + } + } + } + } + } +} +' + +curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=analyzer_with_ja_stop&pretty' -d 'ストップは消える' +{ + "tokens" : [ { + "token" : "消える", + "start_offset" : 5, + "end_offset" : 8, + "type" : "word", + "position" : 3 + } ] +} +``` + License ------- diff --git a/src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java new file mode 100644 index 00000000000..e976ec5f821 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java @@ -0,0 +1,76 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.search.suggest.analyzing.SuggestStopFilter; +import org.elasticsearch.common.collect.ImmutableMap; +import org.elasticsearch.common.collect.MapBuilder; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +import java.util.Set; + +public class JapaneseStopTokenFilterFactory extends AbstractTokenFilterFactory{ + + + private final CharArraySet stopWords; + + private final boolean ignoreCase; + + private final boolean removeTrailing; + + @Inject + public JapaneseStopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + this.ignoreCase = settings.getAsBoolean("ignore_case", false); + this.removeTrailing = settings.getAsBoolean("remove_trailing", true); + ImmutableMap<String, Set<?>> namedStopWords = MapBuilder.<String, Set<?>>newMapBuilder() + .put("_japanese_", JapaneseAnalyzer.getDefaultStopSet()) + .immutableMap(); + this.stopWords = Analysis.parseWords(env, settings, "stopwords",
JapaneseAnalyzer.getDefaultStopSet(), namedStopWords, ignoreCase); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + if (removeTrailing) { + return new StopFilter(tokenStream, stopWords); + } else { + return new SuggestStopFilter(tokenStream, stopWords); + } + } + + public Set stopWords() { + return stopWords; + } + + public boolean ignoreCase() { + return ignoreCase; + } + +} diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java index 27d8a3e5b9e..ca2128d0e71 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ -55,5 +55,6 @@ public class AnalysisKuromojiPlugin extends AbstractPlugin { module.addTokenFilter("kuromoji_part_of_speech", KuromojiPartOfSpeechFilterFactory.class); module.addTokenFilter("kuromoji_readingform", KuromojiReadingFormFilterFactory.class); module.addTokenFilter("kuromoji_stemmer", KuromojiKatakanaStemmerFactory.class); + module.addTokenFilter("ja_stop", JapaneseStopTokenFilterFactory.class); } } diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index fb9f1119fc7..e7be9276970 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -71,6 +71,9 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { filterFactory = analysisService.tokenFilter("kuromoji_stemmer"); assertThat(filterFactory, instanceOf(KuromojiKatakanaStemmerFactory.class)); + filterFactory = analysisService.tokenFilter("ja_stop"); + assertThat(filterFactory, instanceOf(JapaneseStopTokenFilterFactory.class)); + NamedAnalyzer analyzer = 
analysisService.analyzer("kuromoji"); assertThat(analyzer.analyzer(), instanceOf(JapaneseAnalyzer.class)); @@ -80,6 +83,7 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { CharFilterFactory charFilterFactory = analysisService.charFilter("kuromoji_iteration_mark"); assertThat(charFilterFactory, instanceOf(KuromojiIterationMarkCharFilterFactory.class)); + } @Test @@ -172,10 +176,21 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { expected = "ところどころ、ジジが、時時、馬鹿馬鹿しい"; assertCharFilterEquals(charFilterFactory.create(new StringReader(source)), expected); - - } + @Test + public void testJapaneseStopFilterFactory() throws IOException { + AnalysisService analysisService = createAnalysisService(); + TokenFilterFactory tokenFilter = analysisService.tokenFilter("ja_stop"); + assertThat(tokenFilter, instanceOf(JapaneseStopTokenFilterFactory.class)); + String source = "私は制限スピードを超える。"; + String[] expected = new String[]{"私", "制限", "超える"}; + Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer.setReader(new StringReader(source)); + assertSimpleTSOutput(tokenFilter.create(tokenizer), expected); + } + + public AnalysisService createAnalysisService() { Settings settings = ImmutableSettings.settingsBuilder() .loadFromClasspath("org/elasticsearch/index/analysis/kuromoji_analysis.json") diff --git a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json index 478a6c1d293..7642e756ceb 100644 --- a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json +++ b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json @@ -13,9 +13,12 @@ "kuromoji_ks" : { "type": "kuromoji_stemmer", "minimum_length" : 6 + }, + "ja_stop" : { + "type": "ja_stop", + "stopwords": ["_japanese_", "スピード"] } - }, "char_filter":{ From 766402870e9f6b69717c3c0808edc5aaed41360c Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: 
Tue, 6 Jan 2015 12:53:20 +0900 Subject: [PATCH 093/115] Docs: revise examples --- README.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e728be4abe3..f8c718d6f52 100644 --- a/README.md +++ b/README.md @@ -476,12 +476,14 @@ _Response :_ ``` -## TokenFilter : kuromoji_part_of_speech +## TokenFilter : ja_stop A token filter of type `ja_stop` that provide a predefined "_japanese_" stop words. *Note: It is only provide "_japanese_". If you want to use other predefined stop words, you can use `stop` token filter.* +_Example Settings:_ + ### example ```sh @@ -505,10 +507,18 @@ curl -XPUT 'http://localhost:9200/kuromoji_sample/' -d' } } } -} -' +}' +``` +_Example Request using `_analyze` API :_ + +```sh curl -XPOST 'http://localhost:9200/kuromoji_sample/_analyze?analyzer=katakana_analyzer&pretty' -d 'ストップは消える' +``` + +_Response :_ + +```json { "tokens" : [ { "token" : "消える", From 03f4b7c9654dbb35d1424c3ec0bb66d8094fb981 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 16 Mar 2015 16:25:04 -0700 Subject: [PATCH 094/115] create `es-1.5` branch --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f8c718d6f52..e9e6eef5e71 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,8 @@ You need to install a version matching your Elasticsearch version: | elasticsearch | Kuromoji Analysis Plugin | Docs | |---------------|-----------------------------|------------| | master | Build from source | See below | -| es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | +| es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) | +| es-1.5 | Build from source | 
[2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.5/#version-250-snapshot-for-elasticsearch-15) | | es-1.4 | 2.4.2 | [2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.2/#version-242-for-elasticsearch-14) | | < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.3.0/#japanese-kuromoji-analysis-for-elasticsearch) | From a21134cd6496e3225b89942b5ec03215dc95ef19 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 24 Mar 2015 18:43:33 +0100 Subject: [PATCH 095/115] Move parent bloc after artifact coordinates --- pom.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pom.xml b/pom.xml index d820d7d3f26..f3c9582cfd9 100644 --- a/pom.xml +++ b/pom.xml @@ -4,12 +4,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - - org.elasticsearch - elasticsearch-parent - 2.0.0-SNAPSHOT - - org.elasticsearch elasticsearch-analysis-kuromoji 3.0.0-SNAPSHOT @@ -31,6 +25,12 @@ http://github.com/elasticsearch/elasticsearch-analysis-kuromoji + + org.elasticsearch + elasticsearch-parent + 2.0.0-SNAPSHOT + + From 414ad35802517e37255522df3dc7ff034b747b45 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 24 Mar 2015 18:44:49 +0100 Subject: [PATCH 096/115] Move to elastic owner (cherry picked from commit 01acf2f) (cherry picked from commit 5c93c2b) --- pom.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index f3c9582cfd9..1845da9de14 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ jar Elasticsearch Japanese (kuromoji) Analysis plugin The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. 
- https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/ + https://github.com/elastic/elasticsearch-analysis-kuromoji/ 2009 @@ -20,9 +20,9 @@ - scm:git:git@github.com:elasticsearch/elasticsearch-analysis-kuromoji.git - scm:git:git@github.com:elasticsearch/elasticsearch-analysis-kuromoji.git - http://github.com/elasticsearch/elasticsearch-analysis-kuromoji + scm:git:git@github.com:elastic/elasticsearch-analysis-kuromoji.git + scm:git:git@github.com:elastic/elasticsearch-analysis-kuromoji.git + http://github.com/elastic/elasticsearch-analysis-kuromoji From 216828615954450e960e60293a67eaa2fdea3e59 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 24 Mar 2015 18:52:15 +0100 Subject: [PATCH 097/115] update documentation with release 2.5.0 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e9e6eef5e71..b1d40ccbbd2 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, run: ```sh -bin/plugin install elasticsearch/elasticsearch-analysis-kuromoji/2.4.2 +bin/plugin install elasticsearch/elasticsearch-analysis-kuromoji/2.5.0 ``` You need to install a version matching your Elasticsearch version: @@ -15,7 +15,7 @@ You need to install a version matching your Elasticsearch version: |---------------|-----------------------------|------------| | master | Build from source | See below | | es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) | -| es-1.5 | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.5/#version-250-snapshot-for-elasticsearch-15) | +| es-1.5 | 2.5.0 | [2.5.0](https://github.com/elastic/elasticsearch-analysis-kuromoji/tree/v2.5.0/#version-250-for-elasticsearch-15) | | es-1.4 | 2.4.2 | 
[2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.2/#version-242-for-elasticsearch-14) | | < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.3.0/#japanese-kuromoji-analysis-for-elasticsearch) | From 2af932d569096e289edbd48cf5aa18d50a40a1ee Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 Apr 2015 15:39:48 +0200 Subject: [PATCH 098/115] [doc] update compatibility matrix for elasticsearch 1.4.5 Closes #58. --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b1d40ccbbd2..f84b5514ec1 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,9 @@ You need to install a version matching your Elasticsearch version: | master | Build from source | See below | | es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) | | es-1.5 | 2.5.0 | [2.5.0](https://github.com/elastic/elasticsearch-analysis-kuromoji/tree/v2.5.0/#version-250-for-elasticsearch-15) | -| es-1.4 | 2.4.2 | [2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.2/#version-242-for-elasticsearch-14) | -| < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.1/#version-241-for-elasticsearch-14) | +| es-1.4 | 2.4.3 | [2.4.3-SNAPSHOT](https://github.com/elastic/elasticsearch-analysis-kuromoji/tree/es-1.4/#version-243-snapshot-for-elasticsearch-14) | +| < 1.4.5 | 2.4.2 | [2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.2/#version-242-for-elasticsearch-14) | +| < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | 
[2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.3.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.2.0/#japanese-kuromoji-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.1.0/#japanese-kuromoji-analysis-for-elasticsearch) | From ac603eabd9d1fcb36150e4bbe96bf5d6d146d202 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 Apr 2015 15:47:15 +0200 Subject: [PATCH 099/115] update documentation with release 2.4.3 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f84b5514ec1..6926a16c15b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, run: ```sh -bin/plugin install elasticsearch/elasticsearch-analysis-kuromoji/2.5.0 +bin/plugin install elasticsearch/elasticsearch-analysis-kuromoji/2.4.3 ``` You need to install a version matching your Elasticsearch version: @@ -16,7 +16,7 @@ You need to install a version matching your Elasticsearch version: | master | Build from source | See below | | es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) | | es-1.5 | 2.5.0 | [2.5.0](https://github.com/elastic/elasticsearch-analysis-kuromoji/tree/v2.5.0/#version-250-for-elasticsearch-15) | -| es-1.4 | 2.4.3 | [2.4.3-SNAPSHOT](https://github.com/elastic/elasticsearch-analysis-kuromoji/tree/es-1.4/#version-243-snapshot-for-elasticsearch-14) | +| es-1.4 | 2.4.3 | [2.4.3](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.3/#version-243-for-elasticsearch-14) | | < 1.4.5 | 2.4.2 | 
[2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.2/#version-242-for-elasticsearch-14) | | < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji/tree/v2.3.0/#japanese-kuromoji-analysis-for-elasticsearch) | From 7d7b45beede22a50ca6db845a19f939558564549 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 Apr 2015 15:48:48 +0200 Subject: [PATCH 100/115] Latest version is 2.5.0 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6926a16c15b..65e37c88b52 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis modu In order to install the plugin, run: ```sh -bin/plugin install elasticsearch/elasticsearch-analysis-kuromoji/2.4.3 +bin/plugin install elasticsearch/elasticsearch-analysis-kuromoji/2.5.0 ``` You need to install a version matching your Elasticsearch version: From 7c05b8ebf522bb451437bc7054d400761551a974 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 5 May 2015 12:47:37 -0400 Subject: [PATCH 101/115] Tests: fix tests not to use CWD --- .../org/elasticsearch/index/analysis/KuromojiAnalysisTests.java | 1 + .../elasticsearch/index/analysis/KuromojiIntegrationTests.java | 1 + 2 files changed, 2 insertions(+) diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index e7be9276970..73fd78047bf 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -193,6 +193,7 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { public AnalysisService createAnalysisService() { Settings settings = 
ImmutableSettings.settingsBuilder() + .put("path.home", createTempDir()) .loadFromClasspath("org/elasticsearch/index/analysis/kuromoji_analysis.json") .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .build(); diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java index a24acdf6b37..1ab0fe59d81 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java @@ -42,6 +42,7 @@ public class KuromojiIntegrationTests extends ElasticsearchIntegrationTest { protected Settings nodeSettings(int nodeOrdinal) { return ImmutableSettings.builder() .put(super.nodeSettings(nodeOrdinal)) + .put("path.home", createTempDir()) .put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true) .build(); } From e9b85b97ba990dbf8b54422b503fbc5b14b93035 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 11 May 2015 18:49:42 -0400 Subject: [PATCH 102/115] enable security manager in tests --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 1845da9de14..0e9f9569ddb 100644 --- a/pom.xml +++ b/pom.xml @@ -33,6 +33,7 @@ + true From 3417ae8d7f1ab4bc992c9695b1d509f08473e579 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 11 May 2015 19:42:40 -0400 Subject: [PATCH 103/115] Remove outdated policy file --- dev-tools/tests.policy | 54 ------------------------------------------ 1 file changed, 54 deletions(-) delete mode 100644 dev-tools/tests.policy diff --git a/dev-tools/tests.policy b/dev-tools/tests.policy deleted file mode 100644 index 6afb5025840..00000000000 --- a/dev-tools/tests.policy +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// Policy file to prevent tests from writing outside the test sandbox directory -// PLEASE NOTE: You may need to enable other permissions when new tests are added, -// everything not allowed here is forbidden! - -grant { - // permissions for file access, write access only to sandbox: - permission java.io.FilePermission "<>", "read,execute"; - permission java.io.FilePermission "${junit4.childvm.cwd}", "read,execute,write"; - permission java.io.FilePermission "${junit4.childvm.cwd}${/}-", "read,execute,write,delete"; - permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete"; - permission groovy.security.GroovyCodeSourcePermission "/groovy/script"; - - // Allow connecting to the internet anywhere - permission java.net.SocketPermission "*", "accept,listen,connect,resolve"; - - // Basic permissions needed for Lucene / Elasticsearch to work: - permission java.util.PropertyPermission "*", "read,write"; - permission java.lang.reflect.ReflectPermission "*"; - permission java.lang.RuntimePermission "*"; - - // These two *have* to be spelled out a separate - permission java.lang.management.ManagementPermission "control"; - permission java.lang.management.ManagementPermission "monitor"; - - permission java.net.NetPermission "*"; - permission java.util.logging.LoggingPermission "control"; - permission javax.management.MBeanPermission "*", "*"; - 
permission javax.management.MBeanServerPermission "*"; - permission javax.management.MBeanTrustPermission "*"; - - // Needed for some things in DNS caching in the JVM - permission java.security.SecurityPermission "getProperty.networkaddress.cache.ttl"; - permission java.security.SecurityPermission "getProperty.networkaddress.cache.negative.ttl"; - -}; From 22601a477fd2d7ebcbf2eee7b4ca26a6203e7909 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 11 May 2015 21:24:26 -0400 Subject: [PATCH 104/115] remove unnecessary prop --- pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/pom.xml b/pom.xml index 0e9f9569ddb..1845da9de14 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,6 @@ - true From a3dce667b18c1587b6392f44faa957b863cd521c Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 13 May 2015 12:22:32 -0400 Subject: [PATCH 105/115] Fix compilation (use the new UserDictionary.open) Relates to #59 --- .../elasticsearch/index/analysis/KuromojiTokenizerFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java index 6d021c5e748..31b759c5e65 100644 --- a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java @@ -60,7 +60,7 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory { return null; } else { try { - return new UserDictionary(reader); + return UserDictionary.open(reader); } finally { reader.close(); } From 330911389a77957a61cabb890140f86781d45c93 Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Thu, 14 May 2015 13:41:44 +0900 Subject: [PATCH 106/115] Add user dictionary test case Closes #59 --- README.md | 2 +- pom.xml | 1 + .../index/analysis/KuromojiAnalysisTests.java | 20 +++++++++++++++++++ .../index/analysis/empty_user_dict.txt | 0 
.../index/analysis/kuromoji_analysis.json | 11 ++++++++-- .../index/analysis/user_dict.txt | 1 + 6 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 src/test/java/org/elasticsearch/index/analysis/empty_user_dict.txt create mode 100644 src/test/java/org/elasticsearch/index/analysis/user_dict.txt diff --git a/README.md b/README.md index 65e37c88b52..42346cb31b2 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ The following are settings that can be set for a `kuromoji_tokenizer` tokenizer |:--------------------|:--------------------------------------------------------------------------------------------------------------------------|:------------------| | mode | Tokenization mode: this determines how the tokenizer handles compound and unknown words. `normal` and `search`, `extended`| `search` | | discard_punctuation | `true` if punctuation tokens should be dropped from the output. | `true` | -| user_dict | set User Dictionary file | | +| user_dictionary | set User Dictionary file | | ### Tokenization mode diff --git a/pom.xml b/pom.xml index 1845da9de14..447e278d287 100644 --- a/pom.xml +++ b/pom.xml @@ -87,6 +87,7 @@ src/test/java **/*.json + **/*.txt
diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index 73fd78047bf..02749c7c952 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -245,4 +245,24 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { return buffer.toString(); } + @Test + public void testKuromojiUserDict() throws IOException { + AnalysisService analysisService = createAnalysisService(); + TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_user_dict"); + String source = "私は制限スピードを超える。"; + String[] expected = new String[]{"私", "は", "制限スピード", "を", "超える"}; + + Tokenizer tokenizer = tokenizerFactory.create(); + tokenizer.setReader(new StringReader(source)); + assertSimpleTSOutput(tokenizer, expected); + } + + // fix #59 + @Test + public void testKuromojiEmptyUserDict() { + AnalysisService analysisService = createAnalysisService(); + TokenizerFactory tokenizerFactory = analysisService.tokenizer("kuromoji_empty_user_dict"); + assertThat(tokenizerFactory, instanceOf(KuromojiTokenizerFactory.class)); + } + } diff --git a/src/test/java/org/elasticsearch/index/analysis/empty_user_dict.txt b/src/test/java/org/elasticsearch/index/analysis/empty_user_dict.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json index 7642e756ceb..a36b4ae2197 100644 --- a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json +++ b/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json @@ -39,9 +39,16 @@ "tokenizer" : { "kuromoji" : { - "type":"kuromoji_tokenizer" + "type":"kuromoji_tokenizer" + }, + "kuromoji_empty_user_dict" : { + "type":"kuromoji_tokenizer", + 
"user_dictionary":"org/elasticsearch/index/analysis/empty_user_dict.txt" + }, + "kuromoji_user_dict" : { + "type":"kuromoji_tokenizer", + "user_dictionary":"org/elasticsearch/index/analysis/user_dict.txt" } - }, "analyzer" : { "my_analyzer" : { diff --git a/src/test/java/org/elasticsearch/index/analysis/user_dict.txt b/src/test/java/org/elasticsearch/index/analysis/user_dict.txt new file mode 100644 index 00000000000..54b59d66130 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/analysis/user_dict.txt @@ -0,0 +1 @@ +制限スピード,制限スピード,セイゲンスピード,テスト名詞 From 278fc4e30f0bcebc86fd226ee9ccccf28aecc575 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 21 May 2015 10:33:12 -0400 Subject: [PATCH 107/115] engage forbidden apis --- pom.xml | 66 ++------------------------------------------------------- 1 file changed, 2 insertions(+), 64 deletions(-) diff --git a/pom.xml b/pom.xml index 447e278d287..c8cc499935d 100644 --- a/pom.xml +++ b/pom.xml @@ -40,14 +40,12 @@ org.hamcrest hamcrest-all - - com.carrotsearch.randomizedtesting - randomizedtesting-runner - + org.apache.lucene lucene-test-framework + org.elasticsearch elasticsearch @@ -71,70 +69,10 @@ - - - - src/main/resources - true - - **/*.properties - - - - - - - src/test/java - - **/*.json - **/*.txt - - - - src/test/resources - - **/*.* - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - com.carrotsearch.randomizedtesting - junit4-maven-plugin - - - - org.apache.maven.plugins - maven-surefire-plugin - - - org.apache.maven.plugins - maven-source-plugin - org.apache.maven.plugins maven-assembly-plugin - 2.3 - - false - ${project.build.directory}/releases/ - - ${basedir}/src/main/assemblies/plugin.xml - - - - - package - - single - - - From 99867c74b816c06294835179597dee9250755fe6 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 21 May 2015 16:47:42 -0400 Subject: [PATCH 108/115] remove duplicate test config --- pom.xml | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git 
a/pom.xml b/pom.xml index c8cc499935d..218cd0f3d9d 100644 --- a/pom.xml +++ b/pom.xml @@ -36,36 +36,10 @@ - - org.hamcrest - hamcrest-all - - - - org.apache.lucene - lucene-test-framework - - - - org.elasticsearch - elasticsearch - - org.apache.lucene lucene-analyzers-kuromoji - - - log4j - log4j - - - - org.elasticsearch - elasticsearch - test-jar - From 48d4fc716f9d06209b565ddc2fdf85dd46722f18 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 21 May 2015 22:51:02 -0400 Subject: [PATCH 109/115] switch to plugin pom --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 218cd0f3d9d..01d457c6ffe 100644 --- a/pom.xml +++ b/pom.xml @@ -27,7 +27,7 @@ org.elasticsearch - elasticsearch-parent + elasticsearch-plugin 2.0.0-SNAPSHOT From a7f7256f195ba88f3d9b37a16723c2c3f9dda365 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 22 May 2015 08:38:18 -0400 Subject: [PATCH 110/115] respect es.logger.level, TODO: fix this in es-parent for all plugins --- src/test/resources/log4j.properties | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties index 497c97f9959..526e22d4de0 100644 --- a/src/test/resources/log4j.properties +++ b/src/test/resources/log4j.properties @@ -1,4 +1,5 @@ -log4j.rootLogger=INFO, out +es.logger.level=INFO +log4j.rootLogger=${es.logger.level}, out log4j.appender.out=org.apache.log4j.ConsoleAppender log4j.appender.out.layout=org.apache.log4j.PatternLayout From 8182efb848ca21d49040a03ec854137c7011fa3e Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 22 May 2015 08:51:53 -0400 Subject: [PATCH 111/115] Don't set path.home for integration test --- .../elasticsearch/index/analysis/KuromojiIntegrationTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java index 
1ab0fe59d81..a24acdf6b37 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java @@ -42,7 +42,6 @@ public class KuromojiIntegrationTests extends ElasticsearchIntegrationTest { protected Settings nodeSettings(int nodeOrdinal) { return ImmutableSettings.builder() .put(super.nodeSettings(nodeOrdinal)) - .put("path.home", createTempDir()) .put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true) .build(); } From 07940f1e600133c8af9f0e86f5892c4e488af0c9 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 22 May 2015 09:21:30 -0400 Subject: [PATCH 112/115] remove logging properties --- .gitignore | 1 + src/test/resources/log4j.properties | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) delete mode 100644 src/test/resources/log4j.properties diff --git a/.gitignore b/.gitignore index 9533848e238..6f32f60a650 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ /plugin_tools /.local-execution-hints.log /.local-*-execution-hints.log +/eclipse-build/ diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties deleted file mode 100644 index 526e22d4de0..00000000000 --- a/src/test/resources/log4j.properties +++ /dev/null @@ -1,6 +0,0 @@ -es.logger.level=INFO -log4j.rootLogger=${es.logger.level}, out - -log4j.appender.out=org.apache.log4j.ConsoleAppender -log4j.appender.out.layout=org.apache.log4j.PatternLayout -log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c] %m%n From 8c526194364563f4e1e7d71dfec2f18c1ea327a8 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 26 May 2015 08:03:20 -0400 Subject: [PATCH 113/115] Absorb ImmutableSettings into Settings --- .../elasticsearch/index/analysis/KuromojiAnalysisTests.java | 3 +-- .../elasticsearch/index/analysis/KuromojiIntegrationTests.java | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git 
a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index 02749c7c952..0f6b5095e2d 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -28,7 +28,6 @@ import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.inject.ModulesBuilder; -import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.SettingsModule; import org.elasticsearch.env.Environment; @@ -192,7 +191,7 @@ public class KuromojiAnalysisTests extends ElasticsearchTestCase { public AnalysisService createAnalysisService() { - Settings settings = ImmutableSettings.settingsBuilder() + Settings settings = Settings.settingsBuilder() .put("path.home", createTempDir()) .loadFromClasspath("org/elasticsearch/index/analysis/kuromoji_analysis.json") .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java index a24acdf6b37..a1c4e8c189e 100644 --- a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java @@ -20,7 +20,6 @@ package org.elasticsearch.index.analysis; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ -40,7 +39,7 @@ public 
class KuromojiIntegrationTests extends ElasticsearchIntegrationTest { @Override protected Settings nodeSettings(int nodeOrdinal) { - return ImmutableSettings.builder() + return Settings.builder() .put(super.nodeSettings(nodeOrdinal)) .put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true) .build(); From 325188281ff2a8dd49fca29a34f8aa3405e210e9 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 4 Jun 2015 22:21:15 +0200 Subject: [PATCH 114/115] Fix analysis-kuromoji to not use shaded APIs --- .../index/analysis/JapaneseStopTokenFilterFactory.java | 4 ++-- .../plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java index e976ec5f821..433d03d9836 100644 --- a/src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java @@ -25,7 +25,6 @@ import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.ja.JapaneseAnalyzer; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.search.suggest.analyzing.SuggestStopFilter; -import org.elasticsearch.common.collect.ImmutableMap; import org.elasticsearch.common.collect.MapBuilder; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; @@ -34,6 +33,7 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; +import java.util.Map; import java.util.Set; public class JapaneseStopTokenFilterFactory extends AbstractTokenFilterFactory{ @@ -50,7 +50,7 @@ public class JapaneseStopTokenFilterFactory extends AbstractTokenFilterFactory{ super(index, indexSettings, name, settings); this.ignoreCase = 
settings.getAsBoolean("ignore_case", false); this.removeTrailing = settings.getAsBoolean("remove_trailing", true); - ImmutableMap> namedStopWords = MapBuilder.>newMapBuilder() + Map> namedStopWords = MapBuilder.>newMapBuilder() .put("_japanese_", JapaneseAnalyzer.getDefaultStopSet()) .immutableMap(); this.stopWords = Analysis.parseWords(env, settings, "stopwords", JapaneseAnalyzer.getDefaultStopSet(), namedStopWords, ignoreCase); diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java index ca2128d0e71..88d3cee3037 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ -19,12 +19,12 @@ package org.elasticsearch.plugin.analysis.kuromoji; -import org.elasticsearch.common.collect.ImmutableList; import org.elasticsearch.common.inject.Module; import org.elasticsearch.index.analysis.*; import org.elasticsearch.indices.analysis.KuromojiIndicesAnalysisModule; import org.elasticsearch.plugins.AbstractPlugin; +import java.util.ArrayList; import java.util.Collection; /** @@ -44,7 +44,9 @@ public class AnalysisKuromojiPlugin extends AbstractPlugin { @Override public Collection> modules() { - return ImmutableList.>of(KuromojiIndicesAnalysisModule.class); + Collection> classes = new ArrayList<>(); + classes.add(KuromojiIndicesAnalysisModule.class); + return classes; } public void onModule(AnalysisModule module) { From 9b41b94459f8ad25aeca93012e68228bc1c3828c Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 5 Jun 2015 13:12:07 +0200 Subject: [PATCH 115/115] add analysis-kuromoji module --- .gitignore | 14 -- CONTRIBUTING.md | 98 --------- LICENSE.txt | 202 ------------------ dev-tools/release.py | 134 ------------ .../analysis-kuromoji/README.md | 0 pom.xml => plugins/analysis-kuromoji/pom.xml | 25 +-- 
.../src}/main/assemblies/plugin.xml | 0 .../JapaneseStopTokenFilterFactory.java | 0 .../analysis/KuromojiAnalyzerProvider.java | 0 .../KuromojiBaseFormFilterFactory.java | 0 ...uromojiIterationMarkCharFilterFactory.java | 0 .../KuromojiKatakanaStemmerFactory.java | 0 .../KuromojiPartOfSpeechFilterFactory.java | 0 .../KuromojiReadingFormFilterFactory.java | 0 .../analysis/KuromojiTokenizerFactory.java | 0 .../analysis/KuromojiIndicesAnalysis.java | 0 .../KuromojiIndicesAnalysisModule.java | 0 .../kuromoji/AnalysisKuromojiPlugin.java | 0 .../src}/main/resources/es-plugin.properties | 0 .../index/analysis/KuromojiAnalysisTests.java | 0 .../analysis/KuromojiIntegrationTests.java | 0 .../index/analysis/empty_user_dict.txt | 0 .../index/analysis/kuromoji_analysis.json | 0 .../index/analysis/user_dict.txt | 0 24 files changed, 2 insertions(+), 471 deletions(-) delete mode 100644 .gitignore delete mode 100644 CONTRIBUTING.md delete mode 100644 LICENSE.txt delete mode 100644 dev-tools/release.py rename README.md => plugins/analysis-kuromoji/README.md (100%) rename pom.xml => plugins/analysis-kuromoji/pom.xml (56%) rename {src => plugins/analysis-kuromoji/src}/main/assemblies/plugin.xml (100%) rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java (100%) rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java (100%) rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java (100%) rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java (100%) rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java (100%) rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java (100%) 
rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java (100%) rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java (100%) rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java (100%) rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java (100%) rename {src => plugins/analysis-kuromoji/src}/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java (100%) rename {src => plugins/analysis-kuromoji/src}/main/resources/es-plugin.properties (100%) rename {src => plugins/analysis-kuromoji/src}/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java (100%) rename {src => plugins/analysis-kuromoji/src}/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java (100%) rename {src => plugins/analysis-kuromoji/src}/test/java/org/elasticsearch/index/analysis/empty_user_dict.txt (100%) rename {src => plugins/analysis-kuromoji/src}/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json (100%) rename {src => plugins/analysis-kuromoji/src}/test/java/org/elasticsearch/index/analysis/user_dict.txt (100%) diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 6f32f60a650..00000000000 --- a/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -/data -/work -/logs -/.idea -/target -.DS_Store -*.iml -/.project -/.settings -/.classpath -/plugin_tools -/.local-execution-hints.log -/.local-*-execution-hints.log -/eclipse-build/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 6afaf89b28f..00000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,98 +0,0 @@ -Contributing to elasticsearch -============================= - -Elasticsearch is an open source project and we love to receive contributions from our community — you! 
There are many ways to contribute, from writing tutorials or blog posts, improving the documentation, submitting bug reports and feature requests or writing code which can be incorporated into Elasticsearch itself. - -Bug reports ------------ - -If you think you have found a bug in Elasticsearch, first make sure that you are testing against the [latest version of Elasticsearch](http://www.elasticsearch.org/download/) - your issue may already have been fixed. If not, search our [issues list](https://github.com/elasticsearch/elasticsearch/issues) on GitHub in case a similar issue has already been opened. - -It is very helpful if you can prepare a reproduction of the bug. In other words, provide a small test case which we can run to confirm your bug. It makes it easier to find the problem and to fix it. Test cases should be provided as `curl` commands which we can copy and paste into a terminal to run it locally, for example: - -```sh -# delete the index -curl -XDELETE localhost:9200/test - -# insert a document -curl -XPUT localhost:9200/test/test/1 -d '{ - "title": "test document" -}' - -# this should return XXXX but instead returns YYY -curl .... -``` - -Provide as much information as you can. You may think that the problem lies with your query, when actually it depends on how your data is indexed. The easier it is for us to recreate your problem, the faster it is likely to be fixed. - -Feature requests ----------------- - -If you find yourself wishing for a feature that doesn't exist in Elasticsearch, you are probably not alone. There are bound to be others out there with similar needs. Many of the features that Elasticsearch has today have been added because our users saw the need. -Open an issue on our [issues list](https://github.com/elasticsearch/elasticsearch/issues) on GitHub which describes the feature you would like to see, why you need it, and how it should work. 
- -Contributing code and documentation changes -------------------------------------------- - -If you have a bugfix or new feature that you would like to contribute to Elasticsearch, please find or open an issue about it first. Talk about what you would like to do. It may be that somebody is already working on it, or that there are particular issues that you should know about before implementing the change. - -We enjoy working with contributors to get their code accepted. There are many approaches to fixing a problem and it is important to find the best approach before writing too much code. - -The process for contributing to any of the [Elasticsearch repositories](https://github.com/elasticsearch/) is similar. Details for individual projects can be found below. - -### Fork and clone the repository - -You will need to fork the main Elasticsearch code or documentation repository and clone it to your local machine. See -[github help page](https://help.github.com/articles/fork-a-repo) for help. - -Further instructions for specific projects are given below. - -### Submitting your changes - -Once your changes and tests are ready to submit for review: - -1. Test your changes -Run the test suite to make sure that nothing is broken. - -2. Sign the Contributor License Agreement -Please make sure you have signed our [Contributor License Agreement](http://www.elasticsearch.org/contributor-agreement/). We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction. We ask this of all contributors in order to assure our users of the origin and continuing existence of the code. You only need to sign the CLA once. - -3. Rebase your changes -Update your local repository with the most recent code from the main Elasticsearch repository, and rebase your branch on top of the latest master branch. We prefer your changes to be squashed into a single commit. - -4. 
Submit a pull request -Push your local changes to your forked copy of the repository and [submit a pull request](https://help.github.com/articles/using-pull-requests). In the pull request, describe what your changes do and mention the number of the issue where discussion has taken place, eg "Closes #123". - -Then sit back and wait. There will probably be discussion about the pull request and, if any changes are needed, we would love to work with you to get your pull request merged into Elasticsearch. - - -Contributing to the Elasticsearch plugin ----------------------------------------- - -**Repository:** [https://github.com/elasticsearch/elasticsearch-analysis-kuromoji](https://github.com/elasticsearch/elasticsearch-analysis-kuromoji) - -Make sure you have [Maven](http://maven.apache.org) installed, as Elasticsearch uses it as its build system. Integration with IntelliJ and Eclipse should work out of the box. Eclipse users can automatically configure their IDE by running `mvn eclipse:eclipse` and then importing the project into their workspace: `File > Import > Existing project into workspace`. - -Please follow these formatting guidelines: - -* Java indent is 4 spaces -* Line width is 140 characters -* The rest is left to Java coding standards -* Disable “auto-format on save” to prevent unnecessary format changes. This makes reviews much harder as it generates unnecessary formatting changes. If your IDE supports formatting only modified chunks that is fine to do. - -To create a distribution from the source, simply run: - -```sh -cd elasticsearch-analysis-kuromoji/ -mvn clean package -DskipTests -``` - -You will find the newly built packages under: `./target/releases/`. 
- -Before submitting your changes, run the test suite to make sure that nothing is broken, with: - -```sh -mvn clean test -``` - -Source: [Contributing to elasticsearch](http://www.elasticsearch.org/contributing-to-elasticsearch/) diff --git a/LICENSE.txt b/LICENSE.txt deleted file mode 100644 index d6456956733..00000000000 --- a/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. 
- - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. 
- - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/dev-tools/release.py b/dev-tools/release.py deleted file mode 100644 index edcc637d068..00000000000 --- a/dev-tools/release.py +++ /dev/null @@ -1,134 +0,0 @@ -# Licensed to Elasticsearch under one or more contributor -# license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright -# ownership. Elasticsearch licenses this file to you under -# the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on -# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -import datetime -import os -import shutil -import sys -import time -import urllib -import urllib.request -import zipfile - -from os.path import dirname, abspath - -""" - This tool builds a release from the a given elasticsearch plugin branch. - - It is basically a wrapper on top of launch_release.py which: - - - tries to get a more recent version of launch_release.py in ... 
- - download it if needed - - launch it passing all arguments to it, like: - - $ python3 dev_tools/release.py --branch master --publish --remote origin - - Important options: - - # Dry run - $ python3 dev_tools/release.py - - # Dry run without tests - python3 dev_tools/release.py --skiptests - - # Release, publish artifacts and announce - $ python3 dev_tools/release.py --publish - - See full documentation in launch_release.py -""" -env = os.environ - -# Change this if the source repository for your scripts is at a different location -SOURCE_REPO = 'elasticsearch/elasticsearch-plugins-script' -# We define that we should download again the script after 1 days -SCRIPT_OBSOLETE_DAYS = 1 -# We ignore in master.zip file the following files -IGNORED_FILES = ['.gitignore', 'README.md'] - - -ROOT_DIR = abspath(os.path.join(abspath(dirname(__file__)), '../')) -TARGET_TOOLS_DIR = ROOT_DIR + '/plugin_tools' -DEV_TOOLS_DIR = ROOT_DIR + '/dev-tools' -BUILD_RELEASE_FILENAME = 'release.zip' -BUILD_RELEASE_FILE = TARGET_TOOLS_DIR + '/' + BUILD_RELEASE_FILENAME -SOURCE_URL = 'https://github.com/%s/archive/master.zip' % SOURCE_REPO - -# Download a recent version of the release plugin tool -try: - os.mkdir(TARGET_TOOLS_DIR) - print('directory %s created' % TARGET_TOOLS_DIR) -except FileExistsError: - pass - - -try: - # we check latest update. 
If we ran an update recently, we - # are not going to check it again - download = True - - try: - last_download_time = datetime.datetime.fromtimestamp(os.path.getmtime(BUILD_RELEASE_FILE)) - if (datetime.datetime.now()-last_download_time).days < SCRIPT_OBSOLETE_DAYS: - download = False - except FileNotFoundError: - pass - - if download: - urllib.request.urlretrieve(SOURCE_URL, BUILD_RELEASE_FILE) - with zipfile.ZipFile(BUILD_RELEASE_FILE) as myzip: - for member in myzip.infolist(): - filename = os.path.basename(member.filename) - # skip directories - if not filename: - continue - if filename in IGNORED_FILES: - continue - - # copy file (taken from zipfile's extract) - source = myzip.open(member.filename) - target = open(os.path.join(TARGET_TOOLS_DIR, filename), "wb") - with source, target: - shutil.copyfileobj(source, target) - # We keep the original date - date_time = time.mktime(member.date_time + (0, 0, -1)) - os.utime(os.path.join(TARGET_TOOLS_DIR, filename), (date_time, date_time)) - print('plugin-tools updated from %s' % SOURCE_URL) -except urllib.error.HTTPError: - pass - - -# Let see if we need to update the release.py script itself -source_time = os.path.getmtime(TARGET_TOOLS_DIR + '/release.py') -repo_time = os.path.getmtime(DEV_TOOLS_DIR + '/release.py') -if source_time > repo_time: - input('release.py needs an update. 
Press a key to update it...') - shutil.copyfile(TARGET_TOOLS_DIR + '/release.py', DEV_TOOLS_DIR + '/release.py') - -# We can launch the build process -try: - PYTHON = 'python' - # make sure python3 is used if python3 is available - # some systems use python 2 as default - os.system('python3 --version > /dev/null 2>&1') - PYTHON = 'python3' -except RuntimeError: - pass - -release_args = '' -for x in range(1, len(sys.argv)): - release_args += ' ' + sys.argv[x] - -os.system('%s %s/build_release.py %s' % (PYTHON, TARGET_TOOLS_DIR, release_args)) diff --git a/README.md b/plugins/analysis-kuromoji/README.md similarity index 100% rename from README.md rename to plugins/analysis-kuromoji/README.md diff --git a/pom.xml b/plugins/analysis-kuromoji/pom.xml similarity index 56% rename from pom.xml rename to plugins/analysis-kuromoji/pom.xml index 01d457c6ffe..a4746fd12be 100644 --- a/pom.xml +++ b/plugins/analysis-kuromoji/pom.xml @@ -4,26 +4,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - org.elasticsearch + org.elasticsearch.plugin elasticsearch-analysis-kuromoji - 3.0.0-SNAPSHOT + jar Elasticsearch Japanese (kuromoji) Analysis plugin The Japanese (kuromoji) Analysis plugin integrates Lucene kuromoji analysis module into elasticsearch. 
- https://github.com/elastic/elasticsearch-analysis-kuromoji/ - 2009 - - - The Apache Software License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - - - - scm:git:git@github.com:elastic/elasticsearch-analysis-kuromoji.git - scm:git:git@github.com:elastic/elasticsearch-analysis-kuromoji.git - http://github.com/elastic/elasticsearch-analysis-kuromoji - org.elasticsearch @@ -51,11 +37,4 @@ - - - oss-snapshots - Sonatype OSS Snapshots - https://oss.sonatype.org/content/repositories/snapshots/ - - diff --git a/src/main/assemblies/plugin.xml b/plugins/analysis-kuromoji/src/main/assemblies/plugin.xml similarity index 100% rename from src/main/assemblies/plugin.xml rename to plugins/analysis-kuromoji/src/main/assemblies/plugin.xml diff --git a/src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java rename to plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/JapaneseStopTokenFilterFactory.java diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java rename to plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiAnalyzerProvider.java diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java rename to 
plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiBaseFormFilterFactory.java diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java rename to plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiIterationMarkCharFilterFactory.java diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java rename to plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiKatakanaStemmerFactory.java diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java rename to plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java diff --git a/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java rename to plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiReadingFormFilterFactory.java diff --git 
a/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java rename to plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java similarity index 100% rename from src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java rename to plugins/analysis-kuromoji/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysis.java diff --git a/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java similarity index 100% rename from src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java rename to plugins/analysis-kuromoji/src/main/java/org/elasticsearch/indices/analysis/KuromojiIndicesAnalysisModule.java diff --git a/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java similarity index 100% rename from src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java rename to plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java diff --git a/src/main/resources/es-plugin.properties b/plugins/analysis-kuromoji/src/main/resources/es-plugin.properties similarity index 100% rename from src/main/resources/es-plugin.properties rename to plugins/analysis-kuromoji/src/main/resources/es-plugin.properties 
diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java similarity index 100% rename from src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java rename to plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java diff --git a/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java similarity index 100% rename from src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java rename to plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiIntegrationTests.java diff --git a/src/test/java/org/elasticsearch/index/analysis/empty_user_dict.txt b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/empty_user_dict.txt similarity index 100% rename from src/test/java/org/elasticsearch/index/analysis/empty_user_dict.txt rename to plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/empty_user_dict.txt diff --git a/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json similarity index 100% rename from src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json rename to plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/kuromoji_analysis.json diff --git a/src/test/java/org/elasticsearch/index/analysis/user_dict.txt b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/user_dict.txt similarity index 100% rename from src/test/java/org/elasticsearch/index/analysis/user_dict.txt rename to plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/user_dict.txt