From b33896fe778c96081444d79010eebe9132707e43 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 7 Jan 2012 23:06:50 +0200 Subject: [PATCH 001/103] initial commit --- .gitignore | 7 + README.md | 12 ++ pom.xml | 137 ++++++++++++++++++ src/main/assemblies/plugin.xml | 26 ++++ .../index/analysis/DoubleMetaphoneFilter.java | 111 ++++++++++++++ .../PhoneticAnalysisBinderProcessor.java | 30 ++++ .../index/analysis/PhoneticFilter.java | 100 +++++++++++++ .../analysis/PhoneticTokenFilterFactory.java | 102 +++++++++++++ .../analysis/AnalysisPhoneticPlugin.java | 49 +++++++ src/main/resources/es-plugin.properties | 1 + .../analysis/SimplePhoneticAnalysisTests.java | 38 +++++ src/test/resources/log4j.properties | 5 + 12 files changed, 618 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 pom.xml create mode 100644 src/main/assemblies/plugin.xml create mode 100644 src/main/java/org/elasticsearch/index/analysis/DoubleMetaphoneFilter.java create mode 100644 src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java create mode 100644 src/main/java/org/elasticsearch/index/analysis/PhoneticFilter.java create mode 100644 src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java create mode 100644 src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java create mode 100644 src/main/resources/es-plugin.properties create mode 100644 src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java create mode 100644 src/test/resources/log4j.properties diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..06a1e6fedb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +/data +/work +/logs +/.idea +/target +.DS_Store +*.iml diff --git a/README.md b/README.md new file mode 100644 index 00000000000..cdac145a448 --- /dev/null +++ b/README.md @@ -0,0 +1,12 @@ +Phonetic Analysis for ElasticSearch +=================================== + +The Phonetic 
Analysis plugin integrates phonetic token filter analysis with elasticsearch. + +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.1.0`. + + --------------------------------------------- + | Phonetic Analysis Plugin | ElasticSearch | + --------------------------------------------- + | master | 0.18 -> master | + --------------------------------------------- diff --git a/pom.xml b/pom.xml new file mode 100644 index 00000000000..2a5798cdf5e --- /dev/null +++ b/pom.xml @@ -0,0 +1,137 @@ + + + elasticsearch-analysis-phonetic + 4.0.0 + org.elasticsearch + elasticsearch-analysis-phonetic + 1.0.0-SNAPSHOT + jar + Phonetic Analysis for ElasticSearch + 2009 + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + scm:git:git@github.com:elasticsearch/elasticsearch-analysis-phonetic.git + scm:git:git@github.com:elasticsearch/elasticsearch-analysis-phonetic.git + + http://github.com/elasticsearch/elasticsearch-analysis-phonetic + + + + org.sonatype.oss + oss-parent + 7 + + + + 0.18.5 + + + + + + + + org.elasticsearch + elasticsearch + ${elasticsearch.version} + compile + + + + commons-codec + commons-codec + 1.6 + compile + + + + log4j + log4j + 1.2.16 + runtime + + + + org.testng + testng + 6.3.1 + test + + + + org.hamcrest + hamcrest-core + 1.3.RC2 + test + + + + org.hamcrest + hamcrest-library + 1.3.RC2 + test + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + 1.6 + 1.6 + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.11 + + + **/*Tests.java + + + + + org.apache.maven.plugins + maven-source-plugin + 2.1.2 + + + attach-sources + + jar + + + + + + maven-assembly-plugin + + ${project.build.directory}/releases/ + + ${basedir}/src/main/assemblies/plugin.xml + + + + + package + + single + + + + + + + \ No newline at end of file diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml new file mode 100644 index 
00000000000..8359311e108 --- /dev/null +++ b/src/main/assemblies/plugin.xml @@ -0,0 +1,26 @@ + + + + + zip + + false + + + / + true + true + + org.elasticsearch:elasticsearch + + + + / + true + true + + commons-codec:commons-codec + + + + \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/DoubleMetaphoneFilter.java b/src/main/java/org/elasticsearch/index/analysis/DoubleMetaphoneFilter.java new file mode 100644 index 00000000000..113e7eebf88 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/DoubleMetaphoneFilter.java @@ -0,0 +1,111 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.commons.codec.language.DoubleMetaphone; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + +import java.io.IOException; +import java.util.LinkedList; + +public final class DoubleMetaphoneFilter extends TokenFilter { + + private static final String TOKEN_TYPE = "DoubleMetaphone"; + + private final LinkedList remainingTokens = new LinkedList(); + private final DoubleMetaphone encoder; + private final boolean inject; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); + + public DoubleMetaphoneFilter(TokenStream input, DoubleMetaphone encoder, boolean inject) { + super(input); + this.encoder = encoder; + this.inject = inject; + } + + @Override + public boolean incrementToken() throws IOException { + for (; ; ) { + + if (!remainingTokens.isEmpty()) { + // clearAttributes(); // not currently necessary + restoreState(remainingTokens.removeFirst()); + return true; + } + + if (!input.incrementToken()) return false; + + int len = termAtt.length(); + if (len == 0) return true; // pass through zero length terms + + int firstAlternativeIncrement = inject ? 0 : posAtt.getPositionIncrement(); + + String v = termAtt.toString(); + String primaryPhoneticValue = encoder.doubleMetaphone(v); + String alternatePhoneticValue = encoder.doubleMetaphone(v, true); + + // a flag to lazily save state if needed... this avoids a save/restore when only + // one token will be generated. 
+ boolean saveState = inject; + + if (primaryPhoneticValue != null && primaryPhoneticValue.length() > 0 && !primaryPhoneticValue.equals(v)) { + if (saveState) { + remainingTokens.addLast(captureState()); + } + posAtt.setPositionIncrement(firstAlternativeIncrement); + firstAlternativeIncrement = 0; + termAtt.setEmpty().append(primaryPhoneticValue); + saveState = true; + } + + if (alternatePhoneticValue != null && alternatePhoneticValue.length() > 0 + && !alternatePhoneticValue.equals(primaryPhoneticValue) + && !primaryPhoneticValue.equals(v)) { + if (saveState) { + remainingTokens.addLast(captureState()); + saveState = false; + } + posAtt.setPositionIncrement(firstAlternativeIncrement); + termAtt.setEmpty().append(alternatePhoneticValue); + saveState = true; + } + + // Just one token to return, so no need to capture/restore + // any state, simply return it. + if (remainingTokens.isEmpty()) { + return true; + } + + if (saveState) { + remainingTokens.addLast(captureState()); + } + } + } + + @Override + public void reset() throws IOException { + input.reset(); + remainingTokens.clear(); + } +} diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java b/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java new file mode 100644 index 00000000000..d0cfaee9990 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java @@ -0,0 +1,30 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +/** + */ +public class PhoneticAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { + + @Override + public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) { + tokenFiltersBindings.processTokenFilter("phonetic", PhoneticTokenFilterFactory.class); + } +} diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticFilter.java b/src/main/java/org/elasticsearch/index/analysis/PhoneticFilter.java new file mode 100644 index 00000000000..1ba1fcdc87d --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/PhoneticFilter.java @@ -0,0 +1,100 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.commons.codec.Encoder; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; + +import java.io.IOException; + +/** + * Create tokens for phonetic matches. See: + * http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html + */ +// LUCENE MONITOR - No need for it in Lucene 3.6 +public class PhoneticFilter extends TokenFilter { + + protected boolean inject = true; + protected Encoder encoder = null; + protected String name = null; + + protected State save = null; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); + + public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) { + super(in); + this.encoder = encoder; + this.name = name; + this.inject = inject; + } + + @Override + public boolean incrementToken() throws IOException { + if (save != null) { + // clearAttributes(); // not currently necessary + restoreState(save); + save = null; + return true; + } + + if (!input.incrementToken()) return false; + + // pass through zero-length terms + if (termAtt.length() == 0) return true; + + String value = termAtt.toString(); + String phonetic = null; + try { + String v = encoder.encode(value).toString(); + if (v.length() > 0 && !value.equals(v)) phonetic = v; + } catch (Exception ignored) { + } // just use the direct text + + if (phonetic == null) return true; + + if (!inject) { + // just modify this token + termAtt.setEmpty().append(phonetic); + return true; + } + + // We need to return both the original and the phonetic tokens. 
+ // to avoid a orig=captureState() change_to_phonetic() saved=captureState() restoreState(orig) + // we return the phonetic alternative first + + int origOffset = posAtt.getPositionIncrement(); + posAtt.setPositionIncrement(0); + save = captureState(); + + posAtt.setPositionIncrement(origOffset); + termAtt.setEmpty().append(phonetic); + return true; + } + + @Override + public void reset() throws IOException { + input.reset(); + save = null; + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java new file mode 100644 index 00000000000..b033776f65a --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java @@ -0,0 +1,102 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.commons.codec.Encoder; +import org.apache.commons.codec.language.*; +import org.apache.commons.codec.language.bm.BeiderMorseEncoder; +import org.apache.commons.codec.language.bm.NameType; +import org.apache.commons.codec.language.bm.RuleType; +import org.apache.lucene.analysis.TokenStream; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.inject.assistedinject.Assisted; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.settings.IndexSettings; + +/** + * + */ +@AnalysisSettingsRequired +public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { + + private final Encoder encoder; + + private final boolean replace; + + @Inject + public PhoneticTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { + super(index, indexSettings, name, settings); + this.replace = settings.getAsBoolean("replace", true); + String encoder = settings.get("encoder"); + if (encoder == null) { + throw new ElasticSearchIllegalArgumentException("encoder must be set on phonetic token filter"); + } + if ("metaphone".equalsIgnoreCase(encoder)) { + this.encoder = new Metaphone(); + } else if ("soundex".equalsIgnoreCase(encoder)) { + this.encoder = new Soundex(); + } else if ("caverphone1".equalsIgnoreCase(encoder)) { + this.encoder = new Caverphone1(); + } else if ("caverphone2".equalsIgnoreCase(encoder)) { + this.encoder = new Caverphone2(); + } else if ("caverphone".equalsIgnoreCase(encoder)) { + this.encoder = new Caverphone2(); + } else if ("refined_soundex".equalsIgnoreCase(encoder) || "refinedSoundex".equalsIgnoreCase(encoder)) { + this.encoder = new RefinedSoundex(); + } else if ("cologne".equalsIgnoreCase(encoder)) { + this.encoder = new ColognePhonetic(); + } else if 
("double_metaphone".equalsIgnoreCase(encoder) || "doubleMetaphone".equalsIgnoreCase(encoder)) { + DoubleMetaphone doubleMetaphone = new DoubleMetaphone(); + doubleMetaphone.setMaxCodeLen(settings.getAsInt("max_code_len", doubleMetaphone.getMaxCodeLen())); + this.encoder = doubleMetaphone; + } else if ("bm".equalsIgnoreCase(encoder) || "beider_morse".equalsIgnoreCase(encoder)) { + BeiderMorseEncoder bm = new BeiderMorseEncoder(); + String ruleType = settings.get("rule_type", "approx"); + if ("approx".equalsIgnoreCase(ruleType)) { + bm.setRuleType(RuleType.APPROX); + } else if ("exact".equalsIgnoreCase(ruleType)) { + bm.setRuleType(RuleType.EXACT); + } else { + throw new ElasticSearchIllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder"); + } + String nameType = settings.get("name_type", "generic"); + if ("GENERIC".equalsIgnoreCase(nameType)) { + bm.setNameType(NameType.GENERIC); + } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) { + bm.setNameType(NameType.ASHKENAZI); + } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) { + bm.setNameType(NameType.SEPHARDIC); + } + this.encoder = bm; + } else { + throw new ElasticSearchIllegalArgumentException("unknown encoder [" + encoder + "] for phonetic token filter"); + } + } + + @Override + public TokenStream create(TokenStream tokenStream) { + if (encoder instanceof DoubleMetaphone) { + return new DoubleMetaphoneFilter(tokenStream, (DoubleMetaphone) encoder, !replace); + } + return new org.elasticsearch.index.analysis.PhoneticFilter(tokenStream, encoder, name(), !replace); + } +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java new file mode 100644 index 00000000000..f2e30d49fcf --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java @@ -0,0 +1,49 @@ +/* + * Licensed to ElasticSearch and Shay Banon 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.plugin.analysis; + +import org.elasticsearch.common.inject.Module; +import org.elasticsearch.index.analysis.AnalysisModule; +import org.elasticsearch.index.analysis.PhoneticAnalysisBinderProcessor; +import org.elasticsearch.plugins.AbstractPlugin; + +/** + */ +public class AnalysisPhoneticPlugin extends AbstractPlugin { + + @Override + public String name() { + return "analysis-phonetic"; + } + + @Override + public String description() { + return "Phonetic analysis support"; + } + + @Override + public void processModule(Module module) { + if (module instanceof AnalysisModule) { + AnalysisModule analysisModule = (AnalysisModule) module; + analysisModule.addProcessor(new PhoneticAnalysisBinderProcessor()); + } + } +} + diff --git a/src/main/resources/es-plugin.properties b/src/main/resources/es-plugin.properties new file mode 100644 index 00000000000..01db8286e9b --- /dev/null +++ b/src/main/resources/es-plugin.properties @@ -0,0 +1 @@ +plugin=org.elasticsearch.plugin.analysis.AnalysisPhoneticPlugin diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java new file mode 
100644 index 00000000000..11f0799781b --- /dev/null +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java @@ -0,0 +1,38 @@ +package org.elasticsearch.index.analysis; + +import org.elasticsearch.common.inject.Injector; +import org.elasticsearch.common.inject.ModulesBuilder; +import org.elasticsearch.common.settings.SettingsModule; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.EnvironmentModule; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexNameModule; +import org.elasticsearch.index.settings.IndexSettingsModule; +import org.elasticsearch.indices.analysis.IndicesAnalysisModule; +import org.elasticsearch.indices.analysis.IndicesAnalysisService; +import org.testng.annotations.Test; + +import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; + +/** + */ +public class SimplePhoneticAnalysisTests { + + @Test + public void testDefaultsIcuAnalysis() { + Index index = new Index("test"); + + Injector parentInjector = new ModulesBuilder().add(new SettingsModule(EMPTY_SETTINGS), new EnvironmentModule(new Environment(EMPTY_SETTINGS)), new IndicesAnalysisModule()).createInjector(); + Injector injector = new ModulesBuilder().add( + new IndexSettingsModule(index, EMPTY_SETTINGS), + new IndexNameModule(index), + new AnalysisModule(EMPTY_SETTINGS, parentInjector.getInstance(IndicesAnalysisService.class)).addProcessor(new PhoneticAnalysisBinderProcessor())) + .createChildInjector(parentInjector); + + AnalysisService analysisService = injector.getInstance(AnalysisService.class); + + // need to create one with encoder... 
+ //TokenFilterFactory tokenFilterFactory = analysisService.tokenFilter("phonetic"); + //MatcherAssert.assertThat(tokenFilterFactory, Matchers.instanceOf(PhoneticTokenFilterFactory.class)); + } +} diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties new file mode 100644 index 00000000000..497c97f9959 --- /dev/null +++ b/src/test/resources/log4j.properties @@ -0,0 +1,5 @@ +log4j.rootLogger=INFO, out + +log4j.appender.out=org.apache.log4j.ConsoleAppender +log4j.appender.out.layout=org.apache.log4j.PatternLayout +log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c] %m%n From b28ddf0fa23394bf35111984b92fa999d4253ea1 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 7 Jan 2012 23:09:35 +0200 Subject: [PATCH 002/103] update readme --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index cdac145a448..186fea87242 100644 --- a/README.md +++ b/README.md @@ -10,3 +10,27 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e --------------------------------------------- | master | 0.18 -> master | --------------------------------------------- + +A `phonetic` token filter that can be configured with different `encoder` types: `metaphone`, `soundex`, `caverphone`, `refined_soundex`, `double_metaphone` (uses "commons codec":http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html). + +The `replace` parameter (defaults to `true`) controls if the token processed should be replaced with the encoded one (set it to `true`), or added (set it to `false`). 
+ + { + "index" : { + "analysis" : { + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "standard", + "filter" : ["standard", "lowercase", "my_metaphone"] + } + }, + "filter" : { + "my_metaphone" : { + "type" : "phonetic", + "encoder" : "metaphone", + "replace" : false + } + } + } + } + } From 168de0f1c2828ac2c2a644ffc4f3274f63751c4f Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 7 Jan 2012 23:11:20 +0200 Subject: [PATCH 003/103] release 1.0.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2a5798cdf5e..ad42ae2b8e5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.0.0-SNAPSHOT + 1.0.0 jar Phonetic Analysis for ElasticSearch 2009 From 06971acde690de4c0ec19c89847a9aa1f050e61d Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 7 Jan 2012 23:11:59 +0200 Subject: [PATCH 004/103] move to 1.1.0 snap --- README.md | 2 ++ pom.xml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 186fea87242..55e01d7c012 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e --------------------------------------------- | master | 0.18 -> master | --------------------------------------------- + | 1.0.0 | 0.18 -> master | + --------------------------------------------- A `phonetic` token filter that can be configured with different `encoder` types: `metaphone`, `soundex`, `caverphone`, `refined_soundex`, `double_metaphone` (uses "commons codec":http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html). 
diff --git a/pom.xml b/pom.xml index ad42ae2b8e5..6ff299b1c12 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.0.0 + 1.1.0-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 From d7b30cb487bc8a219f401b4a942f1486341cc0f5 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 31 Jan 2012 12:58:12 +0200 Subject: [PATCH 005/103] move to elasticsearch 0.19.0 snap and use some of its features --- README.md | 2 +- pom.xml | 2 +- .../plugin/analysis/AnalysisPhoneticPlugin.java | 9 ++------- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 55e01d7c012..693250bc2cb 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e --------------------------------------------- | Phonetic Analysis Plugin | ElasticSearch | --------------------------------------------- - | master | 0.18 -> master | + | master | master (0.19) | --------------------------------------------- | 1.0.0 | 0.18 -> master | --------------------------------------------- diff --git a/pom.xml b/pom.xml index 6ff299b1c12..c554834efe2 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,7 @@ - 0.18.5 + 0.19.0-SNAPSHOT diff --git a/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java index f2e30d49fcf..a99238fc40f 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java @@ -19,7 +19,6 @@ package org.elasticsearch.plugin.analysis; -import org.elasticsearch.common.inject.Module; import org.elasticsearch.index.analysis.AnalysisModule; import org.elasticsearch.index.analysis.PhoneticAnalysisBinderProcessor; import org.elasticsearch.plugins.AbstractPlugin; @@ -38,12 +37,8 @@ public class AnalysisPhoneticPlugin extends AbstractPlugin { 
return "Phonetic analysis support"; } - @Override - public void processModule(Module module) { - if (module instanceof AnalysisModule) { - AnalysisModule analysisModule = (AnalysisModule) module; - analysisModule.addProcessor(new PhoneticAnalysisBinderProcessor()); - } + public void onModule(AnalysisModule module) { + module.addProcessor(new PhoneticAnalysisBinderProcessor()); } } From 0bfac8c37382651ecdbcee28db944790ddfe7086 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 7 Feb 2012 15:27:35 +0200 Subject: [PATCH 006/103] release 1.2.0 supporting 0.19 --- README.md | 4 +++- pom.xml | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 693250bc2cb..e086eb83776 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,9 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e --------------------------------------------- | Phonetic Analysis Plugin | ElasticSearch | --------------------------------------------- - | master | master (0.19) | + | master | 0.19 -> master | + --------------------------------------------- + | 1.1.0 | 0.19 -> master | --------------------------------------------- | 1.0.0 | 0.18 -> master | --------------------------------------------- diff --git a/pom.xml b/pom.xml index c554834efe2..6f5b0ac8170 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.1.0-SNAPSHOT + 1.1.0 jar Phonetic Analysis for ElasticSearch 2009 @@ -31,7 +31,7 @@ - 0.19.0-SNAPSHOT + 0.19.0.RC1 From b987abac6e308a0cf2f13855fff1f8667856e800 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 7 Feb 2012 15:28:38 +0200 Subject: [PATCH 007/103] move to 1.2.0 snap --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6f5b0ac8170..2e48a950a98 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.1.0 + 1.2.0-SNAPSHOT jar Phonetic Analysis for 
ElasticSearch 2009 From 7b070c49d5a024ef74ac430eaa654f388a1cb81a Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 7 Feb 2012 15:47:29 +0200 Subject: [PATCH 008/103] fix readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e086eb83776..6f77f102bff 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e --------------------------------------------- | 1.1.0 | 0.19 -> master | --------------------------------------------- - | 1.0.0 | 0.18 -> master | + | 1.0.0 | 0.18 | --------------------------------------------- A `phonetic` token filter that can be configured with different `encoder` types: `metaphone`, `soundex`, `caverphone`, `refined_soundex`, `double_metaphone` (uses "commons codec":http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html). From 11b078ecfee1db9ded9cb34fba43dbd6825da592 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sun, 26 Feb 2012 10:09:23 +0200 Subject: [PATCH 009/103] upgrade to latest assembly --- pom.xml | 2 ++ src/main/assemblies/plugin.xml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2e48a950a98..19251e9ebc1 100644 --- a/pom.xml +++ b/pom.xml @@ -117,7 +117,9 @@ maven-assembly-plugin + 2.2.2 + false ${project.build.directory}/releases/ ${basedir}/src/main/assemblies/plugin.xml diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml index 8359311e108..e720308d685 100644 --- a/src/main/assemblies/plugin.xml +++ b/src/main/assemblies/plugin.xml @@ -1,6 +1,6 @@ - + plugin zip From 29a3300e969a934be960ed60665893f194e35120 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sun, 26 Feb 2012 10:09:58 +0200 Subject: [PATCH 010/103] upgrade to latest elasticsearch version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 
19251e9ebc1..bacce367a8e 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,7 @@ - 0.19.0.RC1 + 0.19.0.RC3 From cf31854e3ac090e0432599ea805187f7c32e88cf Mon Sep 17 00:00:00 2001 From: David Pilato Date: Sun, 26 Feb 2012 23:27:02 +0100 Subject: [PATCH 011/103] Update maven assembly plugin to latest version : 2.3 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index bacce367a8e..2562c5c71e5 100644 --- a/pom.xml +++ b/pom.xml @@ -117,7 +117,7 @@ maven-assembly-plugin - 2.2.2 + 2.3 false ${project.build.directory}/releases/ From 5717cb3905771a5dad31f3653cc03ccf03d8cec7 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Sun, 26 Feb 2012 23:28:17 +0100 Subject: [PATCH 012/103] Ignore eclipse files --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 06a1e6fedb6..81c7f9eeacf 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ /target .DS_Store *.iml +/.project +/.classpath +/.settings From 1338bcc93650642d53bc33c9ecec487a34ff366a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Prante?= Date: Sun, 22 Apr 2012 12:08:05 +0200 Subject: [PATCH 013/103] update to Lucene Phonetic 3.6, added Nysiis, KoelnerPhonetik, HaasePhonetik, added simple test --- README.md | 28 +- pom.xml | 12 +- .../index/analysis/DoubleMetaphoneFilter.java | 111 ------ .../index/analysis/PhoneticFilter.java | 100 ------ .../analysis/PhoneticTokenFilterFactory.java | 97 ++++-- .../analysis/phonetic/HaasePhonetik.java | 70 ++++ .../analysis/phonetic/KoelnerPhonetik.java | 327 ++++++++++++++++++ .../index/analysis/phonetic/Nysiis.java | 325 +++++++++++++++++ .../analysis/SimplePhoneticAnalysisTests.java | 33 +- .../index/analysis/phonetic-1.yml | 30 ++ 10 files changed, 862 insertions(+), 271 deletions(-) delete mode 100644 src/main/java/org/elasticsearch/index/analysis/DoubleMetaphoneFilter.java delete mode 100644 src/main/java/org/elasticsearch/index/analysis/PhoneticFilter.java create mode 100644 
src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java create mode 100644 src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java create mode 100644 src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java create mode 100644 src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml diff --git a/README.md b/README.md index 6f77f102bff..75e410c3f23 100644 --- a/README.md +++ b/README.md @@ -5,19 +5,25 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.1.0`. - --------------------------------------------- - | Phonetic Analysis Plugin | ElasticSearch | - --------------------------------------------- - | master | 0.19 -> master | - --------------------------------------------- - | 1.1.0 | 0.19 -> master | - --------------------------------------------- - | 1.0.0 | 0.18 | - --------------------------------------------- + ----------------------------------------------- + | Phonetic Analysis Plugin | ElasticSearch | + ----------------------------------------------- + | master | 0.19.2 -> master | + ----------------------------------------------- + | 1.2.0 | 0.19.2 -> master | + ----------------------------------------------- + | 1.1.0 | 0.19 | + ----------------------------------------------- + | 1.0.0 | 0.18 | + ----------------------------------------------- -A `phonetic` token filter that can be configured with different `encoder` types: `metaphone`, `soundex`, `caverphone`, `refined_soundex`, `double_metaphone` (uses "commons codec":http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html). 
+A `phonetic` token filter that can be configured with different `encoder` types: +`metaphone`, `doublemetaphone`, `soundex`, `refinedsoundex`, +`caverphone1`, `caverphone2`, `cologne`, `nysiis`, +`koelnerphonetik`, `haasephonetik` -The `replace` parameter (defaults to `true`) controls if the token processed should be replaced with the encoded one (set it to `true`), or added (set it to `false`). +The `replace` parameter (defaults to `true`) controls if the token processed +should be replaced with the encoded one (set it to `true`), or added (set it to `false`). { "index" : { diff --git a/pom.xml b/pom.xml index 2562c5c71e5..6e10c83fcec 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.2.0-SNAPSHOT + 1.2.0 jar Phonetic Analysis for ElasticSearch 2009 @@ -31,7 +31,7 @@ - 0.19.0.RC3 + 0.19.2 @@ -46,9 +46,9 @@ - commons-codec - commons-codec - 1.6 + org.apache.lucene + lucene-analyzers-phonetic + 3.6.0 compile @@ -95,7 +95,7 @@ org.apache.maven.plugins maven-surefire-plugin - 2.11 + 2.12 **/*Tests.java diff --git a/src/main/java/org/elasticsearch/index/analysis/DoubleMetaphoneFilter.java b/src/main/java/org/elasticsearch/index/analysis/DoubleMetaphoneFilter.java deleted file mode 100644 index 113e7eebf88..00000000000 --- a/src/main/java/org/elasticsearch/index/analysis/DoubleMetaphoneFilter.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.commons.codec.language.DoubleMetaphone; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; - -import java.io.IOException; -import java.util.LinkedList; - -public final class DoubleMetaphoneFilter extends TokenFilter { - - private static final String TOKEN_TYPE = "DoubleMetaphone"; - - private final LinkedList remainingTokens = new LinkedList(); - private final DoubleMetaphone encoder; - private final boolean inject; - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); - - public DoubleMetaphoneFilter(TokenStream input, DoubleMetaphone encoder, boolean inject) { - super(input); - this.encoder = encoder; - this.inject = inject; - } - - @Override - public boolean incrementToken() throws IOException { - for (; ; ) { - - if (!remainingTokens.isEmpty()) { - // clearAttributes(); // not currently necessary - restoreState(remainingTokens.removeFirst()); - return true; - } - - if (!input.incrementToken()) return false; - - int len = termAtt.length(); - if (len == 0) return true; // pass through zero length terms - - int firstAlternativeIncrement = inject ? 
0 : posAtt.getPositionIncrement(); - - String v = termAtt.toString(); - String primaryPhoneticValue = encoder.doubleMetaphone(v); - String alternatePhoneticValue = encoder.doubleMetaphone(v, true); - - // a flag to lazily save state if needed... this avoids a save/restore when only - // one token will be generated. - boolean saveState = inject; - - if (primaryPhoneticValue != null && primaryPhoneticValue.length() > 0 && !primaryPhoneticValue.equals(v)) { - if (saveState) { - remainingTokens.addLast(captureState()); - } - posAtt.setPositionIncrement(firstAlternativeIncrement); - firstAlternativeIncrement = 0; - termAtt.setEmpty().append(primaryPhoneticValue); - saveState = true; - } - - if (alternatePhoneticValue != null && alternatePhoneticValue.length() > 0 - && !alternatePhoneticValue.equals(primaryPhoneticValue) - && !primaryPhoneticValue.equals(v)) { - if (saveState) { - remainingTokens.addLast(captureState()); - saveState = false; - } - posAtt.setPositionIncrement(firstAlternativeIncrement); - termAtt.setEmpty().append(alternatePhoneticValue); - saveState = true; - } - - // Just one token to return, so no need to capture/restore - // any state, simply return it. - if (remainingTokens.isEmpty()) { - return true; - } - - if (saveState) { - remainingTokens.addLast(captureState()); - } - } - } - - @Override - public void reset() throws IOException { - input.reset(); - remainingTokens.clear(); - } -} diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticFilter.java b/src/main/java/org/elasticsearch/index/analysis/PhoneticFilter.java deleted file mode 100644 index 1ba1fcdc87d..00000000000 --- a/src/main/java/org/elasticsearch/index/analysis/PhoneticFilter.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.commons.codec.Encoder; -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; - -import java.io.IOException; - -/** - * Create tokens for phonetic matches. 
See: - * http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html - */ -// LUCENE MONITOR - No need for it in Lucene 3.6 -public class PhoneticFilter extends TokenFilter { - - protected boolean inject = true; - protected Encoder encoder = null; - protected String name = null; - - protected State save = null; - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); - - public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) { - super(in); - this.encoder = encoder; - this.name = name; - this.inject = inject; - } - - @Override - public boolean incrementToken() throws IOException { - if (save != null) { - // clearAttributes(); // not currently necessary - restoreState(save); - save = null; - return true; - } - - if (!input.incrementToken()) return false; - - // pass through zero-length terms - if (termAtt.length() == 0) return true; - - String value = termAtt.toString(); - String phonetic = null; - try { - String v = encoder.encode(value).toString(); - if (v.length() > 0 && !value.equals(v)) phonetic = v; - } catch (Exception ignored) { - } // just use the direct text - - if (phonetic == null) return true; - - if (!inject) { - // just modify this token - termAtt.setEmpty().append(phonetic); - return true; - } - - // We need to return both the original and the phonetic tokens. 
- // to avoid a orig=captureState() change_to_phonetic() saved=captureState() restoreState(orig) - // we return the phonetic alternative first - - int origOffset = posAtt.getPositionIncrement(); - posAtt.setPositionIncrement(0); - save = captureState(); - - posAtt.setPositionIncrement(origOffset); - termAtt.setEmpty().append(phonetic); - return true; - } - - @Override - public void reset() throws IOException { - input.reset(); - save = null; - } -} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java index b033776f65a..dc491e587ff 100644 --- a/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java @@ -16,87 +16,120 @@ * specific language governing permissions and limitations * under the License. */ - package org.elasticsearch.index.analysis; +import java.util.Arrays; +import java.util.HashSet; import org.apache.commons.codec.Encoder; -import org.apache.commons.codec.language.*; -import org.apache.commons.codec.language.bm.BeiderMorseEncoder; +import org.apache.commons.codec.language.Caverphone1; +import org.apache.commons.codec.language.Caverphone2; +import org.apache.commons.codec.language.ColognePhonetic; +import org.apache.commons.codec.language.Metaphone; +import org.apache.commons.codec.language.RefinedSoundex; +import org.apache.commons.codec.language.Soundex; +import org.apache.commons.codec.language.bm.Languages.LanguageSet; import org.apache.commons.codec.language.bm.NameType; +import org.apache.commons.codec.language.bm.PhoneticEngine; import org.apache.commons.codec.language.bm.RuleType; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.phonetic.BeiderMorseFilter; +import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter; +import 
org.apache.lucene.analysis.phonetic.PhoneticFilter; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; +import org.elasticsearch.index.analysis.phonetic.HaasePhonetik; +import org.elasticsearch.index.analysis.phonetic.KoelnerPhonetik; +import org.elasticsearch.index.analysis.phonetic.Nysiis; import org.elasticsearch.index.settings.IndexSettings; /** * */ -@AnalysisSettingsRequired public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { private final Encoder encoder; - private final boolean replace; + private int maxcodelength; + private String[] languageset; + private NameType nametype; + private RuleType ruletype; @Inject public PhoneticTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); + this.languageset = null; + this.nametype = null; + this.ruletype = null; + this.maxcodelength = 0; this.replace = settings.getAsBoolean("replace", true); - String encoder = settings.get("encoder"); - if (encoder == null) { - throw new ElasticSearchIllegalArgumentException("encoder must be set on phonetic token filter"); - } - if ("metaphone".equalsIgnoreCase(encoder)) { + // weird, encoder is null at last step in SimplePhoneticAnalysisTests, so we set it to metaphone as default + String encodername = settings.get("encoder", "metaphone"); + if ("metaphone".equalsIgnoreCase(encodername)) { this.encoder = new Metaphone(); - } else if ("soundex".equalsIgnoreCase(encoder)) { + } else if ("soundex".equalsIgnoreCase(encodername)) { this.encoder = new Soundex(); - } else if ("caverphone1".equalsIgnoreCase(encoder)) { + } else if ("caverphone1".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone1(); - } else if 
("caverphone2".equalsIgnoreCase(encoder)) { + } else if ("caverphone2".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); - } else if ("caverphone".equalsIgnoreCase(encoder)) { + } else if ("caverphone".equalsIgnoreCase(encodername)) { this.encoder = new Caverphone2(); - } else if ("refined_soundex".equalsIgnoreCase(encoder) || "refinedSoundex".equalsIgnoreCase(encoder)) { + } else if ("refined_soundex".equalsIgnoreCase(encodername) || "refinedSoundex".equalsIgnoreCase(encodername)) { this.encoder = new RefinedSoundex(); - } else if ("cologne".equalsIgnoreCase(encoder)) { + } else if ("cologne".equalsIgnoreCase(encodername)) { this.encoder = new ColognePhonetic(); - } else if ("double_metaphone".equalsIgnoreCase(encoder) || "doubleMetaphone".equalsIgnoreCase(encoder)) { - DoubleMetaphone doubleMetaphone = new DoubleMetaphone(); - doubleMetaphone.setMaxCodeLen(settings.getAsInt("max_code_len", doubleMetaphone.getMaxCodeLen())); - this.encoder = doubleMetaphone; - } else if ("bm".equalsIgnoreCase(encoder) || "beider_morse".equalsIgnoreCase(encoder)) { - BeiderMorseEncoder bm = new BeiderMorseEncoder(); + } else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) { + this.encoder = null; + this.maxcodelength = settings.getAsInt("max_code_len", 4); + } else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) { + this.encoder = null; + this.languageset = settings.getAsArray("languageset"); String ruleType = settings.get("rule_type", "approx"); if ("approx".equalsIgnoreCase(ruleType)) { - bm.setRuleType(RuleType.APPROX); + ruletype = RuleType.APPROX; } else if ("exact".equalsIgnoreCase(ruleType)) { - bm.setRuleType(RuleType.EXACT); + ruletype = RuleType.EXACT; } else { throw new ElasticSearchIllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder"); } String nameType = 
settings.get("name_type", "generic"); if ("GENERIC".equalsIgnoreCase(nameType)) { - bm.setNameType(NameType.GENERIC); + nametype = NameType.GENERIC; } else if ("ASHKENAZI".equalsIgnoreCase(nameType)) { - bm.setNameType(NameType.ASHKENAZI); + nametype = NameType.ASHKENAZI; } else if ("SEPHARDIC".equalsIgnoreCase(nameType)) { - bm.setNameType(NameType.SEPHARDIC); + nametype = NameType.SEPHARDIC; } - this.encoder = bm; + } else if ("koelnerphonetik".equalsIgnoreCase(encodername)) { + this.encoder = new KoelnerPhonetik(); + } else if ("haasephonetik".equalsIgnoreCase(encodername)) { + this.encoder = new HaasePhonetik(); + } else if ("nysiis".equalsIgnoreCase(encodername)) { + this.encoder = new Nysiis(); } else { - throw new ElasticSearchIllegalArgumentException("unknown encoder [" + encoder + "] for phonetic token filter"); + throw new ElasticSearchIllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } } @Override public TokenStream create(TokenStream tokenStream) { - if (encoder instanceof DoubleMetaphone) { - return new DoubleMetaphoneFilter(tokenStream, (DoubleMetaphone) encoder, !replace); + if (encoder == null) { + if (ruletype != null && nametype != null) { + if (languageset != null) { + final LanguageSet languages = LanguageSet.from(new HashSet(Arrays.asList(languageset))); + return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true), languages); + } + return new BeiderMorseFilter(tokenStream, new PhoneticEngine(nametype, ruletype, true)); + } + if (maxcodelength > 0) { + return new DoubleMetaphoneFilter(tokenStream, maxcodelength, !replace); + } + } else { + return new PhoneticFilter(tokenStream, encoder, !replace); } - return new org.elasticsearch.index.analysis.PhoneticFilter(tokenStream, encoder, name(), !replace); + throw new ElasticSearchIllegalArgumentException("encoder error"); } } \ No newline at end of file diff --git 
a/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java b/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java new file mode 100644 index 00000000000..7526f205cda --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java @@ -0,0 +1,70 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.analysis.phonetic; + +/** + * Geänderter Algorithmus aus der Matching Toolbox von Rainer Schnell + * Java-Programmierung von Jörg Reiher + * + * Die Kölner Phonetik wurde für den Einsatz in Namensdatenbanken wie + * der Verwaltung eines Krankenhauses durch Martin Haase (Institut für + * Sprachwissenschaft, Universität zu Köln) und Kai Heitmann (Insitut für + * medizinische Statistik, Informatik und Epidemiologie, Köln) überarbeitet. + * M. Haase und K. Heitmann. Die Erweiterte Kölner Phonetik. 526, 2000. + * + * nach: Martin Wilz, Aspekte der Kodierung phonetischer Ähnlichkeiten + * in deutschen Eigennamen, Magisterarbeit. 
+ * http://www.uni-koeln.de/phil-fak/phonetik/Lehre/MA-Arbeiten/magister_wilz.pdf + * + * @author Jörg Prante + */ +public class HaasePhonetik extends KoelnerPhonetik { + + private final static String[] HAASE_VARIATIONS_PATTERNS = {"OWN", "RB", "WSK", "A$", "O$", "SCH", + "GLI", "EAU$", "^CH", "AUX", "EUX", "ILLE"}; + private final static String[] HAASE_VARIATIONS_REPLACEMENTS = {"AUN", "RW", "RSK", "AR", "OW", "CH", + "LI", "O", "SCH", "O", "O", "I"}; + + /** + * + * @return + */ + @Override + protected String[] getPatterns() { + return HAASE_VARIATIONS_PATTERNS; + } + + /** + * + * @return + */ + @Override + protected String[] getReplacements() { + return HAASE_VARIATIONS_REPLACEMENTS; + } + + /** + * + * @return + */ + @Override + protected char getCode() { + return '9'; + } +} diff --git a/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java b/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java new file mode 100644 index 00000000000..3086a5aeda7 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java @@ -0,0 +1,327 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.index.analysis.phonetic; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.codec.EncoderException; +import org.apache.commons.codec.StringEncoder; + +/** + * Kölner Phonetik + * + * H.J. Postel, Die Kölner Phonetik. Ein Verfahren zu Identifizierung + * von Personennamen auf der Grundlage der Gestaltanalyse. IBM-Nachrichten 19 (1969), 925-931 + * + * Algorithmus aus der Matching Toolbox von Rainer Schnell + * Java-Programmierung von Jörg Reiher + * + * mit Änderungen von Jörg Prante + * + */ +public class KoelnerPhonetik implements StringEncoder { + + private static final String[] POSTEL_VARIATIONS_PATTERNS = {"AUN", "OWN", "RB", "RW", "WSK", "RSK"}; + private static final String[] POSTEL_VARIATIONS_REPLACEMENTS = {"OWN", "AUN", "RW", "RB", "RSK", "WSK"}; + private Pattern[] variationsPatterns; + private boolean primary = false; + private final Set csz = new HashSet(Arrays.asList( + 'C', 'S', 'Z')); + private final Set ckq = new HashSet(Arrays.asList( + 'C', 'K', 'Q')); + private final Set aouhkxq = new HashSet(Arrays.asList( + 'A', 'O', 'U', 'H', 'K', 'X', 'Q')); + private final Set ahkloqrux = new HashSet(Arrays.asList( + 'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X')); + + /** + * Constructor for Kölner Phonetik + */ + public KoelnerPhonetik() { + init(); + } + + /** + * + * @param useOnlyPrimaryCode + */ + public KoelnerPhonetik(boolean useOnlyPrimaryCode) { + this(); + this.primary = useOnlyPrimaryCode; + } + + /** + * Get variation patterns + * + * @return string array of variations + */ + protected String[] getPatterns() { + return POSTEL_VARIATIONS_PATTERNS; + } + + /** + * + * @return + */ + protected String[] getReplacements() { + return POSTEL_VARIATIONS_REPLACEMENTS; + } + + /** + * + * @return + */ + protected char getCode() { 
+ return '0'; + } + + /** + * + * @param o1 + * @param o2 + * @return + */ + public double getRelativeValue(Object o1, Object o2) { + String[] kopho1 = code(expandUmlauts(o1.toString().toUpperCase(Locale.GERMANY))); + String[] kopho2 = code(expandUmlauts(o2.toString().toUpperCase(Locale.GERMANY))); + for (int i = 0; i < kopho1.length; i++) { + for (int ii = 0; ii < kopho2.length; ii++) { + if (kopho1[i].equals(kopho2[ii])) { + return 1; + } + } + } + return 0; + } + + @Override + public Object encode(Object str) throws EncoderException { + return encode((String) str); + } + + @Override + public String encode(String str) throws EncoderException { + if (str == null) return null; + String[] s = code(str.toString()); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length; i++) { + sb.append(s[i]); + if (i < s.length - 1) { + sb.append('_'); + } + } + return sb.toString(); + } + + + private void init() { + this.variationsPatterns = new Pattern[getPatterns().length]; + for (int i = 0; i < getPatterns().length; i++) { + this.variationsPatterns[i] = Pattern.compile(getPatterns()[i]); + } + } + + private String[] code(String str) { + List parts = partition(str); + String[] codes = new String[parts.size()]; + int i = 0; + for (String s : parts) { + codes[i++] = substitute(s); + } + return codes; + } + + private List partition(String str) { + String primaryForm = str; + List parts = new ArrayList(); + parts.add(primaryForm.replaceAll("[^\\p{L}\\p{N}]", "")); + if (!primary) { + List tmpParts = new ArrayList(); + tmpParts.addAll((Arrays.asList(str.split("[\\p{Z}\\p{C}\\p{P}]")))); + int numberOfParts = tmpParts.size(); + while (tmpParts.size() > 0) { + StringBuilder part = new StringBuilder(); + for (int i = 0; i < tmpParts.size(); i++) { + part.append(tmpParts.get(i)); + if (!(i + 1 == numberOfParts)) { + parts.add(part.toString()); + } + } + tmpParts.remove(0); + } + } + List variations = new ArrayList(); + for (int i = 0; i < parts.size(); i++) { + List 
variation = getVariations(parts.get(i)); + if (variation != null) { + variations.addAll(variation); + } + } + return variations; + } + + private List getVariations(String str) { + int position = 0; + List variations = new ArrayList(); + variations.add(""); + while (position < str.length()) { + int i = 0; + int substPos = -1; + while (substPos < position && i < getPatterns().length) { + Matcher m = variationsPatterns[i].matcher(str); + while (substPos < position && m.find()) { + substPos = m.start(); + } + i++; + } + if (substPos >= position) { + i--; + List varNew = new ArrayList(); + String prevPart = str.substring(position, substPos); + for (int ii = 0; ii < variations.size(); ii++) { + String tmp = variations.get(ii); + varNew.add(tmp.concat(prevPart + getReplacements()[i])); + variations.set(ii, variations.get(ii) + prevPart + getPatterns()[i]); + } + variations.addAll(varNew); + position = substPos + getPatterns()[i].length(); + } else { + for (int ii = 0; ii < variations.size(); ii++) { + variations.set(ii, variations.get(ii) + str.substring(position, str.length())); + } + position = str.length(); + } + } + return variations; + } + + private String substitute(String str) { + String s = expandUmlauts(str.toUpperCase(Locale.GERMAN)); + s = removeSequences(s); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < s.length(); i++) { + char current = s.charAt(i); + char next = i + 1 < s.length() ? s.charAt(i + 1) : '_'; + char prev = i > 0 ? s.charAt(i - 1) : '_'; + switch (current) { + case 'A': + case 'E': + case 'I': + case 'J': + case 'Y': + case 'O': + case 'U': + if (i == 0 || ((i == 1) && prev == 'H')) { + sb.append(getCode()); + } + break; + case 'P': + sb.append(next == 'H' ? "33" : '1'); + break; + case 'B': + sb.append('1'); + break; + case 'D': + case 'T': + sb.append(csz.contains(next) ? 
'8' : '2'); + break; + case 'F': + case 'V': + case 'W': + sb.append('3'); + break; + case 'G': + case 'K': + case 'Q': + sb.append('4'); + break; + case 'C': + if (i == 0) { + sb.append(ahkloqrux.contains(next) ? '4' : '8'); + } else { + sb.append(aouhkxq.contains(next) ? '4' : '8'); + } + if (sb.length() >= 2 && sb.charAt(sb.length() - 2) == '8') { + sb.setCharAt(sb.length() - 1, '8'); + } + break; + case 'X': + sb.append(i < 1 || !ckq.contains(prev) ? "48" : '8'); + break; + case 'L': + sb.append('5'); + break; + case 'M': + case 'N': + sb.append('6'); + break; + case 'R': + sb.append('7'); + break; + case 'S': + case 'Z': + sb.append('8'); + break; + case 'H': + break; + } + } + s = sb.toString(); + s = removeSequences(s); + return s; + } + + /** + * + * @param str + * @return + */ + private String expandUmlauts(String str) { + return str.replaceAll("\u00C4", "AE").replaceAll("\u00D6", "OE").replaceAll("\u00DC", "UE"); + } + + /** + * + * @param str + * @return + */ + private String removeSequences(String str) { + if (str == null || str.length() == 0) { + return ""; + } + int i = 0, j = 0; + StringBuilder sb = new StringBuilder().append(str.charAt(i++)); + char c; + while (i < str.length()) { + c = str.charAt(i); + if (c != sb.charAt(j)) { + sb.append(c); + j++; + } + i++; + } + return sb.toString(); + } +} diff --git a/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java b/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java new file mode 100644 index 00000000000..6275b84677a --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java @@ -0,0 +1,325 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.elasticsearch.index.analysis.phonetic; + +import java.util.regex.Pattern; +import org.apache.commons.codec.EncoderException; +import org.apache.commons.codec.StringEncoder; + +/** + * + * Taken from commons-codec trunk (unreleased yet) + * + * Encodes a string into a NYSIIS value. NYSIIS is an encoding used to relate + * similar names, but can also be used as a general purpose scheme to find word + * with similar phonemes. + * + *

NYSIIS features an accuracy increase of 2.7% over the traditional Soundex + * algorithm.

+ * + *

Algorithm description: + *

+ * 1. Transcode first characters of name
+ *   1a. MAC ->   MCC
+ *   1b. KN  ->   NN
+ *   1c. K   ->   C
+ *   1d. PH  ->   FF
+ *   1e. PF  ->   FF
+ *   1f. SCH ->   SSS
+ * 2. Transcode last characters of name
+ *   2a. EE, IE          ->   Y
+ *   2b. DT,RT,RD,NT,ND  ->   D
+ * 3. First character of key = first character of name
+ * 4. Transcode remaining characters by following these rules, incrementing by one character each time
+ *   4a. EV  ->   AF  else A,E,I,O,U -> A
+ *   4b. Q   ->   G
+ *   4c. Z   ->   S
+ *   4d. M   ->   N
+ *   4e. KN  ->   N   else K -> C
+ *   4f. SCH ->   SSS
+ *   4g. PH  ->   FF
+ *   4h. H   ->   If previous or next is nonvowel, previous
+ *   4i. W   ->   If previous is vowel, previous
+ *   4j. Add current to key if current != last key character
+ * 5. If last character is S, remove it
+ * 6. If last characters are AY, replace with Y
+ * 7. If last character is A, remove it
+ * 8. Collapse all strings of repeated characters
+ * 9. Add original first character of name as first character of key
+ * 

+ * + * @see NYSIIS on Wikipedia + * @see NYSIIS on dropby.com + * + */ +public class Nysiis implements StringEncoder { + + private static final char[] CHARS_A = new char[]{'A'}; + private static final char[] CHARS_AF = new char[]{'A', 'F'}; + private static final char[] CHARS_C = new char[]{'C'}; + private static final char[] CHARS_FF = new char[]{'F', 'F'}; + private static final char[] CHARS_G = new char[]{'G'}; + private static final char[] CHARS_N = new char[]{'N'}; + private static final char[] CHARS_NN = new char[]{'N', 'N'}; + private static final char[] CHARS_S = new char[]{'S'}; + private static final char[] CHARS_SSS = new char[]{'S', 'S', 'S'}; + private static final Pattern PAT_MAC = Pattern.compile("^MAC"); + private static final Pattern PAT_KN = Pattern.compile("^KN"); + private static final Pattern PAT_K = Pattern.compile("^K"); + private static final Pattern PAT_PH_PF = Pattern.compile("^(PH|PF)"); + private static final Pattern PAT_SCH = Pattern.compile("^SCH"); + private static final Pattern PAT_EE_IE = Pattern.compile("(EE|IE)$"); + private static final Pattern PAT_DT_ETC = Pattern.compile("(DT|RT|RD|NT|ND)$"); + private static final char SPACE = ' '; + private static final int TRUE_LENGTH = 6; + + /** + * Tests if the given character is a vowel. + * + * @param c the character to test + * @return {@code true} if the character is a vowel, {@code false} otherwise + */ + private static boolean isVowel(final char c) { + return c == 'A' || c == 'E' || c == 'I' || c == 'O' || c == 'U'; + } + + /** + * Transcodes the remaining parts of the String. The method operates on a + * sliding window, looking at 4 characters at a time: [i-1, i, i+1, i+2]. 
+ * + * @param prev the previous character + * @param curr the current character + * @param next the next character + * @param aNext the after next character + * @return a transcoded array of characters, starting from the current + * position + */ + private static char[] transcodeRemaining(final char prev, final char curr, final char next, final char aNext) { + // 1. EV -> AF + if (curr == 'E' && next == 'V') { + return CHARS_AF; + } + + // A, E, I, O, U -> A + if (isVowel(curr)) { + return CHARS_A; + } + + // 2. Q -> G, Z -> S, M -> N + if (curr == 'Q') { + return CHARS_G; + } else if (curr == 'Z') { + return CHARS_S; + } else if (curr == 'M') { + return CHARS_N; + } + + // 3. KN -> NN else K -> C + if (curr == 'K') { + if (next == 'N') { + return CHARS_NN; + } else { + return CHARS_C; + } + } + + // 4. SCH -> SSS + if (curr == 'S' && next == 'C' && aNext == 'H') { + return CHARS_SSS; + } + + // PH -> FF + if (curr == 'P' && next == 'H') { + return CHARS_FF; + } + + // 5. H -> If previous or next is a non vowel, previous. + if (curr == 'H' && (!isVowel(prev) || !isVowel(next))) { + return new char[]{prev}; + } + + // 6. W -> If previous is vowel, previous. + if (curr == 'W' && isVowel(prev)) { + return new char[]{prev}; + } + + return new char[]{curr}; + } + /** + * Indicates the strict mode. + */ + private final boolean strict; + + /** + * Creates an instance of the {@link Nysiis} encoder with strict mode + * (original form), i.e. encoded strings have a maximum length of 6. + */ + public Nysiis() { + this(true); + } + + /** + * Create an instance of the {@link Nysiis} encoder with the specified + * strict mode: + * + *
  • {@code true}: encoded strings have a maximum length of 6
  • {@code false}: + * encoded strings may have arbitrary length
+ * + * @param strict the strict mode + */ + public Nysiis(final boolean strict) { + this.strict = strict; + } + + /** + * Encodes an Object using the NYSIIS algorithm. This method is provided in + * order to satisfy the requirements of the Encoder interface, and will + * throw an {@link EncoderException} if the supplied object is not of type + * {@link String}. + * + * @param obj Object to encode + * @return An object (or a {@link String}) containing the NYSIIS code which + * corresponds to the given String. + * @throws EncoderException if the parameter supplied is not of a {@link String} + * @throws IllegalArgumentException if a character is not mapped + */ + @Override + public Object encode(Object obj) throws EncoderException { + if (!(obj instanceof String)) { + throw new EncoderException("Parameter supplied to Nysiis encode is not of type java.lang.String"); + } + return this.nysiis((String) obj); + } + + /** + * Encodes a String using the NYSIIS algorithm. + * + * @param str A String object to encode + * @return A Nysiis code corresponding to the String supplied + * @throws IllegalArgumentException if a character is not mapped + */ + @Override + public String encode(String str) { + return this.nysiis(str); + } + + /** + * Indicates the strict mode for this {@link Nysiis} encoder. + * + * @return {@code true} if the encoder is configured for strict mode, {@code false} + * otherwise + */ + public boolean isStrict() { + return this.strict; + } + + /** + * Retrieves the NYSIIS code for a given String object. 
+ * + * @param str String to encode using the NYSIIS algorithm + * @return A NYSIIS code for the String supplied + */ + public String nysiis(String str) { + if (str == null) { + return null; + } + + // Use the same clean rules as Soundex + str = clean(str); + + if (str.length() == 0) { + return str; + } + + // Translate first characters of name: + // MAC -> MCC, KN -> NN, K -> C, PH | PF -> FF, SCH -> SSS + str = PAT_MAC.matcher(str).replaceFirst("MCC"); + str = PAT_KN.matcher(str).replaceFirst("NN"); + str = PAT_K.matcher(str).replaceFirst("C"); + str = PAT_PH_PF.matcher(str).replaceFirst("FF"); + str = PAT_SCH.matcher(str).replaceFirst("SSS"); + + // Translate last characters of name: + // EE -> Y, IE -> Y, DT | RT | RD | NT | ND -> D + str = PAT_EE_IE.matcher(str).replaceFirst("Y"); + str = PAT_DT_ETC.matcher(str).replaceFirst("D"); + + // First character of key = first character of name. + StringBuffer key = new StringBuffer(str.length()); + key.append(str.charAt(0)); + + // Transcode remaining characters, incrementing by one character each time + final char[] chars = str.toCharArray(); + final int len = chars.length; + + for (int i = 1; i < len; i++) { + final char next = i < len - 1 ? chars[i + 1] : SPACE; + final char aNext = i < len - 2 ? chars[i + 2] : SPACE; + final char[] transcoded = transcodeRemaining(chars[i - 1], chars[i], next, aNext); + System.arraycopy(transcoded, 0, chars, i, transcoded.length); + + // only append the current char to the key if it is different from the last one + if (chars[i] != chars[i - 1]) { + key.append(chars[i]); + } + } + + if (key.length() > 1) { + char lastChar = key.charAt(key.length() - 1); + + // If last character is S, remove it. + if (lastChar == 'S') { + key.deleteCharAt(key.length() - 1); + lastChar = key.charAt(key.length() - 1); + } + + if (key.length() > 2) { + final char last2Char = key.charAt(key.length() - 2); + // If last characters are AY, replace with Y. 
+ if (last2Char == 'A' && lastChar == 'Y') { + key.deleteCharAt(key.length() - 2); + } + } + + // If last character is A, remove it. + if (lastChar == 'A') { + key.deleteCharAt(key.length() - 1); + } + } + + final String string = key.toString(); + return this.isStrict() ? string.substring(0, Math.min(TRUE_LENGTH, string.length())) : string; + } + + static String clean(String str) { + if (str == null || str.length() == 0) { + return str; + } + int len = str.length(); + char[] chars = new char[len]; + int count = 0; + for (int i = 0; i < len; i++) { + if (Character.isLetter(str.charAt(i))) { + chars[count++] = str.charAt(i); + } + } + if (count == len) { + return str.toUpperCase(java.util.Locale.ENGLISH); + } + return new String(chars, 0, count).toUpperCase(java.util.Locale.ENGLISH); + } +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java index 11f0799781b..a5aef2de90c 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java @@ -2,6 +2,8 @@ package org.elasticsearch.index.analysis; import org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.inject.ModulesBuilder; +import static org.elasticsearch.common.settings.ImmutableSettings.*; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.SettingsModule; import org.elasticsearch.env.Environment; import org.elasticsearch.env.EnvironmentModule; @@ -10,29 +12,38 @@ import org.elasticsearch.index.IndexNameModule; import org.elasticsearch.index.settings.IndexSettingsModule; import org.elasticsearch.indices.analysis.IndicesAnalysisModule; import org.elasticsearch.indices.analysis.IndicesAnalysisService; +import org.hamcrest.MatcherAssert; +import static org.hamcrest.Matchers.*; import 
org.testng.annotations.Test; -import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; - /** */ public class SimplePhoneticAnalysisTests { @Test - public void testDefaultsIcuAnalysis() { + public void testPhoneticTokenFilterFactory() { + Settings settings = settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/phonetic-1.yml").build(); + AnalysisService analysisService = testSimpleConfiguration(settings); + TokenFilterFactory standardfilterFactory = analysisService.tokenFilter("standard"); + System.err.println("standard filterfactory = " + standardfilterFactory); + TokenFilterFactory filterFactory = analysisService.tokenFilter("phonetic"); + System.err.println("filterfactory = " + filterFactory); + MatcherAssert.assertThat(filterFactory, instanceOf(PhoneticTokenFilterFactory.class)); + } + + private AnalysisService testSimpleConfiguration(Settings settings) { Index index = new Index("test"); - Injector parentInjector = new ModulesBuilder().add(new SettingsModule(EMPTY_SETTINGS), new EnvironmentModule(new Environment(EMPTY_SETTINGS)), new IndicesAnalysisModule()).createInjector(); + Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), + new EnvironmentModule(new Environment(settings)), + new IndicesAnalysisModule()).createInjector(); Injector injector = new ModulesBuilder().add( - new IndexSettingsModule(index, EMPTY_SETTINGS), + new IndexSettingsModule(index, settings), new IndexNameModule(index), - new AnalysisModule(EMPTY_SETTINGS, parentInjector.getInstance(IndicesAnalysisService.class)).addProcessor(new PhoneticAnalysisBinderProcessor())) - .createChildInjector(parentInjector); + new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)) + .addProcessor(new PhoneticAnalysisBinderProcessor())).createChildInjector(parentInjector); AnalysisService analysisService = injector.getInstance(AnalysisService.class); - - // need to create one with encoder... 
- //TokenFilterFactory tokenFilterFactory = analysisService.tokenFilter("phonetic"); - //MatcherAssert.assertThat(tokenFilterFactory, Matchers.instanceOf(PhoneticTokenFilterFactory.class)); + return analysisService; } } diff --git a/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml b/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml new file mode 100644 index 00000000000..41a4e3fc59f --- /dev/null +++ b/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml @@ -0,0 +1,30 @@ +index: + analysis: + filter: + doublemetaphonefilter: + type: phonetic + encoder: doublemetaphone + metaphonefilter: + type: phonetic + encoder: metaphone + soundexfilter: + type: phonetic + encoder: soundex + refinedsoundexfilter: + type: phonetic + encoder: refinedsoundex + caverphonefilter: + type: phonetic + encoder: caverphone + beidermorsefilter: + type: phonetic + encoder: beidermorse + koelnerphonetikfilter: + type: phonetic + encoder: koelnerphonetik + haasephonetikfilter: + type: phonetic + encoder: haasephonetik + nysiisfilter: + type: phonetic + encoder: nysiis From 4a7e8db78027c66a8c2df9521a0bf607393deaa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Prante?= Date: Sun, 22 Apr 2012 12:21:52 +0200 Subject: [PATCH 014/103] add Lucene Phonetic 3.6 to plugin zip --- src/main/assemblies/plugin.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml index e720308d685..b15bcd1a392 100644 --- a/src/main/assemblies/plugin.xml +++ b/src/main/assemblies/plugin.xml @@ -19,7 +19,8 @@ true true - commons-codec:commons-codec + org.apache.lucene:lucene-analyzers-phonetic + commons-codec:commons-codec From 37f97ca49d4ee53a889735ae6f75ff6ec2a3071e Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sun, 22 Apr 2012 13:37:18 +0300 Subject: [PATCH 015/103] fix pom and keep at snap for now --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/pom.xml b/pom.xml index 6e10c83fcec..9a53baf1ecf 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.2.0 + 1.2.0-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 @@ -47,7 +47,7 @@ org.apache.lucene - lucene-analyzers-phonetic + lucene-phonetic 3.6.0 compile From e02245ca729a4f065cc4e21c1162cb4dfcb23830 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Wed, 9 May 2012 22:49:35 +0300 Subject: [PATCH 016/103] update readme with latest install --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 75e410c3f23..8e854afe176 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Phonetic Analysis for ElasticSearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.1.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.2.0`. 
----------------------------------------------- | Phonetic Analysis Plugin | ElasticSearch | From 80e93faadfa7e6e448557b17ec0fc22c3f877209 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Wed, 9 May 2012 22:54:26 +0300 Subject: [PATCH 017/103] release 1.2.0 --- pom.xml | 2 +- src/main/assemblies/plugin.xml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 9a53baf1ecf..ccb736152b4 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.2.0-SNAPSHOT + 1.2.0 jar Phonetic Analysis for ElasticSearch 2009 diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml index b15bcd1a392..31f3f5339b7 100644 --- a/src/main/assemblies/plugin.xml +++ b/src/main/assemblies/plugin.xml @@ -19,8 +19,8 @@ true true - org.apache.lucene:lucene-analyzers-phonetic - commons-codec:commons-codec + org.apache.lucene:lucene-phonetic + commons-codec:commons-codec From b3d1e58672dfd8a303ed3c8ccc6005dae65ea9af Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Wed, 9 May 2012 22:55:17 +0300 Subject: [PATCH 018/103] move to 1.3.0 snap --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index ccb736152b4..697c583dad5 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.2.0 + 1.3.0-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 @@ -31,7 +31,7 @@ - 0.19.2 + 0.19.3 From 861eae21a02e74876a17a21fa317a4c9cc024fad Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sun, 10 Jun 2012 21:57:26 +0200 Subject: [PATCH 019/103] add license and add repo --- LICENSE.txt | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 19 +++++ pom.xml | 4 ++ 3 files changed, 225 insertions(+) create mode 100644 LICENSE.txt diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 
2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 8e854afe176..47fe94b993e 100644 --- a/README.md +++ b/README.md @@ -44,3 +44,22 @@ should be replaced with the encoded one (set it to `true`), or added (set it to } } } + +License +------- + + This software is licensed under the Apache 2 license, quoted below. + + Copyright 2009-2012 Shay Banon and ElasticSearch + + Licensed under the Apache License, Version 2.0 (the "License"); you may not + use this file except in compliance with the License. You may obtain a copy of + the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + License for the specific language governing permissions and limitations under + the License. diff --git a/pom.xml b/pom.xml index 697c583dad5..ec94e1560a8 100644 --- a/pom.xml +++ b/pom.xml @@ -35,6 +35,10 @@ + + sonatype + http://oss.sonatype.org/content/repositories/releases/ + From 9902102997cec1d6bcf213e9e5a5e9a8ba79dddc Mon Sep 17 00:00:00 2001 From: David Pilato Date: Sun, 24 Feb 2013 23:22:37 +0100 Subject: [PATCH 020/103] Move to Elasticsearch 0.21.0.Beta1 Due to refactoring in 0.21.x we have to update this plugin Closes #5. --- README.md | 4 ++-- pom.xml | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 47fe94b993e..14d0e7b04fc 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e ----------------------------------------------- | Phonetic Analysis Plugin | ElasticSearch | ----------------------------------------------- - | master | 0.19.2 -> master | + | master | 0.21 -> master | ----------------------------------------------- - | 1.2.0 | 0.19.2 -> master | + | 1.2.0 | 0.19.2 -> 0.20 | ----------------------------------------------- | 1.1.0 | 0.19 | ----------------------------------------------- diff --git a/pom.xml b/pom.xml index ec94e1560a8..91ab1ed2234 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,8 @@ - 0.19.3 + 0.21.0.Beta1-SNAPSHOT + 4.1.0 @@ -51,8 +52,8 @@ org.apache.lucene - lucene-phonetic - 3.6.0 + lucene-analyzers-phonetic + ${lucene.version} compile @@ -140,4 +141,4 @@
- \ No newline at end of file + From 3fbebfdbd47a9ee672a1b1231471ca3c5582fc89 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 26 Feb 2013 15:42:55 +0100 Subject: [PATCH 021/103] release 1.3 --- README.md | 6 ++++-- pom.xml | 4 ++-- src/main/assemblies/plugin.xml | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 14d0e7b04fc..d3e965f427d 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,14 @@ Phonetic Analysis for ElasticSearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.2.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.3.0`. ----------------------------------------------- | Phonetic Analysis Plugin | ElasticSearch | ----------------------------------------------- - | master | 0.21 -> master | + | master | 0.90 -> master | + ----------------------------------------------- + | 1.3.0 | 0.90 -> master | ----------------------------------------------- | 1.2.0 | 0.19.2 -> 0.20 | ----------------------------------------------- diff --git a/pom.xml b/pom.xml index 91ab1ed2234..ed0cf18b96c 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.3.0-SNAPSHOT + 1.3.0 jar Phonetic Analysis for ElasticSearch 2009 @@ -31,7 +31,7 @@ - 0.21.0.Beta1-SNAPSHOT + 0.90.0.Beta1 4.1.0 diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml index 31f3f5339b7..7a2d9322934 100644 --- a/src/main/assemblies/plugin.xml +++ b/src/main/assemblies/plugin.xml @@ -19,7 +19,7 @@ true true - org.apache.lucene:lucene-phonetic + org.apache.lucene:lucene-analyzers-phonetic commons-codec:commons-codec From 574905828f4cc0f017f13e9e8834a0be50a1b14c Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 26 Feb 2013 15:43:19 +0100 Subject: [PATCH 022/103] 
move to 1.4 snap --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ed0cf18b96c..b384e98ca1c 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.3.0 + 1.4.0-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 From d104c23280eddcf93a6163aefcf2288a2d424db4 Mon Sep 17 00:00:00 2001 From: Dmitry Zhlobo Date: Thu, 21 Mar 2013 11:18:04 +0300 Subject: [PATCH 023/103] Add beidermorse encoder type to README Add information about the mailing list --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d3e965f427d..95b93429536 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e A `phonetic` token filter that can be configured with different `encoder` types: `metaphone`, `doublemetaphone`, `soundex`, `refinedsoundex`, `caverphone1`, `caverphone2`, `cologne`, `nysiis`, -`koelnerphonetik`, `haasephonetik` +`koelnerphonetik`, `haasephonetik`, `beidermorse` The `replace` parameter (defaults to `true`) controls if the token processed should be replaced with the encoded one (set it to `true`), or added (set it to `false`). 
@@ -47,6 +47,11 @@ should be replaced with the encoded one (set it to `true`), or added (set it to } } +Questions +--------- + +If you have questions or comments please use the mailing list instead of Github Issues tracker: https://groups.google.com/group/elasticsearch + License ------- From f98916652b6fd3fc719751ef0a3ccd3d55ec6a6e Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 29 Apr 2013 21:54:07 +0200 Subject: [PATCH 024/103] move to lucene 4.2.1 and ES 0.90 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index b384e98ca1c..be64366a0b6 100644 --- a/pom.xml +++ b/pom.xml @@ -31,8 +31,8 @@ - 0.90.0.Beta1 - 4.1.0 + 0.90.0 + 4.2.1 From 02d7f718fe05ea833d15047308eaf0f9434fce66 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 29 Apr 2013 21:55:15 +0200 Subject: [PATCH 025/103] release 1.4 --- README.md | 4 +++- pom.xml | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 95b93429536..e646d8fb258 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,15 @@ Phonetic Analysis for ElasticSearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.3.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.4.0`. 
----------------------------------------------- | Phonetic Analysis Plugin | ElasticSearch | ----------------------------------------------- | master | 0.90 -> master | ----------------------------------------------- + | 1.4.0 | 0.90 -> master | + ----------------------------------------------- | 1.3.0 | 0.90 -> master | ----------------------------------------------- | 1.2.0 | 0.19.2 -> 0.20 | diff --git a/pom.xml b/pom.xml index be64366a0b6..4748501ec79 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.4.0-SNAPSHOT + 1.4.0 jar Phonetic Analysis for ElasticSearch 2009 From 92678174ff4d842d847d9b6194d3b6dd1b4dc69c Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 29 Apr 2013 21:55:28 +0200 Subject: [PATCH 026/103] move to 1.5 snap --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 4748501ec79..705a30ac974 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.4.0 + 1.5.0-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 From 8f5ab9b4c3110163e4ddba653a022e5f8d111c35 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 May 2013 21:29:49 +0200 Subject: [PATCH 027/103] Update to Elasticsearch 0.90.1 / Lucene 4.3.0 Closes #11. 
--- README.md | 8 ++++---- pom.xml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index e646d8fb258..4f34e80af6a 100644 --- a/README.md +++ b/README.md @@ -8,11 +8,11 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e ----------------------------------------------- | Phonetic Analysis Plugin | ElasticSearch | ----------------------------------------------- - | master | 0.90 -> master | + | 1.5.0-SNAPSHOT (master) | 0.90.1 -> master | ----------------------------------------------- - | 1.4.0 | 0.90 -> master | + | 1.4.0 | 0.90.0 | ----------------------------------------------- - | 1.3.0 | 0.90 -> master | + | 1.3.0 | 0.90.0 | ----------------------------------------------- | 1.2.0 | 0.19.2 -> 0.20 | ----------------------------------------------- @@ -59,7 +59,7 @@ License This software is licensed under the Apache 2 license, quoted below. - Copyright 2009-2012 Shay Banon and ElasticSearch + Copyright 2009-2013 Shay Banon and ElasticSearch Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of diff --git a/pom.xml b/pom.xml index 705a30ac974..c4abc335084 100644 --- a/pom.xml +++ b/pom.xml @@ -31,8 +31,8 @@ - 0.90.0 - 4.2.1 + 0.90.1 + 4.3.0 From ec9a3a3254f0a232f6623283c884bf3afd1d7309 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 May 2013 21:32:01 +0200 Subject: [PATCH 028/103] prepare release elasticsearch-analysis-phonetic-1.5.0 --- README.md | 6 ++++-- pom.xml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4f34e80af6a..f32b5acaacc 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,14 @@ Phonetic Analysis for ElasticSearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. 
-In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.4.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.5.0`. ----------------------------------------------- | Phonetic Analysis Plugin | ElasticSearch | ----------------------------------------------- - | 1.5.0-SNAPSHOT (master) | 0.90.1 -> master | + | 1.6.0-SNAPSHOT (master) | 0.90.1 -> master | + ----------------------------------------------- + | 1.5.0 | 0.90.1 -> master | ----------------------------------------------- | 1.4.0 | 0.90.0 | ----------------------------------------------- diff --git a/pom.xml b/pom.xml index c4abc335084..a2c3d558dd6 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.5.0-SNAPSHOT + 1.5.0 jar Phonetic Analysis for ElasticSearch 2009 From 0f61c4d57b9c263e7b409801bfe0e6fbbcb5f9bc Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 May 2013 21:34:08 +0200 Subject: [PATCH 029/103] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index a2c3d558dd6..b4b43c04389 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.5.0 + 1.6.0-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 From c773919c629e601131417e72da72640756d5adcc Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 8 Aug 2013 15:37:29 +0200 Subject: [PATCH 030/103] Update to Elasticsearch 0.90.3 / Lucene 4.4.0 Closes #12. 
--- README.md | 4 ++-- pom.xml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f32b5acaacc..92ce70ed81e 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,9 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e ----------------------------------------------- | Phonetic Analysis Plugin | ElasticSearch | ----------------------------------------------- - | 1.6.0-SNAPSHOT (master) | 0.90.1 -> master | + | 1.6.0-SNAPSHOT (master) | 0.90.3 -> master | ----------------------------------------------- - | 1.5.0 | 0.90.1 -> master | + | 1.5.0 | 0.90.1 -> 0.90.2 | ----------------------------------------------- | 1.4.0 | 0.90.0 | ----------------------------------------------- diff --git a/pom.xml b/pom.xml index b4b43c04389..559b249cdff 100644 --- a/pom.xml +++ b/pom.xml @@ -31,8 +31,8 @@ - 0.90.1 - 4.3.0 + 0.90.3 + 4.4.0 From 431ac42c1b204a5731b525cf72cbeb4d4a9ed2b5 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 8 Aug 2013 15:38:33 +0200 Subject: [PATCH 031/103] prepare release elasticsearch-analysis-phonetic-1.6.0 --- README.md | 6 ++++-- pom.xml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 92ce70ed81e..244423ff35e 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,14 @@ Phonetic Analysis for ElasticSearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.5.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.6.0`. 
----------------------------------------------- | Phonetic Analysis Plugin | ElasticSearch | ----------------------------------------------- - | 1.6.0-SNAPSHOT (master) | 0.90.3 -> master | + | 1.7.0-SNAPSHOT (master) | 0.90.3 -> master | + ----------------------------------------------- + | 1.6.0 | 0.90.3 -> master | ----------------------------------------------- | 1.5.0 | 0.90.1 -> 0.90.2 | ----------------------------------------------- diff --git a/pom.xml b/pom.xml index 559b249cdff..6ef9a4a574d 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.6.0-SNAPSHOT + 1.6.0 jar Phonetic Analysis for ElasticSearch 2009 From 9c3b97491462bca581c1fa5c3611dc4266d1d2e3 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 8 Aug 2013 15:40:29 +0200 Subject: [PATCH 032/103] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6ef9a4a574d..c4c4a7e29a9 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.6.0 + 1.7.0-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 From 07a49499e9365c380eefd3b9bb2e99c588f988b2 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 21 Aug 2013 11:54:28 +0200 Subject: [PATCH 033/103] Create CONTRIBUTING.md --- CONTRIBUTING.md | 98 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000000..b88aaf63bd2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,98 @@ +Contributing to elasticsearch +============================= + +Elasticsearch is an open source project and we love to receive contributions from our community — you! 
There are many ways to contribute, from writing tutorials or blog posts, improving the documentation, submitting bug reports and feature requests or writing code which can be incorporated into Elasticsearch itself. + +Bug reports +----------- + +If you think you have found a bug in Elasticsearch, first make sure that you are testing against the [latest version of Elasticsearch](http://www.elasticsearch.org/download/) - your issue may already have been fixed. If not, search our [issues list](https://github.com/elasticsearch/elasticsearch/issues) on GitHub in case a similar issue has already been opened. + +It is very helpful if you can prepare a reproduction of the bug. In other words, provide a small test case which we can run to confirm your bug. It makes it easier to find the problem and to fix it. Test cases should be provided as `curl` commands which we can copy and paste into a terminal to run it locally, for example: + +```sh +# delete the index +curl -XDELETE localhost:9200/test + +# insert a document +curl -XPUT localhost:9200/test/test/1 -d '{ + "title": "test document" +}' + +# this should return XXXX but instead returns YYY +curl .... +``` + +Provide as much information as you can. You may think that the problem lies with your query, when actually it depends on how your data is indexed. The easier it is for us to recreate your problem, the faster it is likely to be fixed. + +Feature requests +---------------- + +If you find yourself wishing for a feature that doesn't exist in Elasticsearch, you are probably not alone. There are bound to be others out there with similar needs. Many of the features that Elasticsearch has today have been added because our users saw the need. +Open an issue on our [issues list](https://github.com/elasticsearch/elasticsearch/issues) on GitHub which describes the feature you would like to see, why you need it, and how it should work. 
+ +Contributing code and documentation changes +------------------------------------------- + +If you have a bugfix or new feature that you would like to contribute to Elasticsearch, please find or open an issue about it first. Talk about what you would like to do. It may be that somebody is already working on it, or that there are particular issues that you should know about before implementing the change. + +We enjoy working with contributors to get their code accepted. There are many approaches to fixing a problem and it is important to find the best approach before writing too much code. + +The process for contributing to any of the [Elasticsearch repositories](https://github.com/elasticsearch/) is similar. Details for individual projects can be found below. + +### Fork and clone the repository + +You will need to fork the main Elasticsearch code or documentation repository and clone it to your local machine. See +[github help page](https://help.github.com/articles/fork-a-repo) for help. + +Further instructions for specific projects are given below. + +### Submitting your changes + +Once your changes and tests are ready to submit for review: + +1. Test your changes +Run the test suite to make sure that nothing is broken. + +2. Sign the Contributor License Agreement +Please make sure you have signed our [Contributor License Agreement](http://www.elasticsearch.org/contributor-agreement/). We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction. We ask this of all contributors in order to assure our users of the origin and continuing existence of the code. You only need to sign the CLA once. + +3. Rebase your changes +Update your local repository with the most recent code from the main Elasticsearch repository, and rebase your branch on top of the latest master branch. We prefer your changes to be squashed into a single commit. + +4. 
Submit a pull request +Push your local changes to your forked copy of the repository and [submit a pull request](https://help.github.com/articles/using-pull-requests). In the pull request, describe what your changes do and mention the number of the issue where discussion has taken place, eg "Closes #123". + +Then sit back and wait. There will probably be discussion about the pull request and, if any changes are needed, we would love to work with you to get your pull request merged into Elasticsearch. + + +Contributing to the Elasticsearch plugin +---------------------------------------- + +**Repository:** [https://github.com/elasticsearch/elasticsearch-analysis-phonetic](https://github.com/elasticsearch/elasticsearch-analysis-phonetic) + +Make sure you have [Maven](http://maven.apache.org) installed, as Elasticsearch uses it as its build system. Integration with IntelliJ and Eclipse should work out of the box. Eclipse users can automatically configure their IDE by running `mvn eclipse:eclipse` and then importing the project into their workspace: `File > Import > Existing project into workspace`. + +Please follow these formatting guidelines: + +* Java indent is 4 spaces +* Line width is 140 characters +* The rest is left to Java coding standards +* Disable “auto-format on save” to prevent unnecessary format changes. This makes reviews much harder as it generates unnecessary formatting changes. If your IDE supports formatting only modified chunks that is fine to do. + +To create a distribution from the source, simply run: + +```sh +cd elasticsearch-analysis-phonetic/ +mvn clean package -DskipTests +``` + +You will find the newly built packages under: `./target/releases/`. 
+ +Before submitting your changes, run the test suite to make sure that nothing is broken, with: + +```sh +mvn clean test +``` + +Source: [Contributing to elasticsearch](http://www.elasticsearch.org/contributing-to-elasticsearch/) From 894233974e57688d53a1b282e0b53ba0b4b70d8e Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 6 Nov 2013 11:18:08 +0100 Subject: [PATCH 034/103] Update to Elasticsearch 0.90.6 / Lucene 4.5.1 Closes #13. --- README.md | 70 ++++++++++++++++++++++++++++++++++++++++--------------- pom.xml | 4 ++-- 2 files changed, 53 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 244423ff35e..496eda799b4 100644 --- a/README.md +++ b/README.md @@ -5,25 +5,57 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.6.0`. - ----------------------------------------------- - | Phonetic Analysis Plugin | ElasticSearch | - ----------------------------------------------- - | 1.7.0-SNAPSHOT (master) | 0.90.3 -> master | - ----------------------------------------------- - | 1.6.0 | 0.90.3 -> master | - ----------------------------------------------- - | 1.5.0 | 0.90.1 -> 0.90.2 | - ----------------------------------------------- - | 1.4.0 | 0.90.0 | - ----------------------------------------------- - | 1.3.0 | 0.90.0 | - ----------------------------------------------- - | 1.2.0 | 0.19.2 -> 0.20 | - ----------------------------------------------- - | 1.1.0 | 0.19 | - ----------------------------------------------- - | 1.0.0 | 0.18 | - ----------------------------------------------- + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Phonetic Analysis PluginElasticsearchRelease date
1.7.0-SNAPSHOT (master)0.90.6 -> master
1.6.00.90.3 -> 0.90.52013-08-08
1.5.00.90.1 -> 0.90.22013-05-30
1.4.00.90.02013-04-29
1.3.00.90.02013-02-26
1.2.00.19.2 -> 0.202012-05-09
1.1.00.19.0 -> 0.19.12012-02-07
1.0.00.182012-01-07
A `phonetic` token filter that can be configured with different `encoder` types: `metaphone`, `doublemetaphone`, `soundex`, `refinedsoundex`, diff --git a/pom.xml b/pom.xml index c4c4a7e29a9..bd023c16426 100644 --- a/pom.xml +++ b/pom.xml @@ -31,8 +31,8 @@ - 0.90.3 - 4.4.0 + 0.90.6 + 4.5.1 From b6ad03e2d9e730e9dbeb9f500df80c2abccdc744 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 6 Nov 2013 11:19:16 +0100 Subject: [PATCH 035/103] prepare release elasticsearch-analysis-phonetic-1.7.0 --- README.md | 9 +++++++-- pom.xml | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 496eda799b4..02e6b88f00a 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Phonetic Analysis for ElasticSearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.6.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.7.0`. 
@@ -15,10 +15,15 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e - + + + + + + diff --git a/pom.xml b/pom.xml index bd023c16426..d0fe49701af 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0org.elasticsearchelasticsearch-analysis-phonetic - 1.7.0-SNAPSHOT + 1.7.0jarPhonetic Analysis for ElasticSearch2009 From 03aafc2803c70ea111f82205dbff028746d65137 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 6 Nov 2013 11:50:34 +0100 Subject: [PATCH 036/103] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d0fe49701af..361e8490e60 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.7.0 + 1.8.0-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 From 7f26d6d0a5042c2d7f6f765e51167dc472db1bab Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 19 Dec 2013 17:04:54 +0100 Subject: [PATCH 037/103] Update to Elasticsearch 0.90.8 / Lucene 4.6.0 Closes #15. --- README.md | 68 ++++++++++--------------------------------------------- pom.xml | 4 ++-- 2 files changed, 14 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 02e6b88f00a..f84b0d1a170 100644 --- a/README.md +++ b/README.md @@ -5,62 +5,18 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.7.0`. -
1.7.0-SNAPSHOT (master)1.8.0-SNAPSHOT (master) 0.90.6 -> master
1.7.00.90.6 -> master2013-11-06
1.6.0 0.90.3 -> 0.90.5
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Phonetic Analysis PluginElasticsearchRelease date
1.8.0-SNAPSHOT (master)0.90.6 -> master
1.7.00.90.6 -> master2013-11-06
1.6.00.90.3 -> 0.90.52013-08-08
1.5.00.90.1 -> 0.90.22013-05-30
1.4.00.90.02013-04-29
1.3.00.90.02013-02-26
1.2.00.19.2 -> 0.202012-05-09
1.1.00.19.0 -> 0.19.12012-02-07
1.0.00.182012-01-07
+| Phonetic Analysis Plugin | elasticsearch | Release date | +|-------------------------------|------------------|:------------:| +| 1.8.0-SNAPSHOT (master) | 0.90.8 -> master | 2013-12-19 | +| 1.7.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | +| 1.6.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | +| 1.5.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | +| 1.4.0 | 0.90.0 | 2013-04-29 | +| 1.3.0 | 0.90.0 | 2013-02-26 | +| 1.2.0 | 0.19.2 -> 0.20 | 2012-05-09 | +| 1.1.0 | 0.19.0 -> 0.19.1 | 2012-02-07 | +| 1.0.0 | 0.18 | 2012-01-07 | + A `phonetic` token filter that can be configured with different `encoder` types: `metaphone`, `doublemetaphone`, `soundex`, `refinedsoundex`, diff --git a/pom.xml b/pom.xml index 361e8490e60..c18eb5aa233 100644 --- a/pom.xml +++ b/pom.xml @@ -31,8 +31,8 @@ - 0.90.6 - 4.5.1 + 0.90.8 + 4.6.0 From 741baf651c0b3c5c21d86254c911df256a996cc8 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 19 Dec 2013 17:08:34 +0100 Subject: [PATCH 038/103] Move tests to JUnit Closes #14. --- .gitignore | 1 + pom.xml | 129 +++++++++++++++--- .../analysis/SimplePhoneticAnalysisTests.java | 10 +- 3 files changed, 115 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index 81c7f9eeacf..aa64bc86818 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ /.project /.classpath /.settings +/.local-execution-hints.log diff --git a/pom.xml b/pom.xml index c18eb5aa233..03866671230 100644 --- a/pom.xml +++ b/pom.xml @@ -33,6 +33,11 @@ 0.90.8 4.6.0 + 1 + true + onerror + + INFO @@ -43,6 +48,19 @@ + + org.hamcrest + hamcrest-all + 1.3 + test + + + org.apache.lucene + lucene-test-framework + ${lucene.version} + test + + org.elasticsearch elasticsearch @@ -65,23 +83,10 @@ - org.testng - testng - 6.3.1 - test - - - - org.hamcrest - hamcrest-core - 1.3.RC2 - test - - - - org.hamcrest - hamcrest-library - 1.3.RC2 + org.elasticsearch + elasticsearch + ${elasticsearch.version} + test-jar test @@ -98,13 +103,95 @@
+ com.carrotsearch.randomizedtesting + junit4-maven-plugin + 2.0.12 + + + tests + test + + junit4 + + + 20 + pipe,warn + true + + + + + + + + + ${tests.jvms} + + + + + + + **/*Tests.class + **/*Test.class + + + **/Abstract*.class + **/*StressTest.class + + + -Xmx512m + -XX:MaxDirectMemorySize=512m + -Des.logger.prefix= + + ${tests.shuffle} + ${tests.verbose} + ${tests.seed} + ${tests.failfast} + + + ${tests.iters} + ${tests.maxfailures} + ${tests.failfast} + ${tests.class} + ${tests.method} + ${tests.nightly} + ${tests.badapples} + ${tests.weekly} + ${tests.slow} + ${tests.awaitsfix} + ${tests.slow} + ${tests.timeoutSuite} + ${tests.showSuccess} + ${tests.integration} + ${tests.cluster_seed} + ${tests.client.ratio} + ${env.ES_TEST_LOCAL} + ${es.node.mode} + ${es.logger.level} + true + + + + + + + org.apache.maven.plugins maven-surefire-plugin - 2.12 + 2.15 - - **/*Tests.java - + true diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java index a5aef2de90c..726e267db04 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java @@ -2,7 +2,6 @@ package org.elasticsearch.index.analysis; import org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.inject.ModulesBuilder; -import static org.elasticsearch.common.settings.ImmutableSettings.*; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.SettingsModule; import org.elasticsearch.env.Environment; @@ -12,13 +11,16 @@ import org.elasticsearch.index.IndexNameModule; import org.elasticsearch.index.settings.IndexSettingsModule; import org.elasticsearch.indices.analysis.IndicesAnalysisModule; import org.elasticsearch.indices.analysis.IndicesAnalysisService; +import org.elasticsearch.test.ElasticsearchTestCase; import 
org.hamcrest.MatcherAssert; -import static org.hamcrest.Matchers.*; -import org.testng.annotations.Test; +import org.junit.Test; + +import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; +import static org.hamcrest.Matchers.instanceOf; /** */ -public class SimplePhoneticAnalysisTests { +public class SimplePhoneticAnalysisTests extends ElasticsearchTestCase { @Test public void testPhoneticTokenFilterFactory() { From a5afd86aa6d1a4b77b6656263e9024c5c1fc1f44 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 19 Dec 2013 17:09:55 +0100 Subject: [PATCH 039/103] prepare release elasticsearch-analysis-phonetic-1.8.0 --- README.md | 5 +++-- pom.xml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f84b0d1a170..34d514ee378 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,12 @@ Phonetic Analysis for ElasticSearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.7.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.8.0`. 
| Phonetic Analysis Plugin | elasticsearch | Release date | |-------------------------------|------------------|:------------:| -| 1.8.0-SNAPSHOT (master) | 0.90.8 -> master | 2013-12-19 | +| 1.9.0-SNAPSHOT (master) | 0.90.8 -> master | | +| 1.8.0 | 0.90.8 -> master | 2013-12-19 | | 1.7.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | | 1.6.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | | 1.5.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | diff --git a/pom.xml b/pom.xml index 03866671230..7000df36602 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.8.0-SNAPSHOT + 1.8.0 jar Phonetic Analysis for ElasticSearch 2009 From 810ce8c1b0896aee97c6004df849e9d03dbedf5c Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 19 Dec 2013 17:11:30 +0100 Subject: [PATCH 040/103] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7000df36602..7294c5d085e 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.8.0 + 1.9.0-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 From 7545e78ce82dc8801b01b6a7d096d24f5763babf Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 10 Jan 2014 23:33:52 +0100 Subject: [PATCH 041/103] update headers --- README.md | 4 +-- .../PhoneticAnalysisBinderProcessor.java | 16 ++++----- .../analysis/PhoneticTokenFilterFactory.java | 31 ++++++++-------- .../analysis/phonetic/HaasePhonetik.java | 15 ++++---- .../analysis/phonetic/KoelnerPhonetik.java | 27 +++++++------- .../index/analysis/phonetic/Nysiis.java | 36 ++++++++++--------- .../analysis/AnalysisPhoneticPlugin.java | 14 ++++---- .../analysis/SimplePhoneticAnalysisTests.java | 19 ++++++++++ 8 files changed, 90 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index 34d514ee378..487b195a48d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Phonetic Analysis for ElasticSearch +Phonetic Analysis for 
Elasticsearch =================================== The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. @@ -57,7 +57,7 @@ License This software is licensed under the Apache 2 license, quoted below. - Copyright 2009-2013 Shay Banon and ElasticSearch + Copyright 2009-2014 Elasticsearch Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java b/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java index d0cfaee9990..45d7634081e 100644 --- a/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java +++ b/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java @@ -1,13 +1,13 @@ /* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java index dc491e587ff..02c4803d02d 100644 --- a/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java @@ -1,13 +1,13 @@ /* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -16,17 +16,11 @@ * specific language governing permissions and limitations * under the License. 
*/ + package org.elasticsearch.index.analysis; -import java.util.Arrays; -import java.util.HashSet; import org.apache.commons.codec.Encoder; -import org.apache.commons.codec.language.Caverphone1; -import org.apache.commons.codec.language.Caverphone2; -import org.apache.commons.codec.language.ColognePhonetic; -import org.apache.commons.codec.language.Metaphone; -import org.apache.commons.codec.language.RefinedSoundex; -import org.apache.commons.codec.language.Soundex; +import org.apache.commons.codec.language.*; import org.apache.commons.codec.language.bm.Languages.LanguageSet; import org.apache.commons.codec.language.bm.NameType; import org.apache.commons.codec.language.bm.PhoneticEngine; @@ -45,6 +39,9 @@ import org.elasticsearch.index.analysis.phonetic.KoelnerPhonetik; import org.elasticsearch.index.analysis.phonetic.Nysiis; import org.elasticsearch.index.settings.IndexSettings; +import java.util.Arrays; +import java.util.HashSet; + /** * */ @@ -132,4 +129,4 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { } throw new ElasticSearchIllegalArgumentException("encoder error"); } -} \ No newline at end of file +} diff --git a/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java b/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java index 7526f205cda..880bc00cace 100644 --- a/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java +++ b/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java @@ -1,11 +1,11 @@ /* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -16,6 +16,7 @@ * specific language governing permissions and limitations * under the License. */ + package org.elasticsearch.index.analysis.phonetic; /** diff --git a/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java b/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java index 3086a5aeda7..a3190fa4686 100644 --- a/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java +++ b/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java @@ -1,11 +1,11 @@ /* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * @@ -16,19 +16,16 @@ * specific language governing permissions and limitations * under the License. */ + package org.elasticsearch.index.analysis.phonetic; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.apache.commons.codec.EncoderException; import org.apache.commons.codec.StringEncoder; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + /** * Kölner Phonetik * diff --git a/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java b/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java index 6275b84677a..3b85ef43915 100644 --- a/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java +++ b/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java @@ -1,25 +1,29 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ + package org.elasticsearch.index.analysis.phonetic; -import java.util.regex.Pattern; import org.apache.commons.codec.EncoderException; import org.apache.commons.codec.StringEncoder; +import java.util.regex.Pattern; + /** * * Taken from commons-codec trunk (unreleased yet) @@ -322,4 +326,4 @@ public class Nysiis implements StringEncoder { } return new String(chars, 0, count).toUpperCase(java.util.Locale.ENGLISH); } -} \ No newline at end of file +} diff --git a/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java index a99238fc40f..dacea45e049 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java @@ -1,11 +1,11 @@ /* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java index 726e267db04..70f69fdbced 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java @@ -1,3 +1,22 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.elasticsearch.index.analysis; import org.elasticsearch.common.inject.Injector; From 901a6e943738ee187a1edc654f73215a99157159 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 10 Jan 2014 23:39:55 +0100 Subject: [PATCH 042/103] Prepare 1.x branch --- README.md | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 487b195a48d..7ddb8d75ef9 100644 --- a/README.md +++ b/README.md @@ -5,18 +5,19 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.8.0`. -| Phonetic Analysis Plugin | elasticsearch | Release date | -|-------------------------------|------------------|:------------:| -| 1.9.0-SNAPSHOT (master) | 0.90.8 -> master | | -| 1.8.0 | 0.90.8 -> master | 2013-12-19 | -| 1.7.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | -| 1.6.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | -| 1.5.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | -| 1.4.0 | 0.90.0 | 2013-04-29 | -| 1.3.0 | 0.90.0 | 2013-02-26 | -| 1.2.0 | 0.19.2 -> 0.20 | 2012-05-09 | -| 1.1.0 | 0.19.0 -> 0.19.1 | 2012-02-07 | -| 1.0.0 | 0.18 | 2012-01-07 | +| Phonetic Analysis Plugin | elasticsearch | Release date | +|-------------------------------|---------------------|:------------:| +| 2.0.0.RC1-SNAPSHOT (master) | 1.0.0.RC1 -> master | | +| 1.9.0-SNAPSHOT (1.x) | 0.90.8 -> 0.90 | | +| 1.8.0 | 0.90.8 -> 0.90 | 2013-12-19 | +| 1.7.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | +| 1.6.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | +| 1.5.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | +| 1.4.0 | 0.90.0 | 2013-04-29 | +| 1.3.0 | 0.90.0 | 2013-02-26 | +| 1.2.0 | 0.19.2 -> 0.20 | 2012-05-09 | +| 1.1.0 | 0.19.0 -> 0.19.1 | 2012-02-07 | +| 1.0.0 | 0.18 | 2012-01-07 | A `phonetic` token filter that can be configured with different `encoder` types: @@ -50,7 +51,8 @@ should be replaced with the encoded one (set it to `true`), or added (set it to Questions 
--------- -If you have questions or comments please use the mailing list instead of Github Issues tracker: https://groups.google.com/group/elasticsearch +If you have questions or comments please use the [mailing list](https://groups.google.com/group/elasticsearch) instead +of Github Issues tracker. License ------- From f44ef93d38bb8d1588fe8990f9d0e4c89091d2b8 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 10 Jan 2014 23:42:48 +0100 Subject: [PATCH 043/103] Update to elasticsearch 1.0.0.RC1 Closes #17. --- pom.xml | 4 ++-- .../index/analysis/PhoneticTokenFilterFactory.java | 8 ++++---- .../index/analysis/SimplePhoneticAnalysisTests.java | 3 --- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pom.xml b/pom.xml index 7294c5d085e..a8d07c19157 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 1.9.0-SNAPSHOT + 2.0.0.RC1-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 @@ -31,7 +31,7 @@ - 0.90.8 + 1.0.0.RC1-SNAPSHOT 4.6.0 1 true diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java index 02c4803d02d..479ea1ae899 100644 --- a/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.phonetic.BeiderMorseFilter; import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter; import org.apache.lucene.analysis.phonetic.PhoneticFilter; -import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -90,7 +90,7 @@ public class 
PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { } else if ("exact".equalsIgnoreCase(ruleType)) { ruletype = RuleType.EXACT; } else { - throw new ElasticSearchIllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder"); + throw new ElasticsearchIllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder"); } String nameType = settings.get("name_type", "generic"); if ("GENERIC".equalsIgnoreCase(nameType)) { @@ -107,7 +107,7 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { } else if ("nysiis".equalsIgnoreCase(encodername)) { this.encoder = new Nysiis(); } else { - throw new ElasticSearchIllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); + throw new ElasticsearchIllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } } @@ -127,6 +127,6 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { } else { return new PhoneticFilter(tokenStream, encoder, !replace); } - throw new ElasticSearchIllegalArgumentException("encoder error"); + throw new ElasticsearchIllegalArgumentException("encoder error"); } } diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java index 70f69fdbced..d3aa66b451f 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java @@ -45,10 +45,7 @@ public class SimplePhoneticAnalysisTests extends ElasticsearchTestCase { public void testPhoneticTokenFilterFactory() { Settings settings = settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/phonetic-1.yml").build(); AnalysisService analysisService = testSimpleConfiguration(settings); - TokenFilterFactory standardfilterFactory = 
analysisService.tokenFilter("standard"); - System.err.println("standard filterfactory = " + standardfilterFactory); TokenFilterFactory filterFactory = analysisService.tokenFilter("phonetic"); - System.err.println("filterfactory = " + filterFactory); MatcherAssert.assertThat(filterFactory, instanceOf(PhoneticTokenFilterFactory.class)); } From 0251e46d3fc59c6e86a48b2749e4b4c1e1d8fb05 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 15 Jan 2014 18:24:22 +0100 Subject: [PATCH 044/103] prepare release elasticsearch-analysis-phonetic-2.0.0.RC1 --- README.md | 5 +++-- pom.xml | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7ddb8d75ef9..9b583450cbc 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,12 @@ Phonetic Analysis for Elasticsearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/1.8.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.0.0.RC1`. 
| Phonetic Analysis Plugin | elasticsearch | Release date | |-------------------------------|---------------------|:------------:| -| 2.0.0.RC1-SNAPSHOT (master) | 1.0.0.RC1 -> master | | +| 2.0.0-SNAPSHOT (master) | 1.0.0.RC1 -> master | | +| 2.0.0.RC1 | 1.0.0.RC1 -> master | 2014-01-15 | | 1.9.0-SNAPSHOT (1.x) | 0.90.8 -> 0.90 | | | 1.8.0 | 0.90.8 -> 0.90 | 2013-12-19 | | 1.7.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | diff --git a/pom.xml b/pom.xml index a8d07c19157..b98dc2c905b 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 2.0.0.RC1-SNAPSHOT + 2.0.0.RC1 jar Phonetic Analysis for ElasticSearch 2009 @@ -31,7 +31,7 @@ - 1.0.0.RC1-SNAPSHOT + 1.0.0.RC1 4.6.0 1 true From 3c7fe4590f7a7783814f550c5c05d2f282b602cb Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 15 Jan 2014 19:52:35 +0100 Subject: [PATCH 045/103] prepare for next development iteration --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b98dc2c905b..a0878f6f2c4 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 2.0.0.RC1 + 2.0.0-SNAPSHOT jar Phonetic Analysis for ElasticSearch 2009 From c9895a2381e59541053390ba14ae7f298b2ac655 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 28 Feb 2014 22:51:28 +0100 Subject: [PATCH 046/103] Add plugin release semi-automatic script Closes #20 --- README.md | 15 +- dev-tools/build_release.py | 708 +++++++++++++++++++++++++++++++++++++ dev-tools/upload-s3.py | 67 ++++ pom.xml | 5 +- 4 files changed, 782 insertions(+), 13 deletions(-) create mode 100755 dev-tools/build_release.py create mode 100644 dev-tools/upload-s3.py diff --git a/README.md b/README.md index 9b583450cbc..02d8121384b 100644 --- a/README.md +++ b/README.md @@ -5,20 +5,13 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: `bin/plugin -install 
elasticsearch/elasticsearch-analysis-phonetic/2.0.0.RC1`. +* For 1.0.x elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/master). +* For 0.90.x elasticsearch versions, look at [1.x branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/1.x). + | Phonetic Analysis Plugin | elasticsearch | Release date | |-------------------------------|---------------------|:------------:| -| 2.0.0-SNAPSHOT (master) | 1.0.0.RC1 -> master | | +| 2.0.0-SNAPSHOT | 1.0.0.RC1 -> master | XXXX-XX-XX | | 2.0.0.RC1 | 1.0.0.RC1 -> master | 2014-01-15 | -| 1.9.0-SNAPSHOT (1.x) | 0.90.8 -> 0.90 | | -| 1.8.0 | 0.90.8 -> 0.90 | 2013-12-19 | -| 1.7.0 | 0.90.6 -> 0.90.7 | 2013-11-06 | -| 1.6.0 | 0.90.3 -> 0.90.5 | 2013-08-08 | -| 1.5.0 | 0.90.1 -> 0.90.2 | 2013-05-30 | -| 1.4.0 | 0.90.0 | 2013-04-29 | -| 1.3.0 | 0.90.0 | 2013-02-26 | -| 1.2.0 | 0.19.2 -> 0.20 | 2012-05-09 | -| 1.1.0 | 0.19.0 -> 0.19.1 | 2012-02-07 | -| 1.0.0 | 0.18 | 2012-01-07 | A `phonetic` token filter that can be configured with different `encoder` types: diff --git a/dev-tools/build_release.py b/dev-tools/build_release.py new file mode 100755 index 00000000000..9166b09e7e3 --- /dev/null +++ b/dev-tools/build_release.py @@ -0,0 +1,708 @@ +# Licensed to Elasticsearch under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on +# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. 
See the License for the specific +# language governing permissions and limitations under the License. + +import re +import tempfile +import shutil +import os +import datetime +import argparse +import github3 +import smtplib + +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText + +from os.path import dirname, abspath + +""" + This tool builds a release from the a given elasticsearch plugin branch. + In order to execute it go in the top level directory and run: + $ python3 dev_tools/build_release.py --branch master --publish --remote origin + + By default this script runs in 'dry' mode which essentially simulates a release. If the + '--publish' option is set the actual release is done. + If not in 'dry' mode, a mail will be automatically sent to the mailing list. + You can disable it with the option '--disable_mail' + + $ python3 dev_tools/build_release.py --publish --remote origin --disable_mail + + The script takes over almost all + steps necessary for a release from a high level point of view it does the following things: + + - run prerequisite checks ie. check for Java 1.6 being present or S3 credentials available as env variables + - detect the version to release from the specified branch (--branch) or the current branch + - creates a release branch & updates pom.xml and README.md to point to a release version rather than a snapshot + - builds the artifacts + - commits the new version and merges the release branch into the source branch + - creates a tag and pushes the commit to the specified origin (--remote) + - publishes the releases to sonatype and S3 + - send a mail based on github issues fixed by this version + +Once it's done it will print all the remaining steps. 
+ + Prerequisites: + - Python 3k for script execution + - Boto for S3 Upload ($ apt-get install python-boto or pip-3.3 install boto) + - github3 module (pip-3.3 install github3.py) + - S3 keys exported via ENV Variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) + - GITHUB (login/password) or key exported via ENV Variables (GITHUB_LOGIN, GITHUB_PASSWORD or GITHUB_KEY) + (see https://github.com/settings/applications#personal-access-tokens) - Optional: default to no authentication + - SMTP_HOST - Optional: default to localhost + - MAIL_SENDER - Optional: default to 'david@pilato.fr': must be authorized to send emails to elasticsearch mailing list + - MAIL_TO - Optional: default to 'elasticsearch@googlegroups.com' +""" +env = os.environ + +LOG = env.get('ES_RELEASE_LOG', '/tmp/elasticsearch_release.log') +ROOT_DIR = os.path.join(abspath(dirname(__file__)), '../') +README_FILE = ROOT_DIR + 'README.md' +POM_FILE = ROOT_DIR + 'pom.xml' + +def log(msg): + log_plain('\n%s' % msg) + +def log_plain(msg): + f = open(LOG, mode='ab') + f.write(msg.encode('utf-8')) + f.close() + +def run(command, quiet=False): + log('%s: RUN: %s\n' % (datetime.datetime.now(), command)) + if os.system('%s >> %s 2>&1' % (command, LOG)): + msg = ' FAILED: %s [see log %s]' % (command, LOG) + if not quiet: + print(msg) + raise RuntimeError(msg) + +try: + JAVA_HOME = env['JAVA_HOME'] +except KeyError: + raise RuntimeError(""" + Please set JAVA_HOME in the env before running release tool + On OSX use: export JAVA_HOME=`/usr/libexec/java_home -v '1.6*'`""") + +try: + MVN='mvn' + # make sure mvn3 is used if mvn3 is available + # some systems use maven 2 as default + run('mvn3 --version', quiet=True) + MVN='mvn3' +except RuntimeError: + pass + + +def java_exe(): + path = JAVA_HOME + return 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % (path, path, path) + +def verify_java_version(version): + s = os.popen('%s; java -version 2>&1' % java_exe()).read() + if s.find(' version "%s.' 
% version) == -1: + raise RuntimeError('got wrong version for java %s:\n%s' % (version, s)) + +# Verifies the java version. We guarantee that we run with Java 1.6 +# If 1.6 is not available fail the build! +def verify_mvn_java_version(version, mvn): + s = os.popen('%s; %s --version 2>&1' % (java_exe(), mvn)).read() + if s.find('Java version: %s' % version) == -1: + raise RuntimeError('got wrong java version for %s %s:\n%s' % (mvn, version, s)) + +# Returns the hash of the current git HEAD revision +def get_head_hash(): + return os.popen(' git rev-parse --verify HEAD 2>&1').read().strip() + +# Returns the hash of the given tag revision +def get_tag_hash(tag): + return os.popen('git show-ref --tags %s --hash 2>&1' % (tag)).read().strip() + +# Returns the name of the current branch +def get_current_branch(): + return os.popen('git rev-parse --abbrev-ref HEAD 2>&1').read().strip() + +verify_java_version('1.6') # we require to build with 1.6 +verify_mvn_java_version('1.6', MVN) + +# Utility that returns the name of the release branch for a given version +def release_branch(version): + return 'release_branch_%s' % version + +# runs get fetch on the given remote +def fetch(remote): + run('git fetch %s' % remote) + +# Creates a new release branch from the given source branch +# and rebases the source branch from the remote before creating +# the release branch. Note: This fails if the source branch +# doesn't exist on the provided remote. +def create_release_branch(remote, src_branch, release): + run('git checkout %s' % src_branch) + run('git pull --rebase %s %s' % (remote, src_branch)) + run('git checkout -b %s' % (release_branch(release))) + + +# Reads the given file and applies the +# callback to it. If the callback changed +# a line the given file is replaced with +# the modified input. 
+def process_file(file_path, line_callback): + fh, abs_path = tempfile.mkstemp() + modified = False + with open(abs_path,'w', encoding='utf-8') as new_file: + with open(file_path, encoding='utf-8') as old_file: + for line in old_file: + new_line = line_callback(line) + modified = modified or (new_line != line) + new_file.write(new_line) + os.close(fh) + if modified: + #Remove original file + os.remove(file_path) + #Move new file + shutil.move(abs_path, file_path) + return True + else: + # nothing to do - just remove the tmp file + os.remove(abs_path) + return False + +# Guess the next snapshot version number (increment second digit) +def guess_snapshot(version): + digits=list(map(int, re.findall(r'\d+', version))) + source='%s.%s' % (digits[0], digits[1]) + destination='%s.%s' % (digits[0], digits[1]+1) + return version.replace(source, destination) + +# Moves the pom.xml file from a snapshot to a release +def remove_maven_snapshot(pom, release): + pattern = '%s-SNAPSHOT' % release + replacement = '%s' % release + def callback(line): + return line.replace(pattern, replacement) + process_file(pom, callback) + +# Moves the README.md file from a snapshot to a release +def remove_version_snapshot(readme_file, release): + pattern = '%s-SNAPSHOT' % release + replacement = '%s ' % release + def callback(line): + return line.replace(pattern, replacement) + process_file(readme_file, callback) + +# Moves the pom.xml file to the next snapshot +def add_maven_snapshot(pom, release, snapshot): + pattern = '%s' % release + replacement = '%s-SNAPSHOT' % snapshot + def callback(line): + return line.replace(pattern, replacement) + process_file(pom, callback) + +# Add in README.md file the next snapshot +def add_version_snapshot(readme_file, release, snapshot): + pattern = '| %s ' % release + replacement = '| %s-SNAPSHOT' % snapshot + def callback(line): + # If we find pattern, we copy the line and replace its content + if line.find(pattern) >= 0: + return line.replace(pattern, 
replacement).replace('%s' % (datetime.datetime.now().strftime("%Y-%m-%d")), + 'XXXX-XX-XX')+line + else: + return line + process_file(readme_file, callback) + + +# Set release date in README.md file +def set_date(readme_file): + pattern = 'XXXX-XX-XX' + replacement = '%s' % (datetime.datetime.now().strftime("%Y-%m-%d")) + def callback(line): + return line.replace(pattern, replacement) + process_file(readme_file, callback) + +# Update installation instructions in README.md file +def set_install_instructions(readme_file, artifact_name, release): + pattern = '`bin/plugin -install elasticsearch/%s/.+`' % artifact_name + replacement = '`bin/plugin -install elasticsearch/%s/%s`' % (artifact_name, release) + def callback(line): + return re.sub(pattern, replacement, line) + process_file(readme_file, callback) + + +# Stages the given files for the next git commit +def add_pending_files(*files): + for file in files: + run('git add %s' % file) + +# Executes a git commit with 'release [version]' as the commit message +def commit_release(artifact_id, release): + run('git commit -m "prepare release %s-%s"' % (artifact_id, release)) + +def commit_snapshot(): + run('git commit -m "prepare for next development iteration"') + +def tag_release(release): + run('git tag -a v%s -m "Tag release version %s"' % (release, release)) + +def run_mvn(*cmd): + for c in cmd: + run('%s; %s -f %s %s' % (java_exe(), MVN, POM_FILE, c)) + +def build_release(run_tests=False, dry_run=True): + target = 'deploy' + if dry_run: + target = 'package' + if run_tests: + run_mvn('clean test') + run_mvn('clean %s -DskipTests' %(target)) + +# Checks the pom.xml for the release version. 2.0.0-SNAPSHOT +# This method fails if the pom file has no SNAPSHOT version set ie. +# if the version is already on a release version we fail. +# Returns the next version string ie. 
0.90.7 +def find_release_version(src_branch): + run('git checkout %s' % src_branch) + with open(POM_FILE, encoding='utf-8') as file: + for line in file: + match = re.search(r'(.+)-SNAPSHOT', line) + if match: + return match.group(1) + raise RuntimeError('Could not find release version in branch %s' % src_branch) + +# extract a value from pom.xml +def find_from_pom(tag): + with open(POM_FILE, encoding='utf-8') as file: + for line in file: + match = re.search(r'<%s>(.+)' % (tag, tag), line) + if match: + return match.group(1) + raise RuntimeError('Could not find <%s> in pom.xml file' % (tag)) + +def get_artifacts(artifact_id, release): + artifact_path = ROOT_DIR + 'target/releases/%s-%s.zip' % (artifact_id, release) + print(' Path %s' % (artifact_path)) + if not os.path.isfile(artifact_path): + raise RuntimeError('Could not find required artifact at %s' % (artifact_path)) + return artifact_path + +# Generates sha1 for a file +# and returns the checksum files as well +# as the given files in a list +def generate_checksums(release_file): + res = [] + directory = os.path.dirname(release_file) + file = os.path.basename(release_file) + checksum_file = '%s.sha1.txt' % file + + if os.system('cd %s; shasum %s > %s' % (directory, file, checksum_file)): + raise RuntimeError('Failed to generate checksum for file %s' % release_file) + res = res + [os.path.join(directory, checksum_file), release_file] + return res + +def git_merge(src_branch, release_version): + run('git checkout %s' % src_branch) + run('git merge %s' % release_branch(release_version)) + +def git_push(remote, src_branch, release_version, dry_run): + if not dry_run: + run('git push %s %s' % (remote, src_branch)) # push the commit + run('git push %s v%s' % (remote, release_version)) # push the tag + else: + print(' dryrun [True] -- skipping push to remote %s' % remote) + +def publish_artifacts(artifacts, base='elasticsearch/elasticsearch', dry_run=True): + location = os.path.dirname(os.path.realpath(__file__)) + 
for artifact in artifacts: + if dry_run: + print('Skip Uploading %s to Amazon S3 in %s' % (artifact, base)) + else: + print('Uploading %s to Amazon S3' % artifact) + # requires boto to be installed but it is not available on python3k yet so we use a dedicated tool + run('python %s/upload-s3.py --file %s --path %s' % (location, os.path.abspath(artifact), base)) + + +################# +## +## +## Email and Github Management +## +## +################# +def format_issues_plain(issues, title='Fix'): + response = "" + + if len(issues) > 0: + response += '%s:\n' % title + for issue in issues: + response += ' * [%s] - %s (%s)\n' % (issue.number, issue.title, issue.html_url) + + return response + +def format_issues_html(issues, title='Fix'): + response = "" + + if len(issues) > 0: + response += '

%s

\n
    \n' % title + for issue in issues: + response += '[%s] - %s\n' % (issue.html_url, issue.number, issue.title) + response += '
\n' + + return response + +def get_github_repository(reponame, + login=env.get('GITHUB_LOGIN', None), + password=env.get('GITHUB_PASSWORD', None), + key=env.get('GITHUB_KEY', None)): + if login: + g = github3.login(login, password) + elif key: + g = github3.login(token=key) + else: + g = github3.GitHub() + + return g.repository("elasticsearch", reponame) + +# Check if there are some remaining open issues and fails +def check_opened_issues(version, repository, reponame): + opened_issues = [i for i in repository.iter_issues(state='open', labels='%s' % version)] + if len(opened_issues)>0: + raise NameError('Some issues [%s] are still opened. Check https://github.com/elasticsearch/%s/issues?labels=%s&state=open' + % (len(opened_issues), reponame, version)) + +# List issues from github: can be done anonymously if you don't +# exceed a given number of github API calls per day +# Check if there are some remaining open issues and fails +def list_issues(version, + repository, + severity='bug'): + issues = [i for i in repository.iter_issues(state='closed', labels='%s,%s' % (severity, version))] + return issues + +# Get issues from github and generates a Plain/HTML Multipart email +# And send it if dry_run=False +def prepare_email(artifact_id, release_version, repository, + artifact_name, artifact_description, project_url, + severity_labels_bug='bug', + severity_labels_update='update', + severity_labels_new='new', + severity_labels_doc='doc'): + + ## Get bugs from github + issues_bug = list_issues(release_version, repository, severity=severity_labels_bug) + issues_update = list_issues(release_version, repository, severity=severity_labels_update) + issues_new = list_issues(release_version, repository, severity=severity_labels_new) + issues_doc = list_issues(release_version, repository, severity=severity_labels_doc) + + ## Format content to plain text + plain_issues_bug = format_issues_plain(issues_bug, 'Fix') + plain_issues_update = format_issues_plain(issues_update, 'Update') 
+ plain_issues_new = format_issues_plain(issues_new, 'New') + plain_issues_doc = format_issues_plain(issues_doc, 'Doc') + + ## Format content to html + html_issues_bug = format_issues_html(issues_bug, 'Fix') + html_issues_update = format_issues_html(issues_update, 'Update') + html_issues_new = format_issues_html(issues_new, 'New') + html_issues_doc = format_issues_html(issues_doc, 'Doc') + + if len(issues_bug)+len(issues_update)+len(issues_new)+len(issues_doc) > 0: + plain_empty_message = "" + html_empty_message = "" + + else: + plain_empty_message = "No issue listed for this release" + html_empty_message = "

No issue listed for this release

" + + msg = MIMEMultipart('alternative') + msg['Subject'] = '[ANN] %s %s released' % (artifact_name, release_version) + text = """ +Heya, + + +We are pleased to announce the release of the %(artifact_name)s, version %(release_version)s. + +%(artifact_description)s. + +%(project_url)s + +Release Notes - %(artifact_id)s - Version %(release_version)s + +%(empty_message)s +%(issues_bug)s +%(issues_update)s +%(issues_new)s +%(issues_doc)s + +Issues, Pull requests, Feature requests are warmly welcome on %(artifact_id)s project repository: %(project_url)s +For questions or comments around this plugin, feel free to use elasticsearch mailing list: https://groups.google.com/forum/#!forum/elasticsearch + +Enjoy, + +-The Elasticsearch team +""" % {'release_version': release_version, + 'artifact_id': artifact_id, + 'artifact_name': artifact_name, + 'artifact_description': artifact_description, + 'project_url': project_url, + 'empty_message': plain_empty_message, + 'issues_bug': plain_issues_bug, + 'issues_update': plain_issues_update, + 'issues_new': plain_issues_new, + 'issues_doc': plain_issues_doc} + + html = """ + + +

Heya,

+ +

We are pleased to announce the release of the %(artifact_name)s, version %(release_version)s

+ +
%(artifact_description)s.
+ +

Release Notes - Version %(release_version)s

+%(empty_message)s +%(issues_bug)s +%(issues_update)s +%(issues_new)s +%(issues_doc)s + +

Issues, Pull requests, Feature requests are warmly welcome on +%(artifact_id)s project repository!

+

For questions or comments around this plugin, feel free to use elasticsearch +mailing list!

+ +

Enjoy,

+ +

- The Elasticsearch team

+ +""" % {'release_version': release_version, + 'artifact_id': artifact_id, + 'artifact_name': artifact_name, + 'artifact_description': artifact_description, + 'project_url': project_url, + 'empty_message': html_empty_message, + 'issues_bug': html_issues_bug, + 'issues_update': html_issues_update, + 'issues_new': html_issues_new, + 'issues_doc': html_issues_doc} + + # Record the MIME types of both parts - text/plain and text/html. + part1 = MIMEText(text, 'plain') + part2 = MIMEText(html, 'html') + + # Attach parts into message container. + # According to RFC 2046, the last part of a multipart message, in this case + # the HTML message, is best and preferred. + msg.attach(part1) + msg.attach(part2) + + return msg + +def send_email(msg, + dry_run=True, + mail=True, + sender=env.get('MAIL_SENDER'), + to=env.get('MAIL_TO', 'elasticsearch@googlegroups.com'), + smtp_server=env.get('SMTP_SERVER', 'localhost')): + msg['From'] = 'Elasticsearch Team <%s>' % sender + msg['To'] = 'Elasticsearch Mailing List <%s>' % to + # save mail on disk + with open(ROOT_DIR+'target/email.txt', 'w') as email_file: + email_file.write(msg.as_string()) + if mail and not dry_run: + s = smtplib.SMTP(smtp_server, 25) + s.sendmail(sender, to, msg.as_string()) + s.quit() + else: + print('generated email: open %starget/email.txt' % ROOT_DIR) + +def print_sonatype_notice(): + settings = os.path.join(os.path.expanduser('~'), '.m2/settings.xml') + if os.path.isfile(settings): + with open(settings, encoding='utf-8') as settings_file: + for line in settings_file: + if line.strip() == 'sonatype-nexus-snapshots': + # moving out - we found the indicator no need to print the warning + return + print(""" + NOTE: No sonatype settings detected, make sure you have configured + your sonatype credentials in '~/.m2/settings.xml': + + + ... + + + sonatype-nexus-snapshots + your-jira-id + your-jira-pwd + + + sonatype-nexus-staging + your-jira-id + your-jira-pwd + + + ... 
+ + """) + +def check_s3_credentials(): + if not env.get('AWS_ACCESS_KEY_ID', None) or not env.get('AWS_SECRET_ACCESS_KEY', None): + raise RuntimeError('Could not find "AWS_ACCESS_KEY_ID" / "AWS_SECRET_ACCESS_KEY" in the env variables please export in order to upload to S3') + +def check_github_credentials(): + if not env.get('GITHUB_KEY', None) and not env.get('GITHUB_LOGIN', None): + log('WARN: Could not find "GITHUB_LOGIN" / "GITHUB_PASSWORD" or "GITHUB_KEY" in the env variables. You could need it.') + +def check_email_settings(): + if not env.get('MAIL_SENDER', None): + raise RuntimeError('Could not find "MAIL_SENDER"') + +# we print a notice if we can not find the relevant infos in the ~/.m2/settings.xml +print_sonatype_notice() + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Builds and publishes a Elasticsearch Plugin Release') + parser.add_argument('--branch', '-b', metavar='master', default=get_current_branch(), + help='The branch to release from. Defaults to the current branch.') + parser.add_argument('--skiptests', '-t', dest='tests', action='store_false', + help='Skips tests before release. Tests are run by default.') + parser.set_defaults(tests=True) + parser.add_argument('--remote', '-r', metavar='origin', default='origin', + help='The remote to push the release commit and tag to. Default is [origin]') + parser.add_argument('--publish', '-p', dest='dryrun', action='store_false', + help='Publishes the release. Disable by default.') + parser.add_argument('--disable_mail', '-dm', dest='mail', action='store_false', + help='Do not send a release email. 
Email is sent by default.') + + parser.set_defaults(dryrun=True) + parser.set_defaults(mail=True) + args = parser.parse_args() + + src_branch = args.branch + remote = args.remote + run_tests = args.tests + dry_run = args.dryrun + mail = args.mail + + if not dry_run: + check_s3_credentials() + print('WARNING: dryrun is set to "false" - this will push and publish the release') + if mail: + check_email_settings() + print('An email to %s will be sent after the release' + % env.get('MAIL_TO', 'elasticsearch@googlegroups.com')) + input('Press Enter to continue...') + + check_github_credentials() + + print(''.join(['-' for _ in range(80)])) + print('Preparing Release from branch [%s] running tests: [%s] dryrun: [%s]' % (src_branch, run_tests, dry_run)) + print(' JAVA_HOME is [%s]' % JAVA_HOME) + print(' Running with maven command: [%s] ' % (MVN)) + + release_version = find_release_version(src_branch) + artifact_id = find_from_pom('artifactId') + artifact_name = find_from_pom('name') + artifact_description = find_from_pom('description') + project_url = find_from_pom('url') + print(' Artifact Id: [%s]' % artifact_id) + print(' Release version: [%s]' % release_version) + + # extract snapshot + default_snapshot_version = guess_snapshot(release_version) + snapshot_version = input('Enter next snapshot version [%s]:' % default_snapshot_version) + snapshot_version = snapshot_version or default_snapshot_version + + print(' Next version: [%s-SNAPSHOT]' % snapshot_version) + print(' Artifact Name: [%s]' % artifact_name) + print(' Artifact Description: [%s]' % artifact_description) + print(' Project URL: [%s]' % project_url) + + if not dry_run: + smoke_test_version = release_version + head_hash = get_head_hash() + run_mvn('clean') # clean the env! 
+ create_release_branch(remote, src_branch, release_version) + print(' Created release branch [%s]' % (release_branch(release_version))) + success = False + try: + pending_files = [POM_FILE, README_FILE] + remove_maven_snapshot(POM_FILE, release_version) + remove_version_snapshot(README_FILE, release_version) + set_date(README_FILE) + set_install_instructions(README_FILE, artifact_id, release_version) + print(' Done removing snapshot version') + add_pending_files(*pending_files) # expects var args use * to expand + commit_release(artifact_id, release_version) + print(' Committed release version [%s]' % release_version) + print(''.join(['-' for _ in range(80)])) + print('Building Release candidate') + input('Press Enter to continue...') + print(' Checking github issues') + repository = get_github_repository(artifact_id) + check_opened_issues(release_version, repository, artifact_id) + if not dry_run: + print(' Running maven builds now and publish to sonatype - run-tests [%s]' % run_tests) + else: + print(' Running maven builds now run-tests [%s]' % run_tests) + build_release(run_tests=run_tests, dry_run=dry_run) + artifact = get_artifacts(artifact_id, release_version) + artifact_and_checksums = generate_checksums(artifact) + print(''.join(['-' for _ in range(80)])) + + print('Finish Release -- dry_run: %s' % dry_run) + input('Press Enter to continue...') + print(' merge release branch') + git_merge(src_branch, release_version) + print(' tag') + tag_release(release_version) + + add_maven_snapshot(POM_FILE, release_version, snapshot_version) + add_version_snapshot(README_FILE, release_version, snapshot_version) + add_pending_files(*pending_files) + commit_snapshot() + + print(' push to %s %s -- dry_run: %s' % (remote, src_branch, dry_run)) + git_push(remote, src_branch, release_version, dry_run) + print(' publish artifacts to S3 -- dry_run: %s' % dry_run) + publish_artifacts(artifact_and_checksums, base='elasticsearch/%s' % (artifact_id) , dry_run=dry_run) + print(' 
preparing email (from github issues)') + msg = prepare_email(artifact_id, release_version, repository, artifact_name, artifact_description, project_url) + print(' sending email -- dry_run: %s, mail: %s' % (dry_run, mail)) + send_email(msg, dry_run=dry_run, mail=mail) + + pending_msg = """ +Release successful pending steps: + * close and release sonatype repo: https://oss.sonatype.org/ + * check if the release is there https://oss.sonatype.org/content/repositories/releases/org/elasticsearch/%(artifact_id)s/%(version)s + * tweet about the release +""" + print(pending_msg % {'version': release_version, + 'artifact_id': artifact_id, + 'project_url': project_url}) + success = True + finally: + if not success: + run('git reset --hard HEAD') + run('git checkout %s' % src_branch) + elif dry_run: + print('End of dry_run') + input('Press Enter to reset changes...') + + run('git reset --hard %s' % head_hash) + run('git tag -d v%s' % release_version) + # we delete this one anyways + run('git branch -D %s' % (release_branch(release_version))) diff --git a/dev-tools/upload-s3.py b/dev-tools/upload-s3.py new file mode 100644 index 00000000000..95ea576e65c --- /dev/null +++ b/dev-tools/upload-s3.py @@ -0,0 +1,67 @@ +# Licensed to Elasticsearch under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on +# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+ +import os +import sys +import argparse +try: + import boto.s3 +except: + raise RuntimeError(""" + S3 upload requires boto to be installed + Use one of: + 'pip install -U boto' + 'apt-get install python-boto' + 'easy_install boto' + """) + +import boto.s3 + + +def list_buckets(conn): + return conn.get_all_buckets() + + +def upload_s3(conn, path, key, file, bucket): + print 'Uploading %s to Amazon S3 bucket %s/%s' % \ + (file, bucket, os.path.join(path, key)) + def percent_cb(complete, total): + sys.stdout.write('.') + sys.stdout.flush() + bucket = conn.create_bucket(bucket) + k = bucket.new_key(os.path.join(path, key)) + k.set_contents_from_filename(file, cb=percent_cb, num_cb=100) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Uploads files to Amazon S3') + parser.add_argument('--file', '-f', metavar='path to file', + help='the branch to release from', required=True) + parser.add_argument('--bucket', '-b', metavar='B42', default='download.elasticsearch.org', + help='The S3 Bucket to upload to') + parser.add_argument('--path', '-p', metavar='elasticsearch/elasticsearch', default='elasticsearch/elasticsearch', + help='The key path to use') + parser.add_argument('--key', '-k', metavar='key', default=None, + help='The key - uses the file name as default key') + args = parser.parse_args() + if args.key: + key = args.key + else: + key = os.path.basename(args.file) + + connection = boto.connect_s3() + upload_s3(connection, args.path, key, args.file, args.bucket); + diff --git a/pom.xml b/pom.xml index a0878f6f2c4..4268a6a2212 100644 --- a/pom.xml +++ b/pom.xml @@ -2,13 +2,14 @@ - elasticsearch-analysis-phonetic 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic 2.0.0-SNAPSHOT jar - Phonetic Analysis for ElasticSearch + Phonetic Analysis for elasticsearch + The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. 
+ https://github.com/elasticsearch/elasticsearch-analysis-phonetic/ 2009 From f985d3567f58169c82211f249e7852817fe96715 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 28 Feb 2014 22:54:17 +0100 Subject: [PATCH 047/103] Update to Lucene 4.6.1 / Elasticsearch 1.0.0 Closes #19 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 4268a6a2212..d68035aafd3 100644 --- a/pom.xml +++ b/pom.xml @@ -32,8 +32,8 @@ - 1.0.0.RC1 - 4.6.0 + 1.0.0 + 4.6.1 1 true onerror From d8691d5cf19aebfd4c2c508def56ac836b86e873 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 28 Feb 2014 23:00:47 +0100 Subject: [PATCH 048/103] prepare release elasticsearch-analysis-phonetic-2.0.0 --- README.md | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 02d8121384b..f1bbc23f447 100644 --- a/README.md +++ b/README.md @@ -3,14 +3,14 @@ Phonetic Analysis for Elasticsearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.0.0.RC1`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.0.0`. * For 1.0.x elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/master). * For 0.90.x elasticsearch versions, look at [1.x branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/1.x). 
| Phonetic Analysis Plugin | elasticsearch | Release date | |-------------------------------|---------------------|:------------:| -| 2.0.0-SNAPSHOT | 1.0.0.RC1 -> master | XXXX-XX-XX | +| 2.0.0 | 1.0.0.RC1 -> master | 2014-02-28 | | 2.0.0.RC1 | 1.0.0.RC1 -> master | 2014-01-15 | diff --git a/pom.xml b/pom.xml index d68035aafd3..04f04e4304c 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 2.0.0-SNAPSHOT + 2.0.0 jar Phonetic Analysis for elasticsearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. From b750e26cf8e2c3ea7566937f40c73b30cf6da4cb Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 28 Feb 2014 23:01:44 +0100 Subject: [PATCH 049/103] prepare for next development iteration --- README.md | 1 + pom.xml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f1bbc23f447..f37e4ce76e4 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ In order to install the plugin, simply run: `bin/plugin -install elasticsearch/e | Phonetic Analysis Plugin | elasticsearch | Release date | |-------------------------------|---------------------|:------------:| +| 2.1.0-SNAPSHOT | 1.0.0.RC1 -> master | XXXX-XX-XX | | 2.0.0 | 1.0.0.RC1 -> master | 2014-02-28 | | 2.0.0.RC1 | 1.0.0.RC1 -> master | 2014-01-15 | diff --git a/pom.xml b/pom.xml index 04f04e4304c..dd95965142e 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 2.0.0 + 2.1.0-SNAPSHOT jar Phonetic Analysis for elasticsearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. From d4b6aa5783de9c753e2ab2375d6be6cf5d3b571c Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 3 Mar 2014 10:47:42 +0100 Subject: [PATCH 050/103] Update naming for better mailing list announcement By now, when we release the plugin, users can be confused about version naming. 
For example, email title could be: ``` [ANN] Phonetic Analysis for elasticsearch 1.8.0 released ``` We prefer to have that form: ``` [ANN] Elasticsearch Phonetic Analysis plugin 1.8.0 released ``` Thanks to @spinscale for reporting this. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index dd95965142e..917377d47b8 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ elasticsearch-analysis-phonetic 2.1.0-SNAPSHOT jar - Phonetic Analysis for elasticsearch + Elasticsearch Phonetic Analysis plugin The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. https://github.com/elasticsearch/elasticsearch-analysis-phonetic/ 2009 From d6efef4731574da46c3d398bbfd1c3d737a8dc7d Mon Sep 17 00:00:00 2001 From: David Pilato Date: Sun, 9 Mar 2014 10:43:11 +0100 Subject: [PATCH 051/103] Add plugin version in es-plugin.properties With https://github.com/elasticsearch/elasticsearch/issues/2784, we can now add plugin version in `es-plugin.properties` file. It will only be used with elasticsearch 1.0.0 and above. No need to push it in 1.x branch. Closes #21. 
--- pom.xml | 9 +++++++++ src/main/resources/es-plugin.properties | 1 + 2 files changed, 10 insertions(+) diff --git a/pom.xml b/pom.xml index 917377d47b8..9e1e7bf47e4 100644 --- a/pom.xml +++ b/pom.xml @@ -93,6 +93,15 @@ + + + src/main/resources + true + + **/*.properties + + + org.apache.maven.plugins diff --git a/src/main/resources/es-plugin.properties b/src/main/resources/es-plugin.properties index 01db8286e9b..c47e513e971 100644 --- a/src/main/resources/es-plugin.properties +++ b/src/main/resources/es-plugin.properties @@ -1 +1,2 @@ plugin=org.elasticsearch.plugin.analysis.AnalysisPhoneticPlugin +version=${project.version} From 99677faa43a7758ed6b3092a32f9d4e1d3f84bdb Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 19 Mar 2014 22:37:07 +0100 Subject: [PATCH 052/103] Disable java and maven version checking And fix typo in email html --- dev-tools/build_release.py | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/dev-tools/build_release.py b/dev-tools/build_release.py index 9166b09e7e3..74acd8c5f4e 100755 --- a/dev-tools/build_release.py +++ b/dev-tools/build_release.py @@ -43,7 +43,7 @@ from os.path import dirname, abspath The script takes over almost all steps necessary for a release from a high level point of view it does the following things: - - run prerequisite checks ie. check for Java 1.6 being present or S3 credentials available as env variables + - run prerequisite checks ie. 
check for S3 credentials available as env variables - detect the version to release from the specified branch (--branch) or the current branch - creates a release branch & updates pom.xml and README.md to point to a release version rather than a snapshot - builds the artifacts @@ -109,18 +109,6 @@ def java_exe(): path = JAVA_HOME return 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % (path, path, path) -def verify_java_version(version): - s = os.popen('%s; java -version 2>&1' % java_exe()).read() - if s.find(' version "%s.' % version) == -1: - raise RuntimeError('got wrong version for java %s:\n%s' % (version, s)) - -# Verifies the java version. We guarantee that we run with Java 1.6 -# If 1.6 is not available fail the build! -def verify_mvn_java_version(version, mvn): - s = os.popen('%s; %s --version 2>&1' % (java_exe(), mvn)).read() - if s.find('Java version: %s' % version) == -1: - raise RuntimeError('got wrong java version for %s %s:\n%s' % (mvn, version, s)) - # Returns the hash of the current git HEAD revision def get_head_hash(): return os.popen(' git rev-parse --verify HEAD 2>&1').read().strip() @@ -133,9 +121,6 @@ def get_tag_hash(tag): def get_current_branch(): return os.popen('git rev-parse --abbrev-ref HEAD 2>&1').read().strip() -verify_java_version('1.6') # we require to build with 1.6 -verify_mvn_java_version('1.6', MVN) - # Utility that returns the name of the release branch for a given version def release_branch(version): return 'release_branch_%s' % version @@ -218,7 +203,7 @@ def add_version_snapshot(readme_file, release, snapshot): # If we find pattern, we copy the line and replace its content if line.find(pattern) >= 0: return line.replace(pattern, replacement).replace('%s' % (datetime.datetime.now().strftime("%Y-%m-%d")), - 'XXXX-XX-XX')+line + 'XXXX-XX-XX')+line else: return line process_file(readme_file, callback) @@ -356,15 +341,15 @@ def format_issues_html(issues, title='Fix'): if len(issues) > 0: response += '

%s

\n
    \n' % title for issue in issues: - response += '[%s] - %s\n' % (issue.html_url, issue.number, issue.title) + response += '
  • [%s] - %s\n' % (issue.html_url, issue.number, issue.title) response += '
\n' return response def get_github_repository(reponame, - login=env.get('GITHUB_LOGIN', None), - password=env.get('GITHUB_PASSWORD', None), - key=env.get('GITHUB_KEY', None)): + login=env.get('GITHUB_LOGIN', None), + password=env.get('GITHUB_PASSWORD', None), + key=env.get('GITHUB_KEY', None)): if login: g = github3.login(login, password) elif key: From bd984f464b7dd2d761593aec82a47938b8588dc7 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 26 Mar 2014 12:48:28 +0100 Subject: [PATCH 053/103] Create branches according to elasticsearch versions We create branches: * es-0.90 for elasticsearch 0.90 * es-1.0 for elasticsearch 1.0 * es-1.1 for elasticsearch 1.1 * master for elasticsearch master We also check that before releasing we don't have a dependency to an elasticsearch SNAPSHOT version. Add links to each version in documentation (cherry picked from commit d5855a5) --- README.md | 14 +++++++++----- dev-tools/build_release.py | 29 +++++++++++++++++++++++++++++ pom.xml | 6 +++--- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index f37e4ce76e4..c3e58c6ebde 100644 --- a/README.md +++ b/README.md @@ -5,14 +5,18 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.0.0`. -* For 1.0.x elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/master). -* For 0.90.x elasticsearch versions, look at [1.x branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/1.x). +* For master elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/master). +* For 1.1.x elasticsearch versions, look at [es-1.1 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.1). 
+* For 1.0.x elasticsearch versions, look at [es-1.0 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.0). +* For 0.90.x elasticsearch versions, look at [es-0.90 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-0.90). | Phonetic Analysis Plugin | elasticsearch | Release date | |-------------------------------|---------------------|:------------:| -| 2.1.0-SNAPSHOT | 1.0.0.RC1 -> master | XXXX-XX-XX | -| 2.0.0 | 1.0.0.RC1 -> master | 2014-02-28 | -| 2.0.0.RC1 | 1.0.0.RC1 -> master | 2014-01-15 | +| 3.0.0-SNAPSHOT | master | XXXX-XX-XX | + +Please read documentation relative to the version you are using: + +* [3.0.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/blob/master/README.md) A `phonetic` token filter that can be configured with different `encoder` types: diff --git a/dev-tools/build_release.py b/dev-tools/build_release.py index 74acd8c5f4e..db8345440c7 100755 --- a/dev-tools/build_release.py +++ b/dev-tools/build_release.py @@ -208,6 +208,29 @@ def add_version_snapshot(readme_file, release, snapshot): return line process_file(readme_file, callback) +# Moves the README.md file from a snapshot to a release (documentation link) +def remove_documentation_snapshot(readme_file, repo_url, release, branch): + pattern = '* [%s-SNAPSHOT](%sblob/%s/README.md)' % (release, repo_url, branch) + replacement = '* [%s](%sblob/v%s/README.md)' % (release, repo_url, release) + def callback(line): + # If we find pattern, we replace its content + if line.find(pattern) >= 0: + return line.replace(pattern, replacement) + else: + return line + process_file(readme_file, callback) + +# Add in README.markdown file the documentation for the next version +def add_documentation_snapshot(readme_file, repo_url, release, snapshot, branch): + pattern = '* [%s](%sblob/v%s/README.md)' % (release, repo_url, release) + replacement = '* [%s-SNAPSHOT](%sblob/%s/README.md)' % (snapshot, repo_url, branch) + 
def callback(line): + # If we find pattern, we copy the line and replace its content + if line.find(pattern) >= 0: + return line.replace(pattern, replacement)+line + else: + return line + process_file(readme_file, callback) # Set release date in README.md file def set_date(readme_file): @@ -603,8 +626,12 @@ if __name__ == '__main__': artifact_name = find_from_pom('name') artifact_description = find_from_pom('description') project_url = find_from_pom('url') + elasticsearch_version = find_from_pom('elasticsearch.version') print(' Artifact Id: [%s]' % artifact_id) print(' Release version: [%s]' % release_version) + print(' Elasticsearch: [%s]' % elasticsearch_version) + if elasticsearch_version.find('-SNAPSHOT') != -1: + raise RuntimeError('Can not release with a SNAPSHOT elasticsearch dependency: %s' % elasticsearch_version) # extract snapshot default_snapshot_version = guess_snapshot(release_version) @@ -626,6 +653,7 @@ if __name__ == '__main__': try: pending_files = [POM_FILE, README_FILE] remove_maven_snapshot(POM_FILE, release_version) + remove_documentation_snapshot(README_FILE, project_url, release_version, src_branch) remove_version_snapshot(README_FILE, release_version) set_date(README_FILE) set_install_instructions(README_FILE, artifact_id, release_version) @@ -657,6 +685,7 @@ if __name__ == '__main__': add_maven_snapshot(POM_FILE, release_version, snapshot_version) add_version_snapshot(README_FILE, release_version, snapshot_version) + add_documentation_snapshot(README_FILE, project_url, release_version, snapshot_version, src_branch) add_pending_files(*pending_files) commit_snapshot() diff --git a/pom.xml b/pom.xml index 9e1e7bf47e4..99e10b90435 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-phonetic - 2.1.0-SNAPSHOT + 3.0.0-SNAPSHOT jar Elasticsearch Phonetic Analysis plugin The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. 
@@ -32,8 +32,8 @@ - 1.0.0 - 4.6.1 + 2.0.0-SNAPSHOT + 4.7.0 1 true onerror From ed17cdb96cac6dd06d46aa308bbb8c29bd964c78 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 28 Apr 2014 17:29:25 +0200 Subject: [PATCH 054/103] Added new branch for elasticsearch 1.2 (cherry picked from commit aac8f23) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c3e58c6ebde..6655e311e54 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.0.0`. * For master elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/master). +* For 1.2.x elasticsearch versions, look at [es-1.2 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.2). * For 1.1.x elasticsearch versions, look at [es-1.1 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.1). * For 1.0.x elasticsearch versions, look at [es-1.0 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.0). * For 0.90.x elasticsearch versions, look at [es-0.90 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-0.90). From 905c7093fc3e314179bfa4b8782db78f02f8b084 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 28 Apr 2014 18:08:39 +0200 Subject: [PATCH 055/103] Add Lucene version in plugin.properties file Closes #25. 
(cherry picked from commit 17d43f5) --- src/main/resources/es-plugin.properties | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/resources/es-plugin.properties b/src/main/resources/es-plugin.properties index c47e513e971..cc52b051102 100644 --- a/src/main/resources/es-plugin.properties +++ b/src/main/resources/es-plugin.properties @@ -1,2 +1,3 @@ plugin=org.elasticsearch.plugin.analysis.AnalysisPhoneticPlugin version=${project.version} +lucene=${lucene.version} From f1577d27cdf05efbc912897cd97dc73acf269eac Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 28 Apr 2014 18:20:17 +0200 Subject: [PATCH 056/103] Update to Lucene 4.8 Closes #24. (cherry picked from commit 1596bf6) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 99e10b90435..9f8b0c7aaf4 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ 2.0.0-SNAPSHOT - 4.7.0 + 4.8.0 1 true onerror From 106698417d5292f1d52e6df5fa05ce75b2018694 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 28 May 2014 12:56:55 +0200 Subject: [PATCH 057/103] Add integration tests Closes #27. (cherry picked from commit d43d4df) --- .../SimplePhoneticIntegrationTests.java | 100 ++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java new file mode 100644 index 00000000000..6ae62dd7c31 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java @@ -0,0 +1,100 @@ +/* + * Licensed to Elasticsearch (the "Author") under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
Author licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.test.ElasticsearchIntegrationTest; +import org.junit.Test; + +import java.io.IOException; +import java.util.concurrent.ExecutionException; + +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.notNullValue; + +@ElasticsearchIntegrationTest.ClusterScope(numDataNodes = 1, scope = ElasticsearchIntegrationTest.Scope.SUITE) +public class SimplePhoneticIntegrationTests extends ElasticsearchIntegrationTest { + + @Override + public Settings indexSettings() { + Settings settings = ImmutableSettings.builder() + .put(super.indexSettings()) + .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard") + .putArray("index.analysis.analyzer.my_analyzer.filter", "standard", "lowercase", "my_metaphone") + .put("index.analysis.filter.my_metaphone.type", "phonetic") + .put("index.analysis.filter.my_metaphone.encoder", "metaphone") + 
.put("index.analysis.filter.my_metaphone.replace", false) + .build(); + + return settings; + } + + @Test + public void testPhoneticAnalyzer() throws ExecutionException, InterruptedException { + createIndex("test"); + ensureGreen("test"); + AnalyzeResponse response = client().admin().indices() + .prepareAnalyze("hello world") + .setIndex("test") + .setAnalyzer("my_analyzer") + .execute().get(); + + assertThat(response, notNullValue()); + assertThat(response.getTokens().size(), is(4)); + assertThat(response.getTokens().get(0).getTerm(), is("HL")); + assertThat(response.getTokens().get(1).getTerm(), is("hello")); + assertThat(response.getTokens().get(2).getTerm(), is("WRLT")); + assertThat(response.getTokens().get(3).getTerm(), is("world")); + } + + @Test + public void testPhoneticAnalyzerInMapping() throws ExecutionException, InterruptedException, IOException { + createIndex("test"); + ensureGreen("test"); + final XContentBuilder mapping = jsonBuilder().startObject() + .startObject("type") + .startObject("properties") + .startObject("foo") + .field("type", "string") + .field("analyzer", "my_analyzer") + .endObject() + .endObject() + .endObject() + .endObject(); + + client().admin().indices().preparePutMapping("test").setType("type").setSource(mapping).get(); + + index("test", "type", "1", "foo", "hello world"); + refresh(); + + SearchResponse response = client().prepareSearch("test").setQuery( + QueryBuilders.matchQuery("foo", "helllo") + ).execute().actionGet(); + + assertThat(response.getHits().getTotalHits(), is(1L)); + } + +} From 868fd81d73757809915b2d704ff24753b718f15e Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 28 May 2014 12:58:29 +0200 Subject: [PATCH 058/103] Update to Lucene 4.8.1 / Elasticsearch 1.2.0 Related to #24. 
(cherry picked from commit ee289c9) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9f8b0c7aaf4..09af99e9c5a 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ 2.0.0-SNAPSHOT - 4.8.0 + 4.8.1 1 true onerror From 39feb530d85c4569081cde835a5aa29452bf6179 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 28 May 2014 13:04:16 +0200 Subject: [PATCH 059/103] Beidermorse encoder does not support "replace" option Beidermorse encoder does not support "replace" option: only new tokens will be returned. One of the backfires is that highlighting will not work. This is actually because Lucene's beidermorse filter does not support this option. Please consider updating the documentation by specifying which encoders support the `"replace : false"` option. Closes #22. (cherry picked from commit c307877) --- README.md | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 6655e311e54..2c4ddb918ad 100644 --- a/README.md +++ b/README.md @@ -28,25 +28,30 @@ A `phonetic` token filter that can be configured with different `encoder` types: The `replace` parameter (defaults to `true`) controls if the token processed should be replaced with the encoded one (set it to `true`), or added (set it to `false`). - { - "index" : { - "analysis" : { - "analyzer" : { - "my_analyzer" : { - "tokenizer" : "standard", - "filter" : ["standard", "lowercase", "my_metaphone"] - } - }, - "filter" : { - "my_metaphone" : { - "type" : "phonetic", - "encoder" : "metaphone", - "replace" : false - } +```js +{ + "index" : { + "analysis" : { + "analyzer" : { + "my_analyzer" : { + "tokenizer" : "standard", + "filter" : ["standard", "lowercase", "my_metaphone"] + } + }, + "filter" : { + "my_metaphone" : { + "type" : "phonetic", + "encoder" : "metaphone", + "replace" : false } } } } +} +``` + +Note that `beidermorse` does not support the `replace` parameter.
+ Questions --------- From 50d3fff22d4bcd40265d8c544b4f97b954d214d7 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 28 May 2014 13:08:56 +0200 Subject: [PATCH 060/103] Update to elasticsearch 1.3.0 Closes #26. (cherry picked from commit 60b7c55) --- README.md | 3 ++- .../index/analysis/SimplePhoneticIntegrationTests.java | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2c4ddb918ad..7aeb6e29017 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,10 @@ Phonetic Analysis for Elasticsearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.0.0`. +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.1.0`. * For master elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/master). +* For 1.3.x elasticsearch versions, look at [es-1.3 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.3). * For 1.2.x elasticsearch versions, look at [es-1.2 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.2). * For 1.1.x elasticsearch versions, look at [es-1.1 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.1). * For 1.0.x elasticsearch versions, look at [es-1.0 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.0). 
diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java index 6ae62dd7c31..84da5eeb2e4 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.plugins.PluginsService; import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.junit.Test; @@ -38,6 +39,14 @@ import static org.hamcrest.CoreMatchers.notNullValue; @ElasticsearchIntegrationTest.ClusterScope(numDataNodes = 1, scope = ElasticsearchIntegrationTest.Scope.SUITE) public class SimplePhoneticIntegrationTests extends ElasticsearchIntegrationTest { + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return ImmutableSettings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true) + .build(); + } + @Override public Settings indexSettings() { Settings settings = ImmutableSettings.builder() From 8c28e4b1fcc1de0977e7ce0ef58e7129dd0f1eb1 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 27 Jun 2014 11:54:22 +0200 Subject: [PATCH 061/103] Update to Lucene 4.9.0 Closes #29. 
(cherry picked from commit 7dc2231) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 09af99e9c5a..7dc31696088 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ 2.0.0-SNAPSHOT - 4.8.1 + 4.9.0 1 true onerror From b1a055828b952b5a5b66b156ab2c5368f219e06c Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 16 Jul 2014 09:15:38 +0200 Subject: [PATCH 062/103] Update to elasticsearch 1.4.0 Closes #30. (cherry picked from commit fad6ebf) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 7aeb6e29017..a450a1fffdb 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.1.0`. * For master elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/master). +* For 1.4.x elasticsearch versions, look at [es-1.4 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.4). * For 1.3.x elasticsearch versions, look at [es-1.3 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.3). * For 1.2.x elasticsearch versions, look at [es-1.2 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.2). * For 1.1.x elasticsearch versions, look at [es-1.1 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.1). From 5473bf7b825e323fff5b142a3e2aabcd3e41a449 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 8 Sep 2014 18:07:32 +0200 Subject: [PATCH 063/103] Docs: make the welcome page more obvious Closes #31. 
--- .gitignore | 1 + README.md | 34 +- dev-tools/build_release.py | 722 ------------------------------------- dev-tools/release.py | 134 +++++++ dev-tools/upload-s3.py | 67 ---- 5 files changed, 156 insertions(+), 802 deletions(-) delete mode 100755 dev-tools/build_release.py create mode 100644 dev-tools/release.py delete mode 100644 dev-tools/upload-s3.py diff --git a/.gitignore b/.gitignore index aa64bc86818..cba388bfce9 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ /.classpath /.settings /.local-execution-hints.log +/plugin_tools diff --git a/README.md b/README.md index a450a1fffdb..eb406502854 100644 --- a/README.md +++ b/README.md @@ -3,24 +3,32 @@ Phonetic Analysis for Elasticsearch The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. -In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.1.0`. +In order to install the plugin, simply run: -* For master elasticsearch versions, look at [master branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/master). -* For 1.4.x elasticsearch versions, look at [es-1.4 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.4). -* For 1.3.x elasticsearch versions, look at [es-1.3 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.3). -* For 1.2.x elasticsearch versions, look at [es-1.2 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.2). -* For 1.1.x elasticsearch versions, look at [es-1.1 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.1). -* For 1.0.x elasticsearch versions, look at [es-1.0 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.0). -* For 0.90.x elasticsearch versions, look at [es-0.90 branch](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-0.90). 
+```sh +bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.3.0 +``` -| Phonetic Analysis Plugin | elasticsearch | Release date | -|-------------------------------|---------------------|:------------:| -| 3.0.0-SNAPSHOT | master | XXXX-XX-XX | -Please read documentation relative to the version you are using: +| elasticsearch | Phonetic Analysis Plugin | Docs | +|---------------|-----------------------|------------| +| master | Build from source | See below | +| es-1.x | Build from source | [2.4.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-240-snapshot-for-elasticsearch-1x) | +| es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | +| es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.2.0/#phonetic-analysis-for-elasticsearch) | +| es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.1.0/#phonetic-analysis-for-elasticsearch) | +| es-1.0 | 2.0.0 | [2.0.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.0.0/#phonetic-analysis-for-elasticsearch) | +| es-0.90 | 1.8.0 | [1.8.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v1.8.0/#phonetic-analysis-for-elasticsearch) | -* [3.0.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/blob/master/README.md) +To build a `SNAPSHOT` version, you need to build it with Maven: +```bash +mvn clean install +plugin --install analysis-phonetic \ + --url file:target/releases/elasticsearch-analysis-phonetic-X.X.X-SNAPSHOT.zip +``` + +## User guide A `phonetic` token filter that can be configured with different `encoder` types: `metaphone`, `doublemetaphone`, `soundex`, `refinedsoundex`, diff --git a/dev-tools/build_release.py b/dev-tools/build_release.py deleted file mode 100755 index db8345440c7..00000000000 ---
a/dev-tools/build_release.py +++ /dev/null @@ -1,722 +0,0 @@ -# Licensed to Elasticsearch under one or more contributor -# license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright -# ownership. Elasticsearch licenses this file to you under -# the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on -# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -import re -import tempfile -import shutil -import os -import datetime -import argparse -import github3 -import smtplib - -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText - -from os.path import dirname, abspath - -""" - This tool builds a release from the a given elasticsearch plugin branch. - In order to execute it go in the top level directory and run: - $ python3 dev_tools/build_release.py --branch master --publish --remote origin - - By default this script runs in 'dry' mode which essentially simulates a release. If the - '--publish' option is set the actual release is done. - If not in 'dry' mode, a mail will be automatically sent to the mailing list. - You can disable it with the option '--disable_mail' - - $ python3 dev_tools/build_release.py --publish --remote origin --disable_mail - - The script takes over almost all - steps necessary for a release from a high level point of view it does the following things: - - - run prerequisite checks ie. 
check for S3 credentials available as env variables - - detect the version to release from the specified branch (--branch) or the current branch - - creates a release branch & updates pom.xml and README.md to point to a release version rather than a snapshot - - builds the artifacts - - commits the new version and merges the release branch into the source branch - - creates a tag and pushes the commit to the specified origin (--remote) - - publishes the releases to sonatype and S3 - - send a mail based on github issues fixed by this version - -Once it's done it will print all the remaining steps. - - Prerequisites: - - Python 3k for script execution - - Boto for S3 Upload ($ apt-get install python-boto or pip-3.3 install boto) - - github3 module (pip-3.3 install github3.py) - - S3 keys exported via ENV Variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY) - - GITHUB (login/password) or key exported via ENV Variables (GITHUB_LOGIN, GITHUB_PASSWORD or GITHUB_KEY) - (see https://github.com/settings/applications#personal-access-tokens) - Optional: default to no authentication - - SMTP_HOST - Optional: default to localhost - - MAIL_SENDER - Optional: default to 'david@pilato.fr': must be authorized to send emails to elasticsearch mailing list - - MAIL_TO - Optional: default to 'elasticsearch@googlegroups.com' -""" -env = os.environ - -LOG = env.get('ES_RELEASE_LOG', '/tmp/elasticsearch_release.log') -ROOT_DIR = os.path.join(abspath(dirname(__file__)), '../') -README_FILE = ROOT_DIR + 'README.md' -POM_FILE = ROOT_DIR + 'pom.xml' - -def log(msg): - log_plain('\n%s' % msg) - -def log_plain(msg): - f = open(LOG, mode='ab') - f.write(msg.encode('utf-8')) - f.close() - -def run(command, quiet=False): - log('%s: RUN: %s\n' % (datetime.datetime.now(), command)) - if os.system('%s >> %s 2>&1' % (command, LOG)): - msg = ' FAILED: %s [see log %s]' % (command, LOG) - if not quiet: - print(msg) - raise RuntimeError(msg) - -try: - JAVA_HOME = env['JAVA_HOME'] -except KeyError: - raise 
RuntimeError(""" - Please set JAVA_HOME in the env before running release tool - On OSX use: export JAVA_HOME=`/usr/libexec/java_home -v '1.6*'`""") - -try: - MVN='mvn' - # make sure mvn3 is used if mvn3 is available - # some systems use maven 2 as default - run('mvn3 --version', quiet=True) - MVN='mvn3' -except RuntimeError: - pass - - -def java_exe(): - path = JAVA_HOME - return 'export JAVA_HOME="%s" PATH="%s/bin:$PATH" JAVACMD="%s/bin/java"' % (path, path, path) - -# Returns the hash of the current git HEAD revision -def get_head_hash(): - return os.popen(' git rev-parse --verify HEAD 2>&1').read().strip() - -# Returns the hash of the given tag revision -def get_tag_hash(tag): - return os.popen('git show-ref --tags %s --hash 2>&1' % (tag)).read().strip() - -# Returns the name of the current branch -def get_current_branch(): - return os.popen('git rev-parse --abbrev-ref HEAD 2>&1').read().strip() - -# Utility that returns the name of the release branch for a given version -def release_branch(version): - return 'release_branch_%s' % version - -# runs get fetch on the given remote -def fetch(remote): - run('git fetch %s' % remote) - -# Creates a new release branch from the given source branch -# and rebases the source branch from the remote before creating -# the release branch. Note: This fails if the source branch -# doesn't exist on the provided remote. -def create_release_branch(remote, src_branch, release): - run('git checkout %s' % src_branch) - run('git pull --rebase %s %s' % (remote, src_branch)) - run('git checkout -b %s' % (release_branch(release))) - - -# Reads the given file and applies the -# callback to it. If the callback changed -# a line the given file is replaced with -# the modified input. 
-def process_file(file_path, line_callback): - fh, abs_path = tempfile.mkstemp() - modified = False - with open(abs_path,'w', encoding='utf-8') as new_file: - with open(file_path, encoding='utf-8') as old_file: - for line in old_file: - new_line = line_callback(line) - modified = modified or (new_line != line) - new_file.write(new_line) - os.close(fh) - if modified: - #Remove original file - os.remove(file_path) - #Move new file - shutil.move(abs_path, file_path) - return True - else: - # nothing to do - just remove the tmp file - os.remove(abs_path) - return False - -# Guess the next snapshot version number (increment second digit) -def guess_snapshot(version): - digits=list(map(int, re.findall(r'\d+', version))) - source='%s.%s' % (digits[0], digits[1]) - destination='%s.%s' % (digits[0], digits[1]+1) - return version.replace(source, destination) - -# Moves the pom.xml file from a snapshot to a release -def remove_maven_snapshot(pom, release): - pattern = '%s-SNAPSHOT' % release - replacement = '%s' % release - def callback(line): - return line.replace(pattern, replacement) - process_file(pom, callback) - -# Moves the README.md file from a snapshot to a release -def remove_version_snapshot(readme_file, release): - pattern = '%s-SNAPSHOT' % release - replacement = '%s ' % release - def callback(line): - return line.replace(pattern, replacement) - process_file(readme_file, callback) - -# Moves the pom.xml file to the next snapshot -def add_maven_snapshot(pom, release, snapshot): - pattern = '%s' % release - replacement = '%s-SNAPSHOT' % snapshot - def callback(line): - return line.replace(pattern, replacement) - process_file(pom, callback) - -# Add in README.md file the next snapshot -def add_version_snapshot(readme_file, release, snapshot): - pattern = '| %s ' % release - replacement = '| %s-SNAPSHOT' % snapshot - def callback(line): - # If we find pattern, we copy the line and replace its content - if line.find(pattern) >= 0: - return line.replace(pattern, 
replacement).replace('%s' % (datetime.datetime.now().strftime("%Y-%m-%d")), - 'XXXX-XX-XX')+line - else: - return line - process_file(readme_file, callback) - -# Moves the README.md file from a snapshot to a release (documentation link) -def remove_documentation_snapshot(readme_file, repo_url, release, branch): - pattern = '* [%s-SNAPSHOT](%sblob/%s/README.md)' % (release, repo_url, branch) - replacement = '* [%s](%sblob/v%s/README.md)' % (release, repo_url, release) - def callback(line): - # If we find pattern, we replace its content - if line.find(pattern) >= 0: - return line.replace(pattern, replacement) - else: - return line - process_file(readme_file, callback) - -# Add in README.markdown file the documentation for the next version -def add_documentation_snapshot(readme_file, repo_url, release, snapshot, branch): - pattern = '* [%s](%sblob/v%s/README.md)' % (release, repo_url, release) - replacement = '* [%s-SNAPSHOT](%sblob/%s/README.md)' % (snapshot, repo_url, branch) - def callback(line): - # If we find pattern, we copy the line and replace its content - if line.find(pattern) >= 0: - return line.replace(pattern, replacement)+line - else: - return line - process_file(readme_file, callback) - -# Set release date in README.md file -def set_date(readme_file): - pattern = 'XXXX-XX-XX' - replacement = '%s' % (datetime.datetime.now().strftime("%Y-%m-%d")) - def callback(line): - return line.replace(pattern, replacement) - process_file(readme_file, callback) - -# Update installation instructions in README.md file -def set_install_instructions(readme_file, artifact_name, release): - pattern = '`bin/plugin -install elasticsearch/%s/.+`' % artifact_name - replacement = '`bin/plugin -install elasticsearch/%s/%s`' % (artifact_name, release) - def callback(line): - return re.sub(pattern, replacement, line) - process_file(readme_file, callback) - - -# Stages the given files for the next git commit -def add_pending_files(*files): - for file in files: - run('git add %s' % 
file) - -# Executes a git commit with 'release [version]' as the commit message -def commit_release(artifact_id, release): - run('git commit -m "prepare release %s-%s"' % (artifact_id, release)) - -def commit_snapshot(): - run('git commit -m "prepare for next development iteration"') - -def tag_release(release): - run('git tag -a v%s -m "Tag release version %s"' % (release, release)) - -def run_mvn(*cmd): - for c in cmd: - run('%s; %s -f %s %s' % (java_exe(), MVN, POM_FILE, c)) - -def build_release(run_tests=False, dry_run=True): - target = 'deploy' - if dry_run: - target = 'package' - if run_tests: - run_mvn('clean test') - run_mvn('clean %s -DskipTests' %(target)) - -# Checks the pom.xml for the release version. 2.0.0-SNAPSHOT -# This method fails if the pom file has no SNAPSHOT version set ie. -# if the version is already on a release version we fail. -# Returns the next version string ie. 0.90.7 -def find_release_version(src_branch): - run('git checkout %s' % src_branch) - with open(POM_FILE, encoding='utf-8') as file: - for line in file: - match = re.search(r'(.+)-SNAPSHOT', line) - if match: - return match.group(1) - raise RuntimeError('Could not find release version in branch %s' % src_branch) - -# extract a value from pom.xml -def find_from_pom(tag): - with open(POM_FILE, encoding='utf-8') as file: - for line in file: - match = re.search(r'<%s>(.+)' % (tag, tag), line) - if match: - return match.group(1) - raise RuntimeError('Could not find <%s> in pom.xml file' % (tag)) - -def get_artifacts(artifact_id, release): - artifact_path = ROOT_DIR + 'target/releases/%s-%s.zip' % (artifact_id, release) - print(' Path %s' % (artifact_path)) - if not os.path.isfile(artifact_path): - raise RuntimeError('Could not find required artifact at %s' % (artifact_path)) - return artifact_path - -# Generates sha1 for a file -# and returns the checksum files as well -# as the given files in a list -def generate_checksums(release_file): - res = [] - directory = 
os.path.dirname(release_file) - file = os.path.basename(release_file) - checksum_file = '%s.sha1.txt' % file - - if os.system('cd %s; shasum %s > %s' % (directory, file, checksum_file)): - raise RuntimeError('Failed to generate checksum for file %s' % release_file) - res = res + [os.path.join(directory, checksum_file), release_file] - return res - -def git_merge(src_branch, release_version): - run('git checkout %s' % src_branch) - run('git merge %s' % release_branch(release_version)) - -def git_push(remote, src_branch, release_version, dry_run): - if not dry_run: - run('git push %s %s' % (remote, src_branch)) # push the commit - run('git push %s v%s' % (remote, release_version)) # push the tag - else: - print(' dryrun [True] -- skipping push to remote %s' % remote) - -def publish_artifacts(artifacts, base='elasticsearch/elasticsearch', dry_run=True): - location = os.path.dirname(os.path.realpath(__file__)) - for artifact in artifacts: - if dry_run: - print('Skip Uploading %s to Amazon S3 in %s' % (artifact, base)) - else: - print('Uploading %s to Amazon S3' % artifact) - # requires boto to be installed but it is not available on python3k yet so we use a dedicated tool - run('python %s/upload-s3.py --file %s --path %s' % (location, os.path.abspath(artifact), base)) - - -################# -## -## -## Email and Github Management -## -## -################# -def format_issues_plain(issues, title='Fix'): - response = "" - - if len(issues) > 0: - response += '%s:\n' % title - for issue in issues: - response += ' * [%s] - %s (%s)\n' % (issue.number, issue.title, issue.html_url) - - return response - -def format_issues_html(issues, title='Fix'): - response = "" - - if len(issues) > 0: - response += '

%s

\n
    \n' % title - for issue in issues: - response += '
  • [%s] - %s\n' % (issue.html_url, issue.number, issue.title) - response += '
\n' - - return response - -def get_github_repository(reponame, - login=env.get('GITHUB_LOGIN', None), - password=env.get('GITHUB_PASSWORD', None), - key=env.get('GITHUB_KEY', None)): - if login: - g = github3.login(login, password) - elif key: - g = github3.login(token=key) - else: - g = github3.GitHub() - - return g.repository("elasticsearch", reponame) - -# Check if there are some remaining open issues and fails -def check_opened_issues(version, repository, reponame): - opened_issues = [i for i in repository.iter_issues(state='open', labels='%s' % version)] - if len(opened_issues)>0: - raise NameError('Some issues [%s] are still opened. Check https://github.com/elasticsearch/%s/issues?labels=%s&state=open' - % (len(opened_issues), reponame, version)) - -# List issues from github: can be done anonymously if you don't -# exceed a given number of github API calls per day -# Check if there are some remaining open issues and fails -def list_issues(version, - repository, - severity='bug'): - issues = [i for i in repository.iter_issues(state='closed', labels='%s,%s' % (severity, version))] - return issues - -# Get issues from github and generates a Plain/HTML Multipart email -# And send it if dry_run=False -def prepare_email(artifact_id, release_version, repository, - artifact_name, artifact_description, project_url, - severity_labels_bug='bug', - severity_labels_update='update', - severity_labels_new='new', - severity_labels_doc='doc'): - - ## Get bugs from github - issues_bug = list_issues(release_version, repository, severity=severity_labels_bug) - issues_update = list_issues(release_version, repository, severity=severity_labels_update) - issues_new = list_issues(release_version, repository, severity=severity_labels_new) - issues_doc = list_issues(release_version, repository, severity=severity_labels_doc) - - ## Format content to plain text - plain_issues_bug = format_issues_plain(issues_bug, 'Fix') - plain_issues_update = format_issues_plain(issues_update, 'Update') 
- plain_issues_new = format_issues_plain(issues_new, 'New') - plain_issues_doc = format_issues_plain(issues_doc, 'Doc') - - ## Format content to html - html_issues_bug = format_issues_html(issues_bug, 'Fix') - html_issues_update = format_issues_html(issues_update, 'Update') - html_issues_new = format_issues_html(issues_new, 'New') - html_issues_doc = format_issues_html(issues_doc, 'Doc') - - if len(issues_bug)+len(issues_update)+len(issues_new)+len(issues_doc) > 0: - plain_empty_message = "" - html_empty_message = "" - - else: - plain_empty_message = "No issue listed for this release" - html_empty_message = "

No issue listed for this release

" - - msg = MIMEMultipart('alternative') - msg['Subject'] = '[ANN] %s %s released' % (artifact_name, release_version) - text = """ -Heya, - - -We are pleased to announce the release of the %(artifact_name)s, version %(release_version)s. - -%(artifact_description)s. - -%(project_url)s - -Release Notes - %(artifact_id)s - Version %(release_version)s - -%(empty_message)s -%(issues_bug)s -%(issues_update)s -%(issues_new)s -%(issues_doc)s - -Issues, Pull requests, Feature requests are warmly welcome on %(artifact_id)s project repository: %(project_url)s -For questions or comments around this plugin, feel free to use elasticsearch mailing list: https://groups.google.com/forum/#!forum/elasticsearch - -Enjoy, - --The Elasticsearch team -""" % {'release_version': release_version, - 'artifact_id': artifact_id, - 'artifact_name': artifact_name, - 'artifact_description': artifact_description, - 'project_url': project_url, - 'empty_message': plain_empty_message, - 'issues_bug': plain_issues_bug, - 'issues_update': plain_issues_update, - 'issues_new': plain_issues_new, - 'issues_doc': plain_issues_doc} - - html = """ - - -

Heya,

- -

We are pleased to announce the release of the %(artifact_name)s, version %(release_version)s

- -
%(artifact_description)s.
- -

Release Notes - Version %(release_version)s

-%(empty_message)s -%(issues_bug)s -%(issues_update)s -%(issues_new)s -%(issues_doc)s - -

Issues, Pull requests, Feature requests are warmly welcome on -%(artifact_id)s project repository!

-

For questions or comments around this plugin, feel free to use elasticsearch -mailing list!

- -

Enjoy,

- -

- The Elasticsearch team

- -""" % {'release_version': release_version, - 'artifact_id': artifact_id, - 'artifact_name': artifact_name, - 'artifact_description': artifact_description, - 'project_url': project_url, - 'empty_message': html_empty_message, - 'issues_bug': html_issues_bug, - 'issues_update': html_issues_update, - 'issues_new': html_issues_new, - 'issues_doc': html_issues_doc} - - # Record the MIME types of both parts - text/plain and text/html. - part1 = MIMEText(text, 'plain') - part2 = MIMEText(html, 'html') - - # Attach parts into message container. - # According to RFC 2046, the last part of a multipart message, in this case - # the HTML message, is best and preferred. - msg.attach(part1) - msg.attach(part2) - - return msg - -def send_email(msg, - dry_run=True, - mail=True, - sender=env.get('MAIL_SENDER'), - to=env.get('MAIL_TO', 'elasticsearch@googlegroups.com'), - smtp_server=env.get('SMTP_SERVER', 'localhost')): - msg['From'] = 'Elasticsearch Team <%s>' % sender - msg['To'] = 'Elasticsearch Mailing List <%s>' % to - # save mail on disk - with open(ROOT_DIR+'target/email.txt', 'w') as email_file: - email_file.write(msg.as_string()) - if mail and not dry_run: - s = smtplib.SMTP(smtp_server, 25) - s.sendmail(sender, to, msg.as_string()) - s.quit() - else: - print('generated email: open %starget/email.txt' % ROOT_DIR) - -def print_sonatype_notice(): - settings = os.path.join(os.path.expanduser('~'), '.m2/settings.xml') - if os.path.isfile(settings): - with open(settings, encoding='utf-8') as settings_file: - for line in settings_file: - if line.strip() == 'sonatype-nexus-snapshots': - # moving out - we found the indicator no need to print the warning - return - print(""" - NOTE: No sonatype settings detected, make sure you have configured - your sonatype credentials in '~/.m2/settings.xml': - - - ... - - - sonatype-nexus-snapshots - your-jira-id - your-jira-pwd - - - sonatype-nexus-staging - your-jira-id - your-jira-pwd - - - ... 
- - """) - -def check_s3_credentials(): - if not env.get('AWS_ACCESS_KEY_ID', None) or not env.get('AWS_SECRET_ACCESS_KEY', None): - raise RuntimeError('Could not find "AWS_ACCESS_KEY_ID" / "AWS_SECRET_ACCESS_KEY" in the env variables please export in order to upload to S3') - -def check_github_credentials(): - if not env.get('GITHUB_KEY', None) and not env.get('GITHUB_LOGIN', None): - log('WARN: Could not find "GITHUB_LOGIN" / "GITHUB_PASSWORD" or "GITHUB_KEY" in the env variables. You could need it.') - -def check_email_settings(): - if not env.get('MAIL_SENDER', None): - raise RuntimeError('Could not find "MAIL_SENDER"') - -# we print a notice if we can not find the relevant infos in the ~/.m2/settings.xml -print_sonatype_notice() - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Builds and publishes a Elasticsearch Plugin Release') - parser.add_argument('--branch', '-b', metavar='master', default=get_current_branch(), - help='The branch to release from. Defaults to the current branch.') - parser.add_argument('--skiptests', '-t', dest='tests', action='store_false', - help='Skips tests before release. Tests are run by default.') - parser.set_defaults(tests=True) - parser.add_argument('--remote', '-r', metavar='origin', default='origin', - help='The remote to push the release commit and tag to. Default is [origin]') - parser.add_argument('--publish', '-p', dest='dryrun', action='store_false', - help='Publishes the release. Disable by default.') - parser.add_argument('--disable_mail', '-dm', dest='mail', action='store_false', - help='Do not send a release email. 
Email is sent by default.') - - parser.set_defaults(dryrun=True) - parser.set_defaults(mail=True) - args = parser.parse_args() - - src_branch = args.branch - remote = args.remote - run_tests = args.tests - dry_run = args.dryrun - mail = args.mail - - if not dry_run: - check_s3_credentials() - print('WARNING: dryrun is set to "false" - this will push and publish the release') - if mail: - check_email_settings() - print('An email to %s will be sent after the release' - % env.get('MAIL_TO', 'elasticsearch@googlegroups.com')) - input('Press Enter to continue...') - - check_github_credentials() - - print(''.join(['-' for _ in range(80)])) - print('Preparing Release from branch [%s] running tests: [%s] dryrun: [%s]' % (src_branch, run_tests, dry_run)) - print(' JAVA_HOME is [%s]' % JAVA_HOME) - print(' Running with maven command: [%s] ' % (MVN)) - - release_version = find_release_version(src_branch) - artifact_id = find_from_pom('artifactId') - artifact_name = find_from_pom('name') - artifact_description = find_from_pom('description') - project_url = find_from_pom('url') - elasticsearch_version = find_from_pom('elasticsearch.version') - print(' Artifact Id: [%s]' % artifact_id) - print(' Release version: [%s]' % release_version) - print(' Elasticsearch: [%s]' % elasticsearch_version) - if elasticsearch_version.find('-SNAPSHOT') != -1: - raise RuntimeError('Can not release with a SNAPSHOT elasticsearch dependency: %s' % elasticsearch_version) - - # extract snapshot - default_snapshot_version = guess_snapshot(release_version) - snapshot_version = input('Enter next snapshot version [%s]:' % default_snapshot_version) - snapshot_version = snapshot_version or default_snapshot_version - - print(' Next version: [%s-SNAPSHOT]' % snapshot_version) - print(' Artifact Name: [%s]' % artifact_name) - print(' Artifact Description: [%s]' % artifact_description) - print(' Project URL: [%s]' % project_url) - - if not dry_run: - smoke_test_version = release_version - head_hash = 
get_head_hash() - run_mvn('clean') # clean the env! - create_release_branch(remote, src_branch, release_version) - print(' Created release branch [%s]' % (release_branch(release_version))) - success = False - try: - pending_files = [POM_FILE, README_FILE] - remove_maven_snapshot(POM_FILE, release_version) - remove_documentation_snapshot(README_FILE, project_url, release_version, src_branch) - remove_version_snapshot(README_FILE, release_version) - set_date(README_FILE) - set_install_instructions(README_FILE, artifact_id, release_version) - print(' Done removing snapshot version') - add_pending_files(*pending_files) # expects var args use * to expand - commit_release(artifact_id, release_version) - print(' Committed release version [%s]' % release_version) - print(''.join(['-' for _ in range(80)])) - print('Building Release candidate') - input('Press Enter to continue...') - print(' Checking github issues') - repository = get_github_repository(artifact_id) - check_opened_issues(release_version, repository, artifact_id) - if not dry_run: - print(' Running maven builds now and publish to sonatype - run-tests [%s]' % run_tests) - else: - print(' Running maven builds now run-tests [%s]' % run_tests) - build_release(run_tests=run_tests, dry_run=dry_run) - artifact = get_artifacts(artifact_id, release_version) - artifact_and_checksums = generate_checksums(artifact) - print(''.join(['-' for _ in range(80)])) - - print('Finish Release -- dry_run: %s' % dry_run) - input('Press Enter to continue...') - print(' merge release branch') - git_merge(src_branch, release_version) - print(' tag') - tag_release(release_version) - - add_maven_snapshot(POM_FILE, release_version, snapshot_version) - add_version_snapshot(README_FILE, release_version, snapshot_version) - add_documentation_snapshot(README_FILE, project_url, release_version, snapshot_version, src_branch) - add_pending_files(*pending_files) - commit_snapshot() - - print(' push to %s %s -- dry_run: %s' % (remote, src_branch, 
dry_run)) - git_push(remote, src_branch, release_version, dry_run) - print(' publish artifacts to S3 -- dry_run: %s' % dry_run) - publish_artifacts(artifact_and_checksums, base='elasticsearch/%s' % (artifact_id) , dry_run=dry_run) - print(' preparing email (from github issues)') - msg = prepare_email(artifact_id, release_version, repository, artifact_name, artifact_description, project_url) - print(' sending email -- dry_run: %s, mail: %s' % (dry_run, mail)) - send_email(msg, dry_run=dry_run, mail=mail) - - pending_msg = """ -Release successful pending steps: - * close and release sonatype repo: https://oss.sonatype.org/ - * check if the release is there https://oss.sonatype.org/content/repositories/releases/org/elasticsearch/%(artifact_id)s/%(version)s - * tweet about the release -""" - print(pending_msg % {'version': release_version, - 'artifact_id': artifact_id, - 'project_url': project_url}) - success = True - finally: - if not success: - run('git reset --hard HEAD') - run('git checkout %s' % src_branch) - elif dry_run: - print('End of dry_run') - input('Press Enter to reset changes...') - - run('git reset --hard %s' % head_hash) - run('git tag -d v%s' % release_version) - # we delete this one anyways - run('git branch -D %s' % (release_branch(release_version))) diff --git a/dev-tools/release.py b/dev-tools/release.py new file mode 100644 index 00000000000..edcc637d068 --- /dev/null +++ b/dev-tools/release.py @@ -0,0 +1,134 @@ +# Licensed to Elasticsearch under one or more contributor +# license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright +# ownership. Elasticsearch licenses this file to you under +# the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on +# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +# either express or implied. See the License for the specific +# language governing permissions and limitations under the License. + +import datetime +import os +import shutil +import sys +import time +import urllib +import urllib.request +import zipfile + +from os.path import dirname, abspath + +""" + This tool builds a release from the a given elasticsearch plugin branch. + + It is basically a wrapper on top of launch_release.py which: + + - tries to get a more recent version of launch_release.py in ... + - download it if needed + - launch it passing all arguments to it, like: + + $ python3 dev_tools/release.py --branch master --publish --remote origin + + Important options: + + # Dry run + $ python3 dev_tools/release.py + + # Dry run without tests + python3 dev_tools/release.py --skiptests + + # Release, publish artifacts and announce + $ python3 dev_tools/release.py --publish + + See full documentation in launch_release.py +""" +env = os.environ + +# Change this if the source repository for your scripts is at a different location +SOURCE_REPO = 'elasticsearch/elasticsearch-plugins-script' +# We define that we should download again the script after 1 days +SCRIPT_OBSOLETE_DAYS = 1 +# We ignore in master.zip file the following files +IGNORED_FILES = ['.gitignore', 'README.md'] + + +ROOT_DIR = abspath(os.path.join(abspath(dirname(__file__)), '../')) +TARGET_TOOLS_DIR = ROOT_DIR + '/plugin_tools' +DEV_TOOLS_DIR = ROOT_DIR + '/dev-tools' +BUILD_RELEASE_FILENAME = 'release.zip' +BUILD_RELEASE_FILE = TARGET_TOOLS_DIR + '/' + BUILD_RELEASE_FILENAME +SOURCE_URL = 'https://github.com/%s/archive/master.zip' % SOURCE_REPO + +# Download a recent version of the release plugin tool +try: + 
os.mkdir(TARGET_TOOLS_DIR) + print('directory %s created' % TARGET_TOOLS_DIR) +except FileExistsError: + pass + + +try: + # we check latest update. If we ran an update recently, we + # are not going to check it again + download = True + + try: + last_download_time = datetime.datetime.fromtimestamp(os.path.getmtime(BUILD_RELEASE_FILE)) + if (datetime.datetime.now()-last_download_time).days < SCRIPT_OBSOLETE_DAYS: + download = False + except FileNotFoundError: + pass + + if download: + urllib.request.urlretrieve(SOURCE_URL, BUILD_RELEASE_FILE) + with zipfile.ZipFile(BUILD_RELEASE_FILE) as myzip: + for member in myzip.infolist(): + filename = os.path.basename(member.filename) + # skip directories + if not filename: + continue + if filename in IGNORED_FILES: + continue + + # copy file (taken from zipfile's extract) + source = myzip.open(member.filename) + target = open(os.path.join(TARGET_TOOLS_DIR, filename), "wb") + with source, target: + shutil.copyfileobj(source, target) + # We keep the original date + date_time = time.mktime(member.date_time + (0, 0, -1)) + os.utime(os.path.join(TARGET_TOOLS_DIR, filename), (date_time, date_time)) + print('plugin-tools updated from %s' % SOURCE_URL) +except urllib.error.HTTPError: + pass + + +# Let see if we need to update the release.py script itself +source_time = os.path.getmtime(TARGET_TOOLS_DIR + '/release.py') +repo_time = os.path.getmtime(DEV_TOOLS_DIR + '/release.py') +if source_time > repo_time: + input('release.py needs an update. 
Press a key to update it...') + shutil.copyfile(TARGET_TOOLS_DIR + '/release.py', DEV_TOOLS_DIR + '/release.py') + +# We can launch the build process +try: + PYTHON = 'python' + # make sure python3 is used if python3 is available + # some systems use python 2 as default + os.system('python3 --version > /dev/null 2>&1') + PYTHON = 'python3' +except RuntimeError: + pass + +release_args = '' +for x in range(1, len(sys.argv)): + release_args += ' ' + sys.argv[x] + +os.system('%s %s/build_release.py %s' % (PYTHON, TARGET_TOOLS_DIR, release_args)) diff --git a/dev-tools/upload-s3.py b/dev-tools/upload-s3.py deleted file mode 100644 index 95ea576e65c..00000000000 --- a/dev-tools/upload-s3.py +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to Elasticsearch under one or more contributor -# license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright -# ownership. Elasticsearch licenses this file to you under -# the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on -# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied. See the License for the specific -# language governing permissions and limitations under the License. 
- -import os -import sys -import argparse -try: - import boto.s3 -except: - raise RuntimeError(""" - S3 upload requires boto to be installed - Use one of: - 'pip install -U boto' - 'apt-get install python-boto' - 'easy_install boto' - """) - -import boto.s3 - - -def list_buckets(conn): - return conn.get_all_buckets() - - -def upload_s3(conn, path, key, file, bucket): - print 'Uploading %s to Amazon S3 bucket %s/%s' % \ - (file, bucket, os.path.join(path, key)) - def percent_cb(complete, total): - sys.stdout.write('.') - sys.stdout.flush() - bucket = conn.create_bucket(bucket) - k = bucket.new_key(os.path.join(path, key)) - k.set_contents_from_filename(file, cb=percent_cb, num_cb=100) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Uploads files to Amazon S3') - parser.add_argument('--file', '-f', metavar='path to file', - help='the branch to release from', required=True) - parser.add_argument('--bucket', '-b', metavar='B42', default='download.elasticsearch.org', - help='The S3 Bucket to upload to') - parser.add_argument('--path', '-p', metavar='elasticsearch/elasticsearch', default='elasticsearch/elasticsearch', - help='The key path to use') - parser.add_argument('--key', '-k', metavar='key', default=None, - help='The key - uses the file name as default key') - args = parser.parse_args() - if args.key: - key = args.key - else: - key = os.path.basename(args.file) - - connection = boto.connect_s3() - upload_s3(connection, args.path, key, args.file, args.bucket); - From b7c56496e942ffeccd20e532c33d7003b769135c Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 8 Sep 2014 18:02:14 +0200 Subject: [PATCH 064/103] Update to Lucene 4.10.0 Closes #32. 
--- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7dc31696088..1c905ce7d48 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ 2.0.0-SNAPSHOT - 4.9.0 + 4.10.0 1 true onerror From 4c91b8832d1b1a1cf773cf285e0e41e4f650a897 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 8 Sep 2014 18:07:32 +0200 Subject: [PATCH 065/103] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index eb406502854..ea5162906a1 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.3.0 ``` -| elasticsearch | ICU Analysis Plugin | Docs | +| elasticsearch |Phonetic Analysis Plugin| Docs | |---------------|-----------------------|------------| | master | Build from source | See below | | es-1.x | Build from source | [2.4.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-240-snapshot-for-elasticsearch-1x) | From 0199979360f344a4e49e9c9d156deee9ebab2ce9 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 15 Sep 2014 13:55:39 +0200 Subject: [PATCH 066/103] Create branch es-1.4 for elasticsearch 1.4.0 --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index ea5162906a1..90802acd3bc 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,8 @@ bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.3.0 | elasticsearch |Phonetic Analysis Plugin| Docs | |---------------|-----------------------|------------| | master | Build from source | See below | -| es-1.x | Build from source | [2.4.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-240-snapshot-for-elasticsearch-1x) | +| es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | +| es-1.4 | Build from 
source | [2.4.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.4/#version-240-snapshot-for-elasticsearch-1x) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.2.0/#phonetic-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.1.0/#phonetic-analysis-for-elasticsearch) | From 812bef4c0e764fb18c13bee81aff64137e219a7a Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Wed, 24 Sep 2014 17:23:15 -0400 Subject: [PATCH 067/103] Upgrade to Lucene 4.10.1 snapshot --- pom.xml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 1c905ce7d48..14aee2bb158 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,8 @@ 2.0.0-SNAPSHOT - 4.10.0 + 4.10.1 + 4.10.1-snapshot-1627368 1 true onerror @@ -42,6 +43,10 @@ + + Lucene snapshots + https://download.elasticsearch.org/lucenesnapshots/ + sonatype http://oss.sonatype.org/content/repositories/releases/ @@ -58,7 +63,7 @@ org.apache.lucene lucene-test-framework - ${lucene.version} + ${lucene.maven.version} test @@ -72,7 +77,7 @@ org.apache.lucene lucene-analyzers-phonetic - ${lucene.version} + ${lucene.maven.version} compile From da368cfb3490dcda0b42895a12f773d76bc3dbc6 Mon Sep 17 00:00:00 2001 From: mikemccand Date: Sun, 28 Sep 2014 17:56:48 -0400 Subject: [PATCH 068/103] Upgrade to Lucene 4.10.1 --- pom.xml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index 14aee2bb158..1ea05fe634c 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ 2.0.0-SNAPSHOT 4.10.1 - 4.10.1-snapshot-1627368 + 4.10.1 1 true onerror @@ -43,10 +43,6 @@ - - Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/ - sonatype 
http://oss.sonatype.org/content/repositories/releases/ From af36a3a727879a478850b25acd9e4249f8207b30 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 2 Oct 2014 09:18:43 +0200 Subject: [PATCH 069/103] Update to elasticsearch 1.4.0.Beta1 (cherry picked from commit c082405) --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 1ea05fe634c..59cf5264802 100644 --- a/pom.xml +++ b/pom.xml @@ -109,8 +109,8 @@ maven-compiler-plugin 2.3.2 - 1.6 - 1.6 + 1.7 + 1.7
From 6a5f662c4dd7a6503fbbe5c0f15f66ab13e8b0f6 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 2 Oct 2014 09:24:03 +0200 Subject: [PATCH 070/103] update documentation with release 2.4.0 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 90802acd3bc..ab5ba78481f 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: ```sh -bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.3.0 +bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.4.0 ``` @@ -14,7 +14,7 @@ bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.3.0 |---------------|-----------------------|------------| | master | Build from source | See below | | es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | -| es-1.4 | Build from source | [2.4.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.4/#version-240-snapshot-for-elasticsearch-1x) | +| es-1.4 | 2.4.0 | [2.4.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.0/#version-240-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.2.0/#phonetic-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.1.0/#phonetic-analysis-for-elasticsearch) | From bcdba5dbf7365520ba298a821bc5811304994ccc Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 15 Oct 2014 15:29:52 +0200 Subject: [PATCH 071/103] Tests: index.version.created must be set Due to this 
[change](https://github.com/elasticsearch/elasticsearch/pull/8018), we need to fix our tests for elasticsearch 1.4.0 and above. Closes #33. (cherry picked from commit cb869cd) --- .../index/analysis/SimplePhoneticAnalysisTests.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java index d3aa66b451f..e64b66dc82a 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java @@ -19,6 +19,8 @@ package org.elasticsearch.index.analysis; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.inject.ModulesBuilder; import org.elasticsearch.common.settings.Settings; @@ -43,7 +45,9 @@ public class SimplePhoneticAnalysisTests extends ElasticsearchTestCase { @Test public void testPhoneticTokenFilterFactory() { - Settings settings = settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/phonetic-1.yml").build(); + Settings settings = settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/phonetic-1.yml") + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .build(); AnalysisService analysisService = testSimpleConfiguration(settings); TokenFilterFactory filterFactory = analysisService.tokenFilter("phonetic"); MatcherAssert.assertThat(filterFactory, instanceOf(PhoneticTokenFilterFactory.class)); From 88d269081af733def021ea2a3e17e30e456eda5e Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Thu, 30 Oct 2014 14:51:10 +0900 Subject: [PATCH 072/103] Tests: Fix randomizedtest fail Closes #34 --- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index 59cf5264802..2ab339339fb 100644 --- a/pom.xml +++ 
b/pom.xml @@ -56,6 +56,12 @@ 1.3 test + + com.carrotsearch.randomizedtesting + randomizedtesting-runner + 2.1.10 + test + org.apache.lucene lucene-test-framework From 0f236cfe1f52dfcff74bf182ba6baed97feffae7 Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Thu, 30 Oct 2014 14:52:22 +0900 Subject: [PATCH 073/103] Update to Lucene 4.10.2 Closes #35 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 2ab339339fb..2936bedb1ba 100644 --- a/pom.xml +++ b/pom.xml @@ -33,8 +33,8 @@ 2.0.0-SNAPSHOT - 4.10.1 - 4.10.1 + 4.10.2 + 4.10.2 1 true onerror From 27ed104ca1469d4b4a7a044668d3a890b2400cef Mon Sep 17 00:00:00 2001 From: tlrx Date: Wed, 5 Nov 2014 17:14:59 +0100 Subject: [PATCH 074/103] update documentation with release 2.4.1 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ab5ba78481f..f2ccd706f3f 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: ```sh -bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.4.0 +bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.4.1 ``` @@ -14,7 +14,7 @@ bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.4.0 |---------------|-----------------------|------------| | master | Build from source | See below | | es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | -| es-1.4 | 2.4.0 | [2.4.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.0/#version-240-for-elasticsearch-14) | +| es-1.4 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | 
[2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.2.0/#phonetic-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.1.0/#phonetic-analysis-for-elasticsearch) | From 45944f225ebbc10294e2318f4227d6c5904ba81c Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 5 Nov 2014 16:42:46 -0500 Subject: [PATCH 075/103] upgrade to lucene 5 snapshot --- pom.xml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 2936bedb1ba..b558a78efad 100644 --- a/pom.xml +++ b/pom.xml @@ -33,8 +33,8 @@ 2.0.0-SNAPSHOT - 4.10.2 - 4.10.2 + 5.0.0 + 5.0.0-snapshot-1636426 1 true onerror @@ -47,6 +47,10 @@ sonatype http://oss.sonatype.org/content/repositories/releases/ + + Lucene snapshots + https://download.elasticsearch.org/lucenesnapshots/maven/ +
From 27df13704db3374583b5d671dbfa597b70f630cc Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 10 Nov 2014 16:45:40 -0500 Subject: [PATCH 076/103] Upgrade to Lucene 5.0.0-snapshot-1637347 --- pom.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pom.xml b/pom.xml index b558a78efad..1015b1289e6 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ 2.0.0-SNAPSHOT 5.0.0 - 5.0.0-snapshot-1636426 + 5.0.0-snapshot-1637347 1 true onerror @@ -44,12 +44,12 @@ - sonatype - http://oss.sonatype.org/content/repositories/releases/ + Lucene snapshots + https://download.elasticsearch.org/lucenesnapshots/1637347/ - Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/maven/ + sonatype + http://oss.sonatype.org/content/repositories/releases/ From bf4bc0bfc7e4141d54c5eb9aca687035b5b9da2e Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 24 Nov 2014 05:51:36 -0500 Subject: [PATCH 077/103] Upgrade to Lucene 5.0.0-snapshot-1641343 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 1015b1289e6..c144d9d0089 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ 2.0.0-SNAPSHOT 5.0.0 - 5.0.0-snapshot-1637347 + 5.0.0-snapshot-1641343 1 true onerror @@ -45,7 +45,7 @@ Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/1637347/ + https://download.elasticsearch.org/lucenesnapshots/1641343/ sonatype From 00c11b72a7fd577219a6c1281ccea7aa7a812f2b Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 2 Dec 2014 18:15:43 +0100 Subject: [PATCH 078/103] Upgrade to Lucene 5.0.0-snapshot-1642891 --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index c144d9d0089..83541d32814 100644 --- a/pom.xml +++ b/pom.xml @@ -34,7 +34,7 @@ 2.0.0-SNAPSHOT 5.0.0 - 5.0.0-snapshot-1641343 + 5.0.0-snapshot-1642891 1 true onerror @@ -45,7 +45,7 @@ Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/1641343/ + 
https://download.elasticsearch.org/lucenesnapshots/1642891/ sonatype From d2c39ac8da41a0fdd0840d8285a2fffc0dde0146 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 25 Nov 2014 11:57:05 +0100 Subject: [PATCH 079/103] Depend on elasticsearch-parent To simplify plugins maintenance and provide more value in the future, we are starting to build an `elasticsearch-parent` project. This PR is the first step for this plugin to depend on this new `pom` maven project. --- dev-tools/tests.policy | 54 ++++++++++++++ pom.xml | 157 ++++------------------------------------- 2 files changed, 68 insertions(+), 143 deletions(-) create mode 100644 dev-tools/tests.policy diff --git a/dev-tools/tests.policy b/dev-tools/tests.policy new file mode 100644 index 00000000000..6afb5025840 --- /dev/null +++ b/dev-tools/tests.policy @@ -0,0 +1,54 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +// Policy file to prevent tests from writing outside the test sandbox directory +// PLEASE NOTE: You may need to enable other permissions when new tests are added, +// everything not allowed here is forbidden! 
+ +grant { + // permissions for file access, write access only to sandbox: + permission java.io.FilePermission "<>", "read,execute"; + permission java.io.FilePermission "${junit4.childvm.cwd}", "read,execute,write"; + permission java.io.FilePermission "${junit4.childvm.cwd}${/}-", "read,execute,write,delete"; + permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete"; + permission groovy.security.GroovyCodeSourcePermission "/groovy/script"; + + // Allow connecting to the internet anywhere + permission java.net.SocketPermission "*", "accept,listen,connect,resolve"; + + // Basic permissions needed for Lucene / Elasticsearch to work: + permission java.util.PropertyPermission "*", "read,write"; + permission java.lang.reflect.ReflectPermission "*"; + permission java.lang.RuntimePermission "*"; + + // These two *have* to be spelled out a separate + permission java.lang.management.ManagementPermission "control"; + permission java.lang.management.ManagementPermission "monitor"; + + permission java.net.NetPermission "*"; + permission java.util.logging.LoggingPermission "control"; + permission javax.management.MBeanPermission "*", "*"; + permission javax.management.MBeanServerPermission "*"; + permission javax.management.MBeanTrustPermission "*"; + + // Needed for some things in DNS caching in the JVM + permission java.security.SecurityPermission "getProperty.networkaddress.cache.ttl"; + permission java.security.SecurityPermission "getProperty.networkaddress.cache.negative.ttl"; + +}; diff --git a/pom.xml b/pom.xml index 83541d32814..f0157aaa115 100644 --- a/pom.xml +++ b/pom.xml @@ -3,6 +3,13 @@ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 + + + org.elasticsearch + elasticsearch-parent + 2.0.0-SNAPSHOT + + org.elasticsearch elasticsearch-analysis-phonetic 3.0.0-SNAPSHOT @@ -20,86 +27,47 @@ 
scm:git:git@github.com:elasticsearch/elasticsearch-analysis-phonetic.git - scm:git:git@github.com:elasticsearch/elasticsearch-analysis-phonetic.git - + scm:git:git@github.com:elasticsearch/elasticsearch-analysis-phonetic.git http://github.com/elasticsearch/elasticsearch-analysis-phonetic - - org.sonatype.oss - oss-parent - 7 - - - 2.0.0-SNAPSHOT - 5.0.0 - 5.0.0-snapshot-1642891 - 1 - true - onerror - - INFO + - - - Lucene snapshots - https://download.elasticsearch.org/lucenesnapshots/1642891/ - - - sonatype - http://oss.sonatype.org/content/repositories/releases/ - - - org.hamcrest hamcrest-all - 1.3 - test - - - com.carrotsearch.randomizedtesting - randomizedtesting-runner - 2.1.10 - test org.apache.lucene lucene-test-framework - ${lucene.maven.version} - test + + + com.carrotsearch.randomizedtesting + randomizedtesting-runner org.elasticsearch elasticsearch - ${elasticsearch.version} - compile org.apache.lucene lucene-analyzers-phonetic - ${lucene.maven.version} - compile log4j log4j - 1.2.16 - runtime org.elasticsearch elasticsearch - ${elasticsearch.version} test-jar - test @@ -117,120 +85,23 @@ org.apache.maven.plugins maven-compiler-plugin - 2.3.2 - - 1.7 - 1.7 - com.carrotsearch.randomizedtesting junit4-maven-plugin - 2.0.12 - - - tests - test - - junit4 - - - 20 - pipe,warn - true - - - - - - - - - ${tests.jvms} - - - - - - - **/*Tests.class - **/*Test.class - - - **/Abstract*.class - **/*StressTest.class - - - -Xmx512m - -XX:MaxDirectMemorySize=512m - -Des.logger.prefix= - - ${tests.shuffle} - ${tests.verbose} - ${tests.seed} - ${tests.failfast} - - - ${tests.iters} - ${tests.maxfailures} - ${tests.failfast} - ${tests.class} - ${tests.method} - ${tests.nightly} - ${tests.badapples} - ${tests.weekly} - ${tests.slow} - ${tests.awaitsfix} - ${tests.slow} - ${tests.timeoutSuite} - ${tests.showSuccess} - ${tests.integration} - ${tests.cluster_seed} - ${tests.client.ratio} - ${env.ES_TEST_LOCAL} - ${es.node.mode} - ${es.logger.level} - true - - - - 
org.apache.maven.plugins maven-surefire-plugin - 2.15 - - true - org.apache.maven.plugins maven-source-plugin - 2.1.2 - - - attach-sources - - jar - - - + org.apache.maven.plugins maven-assembly-plugin - 2.3 false ${project.build.directory}/releases/ From d9b719558cc541126a12631e3068f87b58171325 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 12 Dec 2014 18:11:11 +0100 Subject: [PATCH 080/103] Add /.local-*-execution-hints.log --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index cba388bfce9..c006d1f3e88 100644 --- a/.gitignore +++ b/.gitignore @@ -8,5 +8,6 @@ /.project /.classpath /.settings -/.local-execution-hints.log /plugin_tools +/.local-execution-hints.log +/.local-*-execution-hints.log From d9e44a50e001f2800d291b69717fadc19e326784 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 2 Jan 2015 20:40:18 +0100 Subject: [PATCH 081/103] Add sonatype snapshot repository --- pom.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pom.xml b/pom.xml index f0157aaa115..be9a1f3298f 100644 --- a/pom.xml +++ b/pom.xml @@ -120,4 +120,12 @@ + + + + oss-snapshots + Sonatype OSS Snapshots + https://oss.sonatype.org/content/repositories/snapshots/ + + From cba9f87ef3b03164f6d8a4f33ec7ad55605d6017 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 11 Feb 2015 21:51:37 +0100 Subject: [PATCH 082/103] update documentation with release 2.4.2 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f2ccd706f3f..d29733c5d4b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: ```sh -bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.4.1 +bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.4.2 ``` @@ -14,7 +14,7 @@ bin/plugin -install elasticsearch/elasticsearch-analysis-phonetic/2.4.1 
|---------------|-----------------------|------------| | master | Build from source | See below | | es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | -| es-1.4 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.1/#version-241-for-elasticsearch-14) | +| es-1.4 | 2.4.2 | [2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.2/#version-242-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.2.0/#phonetic-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.1.0/#phonetic-analysis-for-elasticsearch) | From 260878b5e6c37e857a319a72253c055dbb6ef387 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 13 Feb 2015 16:43:33 +0100 Subject: [PATCH 083/103] Fix doc for es version < 1.4.3 --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index d29733c5d4b..a6341b32eb4 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.4.2 | master | Build from source | See below | | es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | | es-1.4 | 2.4.2 | [2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.2/#version-242-for-elasticsearch-14) | +| < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | 
[2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | | es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.2.0/#phonetic-analysis-for-elasticsearch) | | es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.1.0/#phonetic-analysis-for-elasticsearch) | From e1501ce1ed356c62b1b7084d5e5917cf2b8ac934 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 16 Mar 2015 16:27:55 -0700 Subject: [PATCH 084/103] create `es-1.5` branch --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a6341b32eb4..8ffc6cade57 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,8 @@ bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.4.2 | elasticsearch |Phonetic Analysis Plugin| Docs | |---------------|-----------------------|------------| | master | Build from source | See below | -| es-1.x | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-250-snapshot-for-elasticsearch-1x) | +| es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) | +| es-1.5 | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.5/#version-250-snapshot-for-elasticsearch-15) | | es-1.4 | 2.4.2 | [2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.2/#version-242-for-elasticsearch-14) | | < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | From 275e5fb349ec07fc947b78181e74c0ed2097ffcd Mon Sep 17 
00:00:00 2001 From: David Pilato Date: Tue, 24 Mar 2015 18:31:36 +0100 Subject: [PATCH 085/103] Move parent bloc after artifact coordinates --- pom.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pom.xml b/pom.xml index be9a1f3298f..10567e4ea42 100644 --- a/pom.xml +++ b/pom.xml @@ -4,12 +4,6 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - - org.elasticsearch - elasticsearch-parent - 2.0.0-SNAPSHOT - - org.elasticsearch elasticsearch-analysis-phonetic 3.0.0-SNAPSHOT @@ -31,6 +25,12 @@ http://github.com/elasticsearch/elasticsearch-analysis-phonetic + + org.elasticsearch + elasticsearch-parent + 2.0.0-SNAPSHOT + + From 01faea43da096809eda751788df1b94c57ededaa Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 24 Mar 2015 18:34:02 +0100 Subject: [PATCH 086/103] Move to elastic owner --- pom.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 10567e4ea42..2bd37665e4c 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ jar Elasticsearch Phonetic Analysis plugin The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. 
- https://github.com/elasticsearch/elasticsearch-analysis-phonetic/ + https://github.com/elastic/elasticsearch-analysis-phonetic/ 2009 @@ -20,9 +20,9 @@ - scm:git:git@github.com:elasticsearch/elasticsearch-analysis-phonetic.git - scm:git:git@github.com:elasticsearch/elasticsearch-analysis-phonetic.git - http://github.com/elasticsearch/elasticsearch-analysis-phonetic + scm:git:git@github.com:elastic/elasticsearch-analysis-phonetic.git + scm:git:git@github.com:elastic/elasticsearch-analysis-phonetic.git + http://github.com/elastic/elasticsearch-analysis-phonetic From 616c64d666d23cda76f94cc50c7bfd6de97d2460 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 24 Mar 2015 18:39:37 +0100 Subject: [PATCH 087/103] update documentation with release 2.5.0 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8ffc6cade57..7a0ba7cc92c 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: ```sh -bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.4.2 +bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.5.0 ``` @@ -14,7 +14,7 @@ bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.4.2 |---------------|-----------------------|------------| | master | Build from source | See below | | es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) | -| es-1.5 | Build from source | [2.5.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.5/#version-250-snapshot-for-elasticsearch-15) | +| es-1.5 | 2.5.0 | [2.5.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.5.0/#version-250-for-elasticsearch-15) | | es-1.4 | 2.4.2 | 
[2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.2/#version-242-for-elasticsearch-14) | | < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | From 2103c8f2d2a0710026032714e844a53d8de306ec Mon Sep 17 00:00:00 2001 From: David Pilato Date: Wed, 29 Apr 2015 18:52:41 +0200 Subject: [PATCH 088/103] Remove `ElasticsearchIllegalArgumentException` and `ElasticsearchIllegalStateException` in favor of the JDK one Related to https://github.com/elastic/elasticsearch/issues/10794 Closes #41. --- .../index/analysis/PhoneticTokenFilterFactory.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java index 479ea1ae899..b23f311268a 100644 --- a/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java @@ -29,7 +29,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.phonetic.BeiderMorseFilter; import org.apache.lucene.analysis.phonetic.DoubleMetaphoneFilter; import org.apache.lucene.analysis.phonetic.PhoneticFilter; -import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -90,7 +89,7 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { } else if ("exact".equalsIgnoreCase(ruleType)) { ruletype = RuleType.EXACT; } else { - throw new ElasticsearchIllegalArgumentException("No matching rule type [" + ruleType + "] 
for beider morse encoder"); + throw new IllegalArgumentException("No matching rule type [" + ruleType + "] for beider morse encoder"); } String nameType = settings.get("name_type", "generic"); if ("GENERIC".equalsIgnoreCase(nameType)) { @@ -107,7 +106,7 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { } else if ("nysiis".equalsIgnoreCase(encodername)) { this.encoder = new Nysiis(); } else { - throw new ElasticsearchIllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); + throw new IllegalArgumentException("unknown encoder [" + encodername + "] for phonetic token filter"); } } @@ -127,6 +126,6 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory { } else { return new PhoneticFilter(tokenStream, encoder, !replace); } - throw new ElasticsearchIllegalArgumentException("encoder error"); + throw new IllegalArgumentException("encoder error"); } } From 4d26780ff996ba773ce016c60a01cc27ff9b7d14 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 Apr 2015 16:31:35 +0200 Subject: [PATCH 089/103] Update compatibility matrix for elasticsearch 1.4.5 Closes #43. 
--- README.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 7a0ba7cc92c..9ee6e869794 100644 --- a/README.md +++ b/README.md @@ -13,15 +13,16 @@ bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.5.0 | elasticsearch |Phonetic Analysis Plugin| Docs | |---------------|-----------------------|------------| | master | Build from source | See below | -| es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) | -| es-1.5 | 2.5.0 | [2.5.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.5.0/#version-250-for-elasticsearch-15) | -| es-1.4 | 2.4.2 | [2.4.2](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.2/#version-242-for-elasticsearch-14) | -| < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.1/#version-241-for-elasticsearch-14) | -| es-1.3 | 2.3.0 | [2.3.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | -| es-1.2 | 2.2.0 | [2.2.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.2.0/#phonetic-analysis-for-elasticsearch) | -| es-1.1 | 2.1.0 | [2.1.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.1.0/#phonetic-analysis-for-elasticsearch) | -| es-1.0 | 2.0.0 | [2.0.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.0.0/#phonetic-analysis-for-elasticsearch) | -| es-0.90 | 1.8.0 | [1.8.0](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v1.8.0/#phonetic-analysis-for-elasticsearch) | +| es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) | +| es-1.5 | 2.5.0 | 
[2.5.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.5.0/#version-250-for-elasticsearch-15) | +| es-1.4 | Build from source | [2.4.3-SNAPSHOT](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/es-1.4/#version-243-snapshot-for-elasticsearch-14) | +| < 1.4.5 | 2.4.2 | [2.4.2](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.4.2/#version-242-for-elasticsearch-14) | +| < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.4.1/#version-241-for-elasticsearch-14) | +| es-1.3 | 2.3.0 | [2.3.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | +| es-1.2 | 2.2.0 | [2.2.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.2.0/#phonetic-analysis-for-elasticsearch) | +| es-1.1 | 2.1.0 | [2.1.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.1.0/#phonetic-analysis-for-elasticsearch) | +| es-1.0 | 2.0.0 | [2.0.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.0.0/#phonetic-analysis-for-elasticsearch) | +| es-0.90 | 1.8.0 | [1.8.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v1.8.0/#phonetic-analysis-for-elasticsearch) | To build a `SNAPSHOT` version, you need to build it with Maven: From 7ef0a0b14cc5170b64f94049e32857aeacdcaa69 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 Apr 2015 16:36:35 +0200 Subject: [PATCH 090/103] update documentation with release 2.4.3 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9ee6e869794..4f1e998fa56 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: ```sh -bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.5.0 +bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.4.3 ``` @@ -15,7 +15,7 @@ 
bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.5.0 | master | Build from source | See below | | es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) | | es-1.5 | 2.5.0 | [2.5.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.5.0/#version-250-for-elasticsearch-15) | -| es-1.4 | Build from source | [2.4.3-SNAPSHOT](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/es-1.4/#version-243-snapshot-for-elasticsearch-14) | +| es-1.4 | 2.4.3 | [2.4.3](https://github.com/elasticsearch/elasticsearch-analysis-phonetic/tree/v2.4.3/#version-243-for-elasticsearch-14) | | < 1.4.5 | 2.4.2 | [2.4.2](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.4.2/#version-242-for-elasticsearch-14) | | < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.4.1/#version-241-for-elasticsearch-14) | | es-1.3 | 2.3.0 | [2.3.0](https://github.com/elastic/elasticsearch-analysis-phonetic/tree/v2.3.0/#phonetic-analysis-for-elasticsearch) | From aa40c649068736659591187592aa33da4e2df05b Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 30 Apr 2015 16:40:59 +0200 Subject: [PATCH 091/103] Latest version is 2.5.0 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4f1e998fa56..2c0d50a65f6 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ The Phonetic Analysis plugin integrates phonetic token filter analysis with elas In order to install the plugin, simply run: ```sh -bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.4.3 +bin/plugin install elasticsearch/elasticsearch-analysis-phonetic/2.5.0 ``` From 3378232854d35c3c57f87628e11d10dfed6e52a7 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 5 May 2015 12:53:30 -0400 Subject: [PATCH 092/103] Tests: fix tests not to use CWD --- 
.../index/analysis/SimplePhoneticAnalysisTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java index e64b66dc82a..0e041b71c6c 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java @@ -47,6 +47,7 @@ public class SimplePhoneticAnalysisTests extends ElasticsearchTestCase { public void testPhoneticTokenFilterFactory() { Settings settings = settingsBuilder().loadFromClasspath("org/elasticsearch/index/analysis/phonetic-1.yml") .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put("path.home", createTempDir()) .build(); AnalysisService analysisService = testSimpleConfiguration(settings); TokenFilterFactory filterFactory = analysisService.tokenFilter("phonetic"); From 8a423e8f5a96c2b4570285f56dbdad62f8cd1c7a Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 7 May 2015 09:25:28 +0200 Subject: [PATCH 093/103] [build] remove explicit commons-codec in assembly file We don't have to explicitly define `commons-codec` in our assembly file. When removing it, the content is still exactly the same: ``` Archive: target/releases/elasticsearch-analysis-phonetic-2.4.4-SNAPSHOT.zip Length Date Time Name -------- ---- ---- ---- 17193 05-07-15 09:08 elasticsearch-analysis-phonetic-2.4.4-SNAPSHOT.jar 23098 03-17-15 00:28 lucene-analyzers-phonetic-4.10.4.jar 263965 11-09-14 14:02 commons-codec-1.9.jar -------- ------- 304256 3 files ``` See https://github.com/elastic/elasticsearch-analysis-phonetic/issues/40 Closes #44. 
(cherry picked from commit 6fa3fb8) --- src/main/assemblies/plugin.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml index 7a2d9322934..f5065e0a0d5 100644 --- a/src/main/assemblies/plugin.xml +++ b/src/main/assemblies/plugin.xml @@ -20,8 +20,7 @@ true org.apache.lucene:lucene-analyzers-phonetic - commons-codec:commons-codec - \ No newline at end of file + From e4f3dc13c6311cb7a414d8d30373cf24bba96e70 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 11 May 2015 19:33:58 -0400 Subject: [PATCH 094/103] enable security manager in tests --- .gitignore | 1 + pom.xml | 1 + 2 files changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index c006d1f3e88..fcd2677b19c 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ /plugin_tools /.local-execution-hints.log /.local-*-execution-hints.log +/eclipse-build/ diff --git a/pom.xml b/pom.xml index 2bd37665e4c..1f45863029c 100644 --- a/pom.xml +++ b/pom.xml @@ -33,6 +33,7 @@ + true From 48e45f5ba83832a9499bbf52b2e7cf10c9988a09 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 11 May 2015 19:43:13 -0400 Subject: [PATCH 095/103] Remove outdated policy file --- dev-tools/tests.policy | 54 ------------------------------------------ 1 file changed, 54 deletions(-) delete mode 100644 dev-tools/tests.policy diff --git a/dev-tools/tests.policy b/dev-tools/tests.policy deleted file mode 100644 index 6afb5025840..00000000000 --- a/dev-tools/tests.policy +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// Policy file to prevent tests from writing outside the test sandbox directory -// PLEASE NOTE: You may need to enable other permissions when new tests are added, -// everything not allowed here is forbidden! - -grant { - // permissions for file access, write access only to sandbox: - permission java.io.FilePermission "<>", "read,execute"; - permission java.io.FilePermission "${junit4.childvm.cwd}", "read,execute,write"; - permission java.io.FilePermission "${junit4.childvm.cwd}${/}-", "read,execute,write,delete"; - permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete"; - permission groovy.security.GroovyCodeSourcePermission "/groovy/script"; - - // Allow connecting to the internet anywhere - permission java.net.SocketPermission "*", "accept,listen,connect,resolve"; - - // Basic permissions needed for Lucene / Elasticsearch to work: - permission java.util.PropertyPermission "*", "read,write"; - permission java.lang.reflect.ReflectPermission "*"; - permission java.lang.RuntimePermission "*"; - - // These two *have* to be spelled out a separate - permission java.lang.management.ManagementPermission "control"; - permission java.lang.management.ManagementPermission "monitor"; - - permission java.net.NetPermission "*"; - permission java.util.logging.LoggingPermission "control"; - permission javax.management.MBeanPermission "*", "*"; - permission javax.management.MBeanServerPermission "*"; - permission javax.management.MBeanTrustPermission "*"; - - // Needed for some things in DNS caching in the JVM - 
permission java.security.SecurityPermission "getProperty.networkaddress.cache.ttl"; - permission java.security.SecurityPermission "getProperty.networkaddress.cache.negative.ttl"; - -}; From b60d49c9140e74a3c27b5db4a565bf4607fc8ff9 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 11 May 2015 21:25:00 -0400 Subject: [PATCH 096/103] remove unnecessary prop --- pom.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/pom.xml b/pom.xml index 1f45863029c..2bd37665e4c 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,6 @@ - true From 7f397cfbc08959da52168675d5f7d50a015a0f19 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 21 May 2015 10:33:29 -0400 Subject: [PATCH 097/103] engage forbidden apis --- pom.xml | 46 +--------------------------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/pom.xml b/pom.xml index 2bd37665e4c..41c90b17aff 100644 --- a/pom.xml +++ b/pom.xml @@ -40,14 +40,11 @@ org.hamcrest hamcrest-all + org.apache.lucene lucene-test-framework - - com.carrotsearch.randomizedtesting - randomizedtesting-runner - org.elasticsearch @@ -72,51 +69,10 @@ - - - src/main/resources - true - - **/*.properties - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - com.carrotsearch.randomizedtesting - junit4-maven-plugin - - - - org.apache.maven.plugins - maven-surefire-plugin - - - org.apache.maven.plugins - maven-source-plugin - org.apache.maven.plugins maven-assembly-plugin - - false - ${project.build.directory}/releases/ - - ${basedir}/src/main/assemblies/plugin.xml - - - - - package - - single - - - From 1573a07eaa0149b6794ffd712b0a0f5dd4dc679e Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 21 May 2015 16:48:02 -0400 Subject: [PATCH 098/103] remove duplicate test config --- pom.xml | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/pom.xml b/pom.xml index 41c90b17aff..e3c1e32de6b 100644 --- a/pom.xml +++ b/pom.xml @@ -36,36 +36,10 @@ - - org.hamcrest - hamcrest-all - - - - 
org.apache.lucene - lucene-test-framework - - - - org.elasticsearch - elasticsearch - - org.apache.lucene lucene-analyzers-phonetic - - - log4j - log4j - - - - org.elasticsearch - elasticsearch - test-jar - From d6aca867fb1bb4685b6ea3c331c739dcfcafcac6 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 21 May 2015 22:51:16 -0400 Subject: [PATCH 099/103] switch to plugin pom --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index e3c1e32de6b..e6977f12ff4 100644 --- a/pom.xml +++ b/pom.xml @@ -27,7 +27,7 @@ org.elasticsearch - elasticsearch-parent + elasticsearch-plugin 2.0.0-SNAPSHOT From b7a69065ee5a700d93b600316e9b1d5442de4700 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 22 May 2015 09:22:25 -0400 Subject: [PATCH 100/103] remove logging properties --- src/test/resources/log4j.properties | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 src/test/resources/log4j.properties diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties deleted file mode 100644 index 497c97f9959..00000000000 --- a/src/test/resources/log4j.properties +++ /dev/null @@ -1,5 +0,0 @@ -log4j.rootLogger=INFO, out - -log4j.appender.out=org.apache.log4j.ConsoleAppender -log4j.appender.out.layout=org.apache.log4j.PatternLayout -log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c] %m%n From 1c15268657008b2fb846f81e2a850e7d38cb6572 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 22 May 2015 17:41:43 -0400 Subject: [PATCH 101/103] fix license header --- .../analysis/SimplePhoneticIntegrationTests.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java index 84da5eeb2e4..41af6558ddc 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java +++ 
b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java @@ -1,11 +1,11 @@ /* - * Licensed to Elasticsearch (the "Author") under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Author licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * From f54dbc3060286328f0763f9a21e7e9aa21f6ac05 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 26 May 2015 08:08:58 -0400 Subject: [PATCH 102/103] Absorb ImmutableSettings into Settings --- .../index/analysis/SimplePhoneticAnalysisTests.java | 2 +- .../index/analysis/SimplePhoneticIntegrationTests.java | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java index 0e041b71c6c..45c3d7cf0ec 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java @@ -36,7 +36,7 @@ import org.elasticsearch.test.ElasticsearchTestCase; import org.hamcrest.MatcherAssert; import org.junit.Test; -import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; +import static 
org.elasticsearch.common.settings.Settings.settingsBuilder; import static org.hamcrest.Matchers.instanceOf; /** diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java index 41af6558ddc..7f74879e3ce 100644 --- a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java @@ -21,7 +21,6 @@ package org.elasticsearch.index.analysis; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.query.QueryBuilders; @@ -41,7 +40,7 @@ public class SimplePhoneticIntegrationTests extends ElasticsearchIntegrationTest @Override protected Settings nodeSettings(int nodeOrdinal) { - return ImmutableSettings.builder() + return Settings.builder() .put(super.nodeSettings(nodeOrdinal)) .put("plugins." 
+ PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true) .build(); @@ -49,7 +48,7 @@ public class SimplePhoneticIntegrationTests extends ElasticsearchIntegrationTest @Override public Settings indexSettings() { - Settings settings = ImmutableSettings.builder() + Settings settings = Settings.builder() .put(super.indexSettings()) .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard") .putArray("index.analysis.analyzer.my_analyzer.filter", "standard", "lowercase", "my_metaphone") From 8d9ff57bc37942f11e1542deadf1b6dbf14e29ed Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Fri, 5 Jun 2015 13:12:20 +0200 Subject: [PATCH 103/103] add analysis-phonetic module --- .gitignore | 14 -- CONTRIBUTING.md | 98 --------- LICENSE.txt | 202 ------------------ dev-tools/release.py | 134 ------------ .../analysis-phonetic/README.md | 0 pom.xml => plugins/analysis-phonetic/pom.xml | 25 +-- .../src}/main/assemblies/plugin.xml | 0 .../PhoneticAnalysisBinderProcessor.java | 0 .../analysis/PhoneticTokenFilterFactory.java | 0 .../analysis/phonetic/HaasePhonetik.java | 0 .../analysis/phonetic/KoelnerPhonetik.java | 0 .../index/analysis/phonetic/Nysiis.java | 0 .../analysis/AnalysisPhoneticPlugin.java | 0 .../src}/main/resources/es-plugin.properties | 0 .../analysis/SimplePhoneticAnalysisTests.java | 0 .../SimplePhoneticIntegrationTests.java | 0 .../index/analysis/phonetic-1.yml | 0 17 files changed, 2 insertions(+), 471 deletions(-) delete mode 100644 .gitignore delete mode 100644 CONTRIBUTING.md delete mode 100644 LICENSE.txt delete mode 100644 dev-tools/release.py rename README.md => plugins/analysis-phonetic/README.md (100%) rename pom.xml => plugins/analysis-phonetic/pom.xml (55%) rename {src => plugins/analysis-phonetic/src}/main/assemblies/plugin.xml (100%) rename {src => plugins/analysis-phonetic/src}/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java (100%) rename {src => 
plugins/analysis-phonetic/src}/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java (100%) rename {src => plugins/analysis-phonetic/src}/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java (100%) rename {src => plugins/analysis-phonetic/src}/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java (100%) rename {src => plugins/analysis-phonetic/src}/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java (100%) rename {src => plugins/analysis-phonetic/src}/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java (100%) rename {src => plugins/analysis-phonetic/src}/main/resources/es-plugin.properties (100%) rename {src => plugins/analysis-phonetic/src}/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java (100%) rename {src => plugins/analysis-phonetic/src}/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java (100%) rename {src => plugins/analysis-phonetic/src}/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml (100%) diff --git a/.gitignore b/.gitignore deleted file mode 100644 index fcd2677b19c..00000000000 --- a/.gitignore +++ /dev/null @@ -1,14 +0,0 @@ -/data -/work -/logs -/.idea -/target -.DS_Store -*.iml -/.project -/.classpath -/.settings -/plugin_tools -/.local-execution-hints.log -/.local-*-execution-hints.log -/eclipse-build/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index b88aaf63bd2..00000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,98 +0,0 @@ -Contributing to elasticsearch -============================= - -Elasticsearch is an open source project and we love to receive contributions from our community — you! There are many ways to contribute, from writing tutorials or blog posts, improving the documentation, submitting bug reports and feature requests or writing code which can be incorporated into Elasticsearch itself. 
- -Bug reports ------------ - -If you think you have found a bug in Elasticsearch, first make sure that you are testing against the [latest version of Elasticsearch](http://www.elasticsearch.org/download/) - your issue may already have been fixed. If not, search our [issues list](https://github.com/elasticsearch/elasticsearch/issues) on GitHub in case a similar issue has already been opened. - -It is very helpful if you can prepare a reproduction of the bug. In other words, provide a small test case which we can run to confirm your bug. It makes it easier to find the problem and to fix it. Test cases should be provided as `curl` commands which we can copy and paste into a terminal to run it locally, for example: - -```sh -# delete the index -curl -XDELETE localhost:9200/test - -# insert a document -curl -XPUT localhost:9200/test/test/1 -d '{ - "title": "test document" -}' - -# this should return XXXX but instead returns YYY -curl .... -``` - -Provide as much information as you can. You may think that the problem lies with your query, when actually it depends on how your data is indexed. The easier it is for us to recreate your problem, the faster it is likely to be fixed. - -Feature requests ----------------- - -If you find yourself wishing for a feature that doesn't exist in Elasticsearch, you are probably not alone. There are bound to be others out there with similar needs. Many of the features that Elasticsearch has today have been added because our users saw the need. -Open an issue on our [issues list](https://github.com/elasticsearch/elasticsearch/issues) on GitHub which describes the feature you would like to see, why you need it, and how it should work. - -Contributing code and documentation changes -------------------------------------------- - -If you have a bugfix or new feature that you would like to contribute to Elasticsearch, please find or open an issue about it first. Talk about what you would like to do. 
It may be that somebody is already working on it, or that there are particular issues that you should know about before implementing the change. - -We enjoy working with contributors to get their code accepted. There are many approaches to fixing a problem and it is important to find the best approach before writing too much code. - -The process for contributing to any of the [Elasticsearch repositories](https://github.com/elasticsearch/) is similar. Details for individual projects can be found below. - -### Fork and clone the repository - -You will need to fork the main Elasticsearch code or documentation repository and clone it to your local machine. See -[github help page](https://help.github.com/articles/fork-a-repo) for help. - -Further instructions for specific projects are given below. - -### Submitting your changes - -Once your changes and tests are ready to submit for review: - -1. Test your changes -Run the test suite to make sure that nothing is broken. - -2. Sign the Contributor License Agreement -Please make sure you have signed our [Contributor License Agreement](http://www.elasticsearch.org/contributor-agreement/). We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction. We ask this of all contributors in order to assure our users of the origin and continuing existence of the code. You only need to sign the CLA once. - -3. Rebase your changes -Update your local repository with the most recent code from the main Elasticsearch repository, and rebase your branch on top of the latest master branch. We prefer your changes to be squashed into a single commit. - -4. Submit a pull request -Push your local changes to your forked copy of the repository and [submit a pull request](https://help.github.com/articles/using-pull-requests). In the pull request, describe what your changes do and mention the number of the issue where discussion has taken place, eg "Closes #123". - -Then sit back and wait. 
There will probably be discussion about the pull request and, if any changes are needed, we would love to work with you to get your pull request merged into Elasticsearch. - - -Contributing to the Elasticsearch plugin ----------------------------------------- - -**Repository:** [https://github.com/elasticsearch/elasticsearch-analysis-phonetic](https://github.com/elasticsearch/elasticsearch-analysis-phonetic) - -Make sure you have [Maven](http://maven.apache.org) installed, as Elasticsearch uses it as its build system. Integration with IntelliJ and Eclipse should work out of the box. Eclipse users can automatically configure their IDE by running `mvn eclipse:eclipse` and then importing the project into their workspace: `File > Import > Existing project into workspace`. - -Please follow these formatting guidelines: - -* Java indent is 4 spaces -* Line width is 140 characters -* The rest is left to Java coding standards -* Disable “auto-format on save” to prevent unnecessary format changes. This makes reviews much harder as it generates unnecessary formatting changes. If your IDE supports formatting only modified chunks that is fine to do. - -To create a distribution from the source, simply run: - -```sh -cd elasticsearch-analysis-phonetic/ -mvn clean package -DskipTests -``` - -You will find the newly built packages under: `./target/releases/`. - -Before submitting your changes, run the test suite to make sure that nothing is broken, with: - -```sh -mvn clean test -``` - -Source: [Contributing to elasticsearch](http://www.elasticsearch.org/contributing-to-elasticsearch/) diff --git a/LICENSE.txt b/LICENSE.txt deleted file mode 100644 index d6456956733..00000000000 --- a/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/dev-tools/release.py b/dev-tools/release.py deleted file mode 100644 index edcc637d068..00000000000 --- a/dev-tools/release.py +++ /dev/null @@ -1,134 +0,0 @@ -# Licensed to Elasticsearch under one or more contributor -# license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright -# ownership. Elasticsearch licenses this file to you under -# the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on -# an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -# either express or implied. See the License for the specific -# language governing permissions and limitations under the License. - -import datetime -import os -import shutil -import sys -import time -import urllib -import urllib.request -import zipfile - -from os.path import dirname, abspath - -""" - This tool builds a release from the a given elasticsearch plugin branch. - - It is basically a wrapper on top of launch_release.py which: - - - tries to get a more recent version of launch_release.py in ... - - download it if needed - - launch it passing all arguments to it, like: - - $ python3 dev_tools/release.py --branch master --publish --remote origin - - Important options: - - # Dry run - $ python3 dev_tools/release.py - - # Dry run without tests - python3 dev_tools/release.py --skiptests - - # Release, publish artifacts and announce - $ python3 dev_tools/release.py --publish - - See full documentation in launch_release.py -""" -env = os.environ - -# Change this if the source repository for your scripts is at a different location -SOURCE_REPO = 'elasticsearch/elasticsearch-plugins-script' -# We define that we should download again the script after 1 days -SCRIPT_OBSOLETE_DAYS = 1 -# We ignore in master.zip file the following files -IGNORED_FILES = ['.gitignore', 'README.md'] - - -ROOT_DIR = abspath(os.path.join(abspath(dirname(__file__)), '../')) -TARGET_TOOLS_DIR = ROOT_DIR + '/plugin_tools' -DEV_TOOLS_DIR = ROOT_DIR + '/dev-tools' -BUILD_RELEASE_FILENAME = 'release.zip' -BUILD_RELEASE_FILE = TARGET_TOOLS_DIR + '/' + BUILD_RELEASE_FILENAME -SOURCE_URL = 'https://github.com/%s/archive/master.zip' % SOURCE_REPO - -# Download a recent version of the release plugin tool -try: - 
os.mkdir(TARGET_TOOLS_DIR) - print('directory %s created' % TARGET_TOOLS_DIR) -except FileExistsError: - pass - - -try: - # we check latest update. If we ran an update recently, we - # are not going to check it again - download = True - - try: - last_download_time = datetime.datetime.fromtimestamp(os.path.getmtime(BUILD_RELEASE_FILE)) - if (datetime.datetime.now()-last_download_time).days < SCRIPT_OBSOLETE_DAYS: - download = False - except FileNotFoundError: - pass - - if download: - urllib.request.urlretrieve(SOURCE_URL, BUILD_RELEASE_FILE) - with zipfile.ZipFile(BUILD_RELEASE_FILE) as myzip: - for member in myzip.infolist(): - filename = os.path.basename(member.filename) - # skip directories - if not filename: - continue - if filename in IGNORED_FILES: - continue - - # copy file (taken from zipfile's extract) - source = myzip.open(member.filename) - target = open(os.path.join(TARGET_TOOLS_DIR, filename), "wb") - with source, target: - shutil.copyfileobj(source, target) - # We keep the original date - date_time = time.mktime(member.date_time + (0, 0, -1)) - os.utime(os.path.join(TARGET_TOOLS_DIR, filename), (date_time, date_time)) - print('plugin-tools updated from %s' % SOURCE_URL) -except urllib.error.HTTPError: - pass - - -# Let see if we need to update the release.py script itself -source_time = os.path.getmtime(TARGET_TOOLS_DIR + '/release.py') -repo_time = os.path.getmtime(DEV_TOOLS_DIR + '/release.py') -if source_time > repo_time: - input('release.py needs an update. 
Press a key to update it...') - shutil.copyfile(TARGET_TOOLS_DIR + '/release.py', DEV_TOOLS_DIR + '/release.py') - -# We can launch the build process -try: - PYTHON = 'python' - # make sure python3 is used if python3 is available - # some systems use python 2 as default - os.system('python3 --version > /dev/null 2>&1') - PYTHON = 'python3' -except RuntimeError: - pass - -release_args = '' -for x in range(1, len(sys.argv)): - release_args += ' ' + sys.argv[x] - -os.system('%s %s/build_release.py %s' % (PYTHON, TARGET_TOOLS_DIR, release_args)) diff --git a/README.md b/plugins/analysis-phonetic/README.md similarity index 100% rename from README.md rename to plugins/analysis-phonetic/README.md diff --git a/pom.xml b/plugins/analysis-phonetic/pom.xml similarity index 55% rename from pom.xml rename to plugins/analysis-phonetic/pom.xml index e6977f12ff4..d4cc3e7b871 100644 --- a/pom.xml +++ b/plugins/analysis-phonetic/pom.xml @@ -4,26 +4,12 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - org.elasticsearch + org.elasticsearch.plugin elasticsearch-analysis-phonetic - 3.0.0-SNAPSHOT + jar Elasticsearch Phonetic Analysis plugin The Phonetic Analysis plugin integrates phonetic token filter analysis with elasticsearch. 
- https://github.com/elastic/elasticsearch-analysis-phonetic/ - 2009 - - - The Apache Software License, Version 2.0 - http://www.apache.org/licenses/LICENSE-2.0.txt - repo - - - - scm:git:git@github.com:elastic/elasticsearch-analysis-phonetic.git - scm:git:git@github.com:elastic/elasticsearch-analysis-phonetic.git - http://github.com/elastic/elasticsearch-analysis-phonetic - org.elasticsearch @@ -51,11 +37,4 @@ - - - oss-snapshots - Sonatype OSS Snapshots - https://oss.sonatype.org/content/repositories/snapshots/ - - diff --git a/src/main/assemblies/plugin.xml b/plugins/analysis-phonetic/src/main/assemblies/plugin.xml similarity index 100% rename from src/main/assemblies/plugin.xml rename to plugins/analysis-phonetic/src/main/assemblies/plugin.xml diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java rename to plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticAnalysisBinderProcessor.java diff --git a/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java rename to plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/PhoneticTokenFilterFactory.java diff --git a/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java rename to 
plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/HaasePhonetik.java diff --git a/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java rename to plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/KoelnerPhonetik.java diff --git a/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java similarity index 100% rename from src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java rename to plugins/analysis-phonetic/src/main/java/org/elasticsearch/index/analysis/phonetic/Nysiis.java diff --git a/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java b/plugins/analysis-phonetic/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java similarity index 100% rename from src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java rename to plugins/analysis-phonetic/src/main/java/org/elasticsearch/plugin/analysis/AnalysisPhoneticPlugin.java diff --git a/src/main/resources/es-plugin.properties b/plugins/analysis-phonetic/src/main/resources/es-plugin.properties similarity index 100% rename from src/main/resources/es-plugin.properties rename to plugins/analysis-phonetic/src/main/resources/es-plugin.properties diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java b/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java similarity index 100% rename from src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java rename to 
plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticAnalysisTests.java diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java b/plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java similarity index 100% rename from src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java rename to plugins/analysis-phonetic/src/test/java/org/elasticsearch/index/analysis/SimplePhoneticIntegrationTests.java diff --git a/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml b/plugins/analysis-phonetic/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml similarity index 100% rename from src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml rename to plugins/analysis-phonetic/src/test/resources/org/elasticsearch/index/analysis/phonetic-1.yml