Index Analysis: Add language analyzers and stemmers, closes #72
This commit is contained in:
parent
45234f4d90
commit
a344ebb1b3
|
@ -81,7 +81,25 @@ public class AnalysisModule extends AbstractModule {
|
||||||
if (!tokenFiltersSettings.containsKey("shingle")) {
|
if (!tokenFiltersSettings.containsKey("shingle")) {
|
||||||
tokenFilterBinder.addBinding("shingle").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ShingleTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
tokenFilterBinder.addBinding("shingle").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ShingleTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||||
}
|
}
|
||||||
|
// extended token filter defaults (language stemmers)
|
||||||
|
if (!tokenFiltersSettings.containsKey("arabicStem")) {
|
||||||
|
tokenFilterBinder.addBinding("arabicStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ArabicStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||||
|
}
|
||||||
|
if (!tokenFiltersSettings.containsKey("brazilianStem")) {
|
||||||
|
tokenFilterBinder.addBinding("brazilianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, BrazilianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||||
|
}
|
||||||
|
if (!tokenFiltersSettings.containsKey("dutchStem")) {
|
||||||
|
tokenFilterBinder.addBinding("dutchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, DutchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||||
|
}
|
||||||
|
if (!tokenFiltersSettings.containsKey("frenchStem")) {
|
||||||
|
tokenFilterBinder.addBinding("frenchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, FrenchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||||
|
}
|
||||||
|
if (!tokenFiltersSettings.containsKey("germanStem")) {
|
||||||
|
tokenFilterBinder.addBinding("germanStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, GermanStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||||
|
}
|
||||||
|
if (!tokenFiltersSettings.containsKey("russianStem")) {
|
||||||
|
tokenFilterBinder.addBinding("russianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, RussianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
|
||||||
|
}
|
||||||
|
|
||||||
MapBinder<String, TokenizerFactoryFactory> tokenizerBinder
|
MapBinder<String, TokenizerFactoryFactory> tokenizerBinder
|
||||||
= MapBinder.newMapBinder(binder(), String.class, TokenizerFactoryFactory.class);
|
= MapBinder.newMapBinder(binder(), String.class, TokenizerFactoryFactory.class);
|
||||||
|
|
|
@ -101,6 +101,45 @@ public class AnalysisService extends AbstractIndexComponent {
|
||||||
analyzerProviders.put("defaultSearch", analyzerProviders.get("default"));
|
analyzerProviders.put("defaultSearch", analyzerProviders.get("default"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// extended analyzer defaults (language analyzers)
|
||||||
|
if (!analyzerProviders.containsKey("arabic")) {
|
||||||
|
analyzerProviders.put("arabic", new ArabicAnalyzerProvider(index, indexSettings, "arabic", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
if (!analyzerProviders.containsKey("brazilian")) {
|
||||||
|
analyzerProviders.put("brazilian", new BrazilianAnalyzerProvider(index, indexSettings, "brazilian", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
if (!analyzerProviders.containsKey("chinese")) {
|
||||||
|
analyzerProviders.put("chinese", new ChineseAnalyzerProvider(index, indexSettings, "chinese", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
// Default "cjk" analyzer: must be backed by CjkAnalyzerProvider (which builds a CJKAnalyzer).
// Registering ChineseAnalyzerProvider here would silently hand out a ChineseAnalyzer
// under the "cjk" name.
if (!analyzerProviders.containsKey("cjk")) {
    analyzerProviders.put("cjk", new CjkAnalyzerProvider(index, indexSettings, "cjk", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
|
||||||
|
if (!analyzerProviders.containsKey("czech")) {
|
||||||
|
analyzerProviders.put("czech", new CzechAnalyzerProvider(index, indexSettings, "czech", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
if (!analyzerProviders.containsKey("dutch")) {
|
||||||
|
analyzerProviders.put("dutch", new DutchAnalyzerProvider(index, indexSettings, "dutch", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
if (!analyzerProviders.containsKey("french")) {
|
||||||
|
analyzerProviders.put("french", new FrenchAnalyzerProvider(index, indexSettings, "french", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
if (!analyzerProviders.containsKey("german")) {
|
||||||
|
analyzerProviders.put("german", new GermanAnalyzerProvider(index, indexSettings, "german", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
if (!analyzerProviders.containsKey("greek")) {
|
||||||
|
analyzerProviders.put("greek", new GreekAnalyzerProvider(index, indexSettings, "greek", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
if (!analyzerProviders.containsKey("persian")) {
|
||||||
|
analyzerProviders.put("persian", new PersianAnalyzerProvider(index, indexSettings, "persian", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
if (!analyzerProviders.containsKey("russian")) {
|
||||||
|
analyzerProviders.put("russian", new RussianAnalyzerProvider(index, indexSettings, "russian", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
if (!analyzerProviders.containsKey("thai")) {
|
||||||
|
analyzerProviders.put("thai", new ThaiAnalyzerProvider(index, indexSettings, "thai", ImmutableSettings.Builder.EMPTY_SETTINGS));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
this.analyzerProviders = ImmutableMap.copyOf(analyzerProviders);
|
this.analyzerProviders = ImmutableMap.copyOf(analyzerProviders);
|
||||||
|
|
||||||
Map<String, NamedAnalyzer> analyzers = newHashMap();
|
Map<String, NamedAnalyzer> analyzers = newHashMap();
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class ArabicAnalyzerProvider extends AbstractAnalyzerProvider<ArabicAnalyzer> {
|
||||||
|
|
||||||
|
private final Set<String> stopWords;
|
||||||
|
|
||||||
|
private final ArabicAnalyzer arabicAnalyzer;
|
||||||
|
|
||||||
|
@Inject public ArabicAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stopWords = settings.getAsArray("stopwords");
|
||||||
|
if (stopWords.length > 0) {
|
||||||
|
this.stopWords = ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
||||||
|
} else {
|
||||||
|
this.stopWords = ArabicAnalyzer.getDefaultStopSet();
|
||||||
|
}
|
||||||
|
arabicAnalyzer = new ArabicAnalyzer(Version.LUCENE_CURRENT, this.stopWords);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public ArabicAnalyzer get() {
|
||||||
|
return this.arabicAnalyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.ar.ArabicStemFilter;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class ArabicStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
|
@Inject public ArabicStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public TokenStream create(TokenStream tokenStream) {
|
||||||
|
return new ArabicStemFilter(tokenStream);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,66 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class BrazilianAnalyzerProvider extends AbstractAnalyzerProvider<BrazilianAnalyzer> {
|
||||||
|
|
||||||
|
private final Set<?> stopWords;
|
||||||
|
|
||||||
|
private final Set<?> stemExclusion;
|
||||||
|
|
||||||
|
private final BrazilianAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public BrazilianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stopWords = settings.getAsArray("stopwords");
|
||||||
|
if (stopWords.length > 0) {
|
||||||
|
this.stopWords = ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
||||||
|
} else {
|
||||||
|
this.stopWords = BrazilianAnalyzer.getDefaultStopSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] stemExclusion = settings.getAsArray("stemExclusion");
|
||||||
|
if (stemExclusion.length > 0) {
|
||||||
|
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||||
|
} else {
|
||||||
|
this.stemExclusion = ImmutableSet.of();
|
||||||
|
}
|
||||||
|
analyzer = new BrazilianAnalyzer(Version.LUCENE_CURRENT, this.stopWords, this.stemExclusion);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public BrazilianAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.br.BrazilianStemFilter;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
|
private final Set<?> exclusions;
|
||||||
|
|
||||||
|
@Inject public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stemExclusion = settings.getAsArray("stemExclusion");
|
||||||
|
if (stemExclusion.length > 0) {
|
||||||
|
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||||
|
} else {
|
||||||
|
this.exclusions = ImmutableSet.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public TokenStream create(TokenStream tokenStream) {
|
||||||
|
return new BrazilianStemFilter(tokenStream, exclusions);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class ChineseAnalyzerProvider extends AbstractAnalyzerProvider<ChineseAnalyzer> {
|
||||||
|
|
||||||
|
private final ChineseAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public ChineseAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
analyzer = new ChineseAnalyzer();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public ChineseAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class CjkAnalyzerProvider extends AbstractAnalyzerProvider<CJKAnalyzer> {
|
||||||
|
|
||||||
|
private final Set<?> stopWords;
|
||||||
|
|
||||||
|
private final CJKAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public CjkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stopWords = settings.getAsArray("stopwords");
|
||||||
|
if (stopWords.length > 0) {
|
||||||
|
this.stopWords = ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
||||||
|
} else {
|
||||||
|
this.stopWords = CJKAnalyzer.getDefaultStopSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT, this.stopWords);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public CJKAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.cz.CzechAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class CzechAnalyzerProvider extends AbstractAnalyzerProvider<CzechAnalyzer> {
|
||||||
|
|
||||||
|
private final Set<?> stopWords;
|
||||||
|
|
||||||
|
private final CzechAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public CzechAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stopWords = settings.getAsArray("stopwords");
|
||||||
|
if (stopWords.length > 0) {
|
||||||
|
this.stopWords = ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
||||||
|
} else {
|
||||||
|
this.stopWords = CzechAnalyzer.getDefaultStopSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT, this.stopWords);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public CzechAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,66 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.nl.DutchAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class DutchAnalyzerProvider extends AbstractAnalyzerProvider<DutchAnalyzer> {
|
||||||
|
|
||||||
|
private final Set<?> stopWords;
|
||||||
|
|
||||||
|
private final Set<?> stemExclusion;
|
||||||
|
|
||||||
|
private final DutchAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public DutchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stopWords = settings.getAsArray("stopwords");
|
||||||
|
if (stopWords.length > 0) {
|
||||||
|
this.stopWords = ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
||||||
|
} else {
|
||||||
|
this.stopWords = DutchAnalyzer.getDefaultStopSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] stemExclusion = settings.getAsArray("stemExclusion");
|
||||||
|
if (stemExclusion.length > 0) {
|
||||||
|
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||||
|
} else {
|
||||||
|
this.stemExclusion = ImmutableSet.of();
|
||||||
|
}
|
||||||
|
analyzer = new DutchAnalyzer(Version.LUCENE_CURRENT, this.stopWords, this.stemExclusion);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public DutchAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.nl.DutchStemFilter;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
|
private final Set<?> exclusions;
|
||||||
|
|
||||||
|
@Inject public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stemExclusion = settings.getAsArray("stemExclusion");
|
||||||
|
if (stemExclusion.length > 0) {
|
||||||
|
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||||
|
} else {
|
||||||
|
this.exclusions = ImmutableSet.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public TokenStream create(TokenStream tokenStream) {
|
||||||
|
return new DutchStemFilter(tokenStream, exclusions);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,66 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class FrenchAnalyzerProvider extends AbstractAnalyzerProvider<FrenchAnalyzer> {
|
||||||
|
|
||||||
|
private final Set<?> stopWords;
|
||||||
|
|
||||||
|
private final Set<?> stemExclusion;
|
||||||
|
|
||||||
|
private final FrenchAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public FrenchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stopWords = settings.getAsArray("stopwords");
|
||||||
|
if (stopWords.length > 0) {
|
||||||
|
this.stopWords = ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
||||||
|
} else {
|
||||||
|
this.stopWords = FrenchAnalyzer.getDefaultStopSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] stemExclusion = settings.getAsArray("stemExclusion");
|
||||||
|
if (stemExclusion.length > 0) {
|
||||||
|
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||||
|
} else {
|
||||||
|
this.stemExclusion = ImmutableSet.of();
|
||||||
|
}
|
||||||
|
analyzer = new FrenchAnalyzer(Version.LUCENE_CURRENT, this.stopWords, this.stemExclusion);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public FrenchAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.fr.FrenchStemFilter;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
|
private final Set<?> exclusions;
|
||||||
|
|
||||||
|
@Inject public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stemExclusion = settings.getAsArray("stemExclusion");
|
||||||
|
if (stemExclusion.length > 0) {
|
||||||
|
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||||
|
} else {
|
||||||
|
this.exclusions = ImmutableSet.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public TokenStream create(TokenStream tokenStream) {
|
||||||
|
return new FrenchStemFilter(tokenStream, exclusions);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,66 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.de.GermanAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class GermanAnalyzerProvider extends AbstractAnalyzerProvider<GermanAnalyzer> {
|
||||||
|
|
||||||
|
private final Set<?> stopWords;
|
||||||
|
|
||||||
|
private final Set<?> stemExclusion;
|
||||||
|
|
||||||
|
private final GermanAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public GermanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stopWords = settings.getAsArray("stopwords");
|
||||||
|
if (stopWords.length > 0) {
|
||||||
|
this.stopWords = ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
||||||
|
} else {
|
||||||
|
this.stopWords = GermanAnalyzer.getDefaultStopSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] stemExclusion = settings.getAsArray("stemExclusion");
|
||||||
|
if (stemExclusion.length > 0) {
|
||||||
|
this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||||
|
} else {
|
||||||
|
this.stemExclusion = ImmutableSet.of();
|
||||||
|
}
|
||||||
|
analyzer = new GermanAnalyzer(Version.LUCENE_CURRENT, this.stopWords, this.stemExclusion);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public GermanAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.de.GermanStemFilter;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
|
private final Set<?> exclusions;
|
||||||
|
|
||||||
|
@Inject public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stemExclusion = settings.getAsArray("stemExclusion");
|
||||||
|
if (stemExclusion.length > 0) {
|
||||||
|
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||||
|
} else {
|
||||||
|
this.exclusions = ImmutableSet.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public TokenStream create(TokenStream tokenStream) {
|
||||||
|
return new GermanStemFilter(tokenStream, exclusions);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.el.GreekAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class GreekAnalyzerProvider extends AbstractAnalyzerProvider<GreekAnalyzer> {
|
||||||
|
|
||||||
|
private final Set<?> stopWords;
|
||||||
|
|
||||||
|
private final GreekAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public GreekAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stopWords = settings.getAsArray("stopwords");
|
||||||
|
if (stopWords.length > 0) {
|
||||||
|
this.stopWords = ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
||||||
|
} else {
|
||||||
|
this.stopWords = GreekAnalyzer.getDefaultStopSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzer = new GreekAnalyzer(Version.LUCENE_CURRENT, this.stopWords);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public GreekAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,58 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.fa.PersianAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class PersianAnalyzerProvider extends AbstractAnalyzerProvider<PersianAnalyzer> {
|
||||||
|
|
||||||
|
private final Set<?> stopWords;
|
||||||
|
|
||||||
|
private final PersianAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public PersianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stopWords = settings.getAsArray("stopwords");
|
||||||
|
if (stopWords.length > 0) {
|
||||||
|
this.stopWords = ImmutableSet.copyOf(Iterators.forArray(stopWords));
|
||||||
|
} else {
|
||||||
|
this.stopWords = PersianAnalyzer.getDefaultStopSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzer = new PersianAnalyzer(Version.LUCENE_CURRENT, this.stopWords);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public PersianAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,52 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class RussianAnalyzerProvider extends AbstractAnalyzerProvider<RussianAnalyzer> {
|
||||||
|
|
||||||
|
private final RussianAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public RussianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
String[] stopWords = settings.getAsArray("stopwords");
|
||||||
|
if (stopWords.length > 0) {
|
||||||
|
analyzer = new RussianAnalyzer(Version.LUCENE_CURRENT, ImmutableSet.copyOf(Iterators.forArray(stopWords)));
|
||||||
|
} else {
|
||||||
|
analyzer = new RussianAnalyzer(Version.LUCENE_CURRENT);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public RussianAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.ru.RussianStemFilter;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class RussianStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
|
@Inject public RussianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public TokenStream create(TokenStream tokenStream) {
|
||||||
|
return new RussianStemFilter(tokenStream);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
/*
|
||||||
|
* Licensed to Elastic Search and Shay Banon under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. Elastic Search licenses this
|
||||||
|
* file to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.assistedinject.Assisted;
|
||||||
|
import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.settings.IndexSettings;
|
||||||
|
import org.elasticsearch.util.settings.Settings;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author kimchy (shay.banon)
|
||||||
|
*/
|
||||||
|
public class ThaiAnalyzerProvider extends AbstractAnalyzerProvider<ThaiAnalyzer> {
|
||||||
|
|
||||||
|
private final ThaiAnalyzer analyzer;
|
||||||
|
|
||||||
|
@Inject public ThaiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
|
super(index, indexSettings, name);
|
||||||
|
analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override public ThaiAnalyzer get() {
|
||||||
|
return this.analyzer;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue