Index Analysis: Add language analyzers and stemmers, closes #72

This commit is contained in:
kimchy 2010-03-19 17:07:43 +02:00
parent 45234f4d90
commit a344ebb1b3
20 changed files with 1052 additions and 1 deletions

View File

@ -81,7 +81,25 @@ public class AnalysisModule extends AbstractModule {
if (!tokenFiltersSettings.containsKey("shingle")) {
tokenFilterBinder.addBinding("shingle").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ShingleTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
// extends defaults
if (!tokenFiltersSettings.containsKey("arabicStem")) {
tokenFilterBinder.addBinding("arabicStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, ArabicStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!tokenFiltersSettings.containsKey("brazilianStem")) {
tokenFilterBinder.addBinding("brazilianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, BrazilianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!tokenFiltersSettings.containsKey("dutchStem")) {
tokenFilterBinder.addBinding("dutchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, DutchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!tokenFiltersSettings.containsKey("frenchStem")) {
tokenFilterBinder.addBinding("frenchStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, FrenchStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!tokenFiltersSettings.containsKey("germanStem")) {
tokenFilterBinder.addBinding("germanStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, GermanStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
if (!tokenFiltersSettings.containsKey("russianStem")) {
tokenFilterBinder.addBinding("russianStem").toProvider(FactoryProvider.newFactory(TokenFilterFactoryFactory.class, RussianStemTokenFilterFactory.class)).in(Scopes.SINGLETON);
}
MapBinder<String, TokenizerFactoryFactory> tokenizerBinder
= MapBinder.newMapBinder(binder(), String.class, TokenizerFactoryFactory.class);

View File

@ -101,6 +101,45 @@ public class AnalysisService extends AbstractIndexComponent {
analyzerProviders.put("defaultSearch", analyzerProviders.get("default"));
}
// extended analyzers defaults
if (!analyzerProviders.containsKey("arabic")) {
analyzerProviders.put("arabic", new ArabicAnalyzerProvider(index, indexSettings, "arabic", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("brazilian")) {
analyzerProviders.put("brazilian", new BrazilianAnalyzerProvider(index, indexSettings, "brazilian", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("chinese")) {
analyzerProviders.put("chinese", new ChineseAnalyzerProvider(index, indexSettings, "chinese", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
// Default "cjk" analyzer: must be backed by CjkAnalyzerProvider (CJKAnalyzer), not by the
// ChineseAnalyzerProvider that is already registered under "chinese".
if (!analyzerProviders.containsKey("cjk")) {
analyzerProviders.put("cjk", new CjkAnalyzerProvider(index, indexSettings, "cjk", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("czech")) {
analyzerProviders.put("czech", new CzechAnalyzerProvider(index, indexSettings, "czech", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("dutch")) {
analyzerProviders.put("dutch", new DutchAnalyzerProvider(index, indexSettings, "dutch", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("french")) {
analyzerProviders.put("french", new FrenchAnalyzerProvider(index, indexSettings, "french", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("german")) {
analyzerProviders.put("german", new GermanAnalyzerProvider(index, indexSettings, "german", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("greek")) {
analyzerProviders.put("greek", new GreekAnalyzerProvider(index, indexSettings, "greek", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("persian")) {
analyzerProviders.put("persian", new PersianAnalyzerProvider(index, indexSettings, "persian", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("russian")) {
analyzerProviders.put("russian", new RussianAnalyzerProvider(index, indexSettings, "russian", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
if (!analyzerProviders.containsKey("thai")) {
analyzerProviders.put("thai", new ThaiAnalyzerProvider(index, indexSettings, "thai", ImmutableSettings.Builder.EMPTY_SETTINGS));
}
this.analyzerProviders = ImmutableMap.copyOf(analyzerProviders);
Map<String, NamedAnalyzer> analyzers = newHashMap();

View File

@ -0,0 +1,57 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Analyzer provider that builds a single {@link ArabicAnalyzer} in its constructor and hands
 * out that same instance from {@link #get()}.
 *
 * <p>Stop words come from the <code>stopwords</code> array setting when it has at least one
 * entry; otherwise {@link ArabicAnalyzer#getDefaultStopSet()} is used.
 *
 * @author kimchy (shay.banon)
 */
public class ArabicAnalyzerProvider extends AbstractAnalyzerProvider<ArabicAnalyzer> {

    private final Set<String> stopWords;

    private final ArabicAnalyzer arabicAnalyzer;

    @Inject
    public ArabicAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);
        String[] configuredStopWords = settings.getAsArray("stopwords");
        if (configuredStopWords.length == 0) {
            // Nothing configured: use the analyzer's built-in stop set.
            this.stopWords = ArabicAnalyzer.getDefaultStopSet();
        } else {
            this.stopWords = ImmutableSet.copyOf(Iterators.forArray(configuredStopWords));
        }
        this.arabicAnalyzer = new ArabicAnalyzer(Version.LUCENE_CURRENT, this.stopWords);
    }

    /**
     * @return the analyzer created in the constructor
     */
    @Override
    public ArabicAnalyzer get() {
        return arabicAnalyzer;
    }
}

View File

@ -0,0 +1,42 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
/**
 * Token filter factory that wraps incoming token streams in an {@link ArabicStemFilter}.
 *
 * <p>The per-filter <code>settings</code> argument is accepted by the constructor but not read.
 *
 * @author kimchy (shay.banon)
 */
public class ArabicStemTokenFilterFactory extends AbstractTokenFilterFactory {

    @Inject
    public ArabicStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);
    }

    /**
     * @param tokenStream the stream to decorate
     * @return a new {@link ArabicStemFilter} reading from <code>tokenStream</code>
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream stemmed = new ArabicStemFilter(tokenStream);
        return stemmed;
    }
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Analyzer provider that builds a single {@link BrazilianAnalyzer} in its constructor and hands
 * out that same instance from {@link #get()}.
 *
 * <p>Two array settings are read:
 * <ul>
 * <li><code>stopwords</code> - when empty, {@link BrazilianAnalyzer#getDefaultStopSet()} is used;</li>
 * <li><code>stemExclusion</code> - when empty, an empty set is used.</li>
 * </ul>
 * Both resulting sets are passed to the analyzer constructor.
 *
 * @author kimchy (shay.banon)
 */
public class BrazilianAnalyzerProvider extends AbstractAnalyzerProvider<BrazilianAnalyzer> {

    private final Set<?> stopWords;

    private final Set<?> stemExclusion;

    private final BrazilianAnalyzer analyzer;

    @Inject
    public BrazilianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);

        String[] configuredStopWords = settings.getAsArray("stopwords");
        if (configuredStopWords.length == 0) {
            // Nothing configured: use the analyzer's built-in stop set.
            this.stopWords = BrazilianAnalyzer.getDefaultStopSet();
        } else {
            this.stopWords = ImmutableSet.copyOf(Iterators.forArray(configuredStopWords));
        }

        String[] configuredExclusions = settings.getAsArray("stemExclusion");
        if (configuredExclusions.length == 0) {
            this.stemExclusion = ImmutableSet.of();
        } else {
            this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(configuredExclusions));
        }

        this.analyzer = new BrazilianAnalyzer(Version.LUCENE_CURRENT, this.stopWords, this.stemExclusion);
    }

    /**
     * @return the analyzer created in the constructor
     */
    @Override
    public BrazilianAnalyzer get() {
        return analyzer;
    }
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Token filter factory that wraps incoming token streams in a {@link BrazilianStemFilter}.
 *
 * <p>The <code>stemExclusion</code> array setting is read once in the constructor; when it is
 * empty an empty set is used. The resulting set is passed to every filter created.
 *
 * @author kimchy (shay.banon)
 */
public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory {

    private final Set<?> exclusions;

    @Inject
    public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);
        String[] excludedTerms = settings.getAsArray("stemExclusion");
        this.exclusions = excludedTerms.length > 0
                ? ImmutableSet.copyOf(Iterators.forArray(excludedTerms))
                : ImmutableSet.of();
    }

    /**
     * @param tokenStream the stream to decorate
     * @return a new {@link BrazilianStemFilter} reading from <code>tokenStream</code>
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream stemmed = new BrazilianStemFilter(tokenStream, exclusions);
        return stemmed;
    }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.cn.ChineseAnalyzer;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
/**
 * Analyzer provider that builds a single {@link ChineseAnalyzer} in its constructor and hands
 * out that same instance from {@link #get()}.
 *
 * <p>The analyzer is created with its no-argument constructor; the per-analyzer
 * <code>settings</code> argument is accepted but not read.
 *
 * @author kimchy (shay.banon)
 */
public class ChineseAnalyzerProvider extends AbstractAnalyzerProvider<ChineseAnalyzer> {

    private final ChineseAnalyzer analyzer;

    @Inject
    public ChineseAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);
        this.analyzer = new ChineseAnalyzer();
    }

    /**
     * @return the analyzer created in the constructor
     */
    @Override
    public ChineseAnalyzer get() {
        return analyzer;
    }
}

View File

@ -0,0 +1,58 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Analyzer provider that builds a single {@link CJKAnalyzer} in its constructor and hands
 * out that same instance from {@link #get()}.
 *
 * <p>Stop words come from the <code>stopwords</code> array setting when it has at least one
 * entry; otherwise {@link CJKAnalyzer#getDefaultStopSet()} is used.
 *
 * @author kimchy (shay.banon)
 */
public class CjkAnalyzerProvider extends AbstractAnalyzerProvider<CJKAnalyzer> {

    private final Set<?> stopWords;

    private final CJKAnalyzer analyzer;

    @Inject
    public CjkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);
        String[] configuredStopWords = settings.getAsArray("stopwords");
        if (configuredStopWords.length == 0) {
            // Nothing configured: use the analyzer's built-in stop set.
            this.stopWords = CJKAnalyzer.getDefaultStopSet();
        } else {
            this.stopWords = ImmutableSet.copyOf(Iterators.forArray(configuredStopWords));
        }
        this.analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT, this.stopWords);
    }

    /**
     * @return the analyzer created in the constructor
     */
    @Override
    public CJKAnalyzer get() {
        return analyzer;
    }
}

View File

@ -0,0 +1,58 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Analyzer provider that builds a single {@link CzechAnalyzer} in its constructor and hands
 * out that same instance from {@link #get()}.
 *
 * <p>Stop words come from the <code>stopwords</code> array setting when it has at least one
 * entry; otherwise {@link CzechAnalyzer#getDefaultStopSet()} is used.
 *
 * @author kimchy (shay.banon)
 */
public class CzechAnalyzerProvider extends AbstractAnalyzerProvider<CzechAnalyzer> {

    private final Set<?> stopWords;

    private final CzechAnalyzer analyzer;

    @Inject
    public CzechAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);
        String[] configuredStopWords = settings.getAsArray("stopwords");
        if (configuredStopWords.length == 0) {
            // Nothing configured: use the analyzer's built-in stop set.
            this.stopWords = CzechAnalyzer.getDefaultStopSet();
        } else {
            this.stopWords = ImmutableSet.copyOf(Iterators.forArray(configuredStopWords));
        }
        this.analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT, this.stopWords);
    }

    /**
     * @return the analyzer created in the constructor
     */
    @Override
    public CzechAnalyzer get() {
        return analyzer;
    }
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Analyzer provider that builds a single {@link DutchAnalyzer} in its constructor and hands
 * out that same instance from {@link #get()}.
 *
 * <p>Two array settings are read:
 * <ul>
 * <li><code>stopwords</code> - when empty, {@link DutchAnalyzer#getDefaultStopSet()} is used;</li>
 * <li><code>stemExclusion</code> - when empty, an empty set is used.</li>
 * </ul>
 * Both resulting sets are passed to the analyzer constructor.
 *
 * @author kimchy (shay.banon)
 */
public class DutchAnalyzerProvider extends AbstractAnalyzerProvider<DutchAnalyzer> {

    private final Set<?> stopWords;

    private final Set<?> stemExclusion;

    private final DutchAnalyzer analyzer;

    @Inject
    public DutchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);

        String[] configuredStopWords = settings.getAsArray("stopwords");
        if (configuredStopWords.length == 0) {
            // Nothing configured: use the analyzer's built-in stop set.
            this.stopWords = DutchAnalyzer.getDefaultStopSet();
        } else {
            this.stopWords = ImmutableSet.copyOf(Iterators.forArray(configuredStopWords));
        }

        String[] configuredExclusions = settings.getAsArray("stemExclusion");
        if (configuredExclusions.length == 0) {
            this.stemExclusion = ImmutableSet.of();
        } else {
            this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(configuredExclusions));
        }

        this.analyzer = new DutchAnalyzer(Version.LUCENE_CURRENT, this.stopWords, this.stemExclusion);
    }

    /**
     * @return the analyzer created in the constructor
     */
    @Override
    public DutchAnalyzer get() {
        return analyzer;
    }
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.nl.DutchStemFilter;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Token filter factory that wraps incoming token streams in a {@link DutchStemFilter}.
 *
 * <p>The <code>stemExclusion</code> array setting is read once in the constructor; when it is
 * empty an empty set is used. The resulting set is passed to every filter created.
 *
 * @author kimchy (shay.banon)
 */
public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {

    private final Set<?> exclusions;

    @Inject
    public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);
        String[] excludedTerms = settings.getAsArray("stemExclusion");
        this.exclusions = excludedTerms.length > 0
                ? ImmutableSet.copyOf(Iterators.forArray(excludedTerms))
                : ImmutableSet.of();
    }

    /**
     * @param tokenStream the stream to decorate
     * @return a new {@link DutchStemFilter} reading from <code>tokenStream</code>
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream stemmed = new DutchStemFilter(tokenStream, exclusions);
        return stemmed;
    }
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Analyzer provider that builds a single {@link FrenchAnalyzer} in its constructor and hands
 * out that same instance from {@link #get()}.
 *
 * <p>Two array settings are read:
 * <ul>
 * <li><code>stopwords</code> - when empty, {@link FrenchAnalyzer#getDefaultStopSet()} is used;</li>
 * <li><code>stemExclusion</code> - when empty, an empty set is used.</li>
 * </ul>
 * Both resulting sets are passed to the analyzer constructor.
 *
 * @author kimchy (shay.banon)
 */
public class FrenchAnalyzerProvider extends AbstractAnalyzerProvider<FrenchAnalyzer> {

    private final Set<?> stopWords;

    private final Set<?> stemExclusion;

    private final FrenchAnalyzer analyzer;

    @Inject
    public FrenchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);

        String[] configuredStopWords = settings.getAsArray("stopwords");
        if (configuredStopWords.length == 0) {
            // Nothing configured: use the analyzer's built-in stop set.
            this.stopWords = FrenchAnalyzer.getDefaultStopSet();
        } else {
            this.stopWords = ImmutableSet.copyOf(Iterators.forArray(configuredStopWords));
        }

        String[] configuredExclusions = settings.getAsArray("stemExclusion");
        if (configuredExclusions.length == 0) {
            this.stemExclusion = ImmutableSet.of();
        } else {
            this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(configuredExclusions));
        }

        this.analyzer = new FrenchAnalyzer(Version.LUCENE_CURRENT, this.stopWords, this.stemExclusion);
    }

    /**
     * @return the analyzer created in the constructor
     */
    @Override
    public FrenchAnalyzer get() {
        return analyzer;
    }
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchStemFilter;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Token filter factory that wraps incoming token streams in a {@link FrenchStemFilter}.
 *
 * <p>The <code>stemExclusion</code> array setting is read once in the constructor; when it is
 * empty an empty set is used. The resulting set is passed to every filter created.
 *
 * @author kimchy (shay.banon)
 */
public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {

    private final Set<?> exclusions;

    @Inject
    public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);
        String[] excludedTerms = settings.getAsArray("stemExclusion");
        this.exclusions = excludedTerms.length > 0
                ? ImmutableSet.copyOf(Iterators.forArray(excludedTerms))
                : ImmutableSet.of();
    }

    /**
     * @param tokenStream the stream to decorate
     * @return a new {@link FrenchStemFilter} reading from <code>tokenStream</code>
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream stemmed = new FrenchStemFilter(tokenStream, exclusions);
        return stemmed;
    }
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Analyzer provider that builds a single {@link GermanAnalyzer} in its constructor and hands
 * out that same instance from {@link #get()}.
 *
 * <p>Two array settings are read:
 * <ul>
 * <li><code>stopwords</code> - when empty, {@link GermanAnalyzer#getDefaultStopSet()} is used;</li>
 * <li><code>stemExclusion</code> - when empty, an empty set is used.</li>
 * </ul>
 * Both resulting sets are passed to the analyzer constructor.
 *
 * @author kimchy (shay.banon)
 */
public class GermanAnalyzerProvider extends AbstractAnalyzerProvider<GermanAnalyzer> {

    private final Set<?> stopWords;

    private final Set<?> stemExclusion;

    private final GermanAnalyzer analyzer;

    @Inject
    public GermanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);

        String[] configuredStopWords = settings.getAsArray("stopwords");
        if (configuredStopWords.length == 0) {
            // Nothing configured: use the analyzer's built-in stop set.
            this.stopWords = GermanAnalyzer.getDefaultStopSet();
        } else {
            this.stopWords = ImmutableSet.copyOf(Iterators.forArray(configuredStopWords));
        }

        String[] configuredExclusions = settings.getAsArray("stemExclusion");
        if (configuredExclusions.length == 0) {
            this.stemExclusion = ImmutableSet.of();
        } else {
            this.stemExclusion = ImmutableSet.copyOf(Iterators.forArray(configuredExclusions));
        }

        this.analyzer = new GermanAnalyzer(Version.LUCENE_CURRENT, this.stopWords, this.stemExclusion);
    }

    /**
     * @return the analyzer created in the constructor
     */
    @Override
    public GermanAnalyzer get() {
        return analyzer;
    }
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Token filter factory that wraps incoming token streams in a {@link GermanStemFilter}.
 *
 * <p>The <code>stemExclusion</code> array setting is read once in the constructor; when it is
 * empty an empty set is used. The resulting set is passed to every filter created.
 *
 * @author kimchy (shay.banon)
 */
public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {

    private final Set<?> exclusions;

    @Inject
    public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name);
        String[] excludedTerms = settings.getAsArray("stemExclusion");
        this.exclusions = excludedTerms.length > 0
                ? ImmutableSet.copyOf(Iterators.forArray(excludedTerms))
                : ImmutableSet.of();
    }

    /**
     * @param tokenStream the stream to decorate
     * @return a new {@link GermanStemFilter} reading from <code>tokenStream</code>
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream stemmed = new GermanStemFilter(tokenStream, exclusions);
        return stemmed;
    }
}

View File

@ -0,0 +1,58 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Provider of a single, eagerly constructed Lucene {@link GreekAnalyzer}.
 *
 * <p>Stop words are taken from the <tt>stopwords</tt> setting; when that setting yields
 * no entries, {@link GreekAnalyzer#getDefaultStopSet()} is used instead.
 *
 * @author kimchy (shay.banon)
 */
public class GreekAnalyzerProvider extends AbstractAnalyzerProvider<GreekAnalyzer> {

    // The stop word set the analyzer was built with (configured or default).
    private final Set<?> stopWords;

    // Created once in the constructor and returned by every call to get().
    private final GreekAnalyzer analyzer;

    /**
     * @param index         the index this provider belongs to (forwarded to the super class)
     * @param indexSettings the index level settings (forwarded to the super class)
     * @param name          the name of this analyzer (forwarded to the super class)
     * @param settings      the analyzer's own settings; only <tt>stopwords</tt> is read
     */
    @Inject
    public GreekAnalyzerProvider(Index index,
                                 @IndexSettings Settings indexSettings,
                                 @Assisted String name,
                                 @Assisted Settings settings) {
        super(index, indexSettings, name);
        String[] configured = settings.getAsArray("stopwords");
        if (configured.length == 0) {
            // Nothing configured: fall back to the analyzer's built-in stop set.
            this.stopWords = GreekAnalyzer.getDefaultStopSet();
        } else {
            this.stopWords = ImmutableSet.copyOf(Iterators.forArray(configured));
        }
        this.analyzer = new GreekAnalyzer(Version.LUCENE_CURRENT, this.stopWords);
    }

    /**
     * Returns the analyzer instance built at construction time.
     */
    @Override
    public GreekAnalyzer get() {
        return analyzer;
    }
}

View File

@ -0,0 +1,58 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
import java.util.Set;
/**
 * Provider of a single, eagerly constructed Lucene {@link PersianAnalyzer}.
 *
 * <p>Stop words are taken from the <tt>stopwords</tt> setting; when that setting yields
 * no entries, {@link PersianAnalyzer#getDefaultStopSet()} is used instead.
 *
 * @author kimchy (shay.banon)
 */
public class PersianAnalyzerProvider extends AbstractAnalyzerProvider<PersianAnalyzer> {

    // The stop word set the analyzer was built with (configured or default).
    private final Set<?> stopWords;

    // Created once in the constructor and returned by every call to get().
    private final PersianAnalyzer analyzer;

    /**
     * @param index         the index this provider belongs to (forwarded to the super class)
     * @param indexSettings the index level settings (forwarded to the super class)
     * @param name          the name of this analyzer (forwarded to the super class)
     * @param settings      the analyzer's own settings; only <tt>stopwords</tt> is read
     */
    @Inject
    public PersianAnalyzerProvider(Index index,
                                   @IndexSettings Settings indexSettings,
                                   @Assisted String name,
                                   @Assisted Settings settings) {
        super(index, indexSettings, name);
        this.stopWords = resolveStopWords(settings.getAsArray("stopwords"));
        this.analyzer = new PersianAnalyzer(Version.LUCENE_CURRENT, this.stopWords);
    }

    /**
     * Returns the analyzer instance built at construction time.
     */
    @Override
    public PersianAnalyzer get() {
        return analyzer;
    }

    // Picks the configured words when there are any, otherwise the analyzer's default set.
    private static Set<?> resolveStopWords(String[] configured) {
        if (configured.length > 0) {
            return ImmutableSet.copyOf(Iterators.forArray(configured));
        }
        return PersianAnalyzer.getDefaultStopSet();
    }
}

View File

@ -0,0 +1,52 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterators;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
/**
 * Provider of a single, eagerly constructed Lucene {@link RussianAnalyzer}.
 *
 * <p>When the <tt>stopwords</tt> setting yields entries they are passed to the analyzer
 * as an immutable set; otherwise the analyzer is created with its version-only
 * constructor.
 *
 * @author kimchy (shay.banon)
 */
public class RussianAnalyzerProvider extends AbstractAnalyzerProvider<RussianAnalyzer> {

    // Created once in the constructor and returned by every call to get().
    private final RussianAnalyzer analyzer;

    /**
     * @param index         the index this provider belongs to (forwarded to the super class)
     * @param indexSettings the index level settings (forwarded to the super class)
     * @param name          the name of this analyzer (forwarded to the super class)
     * @param settings      the analyzer's own settings; only <tt>stopwords</tt> is read
     */
    @Inject
    public RussianAnalyzerProvider(Index index,
                                   @IndexSettings Settings indexSettings,
                                   @Assisted String name,
                                   @Assisted Settings settings) {
        super(index, indexSettings, name);
        String[] configured = settings.getAsArray("stopwords");
        if (configured.length == 0) {
            // No custom stop words: let the analyzer use whatever it defaults to.
            this.analyzer = new RussianAnalyzer(Version.LUCENE_CURRENT);
        } else {
            ImmutableSet<String> custom = ImmutableSet.copyOf(Iterators.forArray(configured));
            this.analyzer = new RussianAnalyzer(Version.LUCENE_CURRENT, custom);
        }
    }

    /**
     * Returns the analyzer instance built at construction time.
     */
    @Override
    public RussianAnalyzer get() {
        return analyzer;
    }
}

View File

@ -0,0 +1,42 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ru.RussianStemFilter;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
/**
 * Token filter factory that wraps a token stream in a Lucene {@link RussianStemFilter}.
 *
 * @author kimchy (shay.banon)
 */
public class RussianStemTokenFilterFactory extends AbstractTokenFilterFactory {

    /**
     * @param index         the index this factory belongs to (forwarded to the super class)
     * @param indexSettings the index level settings (forwarded to the super class)
     * @param name          the name of this token filter (forwarded to the super class)
     * @param settings      the filter's own settings; not read by this constructor
     */
    @Inject
    public RussianStemTokenFilterFactory(Index index,
                                         @IndexSettings Settings indexSettings,
                                         @Assisted String name,
                                         @Assisted Settings settings) {
        super(index, indexSettings, name);
    }

    /**
     * Wraps the given stream with a new Russian stem filter.
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        TokenStream stemmed = new RussianStemFilter(tokenStream);
        return stemmed;
    }
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.google.inject.Inject;
import com.google.inject.assistedinject.Assisted;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.util.Version;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
/**
 * Provider of a single, eagerly constructed Lucene {@link ThaiAnalyzer}.
 *
 * @author kimchy (shay.banon)
 */
public class ThaiAnalyzerProvider extends AbstractAnalyzerProvider<ThaiAnalyzer> {

    // Created once in the constructor and returned by every call to get().
    private final ThaiAnalyzer analyzer;

    /**
     * @param index         the index this provider belongs to (forwarded to the super class)
     * @param indexSettings the index level settings (forwarded to the super class)
     * @param name          the name of this analyzer (forwarded to the super class)
     * @param settings      the analyzer's own settings; not read by this constructor
     */
    @Inject
    public ThaiAnalyzerProvider(Index index,
                                @IndexSettings Settings indexSettings,
                                @Assisted String name,
                                @Assisted Settings settings) {
        super(index, indexSettings, name);
        this.analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
    }

    /**
     * Returns the analyzer instance built at construction time.
     */
    @Override
    public ThaiAnalyzer get() {
        return analyzer;
    }
}