migrate branch for analysis-stempel

This commit is contained in:
Simon Willnauer 2015-06-05 13:12:17 +02:00
commit 263bc6bff8
13 changed files with 693 additions and 0 deletions

View File

@ -0,0 +1,56 @@
Stempel (Polish) Analysis for Elasticsearch
==================================
The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch.
In order to install the plugin, simply run:
```sh
bin/plugin install elasticsearch/elasticsearch-analysis-stempel/2.4.3
```
| elasticsearch | Stempel Analysis Plugin | Docs |
|---------------|-----------------------|------------|
| master | Build from source | See below |
| es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elastic/elasticsearch-analysis-stempel/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) |
| es-1.5 | 2.5.0 | [2.5.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.5.0/#version-250-for-elasticsearch-15) |
| es-1.4 | 2.4.3 | [2.4.3](https://github.com/elasticsearch/elasticsearch-analysis-stempel/tree/v2.4.3/#version-243-for-elasticsearch-14) |
| < 1.4.5 | 2.4.2 | [2.4.2](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.4.2/#version-242-for-elasticsearch-14) |
| < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.4.1/#version-241-for-elasticsearch-14) |
| es-1.3 | 2.3.0 | [2.3.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.3.0/#stempel-polish-analysis-for-elasticsearch) |
| es-1.2 | 2.2.0 | [2.2.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.2.0/#stempel-polish-analysis-for-elasticsearch) |
| es-1.1 | 2.1.0 | [2.1.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.1.0/#stempel-polish-analysis-for-elasticsearch) |
| es-1.0 | 2.0.0 | [2.0.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.0.0/#stempel-polish-analysis-for-elasticsearch) |
| es-0.90 | 1.13.0 | [1.13.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v1.13.0/#stempel-polish-analysis-for-elasticsearch) |
To build a `SNAPSHOT` version, you need to build it with Maven:
```bash
mvn clean install
plugin --install analysis-stempel \
--url file:target/releases/elasticsearch-analysis-stempel-X.X.X-SNAPSHOT.zip
```
Stempel Plugin
-----------------
The plugin includes the `polish` analyzer and `polish_stem` token filter.
License
-------
This software is licensed under the Apache 2 license, quoted below.
Copyright 2009-2014 Elasticsearch <http://www.elasticsearch.org>
Licensed under the Apache License, Version 2.0 (the "License"); you may not
use this file except in compliance with the License. You may obtain a copy of
the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License.

View File

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.elasticsearch.plugin</groupId>
<artifactId>elasticsearch-analysis-stempel</artifactId>
<packaging>jar</packaging>
<name>Elasticsearch Stempel (Polish) Analysis plugin</name>
<description>The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch.</description>
<parent>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-plugin</artifactId>
<version>2.0.0-SNAPSHOT</version>
</parent>
<properties>
<!-- You can add any specific project property here -->
</properties>
<dependencies>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-stempel</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,26 @@
<?xml version="1.0"?>
<assembly>
<id>plugin</id>
<formats>
<format>zip</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<dependencySets>
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<excludes>
<exclude>org.elasticsearch:elasticsearch</exclude>
</excludes>
</dependencySet>
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<includes>
<include>org.apache.lucene:lucene-analyzers-stempel</include>
</includes>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,36 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis.pl;
import org.elasticsearch.index.analysis.AnalysisModule;
/**
*/
public class PolishAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor {
@Override
public void processAnalyzers(AnalyzersBindings analyzersBindings) {
analyzersBindings.processAnalyzer("polish", PolishAnalyzerProvider.class);
}
@Override
public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
tokenFiltersBindings.processTokenFilter("polish_stem", PolishStemTokenFilterFactory.class);
}
}

View File

@ -0,0 +1,49 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis.pl;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.settings.IndexSettings;
/**
*/
public class PolishAnalyzerProvider extends AbstractIndexAnalyzerProvider<PolishAnalyzer> {
private final PolishAnalyzer analyzer;
@Inject
public PolishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
analyzer = new PolishAnalyzer(PolishAnalyzer.getDefaultStopSet());
analyzer.setVersion(version);
}
@Override
public PolishAnalyzer get() {
return this.analyzer;
}
}

View File

@ -0,0 +1,56 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis.pl;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.egothor.stemmer.Trie;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import java.io.IOException;
public class PolishStemTokenFilterFactory extends AbstractTokenFilterFactory {
private final StempelStemmer stemmer;
@Inject public PolishStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettings, name, settings);
Trie tire;
try {
tire = StempelStemmer.load(PolishAnalyzer.class.getResourceAsStream(PolishAnalyzer.DEFAULT_STEMMER_FILE));
} catch (IOException ex) {
throw new RuntimeException("Unable to load default stemming tables", ex);
}
stemmer = new StempelStemmer(tire);
}
@Override public TokenStream create(TokenStream tokenStream) {
return new StempelFilter(tokenStream, stemmer);
}
}

View File

@ -0,0 +1,65 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis.pl;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.egothor.stemmer.Trie;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import java.io.IOException;
/**
* Registers indices level analysis components so, if not explicitly configured, will be shared
* among all indices.
*/
public class PolishIndicesAnalysis extends AbstractComponent {
@Inject
public PolishIndicesAnalysis(Settings settings, IndicesAnalysisService indicesAnalysisService) {
super(settings);
indicesAnalysisService.analyzerProviderFactories().put("polish", new PreBuiltAnalyzerProviderFactory("polish", AnalyzerScope.INDICES, new PolishAnalyzer()));
indicesAnalysisService.tokenFilterFactories().put("polish_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override public String name() {
return "polish_stem";
}
@Override public TokenStream create(TokenStream tokenStream) {
Trie tire;
try {
tire = StempelStemmer.load(PolishAnalyzer.class.getResourceAsStream(PolishAnalyzer.DEFAULT_STEMMER_FILE));
} catch (IOException ex) {
throw new RuntimeException("Unable to load default stemming tables", ex);
}
return new StempelFilter(tokenStream, new StempelStemmer(tire));
}
}));
}
}

View File

@ -0,0 +1,32 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis.pl;
import org.elasticsearch.common.inject.AbstractModule;
/**
*/
public class PolishIndicesAnalysisModule extends AbstractModule {
@Override
protected void configure() {
bind(PolishIndicesAnalysis.class).asEagerSingleton();
}
}

View File

@ -0,0 +1,55 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.analysis.stempel;
import org.elasticsearch.common.inject.Module;
import org.elasticsearch.index.analysis.AnalysisModule;
import org.elasticsearch.index.analysis.pl.PolishAnalysisBinderProcessor;
import org.elasticsearch.indices.analysis.pl.PolishIndicesAnalysisModule;
import org.elasticsearch.plugins.AbstractPlugin;
import java.util.*;
/**
*
*/
public class AnalysisStempelPlugin extends AbstractPlugin {
@Override
public String name() {
return "analysis-stempel";
}
@Override
public String description() {
return "Stempel (Polish) analysis support";
}
@Override
public Collection<Class<? extends Module>> modules() {
Collection<Class<? extends Module>> classes = new ArrayList<>();
classes.add(PolishIndicesAnalysisModule.class);
return classes;
}
public void onModule(AnalysisModule module) {
module.addProcessor(new PolishAnalysisBinderProcessor());
}
}

View File

@ -0,0 +1,3 @@
plugin=org.elasticsearch.plugin.analysis.stempel.AnalysisStempelPlugin
version=${project.version}
lucene=${lucene.version}

View File

@ -0,0 +1,74 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.EnvironmentModule;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNameModule;
import org.elasticsearch.index.analysis.pl.PolishAnalysisBinderProcessor;
import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.hamcrest.MatcherAssert;
import org.junit.Test;
import static org.elasticsearch.common.settings.Settings.Builder.EMPTY_SETTINGS;
import static org.elasticsearch.common.settings.Settings.settingsBuilder;
import static org.hamcrest.Matchers.instanceOf;
/**
*/
public class PolishAnalysisTests extends ElasticsearchTestCase {
@Test
public void testDefaultsPolishAnalysis() {
Index index = new Index("test");
Settings settings = settingsBuilder()
.put("path.home", createTempDir())
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.build();
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(EMPTY_SETTINGS), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()).createInjector();
Injector injector = new ModulesBuilder().add(
new IndexSettingsModule(index, settings),
new IndexNameModule(index),
new AnalysisModule(EMPTY_SETTINGS, parentInjector.getInstance(IndicesAnalysisService.class)).addProcessor(new PolishAnalysisBinderProcessor()))
.createChildInjector(parentInjector);
AnalysisService analysisService = injector.getInstance(AnalysisService.class);
TokenFilterFactory tokenizerFactory = analysisService.tokenFilter("polish_stem");
MatcherAssert.assertThat(tokenizerFactory, instanceOf(PolishStemTokenFilterFactory.class));
Analyzer analyzer = analysisService.analyzer("polish").analyzer();
MatcherAssert.assertThat(analyzer, instanceOf(PolishAnalyzer.class));
}
}

View File

@ -0,0 +1,87 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.plugins.PluginsService;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.junit.Test;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.notNullValue;
@ElasticsearchIntegrationTest.ClusterScope(numDataNodes = 1, scope = ElasticsearchIntegrationTest.Scope.SUITE)
public class SimplePolishIntegrationTests extends ElasticsearchIntegrationTest {
@Override
protected Settings nodeSettings(int nodeOrdinal) {
return Settings.builder()
.put(super.nodeSettings(nodeOrdinal))
.put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true)
.build();
}
@Test
public void testPolishAnalyzer() throws ExecutionException, InterruptedException {
AnalyzeResponse response = client().admin().indices()
.prepareAnalyze("wirtualna polska").setAnalyzer("polish")
.execute().get();
assertThat(response, notNullValue());
assertThat(response.getTokens().size(), is(2));
}
@Test
public void testPolishStemmerTokenFilter() throws ExecutionException, InterruptedException {
AnalyzeResponse response = client().admin().indices()
.prepareAnalyze("canona").setTokenFilters("polish_stem")
.execute().get();
assertThat(response, notNullValue());
assertThat(response.getTokens().size(), is(1));
}
@Test
public void testPolishAnalyzerInMapping() throws ExecutionException, InterruptedException, IOException {
final XContentBuilder mapping = jsonBuilder().startObject()
.startObject("type")
.startObject("properties")
.startObject("foo")
.field("type", "string")
.field("analyzer", "polish")
.endObject()
.endObject()
.endObject()
.endObject();
client().admin().indices().prepareCreate("test").addMapping("type", mapping).get();
index("test", "type", "1", "foo", "wirtualna polska");
ensureYellow();
}
}

View File

@ -0,0 +1,114 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.EnvironmentModule;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNameModule;
import org.elasticsearch.index.analysis.pl.PolishAnalysisBinderProcessor;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import java.io.IOException;
import java.io.StringReader;
import static org.hamcrest.Matchers.equalTo;
public class SimplePolishTokenFilterTests extends ElasticsearchTestCase {
@Test
public void testBasicUsage() throws Exception {
testToken("kwiaty", "kwć");
testToken("canona", "ć");
testToken("wirtualna", "wirtualny");
testToken("polska", "polski");
testAnalyzer("wirtualna polska", "wirtualny", "polski");
}
private void testToken(String source, String expected) throws IOException {
Index index = new Index("test");
Settings settings = Settings.settingsBuilder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put("path.home", createTempDir())
.put("index.analysis.filter.myStemmer.type", "polish_stem")
.build();
AnalysisService analysisService = createAnalysisService(index, settings);
TokenFilterFactory filterFactory = analysisService.tokenFilter("myStemmer");
Tokenizer tokenizer = new KeywordTokenizer();
tokenizer.setReader(new StringReader(source));
TokenStream ts = filterFactory.create(tokenizer);
CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
ts.reset();
assertThat(ts.incrementToken(), equalTo(true));
assertThat(term1.toString(), equalTo(expected));
}
private void testAnalyzer(String source, String... expected_terms) throws IOException {
Index index = new Index("test");
Settings settings = Settings.settingsBuilder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put("path.home", createTempDir())
.build();
AnalysisService analysisService = createAnalysisService(index, settings);
Analyzer analyzer = analysisService.analyzer("polish").analyzer();
TokenStream ts = analyzer.tokenStream("test", source);
CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
ts.reset();
for (String expected : expected_terms) {
assertThat(ts.incrementToken(), equalTo(true));
assertThat(term1.toString(), equalTo(expected));
}
}
private AnalysisService createAnalysisService(Index index, Settings settings) {
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()).createInjector();
Injector injector = new ModulesBuilder().add(
new IndexSettingsModule(index, settings),
new IndexNameModule(index),
new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)).addProcessor(new PolishAnalysisBinderProcessor()))
.createChildInjector(parentInjector);
return injector.getInstance(AnalysisService.class);
}
}