migrate branch for analysis-stempel
This commit is contained in:
commit
263bc6bff8
|
@ -0,0 +1,56 @@
|
|||
Stempel (Polish) Analysis for Elasticsearch
|
||||
==================================
|
||||
|
||||
The Stempel (Polish) Analysis plugin integrates the Lucene Stempel (Polish) analysis module into Elasticsearch.
|
||||
|
||||
In order to install the plugin, simply run:
|
||||
|
||||
```sh
|
||||
bin/plugin install elasticsearch/elasticsearch-analysis-stempel/2.4.3
|
||||
```
|
||||
|
||||
| elasticsearch | Stempel Analysis Plugin | Docs |
|
||||
|---------------|-----------------------|------------|
|
||||
| master | Build from source | See below |
|
||||
| es-1.x | Build from source | [2.6.0-SNAPSHOT](https://github.com/elastic/elasticsearch-analysis-stempel/tree/es-1.x/#version-260-snapshot-for-elasticsearch-1x) |
|
||||
| es-1.5 | 2.5.0 | [2.5.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.5.0/#version-250-for-elasticsearch-15) |
|
||||
| es-1.4 | 2.4.3 | [2.4.3](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.4.3/#version-243-for-elasticsearch-14) |
|
||||
| < 1.4.5 | 2.4.2 | [2.4.2](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.4.2/#version-242-for-elasticsearch-14) |
|
||||
| < 1.4.3 | 2.4.1 | [2.4.1](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.4.1/#version-241-for-elasticsearch-14) |
|
||||
| es-1.3 | 2.3.0 | [2.3.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.3.0/#stempel-polish-analysis-for-elasticsearch) |
|
||||
| es-1.2 | 2.2.0 | [2.2.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.2.0/#stempel-polish-analysis-for-elasticsearch) |
|
||||
| es-1.1 | 2.1.0 | [2.1.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.1.0/#stempel-polish-analysis-for-elasticsearch) |
|
||||
| es-1.0 | 2.0.0 | [2.0.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v2.0.0/#stempel-polish-analysis-for-elasticsearch) |
|
||||
| es-0.90 | 1.13.0 | [1.13.0](https://github.com/elastic/elasticsearch-analysis-stempel/tree/v1.13.0/#stempel-polish-analysis-for-elasticsearch) |
|
||||
|
||||
To build a `SNAPSHOT` version, you need to build it with Maven:
|
||||
|
||||
```bash
|
||||
mvn clean install
|
||||
plugin --install analysis-stempel \
|
||||
--url file:target/releases/elasticsearch-analysis-stempel-X.X.X-SNAPSHOT.zip
|
||||
```
|
||||
|
||||
Stempel Plugin
|
||||
-----------------
|
||||
|
||||
The plugin includes the `polish` analyzer and `polish_stem` token filter.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
This software is licensed under the Apache 2 license, quoted below.
|
||||
|
||||
Copyright 2009-2014 Elasticsearch <http://www.elasticsearch.org>
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||
use this file except in compliance with the License. You may obtain a copy of
|
||||
the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
License for the specific language governing permissions and limitations under
|
||||
the License.
|
|
@ -0,0 +1,40 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.elasticsearch.plugin</groupId>
|
||||
<artifactId>elasticsearch-analysis-stempel</artifactId>
|
||||
|
||||
<packaging>jar</packaging>
|
||||
<name>Elasticsearch Stempel (Polish) Analysis plugin</name>
|
||||
<description>The Stempel (Polish) Analysis plugin integrates Lucene stempel (polish) analysis module into elasticsearch.</description>
|
||||
|
||||
<parent>
|
||||
<groupId>org.elasticsearch</groupId>
|
||||
<artifactId>elasticsearch-plugin</artifactId>
|
||||
<version>2.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<properties>
|
||||
<!-- You can add any specific project property here -->
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-analyzers-stempel</artifactId>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
|
@ -0,0 +1,26 @@
|
|||
<?xml version="1.0"?>
|
||||
<assembly>
|
||||
<id>plugin</id>
|
||||
<formats>
|
||||
<format>zip</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
<useProjectArtifact>true</useProjectArtifact>
|
||||
<useTransitiveFiltering>true</useTransitiveFiltering>
|
||||
<excludes>
|
||||
<exclude>org.elasticsearch:elasticsearch</exclude>
|
||||
</excludes>
|
||||
</dependencySet>
|
||||
<dependencySet>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
<useProjectArtifact>true</useProjectArtifact>
|
||||
<useTransitiveFiltering>true</useTransitiveFiltering>
|
||||
<includes>
|
||||
<include>org.apache.lucene:lucene-analyzers-stempel</include>
|
||||
</includes>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis.pl;
|
||||
|
||||
import org.elasticsearch.index.analysis.AnalysisModule;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class PolishAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor {
|
||||
|
||||
@Override
|
||||
public void processAnalyzers(AnalyzersBindings analyzersBindings) {
|
||||
analyzersBindings.processAnalyzer("polish", PolishAnalyzerProvider.class);
|
||||
}
|
||||
@Override
|
||||
public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
|
||||
tokenFiltersBindings.processTokenFilter("polish_stem", PolishStemTokenFilterFactory.class);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis.pl;
|
||||
|
||||
import org.apache.lucene.analysis.pl.PolishAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class PolishAnalyzerProvider extends AbstractIndexAnalyzerProvider<PolishAnalyzer> {
|
||||
|
||||
private final PolishAnalyzer analyzer;
|
||||
|
||||
@Inject
|
||||
public PolishAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
|
||||
analyzer = new PolishAnalyzer(PolishAnalyzer.getDefaultStopSet());
|
||||
analyzer.setVersion(version);
|
||||
}
|
||||
|
||||
@Override
|
||||
public PolishAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis.pl;
|
||||
|
||||
import org.apache.lucene.analysis.pl.PolishAnalyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.stempel.StempelFilter;
|
||||
import org.apache.lucene.analysis.stempel.StempelStemmer;
|
||||
import org.egothor.stemmer.Trie;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
|
||||
public class PolishStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
private final StempelStemmer stemmer;
|
||||
|
||||
@Inject public PolishStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Trie tire;
|
||||
try {
|
||||
tire = StempelStemmer.load(PolishAnalyzer.class.getResourceAsStream(PolishAnalyzer.DEFAULT_STEMMER_FILE));
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException("Unable to load default stemming tables", ex);
|
||||
}
|
||||
stemmer = new StempelStemmer(tire);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new StempelFilter(tokenStream, stemmer);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.indices.analysis.pl;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.pl.PolishAnalyzer;
|
||||
import org.apache.lucene.analysis.stempel.StempelFilter;
|
||||
import org.apache.lucene.analysis.stempel.StempelStemmer;
|
||||
import org.egothor.stemmer.Trie;
|
||||
import org.elasticsearch.common.component.AbstractComponent;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.analysis.AnalyzerScope;
|
||||
import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
|
||||
import org.elasticsearch.index.analysis.PreBuiltTokenFilterFactoryFactory;
|
||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Registers indices level analysis components so, if not explicitly configured, will be shared
|
||||
* among all indices.
|
||||
*/
|
||||
public class PolishIndicesAnalysis extends AbstractComponent {
|
||||
|
||||
@Inject
|
||||
public PolishIndicesAnalysis(Settings settings, IndicesAnalysisService indicesAnalysisService) {
|
||||
super(settings);
|
||||
indicesAnalysisService.analyzerProviderFactories().put("polish", new PreBuiltAnalyzerProviderFactory("polish", AnalyzerScope.INDICES, new PolishAnalyzer()));
|
||||
|
||||
indicesAnalysisService.tokenFilterFactories().put("polish_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
|
||||
@Override public String name() {
|
||||
return "polish_stem";
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
Trie tire;
|
||||
try {
|
||||
tire = StempelStemmer.load(PolishAnalyzer.class.getResourceAsStream(PolishAnalyzer.DEFAULT_STEMMER_FILE));
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException("Unable to load default stemming tables", ex);
|
||||
}
|
||||
return new StempelFilter(tokenStream, new StempelStemmer(tire));
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.indices.analysis.pl;
|
||||
|
||||
import org.elasticsearch.common.inject.AbstractModule;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class PolishIndicesAnalysisModule extends AbstractModule {
|
||||
|
||||
@Override
|
||||
protected void configure() {
|
||||
bind(PolishIndicesAnalysis.class).asEagerSingleton();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.analysis.stempel;
|
||||
|
||||
import org.elasticsearch.common.inject.Module;
|
||||
import org.elasticsearch.index.analysis.AnalysisModule;
|
||||
import org.elasticsearch.index.analysis.pl.PolishAnalysisBinderProcessor;
|
||||
import org.elasticsearch.indices.analysis.pl.PolishIndicesAnalysisModule;
|
||||
import org.elasticsearch.plugins.AbstractPlugin;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class AnalysisStempelPlugin extends AbstractPlugin {
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return "analysis-stempel";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "Stempel (Polish) analysis support";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<Class<? extends Module>> modules() {
|
||||
Collection<Class<? extends Module>> classes = new ArrayList<>();
|
||||
classes.add(PolishIndicesAnalysisModule.class);
|
||||
return classes;
|
||||
}
|
||||
|
||||
public void onModule(AnalysisModule module) {
|
||||
module.addProcessor(new PolishAnalysisBinderProcessor());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
plugin=org.elasticsearch.plugin.analysis.stempel.AnalysisStempelPlugin
|
||||
version=${project.version}
|
||||
lucene=${lucene.version}
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.pl.PolishAnalyzer;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.inject.Injector;
|
||||
import org.elasticsearch.common.inject.ModulesBuilder;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.settings.SettingsModule;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.env.EnvironmentModule;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.IndexNameModule;
|
||||
import org.elasticsearch.index.analysis.pl.PolishAnalysisBinderProcessor;
|
||||
import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory;
|
||||
import org.elasticsearch.index.settings.IndexSettingsModule;
|
||||
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
|
||||
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.elasticsearch.common.settings.Settings.Builder.EMPTY_SETTINGS;
|
||||
import static org.elasticsearch.common.settings.Settings.settingsBuilder;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class PolishAnalysisTests extends ElasticsearchTestCase {
|
||||
|
||||
@Test
|
||||
public void testDefaultsPolishAnalysis() {
|
||||
Index index = new Index("test");
|
||||
Settings settings = settingsBuilder()
|
||||
.put("path.home", createTempDir())
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
||||
.build();
|
||||
|
||||
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(EMPTY_SETTINGS), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()).createInjector();
|
||||
Injector injector = new ModulesBuilder().add(
|
||||
new IndexSettingsModule(index, settings),
|
||||
new IndexNameModule(index),
|
||||
new AnalysisModule(EMPTY_SETTINGS, parentInjector.getInstance(IndicesAnalysisService.class)).addProcessor(new PolishAnalysisBinderProcessor()))
|
||||
.createChildInjector(parentInjector);
|
||||
|
||||
AnalysisService analysisService = injector.getInstance(AnalysisService.class);
|
||||
|
||||
TokenFilterFactory tokenizerFactory = analysisService.tokenFilter("polish_stem");
|
||||
MatcherAssert.assertThat(tokenizerFactory, instanceOf(PolishStemTokenFilterFactory.class));
|
||||
|
||||
Analyzer analyzer = analysisService.analyzer("polish").analyzer();
|
||||
MatcherAssert.assertThat(analyzer, instanceOf(PolishAnalyzer.class));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.plugins.PluginsService;
|
||||
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.hamcrest.CoreMatchers.notNullValue;
|
||||
|
||||
@ElasticsearchIntegrationTest.ClusterScope(numDataNodes = 1, scope = ElasticsearchIntegrationTest.Scope.SUITE)
|
||||
public class SimplePolishIntegrationTests extends ElasticsearchIntegrationTest {
|
||||
|
||||
@Override
|
||||
protected Settings nodeSettings(int nodeOrdinal) {
|
||||
return Settings.builder()
|
||||
.put(super.nodeSettings(nodeOrdinal))
|
||||
.put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true)
|
||||
.build();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPolishAnalyzer() throws ExecutionException, InterruptedException {
|
||||
AnalyzeResponse response = client().admin().indices()
|
||||
.prepareAnalyze("wirtualna polska").setAnalyzer("polish")
|
||||
.execute().get();
|
||||
|
||||
assertThat(response, notNullValue());
|
||||
assertThat(response.getTokens().size(), is(2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPolishStemmerTokenFilter() throws ExecutionException, InterruptedException {
|
||||
AnalyzeResponse response = client().admin().indices()
|
||||
.prepareAnalyze("canona").setTokenFilters("polish_stem")
|
||||
.execute().get();
|
||||
|
||||
assertThat(response, notNullValue());
|
||||
assertThat(response.getTokens().size(), is(1));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPolishAnalyzerInMapping() throws ExecutionException, InterruptedException, IOException {
|
||||
final XContentBuilder mapping = jsonBuilder().startObject()
|
||||
.startObject("type")
|
||||
.startObject("properties")
|
||||
.startObject("foo")
|
||||
.field("type", "string")
|
||||
.field("analyzer", "polish")
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject()
|
||||
.endObject();
|
||||
|
||||
client().admin().indices().prepareCreate("test").addMapping("type", mapping).get();
|
||||
|
||||
index("test", "type", "1", "foo", "wirtualna polska");
|
||||
|
||||
ensureYellow();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,114 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.inject.Injector;
|
||||
import org.elasticsearch.common.inject.ModulesBuilder;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.settings.SettingsModule;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.env.EnvironmentModule;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.IndexNameModule;
|
||||
import org.elasticsearch.index.analysis.pl.PolishAnalysisBinderProcessor;
|
||||
import org.elasticsearch.index.settings.IndexSettingsModule;
|
||||
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
|
||||
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
public class SimplePolishTokenFilterTests extends ElasticsearchTestCase {
|
||||
|
||||
@Test
|
||||
public void testBasicUsage() throws Exception {
|
||||
testToken("kwiaty", "kwć");
|
||||
testToken("canona", "ć");
|
||||
testToken("wirtualna", "wirtualny");
|
||||
testToken("polska", "polski");
|
||||
|
||||
testAnalyzer("wirtualna polska", "wirtualny", "polski");
|
||||
}
|
||||
|
||||
private void testToken(String source, String expected) throws IOException {
|
||||
Index index = new Index("test");
|
||||
Settings settings = Settings.settingsBuilder()
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
||||
.put("path.home", createTempDir())
|
||||
.put("index.analysis.filter.myStemmer.type", "polish_stem")
|
||||
.build();
|
||||
AnalysisService analysisService = createAnalysisService(index, settings);
|
||||
|
||||
TokenFilterFactory filterFactory = analysisService.tokenFilter("myStemmer");
|
||||
|
||||
Tokenizer tokenizer = new KeywordTokenizer();
|
||||
tokenizer.setReader(new StringReader(source));
|
||||
TokenStream ts = filterFactory.create(tokenizer);
|
||||
|
||||
CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
|
||||
ts.reset();
|
||||
assertThat(ts.incrementToken(), equalTo(true));
|
||||
|
||||
assertThat(term1.toString(), equalTo(expected));
|
||||
}
|
||||
|
||||
private void testAnalyzer(String source, String... expected_terms) throws IOException {
|
||||
Index index = new Index("test");
|
||||
Settings settings = Settings.settingsBuilder()
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
||||
.put("path.home", createTempDir())
|
||||
.build();
|
||||
AnalysisService analysisService = createAnalysisService(index, settings);
|
||||
|
||||
Analyzer analyzer = analysisService.analyzer("polish").analyzer();
|
||||
|
||||
TokenStream ts = analyzer.tokenStream("test", source);
|
||||
|
||||
CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class);
|
||||
ts.reset();
|
||||
|
||||
for (String expected : expected_terms) {
|
||||
assertThat(ts.incrementToken(), equalTo(true));
|
||||
assertThat(term1.toString(), equalTo(expected));
|
||||
}
|
||||
}
|
||||
|
||||
private AnalysisService createAnalysisService(Index index, Settings settings) {
|
||||
Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()).createInjector();
|
||||
Injector injector = new ModulesBuilder().add(
|
||||
new IndexSettingsModule(index, settings),
|
||||
new IndexNameModule(index),
|
||||
new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)).addProcessor(new PolishAnalysisBinderProcessor()))
|
||||
.createChildInjector(parentInjector);
|
||||
|
||||
return injector.getInstance(AnalysisService.class);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue