diff --git a/core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java new file mode 100644 index 00000000000..3af472f54bc --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java @@ -0,0 +1,38 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.synonym.FlattenGraphFilter; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexSettings; + +public class FlattenGraphTokenFilterFactory extends AbstractTokenFilterFactory { + + public FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { + super(indexSettings, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new FlattenGraphFilter(tokenStream); + } +} diff --git a/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java new file mode 100644 index 00000000000..259da010daa --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactoryTests.java @@ -0,0 +1,73 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import java.io.IOException; + +import org.apache.lucene.analysis.CannedTokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.test.ESTokenStreamTestCase; +import org.elasticsearch.test.IndexSettingsModule; + +public class FlattenGraphTokenFilterFactoryTests extends ESTokenStreamTestCase { + + public void testBasic() throws IOException { + + Index index = new Index("test", "_na_"); + String name = "ngr"; + Settings indexSettings = newAnalysisSettingsBuilder().build(); + IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings); + Settings settings = newAnalysisSettingsBuilder().build(); + + // "wow that's funny" and "what the fudge" are separate side paths, in parallel with "wtf", on input: + TokenStream in = new CannedTokenStream(0, 12, new Token[] { + token("wtf", 1, 5, 0, 3), + token("what", 0, 1, 0, 3), + token("wow", 0, 3, 0, 3), + token("the", 1, 1, 0, 3), + token("fudge", 1, 3, 0, 3), + token("that's", 1, 1, 0, 3), + token("funny", 1, 1, 0, 3), + token("happened", 1, 1, 4, 12) + }); + + TokenStream tokens = new FlattenGraphTokenFilterFactory(indexProperties, null, name, settings).create(in); + + // ... but on output, it's flattened to wtf/what/wow that's/the fudge/funny happened: + assertTokenStreamContents(tokens, + new String[] {"wtf", "what", "wow", "the", "that's", "fudge", "funny", "happened"}, + new int[] {0, 0, 0, 0, 0, 0, 0, 4}, + new int[] {3, 3, 3, 3, 3, 3, 3, 12}, + new int[] {1, 0, 0, 1, 0, 1, 0, 1}, + new int[] {3, 1, 1, 1, 1, 1, 1, 1}, + 12); + } + + private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) { + final Token t = new Token(term, startOffset, endOffset); + t.setPositionIncrement(posInc); + t.setPositionLength(posLength); + return t; + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java index 1634f049392..bdf36bbb85a 100644 --- a/test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/AnalysisFactoryTestCase.java @@ -42,6 +42,7 @@ import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory; import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory; import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory; import org.elasticsearch.index.analysis.ElisionTokenFilterFactory; +import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory; import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory; import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory; import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory; @@ -248,6 +249,7 @@ public class AnalysisFactoryTestCase extends ESTestCase { .put("type", KeepTypesFilterFactory.class) .put("uppercase", UpperCaseTokenFilterFactory.class) .put("worddelimiter", WordDelimiterTokenFilterFactory.class) + .put("flattengraph", FlattenGraphTokenFilterFactory.class) // TODO: these tokenfilters are not yet exposed: useful? @@ -277,8 +279,6 @@ public class AnalysisFactoryTestCase extends ESTestCase { .put("fingerprint", Void.class) // for tee-sinks .put("daterecognizer", Void.class) - // to flatten graphs created by the synonym graph filter - .put("flattengraph", Void.class) .immutableMap();