Add conditional token filter to Elasticsearch (#31958)

This allows token filters to be applied selectively, depending on the state of the current token in the token stream. The filter takes a scripted predicate, and only applies its subfilter when the predicate returns true.
parent 74b87989d9
commit 636442700c
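The mechanism described above — run a subfilter on a token only when a predicate accepts it — can be sketched in plain Java. This is an illustrative toy model over strings, not the Lucene/Elasticsearch implementation added by this PR:

```java
import java.util.List;
import java.util.function.Predicate;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;

// Toy model of a conditional token filter: the subfilter is applied to a
// token only when the predicate returns true; other tokens pass through
// unchanged.
public class ConditionalFilterSketch {
    public static List<String> apply(List<String> tokens,
                                     Predicate<String> predicate,
                                     UnaryOperator<String> subfilter) {
        return tokens.stream()
            .map(t -> predicate.test(t) ? subfilter.apply(t) : t)
            .collect(Collectors.toList());
    }

    public static void main(String[] args) {
        // Lowercase only tokens shorter than 5 characters, mirroring the
        // docs example in this PR.
        List<String> out = apply(List.of("What", "Flapdoodle"),
                                 t -> t.length() < 5,
                                 String::toLowerCase);
        System.out.println(out); // [what, Flapdoodle]
    }
}
```

The real implementation wraps Lucene's `ConditionalTokenFilter` and consults a compiled Painless script per token, but the control flow is the same shape.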
@@ -30,6 +30,8 @@ include::painless-metric-agg-reduce-context.asciidoc[]
 include::painless-bucket-agg-context.asciidoc[]
 
+include::painless-analysis-predicate-context.asciidoc[]
+
 include::painless-watcher-condition-context.asciidoc[]
 
 include::painless-watcher-transform-context.asciidoc[]
@@ -0,0 +1,43 @@
[[painless-analysis-predicate-context]]
=== Analysis Predicate Context

Use a painless script to determine whether or not the current token in an
analysis chain matches a predicate.

*Variables*

`params` (`Map`, read-only)::
User-defined parameters passed in as part of the query.

`token.term` (`CharSequence`, read-only)::
The characters of the current token

`token.position` (`int`, read-only)::
The position of the current token

`token.positionIncrement` (`int`, read-only)::
The position increment of the current token

`token.positionLength` (`int`, read-only)::
The position length of the current token

`token.startOffset` (`int`, read-only)::
The start offset of the current token

`token.endOffset` (`int`, read-only)::
The end offset of the current token

`token.type` (`String`, read-only)::
The type of the current token

`token.keyword` (`boolean`, read-only)::
Whether or not the current token is marked as a keyword

*Return*

`boolean`::
Whether or not the current token matches the predicate

*API*

The standard <<painless-api-reference, Painless API>> is available.
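The `token.*` variables documented above can be modeled as a simple state object. A minimal sketch of evaluating such a predicate over that state (hypothetical names; this is not the actual `AnalysisPredicateScript` API surface):

```java
// Minimal model of the documented token state and a predicate over it.
// Field names mirror the `token.*` variables above.
public class TokenPredicateSketch {
    public static class Token {
        public CharSequence term;
        public int position;
        public int positionIncrement;
        public int positionLength;
        public int startOffset;
        public int endOffset;
        public String type;
        public boolean keyword;
    }

    // Example predicate: match alphanumeric tokens longer than 5 characters
    // that are not marked as keywords.
    public static boolean matches(Token t) {
        return t.term.length() > 5 && "<ALPHANUM>".equals(t.type) && !t.keyword;
    }

    public static void main(String[] args) {
        Token t = new Token();
        t.term = "Flapdoodle";
        t.type = "<ALPHANUM>";
        t.keyword = false;
        System.out.println(matches(t)); // true
    }
}
```

In the real context the script body is Painless, and the return value decides whether the conditional filter's subfilters run on that token.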
@@ -37,6 +37,8 @@ include::tokenfilters/word-delimiter-graph-tokenfilter.asciidoc[]
 include::tokenfilters/multiplexer-tokenfilter.asciidoc[]
 
+include::tokenfilters/condition-tokenfilter.asciidoc[]
+
 include::tokenfilters/stemmer-tokenfilter.asciidoc[]
 
 include::tokenfilters/stemmer-override-tokenfilter.asciidoc[]
@@ -0,0 +1,90 @@
[[analysis-condition-tokenfilter]]
=== Conditional Token Filter

The conditional token filter takes a predicate script and a list of subfilters, and
only applies the subfilters to the current token if it matches the predicate.

[float]
=== Options
[horizontal]
filter:: a chain of token filters to apply to the current token if the predicate
matches. These can be any token filters defined elsewhere in the index mappings.

script:: a predicate script that determines whether or not the filters will be applied
to the current token. Note that only inline scripts are supported.

[float]
=== Settings example

You can set it up like:

[source,js]
--------------------------------------------------
PUT /condition_example
{
    "settings" : {
        "analysis" : {
            "analyzer" : {
                "my_analyzer" : {
                    "tokenizer" : "standard",
                    "filter" : [ "my_condition" ]
                }
            },
            "filter" : {
                "my_condition" : {
                    "type" : "condition",
                    "filter" : [ "lowercase" ],
                    "script" : {
                        "source" : "token.getTerm().length() < 5" <1>
                    }
                }
            }
        }
    }
}
--------------------------------------------------
// CONSOLE

<1> This will only apply the lowercase filter to terms that are less than 5
characters in length.

And test it like:

[source,js]
--------------------------------------------------
POST /condition_example/_analyze
{
  "analyzer" : "my_analyzer",
  "text" : "What Flapdoodle"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

And it'd respond:

[source,js]
--------------------------------------------------
{
  "tokens": [
    {
      "token": "what", <1>
      "start_offset": 0,
      "end_offset": 4,
      "type": "<ALPHANUM>",
      "position": 0
    },
    {
      "token": "Flapdoodle", <2>
      "start_offset": 5,
      "end_offset": 15,
      "type": "<ALPHANUM>",
      "position": 1
    }
  ]
}
--------------------------------------------------
// TESTRESPONSE

<1> The term `What` has been lowercased, because it is only 4 characters long.
<2> The term `Flapdoodle` has been left in its original case, because it doesn't pass
the predicate.
@@ -20,4 +20,13 @@
 esplugin {
     description 'Adds "built in" analyzers to Elasticsearch.'
     classname 'org.elasticsearch.analysis.common.CommonAnalysisPlugin'
+    extendedPlugins = ['lang-painless']
+}
+
+dependencies {
+    compileOnly project(':modules:lang-painless')
+}
+
+integTestCluster {
+    module project(':modules:lang-painless')
 }
@@ -0,0 +1,40 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.analysis.common;

import org.elasticsearch.painless.spi.PainlessExtension;
import org.elasticsearch.painless.spi.Whitelist;
import org.elasticsearch.painless.spi.WhitelistLoader;
import org.elasticsearch.script.ScriptContext;

import java.util.Collections;
import java.util.List;
import java.util.Map;

public class AnalysisPainlessExtension implements PainlessExtension {

    private static final Whitelist WHITELIST =
        WhitelistLoader.loadFromResourceFiles(AnalysisPainlessExtension.class, "painless_whitelist.txt");

    @Override
    public Map<ScriptContext<?>, List<Whitelist>> getContextWhitelists() {
        return Collections.singletonMap(AnalysisPredicateScript.CONTEXT, Collections.singletonList(WHITELIST));
    }
}
@@ -0,0 +1,87 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.analysis.common;

import org.elasticsearch.script.ScriptContext;

/**
 * A predicate based on the current token in a TokenStream
 */
public abstract class AnalysisPredicateScript {

    /**
     * Encapsulation of the state of the current token
     */
    public static class Token {
        public CharSequence term;
        public int pos;
        public int posInc;
        public int posLen;
        public int startOffset;
        public int endOffset;
        public String type;
        public boolean isKeyword;

        public CharSequence getTerm() {
            return term;
        }

        public int getPositionIncrement() {
            return posInc;
        }

        public int getPosition() {
            return pos;
        }

        public int getPositionLength() {
            return posLen;
        }

        public int getStartOffset() {
            return startOffset;
        }

        public int getEndOffset() {
            return endOffset;
        }

        public String getType() {
            return type;
        }

        public boolean isKeyword() {
            return isKeyword;
        }
    }

    /**
     * Returns {@code true} if the current term matches the predicate
     */
    public abstract boolean execute(Token token);

    public interface Factory {
        AnalysisPredicateScript newInstance();
    }

    public static final String[] PARAMETERS = new String[]{ "token" };
    public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("analysis", Factory.class);

}
@@ -111,9 +111,16 @@ import org.apache.lucene.analysis.th.ThaiTokenizer;
 import org.apache.lucene.analysis.tr.ApostropheFilter;
 import org.apache.lucene.analysis.tr.TurkishAnalyzer;
 import org.apache.lucene.analysis.util.ElisionFilter;
+import org.apache.lucene.util.SetOnce;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.cluster.service.ClusterService;
+import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.common.regex.Regex;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.env.Environment;
+import org.elasticsearch.env.NodeEnvironment;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory;
@@ -127,20 +134,44 @@ import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
 import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.plugins.ScriptPlugin;
+import org.elasticsearch.script.ScriptContext;
+import org.elasticsearch.script.ScriptService;
+import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.watcher.ResourceWatcherService;
 import org.tartarus.snowball.ext.DutchStemmer;
 import org.tartarus.snowball.ext.FrenchStemmer;
 
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 
 import static org.elasticsearch.plugins.AnalysisPlugin.requiresAnalysisSettings;
 
-public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
+public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, ScriptPlugin {
 
     private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(CommonAnalysisPlugin.class));
 
+    private final SetOnce<ScriptService> scriptService = new SetOnce<>();
+
+    @Override
+    public Collection<Object> createComponents(Client client, ClusterService clusterService, ThreadPool threadPool,
+                                               ResourceWatcherService resourceWatcherService, ScriptService scriptService,
+                                               NamedXContentRegistry xContentRegistry, Environment environment,
+                                               NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry) {
+        this.scriptService.set(scriptService);
+        return Collections.emptyList();
+    }
+
+    @Override
+    @SuppressWarnings("rawtypes") // TODO ScriptPlugin needs to change this to pass precommit?
+    public List<ScriptContext> getContexts() {
+        return Collections.singletonList(AnalysisPredicateScript.CONTEXT);
+    }
+
     @Override
     public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
         Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
@@ -202,6 +233,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         filters.put("classic", ClassicFilterFactory::new);
         filters.put("czech_stem", CzechStemTokenFilterFactory::new);
         filters.put("common_grams", requiresAnalysisSettings(CommonGramsTokenFilterFactory::new));
+        filters.put("condition",
+            requiresAnalysisSettings((i, e, n, s) -> new ScriptedConditionTokenFilterFactory(i, n, s, scriptService.get())));
         filters.put("decimal_digit", DecimalDigitFilterFactory::new);
         filters.put("delimited_payload_filter", LegacyDelimitedPayloadTokenFilterFactory::new);
         filters.put("delimited_payload", DelimitedPayloadTokenFilterFactory::new);
@@ -0,0 +1,117 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.ReferringFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.script.ScriptType;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

/**
 * A factory for a conditional token filter that only applies child filters if the underlying token
 * matches an {@link AnalysisPredicateScript}
 */
public class ScriptedConditionTokenFilterFactory extends AbstractTokenFilterFactory implements ReferringFilterFactory {

    private final AnalysisPredicateScript.Factory factory;
    private final List<TokenFilterFactory> filters = new ArrayList<>();
    private final List<String> filterNames;

    ScriptedConditionTokenFilterFactory(IndexSettings indexSettings, String name,
                                        Settings settings, ScriptService scriptService) {
        super(indexSettings, name, settings);

        Settings scriptSettings = settings.getAsSettings("script");
        Script script = Script.parse(scriptSettings);
        if (script.getType() != ScriptType.INLINE) {
            throw new IllegalArgumentException("Cannot use stored scripts in tokenfilter [" + name + "]");
        }
        this.factory = scriptService.compile(script, AnalysisPredicateScript.CONTEXT);

        this.filterNames = settings.getAsList("filter");
        if (this.filterNames.isEmpty()) {
            throw new IllegalArgumentException("Empty list of filters provided to tokenfilter [" + name + "]");
        }
    }

    @Override
    public TokenStream create(TokenStream tokenStream) {
        Function<TokenStream, TokenStream> filter = in -> {
            for (TokenFilterFactory tff : filters) {
                in = tff.create(in);
            }
            return in;
        };
        AnalysisPredicateScript script = factory.newInstance();
        final AnalysisPredicateScript.Token token = new AnalysisPredicateScript.Token();
        return new ConditionalTokenFilter(tokenStream, filter) {

            CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
            PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
            OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
            TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
            KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);

            @Override
            protected boolean shouldFilter() {
                token.term = termAtt;
                token.posInc = posIncAtt.getPositionIncrement();
                token.pos += token.posInc;
                token.posLen = posLenAtt.getPositionLength();
                token.startOffset = offsetAtt.startOffset();
                token.endOffset = offsetAtt.endOffset();
                token.type = typeAtt.type();
                token.isKeyword = keywordAtt.isKeyword();
                return script.execute(token);
            }
        };
    }

    @Override
    public void setReferences(Map<String, TokenFilterFactory> factories) {
        for (String filter : filterNames) {
            TokenFilterFactory tff = factories.get(filter);
            if (tff == null) {
                throw new IllegalArgumentException("ScriptedConditionTokenFilter [" + name() +
                    "] refers to undefined token filter [" + filter + "]");
            }
            filters.add(tff);
        }
    }

}
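The `create` method above folds the configured subfilters into a single function: each factory wraps the stream produced by the previous one, so the first filter in the list becomes the innermost wrapper. That composition pattern can be sketched in isolation (strings stand in for `TokenStream`s to keep the sketch runnable):

```java
import java.util.List;
import java.util.function.UnaryOperator;

// Sketch of the chain-composition pattern used in create() above: apply
// each filter in list order, feeding each one the previous result.
public class FilterChainSketch {
    static <T> UnaryOperator<T> chain(List<UnaryOperator<T>> filters) {
        return in -> {
            T result = in;
            for (UnaryOperator<T> f : filters) {
                result = f.apply(result);
            }
            return result;
        };
    }

    public static void main(String[] args) {
        // Hypothetical filter names, purely for illustration.
        List<UnaryOperator<String>> filters =
            List.of(s -> s + "->lowercase", s -> s + "->stemmer");
        UnaryOperator<String> chained = chain(filters);
        System.out.println(chained.apply("tokenizer"));
        // tokenizer->lowercase->stemmer
    }
}
```

`ConditionalTokenFilter` then invokes the composed function only for tokens where `shouldFilter()` returns true.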
@@ -0,0 +1 @@
org.elasticsearch.analysis.common.AnalysisPainlessExtension
@@ -0,0 +1,28 @@
#
# Licensed to Elasticsearch under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

class org.elasticsearch.analysis.common.AnalysisPredicateScript$Token {
  CharSequence getTerm()
  int getPosition()
  int getPositionIncrement()
  int getPositionLength()
  int getStartOffset()
  int getEndOffset()
  String getType()
  boolean isKeyword()
}
@@ -0,0 +1,89 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.analysis.common;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.TestEnvironment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.IndexSettingsModule;

import java.util.Collections;

public class ScriptedConditionTokenFilterTests extends ESTokenStreamTestCase {

    public void testSimpleCondition() throws Exception {
        Settings settings = Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
        Settings indexSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.cond.type", "condition")
            .put("index.analysis.filter.cond.script.source", "token.getTerm().length() > 5")
            .putList("index.analysis.filter.cond.filter", "uppercase")
            .put("index.analysis.analyzer.myAnalyzer.type", "custom")
            .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
            .putList("index.analysis.analyzer.myAnalyzer.filter", "cond")
            .build();
        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);

        AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {
            @Override
            public boolean execute(Token token) {
                return token.getTerm().length() > 5;
            }
        };

        @SuppressWarnings("unchecked")
        ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()){
            @Override
            public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
                assertEquals(context, AnalysisPredicateScript.CONTEXT);
                assertEquals(new Script("token.getTerm().length() > 5"), script);
                return (FactoryType) factory;
            }
        };

        CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
        plugin.createComponents(null, null, null, null, scriptService, null, null, null, null);
        AnalysisModule module
            = new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));

        IndexAnalyzers analyzers = module.getAnalysisRegistry().build(idxSettings);

        try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
            assertNotNull(analyzer);
            assertAnalyzesTo(analyzer, "Vorsprung Durch Technik", new String[]{
                "VORSPRUNG", "Durch", "TECHNIK"
            });
        }

    }

}
@@ -0,0 +1,36 @@
## Test analysis scripts

"condition":
    - do:
        indices.analyze:
          body:
            text: "Vorsprung Durch Technik"
            tokenizer: "whitespace"
            filter:
              - type: condition
                filter: [ "lowercase" ]
                script:
                  source: "token.term.length() > 5"

    - length: { tokens: 3 }
    - match: { tokens.0.token: "vorsprung" }
    - match: { tokens.1.token: "Durch" }
    - match: { tokens.2.token: "technik" }

---
"condition-vars":
    - do:
        indices.analyze:
          body:
            text: "Vorsprung Durch Technik"
            tokenizer: "whitespace"
            filter:
              - type: condition
                filter: [ "lowercase" ]
                script:
                  source: "token.position > 1 && token.positionIncrement > 0 && token.startOffset > 0 && token.endOffset > 0 && (token.positionLength == 1 || token.type == \"a\" || token.keyword)"

    - length: { tokens: 3 }
    - match: { tokens.0.token: "Vorsprung" }
    - match: { tokens.1.token: "durch" }
    - match: { tokens.2.token: "technik" }