Add predicate_token_filter (#33431)
This allows users to filter out tokens from a TokenStream using Painless scripts, instead of having to write specialised Java code and package it up into a plugin. The commit also refactors the AnalysisPredicateScript.Token class so that it wraps an AttributeSource and exposes it read-only.
Parent: a55fa4fd6b
Commit: f598297f55
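For orientation before the file-by-file diff: the heart of the change is a FilteringTokenFilter whose accept() method consults the compiled predicate through a read-only Token view of the filter's own AttributeSource. The sketch below is condensed from the new PredicateTokenFilterScriptFactory further down; only the comments are added.

[source,java]
--------------------------------------------------
// Condensed from PredicateTokenFilterScriptFactory in this commit.
private static class ScriptFilteringTokenFilter extends FilteringTokenFilter {

    final AnalysisPredicateScript script;
    final AnalysisPredicateScript.Token token;

    ScriptFilteringTokenFilter(TokenStream in, AnalysisPredicateScript script) {
        super(in);
        this.script = script;
        // The filter itself is the AttributeSource; Token exposes only getters,
        // so scripts can inspect but never mutate the stream.
        this.token = new AnalysisPredicateScript.Token(this);
    }

    @Override
    protected boolean accept() throws IOException {
        token.updatePosition();       // keep the 0-based token position up to date
        return script.execute(token); // false -> token is dropped from the stream
    }
}
--------------------------------------------------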
docs/reference/analysis/tokenfilters.asciidoc:
@@ -37,6 +37,8 @@ include::tokenfilters/multiplexer-tokenfilter.asciidoc[]
 
 include::tokenfilters/condition-tokenfilter.asciidoc[]
 
+include::tokenfilters/predicate-tokenfilter.asciidoc[]
+
 include::tokenfilters/stemmer-tokenfilter.asciidoc[]
 
 include::tokenfilters/stemmer-override-tokenfilter.asciidoc[]
docs/reference/analysis/tokenfilters/predicate-tokenfilter.asciidoc (new file, +79 lines):

[[analysis-predicatefilter-tokenfilter]]
=== Predicate Token Filter Script

The `predicate_token_filter` token filter takes a predicate script, and removes
tokens that do not match the predicate.

[float]
=== Options
[horizontal]
script:: a predicate script that determines whether or not the current token will
be emitted. Note that only inline scripts are supported.

[float]
=== Settings example

You can set it up like:

[source,js]
--------------------------------------------------
PUT /condition_example
{
  "settings" : {
    "analysis" : {
      "analyzer" : {
        "my_analyzer" : {
          "tokenizer" : "standard",
          "filter" : [ "my_script_filter" ]
        }
      },
      "filter" : {
        "my_script_filter" : {
          "type" : "predicate_token_filter",
          "script" : {
            "source" : "token.getTerm().length() > 5" <1>
          }
        }
      }
    }
  }
}
--------------------------------------------------
// CONSOLE

<1> This will emit tokens that are more than 5 characters long

And test it like:

[source,js]
--------------------------------------------------
POST /condition_example/_analyze
{
  "analyzer" : "my_analyzer",
  "text" : "What Flapdoodle"
}
--------------------------------------------------
// CONSOLE
// TEST[continued]

And it'd respond:

[source,js]
--------------------------------------------------
{
  "tokens": [
    {
      "token": "Flapdoodle", <1>
      "start_offset": 5,
      "end_offset": 15,
      "type": "<ALPHANUM>",
      "position": 1 <2>
    }
  ]
}
--------------------------------------------------
// TESTRESPONSE

<1> The token 'What' has been removed from the tokenstream because it does not
match the predicate.
<2> The position and offset values are unaffected by the removal of earlier tokens.
AnalysisPredicateScript.java:
@@ -19,6 +19,13 @@
 
 package org.elasticsearch.analysis.common;
 
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
 import org.elasticsearch.script.ScriptContext;
 
 /**
@@ -30,21 +37,40 @@ public abstract class AnalysisPredicateScript {
      * Encapsulation of the state of the current token
      */
     public static class Token {
-        public CharSequence term;
-        public int pos;
-        public int posInc;
-        public int posLen;
-        public int startOffset;
-        public int endOffset;
-        public String type;
-        public boolean isKeyword;
+
+        private final CharTermAttribute termAtt;
+        private final PositionIncrementAttribute posIncAtt;
+        private final PositionLengthAttribute posLenAtt;
+        private final OffsetAttribute offsetAtt;
+        private final TypeAttribute typeAtt;
+        private final KeywordAttribute keywordAtt;
+
+        // posInc is always 1 at the beginning of a tokenstream and the convention
+        // from the _analyze endpoint is that tokenstream positions are 0-based
+        private int pos = -1;
+
+        /**
+         * Create a token exposing values from an AttributeSource
+         */
+        public Token(AttributeSource source) {
+            this.termAtt = source.addAttribute(CharTermAttribute.class);
+            this.posIncAtt = source.addAttribute(PositionIncrementAttribute.class);
+            this.posLenAtt = source.addAttribute(PositionLengthAttribute.class);
+            this.offsetAtt = source.addAttribute(OffsetAttribute.class);
+            this.typeAtt = source.addAttribute(TypeAttribute.class);
+            this.keywordAtt = source.addAttribute(KeywordAttribute.class);
+        }
+
+        public void updatePosition() {
+            this.pos = this.pos + posIncAtt.getPositionIncrement();
+        }
 
         public CharSequence getTerm() {
-            return term;
+            return termAtt;
         }
 
         public int getPositionIncrement() {
-            return posInc;
+            return posIncAtt.getPositionIncrement();
         }
 
         public int getPosition() {
@@ -52,23 +78,23 @@ public abstract class AnalysisPredicateScript {
         }
 
         public int getPositionLength() {
-            return posLen;
+            return posLenAtt.getPositionLength();
         }
 
         public int getStartOffset() {
-            return startOffset;
+            return offsetAtt.startOffset();
         }
 
         public int getEndOffset() {
-            return endOffset;
+            return offsetAtt.endOffset();
        }
 
         public String getType() {
-            return type;
+            return typeAtt.type();
         }
 
         public boolean isKeyword() {
-            return isKeyword;
+            return keywordAtt.isKeyword();
         }
     }
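A note on the `pos = -1` starting value above: the first token's position increment of 1 brings the position to 0, which matches the 0-based positions reported by _analyze. A minimal standalone sketch of the same accumulation (assumes Lucene's analysis classes on the classpath; PositionDemo is illustrative and not part of the commit):

[source,java]
--------------------------------------------------
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

import java.io.IOException;
import java.io.StringReader;

public class PositionDemo {
    public static void main(String[] args) throws IOException {
        try (Tokenizer tok = new WhitespaceTokenizer()) {
            tok.setReader(new StringReader("a b c"));
            CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posInc = tok.addAttribute(PositionIncrementAttribute.class);
            tok.reset();
            int pos = -1; // same starting value as AnalysisPredicateScript.Token
            while (tok.incrementToken()) {
                pos += posInc.getPositionIncrement(); // first increment of 1 yields 0
                System.out.println(term + " -> position " + pos); // prints 0, 1, 2
            }
            tok.end();
        }
    }
}
--------------------------------------------------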
CommonAnalysisPlugin.java:
@@ -264,6 +264,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, ScriptPlugin {
         filters.put("pattern_replace", requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
         filters.put("persian_normalization", PersianNormalizationFilterFactory::new);
         filters.put("porter_stem", PorterStemTokenFilterFactory::new);
+        filters.put("predicate_token_filter",
+            requiresAnalysisSettings((i, e, n, s) -> new PredicateTokenFilterScriptFactory(i, n, s, scriptService.get())));
         filters.put("remove_duplicates", RemoveDuplicatesTokenFilterFactory::new);
         filters.put("reverse", ReverseTokenFilterFactory::new);
         filters.put("russian_stem", RussianStemTokenFilterFactory::new);
PredicateTokenFilterScriptFactory.java (new file, +73 lines):

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.script.ScriptType;

import java.io.IOException;

/**
 * A factory for creating FilteringTokenFilters that determine whether or not to
 * accept their underlying token by consulting a script
 */
public class PredicateTokenFilterScriptFactory extends AbstractTokenFilterFactory {

    private final AnalysisPredicateScript.Factory factory;

    public PredicateTokenFilterScriptFactory(IndexSettings indexSettings, String name, Settings settings, ScriptService scriptService) {
        super(indexSettings, name, settings);
        Settings scriptSettings = settings.getAsSettings("script");
        Script script = Script.parse(scriptSettings);
        if (script.getType() != ScriptType.INLINE) {
            throw new IllegalArgumentException("Cannot use stored scripts in tokenfilter [" + name + "]");
        }
        this.factory = scriptService.compile(script, AnalysisPredicateScript.CONTEXT);
    }

    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new ScriptFilteringTokenFilter(tokenStream, factory.newInstance());
    }

    private static class ScriptFilteringTokenFilter extends FilteringTokenFilter {

        final AnalysisPredicateScript script;
        final AnalysisPredicateScript.Token token;

        ScriptFilteringTokenFilter(TokenStream in, AnalysisPredicateScript script) {
            super(in);
            this.script = script;
            this.token = new AnalysisPredicateScript.Token(this);
        }

        @Override
        protected boolean accept() throws IOException {
            token.updatePosition();
            return script.execute(token);
        }
    }
}
ScriptedConditionTokenFilterFactory.java:
@@ -21,12 +21,6 @@ package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
@@ -36,6 +30,7 @@ import org.elasticsearch.script.Script;
 import org.elasticsearch.script.ScriptService;
 import org.elasticsearch.script.ScriptType;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
@@ -76,30 +71,26 @@ public class ScriptedConditionTokenFilterFactory extends AbstractTokenFilterFactory {
             }
             return in;
         };
-        AnalysisPredicateScript script = factory.newInstance();
-        final AnalysisPredicateScript.Token token = new AnalysisPredicateScript.Token();
-        return new ConditionalTokenFilter(tokenStream, filter) {
-
-            CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-            PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-            PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
-            OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-            TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
-            KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
-
-            @Override
-            protected boolean shouldFilter() {
-                token.term = termAtt;
-                token.posInc = posIncAtt.getPositionIncrement();
-                token.pos += token.posInc;
-                token.posLen = posLenAtt.getPositionLength();
-                token.startOffset = offsetAtt.startOffset();
-                token.endOffset = offsetAtt.endOffset();
-                token.type = typeAtt.type();
-                token.isKeyword = keywordAtt.isKeyword();
-                return script.execute(token);
-            }
-        };
+        return new ScriptedConditionTokenFilter(tokenStream, filter, factory.newInstance());
+    }
+
+    private static class ScriptedConditionTokenFilter extends ConditionalTokenFilter {
+
+        private final AnalysisPredicateScript script;
+        private final AnalysisPredicateScript.Token token;
+
+        ScriptedConditionTokenFilter(TokenStream input, Function<TokenStream, TokenStream> inputFactory,
+                                     AnalysisPredicateScript script) {
+            super(input, inputFactory);
+            this.script = script;
+            this.token = new AnalysisPredicateScript.Token(this);
+        }
+
+        @Override
+        protected boolean shouldFilter() throws IOException {
+            token.updatePosition();
+            return script.execute(token);
+        }
     }
 
     @Override
PredicateTokenScriptFilterTests.java (new file, +89 lines):

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.analysis.common;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.TestEnvironment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptContext;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.test.ESTokenStreamTestCase;
import org.elasticsearch.test.IndexSettingsModule;

import java.io.IOException;
import java.util.Collections;

public class PredicateTokenScriptFilterTests extends ESTokenStreamTestCase {

    public void testSimpleFilter() throws IOException {
        Settings settings = Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
            .build();
        Settings indexSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.f.type", "predicate_token_filter")
            .put("index.analysis.filter.f.script.source", "token.getTerm().length() > 5")
            .put("index.analysis.analyzer.myAnalyzer.type", "custom")
            .put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
            .putList("index.analysis.analyzer.myAnalyzer.filter", "f")
            .build();
        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);

        AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {
            @Override
            public boolean execute(Token token) {
                return token.getTerm().length() > 5;
            }
        };

        @SuppressWarnings("unchecked")
        ScriptService scriptService = new ScriptService(indexSettings, Collections.emptyMap(), Collections.emptyMap()){
            @Override
            public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
                assertEquals(context, AnalysisPredicateScript.CONTEXT);
                assertEquals(new Script("token.getTerm().length() > 5"), script);
                return (FactoryType) factory;
            }
        };

        CommonAnalysisPlugin plugin = new CommonAnalysisPlugin();
        plugin.createComponents(null, null, null, null, scriptService, null, null, null, null);
        AnalysisModule module
            = new AnalysisModule(TestEnvironment.newEnvironment(settings), Collections.singletonList(plugin));

        IndexAnalyzers analyzers = module.getAnalysisRegistry().build(idxSettings);

        try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
            assertNotNull(analyzer);
            assertAnalyzesTo(analyzer, "Vorsprung Durch Technik", new String[]{
                "Vorsprung", "Technik"
            });
        }

    }

}
analysis-common REST tests (yml):
@@ -28,9 +28,44 @@
           - type: condition
             filter: [ "lowercase" ]
             script:
-              source: "token.position > 1 && token.positionIncrement > 0 && token.startOffset > 0 && token.endOffset > 0 && (token.positionLength == 1 || token.type == \"a\" || token.keyword)"
+              source: "token.position >= 1 && token.positionIncrement > 0 && token.startOffset > 0 && token.endOffset > 0 && (token.positionLength == 1 || token.type == \"a\" || token.keyword)"
 
   - length: { tokens: 3 }
   - match: { tokens.0.token: "Vorsprung" }
   - match: { tokens.1.token: "durch" }
   - match: { tokens.2.token: "technik" }
+
+---
+"script_filter":
+  - do:
+      indices.analyze:
+        body:
+          text: "Vorsprung Durch Technik"
+          tokenizer: "whitespace"
+          filter:
+            - type: predicate_token_filter
+              script:
+                source: "token.term.length() > 5"
+
+  - length: { tokens: 2 }
+  - match: { tokens.0.token: "Vorsprung" }
+  - match: { tokens.1.token: "Technik" }
+
+---
+"script_filter_position":
+  - do:
+      indices.analyze:
+        body:
+          text: "a b c d e f g h"
+          tokenizer: "whitespace"
+          filter:
+            - type: predicate_token_filter
+              script:
+                source: "token.position >= 4"
+
+  - length: { tokens: 4 }
+  - match: { tokens.0.token: "e" }
+  - match: { tokens.1.token: "f" }
+  - match: { tokens.2.token: "g" }
+  - match: { tokens.3.token: "h" }