mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-24 17:09:48 +00:00
Reset Token position on reuse in scripted analysis (#47424)
Most of the information in AnalysisPredicateScript.Token is pulled directly from its underlying AttributeSource, but the token position is tracked separately, as state held directly on the Token. That position must be reset whenever the containing ScriptFilteringTokenFilter or ScriptedConditionTokenFilter is reused. Fixes #47197.
This commit is contained in:
parent
4379a3c52b
commit
697c693ee7
@ -61,6 +61,10 @@ public abstract class AnalysisPredicateScript {
|
||||
this.keywordAtt = source.addAttribute(KeywordAttribute.class);
|
||||
}
|
||||
|
||||
/**
 * Resets the position tracked on this token back to its initial value of -1.
 * Later calls to updatePosition() accumulate position increments starting
 * from this value, so this should be called whenever the owning token filter
 * is reset for reuse.
 */
public void reset() {
|
||||
this.pos = -1;
|
||||
}
|
||||
|
||||
/**
 * Advances the tracked position by the position increment currently reported
 * by the position-increment attribute.
 */
public void updatePosition() {
|
||||
// pos is state held on the token itself rather than read from an attribute
this.pos = this.pos + posIncAtt.getPositionIncrement();
|
||||
}
|
||||
|
@ -69,5 +69,11 @@ public class PredicateTokenFilterScriptFactory extends AbstractTokenFilterFactor
|
||||
token.updatePosition();
|
||||
return script.execute(token);
|
||||
}
|
||||
|
||||
/**
 * Resets this filter for reuse: delegates to the superclass reset first, then
 * clears the position state held on the script token.
 *
 * @throws IOException declared by the overridden reset(); may be thrown by super.reset()
 */
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
// the token keeps its own position counter, which would otherwise carry over between uses
this.token.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -31,6 +31,7 @@ import org.elasticsearch.script.Script;
|
||||
import org.elasticsearch.script.ScriptService;
|
||||
import org.elasticsearch.script.ScriptType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
@ -119,6 +120,12 @@ public class ScriptedConditionTokenFilterFactory extends AbstractTokenFilterFact
|
||||
token.updatePosition();
|
||||
return script.execute(token);
|
||||
}
|
||||
|
||||
/**
 * Resets this filter for reuse: delegates to the superclass reset first, then
 * clears the position state held on the script token.
 *
 * @throws IOException declared by the overridden reset(); may be thrown by super.reset()
 */
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
// the token keeps its own position counter, which would otherwise carry over between uses
token.reset();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -46,7 +46,7 @@ public class PredicateTokenScriptFilterTests extends ESTokenStreamTestCase {
|
||||
Settings indexSettings = Settings.builder()
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
||||
.put("index.analysis.filter.f.type", "predicate_token_filter")
|
||||
.put("index.analysis.filter.f.script.source", "token.getTerm().length() > 5")
|
||||
.put("index.analysis.filter.f.script.source", "my_script")
|
||||
.put("index.analysis.analyzer.myAnalyzer.type", "custom")
|
||||
.put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
|
||||
.putList("index.analysis.analyzer.myAnalyzer.filter", "f")
|
||||
@ -56,7 +56,7 @@ public class PredicateTokenScriptFilterTests extends ESTokenStreamTestCase {
|
||||
AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {
|
||||
@Override
|
||||
public boolean execute(Token token) {
|
||||
return token.getTerm().length() > 5;
|
||||
return token.getPosition() < 2 || token.getPosition() > 4;
|
||||
}
|
||||
};
|
||||
|
||||
@ -65,7 +65,7 @@ public class PredicateTokenScriptFilterTests extends ESTokenStreamTestCase {
|
||||
@Override
|
||||
public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
|
||||
assertEquals(context, AnalysisPredicateScript.CONTEXT);
|
||||
assertEquals(new Script("token.getTerm().length() > 5"), script);
|
||||
assertEquals(new Script("my_script"), script);
|
||||
return (FactoryType) factory;
|
||||
}
|
||||
};
|
||||
@ -79,8 +79,8 @@ public class PredicateTokenScriptFilterTests extends ESTokenStreamTestCase {
|
||||
|
||||
try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
|
||||
assertNotNull(analyzer);
|
||||
assertAnalyzesTo(analyzer, "Vorsprung Durch Technik", new String[]{
|
||||
"Vorsprung", "Technik"
|
||||
assertAnalyzesTo(analyzer, "Oh what a wonderful thing to be", new String[]{
|
||||
"Oh", "what", "to", "be"
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -45,7 +45,7 @@ public class ScriptedConditionTokenFilterTests extends ESTokenStreamTestCase {
|
||||
Settings indexSettings = Settings.builder()
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
||||
.put("index.analysis.filter.cond.type", "condition")
|
||||
.put("index.analysis.filter.cond.script.source", "token.getTerm().length() > 5")
|
||||
.put("index.analysis.filter.cond.script.source", "token.getPosition() > 1")
|
||||
.putList("index.analysis.filter.cond.filter", "uppercase")
|
||||
.put("index.analysis.analyzer.myAnalyzer.type", "custom")
|
||||
.put("index.analysis.analyzer.myAnalyzer.tokenizer", "standard")
|
||||
@ -56,7 +56,7 @@ public class ScriptedConditionTokenFilterTests extends ESTokenStreamTestCase {
|
||||
AnalysisPredicateScript.Factory factory = () -> new AnalysisPredicateScript() {
|
||||
@Override
|
||||
public boolean execute(Token token) {
|
||||
return token.getTerm().length() > 5;
|
||||
return token.getPosition() > 1;
|
||||
}
|
||||
};
|
||||
|
||||
@ -65,7 +65,7 @@ public class ScriptedConditionTokenFilterTests extends ESTokenStreamTestCase {
|
||||
@Override
|
||||
public <FactoryType> FactoryType compile(Script script, ScriptContext<FactoryType> context) {
|
||||
assertEquals(context, AnalysisPredicateScript.CONTEXT);
|
||||
assertEquals(new Script("token.getTerm().length() > 5"), script);
|
||||
assertEquals(new Script("token.getPosition() > 1"), script);
|
||||
return (FactoryType) factory;
|
||||
}
|
||||
};
|
||||
@ -80,7 +80,7 @@ public class ScriptedConditionTokenFilterTests extends ESTokenStreamTestCase {
|
||||
try (NamedAnalyzer analyzer = analyzers.get("myAnalyzer")) {
|
||||
assertNotNull(analyzer);
|
||||
assertAnalyzesTo(analyzer, "Vorsprung Durch Technik", new String[]{
|
||||
"VORSPRUNG", "Durch", "TECHNIK"
|
||||
"Vorsprung", "Durch", "TECHNIK"
|
||||
});
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user