mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-25 14:26:27 +00:00
[ML] Fix exception categorising an empty string (elastic/x-pack-elasticsearch#3870)
Original commit: elastic/x-pack-elasticsearch@1840a74415
This commit is contained in:
parent
8e73085047
commit
d6ca53cd26
@ -105,7 +105,7 @@ public class MlClassicTokenizer extends Tokenizer {
|
|||||||
public final void end() throws IOException {
|
public final void end() throws IOException {
|
||||||
super.end();
|
super.end();
|
||||||
// Set final offset
|
// Set final offset
|
||||||
int finalOffset = nextOffset + (int) input.skip(Integer.MAX_VALUE) - 1;
|
int finalOffset = Math.max(0, nextOffset + (int) input.skip(Integer.MAX_VALUE) - 1);
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
// Adjust any skipped tokens
|
// Adjust any skipped tokens
|
||||||
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
||||||
|
@ -172,6 +172,13 @@ public class CategorizationAnalyzerTests extends ESTestCase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testEmptyString() throws IOException {
|
||||||
|
CategorizationAnalyzerConfig defaultConfig = CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(null);
|
||||||
|
try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, defaultConfig)) {
|
||||||
|
assertEquals(Collections.emptyList(), categorizationAnalyzer.tokenizeField("foo", ""));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testThaiAnalyzer() throws IOException {
|
public void testThaiAnalyzer() throws IOException {
|
||||||
CategorizationAnalyzerConfig config = new CategorizationAnalyzerConfig.Builder().setAnalyzer("thai").build();
|
CategorizationAnalyzerConfig config = new CategorizationAnalyzerConfig.Builder().setAnalyzer("thai").build();
|
||||||
try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, config)) {
|
try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, config)) {
|
||||||
|
@ -45,4 +45,16 @@ public class MlClassicTokenizerTests extends ESTestCase {
|
|||||||
tokenizer.end();
|
tokenizer.end();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testTokenize_emptyString() throws IOException {
|
||||||
|
String testData = "";
|
||||||
|
try (Tokenizer tokenizer = new MlClassicTokenizer()) {
|
||||||
|
tokenizer.setReader(new StringReader(testData));
|
||||||
|
tokenizer.reset();
|
||||||
|
CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
|
||||||
|
assertFalse(tokenizer.incrementToken());
|
||||||
|
assertEquals("", term.toString());
|
||||||
|
tokenizer.end();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user