[ML] Fix exception categorising an empty string (elastic/x-pack-elasticsearch#3870)

Original commit: elastic/x-pack-elasticsearch@1840a74415
This commit is contained in:
David Kyle 2018-02-08 16:18:48 +00:00 committed by GitHub
parent 8e73085047
commit d6ca53cd26
3 changed files with 20 additions and 1 deletions

View File

@ -105,7 +105,7 @@ public class MlClassicTokenizer extends Tokenizer {
public final void end() throws IOException {
super.end();
// Set final offset
int finalOffset = nextOffset + (int) input.skip(Integer.MAX_VALUE) - 1;
int finalOffset = Math.max(0, nextOffset + (int) input.skip(Integer.MAX_VALUE) - 1);
offsetAtt.setOffset(finalOffset, finalOffset);
// Adjust any skipped tokens
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);

View File

@ -172,6 +172,13 @@ public class CategorizationAnalyzerTests extends ESTestCase {
}
}
/**
 * Checks that running the default categorization analyzer over an empty
 * string for field "foo" produces an empty token list.
 */
public void testEmptyString() throws IOException {
    // The analyzer is closed by try-with-resources.
    try (CategorizationAnalyzer analyzer = new CategorizationAnalyzer(
            analysisRegistry, environment, CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(null))) {
        assertEquals(Collections.emptyList(), analyzer.tokenizeField("foo", ""));
    }
}
public void testThaiAnalyzer() throws IOException {
CategorizationAnalyzerConfig config = new CategorizationAnalyzerConfig.Builder().setAnalyzer("thai").build();
try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, config)) {

View File

@ -45,4 +45,16 @@ public class MlClassicTokenizerTests extends ESTestCase {
tokenizer.end();
}
}
/**
 * Drives {@code MlClassicTokenizer} over an empty input: no token may be
 * produced, the term attribute must be empty, and {@code end()} must
 * complete without throwing.
 */
public void testTokenize_emptyString() throws IOException {
    try (Tokenizer emptyInputTokenizer = new MlClassicTokenizer()) {
        emptyInputTokenizer.setReader(new StringReader(""));
        emptyInputTokenizer.reset();
        CharTermAttribute termAttr = emptyInputTokenizer.addAttribute(CharTermAttribute.class);
        // Nothing to tokenize, so the very first call reports exhaustion.
        assertFalse(emptyInputTokenizer.incrementToken());
        assertEquals("", termAttr.toString());
        // An exception thrown here fails the test.
        emptyInputTokenizer.end();
    }
}
}