[ML] Fix exception categorising an empty string (elastic/x-pack-elasticsearch#3870)
Original commit: elastic/x-pack-elasticsearch@1840a74415
This commit is contained in:
parent
8e73085047
commit
d6ca53cd26
|
@ -105,7 +105,7 @@ public class MlClassicTokenizer extends Tokenizer {
|
|||
public final void end() throws IOException {
|
||||
super.end();
|
||||
// Set final offset
|
||||
int finalOffset = nextOffset + (int) input.skip(Integer.MAX_VALUE) - 1;
|
||||
int finalOffset = Math.max(0, nextOffset + (int) input.skip(Integer.MAX_VALUE) - 1);
|
||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||
// Adjust any skipped tokens
|
||||
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
||||
|
|
|
@ -172,6 +172,13 @@ public class CategorizationAnalyzerTests extends ESTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testEmptyString() throws IOException {
|
||||
CategorizationAnalyzerConfig defaultConfig = CategorizationAnalyzerConfig.buildDefaultCategorizationAnalyzer(null);
|
||||
try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, defaultConfig)) {
|
||||
assertEquals(Collections.emptyList(), categorizationAnalyzer.tokenizeField("foo", ""));
|
||||
}
|
||||
}
|
||||
|
||||
public void testThaiAnalyzer() throws IOException {
|
||||
CategorizationAnalyzerConfig config = new CategorizationAnalyzerConfig.Builder().setAnalyzer("thai").build();
|
||||
try (CategorizationAnalyzer categorizationAnalyzer = new CategorizationAnalyzer(analysisRegistry, environment, config)) {
|
||||
|
|
|
@ -45,4 +45,16 @@ public class MlClassicTokenizerTests extends ESTestCase {
|
|||
tokenizer.end();
|
||||
}
|
||||
}
|
||||
|
||||
public void testTokenize_emptyString() throws IOException {
|
||||
String testData = "";
|
||||
try (Tokenizer tokenizer = new MlClassicTokenizer()) {
|
||||
tokenizer.setReader(new StringReader(testData));
|
||||
tokenizer.reset();
|
||||
CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
|
||||
assertFalse(tokenizer.incrementToken());
|
||||
assertEquals("", term.toString());
|
||||
tokenizer.end();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue