From a28424ee5de17ff3f084234e9f09be38818f8058 Mon Sep 17 00:00:00 2001 From: aditya-agrawal Date: Tue, 8 May 2018 18:43:07 +0530 Subject: [PATCH] Avoid NPE in `more_like_this` when field has zero tokens (#30365) Fixes and edge case when using `more_like_this` where TermVectorsWriter could throw an NPE when a field produced zero tokens after analysis. This changes the implementation to use an empty list of tokens in this case. Closes #30148 --- docs/CHANGELOG.asciidoc | 4 +++ .../action/termvectors/TermVectorsWriter.java | 4 +++ .../search/morelikethis/MoreLikeThisIT.java | 30 +++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 621ca5a6414..346290c9f76 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -104,6 +104,8 @@ ones that the user is authorized to access in case field level security is enabl [float] === Bug Fixes +Fix NPE in 'more_like_this' when field has zero tokens ({pull}30365[#30365]) + Fixed prerelease version of elasticsearch in the `deb` package to sort before GA versions ({pull}29000[#29000]) @@ -169,6 +171,8 @@ Added put index template API to the high level rest client ({pull}30400[#30400]) [float] === Bug Fixes +Fix NPE in 'more_like_this' when field has zero tokens ({pull}30365[#30365]) + Do not ignore request analysis/similarity settings on index resize operations when the source index already contains such settings ({pull}30216[#30216]) Fix NPE when CumulativeSum agg encounters null value/empty bucket ({pull}29641[#29641]) diff --git a/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsWriter.java b/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsWriter.java index 06eea6367ed..8a54406c1f9 100644 --- a/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsWriter.java @@ -70,6 +70,10 @@ final class TermVectorsWriter { Terms topLevelTerms = topLevelFields.terms(field); // if no terms found, take the retrieved term vector fields for stats + if (fieldTermVector == null) { + fieldTermVector = EMPTY_TERMS; + } + if (topLevelTerms == null) { topLevelTerms = EMPTY_TERMS; } diff --git a/server/src/test/java/org/elasticsearch/search/morelikethis/MoreLikeThisIT.java b/server/src/test/java/org/elasticsearch/search/morelikethis/MoreLikeThisIT.java index 3487de255e1..21c043aa656 100644 --- a/server/src/test/java/org/elasticsearch/search/morelikethis/MoreLikeThisIT.java +++ b/server/src/test/java/org/elasticsearch/search/morelikethis/MoreLikeThisIT.java @@ -91,6 +91,36 @@ public class MoreLikeThisIT extends ESIntegTestCase { assertHitCount(response, 1L); } + //Issue #30148 + public void testMoreLikeThisForZeroTokensInOneOfTheAnalyzedFields() throws Exception { + CreateIndexRequestBuilder createIndexRequestBuilder = prepareCreate("test") + .addMapping("type", jsonBuilder() + .startObject().startObject("type") + .startObject("properties") + .startObject("myField").field("type", "text").endObject() + .startObject("empty").field("type", "text").endObject() + .endObject() + .endObject().endObject()); + + assertAcked(createIndexRequestBuilder); + + ensureGreen(); + + client().index(indexRequest("test").type("type").id("1").source(jsonBuilder().startObject() + .field("myField", "and_foo").field("empty", "").endObject())).actionGet(); + client().index(indexRequest("test").type("type").id("2").source(jsonBuilder().startObject() + .field("myField", "and_foo").field("empty", "").endObject())).actionGet(); + + client().admin().indices().refresh(refreshRequest()).actionGet(); + + SearchResponse searchResponse = client().prepareSearch().setQuery( + moreLikeThisQuery(new String[]{"myField", "empty"}, null, new Item[]{new Item("test", "type", "1")}) + .minTermFreq(1).minDocFreq(1) + ).get(); + + assertHitCount(searchResponse, 1L); + } + public void testSimpleMoreLikeOnLongField() throws Exception { logger.info("Creating index test"); assertAcked(prepareCreate("test")