diff --git a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java index e4e4a36ae18..ce7e4bf40a7 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/CompletionFieldMapper.java @@ -40,6 +40,7 @@ import org.elasticsearch.search.suggest.completion.CompletionTokenStream; import java.io.IOException; import java.io.Reader; import java.util.List; +import java.util.Locale; import java.util.Map; /** @@ -276,10 +277,18 @@ public class CompletionFieldMapper extends AbstractFieldMapper { } public Field getCompletionField(String input, BytesRef payload) { + final String originalInput = input; if (input.length() > maxInputLength) { final int len = correctSubStringLen(input, Math.min(maxInputLength, input.length())); input = input.substring(0, len); } + for (int i = 0; i < input.length(); i++) { + if (isReservedChar(input.charAt(i))) { + throw new ElasticSearchIllegalArgumentException("Illegal input [" + originalInput + "] UTF-16 codepoint [0x" + + Integer.toHexString((int) input.charAt(i)).toUpperCase(Locale.ROOT) + + "] at position " + i + " is a reserved character"); + } + } return new SuggestField(names().fullName(), input, this.fieldType, payload, analyzingSuggestLookupProvider); } @@ -388,4 +397,14 @@ public class CompletionFieldMapper extends AbstractFieldMapper { this.maxInputLength = fieldMergeWith.maxInputLength; } } + + private static final char END_LABEL = 0x00; + + // this should be package private but our tests don't allow it. + public static boolean isReservedChar(char character) { + /* we also use 0xFF as a SEP_LABEL in the suggester but it's not valid UTF-8 so no need to check. 
+ * we also don't need to convert to UTF-8 here to check for the 0x00 end label since every byte of a multi-byte + * UTF-8 sequence has its high bit set, so a 0x00 byte can only come from the UTF-16 code unit 0x00 (single byte UTF-8) */ + return character == END_LABEL; + } } diff --git a/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java index 4d21134f146..3a5d0e301bd 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/suggest/CompletionSuggestSearchTests.java @@ -32,6 +32,7 @@ import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.MapperException; +import org.elasticsearch.index.mapper.MapperParsingException; import org.elasticsearch.index.mapper.core.CompletionFieldMapper; import org.elasticsearch.search.suggest.Suggest; import org.elasticsearch.search.suggest.completion.CompletionStats; @@ -726,7 +727,7 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest { int iters = atLeast(10); for (int i = 0; i < iters; i++) { int len = between(3, 50); - String str = randomRealisticUnicodeOfCodepointLengthBetween(len+1, atLeast(len + 2)); + String str = replaceReservedChars(randomRealisticUnicodeOfCodepointLengthBetween(len+1, atLeast(len + 2)), (char)0x01); ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject() .startObject(TYPE).startObject("properties") .startObject(FIELD) @@ -766,7 +767,7 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest { .endObject())); ensureYellow(); // can cause stack overflow without the default max_input_len - String longString = 
randomRealisticUnicodeOfLength(atLeast(5000)); + String longString = replaceReservedChars(randomRealisticUnicodeOfLength(atLeast(5000)), (char)0x01); client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() .startObject().startObject(FIELD) .startArray("input").value(longString).endArray() @@ -775,4 +776,37 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest { ).setRefresh(true).get(); } + + // see #3648 + @Test(expected = MapperParsingException.class) + public void testReservedChars() throws IOException { + client().admin().indices().prepareCreate(INDEX).get(); + ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject() + .startObject(TYPE).startObject("properties") + .startObject(FIELD) + .field("type", "completion") + .endObject() + .endObject().endObject() + .endObject())); + ensureYellow(); + // 0x00 is the reserved end label, so indexing an input containing it is expected to fail (see the expected exception) + String string = "foo" + (char)0x00 + "bar"; + client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() + .startObject().startObject(FIELD) + .startArray("input").value(string).endArray() + .field("output", "foobar") + .endObject().endObject() + ).setRefresh(true).get(); + + } + + private static String replaceReservedChars(String input, char replacement) { + char[] charArray = input.toCharArray(); + for (int i = 0; i < charArray.length; i++) { + if (CompletionFieldMapper.isReservedChar(charArray[i])) { + charArray[i] = replacement; + } + } + return new String(charArray); + } }