Throw IAE if reserved completion suggester chars are used in input

The completion suggester reserves 0x00 and 0xFF for internal use.
If those chars are used in the input string an IAE is thrown and the
input is rejected.

Closes #3648
This commit is contained in:
Simon Willnauer 2013-09-08 21:46:10 +02:00
parent d7b3ed7e8b
commit 732e38b8c7
2 changed files with 55 additions and 2 deletions

View File

@ -40,6 +40,7 @@ import org.elasticsearch.search.suggest.completion.CompletionTokenStream;
import java.io.IOException;
import java.io.Reader;
import java.util.List;
import java.util.Locale;
import java.util.Map;
/**
@ -276,10 +277,18 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
}
/**
 * Builds the suggest field for a single completion input.
 *
 * Inputs longer than {@code maxInputLength} are cut down first; the cut length is
 * passed through {@code correctSubStringLen} (presumably to avoid splitting a
 * surrogate pair -- confirm against that helper). The reserved-character check runs
 * on the possibly truncated value, so a reserved char beyond the cut is not reported.
 *
 * @param input   the raw completion input
 * @param payload the payload handed to the created field
 * @return a {@code SuggestField} for the (possibly truncated) input
 * @throws ElasticSearchIllegalArgumentException if the input contains a reserved character
 */
public Field getCompletionField(String input, BytesRef payload) {
    // Keep the untouched value around: the error message reports what the caller sent.
    final String originalInput = input;
    String indexedInput = originalInput;
    if (originalInput.length() > maxInputLength) {
        final int cutAt = correctSubStringLen(originalInput, Math.min(maxInputLength, originalInput.length()));
        indexedInput = originalInput.substring(0, cutAt);
    }
    final int length = indexedInput.length();
    for (int pos = 0; pos < length; pos++) {
        final char current = indexedInput.charAt(pos);
        if (!isReservedChar(current)) {
            continue;
        }
        final String hex = Integer.toHexString((int) current).toUpperCase(Locale.ROOT);
        throw new ElasticSearchIllegalArgumentException("Illegal input [" + originalInput + "] UTF-16 codepoint [0x"
                + hex
                + "] at position " + pos + " is a reserved character");
    }
    return new SuggestField(names().fullName(), indexedInput, this.fieldType, payload, analyzingSuggestLookupProvider);
}
@ -388,4 +397,14 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
this.maxInputLength = fieldMergeWith.maxInputLength;
}
}
/** The end label (0x00) that the completion suggester reserves for internal use. */
private static final char END_LABEL = 0x00;

/**
 * Tells whether the given UTF-16 char is reserved by the completion suggester and
 * therefore must not appear in an input.
 *
 * Only the 0x00 end label has to be checked:
 * - the suggester also uses 0xFF as a SEP_LABEL, but a 0xFF byte never occurs in
 *   valid UTF-8, so no input can produce it;
 * - no UTF-8 conversion is needed for the 0x00 check, because every byte of a
 *   multi-byte UTF-8 sequence has its high bit set. A 0x00 byte can therefore only
 *   come from the single-byte encoding of the UTF-16 char 0x00 itself.
 *
 * @param character the UTF-16 char to test
 * @return {@code true} if the char equals the reserved end label
 */
// this should be package private but our tests don't allow it.
public static boolean isReservedChar(char character) {
    return END_LABEL == character;
}
}

View File

@ -32,6 +32,7 @@ import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.MapperException;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
import org.elasticsearch.search.suggest.Suggest;
import org.elasticsearch.search.suggest.completion.CompletionStats;
@ -726,7 +727,7 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest {
int iters = atLeast(10);
for (int i = 0; i < iters; i++) {
int len = between(3, 50);
String str = randomRealisticUnicodeOfCodepointLengthBetween(len+1, atLeast(len + 2));
String str = replaceReservedChars(randomRealisticUnicodeOfCodepointLengthBetween(len+1, atLeast(len + 2)), (char)0x01);
ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
.startObject(TYPE).startObject("properties")
.startObject(FIELD)
@ -766,7 +767,7 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest {
.endObject()));
ensureYellow();
// can cause stack overflow without the default max_input_len
String longString = randomRealisticUnicodeOfLength(atLeast(5000));
String longString = replaceReservedChars(randomRealisticUnicodeOfLength(atLeast(5000)), (char)0x01);
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
.startObject().startObject(FIELD)
.startArray("input").value(longString).endArray()
@ -775,4 +776,37 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest {
).setRefresh(true).get();
}
// see #3648
// Indexing a completion input that contains the reserved 0x00 char must be rejected
// with a MapperParsingException. The expectation is scoped to the index request only,
// so a failure while creating the index or putting the mapping cannot make this test
// pass for the wrong reason.
@Test
public void testReservedChars() throws IOException {
    client().admin().indices().prepareCreate(INDEX).get();
    ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
            .startObject(TYPE).startObject("properties")
            .startObject(FIELD)
            .field("type", "completion")
            .endObject()
            .endObject().endObject()
            .endObject()));
    ensureYellow();
    // 0x00 is reserved by the completion suggester and must not be accepted as input
    String string = "foo" + (char)0x00 + "bar";
    try {
        client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
                .startObject().startObject(FIELD)
                .startArray("input").value(string).endArray()
                .field("output", "foobar")
                .endObject().endObject()
        ).setRefresh(true).get();
    } catch (MapperParsingException e) {
        // expected: the document with the reserved char was rejected
        return;
    }
    throw new AssertionError("expected a MapperParsingException for input containing the reserved char 0x00");
}
/**
 * Returns a copy of {@code input} in which every char that
 * {@link CompletionFieldMapper#isReservedChar(char)} flags as reserved is swapped
 * for {@code replacement}; all other chars are kept in place.
 */
private static String replaceReservedChars(String input, char replacement) {
    final StringBuilder sanitized = new StringBuilder(input.length());
    for (char current : input.toCharArray()) {
        sanitized.append(CompletionFieldMapper.isReservedChar(current) ? replacement : current);
    }
    return sanitized.toString();
}
}