Throw IAE if reserved completion suggester chars are used in input
The completion suggester reserves 0x00 and 0xFF for internal use. If either of those chars is used in the input string, an IAE is thrown and the input is rejected. Closes #3648
This commit is contained in:
parent
d7b3ed7e8b
commit
732e38b8c7
|
@ -40,6 +40,7 @@ import org.elasticsearch.search.suggest.completion.CompletionTokenStream;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
|
@ -276,10 +277,18 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
|
|||
}
|
||||
|
||||
public Field getCompletionField(String input, BytesRef payload) {
|
||||
final String originalInput = input;
|
||||
if (input.length() > maxInputLength) {
|
||||
final int len = correctSubStringLen(input, Math.min(maxInputLength, input.length()));
|
||||
input = input.substring(0, len);
|
||||
}
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
if (isReservedChar(input.charAt(i))) {
|
||||
throw new ElasticSearchIllegalArgumentException("Illegal input [" + originalInput + "] UTF-16 codepoint [0x"
|
||||
+ Integer.toHexString((int) input.charAt(i)).toUpperCase(Locale.ROOT)
|
||||
+ "] at position " + i + " is a reserved character");
|
||||
}
|
||||
}
|
||||
return new SuggestField(names().fullName(), input, this.fieldType, payload, analyzingSuggestLookupProvider);
|
||||
}
|
||||
|
||||
|
@ -388,4 +397,14 @@ public class CompletionFieldMapper extends AbstractFieldMapper<String> {
|
|||
this.maxInputLength = fieldMergeWith.maxInputLength;
|
||||
}
|
||||
}
|
||||
|
||||
private static final char END_LABEL = 0x00;
|
||||
|
||||
// this should be package private but our tests don't allow it.
|
||||
public static boolean isReservedChar(char character) {
|
||||
/* we also use 0xFF as a SEP_LABEL in the suggester but it's not valid UTF-8 so no need to check.
|
||||
* we also don't need to convert to UTF-8 here to check for the 0x00 end label since all multi-byte
|
||||
* UTF-8 chars start with 0x10 binary so if the UTF-16 CP is == 0x00 it's the single byte UTF-8 CP */
|
||||
return character == END_LABEL;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.elasticsearch.common.settings.ImmutableSettings;
|
|||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.index.mapper.MapperException;
|
||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
|
||||
import org.elasticsearch.search.suggest.Suggest;
|
||||
import org.elasticsearch.search.suggest.completion.CompletionStats;
|
||||
|
@ -726,7 +727,7 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest {
|
|||
int iters = atLeast(10);
|
||||
for (int i = 0; i < iters; i++) {
|
||||
int len = between(3, 50);
|
||||
String str = randomRealisticUnicodeOfCodepointLengthBetween(len+1, atLeast(len + 2));
|
||||
String str = replaceReservedChars(randomRealisticUnicodeOfCodepointLengthBetween(len+1, atLeast(len + 2)), (char)0x01);
|
||||
ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
|
||||
.startObject(TYPE).startObject("properties")
|
||||
.startObject(FIELD)
|
||||
|
@ -766,7 +767,7 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest {
|
|||
.endObject()));
|
||||
ensureYellow();
|
||||
// can cause stack overflow without the default max_input_len
|
||||
String longString = randomRealisticUnicodeOfLength(atLeast(5000));
|
||||
String longString = replaceReservedChars(randomRealisticUnicodeOfLength(atLeast(5000)), (char)0x01);
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value(longString).endArray()
|
||||
|
@ -775,4 +776,37 @@ public class CompletionSuggestSearchTests extends AbstractSharedClusterTest {
|
|||
).setRefresh(true).get();
|
||||
|
||||
}
|
||||
|
||||
// see #3648
|
||||
@Test(expected = MapperParsingException.class)
|
||||
public void testReservedChars() throws IOException {
|
||||
client().admin().indices().prepareCreate(INDEX).get();
|
||||
ElasticsearchAssertions.assertAcked(client().admin().indices().preparePutMapping(INDEX).setType(TYPE).setSource(jsonBuilder().startObject()
|
||||
.startObject(TYPE).startObject("properties")
|
||||
.startObject(FIELD)
|
||||
.field("type", "completion")
|
||||
.endObject()
|
||||
.endObject().endObject()
|
||||
.endObject()));
|
||||
ensureYellow();
|
||||
// can cause stack overflow without the default max_input_len
|
||||
String string = "foo" + (char)0x00 + "bar";
|
||||
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
|
||||
.startObject().startObject(FIELD)
|
||||
.startArray("input").value(string).endArray()
|
||||
.field("output", "foobar")
|
||||
.endObject().endObject()
|
||||
).setRefresh(true).get();
|
||||
|
||||
}
|
||||
|
||||
private static String replaceReservedChars(String input, char replacement) {
|
||||
char[] charArray = input.toCharArray();
|
||||
for (int i = 0; i < charArray.length; i++) {
|
||||
if (CompletionFieldMapper.isReservedChar(charArray[i])) {
|
||||
charArray[i] = replacement;
|
||||
}
|
||||
}
|
||||
return new String(charArray);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue