From 753fa6b3bd3512ab8e8112e55be2aff667a845c4 Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Thu, 10 Dec 2020 10:49:40 -0800
Subject: [PATCH] IdUtils: Forbid characters that cannot be used in znodes. (#10659)

* IdUtils: Forbid characters that cannot be used in znodes.

* Fix whitespace.
---
 .../org/apache/druid/common/utils/IdUtils.java | 12 ++++++++++++
 .../apache/druid/common/utils/IdUtilsTest.java | 16 ++++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/core/src/main/java/org/apache/druid/common/utils/IdUtils.java b/core/src/main/java/org/apache/druid/common/utils/IdUtils.java
index cbafef00e67..549bf458743 100644
--- a/core/src/main/java/org/apache/druid/common/utils/IdUtils.java
+++ b/core/src/main/java/org/apache/druid/common/utils/IdUtils.java
@@ -24,6 +24,7 @@ import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
 import org.apache.druid.java.util.common.DateTimes;
+import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.StringUtils;
 import org.joda.time.DateTime;
 import org.joda.time.Interval;
@@ -60,6 +61,17 @@ public class IdUtils
         !m.matches(),
         StringUtils.format("%s cannot contain whitespace character except space.", thingToValidate)
     );
+
+    for (int i = 0; i < stringToValidate.length(); i++) {
+      final char c = stringToValidate.charAt(i);
+
+      // Curator doesn't permit any of the following ranges, so we can't either, because IDs are often used as
+      // znode paths. The first two ranges are control characters, the second two ranges correspond to surrogate
+      // pairs. This means that characters outside the basic multilingual plane, such as emojis, are not allowed. 😢
+      if (c > 0 && c < 31 || c > 127 && c < 159 || c > '\ud800' && c < '\uf8ff' || c > '\ufff0' && c < '\uffff') {
+        throw new IAE("%s cannot contain character #%d (at position %d).", thingToValidate, (int) c, i);
+      }
+    }
   }
 
   public static String getRandomId()
diff --git a/core/src/test/java/org/apache/druid/common/utils/IdUtilsTest.java b/core/src/test/java/org/apache/druid/common/utils/IdUtilsTest.java
index bd34967d4e0..806c74e6161 100644
--- a/core/src/test/java/org/apache/druid/common/utils/IdUtilsTest.java
+++ b/core/src/test/java/org/apache/druid/common/utils/IdUtilsTest.java
@@ -112,6 +112,22 @@ public class IdUtilsTest
     IdUtils.validateId(THINGO, "form\u000cfeed?");
   }
 
+  @Test
+  public void testInvalidUnprintableChars()
+  {
+    expectedException.expect(IllegalArgumentException.class);
+    expectedException.expectMessage("thingToValidate cannot contain character #129 (at position 4).");
+    IdUtils.validateId(THINGO, "form\u0081feed?");
+  }
+
+  @Test
+  public void testInvalidEmojis()
+  {
+    expectedException.expect(IllegalArgumentException.class);
+    expectedException.expectMessage("thingToValidate cannot contain character #55357 (at position 4).");
+    IdUtils.validateId(THINGO, "form💯feed?");
+  }
+
   @Test
   public void testNewTaskIdWithoutInterval()
   {