disallow whitespace characters except space in data source names (#8465)

* disallow whitespace characters in data source names

* wrapped preconditions in a function, and simplify unit tests code

* Fixed regex to allow space, simplified repeat logic

* Fixed import style against mvn checkstyle

* Add msg in case test fails, use emptyMap(), improved naming

* Changes on assertion functions

* change wording of "whitespace" to "whitespace except space" so the message is not misleading
This commit is contained in:
Rye 2019-09-06 08:55:21 -07:00 committed by Himanshu
parent b8dc6a94b3
commit 645799f977
2 changed files with 59 additions and 4 deletions

View File

@ -41,13 +41,17 @@ import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
*
*/
public class DataSchema
{
private static final Logger log = new Logger(DataSchema.class);
private static final Pattern INVALIDCHARS = Pattern.compile("(?s).*[^\\S ].*");
private final String dataSource;
private final Map<String, Object> parser;
private final AggregatorFactory[] aggregators;
@ -72,8 +76,7 @@ public class DataSchema
this.parser = parser;
this.transformSpec = transformSpec == null ? TransformSpec.NONE : transformSpec;
Preconditions.checkArgument(!Strings.isNullOrEmpty(dataSource), "dataSource cannot be null or empty. Please provide a dataSource.");
Preconditions.checkArgument(!dataSource.contains("/"), "dataSource cannot contain the '/' character.");
validateDatasourceName(dataSource);
this.dataSource = dataSource;
if (granularitySpec == null) {
@ -98,6 +101,20 @@ public class DataSchema
this.aggregators = aggregators == null ? new AggregatorFactory[]{} : aggregators;
}
/**
 * Checks that a data source name is acceptable, failing on the first rule that is violated.
 *
 * Rules, in the order they are checked:
 * 1. the name is neither null nor empty;
 * 2. the name does not match {@code INVALIDCHARS}, i.e. it contains no whitespace character
 *    other than the plain space;
 * 3. the name contains no '/' character.
 *
 * @param dataSource the candidate data source name
 *
 * @throws IllegalArgumentException if any of the rules above is violated
 */
static void validateDatasourceName(String dataSource)
{
  final boolean nameProvided = !Strings.isNullOrEmpty(dataSource);
  Preconditions.checkArgument(
      nameProvided,
      "dataSource cannot be null or empty. Please provide a dataSource."
  );

  // Safe to dereference from here on: the null/empty case has already been rejected above.
  final Matcher forbiddenWhitespace = INVALIDCHARS.matcher(dataSource);
  final boolean hasForbiddenWhitespace = forbiddenWhitespace.matches();
  Preconditions.checkArgument(
      !hasForbiddenWhitespace,
      "dataSource cannot contain whitespace character except space."
  );

  final boolean hasSlash = dataSource.indexOf('/') >= 0;
  Preconditions.checkArgument(!hasSlash, "dataSource cannot contain the '/' character.");
}
@JsonProperty
public String getDataSource()
{

View File

@ -22,6 +22,7 @@ package org.apache.druid.segment.indexing;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.impl.DimensionsSpec;
@ -52,6 +53,7 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
public class DataSchemaTest
@ -286,7 +288,7 @@ public class DataSchemaTest
// Jackson creates a default type parser (StringInputRowParser) for an invalid type.
schema.getParser();
}
@Test
public void testEmptyDatasource()
{
@ -324,6 +326,42 @@ public class DataSchemaTest
);
}
/**
 * Runs the invalid-whitespace check once for each of tab, carriage return and newline.
 */
@Test
public void testInvalidWhitespaceDatasource()
{
  // Key: printable (escaped) form of the offending character, used only in failure messages.
  // Value: a data source name that actually contains that character.
  // Note that "\return" below is itself a carriage-return escape followed by "eturn".
  final Map<String, String> namesByEscapedChar = ImmutableMap.of(
      "\\t", "\tab\t",
      "\\r", "\rcarriage\return\r",
      "\\n", "\nnew\nline\n"
  );

  namesByEscapedChar.forEach(
      (escapedChar, name) -> testInvalidWhitespaceDatasourceHelper(name, escapedChar)
  );
}
/**
 * Asserts that constructing a {@link DataSchema} with the given name fails with the
 * "whitespace except space" validation message.
 *
 * @param dataSource  data source name that is expected to be rejected
 * @param invalidChar printable (escaped) form of the offending character; used only to build
 *                    the message reported when the test fails
 */
private void testInvalidWhitespaceDatasourceHelper(String dataSource, String invalidChar)
{
  String testFailMsg = "dataSource contain invalid whitespace character: " + invalidChar;
  try {
    // Only the constructor's validation matters here, so the instance is deliberately discarded.
    new DataSchema(
        dataSource,
        Collections.emptyMap(),
        null,
        null,
        null,
        jsonMapper
    );
    // Reaching this line means the invalid name was accepted.
    Assert.fail(testFailMsg);
  }
  catch (IllegalArgumentException e) {
    String expectedMsg = "dataSource cannot contain whitespace character except space.";
    Assert.assertEquals(testFailMsg, expectedMsg, e.getMessage());
  }
}
@Test
public void testSerde() throws Exception
{