mirror of https://github.com/apache/druid.git
Handle missing values for delimited text files when Nullhandling is enabled (#8779)
* Handle missing values
* Fix multi value tests
* Fix firehose tests
* Fix conflicts
This commit is contained in:
parent
4ae6466ae2
commit
f5fbd0bea0
|
@ -21,6 +21,9 @@ package org.apache.druid.java.util.common.parsers;
|
|||
|
||||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.opencsv.RFC4180Parser;
|
||||
import com.opencsv.RFC4180ParserBuilder;
|
||||
import com.opencsv.enums.CSVReaderNullFieldIndicator;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
|
@ -29,7 +32,10 @@ import java.util.List;
|
|||
|
||||
public class CSVParser extends AbstractFlatTextFormatParser
|
||||
{
|
||||
private final RFC4180Parser parser = new RFC4180Parser();
|
||||
private final RFC4180Parser parser = NullHandling.replaceWithDefault()
|
||||
? new RFC4180Parser()
|
||||
: new RFC4180ParserBuilder().withFieldAsNull(
|
||||
CSVReaderNullFieldIndicator.EMPTY_SEPARATORS).build();
|
||||
|
||||
public CSVParser(
|
||||
@Nullable final String listDelimiter,
|
||||
|
|
|
@ -22,6 +22,7 @@ package org.apache.druid.java.util.common.parsers;
|
|||
import com.google.common.annotations.VisibleForTesting;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Splitter;
|
||||
import org.apache.druid.common.config.NullHandling;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.ArrayList;
|
||||
|
@ -94,7 +95,12 @@ public class DelimitedParser extends AbstractFlatTextFormatParser
|
|||
List<String> result = new ArrayList<String>();
|
||||
|
||||
while (iterator.hasNext()) {
|
||||
result.add(iterator.next());
|
||||
String splitValue = iterator.next();
|
||||
if (!NullHandling.replaceWithDefault() && splitValue.isEmpty()) {
|
||||
result.add(null);
|
||||
} else {
|
||||
result.add(splitValue);
|
||||
}
|
||||
}
|
||||
|
||||
return Collections.unmodifiableList(result);
|
||||
|
|
|
@ -215,6 +215,20 @@ public class FlatTextFormatParserTest
|
|||
parser.parseToMap(body[0]);
|
||||
}
|
||||
|
||||
/**
 * Feeds the parser two delimited lines, the second of which ends with an empty field, and
 * asserts that looking up "value2" in the map parsed from that second line returns null.
 */
@Test
|
||||
public void testWithNullValues()
|
||||
{
|
||||
// NOTE(review): the meaning of the (true, 0) arguments is not visible in this hunk;
// presumably they control header-row handling and rows to skip. Confirm against
// FlatTextFormatParserFactory.
final Parser<String, Object> parser = PARSER_FACTORY.get(format, true, 0);
|
||||
parser.startFileFromBeginning();
|
||||
final String[] body = new String[]{
|
||||
concat(format, "time", "value1", "value2"),
|
||||
// Last field is the empty string: this is the case under test.
concat(format, "hello", "world", "")
|
||||
};
|
||||
// The first line is expected to yield no parsed row (parseToMap returns null).
Assert.assertNull(parser.parseToMap(body[0]));
|
||||
final Map<String, Object> jsonMap = parser.parseToMap(body[1]);
|
||||
// get("value2") must return null for the line whose last field is empty.
Assert.assertNull(jsonMap.get("value2"));
|
||||
}
|
||||
|
||||
private static class FlatTextFormatParserFactory
|
||||
{
|
||||
public Parser<String, Object> get(FlatTextFormat format)
|
||||
|
|
|
@ -1190,9 +1190,7 @@ public class FirehoseSamplerTest
|
|||
private String getUnparseableTimestampString()
|
||||
{
|
||||
return ParserType.STR_CSV.equals(parserType)
|
||||
? (USE_DEFAULT_VALUE_FOR_NULL
|
||||
? "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, dim2=null, met1=6}"
|
||||
: "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, dim2=, met1=6}")
|
||||
: "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, met1=6}";
|
||||
}
|
||||
|
||||
|
|
|
@ -161,7 +161,7 @@ public class MultiValuedDimensionTest
|
|||
"2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3,u1\tu2",
|
||||
"2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5,u3\tu4",
|
||||
"2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7,u1\tu5",
|
||||
"2011-01-14T00:00:00.000Z,product_4,,u2"
|
||||
"2011-01-14T00:00:00.000Z,product_4,\"\",u2"
|
||||
};
|
||||
|
||||
for (String row : rows) {
|
||||
|
|
Loading…
Reference in New Issue