mirror of https://github.com/apache/druid.git
Handle missing values for delimited text files when NullHandling is enabled (#8779)
* Handle missing values * Fix multi-value tests * Fix firehose tests * Fix conflicts
This commit is contained in:
parent
4ae6466ae2
commit
f5fbd0bea0
|
@ -21,6 +21,9 @@ package org.apache.druid.java.util.common.parsers;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.opencsv.RFC4180Parser;
|
import com.opencsv.RFC4180Parser;
|
||||||
|
import com.opencsv.RFC4180ParserBuilder;
|
||||||
|
import com.opencsv.enums.CSVReaderNullFieldIndicator;
|
||||||
|
import org.apache.druid.common.config.NullHandling;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -29,7 +32,10 @@ import java.util.List;
|
||||||
|
|
||||||
public class CSVParser extends AbstractFlatTextFormatParser
|
public class CSVParser extends AbstractFlatTextFormatParser
|
||||||
{
|
{
|
||||||
private final RFC4180Parser parser = new RFC4180Parser();
|
private final RFC4180Parser parser = NullHandling.replaceWithDefault()
|
||||||
|
? new RFC4180Parser()
|
||||||
|
: new RFC4180ParserBuilder().withFieldAsNull(
|
||||||
|
CSVReaderNullFieldIndicator.EMPTY_SEPARATORS).build();
|
||||||
|
|
||||||
public CSVParser(
|
public CSVParser(
|
||||||
@Nullable final String listDelimiter,
|
@Nullable final String listDelimiter,
|
||||||
|
|
|
@ -22,6 +22,7 @@ package org.apache.druid.java.util.common.parsers;
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.base.Splitter;
|
import com.google.common.base.Splitter;
|
||||||
|
import org.apache.druid.common.config.NullHandling;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -94,7 +95,12 @@ public class DelimitedParser extends AbstractFlatTextFormatParser
|
||||||
List<String> result = new ArrayList<String>();
|
List<String> result = new ArrayList<String>();
|
||||||
|
|
||||||
while (iterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
result.add(iterator.next());
|
String splitValue = iterator.next();
|
||||||
|
if (!NullHandling.replaceWithDefault() && splitValue.isEmpty()) {
|
||||||
|
result.add(null);
|
||||||
|
} else {
|
||||||
|
result.add(splitValue);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return Collections.unmodifiableList(result);
|
return Collections.unmodifiableList(result);
|
||||||
|
|
|
@ -215,6 +215,20 @@ public class FlatTextFormatParserTest
|
||||||
parser.parseToMap(body[0]);
|
parser.parseToMap(body[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithNullValues()
|
||||||
|
{
|
||||||
|
final Parser<String, Object> parser = PARSER_FACTORY.get(format, true, 0);
|
||||||
|
parser.startFileFromBeginning();
|
||||||
|
final String[] body = new String[]{
|
||||||
|
concat(format, "time", "value1", "value2"),
|
||||||
|
concat(format, "hello", "world", "")
|
||||||
|
};
|
||||||
|
Assert.assertNull(parser.parseToMap(body[0]));
|
||||||
|
final Map<String, Object> jsonMap = parser.parseToMap(body[1]);
|
||||||
|
Assert.assertNull(jsonMap.get("value2"));
|
||||||
|
}
|
||||||
|
|
||||||
private static class FlatTextFormatParserFactory
|
private static class FlatTextFormatParserFactory
|
||||||
{
|
{
|
||||||
public Parser<String, Object> get(FlatTextFormat format)
|
public Parser<String, Object> get(FlatTextFormat format)
|
||||||
|
|
|
@ -1190,9 +1190,7 @@ public class FirehoseSamplerTest
|
||||||
private String getUnparseableTimestampString()
|
private String getUnparseableTimestampString()
|
||||||
{
|
{
|
||||||
return ParserType.STR_CSV.equals(parserType)
|
return ParserType.STR_CSV.equals(parserType)
|
||||||
? (USE_DEFAULT_VALUE_FOR_NULL
|
? "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, dim2=null, met1=6}"
|
||||||
? "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, dim2=null, met1=6}"
|
|
||||||
: "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, dim2=, met1=6}")
|
|
||||||
: "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, met1=6}";
|
: "Unparseable timestamp found! Event: {t=bad_timestamp, dim1=foo, met1=6}";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -161,7 +161,7 @@ public class MultiValuedDimensionTest
|
||||||
"2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3,u1\tu2",
|
"2011-01-12T00:00:00.000Z,product_1,t1\tt2\tt3,u1\tu2",
|
||||||
"2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5,u3\tu4",
|
"2011-01-13T00:00:00.000Z,product_2,t3\tt4\tt5,u3\tu4",
|
||||||
"2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7,u1\tu5",
|
"2011-01-14T00:00:00.000Z,product_3,t5\tt6\tt7,u1\tu5",
|
||||||
"2011-01-14T00:00:00.000Z,product_4,,u2"
|
"2011-01-14T00:00:00.000Z,product_4,\"\",u2"
|
||||||
};
|
};
|
||||||
|
|
||||||
for (String row : rows) {
|
for (String row : rows) {
|
||||||
|
|
Loading…
Reference in New Issue