mirror of https://github.com/apache/druid.git
Issue fix for CSV loading with header and skip header not parsing well. (#10398)
This commit is contained in:
parent
6c5c86d800
commit
49a09302f3
|
@ -216,6 +216,10 @@ public class UriExtractionNamespace implements ExtractionNamespace
|
|||
public Map<String, String> parseToMap(String input)
|
||||
{
|
||||
final Map<String, Object> inner = delegate.parseToMap(input);
|
||||
if (null == inner) {
|
||||
// Skip null or missing values, treat them as if there were no row at all.
|
||||
return ImmutableMap.of();
|
||||
}
|
||||
final String k = Preconditions.checkNotNull(
|
||||
inner.get(key),
|
||||
"Key column [%s] missing data in line [%s]",
|
||||
|
@ -296,9 +300,10 @@ public class UriExtractionNamespace implements ExtractionNamespace
|
|||
this.valueColumn,
|
||||
Arrays.toString(columns.toArray())
|
||||
);
|
||||
|
||||
CSVParser csvParser = new CSVParser(null, columns, hasHeaderRow, skipHeaderRows);
|
||||
csvParser.startFileFromBeginning();
|
||||
this.parser = new DelegateParser(
|
||||
new CSVParser(null, columns, hasHeaderRow, skipHeaderRows),
|
||||
csvParser,
|
||||
this.keyColumn,
|
||||
this.valueColumn
|
||||
);
|
||||
|
@ -401,6 +406,7 @@ public class UriExtractionNamespace implements ExtractionNamespace
|
|||
hasHeaderRow,
|
||||
skipHeaderRows
|
||||
);
|
||||
delegate.startFileFromBeginning();
|
||||
Preconditions.checkArgument(
|
||||
!(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)),
|
||||
"Must specify both `keyColumn` and `valueColumn` or neither `keyColumn` nor `valueColumn`"
|
||||
|
|
|
@ -96,7 +96,25 @@ public class UriExtractionNamespaceTest
|
|||
);
|
||||
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A,B,C"));
|
||||
}
|
||||
|
||||
/**
 * Checks how a CSVFlatDataParser configured with columns (col1, col2, col3), key column "col2" and
 * value column "col3" handles leading rows: the first two lines fed to the parser must yield an
 * empty map, and the third line must yield a single key/value entry.
 *
 * NOTE(review): the trailing constructor arguments {@code true, 1} are presumably hasHeaderRow and
 * skipHeaderRows; the constructor signature is not visible here, so confirm against it.
 */
@Test
|
||||
public void testCSVWithHeader()
|
||||
{
|
||||
UriExtractionNamespace.CSVFlatDataParser parser = new UriExtractionNamespace.CSVFlatDataParser(
|
||||
ImmutableList.of("col1", "col2", "col3"),
|
||||
"col2",
|
||||
"col3",
|
||||
true,
|
||||
1
|
||||
);
|
||||
// The parser returns an empty map for the first line, because one leading row has to be skipped.
|
||||
Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("row to skip "));
|
||||
// The header line must also be skipped: it produces an empty map rather than data.
|
||||
Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("col1,col2,col3"));
|
||||
// After the header line has been fed in, the parser must report the expected field names.
|
||||
Assert.assertEquals(ImmutableList.of("col1", "col2", "col3"), parser.getParser().getFieldNames());
|
||||
// The third line is parsed as data: the col2 value becomes the key and the col3 value the value.
|
||||
Assert.assertEquals(ImmutableMap.of("val2", "val3"), parser.getParser().parseToMap("val1,val2,val3"));
|
||||
}
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testBadCSV()
|
||||
{
|
||||
|
@ -146,6 +164,26 @@ public class UriExtractionNamespaceTest
|
|||
);
|
||||
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A\\u0001B\\u0001C"));
|
||||
}
|
||||
/**
 * Checks how a TSVFlatDataParser configured with columns (col1, col2, col3), two custom delimiter
 * strings, key column "col2" and value column "col3" handles leading rows: the first two lines fed
 * to the parser must yield an empty map, and the third line must yield a single key/value entry.
 *
 * NOTE(review): the second and third constructor arguments are presumably the column delimiter and
 * the list delimiter, and the trailing {@code true, 1} presumably hasHeaderRow and skipHeaderRows;
 * the constructor signature is not visible here, so confirm against it.
 */
@Test
|
||||
public void testWithHeaderAndListDelimiterTSV()
|
||||
{
|
||||
UriExtractionNamespace.TSVFlatDataParser parser = new UriExtractionNamespace.TSVFlatDataParser(
|
||||
ImmutableList.of("col1", "col2", "col3"),
|
||||
"\\u0001",
|
||||
"\\u0002", "col2",
|
||||
"col3",
|
||||
true,
|
||||
1
|
||||
);
|
||||
// The first line is a row to skip, so it must produce an empty map.
|
||||
Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("Skipping some rows"));
|
||||
// The header line must be skipped as well: it produces an empty map rather than data.
|
||||
Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("col1\\u0001col2\\u0001col3"));
|
||||
// After the header line has been fed in, the parser must report the expected field names.
|
||||
Assert.assertEquals(ImmutableList.of("col1", "col2", "col3"), parser.getParser().getFieldNames());
|
||||
// The data row must parse correctly: second field becomes the key, third field the value.
|
||||
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A\\u0001B\\u0001C"));
|
||||
}
|
||||
|
||||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testBadTSV()
|
||||
|
|
Loading…
Reference in New Issue