Fix CSV lookup loading so that files with a header row and skipped leading rows are parsed correctly. (#10398)

This commit is contained in:
Tarun 2020-09-22 03:44:22 +05:30 committed by GitHub
parent 6c5c86d800
commit 49a09302f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 47 additions and 3 deletions

View File

@ -216,6 +216,10 @@ public class UriExtractionNamespace implements ExtractionNamespace
public Map<String, String> parseToMap(String input)
{
final Map<String, Object> inner = delegate.parseToMap(input);
if (null == inner) {
// Skip null or missing values, treat them as if there were no row at all.
return ImmutableMap.of();
}
final String k = Preconditions.checkNotNull(
inner.get(key),
"Key column [%s] missing data in line [%s]",
@ -296,9 +300,10 @@ public class UriExtractionNamespace implements ExtractionNamespace
this.valueColumn,
Arrays.toString(columns.toArray())
);
CSVParser csvParser = new CSVParser(null, columns, hasHeaderRow, skipHeaderRows);
csvParser.startFileFromBeginning();
this.parser = new DelegateParser(
new CSVParser(null, columns, hasHeaderRow, skipHeaderRows),
csvParser,
this.keyColumn,
this.valueColumn
);
@ -401,6 +406,7 @@ public class UriExtractionNamespace implements ExtractionNamespace
hasHeaderRow,
skipHeaderRows
);
delegate.startFileFromBeginning();
Preconditions.checkArgument(
!(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)),
"Must specify both `keyColumn` and `valueColumn` or neither `keyColumn` nor `valueColumn`"

View File

@ -96,7 +96,25 @@ public class UriExtractionNamespaceTest
);
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A,B,C"));
}
@Test
public void testCSVWithHeader()
{
  // The trailing arguments (true, 1) presumably map to hasHeaderRow and skipHeaderRows;
  // the constructor signature is not visible from this test -- confirm against
  // UriExtractionNamespace.CSVFlatDataParser.
  final UriExtractionNamespace.CSVFlatDataParser csvFlatDataParser =
      new UriExtractionNamespace.CSVFlatDataParser(
          ImmutableList.of("col1", "col2", "col3"),
          "col2",
          "col3",
          true,
          1
      );
  final ImmutableMap<String, String> noEntries = ImmutableMap.of();
  final ImmutableMap<String, String> expectedDataRow = ImmutableMap.of("val2", "val3");

  // The calls below are order-sensitive: each line is fed to the same parser in file order.

  // Line 1 is the single row to skip, so an empty map is expected.
  Assert.assertEquals(noEntries, csvFlatDataParser.getParser().parseToMap("row to skip "));

  // Line 2 is the header row; it must not be emitted as data either.
  Assert.assertEquals(noEntries, csvFlatDataParser.getParser().parseToMap("col1,col2,col3"));

  // Once the header line has been consumed, the field names must match it.
  Assert.assertEquals(
      ImmutableList.of("col1", "col2", "col3"),
      csvFlatDataParser.getParser().getFieldNames()
  );

  // Line 3 is the first real data row: "col2" -> key, "col3" -> value.
  Assert.assertEquals(expectedDataRow, csvFlatDataParser.getParser().parseToMap("val1,val2,val3"));
}
@Test(expected = IllegalArgumentException.class)
public void testBadCSV()
{
@ -146,6 +164,26 @@ public class UriExtractionNamespaceTest
);
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A\\u0001B\\u0001C"));
}
@Test
public void testWithHeaderAndListDelimiterTSV()
{
  // Second and third arguments are the custom delimiters under test (written here as the
  // escaped text \u0001 and \u0002). The trailing (true, 1) presumably map to hasHeaderRow
  // and skipHeaderRows -- confirm against UriExtractionNamespace.TSVFlatDataParser.
  final UriExtractionNamespace.TSVFlatDataParser tsvFlatDataParser =
      new UriExtractionNamespace.TSVFlatDataParser(
          ImmutableList.of("col1", "col2", "col3"),
          "\\u0001",
          "\\u0002",
          "col2",
          "col3",
          true,
          1
      );
  final ImmutableMap<String, String> noEntries = ImmutableMap.of();
  final ImmutableMap<String, String> expectedDataRow = ImmutableMap.of("B", "C");

  // The calls below are order-sensitive: each line is fed to the same parser in file order.

  // Line 1 is the single row to skip, so an empty map is expected.
  Assert.assertEquals(noEntries, tsvFlatDataParser.getParser().parseToMap("Skipping some rows"));

  // Line 2 is the header row; it must be skipped as well.
  Assert.assertEquals(
      noEntries,
      tsvFlatDataParser.getParser().parseToMap("col1\\u0001col2\\u0001col3")
  );

  // Once the header line has been consumed, the field names must match it.
  Assert.assertEquals(
      ImmutableList.of("col1", "col2", "col3"),
      tsvFlatDataParser.getParser().getFieldNames()
  );

  // Line 3 is the first real data row: "col2" -> key, "col3" -> value.
  Assert.assertEquals(
      expectedDataRow,
      tsvFlatDataParser.getParser().parseToMap("A\\u0001B\\u0001C")
  );
}
@Test(expected = IllegalArgumentException.class)
public void testBadTSV()