diff --git a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java index 60b04d32780..f454d918271 100644 --- a/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java +++ b/extensions-core/lookups-cached-global/src/main/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespace.java @@ -216,6 +216,10 @@ public class UriExtractionNamespace implements ExtractionNamespace public Map parseToMap(String input) { final Map inner = delegate.parseToMap(input); + if (null == inner) { + // Skip null or missing values, treat them as if there were no row at all. + return ImmutableMap.of(); + } final String k = Preconditions.checkNotNull( inner.get(key), "Key column [%s] missing data in line [%s]", @@ -296,9 +300,10 @@ public class UriExtractionNamespace implements ExtractionNamespace this.valueColumn, Arrays.toString(columns.toArray()) ); - + CSVParser csvParser = new CSVParser(null, columns, hasHeaderRow, skipHeaderRows); + csvParser.startFileFromBeginning(); this.parser = new DelegateParser( - new CSVParser(null, columns, hasHeaderRow, skipHeaderRows), + csvParser, this.keyColumn, this.valueColumn ); @@ -401,6 +406,7 @@ public class UriExtractionNamespace implements ExtractionNamespace hasHeaderRow, skipHeaderRows ); + delegate.startFileFromBeginning(); Preconditions.checkArgument( !(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)), "Must specify both `keyColumn` and `valueColumn` or neither `keyColumn` nor `valueColumn`" diff --git a/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java b/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java index 
d5ac42a068c..dc50126f066 100644 --- a/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java +++ b/extensions-core/lookups-cached-global/src/test/java/org/apache/druid/query/lookup/namespace/UriExtractionNamespaceTest.java @@ -96,7 +96,25 @@ public class UriExtractionNamespaceTest ); Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A,B,C")); } - + @Test + public void testCSVWithHeader() + { + UriExtractionNamespace.CSVFlatDataParser parser = new UriExtractionNamespace.CSVFlatDataParser( + ImmutableList.of("col1", "col2", "col3"), + "col2", + "col3", + true, + 1 + ); + // The parser returns an empty map because the first row has to be skipped. + Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("row to skip ")); + // The header row also needs to be skipped. + Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("col1,col2,col3")); + // test the header is parsed + Assert.assertEquals(ImmutableList.of("col1", "col2", "col3"), parser.getParser().getFieldNames()); + // The third row will parse to data + Assert.assertEquals(ImmutableMap.of("val2", "val3"), parser.getParser().parseToMap("val1,val2,val3")); + } @Test(expected = IllegalArgumentException.class) public void testBadCSV() { @@ -146,6 +164,26 @@ public class UriExtractionNamespaceTest ); Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A\\u0001B\\u0001C")); } + @Test + public void testWithHeaderAndListDelimiterTSV() + { + UriExtractionNamespace.TSVFlatDataParser parser = new UriExtractionNamespace.TSVFlatDataParser( + ImmutableList.of("col1", "col2", "col3"), + "\\u0001", + "\\u0002", "col2", + "col3", + true, + 1 + ); + // skipping one row + Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("Skipping some rows")); + // skip the header as well + Assert.assertEquals(ImmutableMap.of(), parser.getParser().parseToMap("col1\\u0001col2\\u0001col3")); + // 
test if the headers are parsed well. + Assert.assertEquals(ImmutableList.of("col1", "col2", "col3"), parser.getParser().getFieldNames()); + // test if the data row is parsed correctly + Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parseToMap("A\\u0001B\\u0001C")); + } @Test(expected = IllegalArgumentException.class) public void testBadTSV()