[QTL] adding listDelimiter to lookup parser spec (#2941)

* adding listDelimiter to lookup parser spec

* cleaning code
This commit is contained in:
Slim 2016-05-10 03:11:16 -07:00 committed by Nishant
parent 0c04650e69
commit 45b2e65d75
3 changed files with 31 additions and 8 deletions

View File

@ -151,6 +151,7 @@ truck,something3,buck
|`keyColumn`|The name of the column containing the key|no|The first column| |`keyColumn`|The name of the column containing the key|no|The first column|
|`valueColumn`|The name of the column containing the value|no|The second column| |`valueColumn`|The name of the column containing the value|no|The second column|
|`delimiter`|The delimiter in the file|no|tab (`\t`)| |`delimiter`|The delimiter in the file|no|tab (`\t`)|
|`listDelimiter`|The delimiter separating the elements of a multi-value (list) field in the file|no|Ctrl-A (`\u0001`)|
*example input* *example input*

View File

@ -356,6 +356,7 @@ public class URIExtractionNamespace implements ExtractionNamespace
private final Parser<String, String> parser; private final Parser<String, String> parser;
private final List<String> columns; private final List<String> columns;
private final String delimiter; private final String delimiter;
private final String listDelimiter;
private final String keyColumn; private final String keyColumn;
private final String valueColumn; private final String valueColumn;
@ -363,6 +364,7 @@ public class URIExtractionNamespace implements ExtractionNamespace
public TSVFlatDataParser( public TSVFlatDataParser(
@JsonProperty("columns") List<String> columns, @JsonProperty("columns") List<String> columns,
@JsonProperty("delimiter") String delimiter, @JsonProperty("delimiter") String delimiter,
@JsonProperty("listDelimiter") String listDelimiter,
@JsonProperty("keyColumn") final String keyColumn, @JsonProperty("keyColumn") final String keyColumn,
@JsonProperty("valueColumn") final String valueColumn @JsonProperty("valueColumn") final String valueColumn
) )
@ -372,8 +374,8 @@ public class URIExtractionNamespace implements ExtractionNamespace
"Must specify more than one column to have a key value pair" "Must specify more than one column to have a key value pair"
); );
final DelimitedParser delegate = new DelimitedParser( final DelimitedParser delegate = new DelimitedParser(
Optional.fromNullable(Strings.isNullOrEmpty(delimiter) ? null : delimiter), Optional.fromNullable(Strings.emptyToNull(delimiter)),
Optional.<String>absent() Optional.fromNullable(Strings.emptyToNull(listDelimiter))
); );
Preconditions.checkArgument( Preconditions.checkArgument(
!(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)), !(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)),
@ -382,6 +384,7 @@ public class URIExtractionNamespace implements ExtractionNamespace
delegate.setFieldNames(columns); delegate.setFieldNames(columns);
this.columns = columns; this.columns = columns;
this.delimiter = delimiter; this.delimiter = delimiter;
this.listDelimiter = listDelimiter;
this.keyColumn = Strings.isNullOrEmpty(keyColumn) ? columns.get(0) : keyColumn; this.keyColumn = Strings.isNullOrEmpty(keyColumn) ? columns.get(0) : keyColumn;
this.valueColumn = Strings.isNullOrEmpty(valueColumn) ? columns.get(1) : valueColumn; this.valueColumn = Strings.isNullOrEmpty(valueColumn) ? columns.get(1) : valueColumn;
Preconditions.checkArgument( Preconditions.checkArgument(
@ -418,6 +421,12 @@ public class URIExtractionNamespace implements ExtractionNamespace
return this.valueColumn; return this.valueColumn;
} }
/**
 * Returns the list delimiter exactly as it was handed to the constructor.
 * The constructor stores the raw argument, so this may be {@code null} or
 * empty when no list delimiter was configured. Annotated with
 * {@code @JsonProperty} so the value is included when the parser spec is
 * serialized.
 *
 * @return the configured list delimiter, possibly {@code null}
 */
@JsonProperty
public String getListDelimiter()
{
return listDelimiter;
}
@JsonProperty @JsonProperty
public String getDelimiter() public String getDelimiter()
{ {
@ -434,9 +443,10 @@ public class URIExtractionNamespace implements ExtractionNamespace
public String toString() public String toString()
{ {
return String.format( return String.format(
"TSVFlatDataParser = { columns = %s, delimiter = '%s', keyColumn = %s, valueColumn = %s }", "TSVFlatDataParser = { columns = %s, delimiter = '%s', listDelimiter = '%s',keyColumn = %s, valueColumn = %s }",
Arrays.toString(columns.toArray()), Arrays.toString(columns.toArray()),
delimiter, delimiter,
listDelimiter,
keyColumn, keyColumn,
valueColumn valueColumn
); );

View File

@ -121,19 +121,31 @@ public class URIExtractionNamespaceTest
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser( URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
ImmutableList.of("col1", "col2", "col3"), ImmutableList.of("col1", "col2", "col3"),
"|", "|",
"col2", null, "col2",
"col3" "col3"
); );
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parse("A|B|C")); Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parse("A|B|C"));
} }
/**
 * Builds a TSV parser with both an explicit field delimiter and an explicit
 * list delimiter, then checks that a delimited line is reduced to the single
 * key/value pair taken from the configured key and value columns.
 */
@Test
public void testWithListDelimiterTSV()
{
  // NOTE(review): "\\u0001" is an escaped backslash followed by "u0001", i.e. the
  // six-character text \u0001 rather than the U+0001 control character. The input
  // line below uses the same six-character text, so the split still lines up.
  // Confirm this is what the test is meant to exercise.
  final String fieldDelimiter = "\\u0001";
  final String listDelimiter = "\\u0002";

  final URIExtractionNamespace.TSVFlatDataParser tsvParser = new URIExtractionNamespace.TSVFlatDataParser(
      ImmutableList.of("col1", "col2", "col3"),
      fieldDelimiter,
      listDelimiter,
      "col2",
      "col3"
  );

  // col2 is the key column and col3 the value column, so only B -> C survives.
  final Map<String, String> parsed = tsvParser.getParser().parse("A\\u0001B\\u0001C");
  Assert.assertEquals(ImmutableMap.of("B", "C"), parsed);
}
@Test(expected = IllegalArgumentException.class) @Test(expected = IllegalArgumentException.class)
public void testBadTSV() public void testBadTSV()
{ {
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser( URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
ImmutableList.of("col1", "col2", "col3fdsfds"), ImmutableList.of("col1", "col2", "col3fdsfds"),
",", ",",
"col2", null, "col2",
"col3" "col3"
); );
Map<String, String> map = parser.getParser().parse("A,B,C"); Map<String, String> map = parser.getParser().parse("A,B,C");
@ -147,7 +159,7 @@ public class URIExtractionNamespaceTest
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser( URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
ImmutableList.of("col1", "col2", "col3"), ImmutableList.of("col1", "col2", "col3"),
",", ",",
"col2", null, "col2",
"col3" "col3"
); );
Map<String, String> map = parser.getParser().parse("A"); Map<String, String> map = parser.getParser().parse("A");
@ -293,7 +305,7 @@ public class URIExtractionNamespaceTest
), ),
new URIExtractionNamespace.ObjectMapperFlatDataParser(mapper), new URIExtractionNamespace.ObjectMapperFlatDataParser(mapper),
new URIExtractionNamespace.JSONFlatDataParser(mapper, "keyField", "valueField"), new URIExtractionNamespace.JSONFlatDataParser(mapper, "keyField", "valueField"),
new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", "A", "B") new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", null, "A", "B")
)) { )) {
final String str = mapper.writeValueAsString(parser); final String str = mapper.writeValueAsString(parser);
final URIExtractionNamespace.FlatDataParser parser2 = mapper.readValue( final URIExtractionNamespace.FlatDataParser parser2 = mapper.readValue(
@ -318,7 +330,7 @@ public class URIExtractionNamespaceTest
), ),
new URIExtractionNamespace.ObjectMapperFlatDataParser(mapper), new URIExtractionNamespace.ObjectMapperFlatDataParser(mapper),
new URIExtractionNamespace.JSONFlatDataParser(mapper, "keyField", "valueField"), new URIExtractionNamespace.JSONFlatDataParser(mapper, "keyField", "valueField"),
new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", "A", "B") new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", null, "A", "B")
)) { )) {
Assert.assertFalse(parser.toString().contains("@")); Assert.assertFalse(parser.toString().contains("@"));
} }