mirror of https://github.com/apache/druid.git
[QTL] adding listDelimiter to lookup parser spec (#2941)
* adding listDelimiter to lookup parser spec * cleaning code
This commit is contained in:
parent
0c04650e69
commit
45b2e65d75
|
@ -151,6 +151,7 @@ truck,something3,buck
|
||||||
|`keyColumn`|The name of the column containing the key|no|The first column|
|
|`keyColumn`|The name of the column containing the key|no|The first column|
|
||||||
|`valueColumn`|The name of the column containing the value|no|The second column|
|
|`valueColumn`|The name of the column containing the value|no|The second column|
|
||||||
|`delimiter`|The delimiter in the file|no|tab (`\t`)|
|
|`delimiter`|The delimiter in the file|no|tab (`\t`)|
|
||||||
|
|`listDelimiter`|The list delimiter in the file|no|Ctrl-A (`\u0001`)|
|
||||||
|
|
||||||
|
|
||||||
*example input*
|
*example input*
|
||||||
|
|
|
@ -356,6 +356,7 @@ public class URIExtractionNamespace implements ExtractionNamespace
|
||||||
private final Parser<String, String> parser;
|
private final Parser<String, String> parser;
|
||||||
private final List<String> columns;
|
private final List<String> columns;
|
||||||
private final String delimiter;
|
private final String delimiter;
|
||||||
|
private final String listDelimiter;
|
||||||
private final String keyColumn;
|
private final String keyColumn;
|
||||||
private final String valueColumn;
|
private final String valueColumn;
|
||||||
|
|
||||||
|
@ -363,6 +364,7 @@ public class URIExtractionNamespace implements ExtractionNamespace
|
||||||
public TSVFlatDataParser(
|
public TSVFlatDataParser(
|
||||||
@JsonProperty("columns") List<String> columns,
|
@JsonProperty("columns") List<String> columns,
|
||||||
@JsonProperty("delimiter") String delimiter,
|
@JsonProperty("delimiter") String delimiter,
|
||||||
|
@JsonProperty("listDelimiter") String listDelimiter,
|
||||||
@JsonProperty("keyColumn") final String keyColumn,
|
@JsonProperty("keyColumn") final String keyColumn,
|
||||||
@JsonProperty("valueColumn") final String valueColumn
|
@JsonProperty("valueColumn") final String valueColumn
|
||||||
)
|
)
|
||||||
|
@ -372,8 +374,8 @@ public class URIExtractionNamespace implements ExtractionNamespace
|
||||||
"Must specify more than one column to have a key value pair"
|
"Must specify more than one column to have a key value pair"
|
||||||
);
|
);
|
||||||
final DelimitedParser delegate = new DelimitedParser(
|
final DelimitedParser delegate = new DelimitedParser(
|
||||||
Optional.fromNullable(Strings.isNullOrEmpty(delimiter) ? null : delimiter),
|
Optional.fromNullable(Strings.emptyToNull(delimiter)),
|
||||||
Optional.<String>absent()
|
Optional.fromNullable(Strings.emptyToNull(listDelimiter))
|
||||||
);
|
);
|
||||||
Preconditions.checkArgument(
|
Preconditions.checkArgument(
|
||||||
!(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)),
|
!(Strings.isNullOrEmpty(keyColumn) ^ Strings.isNullOrEmpty(valueColumn)),
|
||||||
|
@ -382,6 +384,7 @@ public class URIExtractionNamespace implements ExtractionNamespace
|
||||||
delegate.setFieldNames(columns);
|
delegate.setFieldNames(columns);
|
||||||
this.columns = columns;
|
this.columns = columns;
|
||||||
this.delimiter = delimiter;
|
this.delimiter = delimiter;
|
||||||
|
this.listDelimiter = listDelimiter;
|
||||||
this.keyColumn = Strings.isNullOrEmpty(keyColumn) ? columns.get(0) : keyColumn;
|
this.keyColumn = Strings.isNullOrEmpty(keyColumn) ? columns.get(0) : keyColumn;
|
||||||
this.valueColumn = Strings.isNullOrEmpty(valueColumn) ? columns.get(1) : valueColumn;
|
this.valueColumn = Strings.isNullOrEmpty(valueColumn) ? columns.get(1) : valueColumn;
|
||||||
Preconditions.checkArgument(
|
Preconditions.checkArgument(
|
||||||
|
@ -418,6 +421,12 @@ public class URIExtractionNamespace implements ExtractionNamespace
|
||||||
return this.valueColumn;
|
return this.valueColumn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
public String getListDelimiter()
|
||||||
|
{
|
||||||
|
return listDelimiter;
|
||||||
|
}
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
public String getDelimiter()
|
public String getDelimiter()
|
||||||
{
|
{
|
||||||
|
@ -434,9 +443,10 @@ public class URIExtractionNamespace implements ExtractionNamespace
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
return String.format(
|
return String.format(
|
||||||
"TSVFlatDataParser = { columns = %s, delimiter = '%s', keyColumn = %s, valueColumn = %s }",
|
"TSVFlatDataParser = { columns = %s, delimiter = '%s', listDelimiter = '%s',keyColumn = %s, valueColumn = %s }",
|
||||||
Arrays.toString(columns.toArray()),
|
Arrays.toString(columns.toArray()),
|
||||||
delimiter,
|
delimiter,
|
||||||
|
listDelimiter,
|
||||||
keyColumn,
|
keyColumn,
|
||||||
valueColumn
|
valueColumn
|
||||||
);
|
);
|
||||||
|
|
|
@ -121,19 +121,31 @@ public class URIExtractionNamespaceTest
|
||||||
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
|
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
|
||||||
ImmutableList.of("col1", "col2", "col3"),
|
ImmutableList.of("col1", "col2", "col3"),
|
||||||
"|",
|
"|",
|
||||||
"col2",
|
null, "col2",
|
||||||
"col3"
|
"col3"
|
||||||
);
|
);
|
||||||
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parse("A|B|C"));
|
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parse("A|B|C"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithListDelimiterTSV()
|
||||||
|
{
|
||||||
|
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
|
||||||
|
ImmutableList.of("col1", "col2", "col3"),
|
||||||
|
"\\u0001",
|
||||||
|
"\\u0002", "col2",
|
||||||
|
"col3"
|
||||||
|
);
|
||||||
|
Assert.assertEquals(ImmutableMap.of("B", "C"), parser.getParser().parse("A\\u0001B\\u0001C"));
|
||||||
|
}
|
||||||
|
|
||||||
@Test(expected = IllegalArgumentException.class)
|
@Test(expected = IllegalArgumentException.class)
|
||||||
public void testBadTSV()
|
public void testBadTSV()
|
||||||
{
|
{
|
||||||
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
|
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
|
||||||
ImmutableList.of("col1", "col2", "col3fdsfds"),
|
ImmutableList.of("col1", "col2", "col3fdsfds"),
|
||||||
",",
|
",",
|
||||||
"col2",
|
null, "col2",
|
||||||
"col3"
|
"col3"
|
||||||
);
|
);
|
||||||
Map<String, String> map = parser.getParser().parse("A,B,C");
|
Map<String, String> map = parser.getParser().parse("A,B,C");
|
||||||
|
@ -147,7 +159,7 @@ public class URIExtractionNamespaceTest
|
||||||
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
|
URIExtractionNamespace.TSVFlatDataParser parser = new URIExtractionNamespace.TSVFlatDataParser(
|
||||||
ImmutableList.of("col1", "col2", "col3"),
|
ImmutableList.of("col1", "col2", "col3"),
|
||||||
",",
|
",",
|
||||||
"col2",
|
null, "col2",
|
||||||
"col3"
|
"col3"
|
||||||
);
|
);
|
||||||
Map<String, String> map = parser.getParser().parse("A");
|
Map<String, String> map = parser.getParser().parse("A");
|
||||||
|
@ -293,7 +305,7 @@ public class URIExtractionNamespaceTest
|
||||||
),
|
),
|
||||||
new URIExtractionNamespace.ObjectMapperFlatDataParser(mapper),
|
new URIExtractionNamespace.ObjectMapperFlatDataParser(mapper),
|
||||||
new URIExtractionNamespace.JSONFlatDataParser(mapper, "keyField", "valueField"),
|
new URIExtractionNamespace.JSONFlatDataParser(mapper, "keyField", "valueField"),
|
||||||
new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", "A", "B")
|
new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", null, "A", "B")
|
||||||
)) {
|
)) {
|
||||||
final String str = mapper.writeValueAsString(parser);
|
final String str = mapper.writeValueAsString(parser);
|
||||||
final URIExtractionNamespace.FlatDataParser parser2 = mapper.readValue(
|
final URIExtractionNamespace.FlatDataParser parser2 = mapper.readValue(
|
||||||
|
@ -318,7 +330,7 @@ public class URIExtractionNamespaceTest
|
||||||
),
|
),
|
||||||
new URIExtractionNamespace.ObjectMapperFlatDataParser(mapper),
|
new URIExtractionNamespace.ObjectMapperFlatDataParser(mapper),
|
||||||
new URIExtractionNamespace.JSONFlatDataParser(mapper, "keyField", "valueField"),
|
new URIExtractionNamespace.JSONFlatDataParser(mapper, "keyField", "valueField"),
|
||||||
new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", "A", "B")
|
new URIExtractionNamespace.TSVFlatDataParser(ImmutableList.of("A", "B"), ",", null, "A", "B")
|
||||||
)) {
|
)) {
|
||||||
Assert.assertFalse(parser.toString().contains("@"));
|
Assert.assertFalse(parser.toString().contains("@"));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue