Fix a bug in the CSV/TSV parsers when extracting columns from the header (#4443)

* Reset fieldNames whenever a new file begins

* Fix test failure

* Fix test failure
This commit is contained in:
Jihoon Son 2017-06-24 06:29:26 +09:00 committed by Fangjin Yang
parent 3e7f7720a1
commit b37c9b5fe0
5 changed files with 148 additions and 8 deletions

View File

@ -482,7 +482,7 @@ public class IndexTaskTest
Assert.assertEquals(1, segments.size());
Assert.assertEquals(Arrays.asList("dim"), segments.get(0).getDimensions());
Assert.assertEquals(Arrays.asList("d"), segments.get(0).getDimensions());
Assert.assertEquals(Arrays.asList("val"), segments.get(0).getMetrics());
Assert.assertEquals(new Interval("2014/P1D"), segments.get(0).getInterval());
}

View File

@ -113,9 +113,12 @@ public class CSVParser implements Parser<String, Object>
/**
 * Resets this parser's header-related state so that parsing can start over at the
 * beginning of a file: the "header parsed" flag is cleared, the skipped-row counter
 * returns to zero, and header/skip-row support is marked as enabled.
 */
@Override
public void startFileFromBeginning()
{
// NOTE(review): supportSkipHeaderRows is assigned both here and at the end of the method.
// This looks like the removed and the added line of the diff rendered together - confirm
// which one is live before touching either.
supportSkipHeaderRows = true;
if (hasHeaderRow) {
// Forget the column names remembered from an earlier file; presumably they are
// re-read from the next header line - verify against parse().
fieldNames = null;
}
hasParsedHeader = false;
skippedHeaderRows = 0;
supportSkipHeaderRows = true;
}
@Override

View File

@ -126,9 +126,12 @@ public class DelimitedParser implements Parser<String, Object>
/**
 * Resets this parser's header-related state so that parsing can start over at the
 * beginning of a file: the "header parsed" flag is cleared, the skipped-row counter
 * returns to zero, and header/skip-row support is marked as enabled.
 */
@Override
public void startFileFromBeginning()
{
// NOTE(review): supportSkipHeaderRows is assigned both here and at the end of the method.
// This looks like the removed and the added line of the diff rendered together - confirm
// which one is live before touching either.
supportSkipHeaderRows = true;
if (hasHeaderRow) {
// Forget the column names remembered from an earlier file; presumably they are
// re-read from the next header line - verify against parse().
fieldNames = null;
}
hasParsedHeader = false;
skippedHeaderRows = 0;
supportSkipHeaderRows = true;
}
@Override

View File

@ -22,12 +22,16 @@ package io.druid.java.util.common.parsers;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.util.Map;
public class CSVParserTest
{
@Rule
public ExpectedException expectedException = ExpectedException.none();
@Test
public void testValidHeader()
@ -117,9 +121,71 @@ public class CSVParserTest
);
}
@Test(expected = UnsupportedOperationException.class)
/**
 * Checks that, after startFileFromBeginning(), the first comma-separated line yields
 * no row and the second line is mapped using the names found on the first line.
 * The constructor arguments (absent, true, 0) presumably mean "no list delimiter,
 * has a header row, skip zero rows" - confirm against the CSVParser constructor.
 */
@Test
public void testCSVParserWithHeaderRow()
{
  final Parser<String, Object> parser = new CSVParser(Optional.absent(), true, 0);
  parser.startFileFromBeginning();

  final String headerLine = "time,value1,value2";
  final String dataLine = "hello,world,foo";

  // The header line itself must not produce a row.
  final Map<String, Object> headerResult = parser.parse(headerLine);
  Assert.assertNull(headerResult);

  // The data line is keyed by the header's column names.
  final Map<String, Object> parsedRow = parser.parse(dataLine);
  Assert.assertEquals(
      "jsonMap",
      ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo"),
      parsedRow
  );
}
/**
 * Checks that one parser instance can handle two consecutive files whose headers
 * differ: a three-column file first, then, after another startFileFromBeginning()
 * call, a four-column file.
 */
@Test
public void testCSVParserWithDifferentHeaderRows()
{
  final Parser<String, Object> parser = new CSVParser(Optional.absent(), true, 0);

  // First file: three columns.
  parser.startFileFromBeginning();
  final String firstHeader = "time,value1,value2";
  final String firstRow = "hello,world,foo";
  final Map<String, Object> firstHeaderResult = parser.parse(firstHeader);
  Assert.assertNull(firstHeaderResult);
  final Map<String, Object> firstParsed = parser.parse(firstRow);
  Assert.assertEquals(
      "jsonMap",
      ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo"),
      firstParsed
  );

  // Second file: the header gains a fourth column, which must show up in the result.
  parser.startFileFromBeginning();
  final String secondHeader = "time,value1,value2,value3";
  final String secondRow = "hello,world,foo,bar";
  final Map<String, Object> secondHeaderResult = parser.parse(secondHeader);
  Assert.assertNull(secondHeaderResult);
  final Map<String, Object> secondParsed = parser.parse(secondRow);
  Assert.assertEquals(
      "jsonMap",
      ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo", "value3", "bar"),
      secondParsed
  );
}
@Test
public void testCSVParserWithoutStartFileFromBeginning()
{
expectedException.expect(UnsupportedOperationException.class);
expectedException.expectMessage(
"hasHeaderRow or maxSkipHeaderRows is not supported. Please check the indexTask supports these options."
);
final int skipHeaderRows = 2;
final Parser<String, Object> csvParser = new CSVParser(
Optional.absent(),
@ -127,9 +193,9 @@ public class CSVParserTest
skipHeaderRows
);
final String[] body = new String[] {
"header\tline\t1",
"header\tline\t2",
"hello\tworld\tfoo"
"header,line,1",
"header,line,2",
"hello,world,foo"
};
csvParser.parse(body[0]);
}

View File

@ -22,12 +22,16 @@ package io.druid.java.util.common.parsers;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.util.Map;
public class DelimitedParserTest
{
@Rule
public ExpectedException expectedException = ExpectedException.none();
@Test
public void testValidHeader()
@ -127,9 +131,73 @@ public class DelimitedParserTest
);
}
@Test(expected = UnsupportedOperationException.class)
/**
 * Checks that, after startFileFromBeginning(), the first tab-separated line yields
 * no row and the second line is mapped using the names found on the first line.
 * The trailing constructor arguments (true, 0) presumably mean "has a header row,
 * skip zero rows" - confirm against the DelimitedParser constructor.
 */
@Test
public void testTSVParserWithHeaderRow()
{
  final Parser<String, Object> tsvParser = new DelimitedParser(Optional.of("\t"), Optional.absent(), true, 0);
  tsvParser.startFileFromBeginning();

  final String headerLine = "time\tvalue1\tvalue2";
  final String dataLine = "hello\tworld\tfoo";

  // The header line itself must not produce a row.
  final Map<String, Object> headerResult = tsvParser.parse(headerLine);
  Assert.assertNull(headerResult);

  // The data line is keyed by the header's column names.
  final Map<String, Object> parsedRow = tsvParser.parse(dataLine);
  Assert.assertEquals(
      "jsonMap",
      ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo"),
      parsedRow
  );
}
/**
 * Checks that one tab-delimited parser instance can handle two consecutive files
 * whose headers differ: a three-column file first, then, after another
 * startFileFromBeginning() call, a four-column file.
 */
@Test
public void testTSVParserWithDifferentHeaderRows()
{
  final Parser<String, Object> tsvParser = new DelimitedParser(Optional.of("\t"), Optional.absent(), true, 0);

  // First file: three columns.
  tsvParser.startFileFromBeginning();
  final String firstHeader = "time\tvalue1\tvalue2";
  final String firstRow = "hello\tworld\tfoo";
  final Map<String, Object> firstHeaderResult = tsvParser.parse(firstHeader);
  Assert.assertNull(firstHeaderResult);
  final Map<String, Object> firstParsed = tsvParser.parse(firstRow);
  Assert.assertEquals(
      "jsonMap",
      ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo"),
      firstParsed
  );

  // Second file: the header gains a fourth column, which must show up in the result.
  tsvParser.startFileFromBeginning();
  final String secondHeader = "time\tvalue1\tvalue2\tvalue3";
  final String secondRow = "hello\tworld\tfoo\tbar";
  final Map<String, Object> secondHeaderResult = tsvParser.parse(secondHeader);
  Assert.assertNull(secondHeaderResult);
  final Map<String, Object> secondParsed = tsvParser.parse(secondRow);
  Assert.assertEquals(
      "jsonMap",
      ImmutableMap.of("time", "hello", "value1", "world", "value2", "foo", "value3", "bar"),
      secondParsed
  );
}
@Test
public void testTSVParserWithoutStartFileFromBeginning()
{
expectedException.expect(UnsupportedOperationException.class);
expectedException.expectMessage(
"hasHeaderRow or maxSkipHeaderRows is not supported. Please check the indexTask supports these options."
);
final int skipHeaderRows = 2;
final Parser<String, Object> delimitedParser = new DelimitedParser(
Optional.of("\t"),