/**
* Specifies a field to be added to the parsed object Map, using JsonPath notation.
- *
+ *
* See https://github.com/jayway/JsonPath for more information.
*/
public static class FieldSpec
@@ -281,5 +281,4 @@ public class JSONPathParser implements Parser
return expr;
}
}
-
}
diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/Parser.java b/java-util/src/main/java/io/druid/java/util/common/parsers/Parser.java
index 78b29de7224..8cc6fd6a1d6 100644
--- a/java-util/src/main/java/io/druid/java/util/common/parsers/Parser.java
+++ b/java-util/src/main/java/io/druid/java/util/common/parsers/Parser.java
@@ -19,6 +19,7 @@
package io.druid.java.util.common.parsers;
+import javax.annotation.Nullable;
import java.util.List;
import java.util.Map;
@@ -28,11 +29,21 @@ import java.util.Map;
public interface Parser
{
/**
- * Parse a String into a Map.
+ * This method may or may not be called at the start of reading each file, depending on the type of IndexTask.
+ * The parser state, if any, should be reset.
+ */
+ default void startFileFromBeginning()
+ {
+ // Intentionally empty: the default implementation does nothing.
+ }
+
+ /**
+ * Parse a String into a Map. The result can be null, which means the given input string will be ignored.
*
* @throws ParseException if the String cannot be parsed
*/
- public Map parse(String input);
+ @Nullable
+ Map parse(String input);
/**
* Set the fieldNames that you expect to see in parsed Maps. Deprecated; Parsers should not, in general, be
@@ -40,12 +51,12 @@ public interface Parser
* parser) and those parsers have their own way of setting field names.
*/
@Deprecated
- public void setFieldNames(Iterable fieldNames);
+ void setFieldNames(Iterable fieldNames);
/**
* Returns the fieldNames that we expect to see in parsed Maps, if known, or null otherwise. Deprecated; Parsers
* should not, in general, be expected to know what fields they will return.
*/
@Deprecated
- public List getFieldNames();
+ List getFieldNames();
}
diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/RegexParser.java b/java-util/src/main/java/io/druid/java/util/common/parsers/RegexParser.java
index 87b6321aa20..329b02aa944 100644
--- a/java-util/src/main/java/io/druid/java/util/common/parsers/RegexParser.java
+++ b/java-util/src/main/java/io/druid/java/util/common/parsers/RegexParser.java
@@ -117,7 +117,6 @@ public class RegexParser implements Parser
}
@Override
-
public List getFieldNames()
{
return fieldNames;
diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/ToLowerCaseParser.java b/java-util/src/main/java/io/druid/java/util/common/parsers/ToLowerCaseParser.java
index 7e45bf23d94..fede1aa1f98 100644
--- a/java-util/src/main/java/io/druid/java/util/common/parsers/ToLowerCaseParser.java
+++ b/java-util/src/main/java/io/druid/java/util/common/parsers/ToLowerCaseParser.java
@@ -53,6 +53,12 @@ public class ToLowerCaseParser implements Parser
return retVal;
}
+ @Override
+ public void startFileFromBeginning()
+ {
+ // Forward the call to baseParser.
+ baseParser.startFileFromBeginning();
+ }
+
@Override
public void setFieldNames(Iterable fieldNames)
{
diff --git a/java-util/src/test/java/io/druid/java/util/common/parsers/CSVParserTest.java b/java-util/src/test/java/io/druid/java/util/common/parsers/CSVParserTest.java
index 8121fd9fd0c..37a589b276a 100644
--- a/java-util/src/test/java/io/druid/java/util/common/parsers/CSVParserTest.java
+++ b/java-util/src/test/java/io/druid/java/util/common/parsers/CSVParserTest.java
@@ -21,7 +21,7 @@ package io.druid.java.util.common.parsers;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
-import junit.framework.Assert;
+import org.junit.Assert;
import org.junit.Test;
import java.util.Map;
@@ -80,7 +80,7 @@ public class CSVParserTest
@Test
public void testCSVParserWithoutHeader()
{
- final Parser csvParser = new CSVParser(Optional.fromNullable(null));
+ final Parser csvParser = new CSVParser(Optional.fromNullable(null), false, 0);
String body = "hello,world,foo";
final Map jsonMap = csvParser.parse(body);
Assert.assertEquals(
@@ -89,4 +89,48 @@ public class CSVParserTest
jsonMap
);
}
+
+ /**
+ * Verifies that, after startFileFromBeginning() is called, parse() returns null for the first
+ * skipHeaderRows lines and returns a map keyed by column_1..column_3 for the line that follows.
+ */
+ @Test
+ public void testCSVParserWithSkipHeaderRows()
+ {
+ final int skipHeaderRows = 2;
+ // NOTE(review): the `false` argument presumably disables taking column names from a header row - confirm against CSVParser.
+ final Parser csvParser = new CSVParser(
+ Optional.absent(),
+ false,
+ skipHeaderRows
+ );
+ csvParser.startFileFromBeginning();
+ final String[] body = new String[] {
+ "header,line,1",
+ "header,line,2",
+ "hello,world,foo"
+ };
+ // index is declared outside the loop so that it points at the first data row once the loop ends.
+ int index;
+ for (index = 0; index < skipHeaderRows; index++) {
+ Assert.assertNull(csvParser.parse(body[index]));
+ }
+ // body[index] is now the third line, the first one expected to produce a map.
+ final Map jsonMap = csvParser.parse(body[index]);
+ Assert.assertEquals(
+ "jsonMap",
+ ImmutableMap.of("column_1", "hello", "column_2", "world", "column_3", "foo"),
+ jsonMap
+ );
+ }
+
+ /**
+ * Verifies that parse() fails with UnsupportedOperationException when the parser is built with a
+ * non-zero skipHeaderRows and startFileFromBeginning() has not been called first.
+ */
+ @Test(expected = UnsupportedOperationException.class)
+ public void testCSVParserWithoutStartFileFromBeginning()
+ {
+ final int skipHeaderRows = 2;
+ final Parser csvParser = new CSVParser(
+ Optional.absent(),
+ false,
+ skipHeaderRows
+ );
+ // Comma-separated rows, matching the input used by testCSVParserWithSkipHeaderRows.
+ final String[] body = new String[] {
+ "header,line,1",
+ "header,line,2",
+ "hello,world,foo"
+ };
+ // startFileFromBeginning() is deliberately not called; parsing the first row is expected to throw.
+ csvParser.parse(body[0]);
+ }
}
diff --git a/java-util/src/test/java/io/druid/java/util/common/parsers/DelimitedParserTest.java b/java-util/src/test/java/io/druid/java/util/common/parsers/DelimitedParserTest.java
index 3ca58c67f67..d91ed25cbbc 100644
--- a/java-util/src/test/java/io/druid/java/util/common/parsers/DelimitedParserTest.java
+++ b/java-util/src/test/java/io/druid/java/util/common/parsers/DelimitedParserTest.java
@@ -21,7 +21,7 @@ package io.druid.java.util.common.parsers;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableMap;
-import junit.framework.Assert;
+import org.junit.Assert;
import org.junit.Test;
import java.util.Map;
@@ -67,7 +67,11 @@ public class DelimitedParserTest
public void testTSVParserWithHeader()
{
String header = "time\tvalue1\tvalue2";
- final Parser delimitedParser = new DelimitedParser(Optional.of("\t"), Optional.absent(), header);
+ final Parser delimitedParser = new DelimitedParser(
+ Optional.of("\t"),
+ Optional.absent(),
+ header
+ );
String body = "hello\tworld\tfoo";
final Map jsonMap = delimitedParser.parse(body);
Assert.assertEquals(
@@ -80,7 +84,12 @@ public class DelimitedParserTest
@Test
public void testTSVParserWithoutHeader()
{
- final Parser delimitedParser = new DelimitedParser(Optional.of("\t"), Optional.absent());
+ final Parser delimitedParser = new DelimitedParser(
+ Optional.of("\t"),
+ Optional.absent(),
+ false,
+ 0
+ );
String body = "hello\tworld\tfoo";
final Map jsonMap = delimitedParser.parse(body);
Assert.assertEquals(
@@ -89,4 +98,50 @@ public class DelimitedParserTest
jsonMap
);
}
+
+ /**
+ * Verifies that, after startFileFromBeginning() is called, a tab-delimited parser returns null for the
+ * first skipHeaderRows lines and returns a map keyed by column_1..column_3 for the line that follows.
+ */
+ @Test
+ public void testTSVParserWithSkipHeaderRows()
+ {
+ final int skipHeaderRows = 2;
+ // NOTE(review): the `false` argument presumably disables taking column names from a header row - confirm against DelimitedParser.
+ final Parser delimitedParser = new DelimitedParser(
+ Optional.of("\t"),
+ Optional.absent(),
+ false,
+ skipHeaderRows
+ );
+ delimitedParser.startFileFromBeginning();
+ final String[] body = new String[] {
+ "header\tline\t1",
+ "header\tline\t2",
+ "hello\tworld\tfoo"
+ };
+ // index is declared outside the loop so that it points at the first data row once the loop ends.
+ int index;
+ for (index = 0; index < skipHeaderRows; index++) {
+ Assert.assertNull(delimitedParser.parse(body[index]));
+ }
+ // body[index] is now the third line, the first one expected to produce a map.
+ final Map jsonMap = delimitedParser.parse(body[index]);
+ Assert.assertEquals(
+ "jsonMap",
+ ImmutableMap.of("column_1", "hello", "column_2", "world", "column_3", "foo"),
+ jsonMap
+ );
+ }
+
+ /**
+ * Verifies that parse() fails with UnsupportedOperationException when the tab-delimited parser is built
+ * with a non-zero skipHeaderRows and startFileFromBeginning() has not been called first.
+ */
+ @Test(expected = UnsupportedOperationException.class)
+ public void testTSVParserWithoutStartFileFromBeginning()
+ {
+ final int skipHeaderRows = 2;
+ final Parser delimitedParser = new DelimitedParser(
+ Optional.of("\t"),
+ Optional.absent(),
+ false,
+ skipHeaderRows
+ );
+ final String[] body = new String[] {
+ "header\tline\t1",
+ "header\tline\t2",
+ "hello\tworld\tfoo"
+ };
+ // startFileFromBeginning() is deliberately not called; only the first row is passed to parse().
+ delimitedParser.parse(body[0]);
+ }
}
diff --git a/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java b/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java
index d4fb1223ec8..614d45ff18b 100644
--- a/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java
+++ b/processing/src/test/java/io/druid/query/MultiValuedDimensionTest.java
@@ -130,7 +130,9 @@ public class MultiValuedDimensionTest
new TimestampSpec("timestamp", "iso", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags")), null, null),
"\t",
- ImmutableList.of("timestamp", "product", "tags")
+ ImmutableList.of("timestamp", "product", "tags"),
+ false,
+ 0
),
"UTF-8"
);
diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerFactoryTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerFactoryTest.java
index 26df8376601..edda6b9db3c 100644
--- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerFactoryTest.java
+++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerFactoryTest.java
@@ -146,7 +146,9 @@ public class GroupByQueryRunnerFactoryTest
new TimestampSpec("timestamp", "iso", null),
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("product", "tags")), null, null),
"\t",
- ImmutableList.of("timestamp", "product", "tags")
+ ImmutableList.of("timestamp", "product", "tags"),
+ false,
+ 0
),
"UTF-8"
);
diff --git a/processing/src/test/java/io/druid/segment/TestIndex.java b/processing/src/test/java/io/druid/segment/TestIndex.java
index 6f529768491..7a29b924860 100644
--- a/processing/src/test/java/io/druid/segment/TestIndex.java
+++ b/processing/src/test/java/io/druid/segment/TestIndex.java
@@ -291,7 +291,9 @@ public class TestIndex
new DimensionsSpec(DIMENSION_SCHEMAS, null, null),
"\t",
"\u0001",
- Arrays.asList(COLUMNS)
+ Arrays.asList(COLUMNS),
+ false,
+ 0
)
, "utf8"
);
diff --git a/server/src/main/java/io/druid/segment/realtime/firehose/ReplayableFirehoseFactory.java b/server/src/main/java/io/druid/segment/realtime/firehose/ReplayableFirehoseFactory.java
index 53a586ce4f2..08cd87ac089 100644
--- a/server/src/main/java/io/druid/segment/realtime/firehose/ReplayableFirehoseFactory.java
+++ b/server/src/main/java/io/druid/segment/realtime/firehose/ReplayableFirehoseFactory.java
@@ -162,6 +162,16 @@ public class ReplayableFirehoseFactory implements FirehoseFactory