diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java
index 41c2d18226b..0a26433933c 100644
--- a/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java
+++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/CSVParser.java
@@ -20,6 +20,7 @@
 package org.apache.druid.java.util.common.parsers;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.opencsv.RFC4180Parser;
 
 import javax.annotation.Nullable;
 import java.io.IOException;
@@ -28,7 +29,7 @@ import java.util.List;
 
 public class CSVParser extends AbstractFlatTextFormatParser
 {
-  private final com.opencsv.CSVParser parser = new com.opencsv.CSVParser();
+  private final RFC4180Parser parser = new RFC4180Parser();
 
   public CSVParser(
       @Nullable final String listDelimiter,
diff --git a/core/src/test/java/org/apache/druid/java/util/common/parsers/CSVParserTest.java b/core/src/test/java/org/apache/druid/java/util/common/parsers/CSVParserTest.java
new file mode 100644
index 00000000000..2f95ebbd057
--- /dev/null
+++ b/core/src/test/java/org/apache/druid/java/util/common/parsers/CSVParserTest.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.java.util.common.parsers;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Tests for {@link CSVParser}: quoted fields with doubled-quote escapes and embedded commas must be unescaped,
+ * while backslashes are expected to be passed through literally.
+ */
+public class CSVParserTest
+{
+  /**
+   * Parses five three-column rows covering doubled quotes, quoted commas, backslashes next to quotes, and an
+   * unquoted backslash sequence, then compares the resulting maps with the expected field values.
+   */
+  @Test
+  public void testBasic() throws IOException
+  {
+    CSVParser parser = new CSVParser(null, ImmutableList.of("Value", "Comment", "Timestamp"), false, 0);
+
+    final List<String> inputs = ImmutableList.of(
+        "3,\"Lets do some \"\"normal\"\" quotes\",2018-05-05T10:00:00Z",
+        "34,\"Lets do some \"\"normal\"\", quotes with comma\",2018-05-06T10:00:00Z",
+        "343,\"Lets try \\\"\"it\\\"\" with slash quotes\",2018-05-07T10:00:00Z",
+        "545,\"Lets try \\\"\"it\\\"\", with slash quotes and comma\",2018-05-08T10:00:00Z",
+        "65,Here I write \\n slash n,2018-05-09T10:00:00Z"
+    );
+    final List<Map<String, Object>> expectedResult = ImmutableList.of(
+        ImmutableMap.of("Value", "3", "Comment", "Lets do some \"normal\" quotes", "Timestamp", "2018-05-05T10:00:00Z"),
+        ImmutableMap.of(
+            "Value",
+            "34",
+            "Comment",
+            "Lets do some \"normal\", quotes with comma",
+            "Timestamp",
+            "2018-05-06T10:00:00Z"
+        ),
+        ImmutableMap.of(
+            "Value",
+            "343",
+            "Comment",
+            "Lets try \\\"it\\\" with slash quotes",
+            "Timestamp",
+            "2018-05-07T10:00:00Z"
+        ),
+        ImmutableMap.of(
+            "Value",
+            "545",
+            "Comment",
+            "Lets try \\\"it\\\", with slash quotes and comma",
+            "Timestamp",
+            "2018-05-08T10:00:00Z"
+        ),
+        ImmutableMap.of("Value", "65", "Comment", "Here I write \\n slash n", "Timestamp", "2018-05-09T10:00:00Z")
+    );
+    final List<Map<String, Object>> parsedResult = new ArrayList<>();
+
+    for (String input : inputs) {
+      Map<String, Object> parsedLineList = parser.parseToMap(input);
+      parsedResult.add(parsedLineList);
+    }
+
+    Assert.assertEquals(expectedResult, parsedResult);
+  }
+
+  /**
+   * Parses a single quoted, non-ASCII (Cyrillic) field containing backslash-prefixed doubled quotes and a comma,
+   * and checks that it comes back as one {@code String} with the backslashes kept.
+   */
+  @Test
+  public void testRussianTextMess() throws IOException
+  {
+    CSVParser parser = new CSVParser(null, ImmutableList.of("Comment"), false, 0);
+    final String input = "\"Как говорится: \\\"\"всё течет, всё изменяется\\\"\". Украина как всегда обвиняет Россию в собственных проблемах. #ПровокацияКиева\"";
+    final Map<String, Object> expect = ImmutableMap.of(
+        "Comment",
+        "Как говорится: \\\"всё течет, всё изменяется\\\". Украина как всегда обвиняет Россию в собственных проблемах. #ПровокацияКиева"
+    );
+    final Map<String, Object> parsedInput = parser.parseToMap(input);
+
+    Assert.assertEquals(String.class, parsedInput.get("Comment").getClass());
+    Assert.assertEquals(expect, parsedInput);
+  }
+}