From 607bd2215ade1bdcdf37df15443c375f35b04ada Mon Sep 17 00:00:00 2001 From: Willian Nalepa Oizumi Date: Mon, 15 Nov 2021 10:15:35 -0300 Subject: [PATCH] BAEL-5196 - Split a comma-separated string while ignoring commas in quotes (#11432) * Creating the module 'core-java-string-operations-4' for new string related code samples. Implemented code samples for the article BAEL-5196 * including new module 'core-java-string-operations-4 in the parent project * fixing spacing in the pom file * fixing the maven configuration for our new project core-java-string-operations-4 --- .../core-java-string-operations-4/README.md | 3 + .../core-java-string-operations-4/pom.xml | 58 ++++++++++++++++ .../SplitCommaSeparatedString.java | 66 +++++++++++++++++++ .../SplitCommaSeparatedStringUnitTest.java | 44 +++++++++++++ pom.xml | 1 + 5 files changed, 172 insertions(+) create mode 100644 core-java-modules/core-java-string-operations-4/README.md create mode 100644 core-java-modules/core-java-string-operations-4/pom.xml create mode 100644 core-java-modules/core-java-string-operations-4/src/main/java/com/baeldung/commaseparatedstring/SplitCommaSeparatedString.java create mode 100644 core-java-modules/core-java-string-operations-4/src/test/java/com/baeldung/commaseparatedstring/SplitCommaSeparatedStringUnitTest.java diff --git a/core-java-modules/core-java-string-operations-4/README.md b/core-java-modules/core-java-string-operations-4/README.md new file mode 100644 index 0000000000..88d562204b --- /dev/null +++ b/core-java-modules/core-java-string-operations-4/README.md @@ -0,0 +1,3 @@ +### Relevant Articles: + + diff --git a/core-java-modules/core-java-string-operations-4/pom.xml b/core-java-modules/core-java-string-operations-4/pom.xml new file mode 100644 index 0000000000..ea6bdcd849 --- /dev/null +++ b/core-java-modules/core-java-string-operations-4/pom.xml @@ -0,0 +1,58 @@ + + + 4.0.0 + core-java-string-operations-4 + 0.1.0-SNAPSHOT + core-java-string-operations-4 + jar + + + com.baeldung.core-java-modules + core-java-modules + 0.0.1-SNAPSHOT + ../ + + + + + org.assertj + assertj-core + ${assertj.version} + test + + + com.google.guava + guava + ${guava.version} + + + com.opencsv + opencsv + ${opencsv.version} + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + ${maven.compiler.source} + ${maven.compiler.target} + + + + + + + 11 + 11 + 3.6.1 + 31.0.1-jre + 4.1 + + + diff --git a/core-java-modules/core-java-string-operations-4/src/main/java/com/baeldung/commaseparatedstring/SplitCommaSeparatedString.java b/core-java-modules/core-java-string-operations-4/src/main/java/com/baeldung/commaseparatedstring/SplitCommaSeparatedString.java new file mode 100644 index 0000000000..c3bbdb4dfb --- /dev/null +++ b/core-java-modules/core-java-string-operations-4/src/main/java/com/baeldung/commaseparatedstring/SplitCommaSeparatedString.java @@ -0,0 +1,66 @@ +package com.baeldung.commaseparatedstring; + +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Pattern; + +import com.google.common.base.Splitter; +import com.opencsv.CSVParser; +import com.opencsv.CSVParserBuilder; +import com.opencsv.CSVReader; +import com.opencsv.CSVReaderBuilder; + +public class SplitCommaSeparatedString { + + public static List splitWithParser(String input) { + + List tokens = new ArrayList(); + int startPosition = 0; + boolean isInQuotes = false; + for (int currentPosition = 0; currentPosition < input.length(); currentPosition++) { + if (input.charAt(currentPosition) == '\"') { + isInQuotes = !isInQuotes; + } else if (input.charAt(currentPosition) == ',' && !isInQuotes) { + tokens.add(input.substring(startPosition, currentPosition)); + startPosition = currentPosition + 1; + } + } + + String lastToken = input.substring(startPosition); + if (lastToken.equals(",")) { + tokens.add(""); + } else { + tokens.add(lastToken); + } + + return tokens; + } + + public static List splitWithRegex(String input) { + String[] tokens = input.split(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)", -1); + return Arrays.asList(tokens); + } + + public static List splitWithGuava(String input) { + Pattern pattern = Pattern.compile(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)"); + Splitter splitter = Splitter.on(pattern); + return splitter.splitToList(input); + } + + public static List splitMultiLineWithOpenCSV(String input) throws IOException { + CSVParser parser = new CSVParserBuilder().withSeparator(',') + .build(); + + CSVReader reader = new CSVReaderBuilder(new StringReader(input)).withCSVParser(parser) + .build(); + + List list = new ArrayList<>(); + list = reader.readAll(); + reader.close(); + + return list; + } +} \ No newline at end of file diff --git a/core-java-modules/core-java-string-operations-4/src/test/java/com/baeldung/commaseparatedstring/SplitCommaSeparatedStringUnitTest.java b/core-java-modules/core-java-string-operations-4/src/test/java/com/baeldung/commaseparatedstring/SplitCommaSeparatedStringUnitTest.java new file mode 100644 index 0000000000..ca34430099 --- /dev/null +++ b/core-java-modules/core-java-string-operations-4/src/test/java/com/baeldung/commaseparatedstring/SplitCommaSeparatedStringUnitTest.java @@ -0,0 +1,44 @@ +package com.baeldung.commaseparatedstring; + +import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitMultiLineWithOpenCSV; +import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithGuava; +import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithParser; +import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithRegex; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.hasSize; +import static org.junit.Assert.assertArrayEquals; + +import java.io.IOException; +import java.util.List; + +import org.junit.Test; + +public class SplitCommaSeparatedStringUnitTest { + + @Test + public void givenSingleLineInput_whenParsing_shouldIgnoreCommasInsideDoubleQuotes() { + String input = "baeldung,tutorial,splitting,text,\"ignoring this comma,\""; + + var matcher = contains("baeldung", "tutorial", "splitting", "text", "\"ignoring this comma,\""); + assertThat(splitWithParser(input), matcher); + assertThat(splitWithRegex(input), matcher); + assertThat(splitWithGuava(input), matcher); + } + + @Test + public void givenMultiLineInput_whenParsing_shouldIgnoreCommasInsideDoubleQuotes() throws IOException { + String input = "baeldung,tutorial,splitting,text,\"ignoring this comma,\"" + System.lineSeparator() + + "splitting,a,regular,line,no double quotes"; + + String[] firstLine = new String[]{"baeldung", "tutorial", "splitting", "text", "ignoring this comma,"}; + String[] secondLine = new String[]{"splitting", "a", "regular", "line", "no double quotes"}; + + List result = splitMultiLineWithOpenCSV(input); + + assertThat(result, hasSize(2)); + assertArrayEquals(firstLine, result.get(0)); + assertArrayEquals(secondLine, result.get(1)); + } + +} diff --git a/pom.xml b/pom.xml index 372bc5a9f3..2672706f6b 100644 --- a/pom.xml +++ b/pom.xml @@ -1304,6 +1304,7 @@ core-java-modules/core-java-jpms core-java-modules/core-java-os core-java-modules/core-java-string-operations-3 + core-java-modules/core-java-string-operations-4 core-java-modules/core-java-time-measurements core-java-modules/multimodulemavenproject persistence-modules/sirix