BAEL-5196 - Split a comma-separated string while ignoring commas in quotes (#11432)

* Creating the module 'core-java-string-operations-4' for new string related code samples. Implemented code samples for the article BAEL-5196

* including new module 'core-java-string-operations-4' in the parent project

* fixing spacing in the pom file

* fixing the maven configuration for our new project core-java-string-operations-4
This commit is contained in:
Willian Nalepa Oizumi 2021-11-15 10:15:35 -03:00 committed by GitHub
parent c220564cf7
commit 607bd2215a
5 changed files with 172 additions and 0 deletions

View File

@ -0,0 +1,3 @@
### Relevant Articles:

View File

@ -0,0 +1,58 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven descriptor for the core-java-string-operations-4 module (string related code samples). -->
<modelVersion>4.0.0</modelVersion>
<artifactId>core-java-string-operations-4</artifactId>
<version>0.1.0-SNAPSHOT</version>
<name>core-java-string-operations-4</name>
<packaging>jar</packaging>
<!-- Parent is the core-java-modules aggregator located one directory up. -->
<parent>
<groupId>com.baeldung.core-java-modules</groupId>
<artifactId>core-java-modules</artifactId>
<version>0.0.1-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
<dependencies>
<!-- AssertJ assertions, available on the test classpath only. -->
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<version>${assertj.version}</version>
<scope>test</scope>
</dependency>
<!-- Guava: supplies com.google.common.base.Splitter used by SplitCommaSeparatedString.splitWithGuava. -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
<!-- OpenCSV: supplies the CSVParser/CSVReader used by SplitCommaSeparatedString.splitMultiLineWithOpenCSV. -->
<dependency>
<groupId>com.opencsv</groupId>
<artifactId>opencsv</artifactId>
<version>${opencsv.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compiler source/target levels are taken from the properties declared below. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
</configuration>
</plugin>
</plugins>
</build>
<!-- Java language level (11) and the versions referenced by the dependencies above. -->
<properties>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<assertj.version>3.6.1</assertj.version>
<guava.version>31.0.1-jre</guava.version>
<opencsv.version>4.1</opencsv.version>
</properties>
</project>

View File

@ -0,0 +1,66 @@
package com.baeldung.commaseparatedstring;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import com.google.common.base.Splitter;
import com.opencsv.CSVParser;
import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
/**
 * Alternative ways of splitting comma-separated text while ignoring commas that sit inside
 * double-quoted sections.
 *
 * <p>All methods require a non-null {@code input}.
 */
public class SplitCommaSeparatedString {

    /**
     * Matches a comma only when the remainder of the input contains an even number of double
     * quotes, i.e. when the comma is not enclosed in a quoted section. Compiled once and shared
     * by the regex-based and Guava-based splitters so both use exactly the same rule.
     */
    private static final Pattern COMMA_OUTSIDE_QUOTES = Pattern.compile(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");

    /**
     * Splits {@code input} on commas with a hand-written single-pass scanner.
     *
     * <p>Every double quote toggles the "inside quotes" state; a comma is a delimiter only while
     * that state is off. Quote characters are kept in the returned tokens, and empty tokens
     * (including a trailing one after a final comma) are preserved.
     *
     * @param input the line to split
     * @return a mutable list of tokens in order of appearance; never empty
     */
    public static List<String> splitWithParser(String input) {
        List<String> tokens = new ArrayList<>();
        int startPosition = 0;
        boolean isInQuotes = false;
        for (int currentPosition = 0; currentPosition < input.length(); currentPosition++) {
            char current = input.charAt(currentPosition);
            if (current == '"') {
                isInQuotes = !isInQuotes;
            } else if (current == ',' && !isInQuotes) {
                tokens.add(input.substring(startPosition, currentPosition));
                startPosition = currentPosition + 1;
            }
        }
        // Whatever follows the last delimiter is the final token. It is the empty string when the
        // input ends with an unquoted comma, so no special case is needed for that situation.
        tokens.add(input.substring(startPosition));
        return tokens;
    }

    /**
     * Splits {@code input} with the JDK regex engine using {@link #COMMA_OUTSIDE_QUOTES}.
     *
     * <p>The {@code -1} limit keeps trailing empty tokens instead of discarding them.
     *
     * @param input the line to split
     * @return a fixed-size list view over the resulting token array
     */
    public static List<String> splitWithRegex(String input) {
        return Arrays.asList(COMMA_OUTSIDE_QUOTES.split(input, -1));
    }

    /**
     * Splits {@code input} with a Guava {@link Splitter} driven by {@link #COMMA_OUTSIDE_QUOTES}.
     *
     * @param input the line to split
     * @return the list of tokens produced by the splitter
     */
    public static List<String> splitWithGuava(String input) {
        return Splitter.on(COMMA_OUTSIDE_QUOTES)
            .splitToList(input);
    }

    /**
     * Parses possibly multi-line CSV text with OpenCSV, using a comma as the separator.
     *
     * <p>The reader is opened in a try-with-resources statement so it is closed even when
     * reading fails.
     *
     * @param input the CSV text, one record per line
     * @return one {@code String[]} of field values per record read
     * @throws IOException if reading or closing the underlying reader fails
     */
    public static List<String[]> splitMultiLineWithOpenCSV(String input) throws IOException {
        CSVParser parser = new CSVParserBuilder().withSeparator(',')
            .build();
        try (CSVReader reader = new CSVReaderBuilder(new StringReader(input)).withCSVParser(parser)
            .build()) {
            return reader.readAll();
        }
    }
}

View File

@ -0,0 +1,44 @@
package com.baeldung.commaseparatedstring;
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitMultiLineWithOpenCSV;
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithGuava;
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithParser;
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithRegex;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.hasSize;
import static org.junit.Assert.assertArrayEquals;
import java.io.IOException;
import java.util.List;
import org.junit.Test;
/**
 * Checks that every splitting strategy in {@code SplitCommaSeparatedString} leaves commas inside
 * double quotes untouched.
 */
public class SplitCommaSeparatedStringUnitTest {

    /**
     * The parser, regex and Guava variants must all yield the same five tokens, with the quoted
     * token still carrying its surrounding quotes.
     */
    @Test
    public void givenSingleLineInput_whenParsing_shouldIgnoreCommasInsideDoubleQuotes() {
        String line = "baeldung,tutorial,splitting,text,\"ignoring this comma,\"";

        var expectedTokens = contains("baeldung", "tutorial", "splitting", "text", "\"ignoring this comma,\"");

        List<String> fromParser = splitWithParser(line);
        assertThat(fromParser, expectedTokens);

        List<String> fromRegex = splitWithRegex(line);
        assertThat(fromRegex, expectedTokens);

        List<String> fromGuava = splitWithGuava(line);
        assertThat(fromGuava, expectedTokens);
    }

    /**
     * The OpenCSV variant must return one array per input line; the expected first record holds
     * the quoted field without its surrounding quotes.
     */
    @Test
    public void givenMultiLineInput_whenParsing_shouldIgnoreCommasInsideDoubleQuotes() throws IOException {
        String quotedLine = "baeldung,tutorial,splitting,text,\"ignoring this comma,\"";
        String plainLine = "splitting,a,regular,line,no double quotes";
        String csv = String.join(System.lineSeparator(), quotedLine, plainLine);

        String[] expectedFirst = { "baeldung", "tutorial", "splitting", "text", "ignoring this comma," };
        String[] expectedSecond = { "splitting", "a", "regular", "line", "no double quotes" };

        List<String[]> records = splitMultiLineWithOpenCSV(csv);

        assertThat(records, hasSize(2));
        assertArrayEquals(expectedFirst, records.get(0));
        assertArrayEquals(expectedSecond, records.get(1));
    }
}

View File

@ -1304,6 +1304,7 @@
<module>core-java-modules/core-java-jpms</module>
<module>core-java-modules/core-java-os</module>
<module>core-java-modules/core-java-string-operations-3</module>
<module>core-java-modules/core-java-string-operations-4</module>
<module>core-java-modules/core-java-time-measurements</module>
<module>core-java-modules/multimodulemavenproject</module>
<module>persistence-modules/sirix</module>