BAEL-5196 - Split a comma-separated string while ignoring commas in quotes (#11432)
* Creating the module 'core-java-string-operations-4' for new string-related code samples. Implemented code samples for the article BAEL-5196 * including the new module 'core-java-string-operations-4' in the parent project * fixing spacing in the pom file * fixing the maven configuration for our new project core-java-string-operations-4
This commit is contained in:
parent
c220564cf7
commit
607bd2215a
|
@ -0,0 +1,3 @@
|
||||||
|
### Relevant Articles:
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
<artifactId>core-java-string-operations-4</artifactId>
|
||||||
|
<version>0.1.0-SNAPSHOT</version>
|
||||||
|
<name>core-java-string-operations-4</name>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<parent>
|
||||||
|
<groupId>com.baeldung.core-java-modules</groupId>
|
||||||
|
<artifactId>core-java-modules</artifactId>
|
||||||
|
<version>0.0.1-SNAPSHOT</version>
|
||||||
|
<relativePath>../</relativePath>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.assertj</groupId>
|
||||||
|
<artifactId>assertj-core</artifactId>
|
||||||
|
<version>${assertj.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.google.guava</groupId>
|
||||||
|
<artifactId>guava</artifactId>
|
||||||
|
<version>${guava.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.opencsv</groupId>
|
||||||
|
<artifactId>opencsv</artifactId>
|
||||||
|
<version>${opencsv.version}</version>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<source>${maven.compiler.source}</source>
|
||||||
|
<target>${maven.compiler.target}</target>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<maven.compiler.source>11</maven.compiler.source>
|
||||||
|
<maven.compiler.target>11</maven.compiler.target>
|
||||||
|
<assertj.version>3.6.1</assertj.version>
|
||||||
|
<guava.version>31.0.1-jre</guava.version>
|
||||||
|
<opencsv.version>4.1</opencsv.version>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
</project>
|
|
@ -0,0 +1,66 @@
|
||||||
|
package com.baeldung.commaseparatedstring;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import com.google.common.base.Splitter;
|
||||||
|
import com.opencsv.CSVParser;
|
||||||
|
import com.opencsv.CSVParserBuilder;
|
||||||
|
import com.opencsv.CSVReader;
|
||||||
|
import com.opencsv.CSVReaderBuilder;
|
||||||
|
|
||||||
|
public class SplitCommaSeparatedString {
|
||||||
|
|
||||||
|
public static List<String> splitWithParser(String input) {
|
||||||
|
|
||||||
|
List<String> tokens = new ArrayList<String>();
|
||||||
|
int startPosition = 0;
|
||||||
|
boolean isInQuotes = false;
|
||||||
|
for (int currentPosition = 0; currentPosition < input.length(); currentPosition++) {
|
||||||
|
if (input.charAt(currentPosition) == '\"') {
|
||||||
|
isInQuotes = !isInQuotes;
|
||||||
|
} else if (input.charAt(currentPosition) == ',' && !isInQuotes) {
|
||||||
|
tokens.add(input.substring(startPosition, currentPosition));
|
||||||
|
startPosition = currentPosition + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
String lastToken = input.substring(startPosition);
|
||||||
|
if (lastToken.equals(",")) {
|
||||||
|
tokens.add("");
|
||||||
|
} else {
|
||||||
|
tokens.add(lastToken);
|
||||||
|
}
|
||||||
|
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<String> splitWithRegex(String input) {
|
||||||
|
String[] tokens = input.split(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)", -1);
|
||||||
|
return Arrays.asList(tokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<String> splitWithGuava(String input) {
|
||||||
|
Pattern pattern = Pattern.compile(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)");
|
||||||
|
Splitter splitter = Splitter.on(pattern);
|
||||||
|
return splitter.splitToList(input);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<String[]> splitMultiLineWithOpenCSV(String input) throws IOException {
|
||||||
|
CSVParser parser = new CSVParserBuilder().withSeparator(',')
|
||||||
|
.build();
|
||||||
|
|
||||||
|
CSVReader reader = new CSVReaderBuilder(new StringReader(input)).withCSVParser(parser)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
List<String[]> list = new ArrayList<>();
|
||||||
|
list = reader.readAll();
|
||||||
|
reader.close();
|
||||||
|
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
package com.baeldung.commaseparatedstring;
|
||||||
|
|
||||||
|
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitMultiLineWithOpenCSV;
|
||||||
|
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithGuava;
|
||||||
|
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithParser;
|
||||||
|
import static com.baeldung.commaseparatedstring.SplitCommaSeparatedString.splitWithRegex;
|
||||||
|
import static org.hamcrest.MatcherAssert.assertThat;
|
||||||
|
import static org.hamcrest.Matchers.contains;
|
||||||
|
import static org.hamcrest.Matchers.hasSize;
|
||||||
|
import static org.junit.Assert.assertArrayEquals;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class SplitCommaSeparatedStringUnitTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenSingleLineInput_whenParsing_shouldIgnoreCommasInsideDoubleQuotes() {
|
||||||
|
String input = "baeldung,tutorial,splitting,text,\"ignoring this comma,\"";
|
||||||
|
|
||||||
|
var matcher = contains("baeldung", "tutorial", "splitting", "text", "\"ignoring this comma,\"");
|
||||||
|
assertThat(splitWithParser(input), matcher);
|
||||||
|
assertThat(splitWithRegex(input), matcher);
|
||||||
|
assertThat(splitWithGuava(input), matcher);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenMultiLineInput_whenParsing_shouldIgnoreCommasInsideDoubleQuotes() throws IOException {
|
||||||
|
String input = "baeldung,tutorial,splitting,text,\"ignoring this comma,\"" + System.lineSeparator()
|
||||||
|
+ "splitting,a,regular,line,no double quotes";
|
||||||
|
|
||||||
|
String[] firstLine = new String[]{"baeldung", "tutorial", "splitting", "text", "ignoring this comma,"};
|
||||||
|
String[] secondLine = new String[]{"splitting", "a", "regular", "line", "no double quotes"};
|
||||||
|
|
||||||
|
List<String[]> result = splitMultiLineWithOpenCSV(input);
|
||||||
|
|
||||||
|
assertThat(result, hasSize(2));
|
||||||
|
assertArrayEquals(firstLine, result.get(0));
|
||||||
|
assertArrayEquals(secondLine, result.get(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
1
pom.xml
1
pom.xml
|
@ -1304,6 +1304,7 @@
|
||||||
<module>core-java-modules/core-java-jpms</module>
|
<module>core-java-modules/core-java-jpms</module>
|
||||||
<module>core-java-modules/core-java-os</module>
|
<module>core-java-modules/core-java-os</module>
|
||||||
<module>core-java-modules/core-java-string-operations-3</module>
|
<module>core-java-modules/core-java-string-operations-3</module>
|
||||||
|
<module>core-java-modules/core-java-string-operations-4</module>
|
||||||
<module>core-java-modules/core-java-time-measurements</module>
|
<module>core-java-modules/core-java-time-measurements</module>
|
||||||
<module>core-java-modules/multimodulemavenproject</module>
|
<module>core-java-modules/multimodulemavenproject</module>
|
||||||
<module>persistence-modules/sirix</module>
|
<module>persistence-modules/sirix</module>
|
||||||
|
|
Loading…
Reference in New Issue