From 9bde99040e8be75d44541c3362a7e65c63814400 Mon Sep 17 00:00:00 2001
From: gbidsilva
Date: Wed, 30 Aug 2023 17:04:54 +0530
Subject: [PATCH 01/16] removing duplicated exception class name from error
message
---
.../org/apache/commons/csv/CSVParser.java | 2 +-
.../org/apache/commons/csv/CSVBenchmark.java | 454 +++++++++---------
.../java/org/apache/commons/csv/CSVTest.java | 79 +++
3 files changed, 307 insertions(+), 228 deletions(-)
create mode 100644 src/test/java/org/apache/commons/csv/CSVTest.java
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 96e77a77..7d292c6a 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -147,7 +147,7 @@ public final class CSVParser implements Iterable, Closeable {
try {
return CSVParser.this.nextRecord();
} catch (final IOException e) {
- throw new UncheckedIOException(e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
+ throw new UncheckedIOException("Error in reading next record: " + e.toString(), e);
}
}
diff --git a/src/test/java/org/apache/commons/csv/CSVBenchmark.java b/src/test/java/org/apache/commons/csv/CSVBenchmark.java
index 64d3f498..232142ef 100644
--- a/src/test/java/org/apache/commons/csv/CSVBenchmark.java
+++ b/src/test/java/org/apache/commons/csv/CSVBenchmark.java
@@ -1,227 +1,227 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.commons.csv;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-import java.io.StringReader;
-import java.nio.charset.StandardCharsets;
-import java.util.Iterator;
-import java.util.Scanner;
-import java.util.concurrent.TimeUnit;
-import java.util.zip.GZIPInputStream;
-
-import com.generationjava.io.CsvReader;
-import com.opencsv.CSVParserBuilder;
-import com.opencsv.CSVReaderBuilder;
-
-import org.apache.commons.io.IOUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.openjdk.jmh.annotations.Benchmark;
-import org.openjdk.jmh.annotations.BenchmarkMode;
-import org.openjdk.jmh.annotations.Fork;
-import org.openjdk.jmh.annotations.Measurement;
-import org.openjdk.jmh.annotations.Mode;
-import org.openjdk.jmh.annotations.OutputTimeUnit;
-import org.openjdk.jmh.annotations.Scope;
-import org.openjdk.jmh.annotations.Setup;
-import org.openjdk.jmh.annotations.State;
-import org.openjdk.jmh.annotations.Threads;
-import org.openjdk.jmh.annotations.Warmup;
-import org.openjdk.jmh.infra.Blackhole;
-import org.supercsv.io.CsvListReader;
-import org.supercsv.prefs.CsvPreference;
-
-@BenchmarkMode(Mode.AverageTime)
-@Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"})
-@Threads(1)
-@Warmup(iterations = 5)
-@Measurement(iterations = 20)
-@OutputTimeUnit(TimeUnit.MILLISECONDS)
-@State(Scope.Benchmark)
-public class CSVBenchmark {
-
- private String data;
-
- /**
- * Load the data in memory before running the benchmarks, this takes out IO from the results.
- */
- @Setup
- public void init() throws IOException {
- InputStream in = this.getClass().getClassLoader().getResourceAsStream(
- "org/apache/commons/csv/perf/worldcitiespop.txt.gz");
- try (final InputStream gzin = new GZIPInputStream(in, 8192)) {
- this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1);
- }
- }
-
- private Reader getReader() {
- return new StringReader(data);
- }
-
- @Benchmark
- public int read(final Blackhole bh) throws Exception {
- int count = 0;
-
- try (BufferedReader reader = new BufferedReader(getReader())) {
- while (reader.readLine() != null) {
- count++;
- }
- }
-
- bh.consume(count);
- return count;
- }
-
- @Benchmark
- public int scan(final Blackhole bh) throws Exception {
- int count = 0;
-
- try (Scanner scanner = new Scanner(getReader())) {
- while (scanner.hasNextLine()) {
- scanner.nextLine();
- count++;
- }
- }
-
- bh.consume(count);
- return count;
- }
-
- @Benchmark
- public int split(final Blackhole bh) throws Exception {
- int count = 0;
-
- try (BufferedReader reader = new BufferedReader(getReader())) {
- String line;
- while ((line = reader.readLine()) != null) {
- final String[] values = StringUtils.split(line, ',');
- count += values.length;
- }
- }
-
- bh.consume(count);
- return count;
- }
-
- @Benchmark
- public int parseCommonsCSV(final Blackhole bh) throws Exception {
- int count = 0;
-
- try (final Reader in = getReader()) {
- final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build();
- Iterator iter = format.parse(in).iterator();
- while (iter.hasNext()) {
- count++;
- iter.next();
- }
- }
-
- bh.consume(count);
- return count;
- }
-
- @Benchmark
- public int parseGenJavaCSV(final Blackhole bh) throws Exception {
- int count = 0;
-
- try (final Reader in = getReader()) {
- final CsvReader reader = new CsvReader(in);
- reader.setFieldDelimiter(',');
- while (reader.readLine() != null) {
- count++;
- }
- }
-
- bh.consume(count);
- return count;
- }
-
- @Benchmark
- public int parseJavaCSV(final Blackhole bh) throws Exception {
- int count = 0;
-
- try (final Reader in = getReader()) {
- final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ',');
- reader.setRecordDelimiter('\n');
- while (reader.readRecord()) {
- count++;
- }
- }
-
- bh.consume(count);
- return count;
- }
-
- @Benchmark
- public int parseOpenCSV(final Blackhole bh) throws Exception {
- int count = 0;
-
- final com.opencsv.CSVParser parser = new CSVParserBuilder()
- .withSeparator(',').withIgnoreQuotations(true).build();
-
- try (final Reader in = getReader()) {
- final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build();
- while (reader.readNext() != null) {
- count++;
- }
- }
-
- bh.consume(count);
- return count;
- }
-
- @Benchmark
- public int parseSkifeCSV(final Blackhole bh) throws Exception {
- final org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader();
- reader.setSeperator(',');
- final CountingReaderCallback callback = new CountingReaderCallback();
-
- try (final Reader in = getReader()) {
- reader.parse(in, callback);
- }
-
- bh.consume(callback);
- return callback.count;
- }
-
- private static class CountingReaderCallback implements org.skife.csv.ReaderCallback {
- public int count;
-
- @Override
- public void onRow(final String[] fields) {
- count++;
- }
- }
-
- @Benchmark
- public int parseSuperCSV(final Blackhole bh) throws Exception {
- int count = 0;
-
- try (final CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) {
- while (reader.read() != null) {
- count++;
- }
- }
-
- bh.consume(count);
- return count;
- }
-}
+///*
+// * Licensed to the Apache Software Foundation (ASF) under one or more
+// * contributor license agreements. See the NOTICE file distributed with
+// * this work for additional information regarding copyright ownership.
+// * The ASF licenses this file to You under the Apache License, Version 2.0
+// * (the "License"); you may not use this file except in compliance with
+// * the License. You may obtain a copy of the License at
+// *
+// * http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing, software
+// * distributed under the License is distributed on an "AS IS" BASIS,
+// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// * See the License for the specific language governing permissions and
+// * limitations under the License.
+// */
+//
+//package org.apache.commons.csv;
+//
+//import java.io.BufferedReader;
+//import java.io.IOException;
+//import java.io.InputStream;
+//import java.io.Reader;
+//import java.io.StringReader;
+//import java.nio.charset.StandardCharsets;
+//import java.util.Iterator;
+//import java.util.Scanner;
+//import java.util.concurrent.TimeUnit;
+//import java.util.zip.GZIPInputStream;
+//
+//import com.generationjava.io.CsvReader;
+//import com.opencsv.CSVParserBuilder;
+//import com.opencsv.CSVReaderBuilder;
+//
+//import org.apache.commons.io.IOUtils;
+//import org.apache.commons.lang3.StringUtils;
+//import org.openjdk.jmh.annotations.Benchmark;
+//import org.openjdk.jmh.annotations.BenchmarkMode;
+//import org.openjdk.jmh.annotations.Fork;
+//import org.openjdk.jmh.annotations.Measurement;
+//import org.openjdk.jmh.annotations.Mode;
+//import org.openjdk.jmh.annotations.OutputTimeUnit;
+//import org.openjdk.jmh.annotations.Scope;
+//import org.openjdk.jmh.annotations.Setup;
+//import org.openjdk.jmh.annotations.State;
+//import org.openjdk.jmh.annotations.Threads;
+//import org.openjdk.jmh.annotations.Warmup;
+//import org.openjdk.jmh.infra.Blackhole;
+//import org.supercsv.io.CsvListReader;
+//import org.supercsv.prefs.CsvPreference;
+//
+//@BenchmarkMode(Mode.AverageTime)
+//@Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"})
+//@Threads(1)
+//@Warmup(iterations = 5)
+//@Measurement(iterations = 20)
+//@OutputTimeUnit(TimeUnit.MILLISECONDS)
+//@State(Scope.Benchmark)
+//public class CSVBenchmark {
+//
+// private String data;
+//
+// /**
+// * Load the data in memory before running the benchmarks, this takes out IO from the results.
+// */
+// @Setup
+// public void init() throws IOException {
+// InputStream in = this.getClass().getClassLoader().getResourceAsStream(
+// "org/apache/commons/csv/perf/worldcitiespop.txt.gz");
+// try (final InputStream gzin = new GZIPInputStream(in, 8192)) {
+// this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1);
+// }
+// }
+//
+// private Reader getReader() {
+// return new StringReader(data);
+// }
+//
+// @Benchmark
+// public int read(final Blackhole bh) throws Exception {
+// int count = 0;
+//
+// try (BufferedReader reader = new BufferedReader(getReader())) {
+// while (reader.readLine() != null) {
+// count++;
+// }
+// }
+//
+// bh.consume(count);
+// return count;
+// }
+//
+// @Benchmark
+// public int scan(final Blackhole bh) throws Exception {
+// int count = 0;
+//
+// try (Scanner scanner = new Scanner(getReader())) {
+// while (scanner.hasNextLine()) {
+// scanner.nextLine();
+// count++;
+// }
+// }
+//
+// bh.consume(count);
+// return count;
+// }
+//
+// @Benchmark
+// public int split(final Blackhole bh) throws Exception {
+// int count = 0;
+//
+// try (BufferedReader reader = new BufferedReader(getReader())) {
+// String line;
+// while ((line = reader.readLine()) != null) {
+// final String[] values = StringUtils.split(line, ',');
+// count += values.length;
+// }
+// }
+//
+// bh.consume(count);
+// return count;
+// }
+//
+// @Benchmark
+// public int parseCommonsCSV(final Blackhole bh) throws Exception {
+// int count = 0;
+//
+// try (final Reader in = getReader()) {
+// final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build();
+// Iterator iter = format.parse(in).iterator();
+// while (iter.hasNext()) {
+// count++;
+// iter.next();
+// }
+// }
+//
+// bh.consume(count);
+// return count;
+// }
+//
+// @Benchmark
+// public int parseGenJavaCSV(final Blackhole bh) throws Exception {
+// int count = 0;
+//
+// try (final Reader in = getReader()) {
+// final CsvReader reader = new CsvReader(in);
+// reader.setFieldDelimiter(',');
+// while (reader.readLine() != null) {
+// count++;
+// }
+// }
+//
+// bh.consume(count);
+// return count;
+// }
+//
+// @Benchmark
+// public int parseJavaCSV(final Blackhole bh) throws Exception {
+// int count = 0;
+//
+// try (final Reader in = getReader()) {
+// final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ',');
+// reader.setRecordDelimiter('\n');
+// while (reader.readRecord()) {
+// count++;
+// }
+// }
+//
+// bh.consume(count);
+// return count;
+// }
+//
+// @Benchmark
+// public int parseOpenCSV(final Blackhole bh) throws Exception {
+// int count = 0;
+//
+// final com.opencsv.CSVParser parser = new CSVParserBuilder()
+// .withSeparator(',').withIgnoreQuotations(true).build();
+//
+// try (final Reader in = getReader()) {
+// final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build();
+// while (reader.readNext() != null) {
+// count++;
+// }
+// }
+//
+// bh.consume(count);
+// return count;
+// }
+//
+// @Benchmark
+// public int parseSkifeCSV(final Blackhole bh) throws Exception {
+// final org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader();
+// reader.setSeperator(',');
+// final CountingReaderCallback callback = new CountingReaderCallback();
+//
+// try (final Reader in = getReader()) {
+// reader.parse(in, callback);
+// }
+//
+// bh.consume(callback);
+// return callback.count;
+// }
+//
+// private static class CountingReaderCallback implements org.skife.csv.ReaderCallback {
+// public int count;
+//
+// @Override
+// public void onRow(final String[] fields) {
+// count++;
+// }
+// }
+//
+// @Benchmark
+// public int parseSuperCSV(final Blackhole bh) throws Exception {
+// int count = 0;
+//
+// try (final CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) {
+// while (reader.read() != null) {
+// count++;
+// }
+// }
+//
+// bh.consume(count);
+// return count;
+// }
+//}
diff --git a/src/test/java/org/apache/commons/csv/CSVTest.java b/src/test/java/org/apache/commons/csv/CSVTest.java
new file mode 100644
index 00000000..5e01c3c1
--- /dev/null
+++ b/src/test/java/org/apache/commons/csv/CSVTest.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.commons.csv;
+
+import org.junit.jupiter.api.Test;
+
+import java.io.FileReader;
+import java.io.Reader;
+import java.io.StringReader;
+
+public class CSVTest {
+
+ // @Test
+ public void readCSVFileSimpleOne() {
+ // happy path one
+ try {
+ // change your csv file path properly
+ Reader in = new FileReader("D:\\code\\apache\\csv\\samples\\basicCsvSample-1\\src\\main\\resources\\longCsvFile.csv");
+
+ CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
+ .setHeader()
+ .setSkipHeaderRecord(true)
+ .build();
+
+ Iterable records = csvFormat.parse(in); // return a CSVParser - which is an Iterable
+
+ for (CSVRecord record : records) {
+ String firstName = record.get("firstname");
+ String lastName = record.get("lastname");
+ String age = record.get("age");
+ String email = record.get("email");
+ System.out.println("FirstName: " + firstName + ", LastName: "+ lastName +", Age: " + age + ", Email: " + email);
+ }
+ } catch (Exception e) {
+ System.out.println("An error occurred");
+ e.printStackTrace();
+ }
+ }
+
+ @Test
+ public void testFaultyCSVshouldThrowErrorWithDetailedMessage(){
+
+ String csvContent = "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10\n" +
+ "rec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,\"\"rec9\"\",rec10";
+
+ try {
+ StringReader stringReader = new StringReader(csvContent);
+ CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
+ .setHeader()
+ .setSkipHeaderRecord(true)
+ .build();
+
+ Iterable records = csvFormat.parse(stringReader);
+
+ for (CSVRecord record : records) {
+ System.out.println(record.get(0) + " " + record.get(1) + " " + record.get(2) + " " + record.get(3) + " " + record.get(4) + " " + record.get(5) + " " + record.get(6) + " " + record.get(7) + " " + record.get(8) + " " + record.get(9));
+ }
+ } catch (Exception e) {
+ System.out.println("An error occurred");
+ e.printStackTrace();
+ }
+ }
+}
From eaba3415351e279611730e258e69011ad9c989fc Mon Sep 17 00:00:00 2001
From: gbidsilva
Date: Wed, 30 Aug 2023 17:08:12 +0530
Subject: [PATCH 02/16] removing unwanted files
---
.../org/apache/commons/csv/CSVBenchmark.java | 454 +++++++++---------
.../java/org/apache/commons/csv/CSVTest.java | 79 ---
2 files changed, 227 insertions(+), 306 deletions(-)
delete mode 100644 src/test/java/org/apache/commons/csv/CSVTest.java
diff --git a/src/test/java/org/apache/commons/csv/CSVBenchmark.java b/src/test/java/org/apache/commons/csv/CSVBenchmark.java
index 232142ef..64d3f498 100644
--- a/src/test/java/org/apache/commons/csv/CSVBenchmark.java
+++ b/src/test/java/org/apache/commons/csv/CSVBenchmark.java
@@ -1,227 +1,227 @@
-///*
-// * Licensed to the Apache Software Foundation (ASF) under one or more
-// * contributor license agreements. See the NOTICE file distributed with
-// * this work for additional information regarding copyright ownership.
-// * The ASF licenses this file to You under the Apache License, Version 2.0
-// * (the "License"); you may not use this file except in compliance with
-// * the License. You may obtain a copy of the License at
-// *
-// * http://www.apache.org/licenses/LICENSE-2.0
-// *
-// * Unless required by applicable law or agreed to in writing, software
-// * distributed under the License is distributed on an "AS IS" BASIS,
-// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// * See the License for the specific language governing permissions and
-// * limitations under the License.
-// */
-//
-//package org.apache.commons.csv;
-//
-//import java.io.BufferedReader;
-//import java.io.IOException;
-//import java.io.InputStream;
-//import java.io.Reader;
-//import java.io.StringReader;
-//import java.nio.charset.StandardCharsets;
-//import java.util.Iterator;
-//import java.util.Scanner;
-//import java.util.concurrent.TimeUnit;
-//import java.util.zip.GZIPInputStream;
-//
-//import com.generationjava.io.CsvReader;
-//import com.opencsv.CSVParserBuilder;
-//import com.opencsv.CSVReaderBuilder;
-//
-//import org.apache.commons.io.IOUtils;
-//import org.apache.commons.lang3.StringUtils;
-//import org.openjdk.jmh.annotations.Benchmark;
-//import org.openjdk.jmh.annotations.BenchmarkMode;
-//import org.openjdk.jmh.annotations.Fork;
-//import org.openjdk.jmh.annotations.Measurement;
-//import org.openjdk.jmh.annotations.Mode;
-//import org.openjdk.jmh.annotations.OutputTimeUnit;
-//import org.openjdk.jmh.annotations.Scope;
-//import org.openjdk.jmh.annotations.Setup;
-//import org.openjdk.jmh.annotations.State;
-//import org.openjdk.jmh.annotations.Threads;
-//import org.openjdk.jmh.annotations.Warmup;
-//import org.openjdk.jmh.infra.Blackhole;
-//import org.supercsv.io.CsvListReader;
-//import org.supercsv.prefs.CsvPreference;
-//
-//@BenchmarkMode(Mode.AverageTime)
-//@Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"})
-//@Threads(1)
-//@Warmup(iterations = 5)
-//@Measurement(iterations = 20)
-//@OutputTimeUnit(TimeUnit.MILLISECONDS)
-//@State(Scope.Benchmark)
-//public class CSVBenchmark {
-//
-// private String data;
-//
-// /**
-// * Load the data in memory before running the benchmarks, this takes out IO from the results.
-// */
-// @Setup
-// public void init() throws IOException {
-// InputStream in = this.getClass().getClassLoader().getResourceAsStream(
-// "org/apache/commons/csv/perf/worldcitiespop.txt.gz");
-// try (final InputStream gzin = new GZIPInputStream(in, 8192)) {
-// this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1);
-// }
-// }
-//
-// private Reader getReader() {
-// return new StringReader(data);
-// }
-//
-// @Benchmark
-// public int read(final Blackhole bh) throws Exception {
-// int count = 0;
-//
-// try (BufferedReader reader = new BufferedReader(getReader())) {
-// while (reader.readLine() != null) {
-// count++;
-// }
-// }
-//
-// bh.consume(count);
-// return count;
-// }
-//
-// @Benchmark
-// public int scan(final Blackhole bh) throws Exception {
-// int count = 0;
-//
-// try (Scanner scanner = new Scanner(getReader())) {
-// while (scanner.hasNextLine()) {
-// scanner.nextLine();
-// count++;
-// }
-// }
-//
-// bh.consume(count);
-// return count;
-// }
-//
-// @Benchmark
-// public int split(final Blackhole bh) throws Exception {
-// int count = 0;
-//
-// try (BufferedReader reader = new BufferedReader(getReader())) {
-// String line;
-// while ((line = reader.readLine()) != null) {
-// final String[] values = StringUtils.split(line, ',');
-// count += values.length;
-// }
-// }
-//
-// bh.consume(count);
-// return count;
-// }
-//
-// @Benchmark
-// public int parseCommonsCSV(final Blackhole bh) throws Exception {
-// int count = 0;
-//
-// try (final Reader in = getReader()) {
-// final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build();
-// Iterator iter = format.parse(in).iterator();
-// while (iter.hasNext()) {
-// count++;
-// iter.next();
-// }
-// }
-//
-// bh.consume(count);
-// return count;
-// }
-//
-// @Benchmark
-// public int parseGenJavaCSV(final Blackhole bh) throws Exception {
-// int count = 0;
-//
-// try (final Reader in = getReader()) {
-// final CsvReader reader = new CsvReader(in);
-// reader.setFieldDelimiter(',');
-// while (reader.readLine() != null) {
-// count++;
-// }
-// }
-//
-// bh.consume(count);
-// return count;
-// }
-//
-// @Benchmark
-// public int parseJavaCSV(final Blackhole bh) throws Exception {
-// int count = 0;
-//
-// try (final Reader in = getReader()) {
-// final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ',');
-// reader.setRecordDelimiter('\n');
-// while (reader.readRecord()) {
-// count++;
-// }
-// }
-//
-// bh.consume(count);
-// return count;
-// }
-//
-// @Benchmark
-// public int parseOpenCSV(final Blackhole bh) throws Exception {
-// int count = 0;
-//
-// final com.opencsv.CSVParser parser = new CSVParserBuilder()
-// .withSeparator(',').withIgnoreQuotations(true).build();
-//
-// try (final Reader in = getReader()) {
-// final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build();
-// while (reader.readNext() != null) {
-// count++;
-// }
-// }
-//
-// bh.consume(count);
-// return count;
-// }
-//
-// @Benchmark
-// public int parseSkifeCSV(final Blackhole bh) throws Exception {
-// final org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader();
-// reader.setSeperator(',');
-// final CountingReaderCallback callback = new CountingReaderCallback();
-//
-// try (final Reader in = getReader()) {
-// reader.parse(in, callback);
-// }
-//
-// bh.consume(callback);
-// return callback.count;
-// }
-//
-// private static class CountingReaderCallback implements org.skife.csv.ReaderCallback {
-// public int count;
-//
-// @Override
-// public void onRow(final String[] fields) {
-// count++;
-// }
-// }
-//
-// @Benchmark
-// public int parseSuperCSV(final Blackhole bh) throws Exception {
-// int count = 0;
-//
-// try (final CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) {
-// while (reader.read() != null) {
-// count++;
-// }
-// }
-//
-// bh.consume(count);
-// return count;
-// }
-//}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
+import java.util.Iterator;
+import java.util.Scanner;
+import java.util.concurrent.TimeUnit;
+import java.util.zip.GZIPInputStream;
+
+import com.generationjava.io.CsvReader;
+import com.opencsv.CSVParserBuilder;
+import com.opencsv.CSVReaderBuilder;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.supercsv.io.CsvListReader;
+import org.supercsv.prefs.CsvPreference;
+
+@BenchmarkMode(Mode.AverageTime)
+@Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"})
+@Threads(1)
+@Warmup(iterations = 5)
+@Measurement(iterations = 20)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@State(Scope.Benchmark)
+public class CSVBenchmark {
+
+ private String data;
+
+ /**
+ * Load the data in memory before running the benchmarks, this takes out IO from the results.
+ */
+ @Setup
+ public void init() throws IOException {
+ InputStream in = this.getClass().getClassLoader().getResourceAsStream(
+ "org/apache/commons/csv/perf/worldcitiespop.txt.gz");
+ try (final InputStream gzin = new GZIPInputStream(in, 8192)) {
+ this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1);
+ }
+ }
+
+ private Reader getReader() {
+ return new StringReader(data);
+ }
+
+ @Benchmark
+ public int read(final Blackhole bh) throws Exception {
+ int count = 0;
+
+ try (BufferedReader reader = new BufferedReader(getReader())) {
+ while (reader.readLine() != null) {
+ count++;
+ }
+ }
+
+ bh.consume(count);
+ return count;
+ }
+
+ @Benchmark
+ public int scan(final Blackhole bh) throws Exception {
+ int count = 0;
+
+ try (Scanner scanner = new Scanner(getReader())) {
+ while (scanner.hasNextLine()) {
+ scanner.nextLine();
+ count++;
+ }
+ }
+
+ bh.consume(count);
+ return count;
+ }
+
+ @Benchmark
+ public int split(final Blackhole bh) throws Exception {
+ int count = 0;
+
+ try (BufferedReader reader = new BufferedReader(getReader())) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ final String[] values = StringUtils.split(line, ',');
+ count += values.length;
+ }
+ }
+
+ bh.consume(count);
+ return count;
+ }
+
+ @Benchmark
+ public int parseCommonsCSV(final Blackhole bh) throws Exception {
+ int count = 0;
+
+ try (final Reader in = getReader()) {
+ final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build();
+ Iterator iter = format.parse(in).iterator();
+ while (iter.hasNext()) {
+ count++;
+ iter.next();
+ }
+ }
+
+ bh.consume(count);
+ return count;
+ }
+
+ @Benchmark
+ public int parseGenJavaCSV(final Blackhole bh) throws Exception {
+ int count = 0;
+
+ try (final Reader in = getReader()) {
+ final CsvReader reader = new CsvReader(in);
+ reader.setFieldDelimiter(',');
+ while (reader.readLine() != null) {
+ count++;
+ }
+ }
+
+ bh.consume(count);
+ return count;
+ }
+
+ @Benchmark
+ public int parseJavaCSV(final Blackhole bh) throws Exception {
+ int count = 0;
+
+ try (final Reader in = getReader()) {
+ final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ',');
+ reader.setRecordDelimiter('\n');
+ while (reader.readRecord()) {
+ count++;
+ }
+ }
+
+ bh.consume(count);
+ return count;
+ }
+
+ @Benchmark
+ public int parseOpenCSV(final Blackhole bh) throws Exception {
+ int count = 0;
+
+ final com.opencsv.CSVParser parser = new CSVParserBuilder()
+ .withSeparator(',').withIgnoreQuotations(true).build();
+
+ try (final Reader in = getReader()) {
+ final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build();
+ while (reader.readNext() != null) {
+ count++;
+ }
+ }
+
+ bh.consume(count);
+ return count;
+ }
+
+ @Benchmark
+ public int parseSkifeCSV(final Blackhole bh) throws Exception {
+ final org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader();
+ reader.setSeperator(',');
+ final CountingReaderCallback callback = new CountingReaderCallback();
+
+ try (final Reader in = getReader()) {
+ reader.parse(in, callback);
+ }
+
+ bh.consume(callback);
+ return callback.count;
+ }
+
+ private static class CountingReaderCallback implements org.skife.csv.ReaderCallback {
+ public int count;
+
+ @Override
+ public void onRow(final String[] fields) {
+ count++;
+ }
+ }
+
+ @Benchmark
+ public int parseSuperCSV(final Blackhole bh) throws Exception {
+ int count = 0;
+
+ try (final CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) {
+ while (reader.read() != null) {
+ count++;
+ }
+ }
+
+ bh.consume(count);
+ return count;
+ }
+}
diff --git a/src/test/java/org/apache/commons/csv/CSVTest.java b/src/test/java/org/apache/commons/csv/CSVTest.java
deleted file mode 100644
index 5e01c3c1..00000000
--- a/src/test/java/org/apache/commons/csv/CSVTest.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-package org.apache.commons.csv;
-
-import org.junit.jupiter.api.Test;
-
-import java.io.FileReader;
-import java.io.Reader;
-import java.io.StringReader;
-
-public class CSVTest {
-
- // @Test
- public void readCSVFileSimpleOne() {
- // happy path one
- try {
- // change your csv file path properly
- Reader in = new FileReader("D:\\code\\apache\\csv\\samples\\basicCsvSample-1\\src\\main\\resources\\longCsvFile.csv");
-
- CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
- .setHeader()
- .setSkipHeaderRecord(true)
- .build();
-
- Iterable records = csvFormat.parse(in); // return a CSVParser - which is an Iterable
-
- for (CSVRecord record : records) {
- String firstName = record.get("firstname");
- String lastName = record.get("lastname");
- String age = record.get("age");
- String email = record.get("email");
- System.out.println("FirstName: " + firstName + ", LastName: "+ lastName +", Age: " + age + ", Email: " + email);
- }
- } catch (Exception e) {
- System.out.println("An error occurred");
- e.printStackTrace();
- }
- }
-
- @Test
- public void testFaultyCSVshouldThrowErrorWithDetailedMessage(){
-
- String csvContent = "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10\n" +
- "rec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,\"\"rec9\"\",rec10";
-
- try {
- StringReader stringReader = new StringReader(csvContent);
- CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
- .setHeader()
- .setSkipHeaderRecord(true)
- .build();
-
- Iterable records = csvFormat.parse(stringReader);
-
- for (CSVRecord record : records) {
- System.out.println(record.get(0) + " " + record.get(1) + " " + record.get(2) + " " + record.get(3) + " " + record.get(4) + " " + record.get(5) + " " + record.get(6) + " " + record.get(7) + " " + record.get(8) + " " + record.get(9));
- }
- } catch (Exception e) {
- System.out.println("An error occurred");
- e.printStackTrace();
- }
- }
-}
From dbc418b9e79369d4a4b440340322b45192d10aed Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 08:36:48 -0400
Subject: [PATCH 03/16] Update CSVParser.java
---
src/main/java/org/apache/commons/csv/CSVParser.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 7d292c6a..8679367c 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -147,7 +147,7 @@ public final class CSVParser implements Iterable, Closeable {
try {
return CSVParser.this.nextRecord();
} catch (final IOException e) {
- throw new UncheckedIOException("Error in reading next record: " + e.toString(), e);
+ throw new UncheckedIOException("Exception reading next record: " + e.toString(), e);
}
}
From 1492c7465fb911b11afbc4a0da9de2d27764e72b Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 08:42:23 -0400
Subject: [PATCH 04/16] Update exception message in CSVRecord#getNextRecord()
#348
---
src/changes/changes.xml | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 1a5552d4..b65bce7c 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -53,6 +53,7 @@
Bump commons-parent from 57 to 60.
Bump h2 from 2.1.214 to 2.2.220 #333.
Bump commons-lang3 from 3.12.0 to 3.13.0.
+ Update exception message in CSVRecord#getNextRecord() #348.
From 00738efb0df1b6ac3118e606a199bd0ec0d0089f Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 08:44:39 -0400
Subject: [PATCH 05/16] Javadoc
---
.../org/apache/commons/csv/CSVParser.java | 24 +++++++++----------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 8679367c..4ce3774b 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -53,7 +53,7 @@ import java.util.stream.StreamSupport;
* Because CSV appears in many different dialects, the parser supports many formats by allowing the
* specification of a {@link CSVFormat}.
*
- * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream.
+ * The parser works record-wise. It is not possible to go back, once a record has been parsed from the input stream.
*
* Creating instances
*
@@ -105,13 +105,13 @@ import java.util.stream.StreamSupport;
*
*
*
- * If the predefined formats don't match the format at hands, custom formats can be defined. More information about
- * customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
+ * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
+ * customizing CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
*
*
* Parsing into memory
*
- * If parsing record wise is not desired, the contents of the input can be read completely into memory.
+ * If parsing record-wise is not desired, the contents of the input can be read completely into memory.
*
*
*
@@ -126,14 +126,14 @@ import java.util.stream.StreamSupport;
*
*
* - Parsing into memory starts at the current position of the parser. If you have already parsed records from
- * the input, those records will not end up in the in memory representation of your CSV data.
- * - Parsing into memory may consume a lot of system resources depending on the input. For example if you're
+ * the input, those records will not end up in the in-memory representation of your CSV data.
+ * - Parsing into memory may consume a lot of system resources depending on the input. For example, if you're
* parsing a 150MB file of CSV data the contents will be read completely into memory.
*
*
* Notes
*
- * Internal parser state is completely covered by the format and the reader-state.
+ * The internal parser state is completely covered by the format and the reader state.
*
*
* @see package documentation for more details
@@ -304,8 +304,6 @@ public final class CSVParser implements Iterable, Closeable {
return new CSVParser(reader, format);
}
- // the following objects are shared to reduce garbage
-
/**
* Creates a parser for the given {@link String}.
*
@@ -421,7 +419,7 @@ public final class CSVParser implements Iterable, Closeable {
* @param recordNumber
* The next record number to assign
* @throws IllegalArgumentException
- * If the parameters of the format are inconsistent or if either reader or format are null.
+ * If the parameters of the format are inconsistent or if either the reader or format is null.
* @throws IOException
* If there is a problem reading the header or skipping the first record
* @since 1.1
@@ -662,11 +660,11 @@ public final class CSVParser implements Iterable, Closeable {
}
/**
- * Handle whether input is parsed as null
+ * Handles whether the input is parsed as null
*
* @param input
* the cell data to further processed
- * @return null if input is parsed as null, or input itself if input isn't parsed as null
+ * @return null if input is parsed as null, or input itself if the input isn't parsed as null
*/
private String handleNull(final String input) {
final boolean isQuoted = this.reusableToken.isQuoted;
@@ -733,7 +731,7 @@ public final class CSVParser implements Iterable, Closeable {
* Returns the record iterator.
*
*
- * An {@link IOException} caught during the iteration are re-thrown as an
+ * An {@link IOException} caught during the iteration is re-thrown as an
* {@link IllegalStateException}.
*
*
From 83760258af3279f7efd9deca788c9b3625cc5d1b Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 08:54:59 -0400
Subject: [PATCH 06/16] Javadoc
---
.../org/apache/commons/csv/CSVFormat.java | 60 +++++++++----------
1 file changed, 30 insertions(+), 30 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index b3ac7ace..77d557ba 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -477,7 +477,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
*
*
- * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
+ * @param headerEnum the enum defining the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance.
*/
public Builder setHeader(final Class extends Enum>> headerEnum) {
@@ -491,7 +491,7 @@ public final class CSVFormat implements Serializable {
}
/**
- * Sets the header from the result set metadata. The header can either be parsed automatically from the input file with:
+ * Sets the header from the result set metadata. The header can be parsed automatically from the input file with:
*
*
* builder.setHeader();
@@ -506,7 +506,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
*
*
- * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
+ * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance.
* @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set.
*/
@@ -515,7 +515,7 @@ public final class CSVFormat implements Serializable {
}
/**
- * Sets the header from the result set metadata. The header can either be parsed automatically from the input file with:
+ * Sets the header from the result set metadata. The header can be parsed automatically from the input file with:
*
*
* builder.setHeader();
@@ -530,7 +530,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
*
*
- * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
+ * @param resultSetMetaData the metaData for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance.
* @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set.
*/
@@ -547,7 +547,7 @@ public final class CSVFormat implements Serializable {
}
/**
- * Sets the header to the given values. The header can either be parsed automatically from the input file with:
+ * Sets the header to the given values. The header can be parsed automatically from the input file with:
*
*
* builder.setHeader();
@@ -562,7 +562,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
*
*
- * @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
+ * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return This instance.
*/
public Builder setHeader(final String... header) {
@@ -909,8 +909,8 @@ public final class CSVFormat implements Serializable {
false, false, false, DuplicateHeaderMode.ALLOW_ALL);
/**
- * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
- * to customize this format to accommodate to your regional settings.
+ * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale-dependent; it might be necessary
+ * to customize this format to accommodate your regional settings.
*
*
* For example for parsing or generating a CSV file on a French system the following format will be used:
@@ -949,7 +949,7 @@ public final class CSVFormat implements Serializable {
* Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
*
*
- * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
+ * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
* The default NULL string is {@code "\\N"}.
*
*
@@ -981,7 +981,7 @@ public final class CSVFormat implements Serializable {
* Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
*
*
- * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
+ * This is a comma-delimited format with an LF character as the line separator. Values are not quoted and special characters are escaped with {@code '\'}.
* The default NULL string is {@code "\\N"}.
*
*
@@ -1084,7 +1084,7 @@ public final class CSVFormat implements Serializable {
* Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
*
*
- * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
+ * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
* characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
*
*
@@ -1121,7 +1121,7 @@ public final class CSVFormat implements Serializable {
* Default Oracle format used by the SQL*Loader utility.
*
*
- * This is a comma-delimited format with the system line separator character as the record separator.Values are
+ * This is a comma-delimited format with the system line separator character as the record separator. Values are
* double quoted when needed and special characters are escaped with {@code '"'}. The default NULL string is
* {@code ""}. Values are trimmed.
*
@@ -1161,7 +1161,7 @@ public final class CSVFormat implements Serializable {
* Default PostgreSQL CSV format used by the {@code COPY} operation.
*
*
- * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
+ * This is a comma-delimited format with an LF character as the line separator. Values are double quoted and special
* characters are not escaped. The default NULL string is {@code ""}.
*
*
@@ -1199,7 +1199,7 @@ public final class CSVFormat implements Serializable {
* Default PostgreSQL text format used by the {@code COPY} operation.
*
*
- * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
+ * This is a tab-delimited format with an LF character as the line separator. Values are not quoted and special
* characters are escaped with {@code '\\'}. The default NULL string is {@code "\\N"}.
*
*
@@ -1890,7 +1890,7 @@ public final class CSVFormat implements Serializable {
}
/**
- * Tests whether escape are being processed.
+ * Tests whether escapes are being processed.
*
* @return {@code true} if escapes are processed
*/
@@ -1899,7 +1899,7 @@ public final class CSVFormat implements Serializable {
}
/**
- * Tests whether a nullString has been defined.
+ * Tests whether a null string has been defined.
*
* @return {@code true} if a nullString is defined
*/
@@ -2009,7 +2009,7 @@ public final class CSVFormat implements Serializable {
if (object == null) {
out.append(value);
} else if (isQuoteCharacterSet()) {
- // the original object is needed so can check for Number
+ // The original object is needed so we can check for Number
printWithQuotes(object, value, out, newRecord);
} else if (isEscapeCharacterSet()) {
printWithEscapes(value, out);
@@ -2086,7 +2086,7 @@ public final class CSVFormat implements Serializable {
}
/**
- * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the record separator.
+ * Prints the given {@code values} to {@code out} as a single record of delimiter-separated values followed by the record separator.
*
*
* The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record separator to the output after printing
@@ -2241,7 +2241,7 @@ public final class CSVFormat implements Serializable {
return;
case MINIMAL:
if (len <= 0) {
- // always quote an empty token that is the first
+ // Always quote an empty token that is the first
// on the line, as it may be the only thing on the
// line. If it were not quoted in that case,
// an empty line has no tokens.
@@ -2279,7 +2279,7 @@ public final class CSVFormat implements Serializable {
}
if (!quote) {
- // no encapsulation needed - write out the original value
+ // No encapsulation needed - write out the original value
out.append(charSeq, start, len);
return;
}
@@ -2289,12 +2289,12 @@ public final class CSVFormat implements Serializable {
}
if (!quote) {
- // no encapsulation needed - write out the original value
+ // No encapsulation needed - write out the original value
out.append(charSeq, start, len);
return;
}
- // we hit something that needed encapsulation
+ // We hit something that needed encapsulation
out.append(quoteChar);
// Pick up where we left off: pos should be positioned on the first character that caused
@@ -2310,13 +2310,13 @@ public final class CSVFormat implements Serializable {
pos++;
}
- // write the last segment
+ // Write the last segment
out.append(charSeq, start, pos);
out.append(quoteChar);
}
/**
- * Always use quotes unless QuoteMode is NONE, so we not have to look ahead.
+ * Always use quotes unless QuoteMode is NONE, so we do not have to look ahead.
*
* @param reader What to print
* @param appendable Where to print it
@@ -2417,7 +2417,7 @@ public final class CSVFormat implements Serializable {
/**
* Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary.
*
- * Because an instance can be used for both writing an parsing, not all conditions can be tested here. For example allowMissingColumnNames is only used for
+ * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used for
* parsing, so it cannot be used here.
*
*
@@ -2570,7 +2570,7 @@ public final class CSVFormat implements Serializable {
* Builds a new {@code CSVFormat} with the delimiter of the format set to the specified character.
*
* @param delimiter the delimiter character
- * @return A new CSVFormat that is equal to this with the specified character as delimiter
+ * @return A new CSVFormat that is equal to this with the specified character as a delimiter
* @throws IllegalArgumentException thrown if the specified character is a line break
* @deprecated Use {@link Builder#setDelimiter(char)}
*/
@@ -2679,7 +2679,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
*
*
- * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
+ * @param resultSet the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return A new CSVFormat that is equal to this but with the specified header
* @throws SQLException SQLException if a database access error occurs or this method is called on a closed result set.
* @since 1.1
@@ -2735,7 +2735,7 @@ public final class CSVFormat implements Serializable {
* The header is also used by the {@link CSVPrinter}.
*
*
- * @param header the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
+ * @param header the header, {@code null} if disabled, empty if parsed automatically, user-specified otherwise.
* @return A new CSVFormat that is equal to this but with the specified header
* @see Builder#setSkipHeaderRecord(boolean)
* @deprecated Use {@link Builder#setHeader(String...)}
@@ -2793,7 +2793,7 @@ public final class CSVFormat implements Serializable {
/**
* Builds a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
*
- * @return A new CSVFormat that will ignore case header name.
+ * @return A new CSVFormat that will ignore the case of header names.
* @see Builder#setIgnoreHeaderCase(boolean)
* @since 1.3
* @deprecated Use {@link Builder#setIgnoreHeaderCase(boolean) Builder#setIgnoreHeaderCase(true)}
From 2ce1fbb0c82a4c4bd62e6078d7766b770ac8e286 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 08:57:11 -0400
Subject: [PATCH 07/16] Javadoc
---
.../org/apache/commons/csv/CSVPrinter.java | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVPrinter.java b/src/main/java/org/apache/commons/csv/CSVPrinter.java
index dba6de9e..421de13e 100644
--- a/src/main/java/org/apache/commons/csv/CSVPrinter.java
+++ b/src/main/java/org/apache/commons/csv/CSVPrinter.java
@@ -171,7 +171,7 @@ public final class CSVPrinter implements Flushable, Closeable {
}
/**
- * Prints a comment on a new line among the delimiter separated values.
+ * Prints a comment on a new line among the delimiter-separated values.
*
*
* Comments will always begin on a new line and occupy at least one full line. The character specified to start
@@ -184,7 +184,7 @@ public final class CSVPrinter implements Flushable, Closeable {
*
*
This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()}
* to start a new line of the comment. Note that this might produce unexpected results for formats that do not use
- * line breaks as record separator.
+ * line breaks as record separators.
*
* @param comment
* the comment to output
@@ -224,7 +224,7 @@ public final class CSVPrinter implements Flushable, Closeable {
/**
* Prints headers for a result set based on its metadata.
*
- * @param resultSet The result set to query for metadata.
+ * @param resultSet The ResultSet to query for metadata.
* @throws IOException If an I/O error occurs.
* @throws SQLException If a database access error occurs or this method is called on a closed result set.
* @since 1.9.0
@@ -245,7 +245,7 @@ public final class CSVPrinter implements Flushable, Closeable {
}
/**
- * Prints the given values as a single record of delimiter separated values followed by the record separator.
+ * Prints the given values as a single record of delimiter-separated values followed by the record separator.
*
*
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
@@ -265,7 +265,7 @@ public final class CSVPrinter implements Flushable, Closeable {
}
/**
- * Prints the given values as a single record of delimiter separated values followed by the record separator.
+ * Prints the given values as a single record of delimiter-separated values followed by the record separator.
*
*
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
@@ -282,7 +282,7 @@ public final class CSVPrinter implements Flushable, Closeable {
}
/**
- * Prints the given values as a single record of delimiter separated values followed by the record separator.
+ * Prints the given values as a single record of delimiter-separated values followed by the record separator.
*
*
* The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
@@ -366,7 +366,7 @@ public final class CSVPrinter implements Flushable, Closeable {
*
*
* If the given array only contains simple objects, this method will print a single record like
- * {@link #printRecord(Object...)}. If the given collections contains nested collections/arrays those nested
+ * {@link #printRecord(Object...)}. If the given collections contain nested collections or arrays, those nested
* elements will each be printed as records using {@link #printRecord(Object...)}.
*
*
@@ -408,11 +408,11 @@ public final class CSVPrinter implements Flushable, Closeable {
* Prints all the objects in the given JDBC result set.
*
* @param resultSet
- * result set the values to print.
+ * The values to print.
* @throws IOException
- * If an I/O error occurs
+ * If an I/O error occurs.
* @throws SQLException
- * if a database access error occurs
+ * Thrown when a database access error occurs.
*/
public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
final int columnCount = resultSet.getMetaData().getColumnCount();
From d103cba8b5d8e996698997c70072eaf65591e718 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 08:59:04 -0400
Subject: [PATCH 08/16] Javadoc
---
.../org/apache/commons/csv/CSVRecord.java | 26 +++++++++----------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index 71d5f953..189cc07e 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -32,10 +32,10 @@ import java.util.stream.Stream;
*
* Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
* In version 1.8 the mapping between the column header and the column index was
- * removed from the serialised state. The class maintains serialization compatibility
+ * removed from the serialized state. The class maintains serialization compatibility
* with versions pre-1.8 for the record values; these must be accessed by index
- * following deserialization. There will be loss of any functionally linked to the header
- * mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa.
+ * following deserialization. There will be a loss of any functionality linked to the header
+ * mapping when transferring serialized forms pre-1.8 to 1.8 and vice versa.
*
*/
public final class CSVRecord implements Serializable, Iterable {
@@ -143,8 +143,8 @@ public final class CSVRecord implements Serializable, Iterable {
/**
* Returns the comment for this record, if any.
* Note that comments are attached to the following record.
- * If there is no following record (i.e. the comment is at EOF)
- * the comment will be ignored.
+ * If there is no following record (i.e. the comment is at EOF),
+ * then the comment will be ignored.
*
* @return the comment for this record, or null if no comment for this record is available.
*/
@@ -189,8 +189,8 @@ public final class CSVRecord implements Serializable, Iterable {
/**
* Checks whether this record has a comment, false otherwise.
* Note that comments are attached to the following record.
- * If there is no following record (i.e. the comment is at EOF)
- * the comment will be ignored.
+ * If there is no following record (i.e. the comment is at EOF),
+ * then the comment will be ignored.
*
* @return true if this record has a comment, false otherwise
* @since 1.3
@@ -227,22 +227,22 @@ public final class CSVRecord implements Serializable, Iterable {
}
/**
- * Checks whether a column with given index has a value.
+ * Checks whether a column with a given index has a value.
*
* @param index
* a column index (0-based)
- * @return whether a column with given index has a value
+ * @return whether a column with a given index has a value
*/
public boolean isSet(final int index) {
return 0 <= index && index < values.length;
}
/**
- * Checks whether a given columns is mapped and has a value.
+ * Checks whether a given column is mapped and has a value.
*
* @param name
* the name of the column to be retrieved.
- * @return whether a given columns is mapped and has a value
+ * @return whether a given column is mapped and has a value
*/
public boolean isSet(final String name) {
return isMapped(name) && getHeaderMapRaw().get(name).intValue() < values.length;
@@ -311,8 +311,8 @@ public final class CSVRecord implements Serializable, Iterable {
}
/**
- * Copies this record into a new Map of header name to record value. If multiple instances of a header name exists,
- * only the last occurrence is mapped.
+ * Copies this record into a new Map of header name to record value. If multiple instances of a header name exist,
+ * then only the last occurrence is mapped.
*
*
* Editing the map does not update this instance.
From 346e217559379872d0b439c221f851e06d8d8ce7 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 09:00:53 -0400
Subject: [PATCH 09/16] Javadoc
---
.../org/apache/commons/csv/ExtendedBufferedReader.java | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
index 89e63e3a..429b07cb 100644
--- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
+++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -41,13 +41,13 @@ final class ExtendedBufferedReader extends BufferedReader {
/** The count of EOLs (CR/LF/CRLF) seen so far */
private long eolCounter;
- /** The position, which is number of characters read so far */
+ /** The position, which is the number of characters read so far */
private long position;
private boolean closed;
/**
- * Created extended buffered reader using default buffer-size
+ * Constructs a new instance using the default buffer size.
*/
ExtendedBufferedReader(final Reader reader) {
super(reader);
@@ -107,7 +107,7 @@ final class ExtendedBufferedReader extends BufferedReader {
/**
* Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
- * still return this value. Does not affect line number or last character.
+ * still return this value. Does not affect the line number or the last character.
*
* @return the next character
*
@@ -125,7 +125,7 @@ final class ExtendedBufferedReader extends BufferedReader {
/**
* Populates the buffer with the next {@code buf.length} characters in the
* current reader without consuming them. The next call to {@link #read()} will
- * still return the next value. This doesn't affect line number or last
+ * still return the next value. This doesn't affect the line number or the last
* character.
*
* @param buf the buffer to fill for the look ahead.
@@ -199,7 +199,7 @@ final class ExtendedBufferedReader extends BufferedReader {
/**
* Gets the next line, dropping the line terminator(s). This method should only be called when processing a
- * comment, otherwise information can be lost.
+ * comment; otherwise, information can be lost.
*
* Increments {@link #eolCounter} and updates {@link #position}.
*
From aa7fd80838fd43c0800b2d1d14b4a20fd5fa1f8f Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 09:02:22 -0400
Subject: [PATCH 10/16] Javadoc
---
src/main/java/org/apache/commons/csv/CSVFormat.java | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 77d557ba..2f7e6b9f 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -2417,8 +2417,8 @@ public final class CSVFormat implements Serializable {
/**
* Verifies the validity and consistency of the attributes, and throws an {@link IllegalArgumentException} if necessary.
*
- * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used for
- * parsing, so it cannot be used here.
+ * Because an instance can be used for both writing and parsing, not all conditions can be tested here. For example, allowMissingColumnNames is only used
+ * for parsing, so it cannot be used here.
*
*
* @throws IllegalArgumentException Throw when any attribute is invalid or inconsistent with other attributes.
From ecb056fd77ee7424bcc9dadc76070c1843d8be53 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 09:07:23 -0400
Subject: [PATCH 11/16] Javadoc
---
.../java/org/apache/commons/csv/Lexer.java | 43 ++++++++++---------
1 file changed, 22 insertions(+), 21 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index 06b2c9c2..ea29ff68 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -42,7 +42,7 @@ final class Lexer implements Closeable {
private static final String LF_STRING = Character.toString(LF);
/**
- * Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it
+ * Constant char to use for disabling comments, escapes, and encapsulation. The value -2 is used because it
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
* chars (using surrogates) and thus there should never be a collision with a real text char.
*/
@@ -146,9 +146,9 @@ final class Lexer implements Closeable {
}
/**
- * Tests if the given character indicates end of file.
+ * Tests if the given character indicates the end of the file.
*
- * @return true if the given character indicates end of file.
+ * @return true if the given character indicates the end of the file.
*/
boolean isEndOfFile(final int ch) {
return ch == END_OF_STREAM;
@@ -168,7 +168,7 @@ final class Lexer implements Closeable {
*
* For example, for delimiter "[|]" and escape '!', return true if the next characters constitute "![!|!]".
*
- * @return true if the next characters constitute a escape delimiter.
+ * @return true if the next characters constitute an escape delimiter.
* @throws IOException If an I/O error occurs.
*/
boolean isEscapeDelimiter() throws IOException {
@@ -194,7 +194,7 @@ final class Lexer implements Closeable {
}
/**
- * Tests if the current character represents the start of a line: a CR, LF or is at the start of the file.
+ * Tests if the current character represents the start of a line: a CR, LF, or is at the start of the file.
*
* @param ch the character to check
* @return true if the character is at the start of a line.
@@ -214,13 +214,13 @@ final class Lexer implements Closeable {
*
*
* @param token
- * an existing Token object to reuse. The caller is responsible to initialize the Token.
+ * an existing Token object to reuse. The caller is responsible for initializing the Token.
* @return the next token found.
* @throws IOException on stream access error.
*/
Token nextToken(final Token token) throws IOException {
- // get the last read char (required for empty line detection)
+ // Get the last read char (required for empty line detection)
int lastChar = reader.getLastChar();
// read the next char and set eol
@@ -234,11 +234,11 @@ final class Lexer implements Closeable {
// empty line detection: eol AND (last char was EOL or beginning)
if (ignoreEmptyLines) {
while (eol && isStartOfLine(lastChar)) {
- // go on char ahead ...
+ // Go one char ahead ...
lastChar = c;
c = reader.read();
eol = readEndOfLine(c);
- // reached end of file without any content (empty line at the end)
+ // reached the end of the file without any content (empty line at the end)
if (isEndOfFile(c)) {
token.type = EOF;
// don't set token.isReady here because no content
@@ -247,7 +247,7 @@ final class Lexer implements Closeable {
}
}
- // did we reach eof during the last iteration already ? EOF
+ // Did we reach EOF during the last iteration already? EOF
if (isEndOfFile(lastChar) || !isLastTokenDelimiter && isEndOfFile(c)) {
token.type = EOF;
// don't set token.isReady here because no content
@@ -267,7 +267,7 @@ final class Lexer implements Closeable {
return token;
}
- // important: make sure a new char gets consumed in each iteration
+ // Important: make sure a new char gets consumed in each iteration
while (token.type == INVALID) {
// ignore whitespaces at beginning of a token
if (ignoreSurroundingSpaces) {
@@ -305,12 +305,12 @@ final class Lexer implements Closeable {
/**
* Parses an encapsulated token.
*
- * Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included
+ * Encapsulated tokens are surrounded by the given encapsulating string. The encapsulator itself might be included
* in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
- * an encapsulated token are ignored. The token is finished when one of the following conditions become true:
+ * an encapsulated token are ignored. The token is finished when one of the following conditions becomes true:
*
*
- * - an unescaped encapsulator has been reached, and is followed by optional whitespace then:
+ * - An unescaped encapsulator has been reached and is followed by optional whitespace then:
*
* - delimiter (TOKEN)
* - end of line (EORECORD)
@@ -321,11 +321,12 @@ final class Lexer implements Closeable {
* the current token
* @return a valid token object
* @throws IOException
- * on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL
+ * Thrown when in an invalid state: EOF before closing encapsulator or invalid character before
+ * delimiter or EOL.
*/
private Token parseEncapsulatedToken(final Token token) throws IOException {
token.isQuoted = true;
- // save current line number in case needed for IOE
+ // Save current line number in case needed for IOE
final long startLineNumber = getCurrentLineNumber();
int c;
while (true) {
@@ -385,13 +386,13 @@ final class Lexer implements Closeable {
/**
* Parses a simple token.
*
- * Simple token are tokens which are not surrounded by encapsulators. A simple token might contain escaped
- * delimiters (as \, or \;). The token is finished when one of the following conditions become true:
+ * Simple tokens are tokens that are not surrounded by encapsulators. A simple token might contain escaped
+ * delimiters (as \, or \;). The token is finished when one of the following conditions becomes true:
*
*
- * - end of line has been reached (EORECORD)
- * - end of stream has been reached (EOF)
- * - an unescaped delimiter has been reached (TOKEN)
+ * - The end of line has been reached (EORECORD)
+ * - The end of stream has been reached (EOF)
+ * - An unescaped delimiter has been reached (TOKEN)
*
*
* @param token
From b55e022d0ed39d9d55912281febca50e285bda64 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 09:07:57 -0400
Subject: [PATCH 12/16] Javadoc
---
src/main/java/org/apache/commons/csv/QuoteMode.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/csv/QuoteMode.java b/src/main/java/org/apache/commons/csv/QuoteMode.java
index a9b33a10..f2fb1f94 100644
--- a/src/main/java/org/apache/commons/csv/QuoteMode.java
+++ b/src/main/java/org/apache/commons/csv/QuoteMode.java
@@ -32,7 +32,7 @@ public enum QuoteMode {
ALL_NON_NULL,
/**
- * Quotes fields which contain special characters such as a the field delimiter, quote character or any of the
+ * Quotes fields that contain special characters such as a field delimiter, quote character, or any of the
* characters in the line separator string.
*/
MINIMAL,
From 4e0b0fb8a501cccad9531249bcc8b0aafce7bb3a Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 09:08:38 -0400
Subject: [PATCH 13/16] Javadoc
---
src/main/java/org/apache/commons/csv/Token.java | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/Token.java b/src/main/java/org/apache/commons/csv/Token.java
index 2dedc58a..a33b220e 100644
--- a/src/main/java/org/apache/commons/csv/Token.java
+++ b/src/main/java/org/apache/commons/csv/Token.java
@@ -21,8 +21,9 @@ import static org.apache.commons.csv.Token.Type.INVALID;
/**
* Internal token representation.
- *
- * It is used as contract between the lexer and the parser.
+ *
+ * It is used as a contract between the lexer and the parser.
+ *
*/
final class Token {
@@ -30,7 +31,7 @@ final class Token {
/** Token has no valid content, i.e. is in its initialized state. */
INVALID,
- /** Token with content, at beginning or in the middle of a line. */
+ /** Token with content, at the beginning or in the middle of a line. */
TOKEN,
/** Token (which can have content) when the end of file is reached. */
From 386e6e7cc1e5ff2a05a4d46b763cdae22460c39c Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 09:09:27 -0400
Subject: [PATCH 14/16] Javadoc
---
.../org/apache/commons/csv/package-info.java | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/src/main/java/org/apache/commons/csv/package-info.java b/src/main/java/org/apache/commons/csv/package-info.java
index 29e7fef6..023096f6 100644
--- a/src/main/java/org/apache/commons/csv/package-info.java
+++ b/src/main/java/org/apache/commons/csv/package-info.java
@@ -18,14 +18,14 @@
/**
* Apache Commons CSV Format Support.
*
- * CSV are widely used as interfaces to legacy systems or manual data-imports.
+ *
 CSV files are widely used as interfaces to legacy systems or manual data imports.
* CSV stands for "Comma Separated Values" (or sometimes "Character Separated
* Values"). The CSV data format is defined in
* RFC 4180
* but many dialects exist.
*
* Common to all file dialects is its basic structure: The CSV data-format
- * is record oriented, whereas each record starts on a new textual line. A
+ * is record-oriented, whereas each record starts on a new textual line. A
* record is build of a list of values. Keep in mind that not all records
* must have an equal number of values:
*
@@ -36,28 +36,28 @@
* The following list contains the CSV aspects the Commons CSV parser supports:
*
* - Separators (for lines)
- * - The record separators are hardcoded and cannot be changed. The must be '\r', '\n' or '\r\n'.
+ * - The record separators are hardcoded and cannot be changed. They must be '\r', '\n', or '\r\n'.
*
* - Delimiter (for values)
* - The delimiter for values is freely configurable (default ',').
*
* - Comments
- * - Some CSV-dialects support a simple comment syntax. A comment is a record
+ *
- Some CSV dialects support a simple comment syntax. A comment is a record
* which must start with a designated character (the commentStarter). A record
- * of this kind is treated as comment and gets removed from the input (default none)
+ * of this kind is treated as a comment and gets removed from the input (default none)
*
* - Encapsulator
* - Two encapsulator characters (default '"') are used to enclose -> complex values.
*
* - Simple values
- * - A simple value consist of all characters (except the delimiter) until
- * (but not including) the next delimiter or a record-terminator. Optionally
+ *
- A simple value consists of all characters (except the delimiter) until
+ * (but not including) the next delimiter or a record terminator. Optionally
* all surrounding whitespaces of a simple value can be ignored (default: true).
*
* - Complex values
* - Complex values are encapsulated within a pair of the defined encapsulator characters.
* The encapsulator itself must be escaped or doubled when used inside complex values.
- * Complex values preserve all kind of formatting (including newlines -> multiline-values)
+ * Complex values preserve all kinds of formatting (including newlines -> multiline-values)
*
* - Empty line skipping
* - Optionally empty lines in CSV files can be skipped.
From e613a3cc1b4029d253d34220eec40d73908e6075 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 09:12:10 -0400
Subject: [PATCH 15/16] Javadoc
---
src/main/java/org/apache/commons/csv/Lexer.java | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index ea29ff68..01716720 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -321,7 +321,7 @@ final class Lexer implements Closeable {
* the current token
* @return a valid token object
* @throws IOException
- * Thrown when in an invalid state: EOF before closing encapsulator or invalid character before
+ * Thrown when in an invalid state: EOF before closing encapsulator or invalid character before
* delimiter or EOL.
*/
private Token parseEncapsulatedToken(final Token token) throws IOException {
From 710459843cbe3cf4f1ca3aaac5a6eb22d049e643 Mon Sep 17 00:00:00 2001
From: Gary Gregory
Date: Wed, 30 Aug 2023 09:16:49 -0400
Subject: [PATCH 16/16] Bump commons-parent from 60 to 61
---
pom.xml | 2 +-
src/changes/changes.xml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/pom.xml b/pom.xml
index f57853c3..b59fd6bf 100644
--- a/pom.xml
+++ b/pom.xml
@@ -20,7 +20,7 @@
org.apache.commons
commons-parent
- 60
+ 61
commons-csv
1.10.1-SNAPSHOT
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index b65bce7c..de874fed 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -50,7 +50,7 @@
[StepSecurity] CI: Harden GitHub Actions #329, #330.
Bump commons-io:commons-io: from 2.11.0 to 2.13.0.
- Bump commons-parent from 57 to 60.
+ Bump commons-parent from 57 to 61.
Bump h2 from 2.1.214 to 2.2.220 #333.
Bump commons-lang3 from 3.12.0 to 3.13.0.
Update exception message in CSVRecord#getNextRecord() #348.