From f368f64fa7f9acdcc01084f676e8b9c2b86f946e Mon Sep 17 00:00:00 2001 From: David Warshaw Date: Fri, 18 May 2018 14:24:50 -0600 Subject: [PATCH] [CSV-224] Some Multi Iterator Parsing Peek Sequences Incorrectly Consume Elements. --- src/changes/changes.xml | 1 + .../org/apache/commons/csv/CSVParser.java | 101 +++++++++--------- .../org/apache/commons/csv/CSVParserTest.java | 56 ++++++++++ 3 files changed, 110 insertions(+), 48 deletions(-) diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 8142c21a..d94ccdde 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,6 +45,7 @@ Add API org.apache.commons.csv.CSVFormat.withSystemRecordSeparator(). Inconsistency between Javadoc of CSVFormat DEFAULT EXCEL. Create CSVFormat.ORACLE preset. + Some Multi Iterator Parsing Peek Sequences Incorrectly Consume Elements. withNullString value is printed without quotes when QuoteMode.ALL is specified; add QuoteMode.ALL_NON_NULL. PR #17. diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 2a902cd3..7e9d7d41 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -286,6 +286,8 @@ public final class CSVParser implements Iterable, Closeable { private final Lexer lexer; + private final CSVRecordIterator csvRecordIterator; + /** A record buffer for getRecord(). Grows as necessary and is reused. */ private final List recordList = new ArrayList<>(); @@ -353,6 +355,7 @@ public final class CSVParser implements Iterable, Closeable { this.format = format; this.lexer = new Lexer(format, new ExtendedBufferedReader(reader)); + this.csvRecordIterator = new CSVRecordIterator(); this.headerMap = this.initializeHeader(); this.characterOffset = characterOffset; this.recordNumber = recordNumber - 1; @@ -519,55 +522,57 @@ public final class CSVParser implements Iterable, Closeable { */ @Override public Iterator iterator() { - return new Iterator() { - private CSVRecord current; - - private CSVRecord getNextRecord() { - try { - return CSVParser.this.nextRecord(); - } catch (final IOException e) { - throw new IllegalStateException( - e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); - } - } - - @Override - public boolean hasNext() { - if (CSVParser.this.isClosed()) { - return false; - } - if (this.current == null) { - this.current = this.getNextRecord(); - } - - return this.current != null; - } - - @Override - public CSVRecord next() { - if (CSVParser.this.isClosed()) { - throw new NoSuchElementException("CSVParser has been closed"); - } - CSVRecord next = this.current; - this.current = null; - - if (next == null) { - // hasNext() wasn't called before - next = this.getNextRecord(); - if (next == null) { - throw new NoSuchElementException("No more CSV records available"); - } - } - - return next; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - }; + return csvRecordIterator; } + + class CSVRecordIterator implements Iterator { + private CSVRecord current; + + private CSVRecord getNextRecord() { + try { + return CSVParser.this.nextRecord(); + } catch (final IOException e) { + throw new IllegalStateException( + e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); + } + } + + @Override + public boolean hasNext() { + if (CSVParser.this.isClosed()) { + return false; + } + if (this.current == null) { + this.current = this.getNextRecord(); + } + + return this.current != null; + } + + @Override + public CSVRecord next() { + if (CSVParser.this.isClosed()) { + throw new NoSuchElementException("CSVParser has been closed"); + } + CSVRecord next = this.current; + this.current = null; + + if (next == null) { + // hasNext() wasn't called before + next = this.getNextRecord(); + if (next == null) { + throw new NoSuchElementException("No more CSV records available"); + } + } + + return next; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; /** * Parses the next record from the current point in the stream. diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 07e06bca..1e1d7a6c 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -989,6 +989,62 @@ public class CSVParserTest { Assert.assertEquals(3, record.size()); } + @Test + public void testIteratorSequenceBreaking() throws IOException { + final String fiveRows = "1\n2\n3\n4\n5\n"; + + // Iterator hasNext() shouldn't break sequence + CSVParser parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows)); + int recordNumber = 0; + Iterator iter = parser.iterator(); + recordNumber = 0; + while (iter.hasNext()) { + CSVRecord record = iter.next(); + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + if (recordNumber >= 2) { + break; + } + } + iter.hasNext(); + while (iter.hasNext()) { + CSVRecord record = iter.next(); + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + } + + // Consecutive enhanced for loops shouldn't break sequence + parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows)); + recordNumber = 0; + for (CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + if (recordNumber >= 2) { + break; + } + } + for (CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + } + + // Consecutive enhanced for loops with hasNext() peeking shouldn't break sequence + parser = CSVFormat.DEFAULT.parse(new StringReader(fiveRows)); + recordNumber = 0; + for (CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + if (recordNumber >= 2) { + break; + } + } + parser.iterator().hasNext(); + for (CSVRecord record : parser) { + recordNumber++; + assertEquals(String.valueOf(recordNumber), record.get(0)); + } + } + private void validateLineNumbers(final String lineSeparator) throws IOException { try (final CSVParser parser = CSVParser.parse("a" + lineSeparator + "b" + lineSeparator + "c", CSVFormat.DEFAULT.withRecordSeparator(lineSeparator))) {