diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 58cdb146..d92600c1 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -353,6 +353,10 @@ public final class CSVParser implements Iterable, Closeable { return new CSVParser(new InputStreamReader(url.openStream(), charset), format); } + private String headerComment; + + private String trailerComment; + private final CSVFormat format; private final Headers headers; @@ -480,10 +484,14 @@ public final class CSVParser implements Iterable, Closeable { final CSVRecord nextRecord = this.nextRecord(); if (nextRecord != null) { headerRecord = nextRecord.values(); + headerComment = nextRecord.getComment(); } } else { if (this.format.getSkipHeaderRecord()) { - this.nextRecord(); + final CSVRecord nextRecord = this.nextRecord(); + if (nextRecord != null) { + headerComment = nextRecord.getComment(); + } } headerRecord = formatHeader; } @@ -597,6 +605,57 @@ public final class CSVParser implements Iterable, Closeable { return Collections.unmodifiableList(headers.headerNames); } + /** + * Checks whether there is a header comment. + * The header comment appears before the header record. + * Note that if the parser's format has been given an explicit header + * (with {@link CSVFormat.Builder#setHeader(String... )} or another overload) + * and the header record is not being skipped + * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments + * will be associated with the first record, not the header. + * + * @return true if this parser has seen a header comment, false otherwise + * @since 1.10.0 + */ + public boolean hasHeaderComment() { + return headerComment != null; + } + + /** + * Returns the header comment, if any. + * The header comment appears before the header record. + * + * @return the header comment for this stream, or null if no comment is available. + * @since 1.10.0 + */ + public String getHeaderComment() { + return headerComment; + } + + /** + * Checks whether there is a trailer comment. + * Trailer comments are located between the last record and EOF. + * The trailer comments will only be available after the parser has + * finished processing this stream. + * + * @return true if this parser has seen a trailer comment, false otherwise + * @since 1.10.0 + */ + public boolean hasTrailerComment() { + return trailerComment != null; + } + + /** + * Returns the trailer comment, if any. + * Trailer comments are located between the last record and EOF + * + * @return the trailer comment for this stream, or null if no comment is available. + * @since 1.10.0 + */ + public String getTrailerComment() { + return trailerComment; + } + /** * Returns the current record number in the input stream. * @@ -713,6 +772,8 @@ public final class CSVParser implements Iterable, Closeable { case EOF: if (this.reusableToken.isReady) { this.addRecordValue(true); + } else if (sb != null) { + trailerComment = sb.toString(); } break; case INVALID: diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 0cfec4cb..a1f55a98 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -1349,4 +1349,160 @@ public class CSVParserTest { parser.close(); } + + // CSV with no header comments + static private final String CSV_INPUT_NO_COMMENT = "A,B"+CRLF+"1,2"+CRLF; + // CSV with a header comment + static private final String CSV_INPUT_HEADER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF; + // CSV with a single line header and trailer comment + static private final String CSV_INPUT_HEADER_TRAILER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# comment"; + // CSV with a multi-line header and trailer comment + static private final String CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT = "# multi-line" + CRLF + "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# multi-line" + CRLF + "# comment"; + // Format with auto-detected header + static private final CSVFormat FORMAT_AUTO_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT).setCommentMarker('#').setHeader().build(); + // Format with explicit header + static private final CSVFormat FORMAT_EXPLICIT_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT) + .setSkipHeaderRecord(true) + .setCommentMarker('#') + .setHeader("A", "B") + .build(); + // Format with explicit header that does not skip the header line + CSVFormat FORMAT_EXPLICIT_HEADER_NOSKIP = CSVFormat.Builder.create(CSVFormat.DEFAULT) + .setCommentMarker('#') + .setHeader("A", "B") + .build(); + + @Test + public void testGetHeaderComment_NoComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + // Expect no header comment + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_HeaderComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + // Expect a header comment + assertTrue(parser.hasHeaderComment()); + assertEquals("header comment", parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_HeaderTrailerComment() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + // Expect a header comment + assertTrue(parser.hasHeaderComment()); + assertEquals("multi-line"+LF+"header comment", parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_NoComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + // Expect no header comment + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_HeaderComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + // Expect a header comment + assertTrue(parser.hasHeaderComment()); + assertEquals("header comment", parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_NoComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + // Expect no header comment + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + public void testGetHeaderComment_HeaderComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + // Expect no header comment - the text "comment" is attached to the first record + assertFalse(parser.hasHeaderComment()); + assertNull(parser.getHeaderComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + assertFalse(parser.hasTrailerComment()); + assertNull(parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderTrailerComment1() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("comment", parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_MultilineComment() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("multi-line"+LF+"comment", parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + assertFalse(parser.hasTrailerComment()); + assertNull(parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderTrailerComment2() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("comment", parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + assertFalse(parser.hasTrailerComment()); + assertNull(parser.getTrailerComment()); + } + } + + @Test + public void testGetTrailerComment_HeaderTrailerComment3() throws IOException { + try (CSVParser parser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) { + parser.getRecords(); + assertTrue(parser.hasTrailerComment()); + assertEquals("comment", parser.getTrailerComment()); + } + } + }