diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 58cdb146..3385f418 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -352,6 +352,11 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
         return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
     }
 
+    /** Comment found before the header record; null if none was seen. */
+    private String headerComment;
+
+    /** Comment found between the last record and EOF; null until one is seen. */
+    private String trailerComment;
 
     private final CSVFormat format;
 
@@ -480,10 +485,15 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
                 final CSVRecord nextRecord = this.nextRecord();
                 if (nextRecord != null) {
                     headerRecord = nextRecord.values();
+                    headerComment = nextRecord.getComment();
                 }
             } else {
                 if (this.format.getSkipHeaderRecord()) {
-                    this.nextRecord();
+                    final CSVRecord csvRecord = this.nextRecord();
+                    // nextRecord() returns null when the input has no record to skip
+                    if (csvRecord != null) {
+                        headerComment = csvRecord.getComment();
+                    }
                 }
                 headerRecord = formatHeader;
             }
@@ -596,7 +606,54 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
     public List<String> getHeaderNames() {
         return Collections.unmodifiableList(headers.headerNames);
     }
 
+    /**
+     * Checks whether this parser has seen a header comment.
+     * The header comment appears before the header record.
+     * Note that if the parser's format has been given an explicit header
+     * (with {@link CSVFormat.Builder#setHeader(String...)} or another overload)
+     * and the header record is not being skipped
+     * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false) then any initial comments
+     * will be associated with the first record, not the header.
+     *
+     * @return true if this parser has seen a header comment, false otherwise
+     */
+    public boolean hasHeaderComment() {
+        return headerComment != null;
+    }
+
+    /**
+     * Returns the header comment for this parser, if any.
+     * The header comment appears before the header record.
+     *
+     * @return the header comment for this stream, or null if no comment is available.
+     */
+    public String getHeaderComment() {
+        return headerComment;
+    }
+
+    /**
+     * Checks whether this parser has seen a trailer comment.
+     * Trailer comments are located between the last record and EOF.
+     * The trailer comments will only be available after the parser has
+     * finished processing this stream.
+     *
+     * @return true if this parser has seen a trailer comment, false otherwise
+     */
+    public boolean hasTrailerComment() {
+        return trailerComment != null;
+    }
+
+    /**
+     * Returns the trailer comment for this parser, if any.
+     * Trailer comments are located between the last record and EOF.
+     *
+     * @return the trailer comment for this stream, or null if no comment is available.
+     */
+    public String getTrailerComment() {
+        return trailerComment;
+    }
+
     /**
      * Returns the current record number in the input stream.
      *
@@ -713,6 +770,9 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
             case EOF:
                 if (this.reusableToken.isReady) {
                     this.addRecordValue(true);
+                } else if (sb != null) {
+                    // No final record to attach to: expose whatever sb collected as the trailer comment
+                    trailerComment = sb.toString();
                 }
                 break;
             case INVALID:
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 38663a16..aa6c9777 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -1375,4 +1375,136 @@ public class CSVParserTest {
 
         parser.close();
     }
+
+    @Test
+    public void getHeaderComment() throws IOException {
+        // File with no header comments
+        final String text_1 = "A,B" + CRLF + "1,2" + CRLF;
+        // File with a single line header comment
+        final String text_2 = "# comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF;
+        // File with a multi-line header comment
+        final String text_3 = "# multi-line" + CRLF + "# comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF;
+        // Format with auto-detected header
+        final CSVFormat format_a = CSVFormat.Builder.create(CSVFormat.DEFAULT).setCommentMarker('#').setHeader().build();
+        // Format with explicit header
+        final CSVFormat format_b = CSVFormat.Builder.create(CSVFormat.DEFAULT)
+                .setSkipHeaderRecord(true)
+                .setCommentMarker('#')
+                .setHeader("A", "B")
+                .build();
+        // Format with explicit header that does not skip the header line
+        final CSVFormat format_c = CSVFormat.Builder.create(CSVFormat.DEFAULT)
+                .setCommentMarker('#')
+                .setHeader("A", "B")
+                .build();
+
+        try (CSVParser parser = CSVParser.parse(text_1, format_a)) {
+            parser.getRecords();
+            // Expect no header comment
+            assertFalse(parser.hasHeaderComment());
+            assertNull(parser.getHeaderComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_2, format_a)) {
+            parser.getRecords();
+            // Expect a header comment
+            assertTrue(parser.hasHeaderComment());
+            assertEquals("comment", parser.getHeaderComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_3, format_a)) {
+            parser.getRecords();
+            // Expect a header comment
+            assertTrue(parser.hasHeaderComment());
+            assertEquals("multi-line" + LF + "comment", parser.getHeaderComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_1, format_b)) {
+            parser.getRecords();
+            // Expect no header comment
+            assertFalse(parser.hasHeaderComment());
+            assertNull(parser.getHeaderComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_2, format_b)) {
+            parser.getRecords();
+            // Expect a header comment
+            assertTrue(parser.hasHeaderComment());
+            assertEquals("comment", parser.getHeaderComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_1, format_c)) {
+            parser.getRecords();
+            // Expect no header comment
+            assertFalse(parser.hasHeaderComment());
+            assertNull(parser.getHeaderComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_2, format_c)) {
+            parser.getRecords();
+            // Expect no header comment - the text "comment" is attached to the first record
+            assertFalse(parser.hasHeaderComment());
+            assertNull(parser.getHeaderComment());
+        }
+        try (CSVParser parser = CSVParser.parse("", format_b)) {
+            parser.getRecords();
+            // Empty input: there is no header record to skip, so no comment and no failure
+            assertFalse(parser.hasHeaderComment());
+            assertNull(parser.getHeaderComment());
+        }
+    }
+
+    @Test
+    public void getTrailerComment() throws IOException {
+        // File with a header comment
+        final String text_1 = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF;
+        // File with a single line header and trailer comment
+        final String text_2 = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# comment";
+        // File with a multi-line header and trailer comment
+        final String text_3 = "# multi-line" + CRLF + "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# multi-line" + CRLF + "# comment";
+        // Format with auto-detected header
+        final CSVFormat format_a = CSVFormat.Builder.create(CSVFormat.DEFAULT).setCommentMarker('#').setHeader().build();
+        // Format with explicit header
+        final CSVFormat format_b = CSVFormat.Builder.create(CSVFormat.DEFAULT)
+                .setSkipHeaderRecord(true)
+                .setCommentMarker('#')
+                .setHeader("A", "B")
+                .build();
+        // Format with explicit header that does not skip the header line
+        final CSVFormat format_c = CSVFormat.Builder.create(CSVFormat.DEFAULT)
+                .setCommentMarker('#')
+                .setHeader("A", "B")
+                .build();
+
+        try (CSVParser parser = CSVParser.parse(text_1, format_a)) {
+            parser.getRecords();
+            assertFalse(parser.hasTrailerComment());
+            assertNull(parser.getTrailerComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_2, format_a)) {
+            parser.getRecords();
+            assertTrue(parser.hasTrailerComment());
+            assertEquals("comment", parser.getTrailerComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_3, format_a)) {
+            parser.getRecords();
+            assertTrue(parser.hasTrailerComment());
+            assertEquals("multi-line" + LF + "comment", parser.getTrailerComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_1, format_b)) {
+            parser.getRecords();
+            assertFalse(parser.hasTrailerComment());
+            assertNull(parser.getTrailerComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_2, format_b)) {
+            parser.getRecords();
+            assertTrue(parser.hasTrailerComment());
+            assertEquals("comment", parser.getTrailerComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_1, format_c)) {
+            parser.getRecords();
+            assertFalse(parser.hasTrailerComment());
+            assertNull(parser.getTrailerComment());
+        }
+        try (CSVParser parser = CSVParser.parse(text_2, format_c)) {
+            parser.getRecords();
+            assertTrue(parser.hasTrailerComment());
+            assertEquals("comment", parser.getTrailerComment());
+        }
+    }
+
 }