Merge pull request #257 from pedro-w/csv-304-header-comments

[CSV-304] Accessors for header/trailer comments
This commit is contained in:
Gary Gregory 2022-09-12 09:43:21 -07:00 committed by GitHub
commit 2e851bc960
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 218 additions and 1 deletion

View File

@ -353,6 +353,10 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
return new CSVParser(new InputStreamReader(url.openStream(), charset), format); return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
} }
private String headerComment;
private String trailerComment;
private final CSVFormat format; private final CSVFormat format;
private final Headers headers; private final Headers headers;
@ -480,10 +484,14 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
final CSVRecord nextRecord = this.nextRecord(); final CSVRecord nextRecord = this.nextRecord();
if (nextRecord != null) { if (nextRecord != null) {
headerRecord = nextRecord.values(); headerRecord = nextRecord.values();
headerComment = nextRecord.getComment();
} }
} else { } else {
if (this.format.getSkipHeaderRecord()) { if (this.format.getSkipHeaderRecord()) {
this.nextRecord(); final CSVRecord nextRecord = this.nextRecord();
if (nextRecord != null) {
headerComment = nextRecord.getComment();
}
} }
headerRecord = formatHeader; headerRecord = formatHeader;
} }
@ -597,6 +605,57 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
return Collections.unmodifiableList(headers.headerNames); return Collections.unmodifiableList(headers.headerNames);
} }
/**
 * Tests whether this parser has recorded a header comment.
 * <p>
 * A header comment is comment text that appears before the header record.
 * When the parser's format has been given an explicit header
 * (through {@link CSVFormat.Builder#setHeader(String...)} or another overload)
 * and the header record is not skipped
 * ({@link CSVFormat.Builder#setSkipHeaderRecord} is false), any initial comments
 * are associated with the first record rather than with the header.
 * </p>
 *
 * @return {@code true} if this parser has seen a header comment, {@code false} otherwise.
 * @since 1.10.0
 */
public boolean hasHeaderComment() {
    return this.headerComment != null;
}
/**
 * Gets the header comment, if one was seen.
 * <p>
 * The header comment is the comment text that appears before the header record.
 * </p>
 *
 * @return the header comment for this stream, or {@code null} if no comment is available.
 * @since 1.10.0
 */
public String getHeaderComment() {
    return this.headerComment;
}
/**
 * Tests whether this parser has recorded a trailer comment.
 * <p>
 * Trailer comments are located between the last record and EOF, so they
 * only become available once the parser has finished processing this stream.
 * </p>
 *
 * @return {@code true} if this parser has seen a trailer comment, {@code false} otherwise.
 * @since 1.10.0
 */
public boolean hasTrailerComment() {
    return this.trailerComment != null;
}
/**
 * Gets the trailer comment, if one was seen.
 * <p>
 * Trailer comments are located between the last record and EOF.
 * </p>
 *
 * @return the trailer comment for this stream, or {@code null} if no comment is available.
 * @since 1.10.0
 */
public String getTrailerComment() {
    return this.trailerComment;
}
/** /**
* Returns the current record number in the input stream. * Returns the current record number in the input stream.
* *
@ -713,6 +772,8 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
case EOF: case EOF:
if (this.reusableToken.isReady) { if (this.reusableToken.isReady) {
this.addRecordValue(true); this.addRecordValue(true);
} else if (sb != null) {
trailerComment = sb.toString();
} }
break; break;
case INVALID: case INVALID:

View File

@ -1349,4 +1349,160 @@ public class CSVParserTest {
parser.close(); parser.close();
} }
// CSV input without any comments.
private static final String CSV_INPUT_NO_COMMENT = "A,B" + CRLF + "1,2" + CRLF;

// CSV input with a single-line header comment.
private static final String CSV_INPUT_HEADER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF;

// CSV input with a single-line header comment and a single-line trailer comment.
private static final String CSV_INPUT_HEADER_TRAILER_COMMENT = "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# comment";

// CSV input with a multi-line header comment and a multi-line trailer comment.
private static final String CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT = "# multi-line" + CRLF + "# header comment" + CRLF + "A,B" + CRLF + "1,2" + CRLF + "# multi-line" + CRLF + "# comment";

// Format with an auto-detected header.
private static final CSVFormat FORMAT_AUTO_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT)
    .setCommentMarker('#')
    .setHeader()
    .build();

// Format with an explicit header that skips the header record.
private static final CSVFormat FORMAT_EXPLICIT_HEADER = CSVFormat.Builder.create(CSVFormat.DEFAULT)
    .setSkipHeaderRecord(true)
    .setCommentMarker('#')
    .setHeader("A", "B")
    .build();

// Format with an explicit header that does not skip the header line.
// Declared private static final like the other fixtures: it is a shared constant, not per-test-instance state.
private static final CSVFormat FORMAT_EXPLICIT_HEADER_NOSKIP = CSVFormat.Builder.create(CSVFormat.DEFAULT)
    .setCommentMarker('#')
    .setHeader("A", "B")
    .build();
/**
 * Auto-detected header, input without comments: no header comment is reported.
 */
@Test
public void testGetHeaderComment_NoComment1() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_AUTO_HEADER)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertFalse(csvParser.hasHeaderComment());
        assertNull(csvParser.getHeaderComment());
    }
}
/**
 * Auto-detected header, input with a header comment: the comment is reported.
 */
@Test
public void testGetHeaderComment_HeaderComment1() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertTrue(csvParser.hasHeaderComment());
        assertEquals("header comment", csvParser.getHeaderComment());
    }
}
/**
 * Auto-detected header, input with multi-line header and trailer comments:
 * the header comment lines are joined with LF.
 */
@Test
public void testGetHeaderComment_HeaderTrailerComment() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertTrue(csvParser.hasHeaderComment());
        assertEquals("multi-line" + LF + "header comment", csvParser.getHeaderComment());
    }
}
/**
 * Explicit header with the header record skipped, input without comments:
 * no header comment is reported.
 */
@Test
public void testGetHeaderComment_NoComment2() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertFalse(csvParser.hasHeaderComment());
        assertNull(csvParser.getHeaderComment());
    }
}
/**
 * Explicit header with the header record skipped, input with a header comment:
 * the comment is reported.
 */
@Test
public void testGetHeaderComment_HeaderComment2() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertTrue(csvParser.hasHeaderComment());
        assertEquals("header comment", csvParser.getHeaderComment());
    }
}
/**
 * Explicit header without skipping the header record, input without comments:
 * no header comment is reported.
 */
@Test
public void testGetHeaderComment_NoComment3() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_NO_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertFalse(csvParser.hasHeaderComment());
        assertNull(csvParser.getHeaderComment());
    }
}
/**
 * Explicit header without skipping the header record, input with an initial comment:
 * the comment is attached to the first record, so no header comment is reported.
 */
@Test
public void testGetHeaderComment_HeaderComment3() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertFalse(csvParser.hasHeaderComment());
        assertNull(csvParser.getHeaderComment());
    }
}
/**
 * Auto-detected header, input with only a header comment: no trailer comment is reported.
 */
@Test
public void testGetTrailerComment_HeaderComment1() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_AUTO_HEADER)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertFalse(csvParser.hasTrailerComment());
        assertNull(csvParser.getTrailerComment());
    }
}
/**
 * Auto-detected header, input with header and trailer comments: the trailer comment is reported.
 */
@Test
public void testGetTrailerComment_HeaderTrailerComment1() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertTrue(csvParser.hasTrailerComment());
        assertEquals("comment", csvParser.getTrailerComment());
    }
}
/**
 * Auto-detected header, input with multi-line header and trailer comments:
 * the trailer comment lines are joined with LF.
 */
@Test
public void testGetTrailerComment_MultilineComment() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_MULTILINE_HEADER_TRAILER_COMMENT, FORMAT_AUTO_HEADER)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertTrue(csvParser.hasTrailerComment());
        assertEquals("multi-line" + LF + "comment", csvParser.getTrailerComment());
    }
}
/**
 * Explicit header with the header record skipped, input with only a header comment:
 * no trailer comment is reported.
 */
@Test
public void testGetTrailerComment_HeaderComment2() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertFalse(csvParser.hasTrailerComment());
        assertNull(csvParser.getTrailerComment());
    }
}
/**
 * Explicit header with the header record skipped, input with header and trailer comments:
 * the trailer comment is reported.
 */
@Test
public void testGetTrailerComment_HeaderTrailerComment2() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertTrue(csvParser.hasTrailerComment());
        assertEquals("comment", csvParser.getTrailerComment());
    }
}
/**
 * Explicit header without skipping the header record, input with only an initial comment:
 * no trailer comment is reported.
 */
@Test
public void testGetTrailerComment_HeaderComment3() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_HEADER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertFalse(csvParser.hasTrailerComment());
        assertNull(csvParser.getTrailerComment());
    }
}
/**
 * Explicit header without skipping the header record, input with initial and trailer comments:
 * the trailer comment is reported.
 */
@Test
public void testGetTrailerComment_HeaderTrailerComment3() throws IOException {
    try (final CSVParser csvParser = CSVParser.parse(CSV_INPUT_HEADER_TRAILER_COMMENT, FORMAT_EXPLICIT_HEADER_NOSKIP)) {
        // Read the whole input before querying the comment accessors.
        csvParser.getRecords();
        assertTrue(csvParser.hasTrailerComment());
        assertEquals("comment", csvParser.getTrailerComment());
    }
}
} }