[CSV-214] Adding a placeholder in the Lexer and CSV parser to store the
end-of-line string. I applied the patch in spirit, with two changes: there is no need for a boolean to track whether the EOL string has been set (a null check on the string is enough), and a lone CR is now also saved as an EOL string, since the lexer already accepts CR as a line terminator.
This commit is contained in:
parent
4d0f226006
commit
aae6f90442
|
@ -51,6 +51,7 @@
|
|||
<action issue="CSV-192" type="add" dev="ggregory" due-to="Gary Gregory">Add convenience API CSVParser.parse(Path, Charset, CSVFormat)</action>
|
||||
<action issue="CSV-205" type="add" dev="ggregory" due-to="Gary Gregory">Add convenience API CSVFormat#printer() to print to System.out</action>
|
||||
<action issue="CSV-207" type="add" dev="ggregory" due-to="Gary Gregory">Provide a CSV Format for printing PostgreSQL CSV and Text formats.</action>
|
||||
<action issue="CSV-214" type="add" dev="ggregory" due-to="Nitin Mahendru, Gary Gregory">Adding a placeholder in the Lexer and CSV parser to store the end-of-line string.</action>
|
||||
</release>
|
||||
<release version="1.4" date="2016-05-28" description="Feature and bug fix release">
|
||||
<action issue="CSV-181" type="update" dev="ggregory" due-to="Gary Gregory">Make CSVPrinter.print(Object) GC-free.</action>
|
||||
|
|
|
@ -396,6 +396,16 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
|
|||
return this.lexer.getCurrentLineNumber();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the first end-of-line string encountered by the underlying lexer.
|
||||
*
|
||||
* @return the first end-of-line string ({@code "\r\n"}, {@code "\n"} or {@code "\r"}), or {@code null} if no end-of-line has been encountered so far
|
||||
* @since 1.5
|
||||
*/
|
||||
public String getFirstEndOfLine() {
|
||||
return lexer.getFirstEol();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of the header map that iterates in column order.
|
||||
* <p>
|
||||
|
|
|
@ -40,6 +40,9 @@ import java.io.IOException;
|
|||
*/
|
||||
final class Lexer implements Closeable {
|
||||
|
||||
/** String form of {@link Constants#CR}; recorded as the first EOL when a lone CR ends the first line. */
private static final String CR_STRING = Character.toString(Constants.CR);
|
||||
/** String form of {@link Constants#LF}; recorded as the first EOL when an LF ends the first line. */
private static final String LF_STRING = Character.toString(Constants.LF);
|
||||
|
||||
/**
|
||||
* Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it
|
||||
* won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
|
||||
|
@ -57,6 +60,11 @@ final class Lexer implements Closeable {
|
|||
|
||||
/** The input stream */
|
||||
private final ExtendedBufferedReader reader;
|
||||
/** The first end-of-line string seen by this lexer; stays {@code null} until one is recorded. */
private String firstEol;
|
||||
|
||||
/**
* Returns the first end-of-line string this lexer has recorded.
*
* @return the recorded end-of-line string, or {@code null} if none has been recorded yet
*/
String getFirstEol(){
|
||||
return firstEol;
|
||||
}
|
||||
|
||||
Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
|
||||
this.reader = reader;
|
||||
|
@ -374,7 +382,20 @@ final class Lexer implements Closeable {
|
|||
if (ch == CR && reader.lookAhead() == LF) {
|
||||
// note: does not change ch outside of this method!
|
||||
ch = reader.read();
|
||||
// Save the EOL state
|
||||
if (firstEol == null) {
|
||||
this.firstEol = Constants.CRLF;
|
||||
}
|
||||
}
|
||||
// save EOL state here.
|
||||
if (firstEol == null) {
|
||||
if (ch == LF) {
|
||||
this.firstEol = LF_STRING;
|
||||
} else if (ch == CR) {
|
||||
this.firstEol = CR_STRING;
|
||||
}
|
||||
}
|
||||
|
||||
return ch == LF || ch == CR;
|
||||
}
|
||||
|
||||
|
|
|
@ -235,6 +235,36 @@ public class CSVParserTest {
|
|||
}
|
||||
}
|
||||
|
||||
/** Parses four CRLF-separated records and expects {@code "\r\n"} as the first end-of-line string. */
@Test
|
||||
public void testFirstEndOfLineCrLf() throws IOException {
|
||||
final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
|
||||
try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
|
||||
final List<CSVRecord> records = parser.getRecords();
|
||||
assertEquals(4, records.size());
|
||||
assertEquals("\r\n", parser.getFirstEndOfLine());
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses four LF-separated records and expects {@code "\n"} as the first end-of-line string. */
@Test
|
||||
public void testFirstEndOfLineLf() throws IOException {
|
||||
final String data = "foo\nbaar,\nhello,world\n,kanu";
|
||||
try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
|
||||
final List<CSVRecord> records = parser.getRecords();
|
||||
assertEquals(4, records.size());
|
||||
assertEquals("\n", parser.getFirstEndOfLine());
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses four CR-separated records and expects {@code "\r"} as the first end-of-line string. */
@Test
|
||||
public void testFirstEndOfLineCr() throws IOException {
|
||||
final String data = "foo\rbaar,\rhello,world\r,kanu";
|
||||
try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
|
||||
final List<CSVRecord> records = parser.getRecords();
|
||||
assertEquals(4, records.size());
|
||||
assertEquals("\r", parser.getFirstEndOfLine());
|
||||
}
|
||||
}
|
||||
|
||||
@Test(expected = NoSuchElementException.class)
|
||||
public void testClose() throws Exception {
|
||||
final Reader in = new StringReader("# comment\na,b,c\n1,2,3\nx,y,z");
|
||||
|
|
Loading…
Reference in New Issue