Sort members.
This commit is contained in:
parent
d7b1574483
commit
46f9211d60
|
@ -122,19 +122,6 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the next n characters in the current reader without consuming them. The next call to {@link #read()} will still return the next value. This
|
|
||||||
* doesn't affect line number or last character.
|
|
||||||
*
|
|
||||||
* @param n the number characters look ahead.
|
|
||||||
* @return the next n characters.
|
|
||||||
* @throws IOException If an I/O error occurs
|
|
||||||
*/
|
|
||||||
char[] lookAhead(final int n) throws IOException {
|
|
||||||
final char[] buf = new char[n];
|
|
||||||
return lookAhead(buf);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Populates the buffer with the next {@code buf.length} characters in the
|
* Populates the buffer with the next {@code buf.length} characters in the
|
||||||
* current reader without consuming them. The next call to {@link #read()} will
|
* current reader without consuming them. The next call to {@link #read()} will
|
||||||
|
@ -154,6 +141,19 @@ final class ExtendedBufferedReader extends BufferedReader {
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next n characters in the current reader without consuming them. The next call to {@link #read()} will still return the next value. This
|
||||||
|
* doesn't affect line number or last character.
|
||||||
|
*
|
||||||
|
* @param n the number characters look ahead.
|
||||||
|
* @return the next n characters.
|
||||||
|
* @throws IOException If an I/O error occurs
|
||||||
|
*/
|
||||||
|
char[] lookAhead(final int n) throws IOException {
|
||||||
|
final char[] buf = new char[n];
|
||||||
|
return lookAhead(buf);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int read() throws IOException {
|
public int read() throws IOException {
|
||||||
final int current = super.read();
|
final int current = super.read();
|
||||||
|
|
|
@ -42,6 +42,11 @@ public class CSVFileParserTest {
|
||||||
|
|
||||||
private static final File BASE_DIR = new File("src/test/resources/org/apache/commons/csv/CSVFileParser");
|
private static final File BASE_DIR = new File("src/test/resources/org/apache/commons/csv/CSVFileParser");
|
||||||
|
|
||||||
|
public static Stream<File> generateData() {
|
||||||
|
final File[] files = BASE_DIR.listFiles((dir, name) -> name.startsWith("test") && name.endsWith(".txt"));
|
||||||
|
return files != null ? Stream.of(files) : Stream.empty();
|
||||||
|
}
|
||||||
|
|
||||||
private String readTestData(final BufferedReader reader) throws IOException {
|
private String readTestData(final BufferedReader reader) throws IOException {
|
||||||
String line;
|
String line;
|
||||||
do {
|
do {
|
||||||
|
@ -50,11 +55,6 @@ public class CSVFileParserTest {
|
||||||
return line;
|
return line;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Stream<File> generateData() {
|
|
||||||
final File[] files = BASE_DIR.listFiles((dir, name) -> name.startsWith("test") && name.endsWith(".txt"));
|
|
||||||
return files != null ? Stream.of(files) : Stream.empty();
|
|
||||||
}
|
|
||||||
|
|
||||||
@ParameterizedTest
|
@ParameterizedTest
|
||||||
@MethodSource("generateData")
|
@MethodSource("generateData")
|
||||||
public void testCSVFile(final File testFile) throws Exception {
|
public void testCSVFile(final File testFile) throws Exception {
|
||||||
|
|
|
@ -41,6 +41,16 @@ public class CSVFormatPredefinedTest {
|
||||||
test(CSVFormat.EXCEL, "Excel");
|
test(CSVFormat.EXCEL, "Excel");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMongoDbCsv() {
|
||||||
|
test(CSVFormat.MONGODB_CSV, "MongoDBCsv");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMongoDbTsv() {
|
||||||
|
test(CSVFormat.MONGODB_TSV, "MongoDBTsv");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMySQL() {
|
public void testMySQL() {
|
||||||
test(CSVFormat.MYSQL, "MySQL");
|
test(CSVFormat.MYSQL, "MySQL");
|
||||||
|
@ -56,16 +66,6 @@ public class CSVFormatPredefinedTest {
|
||||||
test(CSVFormat.POSTGRESQL_CSV, "PostgreSQLCsv");
|
test(CSVFormat.POSTGRESQL_CSV, "PostgreSQLCsv");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testMongoDbCsv() {
|
|
||||||
test(CSVFormat.MONGODB_CSV, "MongoDBCsv");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testMongoDbTsv() {
|
|
||||||
test(CSVFormat.MONGODB_TSV, "MongoDBTsv");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPostgreSqlText() {
|
public void testPostgreSqlText() {
|
||||||
test(CSVFormat.POSTGRESQL_TEXT, "PostgreSQLText");
|
test(CSVFormat.POSTGRESQL_TEXT, "PostgreSQLText");
|
||||||
|
|
|
@ -264,6 +264,15 @@ public class CSVRecordTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testStream() {
|
||||||
|
final AtomicInteger i = new AtomicInteger();
|
||||||
|
record.stream().forEach(value -> {
|
||||||
|
assertEquals(values[i.get()], value);
|
||||||
|
i.incrementAndGet();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testToList() {
|
public void testToList() {
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
@ -297,15 +306,6 @@ public class CSVRecordTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testStream() {
|
|
||||||
final AtomicInteger i = new AtomicInteger();
|
|
||||||
record.stream().forEach(value -> {
|
|
||||||
assertEquals(values[i.get()], value);
|
|
||||||
i.incrementAndGet();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testToString() {
|
public void testToString() {
|
||||||
assertNotNull(recordWithHeader.toString());
|
assertNotNull(recordWithHeader.toString());
|
||||||
|
|
|
@ -47,63 +47,65 @@ public class LexerTest {
|
||||||
|
|
||||||
private CSVFormat formatWithEscaping;
|
private CSVFormat formatWithEscaping;
|
||||||
|
|
||||||
@BeforeEach
|
|
||||||
public void setUp() {
|
|
||||||
formatWithEscaping = CSVFormat.DEFAULT.withEscape('\\');
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressWarnings("resource")
|
@SuppressWarnings("resource")
|
||||||
private Lexer createLexer(final String input, final CSVFormat format) {
|
private Lexer createLexer(final String input, final CSVFormat format) {
|
||||||
return new Lexer(format, new ExtendedBufferedReader(new StringReader(input)));
|
return new Lexer(format, new ExtendedBufferedReader(new StringReader(input)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@BeforeEach
|
||||||
public void testSurroundingSpacesAreDeleted() throws IOException {
|
public void setUp() {
|
||||||
final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,";
|
formatWithEscaping = CSVFormat.DEFAULT.withEscape('\\');
|
||||||
try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces"));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces"));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces"));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingSpaces"));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// simple token with escaping enabled
|
||||||
@Test
|
@Test
|
||||||
public void testSurroundingTabsAreDeleted() throws IOException {
|
public void testBackslashWithEscaping() throws IOException {
|
||||||
final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,";
|
/*
|
||||||
try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
|
* file: a,\,,b \,,
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs"));
|
*/
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab"));
|
final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne";
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab"));
|
final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false);
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingTabs"));
|
assertTrue(format.isEscapeCharacterSet());
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testIgnoreEmptyLines() throws IOException {
|
|
||||||
final String code = "first,line,\n" + "\n" + "\n" + "second,line\n" + "\n" + "\n" + "third line \n" + "\n" +
|
|
||||||
"\n" + "last, line \n" + "\n" + "\n" + "\n";
|
|
||||||
final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines();
|
|
||||||
try (final Lexer parser = createLexer(code, format)) {
|
try (final Lexer parser = createLexer(code, format)) {
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "first"));
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
|
||||||
assertThat(parser.nextToken(new Token()), matches(EORECORD, ""));
|
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\"));
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "second"));
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
|
||||||
assertThat(parser.nextToken(new Token()), matches(EORECORD, "line"));
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\nc"));
|
||||||
assertThat(parser.nextToken(new Token()), matches(EORECORD, "third line "));
|
assertThat(parser.nextToken(new Token()), matches(EORECORD, "d\r"));
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "last"));
|
assertThat(parser.nextToken(new Token()), matches(EOF, "e"));
|
||||||
assertThat(parser.nextToken(new Token()), matches(EORECORD, " line "));
|
}
|
||||||
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
|
}
|
||||||
|
|
||||||
|
// simple token with escaping not enabled
|
||||||
|
@Test
|
||||||
|
public void testBackslashWithoutEscaping() throws IOException {
|
||||||
|
/*
|
||||||
|
* file: a,\,,b \,,
|
||||||
|
*/
|
||||||
|
final String code = "a,\\,,b\\\n\\,,";
|
||||||
|
final CSVFormat format = CSVFormat.DEFAULT;
|
||||||
|
assertFalse(format.isEscapeCharacterSet());
|
||||||
|
try (final Lexer parser = createLexer(code, format)) {
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
|
||||||
|
// an unquoted single backslash is not an escape char
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\"));
|
||||||
|
// an unquoted single backslash is not an escape char
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
||||||
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
|
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBackspace() throws Exception {
|
||||||
|
try (final Lexer lexer = createLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping)) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testComments() throws IOException {
|
public void testComments() throws IOException {
|
||||||
final String code = "first,line,\n" + "second,line,tokenWith#no-comment\n" + "# comment line \n" +
|
final String code = "first,line,\n" + "second,line,tokenWith#no-comment\n" + "# comment line \n" +
|
||||||
|
@ -173,45 +175,141 @@ public class LexerTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// simple token with escaping not enabled
|
|
||||||
@Test
|
@Test
|
||||||
public void testBackslashWithoutEscaping() throws IOException {
|
public void testCR() throws Exception {
|
||||||
/*
|
try (final Lexer lexer = createLexer("character" + CR + "NotEscaped", formatWithEscaping)) {
|
||||||
* file: a,\,,b \,,
|
assertThat(lexer.nextToken(new Token()), hasContent("character"));
|
||||||
*/
|
assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
|
||||||
final String code = "a,\\,,b\\\n\\,,";
|
}
|
||||||
final CSVFormat format = CSVFormat.DEFAULT;
|
}
|
||||||
assertFalse(format.isEscapeCharacterSet());
|
|
||||||
|
// From CSV-1
|
||||||
|
@Test
|
||||||
|
public void testDelimiterIsWhitespace() throws IOException {
|
||||||
|
final String code = "one\ttwo\t\tfour \t five\t six";
|
||||||
|
try (final Lexer parser = createLexer(code, CSVFormat.TDF)) {
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "one"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "two"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "four"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "five"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(EOF, "six"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped?
|
||||||
|
public void testEscapedBackspace() throws Exception {
|
||||||
|
try (final Lexer lexer = createLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping)) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEscapedCharacter() throws Exception {
|
||||||
|
try (final Lexer lexer = createLexer("character\\aEscaped", formatWithEscaping)) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEscapedControlCharacter() throws Exception {
|
||||||
|
// we are explicitly using an escape different from \ here
|
||||||
|
try (final Lexer lexer = createLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'))) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEscapedControlCharacter2() throws Exception {
|
||||||
|
try (final Lexer lexer = createLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'))) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEscapedCR() throws Exception {
|
||||||
|
try (final Lexer lexer = createLexer("character\\" + CR + "Escaped", formatWithEscaping)) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test // TODO is this correct? Do we expect <esc>FF to be unescaped?
|
||||||
|
public void testEscapedFF() throws Exception {
|
||||||
|
try (final Lexer lexer = createLexer("character\\" + FF + "Escaped", formatWithEscaping)) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEscapedLF() throws Exception {
|
||||||
|
try (final Lexer lexer = createLexer("character\\" + LF + "Escaped", formatWithEscaping)) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEscapedMySqlNullValue() throws Exception {
|
||||||
|
// MySQL uses \N to symbolize null values. We have to restore this
|
||||||
|
try (final Lexer lexer = createLexer("character\\NEscaped", formatWithEscaping)) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test // TODO is this correct? Do we expect <esc>TAB to be unescaped?
|
||||||
|
public void testEscapedTab() throws Exception {
|
||||||
|
try (final Lexer lexer = createLexer("character\\" + TAB + "Escaped", formatWithEscaping)) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEscapingAtEOF() throws Exception {
|
||||||
|
final String code = "escaping at EOF is evil\\";
|
||||||
|
try (final Lexer lexer = createLexer(code, formatWithEscaping)) {
|
||||||
|
assertThrows(IOException.class, () -> lexer.nextToken(new Token()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFF() throws Exception {
|
||||||
|
try (final Lexer lexer = createLexer("character" + FF + "NotEscaped", formatWithEscaping)) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIgnoreEmptyLines() throws IOException {
|
||||||
|
final String code = "first,line,\n" + "\n" + "\n" + "second,line\n" + "\n" + "\n" + "third line \n" + "\n" +
|
||||||
|
"\n" + "last, line \n" + "\n" + "\n" + "\n";
|
||||||
|
final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines();
|
||||||
try (final Lexer parser = createLexer(code, format)) {
|
try (final Lexer parser = createLexer(code, format)) {
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "first"));
|
||||||
// an unquoted single backslash is not an escape char
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\"));
|
assertThat(parser.nextToken(new Token()), matches(EORECORD, ""));
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "second"));
|
||||||
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\"));
|
assertThat(parser.nextToken(new Token()), matches(EORECORD, "line"));
|
||||||
// an unquoted single backslash is not an escape char
|
assertThat(parser.nextToken(new Token()), matches(EORECORD, "third line "));
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\"));
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "last"));
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
assertThat(parser.nextToken(new Token()), matches(EORECORD, " line "));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
|
||||||
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
|
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// simple token with escaping enabled
|
|
||||||
@Test
|
@Test
|
||||||
public void testBackslashWithEscaping() throws IOException {
|
public void testIsMetaCharCommentStart() throws IOException {
|
||||||
/*
|
try (final Lexer lexer = createLexer("#", CSVFormat.DEFAULT.withCommentMarker('#'))) {
|
||||||
* file: a,\,,b \,,
|
final int ch = lexer.readEscape();
|
||||||
*/
|
assertEquals('#', ch);
|
||||||
final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne";
|
}
|
||||||
final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false);
|
}
|
||||||
assertTrue(format.isEscapeCharacterSet());
|
|
||||||
try (final Lexer parser = createLexer(code, format)) {
|
@Test
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
|
public void testLF() throws Exception {
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
|
try (final Lexer lexer = createLexer("character" + LF + "NotEscaped", formatWithEscaping)) {
|
||||||
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\"));
|
assertThat(lexer.nextToken(new Token()), hasContent("character"));
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
|
assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\nc"));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(EORECORD, "d\r"));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(EOF, "e"));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -266,148 +364,6 @@ public class LexerTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// From CSV-1
|
|
||||||
@Test
|
|
||||||
public void testDelimiterIsWhitespace() throws IOException {
|
|
||||||
final String code = "one\ttwo\t\tfour \t five\t six";
|
|
||||||
try (final Lexer parser = createLexer(code, CSVFormat.TDF)) {
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "one"));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "two"));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "four"));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(TOKEN, "five"));
|
|
||||||
assertThat(parser.nextToken(new Token()), matches(EOF, "six"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEscapedCR() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character\\" + CR + "Escaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testCR() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character" + CR + "NotEscaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character"));
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEscapedLF() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character\\" + LF + "Escaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testLF() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character" + LF + "NotEscaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character"));
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test // TODO is this correct? Do we expect <esc>TAB to be unescaped?
|
|
||||||
public void testEscapedTab() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character\\" + TAB + "Escaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped"));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testTab() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character" + TAB + "NotEscaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped?
|
|
||||||
public void testEscapedBackspace() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testBackspace() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test // TODO is this correct? Do we expect <esc>FF to be unescaped?
|
|
||||||
public void testEscapedFF() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character\\" + FF + "Escaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testFF() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character" + FF + "NotEscaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEscapedMySqlNullValue() throws Exception {
|
|
||||||
// MySQL uses \N to symbolize null values. We have to restore this
|
|
||||||
try (final Lexer lexer = createLexer("character\\NEscaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEscapedCharacter() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character\\aEscaped", formatWithEscaping)) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEscapedControlCharacter() throws Exception {
|
|
||||||
// we are explicitly using an escape different from \ here
|
|
||||||
try (final Lexer lexer = createLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'))) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEscapedControlCharacter2() throws Exception {
|
|
||||||
try (final Lexer lexer = createLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'))) {
|
|
||||||
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testEscapingAtEOF() throws Exception {
|
|
||||||
final String code = "escaping at EOF is evil\\";
|
|
||||||
try (final Lexer lexer = createLexer(code, formatWithEscaping)) {
|
|
||||||
assertThrows(IOException.class, () -> lexer.nextToken(new Token()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testTrimTrailingSpacesZeroLength() throws Exception {
|
|
||||||
final StringBuilder buffer = new StringBuilder("");
|
|
||||||
final Lexer lexer = createLexer(buffer.toString(), CSVFormat.DEFAULT);
|
|
||||||
lexer.trimTrailingSpaces(buffer);
|
|
||||||
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testReadEscapeTab() throws IOException {
|
|
||||||
try (final Lexer lexer = createLexer("t", CSVFormat.DEFAULT.withEscape('\t'))) {
|
|
||||||
final int ch = lexer.readEscape();
|
|
||||||
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
|
|
||||||
assertEquals(TAB, ch);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testReadEscapeBackspace() throws IOException {
|
public void testReadEscapeBackspace() throws IOException {
|
||||||
try (final Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
|
try (final Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
|
||||||
|
@ -425,10 +381,54 @@ public class LexerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIsMetaCharCommentStart() throws IOException {
|
public void testReadEscapeTab() throws IOException {
|
||||||
try (final Lexer lexer = createLexer("#", CSVFormat.DEFAULT.withCommentMarker('#'))) {
|
try (final Lexer lexer = createLexer("t", CSVFormat.DEFAULT.withEscape('\t'))) {
|
||||||
final int ch = lexer.readEscape();
|
final int ch = lexer.readEscape();
|
||||||
assertEquals('#', ch);
|
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
|
||||||
|
assertEquals(TAB, ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSurroundingSpacesAreDeleted() throws IOException {
|
||||||
|
final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,";
|
||||||
|
try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingSpaces"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSurroundingTabsAreDeleted() throws IOException {
|
||||||
|
final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,";
|
||||||
|
try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingTabs"));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
|
||||||
|
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTab() throws Exception {
|
||||||
|
try (final Lexer lexer = createLexer("character" + TAB + "NotEscaped", formatWithEscaping)) {
|
||||||
|
assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTrimTrailingSpacesZeroLength() throws Exception {
|
||||||
|
final StringBuilder buffer = new StringBuilder("");
|
||||||
|
final Lexer lexer = createLexer(buffer.toString(), CSVFormat.DEFAULT);
|
||||||
|
lexer.trimTrailingSpaces(buffer);
|
||||||
|
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,6 +41,21 @@ import org.apache.commons.io.IOUtils;
|
||||||
@SuppressWarnings("boxing")
|
@SuppressWarnings("boxing")
|
||||||
public class PerformanceTest {
|
public class PerformanceTest {
|
||||||
|
|
||||||
|
@FunctionalInterface
|
||||||
|
private interface CSVParserFactory {
|
||||||
|
CSVParser createParser() throws IOException;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Container for basic statistics
|
||||||
|
private static class Stats {
|
||||||
|
final int count;
|
||||||
|
final int fields;
|
||||||
|
Stats(final int c, final int f) {
|
||||||
|
count = c;
|
||||||
|
fields = f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static final String[] PROPS = {
|
private static final String[] PROPS = {
|
||||||
"java.version", // Java Runtime Environment version
|
"java.version", // Java Runtime Environment version
|
||||||
"java.vendor", // Java Runtime Environment vendor
|
"java.vendor", // Java Runtime Environment vendor
|
||||||
|
@ -58,17 +73,42 @@ public class PerformanceTest {
|
||||||
"os.arch", // Operating system architecture
|
"os.arch", // Operating system architecture
|
||||||
"os.version", // Operating system version
|
"os.version", // Operating system version
|
||||||
};
|
};
|
||||||
|
|
||||||
private static int max = 11; // skip first test
|
private static int max = 11; // skip first test
|
||||||
|
|
||||||
private static int num; // number of elapsed times recorded
|
private static int num; // number of elapsed times recorded
|
||||||
private static final long[] ELAPSED_TIMES = new long[max];
|
|
||||||
|
|
||||||
|
private static final long[] ELAPSED_TIMES = new long[max];
|
||||||
private static final CSVFormat format = CSVFormat.EXCEL;
|
private static final CSVFormat format = CSVFormat.EXCEL;
|
||||||
|
|
||||||
private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
|
private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
|
||||||
|
|
||||||
private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt");
|
private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt");
|
||||||
|
|
||||||
|
private static Reader createReader() throws IOException {
|
||||||
|
return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input)
|
||||||
|
throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception {
|
||||||
|
return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
final Class<Lexer> lexer = (Class<Lexer>) Class.forName("org.apache.commons.csv." + clazz);
|
||||||
|
return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Stats iterate(final Iterable<CSVRecord> it) {
|
||||||
|
int count = 0;
|
||||||
|
int fields = 0;
|
||||||
|
for (final CSVRecord record : it) {
|
||||||
|
count++;
|
||||||
|
fields += record.size();
|
||||||
|
}
|
||||||
|
return new Stats(count, fields);
|
||||||
|
}
|
||||||
|
|
||||||
public static void main(final String [] args) throws Exception {
|
public static void main(final String [] args) throws Exception {
|
||||||
if (BIG_FILE.exists()) {
|
if (BIG_FILE.exists()) {
|
||||||
System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length());
|
System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length());
|
||||||
|
@ -128,26 +168,15 @@ public class PerformanceTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Reader createReader() throws IOException {
|
private static Stats readAll(final BufferedReader in, final boolean split) throws IOException {
|
||||||
return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1);
|
int count = 0;
|
||||||
}
|
int fields = 0;
|
||||||
|
String record;
|
||||||
// Container for basic statistics
|
while ((record = in.readLine()) != null) {
|
||||||
private static class Stats {
|
count++;
|
||||||
final int count;
|
fields += split ? record.split(",").length : 1;
|
||||||
final int fields;
|
|
||||||
Stats(final int c, final int f) {
|
|
||||||
count = c;
|
|
||||||
fields = f;
|
|
||||||
}
|
}
|
||||||
}
|
return new Stats(count, fields);
|
||||||
|
|
||||||
// Display end stats; store elapsed for average
|
|
||||||
private static void show(final String msg, final Stats s, final long start) {
|
|
||||||
final long elapsed = System.currentTimeMillis() - start;
|
|
||||||
System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields);
|
|
||||||
ELAPSED_TIMES[num] = elapsed;
|
|
||||||
num++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// calculate and show average
|
// calculate and show average
|
||||||
|
@ -162,106 +191,12 @@ public class PerformanceTest {
|
||||||
num = 0; // ready for next set
|
num = 0; // ready for next set
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void testReadBigFile(final boolean split) throws Exception {
|
// Display end stats; store elapsed for average
|
||||||
for (int i = 0; i < max; i++) {
|
private static void show(final String msg, final Stats s, final long start) {
|
||||||
final long startMillis;
|
final long elapsed = System.currentTimeMillis() - start;
|
||||||
final Stats stats;
|
System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields);
|
||||||
try (final BufferedReader in = new BufferedReader(createReader())) {
|
ELAPSED_TIMES[num] = elapsed;
|
||||||
startMillis = System.currentTimeMillis();
|
num++;
|
||||||
stats = readAll(in, split);
|
|
||||||
}
|
|
||||||
show(split ? "file+split" : "file", stats, startMillis);
|
|
||||||
}
|
|
||||||
show();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Stats readAll(final BufferedReader in, final boolean split) throws IOException {
|
|
||||||
int count = 0;
|
|
||||||
int fields = 0;
|
|
||||||
String record;
|
|
||||||
while ((record = in.readLine()) != null) {
|
|
||||||
count++;
|
|
||||||
fields += split ? record.split(",").length : 1;
|
|
||||||
}
|
|
||||||
return new Stats(count, fields);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void testExtendedBuffer(final boolean makeString) throws Exception {
|
|
||||||
for (int i = 0; i < max; i++) {
|
|
||||||
int fields = 0;
|
|
||||||
int lines = 0;
|
|
||||||
final long startMillis;
|
|
||||||
try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) {
|
|
||||||
startMillis = System.currentTimeMillis();
|
|
||||||
int read;
|
|
||||||
if (makeString) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
while ((read = in.read()) != -1) {
|
|
||||||
sb.append((char) read);
|
|
||||||
if (read == ',') { // count delimiters
|
|
||||||
sb.toString();
|
|
||||||
sb = new StringBuilder();
|
|
||||||
fields++;
|
|
||||||
} else if (read == '\n') {
|
|
||||||
sb.toString();
|
|
||||||
sb = new StringBuilder();
|
|
||||||
lines++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
while ((read = in.read()) != -1) {
|
|
||||||
if (read == ',') { // count delimiters
|
|
||||||
fields++;
|
|
||||||
} else if (read == '\n') {
|
|
||||||
lines++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fields += lines; // EOL is a delimiter too
|
|
||||||
}
|
|
||||||
show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis);
|
|
||||||
}
|
|
||||||
show();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void testParser(final String msg, final CSVParserFactory fac) throws Exception {
|
|
||||||
for (int i = 0; i < max; i++) {
|
|
||||||
final long startMillis;
|
|
||||||
final Stats stats;
|
|
||||||
try (final CSVParser parser = fac.createParser()) {
|
|
||||||
startMillis = System.currentTimeMillis();
|
|
||||||
stats = iterate(parser);
|
|
||||||
}
|
|
||||||
show(msg, stats, startMillis);
|
|
||||||
}
|
|
||||||
show();
|
|
||||||
}
|
|
||||||
|
|
||||||
@FunctionalInterface
|
|
||||||
private interface CSVParserFactory {
|
|
||||||
CSVParser createParser() throws IOException;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void testParseCommonsCSV() throws Exception {
|
|
||||||
testParser("CSV", () -> new CSVParser(createReader(), format));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void testParsePath() throws Exception {
|
|
||||||
testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void testParsePathDoubleBuffering() throws Exception {
|
|
||||||
testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void testParseURL() throws Exception {
|
|
||||||
testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format));
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
final Class<Lexer> lexer = (Class<Lexer>) Class.forName("org.apache.commons.csv." + clazz);
|
|
||||||
return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void testCSVLexer(final boolean newToken, final String test) throws Exception {
|
private static void testCSVLexer(final boolean newToken, final String test) throws Exception {
|
||||||
|
@ -312,19 +247,84 @@ public class PerformanceTest {
|
||||||
show();
|
show();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input)
|
private static void testExtendedBuffer(final boolean makeString) throws Exception {
|
||||||
throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception {
|
for (int i = 0; i < max; i++) {
|
||||||
return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input);
|
int fields = 0;
|
||||||
|
int lines = 0;
|
||||||
|
final long startMillis;
|
||||||
|
try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) {
|
||||||
|
startMillis = System.currentTimeMillis();
|
||||||
|
int read;
|
||||||
|
if (makeString) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
while ((read = in.read()) != -1) {
|
||||||
|
sb.append((char) read);
|
||||||
|
if (read == ',') { // count delimiters
|
||||||
|
sb.toString();
|
||||||
|
sb = new StringBuilder();
|
||||||
|
fields++;
|
||||||
|
} else if (read == '\n') {
|
||||||
|
sb.toString();
|
||||||
|
sb = new StringBuilder();
|
||||||
|
lines++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while ((read = in.read()) != -1) {
|
||||||
|
if (read == ',') { // count delimiters
|
||||||
|
fields++;
|
||||||
|
} else if (read == '\n') {
|
||||||
|
lines++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fields += lines; // EOL is a delimiter too
|
||||||
|
}
|
||||||
|
show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis);
|
||||||
|
}
|
||||||
|
show();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Stats iterate(final Iterable<CSVRecord> it) {
|
private static void testParseCommonsCSV() throws Exception {
|
||||||
int count = 0;
|
testParser("CSV", () -> new CSVParser(createReader(), format));
|
||||||
int fields = 0;
|
}
|
||||||
for (final CSVRecord record : it) {
|
|
||||||
count++;
|
private static void testParsePath() throws Exception {
|
||||||
fields += record.size();
|
testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void testParsePathDoubleBuffering() throws Exception {
|
||||||
|
testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void testParser(final String msg, final CSVParserFactory fac) throws Exception {
|
||||||
|
for (int i = 0; i < max; i++) {
|
||||||
|
final long startMillis;
|
||||||
|
final Stats stats;
|
||||||
|
try (final CSVParser parser = fac.createParser()) {
|
||||||
|
startMillis = System.currentTimeMillis();
|
||||||
|
stats = iterate(parser);
|
||||||
|
}
|
||||||
|
show(msg, stats, startMillis);
|
||||||
}
|
}
|
||||||
return new Stats(count, fields);
|
show();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void testParseURL() throws Exception {
|
||||||
|
testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void testReadBigFile(final boolean split) throws Exception {
|
||||||
|
for (int i = 0; i < max; i++) {
|
||||||
|
final long startMillis;
|
||||||
|
final Stats stats;
|
||||||
|
try (final BufferedReader in = new BufferedReader(createReader())) {
|
||||||
|
startMillis = System.currentTimeMillis();
|
||||||
|
stats = readAll(in, split);
|
||||||
|
}
|
||||||
|
show(split ? "file+split" : "file", stats, startMillis);
|
||||||
|
}
|
||||||
|
show();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -27,25 +27,6 @@ import org.hamcrest.TypeSafeDiagnosingMatcher;
|
||||||
*/
|
*/
|
||||||
final class TokenMatchers {
|
final class TokenMatchers {
|
||||||
|
|
||||||
public static Matcher<Token> hasType(final Token.Type expectedType) {
|
|
||||||
return new TypeSafeDiagnosingMatcher<Token>() {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void describeTo(final Description description) {
|
|
||||||
description.appendText("token has type ");
|
|
||||||
description.appendValue(expectedType);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected boolean matchesSafely(final Token item,
|
|
||||||
final Description mismatchDescription) {
|
|
||||||
mismatchDescription.appendText("token type is ");
|
|
||||||
mismatchDescription.appendValue(item.type);
|
|
||||||
return item.type == expectedType;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Matcher<Token> hasContent(final String expectedContent) {
|
public static Matcher<Token> hasContent(final String expectedContent) {
|
||||||
return new TypeSafeDiagnosingMatcher<Token>() {
|
return new TypeSafeDiagnosingMatcher<Token>() {
|
||||||
|
|
||||||
|
@ -65,6 +46,25 @@ final class TokenMatchers {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Matcher<Token> hasType(final Token.Type expectedType) {
|
||||||
|
return new TypeSafeDiagnosingMatcher<Token>() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void describeTo(final Description description) {
|
||||||
|
description.appendText("token has type ");
|
||||||
|
description.appendValue(expectedType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean matchesSafely(final Token item,
|
||||||
|
final Description mismatchDescription) {
|
||||||
|
mismatchDescription.appendText("token type is ");
|
||||||
|
mismatchDescription.appendValue(item.type);
|
||||||
|
return item.type == expectedType;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
public static Matcher<Token> isReady() {
|
public static Matcher<Token> isReady() {
|
||||||
return new TypeSafeDiagnosingMatcher<Token>() {
|
return new TypeSafeDiagnosingMatcher<Token>() {
|
||||||
|
|
||||||
|
|
|
@ -39,6 +39,12 @@ public class TokenMatchersTest {
|
||||||
token.content.append("content");
|
token.content.append("content");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHasContent() {
|
||||||
|
assertFalse(hasContent("This is not the token's content").matches(token));
|
||||||
|
assertTrue(hasContent("content").matches(token));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHasType() {
|
public void testHasType() {
|
||||||
assertFalse(hasType(Token.Type.COMMENT).matches(token));
|
assertFalse(hasType(Token.Type.COMMENT).matches(token));
|
||||||
|
@ -47,12 +53,6 @@ public class TokenMatchersTest {
|
||||||
assertTrue(hasType(Token.Type.TOKEN).matches(token));
|
assertTrue(hasType(Token.Type.TOKEN).matches(token));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testHasContent() {
|
|
||||||
assertFalse(hasContent("This is not the token's content").matches(token));
|
|
||||||
assertTrue(hasContent("content").matches(token));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testIsReady() {
|
public void testIsReady() {
|
||||||
assertTrue(isReady().matches(token));
|
assertTrue(isReady().matches(token));
|
||||||
|
|
|
@ -28,9 +28,6 @@ import java.util.List;
|
||||||
*/
|
*/
|
||||||
final class Utils {
|
final class Utils {
|
||||||
|
|
||||||
private Utils() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if the 2d array has the same contents as the list of records.
|
* Checks if the 2d array has the same contents as the list of records.
|
||||||
*
|
*
|
||||||
|
@ -45,4 +42,7 @@ final class Utils {
|
||||||
assertArrayEquals(expected[i], actual.get(i).values(), message + " (entry " + i + ")");
|
assertArrayEquals(expected[i], actual.get(i).values(), message + " (entry " + i + ")");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Utils() {
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,6 +24,23 @@ import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class JiraCsv148Test {
|
public class JiraCsv148Test {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithIgnoreSurroundingSpacesEmpty() {
|
||||||
|
// @formatter:off
|
||||||
|
final CSVFormat format = CSVFormat.DEFAULT.builder()
|
||||||
|
.setQuoteMode(QuoteMode.ALL)
|
||||||
|
.setIgnoreSurroundingSpaces(true)
|
||||||
|
.build();
|
||||||
|
// @formatter:on
|
||||||
|
assertEquals(
|
||||||
|
"\"\",\" \",\" Single space on the left\",\"Single space on the right \","
|
||||||
|
+ "\" Single spaces on both sides \",\" Multiple spaces on the left\","
|
||||||
|
+ "\"Multiple spaces on the right \",\" Multiple spaces on both sides \"",
|
||||||
|
format.format("", " ", " Single space on the left", "Single space on the right ",
|
||||||
|
" Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ",
|
||||||
|
" Multiple spaces on both sides "));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The difference between withTrim()and withIgnoreSurroundingSpace(): difference: withTrim() can remove the leading
|
* The difference between withTrim()and withIgnoreSurroundingSpace(): difference: withTrim() can remove the leading
|
||||||
* and trailing spaces and newlines in quotation marks, while withIgnoreSurroundingSpace() cannot The same point:
|
* and trailing spaces and newlines in quotation marks, while withIgnoreSurroundingSpace() cannot The same point:
|
||||||
|
@ -45,21 +62,4 @@ public class JiraCsv148Test {
|
||||||
" Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ",
|
" Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ",
|
||||||
" Multiple spaces on both sides "));
|
" Multiple spaces on both sides "));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testWithIgnoreSurroundingSpacesEmpty() {
|
|
||||||
// @formatter:off
|
|
||||||
final CSVFormat format = CSVFormat.DEFAULT.builder()
|
|
||||||
.setQuoteMode(QuoteMode.ALL)
|
|
||||||
.setIgnoreSurroundingSpaces(true)
|
|
||||||
.build();
|
|
||||||
// @formatter:on
|
|
||||||
assertEquals(
|
|
||||||
"\"\",\" \",\" Single space on the left\",\"Single space on the right \","
|
|
||||||
+ "\" Single spaces on both sides \",\" Multiple spaces on the left\","
|
|
||||||
+ "\"Multiple spaces on the right \",\" Multiple spaces on both sides \"",
|
|
||||||
format.format("", " ", " Single space on the left", "Single space on the right ",
|
|
||||||
" Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ",
|
|
||||||
" Multiple spaces on both sides "));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,11 @@ import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class JiraCsv167Test {
|
public class JiraCsv167Test {
|
||||||
|
|
||||||
|
private Reader getTestReader() {
|
||||||
|
return new InputStreamReader(
|
||||||
|
ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/csv-167/sample1.csv"));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void parse() throws IOException {
|
public void parse() throws IOException {
|
||||||
int totcomment = 0;
|
int totcomment = 0;
|
||||||
|
@ -81,9 +86,4 @@ public class JiraCsv167Test {
|
||||||
assertEquals(totcomment, comments);
|
assertEquals(totcomment, comments);
|
||||||
assertEquals(totrecs, records); // records includes the header
|
assertEquals(totrecs, records); // records includes the header
|
||||||
}
|
}
|
||||||
|
|
||||||
private Reader getTestReader() {
|
|
||||||
return new InputStreamReader(
|
|
||||||
ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/csv-167/sample1.csv"));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -61,21 +61,6 @@ public class JiraCsv203Test {
|
||||||
assertEquals("N/A,\"Hello\",N/A,\"World\"\r\n", buffer.toString());
|
assertEquals("N/A,\"Hello\",N/A,\"World\"\r\n", buffer.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testWithoutQuoteMode() throws Exception {
|
|
||||||
// @formatter:off
|
|
||||||
final CSVFormat format = CSVFormat.EXCEL.builder()
|
|
||||||
.setNullString("N/A")
|
|
||||||
.setIgnoreSurroundingSpaces(true)
|
|
||||||
.build();
|
|
||||||
// @formatter:on
|
|
||||||
final StringBuilder buffer = new StringBuilder();
|
|
||||||
try (final CSVPrinter printer = new CSVPrinter(buffer, format)) {
|
|
||||||
printer.printRecord(null, "Hello", null, "World");
|
|
||||||
}
|
|
||||||
assertEquals("N/A,Hello,N/A,World\r\n", buffer.toString());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testQuoteModeMinimal() throws Exception {
|
public void testQuoteModeMinimal() throws Exception {
|
||||||
// @formatter:off
|
// @formatter:off
|
||||||
|
@ -108,6 +93,23 @@ public class JiraCsv203Test {
|
||||||
assertEquals("N/A,\"Hello\",N/A,\"World\"\r\n", buffer.toString());
|
assertEquals("N/A,\"Hello\",N/A,\"World\"\r\n", buffer.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWithEmptyValues() throws Exception {
|
||||||
|
// @formatter:off
|
||||||
|
final CSVFormat format = CSVFormat.EXCEL.builder()
|
||||||
|
.setNullString("N/A")
|
||||||
|
.setIgnoreSurroundingSpaces(true)
|
||||||
|
.setQuoteMode(QuoteMode.ALL)
|
||||||
|
.build();
|
||||||
|
// @formatter:on
|
||||||
|
final StringBuilder buffer = new StringBuilder();
|
||||||
|
try (final CSVPrinter printer = new CSVPrinter(buffer, format)) {
|
||||||
|
printer.printRecord("", "Hello", "", "World");
|
||||||
|
// printer.printRecord(new Object[] { null, "Hello", null, "World" });
|
||||||
|
}
|
||||||
|
assertEquals("\"\",\"Hello\",\"\",\"World\"\r\n", buffer.toString());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWithoutNullString() throws Exception {
|
public void testWithoutNullString() throws Exception {
|
||||||
// @formatter:off
|
// @formatter:off
|
||||||
|
@ -125,19 +127,17 @@ public class JiraCsv203Test {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testWithEmptyValues() throws Exception {
|
public void testWithoutQuoteMode() throws Exception {
|
||||||
// @formatter:off
|
// @formatter:off
|
||||||
final CSVFormat format = CSVFormat.EXCEL.builder()
|
final CSVFormat format = CSVFormat.EXCEL.builder()
|
||||||
.setNullString("N/A")
|
.setNullString("N/A")
|
||||||
.setIgnoreSurroundingSpaces(true)
|
.setIgnoreSurroundingSpaces(true)
|
||||||
.setQuoteMode(QuoteMode.ALL)
|
|
||||||
.build();
|
.build();
|
||||||
// @formatter:on
|
// @formatter:on
|
||||||
final StringBuilder buffer = new StringBuilder();
|
final StringBuilder buffer = new StringBuilder();
|
||||||
try (final CSVPrinter printer = new CSVPrinter(buffer, format)) {
|
try (final CSVPrinter printer = new CSVPrinter(buffer, format)) {
|
||||||
printer.printRecord("", "Hello", "", "World");
|
printer.printRecord(null, "Hello", null, "World");
|
||||||
// printer.printRecord(new Object[] { null, "Hello", null, "World" });
|
|
||||||
}
|
}
|
||||||
assertEquals("\"\",\"Hello\",\"\",\"World\"\r\n", buffer.toString());
|
assertEquals("N/A,Hello,N/A,World\r\n", buffer.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,6 +30,10 @@ import org.apache.commons.csv.CSVRecord;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
public class JiraCsv248Test {
|
public class JiraCsv248Test {
|
||||||
|
private static InputStream getTestInput() {
|
||||||
|
return ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/CSV-248/csvRecord.bin");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test deserialisation of a CSVRecord created using version 1.6.
|
* Test deserialisation of a CSVRecord created using version 1.6.
|
||||||
*
|
*
|
||||||
|
@ -74,8 +78,4 @@ public class JiraCsv248Test {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static InputStream getTestInput() {
|
|
||||||
return ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/CSV-248/csvRecord.bin");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,11 +43,9 @@ import org.junit.jupiter.api.Test;
|
||||||
@SuppressWarnings("boxing") // test code
|
@SuppressWarnings("boxing") // test code
|
||||||
public class PerformanceTest {
|
public class PerformanceTest {
|
||||||
|
|
||||||
private final int max = 10;
|
|
||||||
|
|
||||||
private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
|
private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
|
||||||
private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt");
|
|
||||||
|
|
||||||
|
private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt");
|
||||||
@BeforeAll
|
@BeforeAll
|
||||||
public static void setUpClass() throws FileNotFoundException, IOException {
|
public static void setUpClass() throws FileNotFoundException, IOException {
|
||||||
if (BIG_FILE.exists()) {
|
if (BIG_FILE.exists()) {
|
||||||
|
@ -64,6 +62,8 @@ public class PerformanceTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final int max = 10;
|
||||||
|
|
||||||
private BufferedReader createBufferedReader() throws IOException {
|
private BufferedReader createBufferedReader() throws IOException {
|
||||||
return new BufferedReader(new FileReader(BIG_FILE));
|
return new BufferedReader(new FileReader(BIG_FILE));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue