Sort members.

This commit is contained in:
Gary Gregory 2021-07-24 11:21:30 -04:00
parent d7b1574483
commit 46f9211d60
14 changed files with 466 additions and 466 deletions

View File

@ -122,19 +122,6 @@ final class ExtendedBufferedReader extends BufferedReader {
return c;
}
/**
* Returns the next n characters in the current reader without consuming them. The next call to {@link #read()} will still return the next value. This
* doesn't affect line number or last character.
*
* @param n the number characters look ahead.
* @return the next n characters.
* @throws IOException If an I/O error occurs
*/
char[] lookAhead(final int n) throws IOException {
final char[] buf = new char[n];
return lookAhead(buf);
}
/**
* Populates the buffer with the next {@code buf.length} characters in the
* current reader without consuming them. The next call to {@link #read()} will
@ -154,6 +141,19 @@ final class ExtendedBufferedReader extends BufferedReader {
return buf;
}
/**
 * Peeks at the next {@code n} characters of the current reader without consuming them, so the next call to
 * {@link #read()} still returns the next value. Neither the line number nor the last character is affected.
 *
 * @param n the number of characters to look ahead.
 * @return the next n characters.
 * @throws IOException If an I/O error occurs
 */
char[] lookAhead(final int n) throws IOException {
    // Delegate to the buffer-filling overload with a buffer sized to the request.
    return lookAhead(new char[n]);
}
@Override
public int read() throws IOException {
final int current = super.read();

View File

@ -42,6 +42,11 @@ public class CSVFileParserTest {
private static final File BASE_DIR = new File("src/test/resources/org/apache/commons/csv/CSVFileParser");
/**
 * Supplies the files in {@code BASE_DIR} whose names start with "test" and end with ".txt".
 *
 * @return a stream of the matching files, or an empty stream when the directory listing returns {@code null}.
 */
public static Stream<File> generateData() {
    final File[] testFiles = BASE_DIR.listFiles((parent, fileName) -> fileName.startsWith("test") && fileName.endsWith(".txt"));
    if (testFiles == null) {
        return Stream.empty();
    }
    return Stream.of(testFiles);
}
private String readTestData(final BufferedReader reader) throws IOException {
String line;
do {
@ -50,11 +55,6 @@ public class CSVFileParserTest {
return line;
}
public static Stream<File> generateData() {
final File[] files = BASE_DIR.listFiles((dir, name) -> name.startsWith("test") && name.endsWith(".txt"));
return files != null ? Stream.of(files) : Stream.empty();
}
@ParameterizedTest
@MethodSource("generateData")
public void testCSVFile(final File testFile) throws Exception {

View File

@ -41,6 +41,16 @@ public class CSVFormatPredefinedTest {
test(CSVFormat.EXCEL, "Excel");
}
@Test
public void testMongoDbCsv() {
    // Pairs the predefined MongoDB CSV format with the name passed to the shared test helper.
    final CSVFormat format = CSVFormat.MONGODB_CSV;
    final String name = "MongoDBCsv";
    test(format, name);
}
@Test
public void testMongoDbTsv() {
    // Pairs the predefined MongoDB TSV format with the name passed to the shared test helper.
    final CSVFormat format = CSVFormat.MONGODB_TSV;
    final String name = "MongoDBTsv";
    test(format, name);
}
@Test
public void testMySQL() {
test(CSVFormat.MYSQL, "MySQL");
@ -56,16 +66,6 @@ public class CSVFormatPredefinedTest {
test(CSVFormat.POSTGRESQL_CSV, "PostgreSQLCsv");
}
@Test
public void testMongoDbCsv() {
test(CSVFormat.MONGODB_CSV, "MongoDBCsv");
}
@Test
public void testMongoDbTsv() {
test(CSVFormat.MONGODB_TSV, "MongoDBTsv");
}
@Test
public void testPostgreSqlText() {
test(CSVFormat.POSTGRESQL_TEXT, "PostgreSQLText");

View File

@ -264,6 +264,15 @@ public class CSVRecordTest {
}
}
/**
 * Checks that {@code record.stream()} yields the expected values in order and that it yields all of them.
 */
@Test
public void testStream() {
    final AtomicInteger i = new AtomicInteger();
    record.stream().forEach(value -> {
        assertEquals(values[i.get()], value);
        i.incrementAndGet();
    });
    // Without this check an empty or truncated stream would pass, because the lambda would simply run fewer times.
    assertEquals(values.length, i.get());
}
@Test
public void testToList() {
int i = 0;
@ -297,15 +306,6 @@ public class CSVRecordTest {
}
}
@Test
public void testStream() {
final AtomicInteger i = new AtomicInteger();
record.stream().forEach(value -> {
assertEquals(values[i.get()], value);
i.incrementAndGet();
});
}
@Test
public void testToString() {
assertNotNull(recordWithHeader.toString());

View File

@ -47,63 +47,65 @@ public class LexerTest {
private CSVFormat formatWithEscaping;
@BeforeEach
public void setUp() {
formatWithEscaping = CSVFormat.DEFAULT.withEscape('\\');
}
/**
 * Builds a {@link Lexer} that reads {@code input} through an {@link ExtendedBufferedReader} using {@code format}.
 *
 * @param input the text to tokenize.
 * @param format the format handed to the lexer.
 * @return the new lexer.
 */
@SuppressWarnings("resource")
private Lexer createLexer(final String input, final CSVFormat format) {
    final ExtendedBufferedReader reader = new ExtendedBufferedReader(new StringReader(input));
    return new Lexer(format, reader);
}
@Test
public void testSurroundingSpacesAreDeleted() throws IOException {
final String code = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,";
try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "noSpaces"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingSpaces"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingSpaces"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingSpaces"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
}
/** Prepares the default format with a backslash as its escape character. */
@BeforeEach
public void setUp() {
    final char backslash = '\\';
    formatWithEscaping = CSVFormat.DEFAULT.withEscape(backslash);
}
// simple token with escaping enabled
@Test
public void testSurroundingTabsAreDeleted() throws IOException {
final String code = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,";
try (final Lexer parser = createLexer(code, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "noTabs"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "leadingTab"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "trailingTab"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "surroundingTabs"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
}
}
@Test
public void testIgnoreEmptyLines() throws IOException {
final String code = "first,line,\n" + "\n" + "\n" + "second,line\n" + "\n" + "\n" + "third line \n" + "\n" +
"\n" + "last, line \n" + "\n" + "\n" + "\n";
final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines();
public void testBackslashWithEscaping() throws IOException {
/*
* file: a,\,,b \,,
*/
final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne";
final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false);
assertTrue(format.isEscapeCharacterSet());
try (final Lexer parser = createLexer(code, format)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "first"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "second"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "line"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "third line "));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "last"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, " line "));
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\nc"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "d\r"));
assertThat(parser.nextToken(new Token()), matches(EOF, "e"));
}
}
// simple token with escaping not enabled
@Test
public void testBackslashWithoutEscaping() throws IOException {
    // The default format is expected to have no escape character configured.
    final CSVFormat plainFormat = CSVFormat.DEFAULT;
    assertFalse(plainFormat.isEscapeCharacterSet());
    /*
     * file: a,\,,b \,,
     */
    final String input = "a,\\,,b\\\n\\,,";
    try (final Lexer lexer = createLexer(input, plainFormat)) {
        assertThat(lexer.nextToken(new Token()), matches(TOKEN, "a"));
        // with no escape character, a lone unquoted backslash is plain content
        assertThat(lexer.nextToken(new Token()), matches(TOKEN, "\\"));
        assertThat(lexer.nextToken(new Token()), matches(TOKEN, ""));
        assertThat(lexer.nextToken(new Token()), matches(EORECORD, "b\\"));
        // same on the second line
        assertThat(lexer.nextToken(new Token()), matches(TOKEN, "\\"));
        assertThat(lexer.nextToken(new Token()), matches(TOKEN, ""));
        assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
    }
}
@Test
public void testBackspace() throws Exception {
    // An unescaped backspace is expected to come back unchanged inside the token.
    final String input = "character" + BACKSPACE + "NotEscaped";
    final String expected = "character" + BACKSPACE + "NotEscaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent(expected));
    }
}
@Test
public void testComments() throws IOException {
final String code = "first,line,\n" + "second,line,tokenWith#no-comment\n" + "# comment line \n" +
@ -173,45 +175,141 @@ public class LexerTest {
}
}
// simple token with escaping not enabled
@Test
public void testBackslashWithoutEscaping() throws IOException {
/*
* file: a,\,,b \,,
*/
final String code = "a,\\,,b\\\n\\,,";
final CSVFormat format = CSVFormat.DEFAULT;
assertFalse(format.isEscapeCharacterSet());
public void testCR() throws Exception {
    // An unescaped CR is expected to split the input into two tokens.
    final String input = "character" + CR + "NotEscaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token first = lexer.nextToken(new Token());
        assertThat(first, hasContent("character"));
        final Token second = lexer.nextToken(new Token());
        assertThat(second, hasContent("NotEscaped"));
    }
}
// From CSV-1
@Test
public void testDelimiterIsWhitespace() throws IOException {
    final String input = "one\ttwo\t\tfour \t five\t six";
    // Tokens expected before the last field, in order.
    final String[] expectedTokens = {"one", "two", "", "four", "five"};
    try (final Lexer lexer = createLexer(input, CSVFormat.TDF)) {
        for (final String expected : expectedTokens) {
            assertThat(lexer.nextToken(new Token()), matches(TOKEN, expected));
        }
        // The last field is reported together with EOF.
        assertThat(lexer.nextToken(new Token()), matches(EOF, "six"));
    }
}
@Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped?
public void testEscapedBackspace() throws Exception {
    final String input = "character\\" + BACKSPACE + "Escaped";
    final String expected = "character" + BACKSPACE + "Escaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent(expected));
    }
}
@Test
public void testEscapedCharacter() throws Exception {
    // The escape followed by 'a' is expected to be kept verbatim in the token.
    final String input = "character\\aEscaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent("character\\aEscaped"));
    }
}
@Test
public void testEscapedControlCharacter() throws Exception {
    // deliberately uses an escape character other than the backslash
    final CSVFormat bangEscaped = CSVFormat.DEFAULT.withEscape('!');
    final String expected = "character" + CR + "Escaped";
    try (final Lexer lexer = createLexer("character!rEscaped", bangEscaped)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent(expected));
    }
}
@Test
public void testEscapedControlCharacter2() throws Exception {
    // Same check as above, this time with the backslash as escape character.
    final CSVFormat backslashEscaped = CSVFormat.DEFAULT.withEscape('\\');
    final String expected = "character" + CR + "Escaped";
    try (final Lexer lexer = createLexer("character\\rEscaped", backslashEscaped)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent(expected));
    }
}
@Test
public void testEscapedCR() throws Exception {
    // An escaped CR is expected to stay inside the token instead of ending it.
    final String input = "character\\" + CR + "Escaped";
    final String expected = "character" + CR + "Escaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent(expected));
    }
}
@Test // TODO is this correct? Do we expect <esc>FF to be unescaped?
public void testEscapedFF() throws Exception {
    final String input = "character\\" + FF + "Escaped";
    final String expected = "character" + FF + "Escaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent(expected));
    }
}
@Test
public void testEscapedLF() throws Exception {
    // An escaped LF is expected to stay inside the token instead of ending it.
    final String input = "character\\" + LF + "Escaped";
    final String expected = "character" + LF + "Escaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent(expected));
    }
}
@Test
public void testEscapedMySqlNullValue() throws Exception {
    // MySQL writes null values as \N, so the sequence has to come back unchanged.
    final String input = "character\\NEscaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent("character\\NEscaped"));
    }
}
@Test // TODO is this correct? Do we expect <esc>TAB to be unescaped?
public void testEscapedTab() throws Exception {
    final String input = "character\\" + TAB + "Escaped";
    final String expected = "character" + TAB + "Escaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent(expected));
    }
}
@Test
public void testEscapingAtEOF() throws Exception {
    // The input ends directly after the escape character; reading the token must fail with an IOException.
    try (final Lexer lexer = createLexer("escaping at EOF is evil\\", formatWithEscaping)) {
        assertThrows(IOException.class, () -> lexer.nextToken(new Token()));
    }
}
@Test
public void testFF() throws Exception {
    // An unescaped form feed is expected to come back unchanged inside the token.
    final String input = "character" + FF + "NotEscaped";
    final String expected = "character" + FF + "NotEscaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent(expected));
    }
}
@Test
public void testIgnoreEmptyLines() throws IOException {
final String code = "first,line,\n" + "\n" + "\n" + "second,line\n" + "\n" + "\n" + "third line \n" + "\n" +
"\n" + "last, line \n" + "\n" + "\n" + "\n";
final CSVFormat format = CSVFormat.DEFAULT.withIgnoreEmptyLines();
try (final Lexer parser = createLexer(code, format)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
// an unquoted single backslash is not an escape char
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\"));
// an unquoted single backslash is not an escape char
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\\"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "first"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "line"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "second"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "line"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "third line "));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "last"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, " line "));
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
assertThat(parser.nextToken(new Token()), matches(EOF, ""));
}
}
// simple token with escaping enabled
@Test
public void testBackslashWithEscaping() throws IOException {
/*
* file: a,\,,b \,,
*/
final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne";
final CSVFormat format = formatWithEscaping.withIgnoreEmptyLines(false);
assertTrue(format.isEscapeCharacterSet());
try (final Lexer parser = createLexer(code, format)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "a"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "b\\"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ","));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "\nc"));
assertThat(parser.nextToken(new Token()), matches(EORECORD, "d\r"));
assertThat(parser.nextToken(new Token()), matches(EOF, "e"));
public void testIsMetaCharCommentStart() throws IOException {
try (final Lexer lexer = createLexer("#", CSVFormat.DEFAULT.withCommentMarker('#'))) {
final int ch = lexer.readEscape();
assertEquals('#', ch);
}
}
@Test
public void testLF() throws Exception {
    // An unescaped LF is expected to split the input into two tokens.
    final String input = "character" + LF + "NotEscaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token first = lexer.nextToken(new Token());
        assertThat(first, hasContent("character"));
        final Token second = lexer.nextToken(new Token());
        assertThat(second, hasContent("NotEscaped"));
    }
}
@ -266,148 +364,6 @@ public class LexerTest {
}
}
// From CSV-1
@Test
public void testDelimiterIsWhitespace() throws IOException {
final String code = "one\ttwo\t\tfour \t five\t six";
try (final Lexer parser = createLexer(code, CSVFormat.TDF)) {
assertThat(parser.nextToken(new Token()), matches(TOKEN, "one"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "two"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, ""));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "four"));
assertThat(parser.nextToken(new Token()), matches(TOKEN, "five"));
assertThat(parser.nextToken(new Token()), matches(EOF, "six"));
}
}
@Test
public void testEscapedCR() throws Exception {
try (final Lexer lexer = createLexer("character\\" + CR + "Escaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
}
}
@Test
public void testCR() throws Exception {
try (final Lexer lexer = createLexer("character" + CR + "NotEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character"));
assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
}
}
@Test
public void testEscapedLF() throws Exception {
try (final Lexer lexer = createLexer("character\\" + LF + "Escaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + LF + "Escaped"));
}
}
@Test
public void testLF() throws Exception {
try (final Lexer lexer = createLexer("character" + LF + "NotEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character"));
assertThat(lexer.nextToken(new Token()), hasContent("NotEscaped"));
}
}
@Test // TODO is this correct? Do we expect <esc>TAB to be unescaped?
public void testEscapedTab() throws Exception {
try (final Lexer lexer = createLexer("character\\" + TAB + "Escaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "Escaped"));
}
}
@Test
public void testTab() throws Exception {
try (final Lexer lexer = createLexer("character" + TAB + "NotEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + TAB + "NotEscaped"));
}
}
@Test // TODO is this correct? Do we expect <esc>BACKSPACE to be unescaped?
public void testEscapedBackspace() throws Exception {
try (final Lexer lexer = createLexer("character\\" + BACKSPACE + "Escaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "Escaped"));
}
}
@Test
public void testBackspace() throws Exception {
try (final Lexer lexer = createLexer("character" + BACKSPACE + "NotEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + BACKSPACE + "NotEscaped"));
}
}
@Test // TODO is this correct? Do we expect <esc>FF to be unescaped?
public void testEscapedFF() throws Exception {
try (final Lexer lexer = createLexer("character\\" + FF + "Escaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "Escaped"));
}
}
@Test
public void testFF() throws Exception {
try (final Lexer lexer = createLexer("character" + FF + "NotEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + FF + "NotEscaped"));
}
}
@Test
public void testEscapedMySqlNullValue() throws Exception {
// MySQL uses \N to symbolize null values. We have to restore this
try (final Lexer lexer = createLexer("character\\NEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character\\NEscaped"));
}
}
@Test
public void testEscapedCharacter() throws Exception {
try (final Lexer lexer = createLexer("character\\aEscaped", formatWithEscaping)) {
assertThat(lexer.nextToken(new Token()), hasContent("character\\aEscaped"));
}
}
@Test
public void testEscapedControlCharacter() throws Exception {
// we are explicitly using an escape different from \ here
try (final Lexer lexer = createLexer("character!rEscaped", CSVFormat.DEFAULT.withEscape('!'))) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
}
}
@Test
public void testEscapedControlCharacter2() throws Exception {
try (final Lexer lexer = createLexer("character\\rEscaped", CSVFormat.DEFAULT.withEscape('\\'))) {
assertThat(lexer.nextToken(new Token()), hasContent("character" + CR + "Escaped"));
}
}
@Test
public void testEscapingAtEOF() throws Exception {
final String code = "escaping at EOF is evil\\";
try (final Lexer lexer = createLexer(code, formatWithEscaping)) {
assertThrows(IOException.class, () -> lexer.nextToken(new Token()));
}
}
@Test
public void testTrimTrailingSpacesZeroLength() throws Exception {
final StringBuilder buffer = new StringBuilder("");
final Lexer lexer = createLexer(buffer.toString(), CSVFormat.DEFAULT);
lexer.trimTrailingSpaces(buffer);
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
}
@Test
public void testReadEscapeTab() throws IOException {
try (final Lexer lexer = createLexer("t", CSVFormat.DEFAULT.withEscape('\t'))) {
final int ch = lexer.readEscape();
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
assertEquals(TAB, ch);
}
}
@Test
public void testReadEscapeBackspace() throws IOException {
try (final Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {
@ -425,10 +381,54 @@ public class LexerTest {
}
@Test
public void testIsMetaCharCommentStart() throws IOException {
try (final Lexer lexer = createLexer("#", CSVFormat.DEFAULT.withCommentMarker('#'))) {
public void testReadEscapeTab() throws IOException {
try (final Lexer lexer = createLexer("t", CSVFormat.DEFAULT.withEscape('\t'))) {
final int ch = lexer.readEscape();
assertEquals('#', ch);
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
assertEquals(TAB, ch);
}
}
@Test
public void testSurroundingSpacesAreDeleted() throws IOException {
    final String input = "noSpaces, leadingSpaces,trailingSpaces , surroundingSpaces , ,,";
    // Field contents expected once surrounding spaces are ignored, in order.
    final String[] expectedTokens = {"noSpaces", "leadingSpaces", "trailingSpaces", "surroundingSpaces", "", ""};
    try (final Lexer lexer = createLexer(input, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
        for (final String expected : expectedTokens) {
            assertThat(lexer.nextToken(new Token()), matches(TOKEN, expected));
        }
        assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
    }
}
@Test
public void testSurroundingTabsAreDeleted() throws IOException {
    final String input = "noTabs,\tleadingTab,trailingTab\t,\tsurroundingTabs\t,\t\t,,";
    // Field contents expected once surrounding tabs are ignored, in order.
    final String[] expectedTokens = {"noTabs", "leadingTab", "trailingTab", "surroundingTabs", "", ""};
    try (final Lexer lexer = createLexer(input, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
        for (final String expected : expectedTokens) {
            assertThat(lexer.nextToken(new Token()), matches(TOKEN, expected));
        }
        assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
    }
}
@Test
public void testTab() throws Exception {
    // An unescaped tab is expected to come back unchanged inside the token.
    final String input = "character" + TAB + "NotEscaped";
    final String expected = "character" + TAB + "NotEscaped";
    try (final Lexer lexer = createLexer(input, formatWithEscaping)) {
        final Token token = lexer.nextToken(new Token());
        assertThat(token, hasContent(expected));
    }
}
/**
 * Trimming trailing spaces of an empty buffer must not fail, and the lexer built from that empty input must
 * report EOF with empty content.
 */
@Test
public void testTrimTrailingSpacesZeroLength() throws Exception {
    final StringBuilder buffer = new StringBuilder("");
    // try-with-resources closes the lexer, as every other test in this class does
    try (final Lexer lexer = createLexer(buffer.toString(), CSVFormat.DEFAULT)) {
        lexer.trimTrailingSpaces(buffer);
        assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
    }
}
}

View File

@ -41,6 +41,21 @@ import org.apache.commons.io.IOUtils;
@SuppressWarnings("boxing")
public class PerformanceTest {
/**
 * Supplies a new {@link CSVParser}; the parser benchmarks in this class implement it with lambdas.
 */
@FunctionalInterface
private interface CSVParserFactory {
/**
 * Creates a parser.
 *
 * @return the new parser.
 * @throws IOException if the parser cannot be created.
 */
CSVParser createParser() throws IOException;
}
// Container for basic statistics
private static class Stats {
    /** Number of lines/records counted. */
    final int count;
    /** Number of fields counted. */
    final int fields;

    Stats(final int count, final int fields) {
        this.count = count;
        this.fields = fields;
    }
}
private static final String[] PROPS = {
"java.version", // Java Runtime Environment version
"java.vendor", // Java Runtime Environment vendor
@ -58,17 +73,42 @@ public class PerformanceTest {
"os.arch", // Operating system architecture
"os.version", // Operating system version
};
private static int max = 11; // skip first test
private static int num; // number of elapsed times recorded
private static final long[] ELAPSED_TIMES = new long[max];
private static final long[] ELAPSED_TIMES = new long[max];
private static final CSVFormat format = CSVFormat.EXCEL;
private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt");
/**
 * Opens {@code BIG_FILE} for reading as ISO-8859-1 text.
 *
 * @return a new reader over the file.
 * @throws IOException if the file cannot be opened.
 */
private static Reader createReader() throws IOException {
    final FileInputStream bytes = new FileInputStream(BIG_FILE);
    return new InputStreamReader(bytes, StandardCharsets.ISO_8859_1);
}
/**
 * Creates the lexer to benchmark. Names starting with "CSVLexer" are instantiated reflectively through
 * {@link #getLexerCtor(String)}; any other name yields the standard {@link Lexer}.
 */
private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input)
        throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception {
    if (test.startsWith("CSVLexer")) {
        return getLexerCtor(test).newInstance(format, input);
    }
    return new Lexer(format, input);
}
/**
 * Looks up the {@code (CSVFormat, ExtendedBufferedReader)} constructor of the class with the given simple name
 * in package {@code org.apache.commons.csv}.
 *
 * @param clazz simple class name of the lexer implementation.
 * @return the matching public constructor.
 * @throws Exception if the class or constructor cannot be found.
 */
private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
    final String qualifiedName = "org.apache.commons.csv." + clazz;
    @SuppressWarnings("unchecked")
    final Class<Lexer> lexerClass = (Class<Lexer>) Class.forName(qualifiedName);
    return lexerClass.getConstructor(CSVFormat.class, ExtendedBufferedReader.class);
}
/**
 * Walks all records once and tallies how many records and how many fields were seen.
 *
 * @param it the records to consume.
 * @return the record and field totals.
 */
private static Stats iterate(final Iterable<CSVRecord> it) {
    int recordTotal = 0;
    int fieldTotal = 0;
    for (final CSVRecord current : it) {
        recordTotal += 1;
        fieldTotal += current.size();
    }
    return new Stats(recordTotal, fieldTotal);
}
public static void main(final String [] args) throws Exception {
if (BIG_FILE.exists()) {
System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length());
@ -128,26 +168,15 @@ public class PerformanceTest {
}
}
private static Reader createReader() throws IOException {
return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1);
}
// Container for basic statistics
private static class Stats {
final int count;
final int fields;
Stats(final int c, final int f) {
count = c;
fields = f;
private static Stats readAll(final BufferedReader in, final boolean split) throws IOException {
int count = 0;
int fields = 0;
String record;
while ((record = in.readLine()) != null) {
count++;
fields += split ? record.split(",").length : 1;
}
}
// Display end stats; store elapsed for average
private static void show(final String msg, final Stats s, final long start) {
final long elapsed = System.currentTimeMillis() - start;
System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields);
ELAPSED_TIMES[num] = elapsed;
num++;
return new Stats(count, fields);
}
// calculate and show average
@ -162,106 +191,12 @@ public class PerformanceTest {
num = 0; // ready for next set
}
private static void testReadBigFile(final boolean split) throws Exception {
for (int i = 0; i < max; i++) {
final long startMillis;
final Stats stats;
try (final BufferedReader in = new BufferedReader(createReader())) {
startMillis = System.currentTimeMillis();
stats = readAll(in, split);
}
show(split ? "file+split" : "file", stats, startMillis);
}
show();
}
private static Stats readAll(final BufferedReader in, final boolean split) throws IOException {
int count = 0;
int fields = 0;
String record;
while ((record = in.readLine()) != null) {
count++;
fields += split ? record.split(",").length : 1;
}
return new Stats(count, fields);
}
private static void testExtendedBuffer(final boolean makeString) throws Exception {
for (int i = 0; i < max; i++) {
int fields = 0;
int lines = 0;
final long startMillis;
try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) {
startMillis = System.currentTimeMillis();
int read;
if (makeString) {
StringBuilder sb = new StringBuilder();
while ((read = in.read()) != -1) {
sb.append((char) read);
if (read == ',') { // count delimiters
sb.toString();
sb = new StringBuilder();
fields++;
} else if (read == '\n') {
sb.toString();
sb = new StringBuilder();
lines++;
}
}
} else {
while ((read = in.read()) != -1) {
if (read == ',') { // count delimiters
fields++;
} else if (read == '\n') {
lines++;
}
}
}
fields += lines; // EOL is a delimiter too
}
show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis);
}
show();
}
private static void testParser(final String msg, final CSVParserFactory fac) throws Exception {
for (int i = 0; i < max; i++) {
final long startMillis;
final Stats stats;
try (final CSVParser parser = fac.createParser()) {
startMillis = System.currentTimeMillis();
stats = iterate(parser);
}
show(msg, stats, startMillis);
}
show();
}
@FunctionalInterface
private interface CSVParserFactory {
CSVParser createParser() throws IOException;
}
private static void testParseCommonsCSV() throws Exception {
testParser("CSV", () -> new CSVParser(createReader(), format));
}
private static void testParsePath() throws Exception {
testParser("CSV-PATH", () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format));
}
private static void testParsePathDoubleBuffering() throws Exception {
testParser("CSV-PATH-DB", () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format));
}
private static void testParseURL() throws Exception {
testParser("CSV-URL", () -> CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format));
}
private static Constructor<Lexer> getLexerCtor(final String clazz) throws Exception {
@SuppressWarnings("unchecked")
final Class<Lexer> lexer = (Class<Lexer>) Class.forName("org.apache.commons.csv." + clazz);
return lexer.getConstructor(CSVFormat.class, ExtendedBufferedReader.class);
// Display end stats; store elapsed for average
private static void show(final String msg, final Stats s, final long start) {
final long elapsed = System.currentTimeMillis() - start;
System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields);
ELAPSED_TIMES[num] = elapsed;
num++;
}
private static void testCSVLexer(final boolean newToken, final String test) throws Exception {
@ -312,19 +247,84 @@ public class PerformanceTest {
show();
}
private static Lexer createTestCSVLexer(final String test, final ExtendedBufferedReader input)
throws InstantiationException, IllegalAccessException, InvocationTargetException, Exception {
return test.startsWith("CSVLexer") ? getLexerCtor(test).newInstance(format, input) : new Lexer(format, input);
/**
 * Benchmarks reading the big file one character at a time through an {@link ExtendedBufferedReader}.
 * The run is repeated {@code max} times; each run counts commas and line feeds and reports its timing via
 * {@code show(String, Stats, long)}; the no-argument {@code show()} is called once after all runs.
 *
 * @param makeString when {@code true}, every character is also appended to a {@link StringBuilder} that is
 *        converted to a string and replaced at each comma or line feed, to include that cost in the timing.
 * @throws Exception if reading the file fails.
 */
private static void testExtendedBuffer(final boolean makeString) throws Exception {
for (int i = 0; i < max; i++) {
int fields = 0;
int lines = 0;
final long startMillis;
try (final ExtendedBufferedReader in = new ExtendedBufferedReader(createReader())) {
// The clock starts only after the reader has been opened.
startMillis = System.currentTimeMillis();
int read;
if (makeString) {
StringBuilder sb = new StringBuilder();
while ((read = in.read()) != -1) {
sb.append((char) read);
if (read == ',') { // count delimiters
// The string is built and discarded on purpose; only the cost of building it matters here.
sb.toString();
sb = new StringBuilder();
fields++;
} else if (read == '\n') {
sb.toString();
sb = new StringBuilder();
lines++;
}
}
} else {
// Plain variant: count delimiters and line ends without building any strings.
while ((read = in.read()) != -1) {
if (read == ',') { // count delimiters
fields++;
} else if (read == '\n') {
lines++;
}
}
}
fields += lines; // EOL is a delimiter too
}
show("Extended" + (makeString ? " toString" : ""), new Stats(lines, fields), startMillis);
}
show();
}
private static Stats iterate(final Iterable<CSVRecord> it) {
int count = 0;
int fields = 0;
for (final CSVRecord record : it) {
count++;
fields += record.size();
/** Benchmarks a parser constructed directly from the reader returned by {@code createReader()}. */
private static void testParseCommonsCSV() throws Exception {
    final CSVParserFactory factory = () -> new CSVParser(createReader(), format);
    testParser("CSV", factory);
}
/** Benchmarks a parser created from an input stream opened with {@code Files.newInputStream}. */
private static void testParsePath() throws Exception {
    final CSVParserFactory factory =
            () -> CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format);
    testParser("CSV-PATH", factory);
}
/** Benchmarks a parser created from a reader opened with {@code Files.newBufferedReader}. */
private static void testParsePathDoubleBuffering() throws Exception {
    final CSVParserFactory factory =
            () -> CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format);
    testParser("CSV-PATH-DB", factory);
}
private static void testParser(final String msg, final CSVParserFactory fac) throws Exception {
for (int i = 0; i < max; i++) {
final long startMillis;
final Stats stats;
try (final CSVParser parser = fac.createParser()) {
startMillis = System.currentTimeMillis();
stats = iterate(parser);
}
show(msg, stats, startMillis);
}
return new Stats(count, fields);
show();
}
/**
 * Runs {@code testParser} under the label "CSV-URL" with parsers created from the URL form of
 * {@code BIG_FILE}, decoded as ISO-8859-1.
 *
 * @throws Exception propagated from {@code testParser}
 */
private static void testParseURL() throws Exception {
    final CSVParserFactory fromUrl = () -> CSVParser.parse(
            BIG_FILE.toURI().toURL(),
            StandardCharsets.ISO_8859_1,
            format);
    testParser("CSV-URL", fromUrl);
}
/**
 * Times {@code max} full passes over the input through a plain {@link BufferedReader}, leaving the actual
 * reading to {@code readAll}, and reports every pass via {@code show}.
 *
 * @param split forwarded to {@code readAll}; also selects the report label ("file+split" or "file")
 * @throws Exception propagated from the reader or from the helpers called here
 */
private static void testReadBigFile(final boolean split) throws Exception {
    final String label = split ? "file+split" : "file";
    int run = 0;
    while (run < max) {
        final long begin;
        final Stats result;
        try (final BufferedReader reader = new BufferedReader(createReader())) {
            // The clock starts only after the reader has been constructed.
            begin = System.currentTimeMillis();
            result = readAll(reader, split);
        }
        show(label, result, begin);
        run++;
    }
    show();
}
}

View File

@ -27,25 +27,6 @@ import org.hamcrest.TypeSafeDiagnosingMatcher;
*/
final class TokenMatchers {
/**
 * Creates a matcher that accepts a token whose {@code type} field is {@code ==} to the given type.
 *
 * @param expectedType the type a matching token must have
 * @return a diagnosing matcher that writes the token's actual type into the mismatch description
 */
public static Matcher<Token> hasType(final Token.Type expectedType) {
    return new TypeSafeDiagnosingMatcher<Token>() {
        @Override
        public void describeTo(final Description expectation) {
            expectation.appendText("token has type ").appendValue(expectedType);
        }

        @Override
        protected boolean matchesSafely(final Token token, final Description mismatch) {
            // The mismatch text is recorded on every call, whether or not the token matches.
            mismatch.appendText("token type is ").appendValue(token.type);
            return expectedType == token.type;
        }
    };
}
public static Matcher<Token> hasContent(final String expectedContent) {
return new TypeSafeDiagnosingMatcher<Token>() {
@ -65,6 +46,25 @@ final class TokenMatchers {
};
}
/**
 * Creates a matcher that accepts a token whose {@code type} field is {@code ==} to the given type.
 *
 * @param expectedType the type a matching token must have
 * @return a diagnosing matcher that writes the token's actual type into the mismatch description
 */
public static Matcher<Token> hasType(final Token.Type expectedType) {
    return new TypeSafeDiagnosingMatcher<Token>() {
        @Override
        public void describeTo(final Description expectation) {
            expectation.appendText("token has type ").appendValue(expectedType);
        }

        @Override
        protected boolean matchesSafely(final Token token, final Description mismatch) {
            // The mismatch text is recorded on every call, whether or not the token matches.
            mismatch.appendText("token type is ").appendValue(token.type);
            return expectedType == token.type;
        }
    };
}
public static Matcher<Token> isReady() {
return new TypeSafeDiagnosingMatcher<Token>() {

View File

@ -39,6 +39,12 @@ public class TokenMatchersTest {
token.content.append("content");
}
/** Checks that {@code hasContent} rejects a non-matching string and accepts the string {@code "content"}. */
@Test
public void testHasContent() {
// A string that differs from the token's content must not match.
assertFalse(hasContent("This is not the token's content").matches(token));
// "content" is the text appended to the shared token during set-up.
assertTrue(hasContent("content").matches(token));
}
@Test
public void testHasType() {
assertFalse(hasType(Token.Type.COMMENT).matches(token));
@ -47,12 +53,6 @@ public class TokenMatchersTest {
assertTrue(hasType(Token.Type.TOKEN).matches(token));
}
/** Checks that {@code hasContent} rejects a non-matching string and accepts the string {@code "content"}. */
@Test
public void testHasContent() {
// A string that differs from the token's content must not match.
assertFalse(hasContent("This is not the token's content").matches(token));
// "content" is the text appended to the shared token during set-up.
assertTrue(hasContent("content").matches(token));
}
@Test
public void testIsReady() {
assertTrue(isReady().matches(token));

View File

@ -28,9 +28,6 @@ import java.util.List;
*/
final class Utils {
/** Private, empty constructor: this class cannot be instantiated from outside. */
private Utils() {
}
/**
* Checks if the 2d array has the same contents as the list of records.
*
@ -45,4 +42,7 @@ final class Utils {
assertArrayEquals(expected[i], actual.get(i).values(), message + " (entry " + i + ")");
}
}
/** Private, empty constructor: this class cannot be instantiated from outside. */
private Utils() {
}
}

View File

@ -24,6 +24,23 @@ import org.junit.jupiter.api.Test;
public class JiraCsv148Test {
/**
 * Formats empty, single-space and space-padded values with {@code QuoteMode.ALL} and
 * {@code ignoreSurroundingSpaces} enabled, and checks that every value is quoted with its spaces kept as given.
 */
@Test
public void testWithIgnoreSurroundingSpacesEmpty() {
    // @formatter:off
    final CSVFormat quoteAllFormat = CSVFormat.DEFAULT.builder()
        .setQuoteMode(QuoteMode.ALL)
        .setIgnoreSurroundingSpaces(true)
        .build();
    // @formatter:on
    final String expected = "\"\",\" \",\" Single space on the left\",\"Single space on the right \","
        + "\" Single spaces on both sides \",\" Multiple spaces on the left\","
        + "\"Multiple spaces on the right \",\" Multiple spaces on both sides \"";
    final String actual = quoteAllFormat.format("", " ", " Single space on the left", "Single space on the right ",
        " Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ",
        " Multiple spaces on both sides ");
    assertEquals(expected, actual);
}
/**
* The difference between withTrim() and withIgnoreSurroundingSpaces(): withTrim() can remove the leading
* and trailing spaces and newlines in quotation marks, while withIgnoreSurroundingSpaces() cannot. The same point:
@ -45,21 +62,4 @@ public class JiraCsv148Test {
" Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ",
" Multiple spaces on both sides "));
}
/**
 * Formats empty, single-space and space-padded values with {@code QuoteMode.ALL} and
 * {@code ignoreSurroundingSpaces} enabled, and checks that every value is quoted with its spaces kept as given.
 */
@Test
public void testWithIgnoreSurroundingSpacesEmpty() {
    // @formatter:off
    final CSVFormat quoteAllFormat = CSVFormat.DEFAULT.builder()
        .setQuoteMode(QuoteMode.ALL)
        .setIgnoreSurroundingSpaces(true)
        .build();
    // @formatter:on
    final String expected = "\"\",\" \",\" Single space on the left\",\"Single space on the right \","
        + "\" Single spaces on both sides \",\" Multiple spaces on the left\","
        + "\"Multiple spaces on the right \",\" Multiple spaces on both sides \"";
    final String actual = quoteAllFormat.format("", " ", " Single space on the left", "Single space on the right ",
        " Single spaces on both sides ", " Multiple spaces on the left", "Multiple spaces on the right ",
        " Multiple spaces on both sides ");
    assertEquals(expected, actual);
}
}

View File

@ -31,6 +31,11 @@ import org.junit.jupiter.api.Test;
public class JiraCsv167Test {
/**
 * Opens the {@code sample1.csv} test resource through the system class loader.
 *
 * @return a reader over the resource stream, created without an explicit charset
 */
private Reader getTestReader() {
    final ClassLoader systemLoader = ClassLoader.getSystemClassLoader();
    return new InputStreamReader(systemLoader.getResourceAsStream("org/apache/commons/csv/csv-167/sample1.csv"));
}
@Test
public void parse() throws IOException {
int totcomment = 0;
@ -81,9 +86,4 @@ public class JiraCsv167Test {
assertEquals(totcomment, comments);
assertEquals(totrecs, records); // records includes the header
}
/**
 * Opens the {@code sample1.csv} test resource through the system class loader.
 *
 * @return a reader over the resource stream, created without an explicit charset
 */
private Reader getTestReader() {
    final ClassLoader systemLoader = ClassLoader.getSystemClassLoader();
    return new InputStreamReader(systemLoader.getResourceAsStream("org/apache/commons/csv/csv-167/sample1.csv"));
}
}

View File

@ -61,21 +61,6 @@ public class JiraCsv203Test {
assertEquals("N/A,\"Hello\",N/A,\"World\"\r\n", buffer.toString());
}
/**
 * Prints a record containing {@code null} values with an Excel-based format that sets the null string "N/A"
 * but no quote mode, and checks that the nulls come out as unquoted {@code N/A}.
 *
 * @throws Exception propagated from the printer
 */
@Test
public void testWithoutQuoteMode() throws Exception {
    // @formatter:off
    final CSVFormat excelWithNullString = CSVFormat.EXCEL.builder()
        .setNullString("N/A")
        .setIgnoreSurroundingSpaces(true)
        .build();
    // @formatter:on
    final StringBuilder out = new StringBuilder();
    try (final CSVPrinter csvPrinter = new CSVPrinter(out, excelWithNullString)) {
        csvPrinter.printRecord(null, "Hello", null, "World");
    }
    final String printed = out.toString();
    assertEquals("N/A,Hello,N/A,World\r\n", printed);
}
@Test
public void testQuoteModeMinimal() throws Exception {
// @formatter:off
@ -108,6 +93,23 @@ public class JiraCsv203Test {
assertEquals("N/A,\"Hello\",N/A,\"World\"\r\n", buffer.toString());
}
/**
 * Prints a record containing empty strings with an Excel-based format that sets the null string "N/A" and
 * {@code QuoteMode.ALL}, and checks that the empty values come out as {@code ""} rather than as the null string.
 *
 * @throws Exception propagated from the printer
 */
@Test
public void testWithEmptyValues() throws Exception {
    // @formatter:off
    final CSVFormat excelQuoteAll = CSVFormat.EXCEL.builder()
        .setNullString("N/A")
        .setIgnoreSurroundingSpaces(true)
        .setQuoteMode(QuoteMode.ALL)
        .build();
    // @formatter:on
    final StringBuilder out = new StringBuilder();
    try (final CSVPrinter csvPrinter = new CSVPrinter(out, excelQuoteAll)) {
        csvPrinter.printRecord("", "Hello", "", "World");
        // printer.printRecord(new Object[] { null, "Hello", null, "World" });
    }
    final String printed = out.toString();
    assertEquals("\"\",\"Hello\",\"\",\"World\"\r\n", printed);
}
@Test
public void testWithoutNullString() throws Exception {
// @formatter:off
@ -125,19 +127,17 @@ public class JiraCsv203Test {
}
@Test
public void testWithEmptyValues() throws Exception {
public void testWithoutQuoteMode() throws Exception {
// @formatter:off
final CSVFormat format = CSVFormat.EXCEL.builder()
.setNullString("N/A")
.setIgnoreSurroundingSpaces(true)
.setQuoteMode(QuoteMode.ALL)
.build();
// @formatter:on
final StringBuilder buffer = new StringBuilder();
try (final CSVPrinter printer = new CSVPrinter(buffer, format)) {
printer.printRecord("", "Hello", "", "World");
// printer.printRecord(new Object[] { null, "Hello", null, "World" });
printer.printRecord(null, "Hello", null, "World");
}
assertEquals("\"\",\"Hello\",\"\",\"World\"\r\n", buffer.toString());
assertEquals("N/A,Hello,N/A,World\r\n", buffer.toString());
}
}

View File

@ -30,6 +30,10 @@ import org.apache.commons.csv.CSVRecord;
import org.junit.jupiter.api.Test;
public class JiraCsv248Test {
/**
 * Looks up the serialized {@code csvRecord.bin} test resource through the system class loader.
 *
 * @return whatever {@link ClassLoader#getResourceAsStream(String)} yields for the resource path
 */
private static InputStream getTestInput() {
    final ClassLoader systemLoader = ClassLoader.getSystemClassLoader();
    return systemLoader.getResourceAsStream("org/apache/commons/csv/CSV-248/csvRecord.bin");
}
/**
* Test deserialisation of a CSVRecord created using version 1.6.
*
@ -74,8 +78,4 @@ public class JiraCsv248Test {
}
}
}
/**
 * Looks up the serialized {@code csvRecord.bin} test resource through the system class loader.
 *
 * @return whatever {@link ClassLoader#getResourceAsStream(String)} yields for the resource path
 */
private static InputStream getTestInput() {
    final ClassLoader systemLoader = ClassLoader.getSystemClassLoader();
    return systemLoader.getResourceAsStream("org/apache/commons/csv/CSV-248/csvRecord.bin");
}
}

View File

@ -43,11 +43,9 @@ import org.junit.jupiter.api.Test;
@SuppressWarnings("boxing") // test code
public class PerformanceTest {
private final int max = 10;
private static final String TEST_RESRC = "org/apache/commons/csv/perf/worldcitiespop.txt.gz";
private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt");
private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt");
@BeforeAll
public static void setUpClass() throws FileNotFoundException, IOException {
if (BIG_FILE.exists()) {
@ -64,6 +62,8 @@ public class PerformanceTest {
}
}
private final int max = 10;
/**
 * Opens {@code BIG_FILE} for buffered character reading.
 *
 * @return a new buffered reader wrapping a {@link FileReader} on {@code BIG_FILE}
 * @throws IOException if the file reader cannot be created
 */
private BufferedReader createBufferedReader() throws IOException {
    final FileReader bigFileReader = new FileReader(BIG_FILE);
    return new BufferedReader(bigFileReader);
}