From 865872e0f1517796f4b203ff7682d922119514fa Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Sat, 19 May 2018 09:03:29 -0600 Subject: [PATCH] [CSV-225] Parse method should avoid creating a redundant BufferedReader. --- .../org/apache/commons/csv/CSVParser.java | 2 +- .../org/apache/commons/csv/CSVParserTest.java | 69 ++++++- .../apache/commons/csv/PerformanceTest.java | 172 +++++++++++------- 3 files changed, 174 insertions(+), 69 deletions(-) diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 7e9d7d41..ac45c72a 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -203,7 +203,7 @@ public final class CSVParser implements Iterable, Closeable { public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException { Assertions.notNull(path, "path"); Assertions.notNull(format, "format"); - return parse(Files.newBufferedReader(path, charset), format); + return parse(Files.newInputStream(path), charset, format); } /** diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 1e1d7a6c..83361547 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -39,6 +39,9 @@ import java.io.StringWriter; import java.net.URL; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -810,24 +813,78 @@ public class CSVParserTest { } } + @Test + public void testParse() throws Exception { + final ClassLoader loader = ClassLoader.getSystemClassLoader(); + final URL url = loader.getResource("CSVFileParser/test.csv"); + final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C", "D"); + final Charset charset = StandardCharsets.UTF_8; + + try(final CSVParser parser = CSVParser.parse(new InputStreamReader(url.openStream(), charset), format)) { + parseFully(parser); + } + try(final CSVParser parser = CSVParser.parse(new String(Files.readAllBytes(Paths.get(url.toURI())), charset), format)) { + parseFully(parser); + } + try(final CSVParser parser = CSVParser.parse(new File(url.toURI()), charset, format)) { + parseFully(parser); + } + try(final CSVParser parser = CSVParser.parse(url.openStream(), charset, format)) { + parseFully(parser); + } + try(final CSVParser parser = CSVParser.parse(Paths.get(url.toURI()), charset, format)) { + parseFully(parser); + } + try(final CSVParser parser = CSVParser.parse(url, charset, format)) { + parseFully(parser); + } + try(final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format)) { + parseFully(parser); + } + try(final CSVParser parser = new CSVParser(new InputStreamReader(url.openStream(), charset), format, /*characterOffset=*/0, /*recordNumber=*/1)) { + parseFully(parser); + } + } + + private void parseFully(final CSVParser parser) { + for (final Iterator records = parser.iterator(); records.hasNext(); ) { + records.next(); + } + } + @Test(expected = IllegalArgumentException.class) public void testParseFileNullFormat() throws Exception { - CSVParser.parse(new File(""), Charset.defaultCharset(), null); + try (final CSVParser parser = CSVParser.parse(new File("CSVFileParser/test.csv"), Charset.defaultCharset(), null)) { + Assert.fail("This test should have thrown an exception."); + } } @Test(expected = IllegalArgumentException.class) public void testParseNullFileFormat() throws Exception { - CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT); + try (final CSVParser parser = CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT)) { + Assert.fail("This test should have thrown an exception."); + } + } + + @Test(expected = IllegalArgumentException.class) + public void testParseNullPathFormat() throws Exception { + try (final CSVParser parser = CSVParser.parse((Path) null, Charset.defaultCharset(), CSVFormat.DEFAULT)) { + Assert.fail("This test should have thrown an exception."); + } } @Test(expected = IllegalArgumentException.class) public void testParseNullStringFormat() throws Exception { - CSVParser.parse((String) null, CSVFormat.DEFAULT); + try (final CSVParser parser = CSVParser.parse((String) null, CSVFormat.DEFAULT)) { + Assert.fail("This test should have thrown an exception."); + } } @Test(expected = IllegalArgumentException.class) public void testParseNullUrlCharsetFormat() throws Exception { - CSVParser.parse((File) null, Charset.defaultCharset(), CSVFormat.DEFAULT); + try (final CSVParser parser = CSVParser.parse((URL) null, Charset.defaultCharset(), CSVFormat.DEFAULT)) { + Assert.fail("This test should have thrown an exception."); + } } @Test(expected = IllegalArgumentException.class) @@ -839,7 +896,9 @@ public class CSVParserTest { @Test(expected = IllegalArgumentException.class) public void testParseStringNullFormat() throws Exception { - CSVParser.parse("csv data", null); + try (final CSVParser parser = CSVParser.parse("csv data", (CSVFormat) null)) { + Assert.fail("This test should have thrown an exception."); + } } @Test(expected = IllegalArgumentException.class) diff --git a/src/test/java/org/apache/commons/csv/PerformanceTest.java b/src/test/java/org/apache/commons/csv/PerformanceTest.java index 65d8de9b..9ab70711 100644 --- a/src/test/java/org/apache/commons/csv/PerformanceTest.java +++ b/src/test/java/org/apache/commons/csv/PerformanceTest.java @@ -21,12 +21,16 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; -import java.io.FileReader; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; import java.io.OutputStream; +import java.io.Reader; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.zip.GZIPInputStream; import org.apache.commons.io.IOUtils; @@ -55,55 +59,62 @@ public class PerformanceTest { "os.name", // Operating system name "os.arch", // Operating system architecture "os.version", // Operating system version - }; - private static int max = 10; + private static int max = 11; // skip first test private static int num = 0; // number of elapsed times recorded private static long[] elapsedTimes = new long[max]; private static final CSVFormat format = CSVFormat.EXCEL; - private static final File BIG_FILE = new File(System.getProperty("java.io.tmpdir"), "worldcitiespop.txt"); + private static final File BIG_FILE = new File("src/test/resources/perf/worldcitiespop.txt"); public static void main(final String [] args) throws Exception { if (BIG_FILE.exists()) { - System.out.println(String.format("Found test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length())); + System.out.printf("Found test fixture %s: %,d bytes.%n", BIG_FILE, BIG_FILE.length()); } else { - System.out.println("Decompressing test fixture " + BIG_FILE + "..."); - try (final InputStream input = new GZIPInputStream( - new FileInputStream("src/test/resources/perf/worldcitiespop.txt.gz")); + final File compressedFile = new File(BIG_FILE.getParentFile(), BIG_FILE.getName() + ".gz"); + System.out.printf("Decompressing test fixture %s...%n", compressedFile); + long bytesOut = 0L; + try (final InputStream input = new GZIPInputStream(new FileInputStream(compressedFile)); final OutputStream output = new FileOutputStream(BIG_FILE)) { - IOUtils.copy(input, output); + bytesOut = IOUtils.copy(input, output); } - System.out.println(String.format("Decompressed test fixture %s: %,d bytes.", BIG_FILE, BIG_FILE.length())); + System.out.printf("Decompressed test fixture %s: %,d bytes to: %s: %,d bytes.%n", compressedFile, compressedFile.length(), BIG_FILE, bytesOut); } final int argc = args.length; - String tests[]; if (argc > 0) { - max=Integer.parseInt(args[0]); + max = Integer.parseInt(args[0]); } + + String tests[]; if (argc > 1) { - tests = new String[argc-1]; + tests = new String[argc - 1]; for (int i = 1; i < argc; i++) { - tests[i-1]=args[i]; + tests[i - 1] = args[i]; } } else { - tests=new String[]{"file", "split", "extb", "exts", "csv", "lexreset", "lexnew"}; + tests = new String[] { "file", "split", "extb", "exts", "csv", "csv-path", "csv-path-db", "csv-url", "lexreset", "lexnew" }; } - for(final String p : PROPS) { - System.out.println(p+"="+System.getProperty(p)); + for (final String p : PROPS) { + System.out.printf("%s=%s%n", p, System.getProperty(p)); } - System.out.println("Max count: "+max+"\n"); + System.out.printf("Max count: %d%n%n", max); - for(final String test : tests) { + for (final String test : tests) { if ("file".equals(test)) { testReadBigFile(false); } else if ("split".equals(test)) { testReadBigFile(true); } else if ("csv".equals(test)) { testParseCommonsCSV(); + } else if ("csv-path".equals(test)) { + testParsePath(); + } else if ("csv-path-db".equals(test)) { + testParsePathDoubleBuffering(); + } else if ("csv-url".equals(test)) { + testParseURL(); } else if ("lexreset".equals(test)) { testCSVLexer(false, test); } else if ("lexnew".equals(test)) { @@ -115,13 +126,13 @@ public class PerformanceTest { } else if ("exts".equals(test)) { testExtendedBuffer(true); } else { - System.out.println("Invalid test name: "+test); + System.out.printf("Invalid test name: %s%n", test); } } } - private static BufferedReader createReader() throws IOException { - return new BufferedReader(new FileReader(BIG_FILE)); + private static Reader createReader() throws IOException { + return new InputStreamReader(new FileInputStream(BIG_FILE), StandardCharsets.ISO_8859_1); } // Container for basic statistics @@ -129,35 +140,36 @@ public class PerformanceTest { final int count; final int fields; Stats(final int c, final int f) { - count=c; - fields=f; + count = c; + fields = f; } } // Display end stats; store elapsed for average private static void show(final String msg, final Stats s, final long start) { final long elapsed = System.currentTimeMillis() - start; - System.out.printf("%-20s: %5dms " + s.count + " lines "+ s.fields + " fields%n",msg,elapsed); - elapsedTimes[num++]=elapsed; + System.out.printf("%-20s: %5dms %d lines %d fields%n", msg, elapsed, s.count, s.fields); + elapsedTimes[num] = elapsed; + num++; } // calculate and show average private static void show(){ - long tot = 0; if (num > 1) { - for(int i=1; i < num; i++) { // skip first test + long tot = 0; + for (int i = 1; i < num; i++) { // skip first test tot += elapsedTimes[i]; } - System.out.printf("%-20s: %5dms%n%n", "Average(not first)", tot/(num-1)); + System.out.printf("%-20s: %5dms%n%n", "Average(not first)", tot / (num - 1)); } - num=0; // ready for next set + num = 0; // ready for next set } private static void testReadBigFile(final boolean split) throws Exception { for (int i = 0; i < max; i++) { final long startMillis; final Stats stats; - try (final BufferedReader in = createReader()) { + try (final BufferedReader in = new BufferedReader(createReader())) { startMillis = System.currentTimeMillis(); stats = readAll(in, split); } @@ -166,16 +178,16 @@ public class PerformanceTest { show(); } - private static Stats readAll(final BufferedReader in, final boolean split) throws IOException { - int count = 0; - int fields = 0; - String record; - while ((record=in.readLine()) != null) { - count++; - fields+= split ? record.split(",").length : 1; - } - return new Stats(count, fields); - } + private static Stats readAll(final BufferedReader in, final boolean split) throws IOException { + int count = 0; + int fields = 0; + String record; + while ((record = in.readLine()) != null) { + count++; + fields += split ? record.split(",").length : 1; + } + return new Stats(count, fields); + } private static void testExtendedBuffer(final boolean makeString) throws Exception { for (int i = 0; i < max; i++) { @@ -215,27 +227,61 @@ public class PerformanceTest { show(); } - private static void testParseCommonsCSV() throws Exception { + private static void testParser(final String msg, final CSVParserFactory fac) throws Exception { for (int i = 0; i < max; i++) { final long startMillis; final Stats stats; - try (final BufferedReader reader = createReader()) { - try (final CSVParser parser = new CSVParser(reader, format)) { - startMillis = System.currentTimeMillis(); - stats = iterate(parser); - } - show("CSV", stats, startMillis); + try (final CSVParser parser = fac.createParser()) { + startMillis = System.currentTimeMillis(); + stats = iterate(parser); } + show(msg, stats, startMillis); } show(); } + private static interface CSVParserFactory { + public CSVParser createParser() throws IOException; + } - private static Constructor getLexerCtor(final String clazz) throws Exception { - @SuppressWarnings("unchecked") - final Class lexer = (Class) Class.forName("org.apache.commons.csv." + clazz); - return lexer.getConstructor(new Class[]{CSVFormat.class, ExtendedBufferedReader.class}); - } + private static void testParseCommonsCSV() throws Exception { + testParser("CSV", new CSVParserFactory() { + public CSVParser createParser() throws IOException { + return new CSVParser(createReader(), format); + } + }); + } + + private static void testParsePath() throws Exception { + testParser("CSV-PATH", new CSVParserFactory() { + public CSVParser createParser() throws IOException { + return CSVParser.parse(Files.newInputStream(Paths.get(BIG_FILE.toURI())), StandardCharsets.ISO_8859_1, format); + } + }); + } + + private static void testParsePathDoubleBuffering() throws Exception { + testParser("CSV-PATH-DB", new CSVParserFactory() { + public CSVParser createParser() throws IOException { + return CSVParser.parse(Files.newBufferedReader(Paths.get(BIG_FILE.toURI()), StandardCharsets.ISO_8859_1), format); + } + }); + } + + private static void testParseURL() throws Exception { + testParser("CSV-URL", new CSVParserFactory() { + public CSVParser createParser() throws IOException { + //NOTE: URL will always return a BufferedInputStream. + return CSVParser.parse(BIG_FILE.toURI().toURL(), StandardCharsets.ISO_8859_1, format); + } + }); + } + + private static Constructor getLexerCtor(final String clazz) throws Exception { + @SuppressWarnings("unchecked") + final Class lexer = (Class) Class.forName("org.apache.commons.csv." + clazz); + return lexer.getConstructor(new Class[]{CSVFormat.class, ExtendedBufferedReader.class}); + } private static void testCSVLexer(final boolean newToken, final String test) throws Exception { Token token = new Token(); @@ -245,7 +291,7 @@ public class PerformanceTest { final Stats stats; final long startMillis; try (final ExtendedBufferedReader input = new ExtendedBufferedReader(createReader()); - Lexer lexer = createTestCSVLexer(test, input)) { + final Lexer lexer = createTestCSVLexer(test, input)) { if (test.startsWith("CSVLexer")) { dynamic = "!"; } @@ -291,14 +337,14 @@ public class PerformanceTest { .newInstance(new Object[] { format, input }) : new Lexer(format, input); } - private static Stats iterate(final Iterable it) { - int count = 0; - int fields = 0; - for (final CSVRecord record : it) { - count++; - fields+=record.size(); - } - return new Stats(count, fields); - } + private static Stats iterate(final Iterable it) { + int count = 0; + int fields = 0; + for (final CSVRecord record : it) { + count++; + fields += record.size(); + } + return new Stats(count, fields); + } } \ No newline at end of file