Added a JMH benchmark to compare the most common CSV parsers

git-svn-id: https://svn.apache.org/repos/asf/commons/proper/csv/trunk@1658276 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Emmanuel Bourg 2015-02-08 23:47:27 +00:00
parent fd533e1af3
commit 19fbc19294
2 changed files with 284 additions and 0 deletions

112
pom.xml
View File

@ -72,6 +72,12 @@ CSV files of various types.
<email>yonik@apache.org</email> <email>yonik@apache.org</email>
<organization>The Apache Software Foundation</organization> <organization>The Apache Software Foundation</organization>
</developer> </developer>
<developer>
<name>Emmanuel Bourg</name>
<id>ebourg</id>
<email>ebourg@apache.org</email>
<organization>Apache</organization>
</developer>
<developer> <developer>
<name>Gary Gregory</name> <name>Gary Gregory</name>
<id>ggregory</id> <id>ggregory</id>
@ -361,6 +367,112 @@ CSV files of various types.
</plugins> </plugins>
</build> </build>
</profile> </profile>
<!-- Profile to build and run the benchmarks. Use 'mvn test -Pbenchmark', and add '-Dbenchmark=foo' to run only the foo benchmark -->
<profile>
  <id>benchmark</id>
  <!-- Third-party CSV parsers compared by the benchmark, plus the JMH harness itself.
       Everything here is only needed by the benchmark sources, hence the test scope. -->
  <dependencies>
    <dependency>
      <groupId>org.openjdk.jmh</groupId>
      <artifactId>jmh-core</artifactId>
      <version>1.5.2</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.openjdk.jmh</groupId>
      <artifactId>jmh-generator-annprocess</artifactId>
      <version>1.5.2</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>genjava</groupId>
      <artifactId>gj-csv</artifactId>
      <version>1.0</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>net.sourceforge.javacsv</groupId>
      <artifactId>javacsv</artifactId>
      <version>2.0</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.opencsv</groupId>
      <artifactId>opencsv</artifactId>
      <version>3.1</version>
      <scope>test</scope>
    </dependency>
    <!-- Test scope added so this dependency matches its siblings and does not default to compile scope -->
    <dependency>
      <groupId>net.sf.supercsv</groupId>
      <artifactId>super-csv</artifactId>
      <version>2.2.1</version>
      <scope>test</scope>
    </dependency>
    <!-- Not available in Maven Central: download it manually from http://kasparov.skife.org/csv/csv-1.0.jar and copy it into the base directory -->
    <dependency>
      <groupId>org.skife.kasparov</groupId>
      <artifactId>csv</artifactId>
      <version>1.0</version>
      <scope>system</scope>
      <systemPath>${basedir}/csv-1.0.jar</systemPath>
    </dependency>
  </dependencies>
  <properties>
    <!-- Regular unit tests are skipped while this profile is active -->
    <skipTests>true</skipTests>
    <!-- Default value of the benchmark selector passed to JMH; override with -Dbenchmark=foo -->
    <benchmark>org.apache</benchmark>
  </properties>
  <build>
    <plugins>
      <!-- Enable the compilation of the benchmarks -->
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration combine.self="override">
          <testIncludes>
            <testInclude>**/*</testInclude>
          </testIncludes>
        </configuration>
      </plugin>
      <!-- Hook the benchmarks to the test phase -->
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <executions>
          <execution>
            <id>benchmark</id>
            <phase>test</phase>
            <goals>
              <goal>exec</goal>
            </goals>
            <configuration>
              <classpathScope>test</classpathScope>
              <executable>java</executable>
              <!-- Launches org.openjdk.jmh.Main on the test classpath and writes a JSON report to target/jmh-result.json -->
              <arguments>
                <argument>-classpath</argument>
                <classpath/>
                <argument>org.openjdk.jmh.Main</argument>
                <argument>-rf</argument>
                <argument>json</argument>
                <argument>-rff</argument>
                <argument>target/jmh-result.json</argument>
                <argument>${benchmark}</argument>
              </arguments>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</profile>
</profiles> </profiles>
</project> </project>

View File

@ -0,0 +1,172 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.csv;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.generationjava.io.CsvReader;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import org.supercsv.io.CsvListReader;
import org.supercsv.prefs.CsvPreference;
/**
 * JMH benchmark that reads the same input file, {@code worldcitiespop.txt}, with several
 * CSV parsers and counts the records each of them produces.
 *
 * <p>Each benchmark opens a fresh reader on the file, iterates over all the records, hands
 * the result to the {@link Blackhole} and returns the number of records. The
 * {@link #baseline(Blackhole)} benchmark only reads lines with
 * {@link BufferedReader#readLine()} and performs no CSV parsing.</p>
 *
 * <p>The benchmarks report the average time per invocation in milliseconds, in a single
 * forked JVM started with {@code -server}, on one thread, with 10 warmup iterations and
 * 10 measurement iterations.</p>
 *
 * <p>Readers are closed in {@code finally} blocks so that a parser failure does not leak
 * the file handle.</p>
 */
@BenchmarkMode(Mode.AverageTime)
@Fork(value = 1, jvmArgs = "-server")
@Threads(1)
@Warmup(iterations = 10)
@Measurement(iterations = 10)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public class CSVBenchmark {

    /**
     * Opens a buffered reader on the benchmark input file.
     *
     * <p>The relative path {@code worldcitiespop.txt} is resolved against the working
     * directory of the JVM running the benchmark.</p>
     *
     * @return a new reader that the caller must close
     * @throws IOException if the file cannot be opened
     */
    private BufferedReader getReader() throws IOException {
        return new BufferedReader(new FileReader("worldcitiespop.txt"));
    }

    /**
     * Reads the file line by line without any CSV parsing.
     *
     * @param bh the JMH blackhole consuming the line count
     * @return the number of lines read
     * @throws Exception if the file cannot be read
     */
    @Benchmark
    public int baseline(Blackhole bh) throws Exception {
        BufferedReader in = getReader();
        try {
            int count = 0;
            while (in.readLine() != null) {
                count++;
            }

            bh.consume(count);
            return count;
        } finally {
            in.close();
        }
    }

    /**
     * Parses the file with Commons CSV, using {@code CSVFormat.DEFAULT.withHeader()}.
     *
     * @param bh the JMH blackhole consuming the record count
     * @return the number of records iterated
     * @throws Exception if the file cannot be read or parsed
     */
    @Benchmark
    public int parseCommonsCSV(Blackhole bh) throws Exception {
        BufferedReader in = getReader();
        try {
            CSVFormat format = CSVFormat.DEFAULT.withHeader();

            int count = 0;
            for (CSVRecord record : format.parse(in)) {
                count++;
            }

            bh.consume(count);
            return count;
        } finally {
            in.close();
        }
    }

    /**
     * Parses the file with the GenJava {@code CsvReader}, using a comma as field delimiter.
     *
     * @param bh the JMH blackhole consuming the record count
     * @return the number of records read
     * @throws Exception if the file cannot be read or parsed
     */
    @Benchmark
    public int parseGenJavaCSV(Blackhole bh) throws Exception {
        BufferedReader in = getReader();
        try {
            CsvReader reader = new CsvReader(in);
            reader.setFieldDelimiter(',');

            int count = 0;
            while (reader.readLine() != null) {
                count++;
            }

            bh.consume(count);
            return count;
        } finally {
            in.close();
        }
    }

    /**
     * Parses the file with the JavaCSV {@code CsvReader}, using a comma as field delimiter
     * and {@code '\n'} as record delimiter.
     *
     * @param bh the JMH blackhole consuming the record count
     * @return the number of records read
     * @throws Exception if the file cannot be read or parsed
     */
    @Benchmark
    public int parseJavaCSV(Blackhole bh) throws Exception {
        BufferedReader in = getReader();
        try {
            com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ',');
            reader.setRecordDelimiter('\n');

            int count = 0;
            while (reader.readRecord()) {
                count++;
            }

            bh.consume(count);
            return count;
        } finally {
            in.close();
        }
    }

    /**
     * Parses the file with the OpenCSV {@code CSVReader}, using a comma as separator.
     *
     * @param bh the JMH blackhole consuming the record count
     * @return the number of records read
     * @throws Exception if the file cannot be read or parsed
     */
    @Benchmark
    public int parseOpenCSV(Blackhole bh) throws Exception {
        BufferedReader in = getReader();
        try {
            com.opencsv.CSVReader reader = new com.opencsv.CSVReader(in, ',');

            int count = 0;
            while (reader.readNext() != null) {
                count++;
            }

            bh.consume(count);
            return count;
        } finally {
            in.close();
        }
    }

    /**
     * Parses the file with the Skife {@code SimpleReader}, which pushes the rows to a
     * callback instead of returning them.
     *
     * @param bh the JMH blackhole consuming the counting callback
     * @return the number of rows received by the callback
     * @throws Exception if the file cannot be read or parsed
     */
    @Benchmark
    public int parseSkifeCSV(Blackhole bh) throws Exception {
        BufferedReader in = getReader();
        try {
            org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader();
            // "setSeperator" is the spelling used by the library API
            reader.setSeperator(',');

            CountingReaderCallback callback = new CountingReaderCallback();
            reader.parse(in, callback);

            bh.consume(callback);
            return callback.count;
        } finally {
            in.close();
        }
    }

    /**
     * Row callback for the Skife parser that only counts the rows it receives.
     */
    private static class CountingReaderCallback implements org.skife.csv.ReaderCallback {
        /** Number of rows received so far. */
        public int count = 0;

        @Override
        public void onRow(String[] fields) {
            count++;
        }
    }

    /**
     * Parses the file with the Super CSV {@code CsvListReader}, using
     * {@code CsvPreference.STANDARD_PREFERENCE}.
     *
     * @param bh the JMH blackhole consuming the record count
     * @return the number of records read
     * @throws Exception if the file cannot be read or parsed
     */
    @Benchmark
    public int parseSuperCSV(Blackhole bh) throws Exception {
        BufferedReader in = getReader();
        try {
            CsvListReader reader = new CsvListReader(in, CsvPreference.STANDARD_PREFERENCE);

            int count = 0;
            while (reader.read() != null) {
                count++;
            }

            bh.consume(count);
            return count;
        } finally {
            in.close();
        }
    }
}