From bb8e6ae8467d59a15e4558c6f1f6395e1718e674 Mon Sep 17 00:00:00 2001 From: Doron Cohen Date: Tue, 22 Mar 2011 15:08:29 +0000 Subject: [PATCH] LUCENE-2978: Upgrade benchmark's commons-compress from 1.0 to 1.1. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1084210 13f79535-47bb-0310-9956-ffa450edef68 --- dev-tools/eclipse/dot.classpath | 2 +- modules/benchmark/CHANGES.txt | 6 +++ .../benchmark/lib/commons-compress-1.0.jar | 2 - .../benchmark/lib/commons-compress-1.1.jar | 2 + .../benchmark/lib/commons-compress-NOTICE.txt | 12 +----- .../benchmark/byTask/feeds/ContentSource.java | 37 ++++++------------- 6 files changed, 21 insertions(+), 40 deletions(-) delete mode 100644 modules/benchmark/lib/commons-compress-1.0.jar create mode 100644 modules/benchmark/lib/commons-compress-1.1.jar diff --git a/dev-tools/eclipse/dot.classpath b/dev-tools/eclipse/dot.classpath index af0631b9453..987ea9ffcca 100644 --- a/dev-tools/eclipse/dot.classpath +++ b/dev-tools/eclipse/dot.classpath @@ -90,7 +90,7 @@ - + diff --git a/modules/benchmark/CHANGES.txt b/modules/benchmark/CHANGES.txt index ca3eeab8764..b33c96e9c1f 100644 --- a/modules/benchmark/CHANGES.txt +++ b/modules/benchmark/CHANGES.txt @@ -2,6 +2,12 @@ Lucene Benchmark Contrib Change Log The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. +03/22/2011 + LUCENE-2978: Upgrade benchmark's commons-compress from 1.0 to 1.1 as + the move of gzip decompression in LUCENE-1540 from Java's GZipInputStream + to commons-compress 1.0 made it 15 times slower. In 1.1 no such slow-down + is observed. (Doron Cohen) + 03/21/2011 LUCENE-2958: WriteLineDocTask improvements - allow to emit line docs also for empty docs, and be flexible about which fields are added to the line file. For this, a header diff --git a/modules/benchmark/lib/commons-compress-1.0.jar b/modules/benchmark/lib/commons-compress-1.0.jar deleted file mode 100644 index 473e2bfa654..00000000000 --- a/modules/benchmark/lib/commons-compress-1.0.jar +++ /dev/null @@ -1,2 +0,0 @@ -AnyObjectId[78d832c11c42023d4bc12077a1d9b7b5025217bc] was removed in git history. -Apache SVN contains full history. \ No newline at end of file diff --git a/modules/benchmark/lib/commons-compress-1.1.jar b/modules/benchmark/lib/commons-compress-1.1.jar new file mode 100644 index 00000000000..c47e3962b1c --- /dev/null +++ b/modules/benchmark/lib/commons-compress-1.1.jar @@ -0,0 +1,2 @@ +AnyObjectId[51baf91a2df10184a8cca5cb43f11418576743a1] was removed in git history. +Apache SVN contains full history. \ No newline at end of file diff --git a/modules/benchmark/lib/commons-compress-NOTICE.txt b/modules/benchmark/lib/commons-compress-NOTICE.txt index 4237bf9ff45..a5331f08e7a 100644 --- a/modules/benchmark/lib/commons-compress-NOTICE.txt +++ b/modules/benchmark/lib/commons-compress-NOTICE.txt @@ -1,15 +1,5 @@ Apache Commons Compress -Copyright 2002-2009 The Apache Software Foundation +Copyright 2002-2010 The Apache Software Foundation This product includes software developed by The Apache Software Foundation (http://www.apache.org/). - -Original BZip2 classes contributed by Keiron Liddle -, Aftex Software to the Apache Ant project - -Original Tar classes from contributors of the Apache Ant project - -Original Zip classes from contributors of the Apache Ant project - -Original CPIO classes contributed by Markus Kuss and the jRPM project -(jrpm.sourceforge.net) diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java index b831e69adab..da160d7a4d5 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java @@ -55,15 +55,13 @@ import org.apache.lucene.benchmark.byTask.utils.Config; */ public abstract class ContentSource { - private static final int BZIP = 0; - private static final int GZIP = 1; - private static final int OTHER = 2; - private static final Map extensionToType = new HashMap(); + private static final Map extensionToType = new HashMap(); static { - extensionToType.put(".bz2", Integer.valueOf(BZIP)); - extensionToType.put(".bzip", Integer.valueOf(BZIP)); - extensionToType.put(".gz", Integer.valueOf(GZIP)); - extensionToType.put(".gzip", Integer.valueOf(GZIP)); + // these in are lower case, we will lower case at the test as well + extensionToType.put(".bz2", CompressorStreamFactory.BZIP2); + extensionToType.put(".bzip", CompressorStreamFactory.BZIP2); + extensionToType.put(".gz", CompressorStreamFactory.GZIP); + extensionToType.put(".gzip", CompressorStreamFactory.GZIP); } protected static final int BUFFER_SIZE = 1 << 16; // 64K @@ -128,28 +126,15 @@ public abstract class ContentSource { String fileName = file.getName(); int idx = fileName.lastIndexOf('.'); - int type = OTHER; + String type = null; if (idx != -1) { - Integer typeInt = extensionToType.get(fileName.substring(idx)); - if (typeInt != null) { - type = typeInt.intValue(); - } + type = extensionToType.get(fileName.substring(idx)); } try { - switch (type) { - case BZIP: - // According to BZip2CompressorInputStream's code, it reads the first - // two file header chars ('B' and 'Z'). It is important to wrap the - // underlying input stream with a buffered one since - // Bzip2CompressorInputStream uses the read() method exclusively. - is = csFactory.createCompressorInputStream("bzip2", is); - break; - case GZIP: - is = csFactory.createCompressorInputStream("gz", is); - break; - default: // Do nothing, stay with FileInputStream - } + if (type!=null) { // bzip or gzip + return csFactory.createCompressorInputStream(type, is); + } } catch (CompressorException e) { IOException ioe = new IOException(e.getMessage()); ioe.initCause(e);