LUCENE-3261: Facet benchmarking - indexing support - ported from 3x.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1180674 13f79535-47bb-0310-9956-ffa450edef68
Doron Cohen 2011-10-09 18:01:36 +00:00
parent 1912c6c9c6
commit 664a7191dd
21 changed files with 904 additions and 188 deletions

View File

@@ -141,6 +141,17 @@
<property name="analyzers-common.uptodate" value="true"/>
</target>
<property name="facet.jar" value="${common.dir}/../modules/facet/build/lucene-facet-${version}.jar"/>
<target name="check-facet-uptodate" unless="facet.uptodate">
<module-uptodate name="facet" jarfile="${facet.jar}" property="facet.uptodate"/>
</target>
<target name="jar-facet" unless="facet.uptodate" depends="check-facet-uptodate">
<ant dir="${common.dir}/../modules/facet" target="jar-core" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="facet.uptodate" value="true"/>
</target>
<property name="analyzers-icu.jar" value="${common.dir}/../modules/analysis/build/icu/lucene-analyzers-icu-${version}.jar"/>
<target name="check-analyzers-icu-uptodate" unless="analyzers-icu.uptodate">
<module-uptodate name="analysis/icu" jarfile="${analyzers-icu.jar}" property="analyzers-icu.uptodate"/>

View File

@@ -5,6 +5,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
For more information on past and future Lucene versions, please see:
http://s.apache.org/luceneversions
10/07/2011
LUCENE-3262: Facet benchmarking - Benchmark tasks and sources were added for indexing
with facets, demonstrated in facets.alg. (Gilad Barkai, Doron Cohen)
09/25/2011
LUCENE-3457: Upgrade commons-compress to 1.2 (and undo LUCENE-2980's workaround).
(Doron Cohen)

View File

@@ -153,6 +153,7 @@
<pathelement path="${highlighter.jar}"/>
<pathelement path="${analyzers-common.jar}"/>
<pathelement path="${queryparser.jar}"/>
<pathelement path="${facet.jar}"/>
<path refid="base.classpath"/>
<fileset dir="lib">
<include name="**/*.jar"/>
@@ -241,7 +242,7 @@
<echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
</target>
<target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser"/>
<target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
<target name="clean-javacc">
<fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">

View File

@@ -0,0 +1,72 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
with.facets=facets:true:false
content.source.forever=false
compound=true
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
taxonomy.directory=FSDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
log.step=1000
docs.dir=reuters-out
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
facet.source=org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource
rand.seed=10
max.doc.facets=20
max.facet.depth=3
query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
task.max.depth.log=2
#log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
-CreateIndex
-CreateTaxonomyIndex
{ "MAddDocs" AddFacetedDoc > : *
-Optimize
-CloseIndex
-CloseTaxonomyIndex
}
OpenReader
{ "SearchSameRdr" Search > : 40
CloseReader
#RepSumByNameRound
ResetSystemErase
NewRound
} : 4
RepSumByPrefRound Search
RepSumByPrefRound Populate
RepSumByPrefRound MAddDocs
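
As a rough sketch of how an algorithm file like the one above is executed: the benchmark module's Benchmark class takes the .alg file path as a command-line argument (the conf/facets.alg path below is illustrative, not part of this commit):

import org.apache.lucene.benchmark.byTask.Benchmark;

public class RunFacetsAlg {
  public static void main(String[] args) throws Exception {
    // parses the .alg file, runs the "Rounds" sequence, and prints the RepSum reports
    Benchmark.main(new String[] { "conf/facets.alg" }); // path is an assumption
  }
}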

View File

@@ -24,6 +24,7 @@ import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
@@ -31,12 +32,15 @@ import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.IOUtils;
/**
* Data maintained by a performance test run.
@@ -45,11 +49,21 @@ import org.apache.lucene.store.RAMDirectory;
* <ul>
* <li>Configuration.
* <li>Directory, Writer, Reader.
* <li>Docmaker and a few instances of QueryMaker.
* <li>Taxonomy Directory, Writer, Reader.
* <li>DocMaker, FacetSource and a few instances of QueryMaker.
* <li>Analyzer.
* <li>Statistics data which is updated during the run.
* </ul>
* Config properties: work.dir=&lt;path to root of docs and index dirs| Default: work&gt;
* Config properties:
* <ul>
* <li><b>work.dir</b>=&lt;path to root of docs and index dirs| Default: work&gt;
* <li><b>analyzer</b>=&lt;class name for analyzer| Default: StandardAnalyzer&gt;
* <li><b>doc.maker</b>=&lt;class name for doc-maker| Default: DocMaker&gt;
* <li><b>facet.source</b>=&lt;class name for facet-source| Default: RandomFacetSource&gt;
* <li><b>query.maker</b>=&lt;class name for query-maker| Default: SimpleQueryMaker&gt;
* <li><b>log.queries</b>=&lt;whether queries should be printed| Default: false&gt;
* <li><b>directory</b>=&lt;type of directory to use for the index| Default: RAMDirectory&gt;
* <li><b>taxonomy.directory</b>=&lt;type of directory for taxonomy index| Default: RAMDirectory&gt;
* </ul>
*/
public class PerfRunData {
@@ -62,7 +76,12 @@ public class PerfRunData {
private Directory directory;
private Analyzer analyzer;
private DocMaker docMaker;
private FacetSource facetSource;
private Locale locale;
private Directory taxonomyDir;
private TaxonomyWriter taxonomyWriter;
private TaxonomyReader taxonomyReader;
// we use separate (identical) instances for each "read" task type, so each can iterate the queries separately.
private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker;
@@ -73,6 +92,7 @@ public class PerfRunData {
private IndexWriter indexWriter;
private Config config;
private long startTimeMillis;
// constructor
public PerfRunData (Config config) throws Exception {
@@ -84,6 +104,10 @@
docMaker = Class.forName(config.get("doc.maker",
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
docMaker.setConfig(config);
// facet source
facetSource = Class.forName(config.get("facet.source",
"org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
facetSource.setConfig(config);
// query makers
readTaskQueryMaker = new HashMap<Class<? extends ReadTask>,QueryMaker>();
qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")).asSubclass(QueryMaker.class);
@@ -104,30 +128,17 @@
public void reinit(boolean eraseIndex) throws Exception {
// cleanup index
if (indexWriter!=null) {
indexWriter.close();
indexWriter = null;
}
if (indexReader!=null) {
indexReader.close();
indexReader = null;
}
if (directory!=null) {
directory.close();
}
IOUtils.close(indexWriter, indexReader, directory);
indexWriter = null;
indexReader = null;
IOUtils.close(taxonomyWriter, taxonomyReader, taxonomyDir);
taxonomyWriter = null;
taxonomyReader = null;
// directory (default is ram-dir).
if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) {
File workDir = new File(config.get("work.dir","work"));
File indexDir = new File(workDir,"index");
if (eraseIndex && indexDir.exists()) {
FileUtils.fullyDelete(indexDir);
}
indexDir.mkdirs();
directory = FSDirectory.open(indexDir);
} else {
directory = new RAMDirectory();
}
directory = createDirectory(eraseIndex, "index", "directory");
taxonomyDir = createDirectory(eraseIndex, "taxo", "taxonomy.directory");
// inputs
resetInputs();
@@ -139,6 +150,21 @@
// Re-init clock
setStartTimeMillis();
}
private Directory createDirectory(boolean eraseIndex, String dirName,
String dirParam) throws IOException {
if ("FSDirectory".equals(config.get(dirParam,"RAMDirectory"))) {
File workDir = new File(config.get("work.dir","work"));
File indexDir = new File(workDir,dirName);
if (eraseIndex && indexDir.exists()) {
FileUtils.fullyDelete(indexDir);
}
indexDir.mkdirs();
return FSDirectory.open(indexDir);
}
return new RAMDirectory();
}
public long setStartTimeMillis() {
startTimeMillis = System.currentTimeMillis();
@@ -173,6 +199,57 @@
this.directory = directory;
}
/**
* @return Returns the taxonomy directory
*/
public Directory getTaxonomyDir() {
return taxonomyDir;
}
/**
* Set the taxonomy reader. Takes ownership of that taxonomy reader, that is,
* internally performs taxoReader.incRef() (If caller no longer needs that
* reader it should decRef()/close() it after calling this method, otherwise,
* the reader will remain open).
* @param taxoReader The taxonomy reader to set.
*/
public synchronized void setTaxonomyReader(TaxonomyReader taxoReader) throws IOException {
if (taxoReader == this.taxonomyReader) {
return;
}
if (taxonomyReader != null) {
taxonomyReader.decRef();
}
if (taxoReader != null) {
taxoReader.incRef();
}
this.taxonomyReader = taxoReader;
}
/**
* @return Returns the taxonomyReader. NOTE: this returns a
* reference. You must call TaxonomyReader.decRef() when
* you're done.
*/
public synchronized TaxonomyReader getTaxonomyReader() {
if (taxonomyReader != null) {
taxonomyReader.incRef();
}
return taxonomyReader;
}
/**
* @param taxoWriter The taxonomy writer to set.
*/
public void setTaxonomyWriter(TaxonomyWriter taxoWriter) {
this.taxonomyWriter = taxoWriter;
}
public TaxonomyWriter getTaxonomyWriter() {
return taxonomyWriter;
}
/**
* @return Returns the indexReader. NOTE: this returns a
* reference. You must call IndexReader.decRef() when
@@ -198,13 +275,22 @@
}
/**
* Set the index reader. Takes ownership of that index reader, that is,
* internally performs indexReader.incRef() (If caller no longer needs that
* reader it should decRef()/close() it after calling this method, otherwise,
* the reader will remain open).
* @param indexReader The indexReader to set.
*/
public synchronized void setIndexReader(IndexReader indexReader) throws IOException {
if (indexReader == this.indexReader) {
return;
}
if (this.indexReader != null) {
// Release current IR
this.indexReader.decRef();
}
this.indexReader = indexReader;
if (indexReader != null) {
// Hold reference to new IR
@@ -246,6 +332,11 @@
return docMaker;
}
/** Returns the facet source. */
public FacetSource getFacetSource() {
return facetSource;
}
/**
* @return the locale
*/
@@ -269,6 +360,7 @@
public void resetInputs() throws IOException {
docMaker.resetInputs();
facetSource.resetInputs();
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
queryMaker.resetInputs();
}
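
Because getTaxonomyReader() (like getIndexReader()) hands back an incRef'd reference, every caller must balance the acquire with decRef(). A minimal usage sketch, assuming runData is an already-initialized PerfRunData (initialization not shown):

TaxonomyReader taxoReader = runData.getTaxonomyReader(); // incRef'd on our behalf
try {
  if (taxoReader != null) {
    // use the reader only while holding the reference
    System.out.println("taxonomy size: " + taxoReader.getSize());
  }
} finally {
  if (taxoReader != null) {
    taxoReader.decRef(); // the reader is closed once its refCount drops to zero
  }
}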

View File

@@ -0,0 +1,180 @@
package org.apache.lucene.benchmark.byTask.feeds;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;
/**
* Base class for source of data for benchmarking
* <p>
* Keeps track of various statistics, such as how many data items were generated,
* their size in bytes, etc.
* <p>
* Supports the following configuration parameters:
* <ul>
* <li><b>content.source.forever</b> - specifies whether to generate items
* forever (<b>default=true</b>).
* <li><b>content.source.verbose</b> - specifies whether messages should be
* output by the content source (<b>default=false</b>).
* <li><b>content.source.encoding</b> - specifies which encoding to use when
* reading the files of that content source. Certain implementations may define
* a default value if this parameter is not specified. (<b>default=null</b>).
* <li><b>content.source.log.step</b> - specifies how many items to generate
* between log messages. If set to 0, no logging occurs.
* <b>NOTE:</b> if verbose is set to false, no logging occurs even if
* logStep is not 0 (<b>default=0</b>).
* </ul>
*/
public abstract class ContentItemsSource {
private long bytesCount;
private long totalBytesCount;
private int itemCount;
private int totalItemCount;
private Config config;
private int lastPrintedNumUniqueTexts = 0;
private long lastPrintedNumUniqueBytes = 0;
private int printNum = 0;
protected boolean forever;
protected int logStep;
protected boolean verbose;
protected String encoding;
/** update count of bytes generated by this source */
protected final synchronized void addBytes(long numBytes) {
bytesCount += numBytes;
totalBytesCount += numBytes;
}
/** update count of items generated by this source */
protected final synchronized void addItem() {
++itemCount;
++totalItemCount;
}
/**
* A convenience method for collecting all the files of a content source from
* a given directory. The collected {@link File} instances are stored in the
* given <code>files</code>.
*/
protected final void collectFiles(File dir, ArrayList<File> files) {
if (!dir.canRead()) {
return;
}
File[] dirFiles = dir.listFiles();
Arrays.sort(dirFiles);
for (int i = 0; i < dirFiles.length; i++) {
File file = dirFiles[i];
if (file.isDirectory()) {
collectFiles(file, files);
} else if (file.canRead()) {
files.add(file);
}
}
}
/**
* Returns true if it's time to log a message (depending on verbose and
* the number of items generated).
*/
protected final boolean shouldLog() {
return verbose && logStep > 0 && itemCount % logStep == 0;
}
/** Called when reading from this content source is no longer required. */
public abstract void close() throws IOException;
/** Returns the number of bytes generated since last reset. */
public final long getBytesCount() { return bytesCount; }
/** Returns the number of generated items since last reset. */
public final int getItemsCount() { return itemCount; }
public final Config getConfig() { return config; }
/** Returns the total number of bytes that were generated by this source. */
public final long getTotalBytesCount() { return totalBytesCount; }
/** Returns the total number of generated items. */
public final int getTotalItemsCount() { return totalItemCount; }
/**
* Resets the input for this content source, so that the test would behave as
* if it was just started, input-wise.
* <p>
* <b>NOTE:</b> the default implementation resets the number of bytes and
* items generated since the last reset, so it's important to call
* super.resetInputs in case you override this method.
*/
@SuppressWarnings("unused")
public void resetInputs() throws IOException {
bytesCount = 0;
itemCount = 0;
}
/**
* Sets the {@link Config} for this content source. If you override this
* method, you must call super.setConfig.
*/
public void setConfig(Config config) {
this.config = config;
forever = config.get("content.source.forever", true);
logStep = config.get("content.source.log.step", 0);
verbose = config.get("content.source.verbose", false);
encoding = config.get("content.source.encoding", null);
}
public void printStatistics(String itemsName) {
boolean print = false;
String col = " ";
StringBuilder sb = new StringBuilder();
String newline = System.getProperty("line.separator");
sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
int nut = getTotalItemsCount();
if (nut > lastPrintedNumUniqueTexts) {
print = true;
sb.append("total count of "+itemsName+": ").append(Format.format(0,nut,col)).append(newline);
lastPrintedNumUniqueTexts = nut;
}
long nub = getTotalBytesCount();
if (nub > lastPrintedNumUniqueBytes) {
print = true;
sb.append("total bytes of "+itemsName+": ").append(Format.format(0,nub,col)).append(newline);
lastPrintedNumUniqueBytes = nub;
}
if (getItemsCount() > 0) {
print = true;
sb.append("num "+itemsName+" added since last inputs reset: ").append(Format.format(0,getItemsCount(),col)).append(newline);
sb.append("total bytes added for "+itemsName+" since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline);
}
if (print) {
System.out.println(sb.append(newline).toString());
printNum++;
}
}
}
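
A minimal sketch of a hypothetical subclass (not part of this commit), showing how addItem()/addBytes() feed the statistics that printStatistics() reports and how shouldLog() gates progress output:

public class CountingItemsSource extends ContentItemsSource {
  private int next = 0;

  public synchronized String getNextItem() {
    String item = "item-" + next++;
    addItem();               // bumps the per-reset and total item counters
    addBytes(item.length()); // rough byte accounting, as RandomFacetSource does
    if (shouldLog()) {
      System.out.println("generated " + getItemsCount() + " items");
    }
    return item;
  }

  @Override
  public void close() { /* no underlying resources to release */ }
}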

View File

@@ -17,12 +17,7 @@ package org.apache.lucene.benchmark.byTask.feeds;
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import org.apache.lucene.benchmark.byTask.utils.Config;
/**
* Represents content from a specified source, such as TREC, Reuters etc. A
@@ -31,119 +26,13 @@ import org.apache.lucene.benchmark.byTask.utils.Config;
* of various statistics, such as how many documents were generated, size in
* bytes etc.
* <p>
* Supports the following configuration parameters:
* <ul>
* <li><b>content.source.forever</b> - specifies whether to generate documents
* forever (<b>default=true</b>).
* <li><b>content.source.verbose</b> - specifies whether messages should be
* output by the content source (<b>default=false</b>).
* <li><b>content.source.encoding</b> - specifies which encoding to use when
* reading the files of that content source. Certain implementations may define
* a default value if this parameter is not specified. (<b>default=null</b>).
* <li><b>content.source.log.step</b> - specifies for how many documents a
* message should be logged. If set to 0 it means no logging should occur.
* <b>NOTE:</b> if verbose is set to false, logging should not occur even if
* logStep is not 0 (<b>default=0</b>).
* </ul>
* For supported configuration parameters see {@link ContentItemsSource}.
*/
public abstract class ContentSource {
public abstract class ContentSource extends ContentItemsSource {
private long bytesCount;
private long totalBytesCount;
private int docsCount;
private int totalDocsCount;
private Config config;
protected boolean forever;
protected int logStep;
protected boolean verbose;
protected String encoding;
/** update count of bytes generated by this source */
protected final synchronized void addBytes(long numBytes) {
bytesCount += numBytes;
totalBytesCount += numBytes;
}
/** update count of documents generated by this source */
protected final synchronized void addDoc() {
++docsCount;
++totalDocsCount;
}
/**
* A convenience method for collecting all the files of a content source from
* a given directory. The collected {@link File} instances are stored in the
* given <code>files</code>.
*/
protected final void collectFiles(File dir, ArrayList<File> files) {
if (!dir.canRead()) {
return;
}
File[] dirFiles = dir.listFiles();
Arrays.sort(dirFiles);
for (int i = 0; i < dirFiles.length; i++) {
File file = dirFiles[i];
if (file.isDirectory()) {
collectFiles(file, files);
} else if (file.canRead()) {
files.add(file);
}
}
}
/**
* Returns true whether it's time to log a message (depending on verbose and
* the number of documents generated).
*/
protected final boolean shouldLog() {
return verbose && logStep > 0 && docsCount % logStep == 0;
}
/** Called when reading from this content source is no longer required. */
public abstract void close() throws IOException;
/** Returns the number of bytes generated since last reset. */
public final long getBytesCount() { return bytesCount; }
/** Returns the number of generated documents since last reset. */
public final int getDocsCount() { return docsCount; }
public final Config getConfig() { return config; }
/** Returns the next {@link DocData} from the content source. */
/** Returns the next {@link DocData} from the content source.
* Implementations must account for multi-threading, as multiple threads
* can call this method simultaneously. */
public abstract DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException;
/** Returns the total number of bytes that were generated by this source. */
public final long getTotalBytesCount() { return totalBytesCount; }
/** Returns the total number of generated documents. */
public final int getTotalDocsCount() { return totalDocsCount; }
/**
* Resets the input for this content source, so that the test would behave as
* if it was just started, input-wise.
* <p>
* <b>NOTE:</b> the default implementation resets the number of bytes and
* documents generated since the last reset, so it's important to call
* super.resetInputs in case you override this method.
*/
public void resetInputs() throws IOException {
bytesCount = 0;
docsCount = 0;
}
/**
* Sets the {@link Config} for this content source. If you override this
* method, you must call super.setConfig.
*/
public void setConfig(Config config) {
this.config = config;
forever = config.get("content.source.forever", true);
logStep = config.get("content.source.log.step", 0);
verbose = config.get("content.source.verbose", false);
encoding = config.get("content.source.encoding", null);
}
}

View File

@@ -31,7 +31,6 @@ import java.text.SimpleDateFormat;
import java.text.ParsePosition;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -186,13 +185,8 @@ public class DocMaker {
protected boolean reuseFields;
protected boolean indexProperties;
private int lastPrintedNumUniqueTexts = 0;
private long lastPrintedNumUniqueBytes = 0;
private final AtomicInteger numDocsCreated = new AtomicInteger();
private int printNum = 0;
public DocMaker() {
}
@@ -400,38 +394,9 @@
return doc;
}
public void printDocStatistics() {
boolean print = false;
String col = " ";
StringBuilder sb = new StringBuilder();
String newline = System.getProperty("line.separator");
sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
int nut = source.getTotalDocsCount();
if (nut > lastPrintedNumUniqueTexts) {
print = true;
sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline);
lastPrintedNumUniqueTexts = nut;
}
long nub = getTotalBytesCount();
if (nub > lastPrintedNumUniqueBytes) {
print = true;
sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
lastPrintedNumUniqueBytes = nub;
}
if (source.getDocsCount() > 0) {
print = true;
sb.append("num docs added since last inputs reset: ").append(Format.format(0,source.getDocsCount(),col)).append(newline);
sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline);
}
if (print) {
System.out.println(sb.append(newline).toString());
printNum++;
}
}
/** Reset inputs so that the test run would behave, input wise, as if it just started. */
public synchronized void resetInputs() throws IOException {
printDocStatistics();
source.printStatistics("docs");
// re-initiate since properties by round may have changed.
setConfig(config);
source.resetInputs();

View File

@@ -0,0 +1,45 @@
package org.apache.lucene.benchmark.byTask.feeds;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.facet.index.CategoryContainer;
/**
* Source items for facets.
* <p>
* For supported configuration parameters see {@link ContentItemsSource}.
*/
public abstract class FacetSource extends ContentItemsSource {
/** Returns the next {@link CategoryContainer facets content item}.
* Implementations must account for multi-threading, as multiple threads
* can call this method simultaneously.
*/
public abstract CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException;
@Override
public void resetInputs() throws IOException {
printStatistics("facets");
// re-initiate since properties by round may have changed.
setConfig(getConfig());
super.resetInputs();
}
}

View File

@@ -0,0 +1,81 @@
package org.apache.lucene.benchmark.byTask.feeds;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.facet.index.CategoryContainer;
import org.apache.lucene.facet.taxonomy.CategoryPath;
/**
* Simple implementation of a random facet source.
* <p>
* Supports the following parameters:
* <ul>
* <li><b>rand.seed</b> - defines the seed to initialize Random with (default: <b>13</b>).
* <li><b>max.doc.facets</b> - maximal #facets per doc (default: <b>10</b>).
* The actual number of facets for a given doc is between 1 and that number.
* <li><b>max.facet.depth</b> - maximal #components in a facet (default: <b>3</b>).
* The actual number of components in a given facet is between 1 and that number.
* </ul>
*/
public class RandomFacetSource extends FacetSource {
Random random;
private int maxDocFacets = 10;
private int maxFacetDepth = 3;
private int maxValue = maxDocFacets * maxFacetDepth;
@Override
public CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException {
if (facets == null) {
facets = new CategoryContainer();
} else {
facets.clear();
}
int numFacets = 1 + random.nextInt(maxDocFacets); // 1..maxDocFacets - at least one facet per doc
for (int i=0; i<numFacets; i++) {
CategoryPath cp = new CategoryPath();
int depth = 1 + random.nextInt(maxFacetDepth); // 1..maxFacetDepth - depth 0 is not useful
for (int k=0; k<depth; k++) {
cp.add(Integer.toString(random.nextInt(maxValue)));
addItem();
}
facets.addCategory(cp);
addBytes(cp.toString().length()); // very rough approximation
}
return facets;
}
@Override
public void close() throws IOException {
// nothing to do here
}
@Override
public void setConfig(Config config) {
super.setConfig(config);
random = new Random(config.get("rand.seed", 13));
maxDocFacets = config.get("max.doc.facets", 10); // match the documented default
maxFacetDepth = config.get("max.facet.depth", 3); // match the documented default
maxValue = maxDocFacets * maxFacetDepth;
}
}
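
A sketch of exercising RandomFacetSource outside a full benchmark run, assuming Config exposes a Properties-based constructor (values mirror facets.alg above; run inside a method declared throws Exception):

Properties props = new Properties();
props.setProperty("rand.seed", "10");
props.setProperty("max.doc.facets", "20");
props.setProperty("max.facet.depth", "3");

FacetSource source = new RandomFacetSource();
source.setConfig(new Config(props)); // seeds Random and reads the max.* limits

CategoryContainer facets = source.getNextFacets(null); // null asks for a fresh container
// each generated path component counts as one item
System.out.println("components generated: " + source.getItemsCount());
source.close();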

View File

@@ -289,7 +289,7 @@ public class TrecContentSource extends ContentSource {
// here, everything else is already private to that thread, so we're safe.
try {
docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType);
addDoc();
addItem();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}

View File

@@ -17,12 +17,14 @@ package org.apache.lucene.benchmark.byTask.tasks;
* limitations under the License.
*/
import java.text.NumberFormat;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.document.Document;
/**
* Add a document, optionally with of a certain size.
* Add a document, optionally of a certain size.
* <br>Other side effects: none.
* <br>Takes optional param: document size.
*/
@@ -34,9 +36,12 @@ public class AddDocTask extends PerfTask {
private int docSize = 0;
// volatile data passed between setup(), doLogic(), tearDown().
private Document doc = null;
/**
* volatile data passed between setup(), doLogic(), tearDown().
* the doc is created at setup() and added at doLogic().
*/
protected Document doc = null;
@Override
public void setup() throws Exception {
super.setup();
@@ -56,7 +61,7 @@
@Override
protected String getLogMessage(int recsCount) {
return "added " + recsCount + " docs";
return String.format("added %9d docs",recsCount);
}
@Override

View File

@@ -0,0 +1,77 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
import org.apache.lucene.facet.index.CategoryContainer;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
/**
* Add a faceted document.
* <p>
* Config properties:
* <ul>
* <li><b>with.facets</b>=&lt;tells whether to actually add any facets to the document| Default: true&gt;
* <br>This config property makes it easy to compare the performance of adding docs with and without facets.
* Note that facets are created even when this is false; they are just not added to the document (nor to the taxonomy).
* </ul>
* <p>
* See {@link AddDocTask} for general document parameters and configuration.
* <p>
* Makes use of the {@link FacetSource} in effect - see {@link PerfRunData} for facet source settings.
*/
public class AddFacetedDocTask extends AddDocTask {
public AddFacetedDocTask(PerfRunData runData) {
super(runData);
}
private CategoryContainer facets = null;
private CategoryDocumentBuilder categoryDocBuilder = null;
private boolean withFacets = true;
@Override
public void setup() throws Exception {
super.setup();
// create the facets even if they will not be added - this allows measuring the cost of just adding facets
facets = getRunData().getFacetSource().getNextFacets(facets);
withFacets = getRunData().getConfig().get("with.facets", true);
if (withFacets) {
categoryDocBuilder = new CategoryDocumentBuilder(getRunData().getTaxonomyWriter());
categoryDocBuilder.setCategories(facets);
}
}
@Override
protected String getLogMessage(int recsCount) {
if (!withFacets) {
return super.getLogMessage(recsCount);
}
return super.getLogMessage(recsCount)+ " with facets";
}
@Override
public int doLogic() throws Exception {
if (withFacets) {
categoryDocBuilder.build(doc);
}
return super.doLogic();
}
}

View File

@@ -0,0 +1,43 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.util.IOUtils;
/**
* Close taxonomy index.
* <br>Other side effects: taxonomy writer object in perfRunData is nullified.
*/
public class CloseTaxonomyIndexTask extends PerfTask {
public CloseTaxonomyIndexTask(PerfRunData runData) {
super(runData);
}
@Override
public int doLogic() throws IOException {
IOUtils.close(getRunData().getTaxonomyWriter());
getRunData().setTaxonomyWriter(null);
return 1;
}
}

View File

@@ -0,0 +1,46 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/**
* Close taxonomy reader.
* <br>Other side effects: taxonomy reader in perfRunData is nullified.
*/
public class CloseTaxonomyReaderTask extends PerfTask {
public CloseTaxonomyReaderTask(PerfRunData runData) {
super(runData);
}
@Override
public int doLogic() throws IOException {
TaxonomyReader taxoReader = getRunData().getTaxonomyReader();
getRunData().setTaxonomyReader(null);
if (taxoReader.getRefCount() != 1) {
System.out.println("WARNING: CloseTaxonomyReader: reference count is currently " + taxoReader.getRefCount());
}
taxoReader.close();
return 1;
}
}

View File

@@ -0,0 +1,41 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
/**
* Commits the Taxonomy Index.
*/
public class CommitTaxonomyIndexTask extends PerfTask {
public CommitTaxonomyIndexTask(PerfRunData runData) {
super(runData);
}
@Override
public int doLogic() throws Exception {
TaxonomyWriter taxonomyWriter = getRunData().getTaxonomyWriter();
if (taxonomyWriter != null) {
taxonomyWriter.commit();
} else {
throw new IllegalStateException("TaxonomyWriter is not currently open");
}
return 1;
}
}

View File

@@ -0,0 +1,44 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import java.io.IOException;
/**
* Create a taxonomy index.
* <br>Other side effects: taxonomy writer object in perfRunData is set.
*/
public class CreateTaxonomyIndexTask extends PerfTask {
public CreateTaxonomyIndexTask(PerfRunData runData) {
super(runData);
}
@Override
public int doLogic() throws IOException {
PerfRunData runData = getRunData();
runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir(), OpenMode.CREATE));
return 1;
}
}

View File

@@ -0,0 +1,42 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
import java.io.IOException;
/**
* Open a taxonomy index.
* <br>Other side effects: taxonomy writer object in perfRunData is set.
*/
public class OpenTaxonomyIndexTask extends PerfTask {
public OpenTaxonomyIndexTask(PerfRunData runData) {
super(runData);
}
@Override
public int doLogic() throws IOException {
PerfRunData runData = getRunData();
runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir()));
return 1;
}
}

View File

@@ -0,0 +1,45 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
/**
* Open a taxonomy index reader.
* <br>Other side effects: taxonomy reader object in perfRunData is set.
*/
public class OpenTaxonomyReaderTask extends PerfTask {
public OpenTaxonomyReaderTask(PerfRunData runData) {
super(runData);
}
@Override
public int doLogic() throws IOException {
PerfRunData runData = getRunData();
LuceneTaxonomyReader taxoReader = new LuceneTaxonomyReader(runData.getTaxonomyDir());
runData.setTaxonomyReader(taxoReader);
// setTaxonomyReader() incRef'd the reader, transferring ownership to the run data, so release our own reference
taxoReader.decRef();
return 1;
}
}

View File

@@ -17,8 +17,6 @@ package org.apache.lucene.benchmark.byTask.tasks;
* limitations under the License.
*/
import java.text.NumberFormat;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
@@ -270,9 +268,7 @@ public abstract class PerfTask implements Cloneable {
public void tearDown() throws Exception {
if (++logStepCount % logStep == 0) {
double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0;
NumberFormat nf = NumberFormat.getInstance();
nf.setMaximumFractionDigits(2);
System.out.println(nf.format(time) + " sec --> "
System.out.println(String.format("%7.2f",time) + " sec --> "
+ Thread.currentThread().getName() + " " + getLogMessage(logStepCount));
}
}

View File

@@ -40,6 +40,7 @@ import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
import org.apache.lucene.collation.CollationKeyAnalyzer;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -780,6 +781,42 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
reader.close();
}
/**
* Test indexing with facets tasks.
*/
public void testIndexingWithFacets() throws Exception {
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"# ----- properties ",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
"content.source.log.step=100",
"content.source.forever=false",
"directory=RAMDirectory",
"doc.stored=false",
"merge.factor=3",
"doc.tokenized=false",
"debug.level=1",
"# ----- alg ",
"ResetSystemErase",
"CreateIndex",
"CreateTaxonomyIndex",
"{ \"AddDocs\" AddFacetedDoc > : * ",
"CloseIndex",
"CloseTaxonomyIndex",
"OpenTaxonomyReader",
};
// 2. execute the algorithm (required in every "logic" test)
Benchmark benchmark = execBenchmark(algLines);
PerfRunData runData = benchmark.getRunData();
assertNull("taxo writer was not properly closed",runData.getTaxonomyWriter());
TaxonomyReader taxoReader = runData.getTaxonomyReader();
assertNotNull("taxo reader was not opened", taxoReader);
assertTrue("nothing was added to the taxnomy (expecting root and at least one addtional category)",taxoReader.getSize()>1);
taxoReader.close();
}
/**
* Test that we can call optimize(maxNumSegments).
*/