From 664a7191dd16ec9757e17000866f805061f2d44a Mon Sep 17 00:00:00 2001 From: Doron Cohen Date: Sun, 9 Oct 2011 18:01:36 +0000 Subject: [PATCH] LUCENE-3261: Facet benchmarking - indexing support - ported from 3x. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1180674 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/contrib/contrib-build.xml | 11 ++ modules/benchmark/CHANGES.txt | 4 + modules/benchmark/build.xml | 3 +- modules/benchmark/conf/facets.alg | 72 +++++++ .../lucene/benchmark/byTask/PerfRunData.java | 140 +++++++++++--- .../byTask/feeds/ContentItemsSource.java | 180 ++++++++++++++++++ .../benchmark/byTask/feeds/ContentSource.java | 121 +----------- .../benchmark/byTask/feeds/DocMaker.java | 37 +--- .../benchmark/byTask/feeds/FacetSource.java | 45 +++++ .../byTask/feeds/RandomFacetSource.java | 81 ++++++++ .../byTask/feeds/TrecContentSource.java | 2 +- .../benchmark/byTask/tasks/AddDocTask.java | 15 +- .../byTask/tasks/AddFacetedDocTask.java | 77 ++++++++ .../byTask/tasks/CloseTaxonomyIndexTask.java | 43 +++++ .../byTask/tasks/CloseTaxonomyReaderTask.java | 46 +++++ .../byTask/tasks/CommitTaxonomyIndexTask.java | 41 ++++ .../byTask/tasks/CreateTaxonomyIndexTask.java | 44 +++++ .../byTask/tasks/OpenTaxonomyIndexTask.java | 42 ++++ .../byTask/tasks/OpenTaxonomyReaderTask.java | 45 +++++ .../benchmark/byTask/tasks/PerfTask.java | 6 +- .../benchmark/byTask/TestPerfTasksLogic.java | 37 ++++ 21 files changed, 904 insertions(+), 188 deletions(-) create mode 100644 modules/benchmark/conf/facets.alg create mode 100644 modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java create mode 100644 modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java create mode 100644 modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java create mode 100644 modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java create mode 100644 
modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java create mode 100644 modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java create mode 100644 modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java create mode 100644 modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java create mode 100644 modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java create mode 100644 modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java diff --git a/lucene/contrib/contrib-build.xml b/lucene/contrib/contrib-build.xml index 7e6c062977d..30d0bc34c98 100644 --- a/lucene/contrib/contrib-build.xml +++ b/lucene/contrib/contrib-build.xml @@ -141,6 +141,17 @@ + + + + + + + + + + + diff --git a/modules/benchmark/CHANGES.txt b/modules/benchmark/CHANGES.txt index 14898277551..3bc4e5e133d 100644 --- a/modules/benchmark/CHANGES.txt +++ b/modules/benchmark/CHANGES.txt @@ -5,6 +5,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety For more information on past and future Lucene versions, please see: http://s.apache.org/luceneversions +10/07/2011 + LUCENE-3262: Facet benchmarking - Benchmark tasks and sources were added for indexing + with facets, demonstrated in facets.alg. (Gilad Barkai, Doron Cohen) + 09/25/2011 LUCENE-3457: Upgrade commons-compress to 1.2 (and undo LUCENE-2980's workaround). 
(Doron Cohen) diff --git a/modules/benchmark/build.xml b/modules/benchmark/build.xml index b7f07409cf3..8eadf1788de 100644 --- a/modules/benchmark/build.xml +++ b/modules/benchmark/build.xml @@ -153,6 +153,7 @@ + @@ -241,7 +242,7 @@ Benchmark output in JIRA table format is in file: ${shingle.jira.output.file} - + diff --git a/modules/benchmark/conf/facets.alg b/modules/benchmark/conf/facets.alg new file mode 100644 index 00000000000..d53337eb77b --- /dev/null +++ b/modules/benchmark/conf/facets.alg @@ -0,0 +1,72 @@ +#/** +# * Licensed to the Apache Software Foundation (ASF) under one or more +# * contributor license agreements. See the NOTICE file distributed with +# * this work for additional information regarding copyright ownership. +# * The ASF licenses this file to You under the Apache License, Version 2.0 +# * (the "License"); you may not use this file except in compliance with +# * the License. You may obtain a copy of the License at +# * +# * http://www.apache.org/licenses/LICENSE-2.0 +# * +# * Unless required by applicable law or agreed to in writing, software +# * distributed under the License is distributed on an "AS IS" BASIS, +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# * See the License for the specific language governing permissions and +# * limitations under the License. 
+# */ +# ------------------------------------------------------------------------------------- + +with.facets=facets:true:false + +content.source.forever=false + +compound=true +analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer +directory=FSDirectory +taxonomy.directory=FSDirectory + +doc.stored=true +doc.tokenized=true +doc.term.vector=false +log.step=1000 + +docs.dir=reuters-out + +content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource + +facet.source=org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource +rand.seed=10 +max.doc.facets=20 +max.facet.depth=3 + +query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker + +task.max.depth.log=2 + +#log.queries=true +# ------------------------------------------------------------------------------------- + +{ "Rounds" + ResetSystemErase + { "Populate" + -CreateIndex + -CreateTaxonomyIndex + { "MAddDocs" AddFacetedDoc > : * + -Optimize + -CloseIndex + -CloseTaxonomyIndex + } + + OpenReader + { "SearchSameRdr" Search > : 40 + CloseReader + + #RepSumByNameRound + ResetSystemErase + NewRound +} : 4 + +RepSumByPrefRound Search +RepSumByPrefRound Populate +RepSumByPrefRound MAddDocs + diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java index 64f5731f666..5d24fe6e3fa 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java @@ -24,6 +24,7 @@ import java.util.Locale; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; +import org.apache.lucene.benchmark.byTask.feeds.FacetSource; import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; import org.apache.lucene.benchmark.byTask.stats.Points; import org.apache.lucene.benchmark.byTask.tasks.ReadTask; @@ -31,12 +32,15 @@ import 
org.apache.lucene.benchmark.byTask.tasks.SearchTask; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.FileUtils; import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.IOUtils; /** * Data maintained by a performance test run. @@ -45,11 +49,21 @@ import org.apache.lucene.store.RAMDirectory; *
    *
  • Configuration. *
  • Directory, Writer, Reader. - *
  • Docmaker and a few instances of QueryMaker. + *
  • Taxonomy Directory, Writer, Reader. + *
  • DocMaker, FacetSource and a few instances of QueryMaker. *
  • Analyzer. *
  • Statistics data which updated during the run. *
- * Config properties: work.dir=<path to root of docs and index dirs| Default: work> + * Config properties: + *
    + *
  • work.dir=<path to root of docs and index dirs| Default: work> + *
  • analyzer=<class name for analyzer| Default: StandardAnalyzer> + *
  • doc.maker=<class name for doc-maker| Default: DocMaker> + *
  • facet.source=<class name for facet-source| Default: RandomFacetSource> + *
  • query.maker=<class name for query-maker| Default: SimpleQueryMaker> + *
  • log.queries=<whether queries should be printed| Default: false> + *
  • directory=<type of directory to use for the index| Default: RAMDirectory> + *
  • taxonomy.directory=<type of directory for taxonomy index| Default: RAMDirectory> *
*/ public class PerfRunData { @@ -62,7 +76,12 @@ public class PerfRunData { private Directory directory; private Analyzer analyzer; private DocMaker docMaker; + private FacetSource facetSource; private Locale locale; + + private Directory taxonomyDir; + private TaxonomyWriter taxonomyWriter; + private TaxonomyReader taxonomyReader; // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately. private HashMap,QueryMaker> readTaskQueryMaker; @@ -73,6 +92,7 @@ public class PerfRunData { private IndexWriter indexWriter; private Config config; private long startTimeMillis; + // constructor public PerfRunData (Config config) throws Exception { @@ -84,6 +104,10 @@ public class PerfRunData { docMaker = Class.forName(config.get("doc.maker", "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance(); docMaker.setConfig(config); + // facet source + facetSource = Class.forName(config.get("facet.source", + "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance(); + facetSource.setConfig(config); // query makers readTaskQueryMaker = new HashMap,QueryMaker>(); qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")).asSubclass(QueryMaker.class); @@ -104,30 +128,17 @@ public class PerfRunData { public void reinit(boolean eraseIndex) throws Exception { // cleanup index - if (indexWriter!=null) { - indexWriter.close(); - indexWriter = null; - } - if (indexReader!=null) { - indexReader.close(); - indexReader = null; - } - if (directory!=null) { - directory.close(); - } + IOUtils.close(indexWriter, indexReader, directory); + indexWriter = null; + indexReader = null; + + IOUtils.close(taxonomyWriter, taxonomyReader, taxonomyDir); + taxonomyWriter = null; + taxonomyReader = null; // directory (default is ram-dir). 
- if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) { - File workDir = new File(config.get("work.dir","work")); - File indexDir = new File(workDir,"index"); - if (eraseIndex && indexDir.exists()) { - FileUtils.fullyDelete(indexDir); - } - indexDir.mkdirs(); - directory = FSDirectory.open(indexDir); - } else { - directory = new RAMDirectory(); - } + directory = createDirectory(eraseIndex, "index", "directory"); + taxonomyDir = createDirectory(eraseIndex, "taxo", "taxonomy.directory"); // inputs resetInputs(); @@ -139,6 +150,21 @@ public class PerfRunData { // Re-init clock setStartTimeMillis(); } + + private Directory createDirectory(boolean eraseIndex, String dirName, + String dirParam) throws IOException { + if ("FSDirectory".equals(config.get(dirParam,"RAMDirectory"))) { + File workDir = new File(config.get("work.dir","work")); + File indexDir = new File(workDir,dirName); + if (eraseIndex && indexDir.exists()) { + FileUtils.fullyDelete(indexDir); + } + indexDir.mkdirs(); + return FSDirectory.open(indexDir); + } + + return new RAMDirectory(); + } public long setStartTimeMillis() { startTimeMillis = System.currentTimeMillis(); @@ -173,6 +199,57 @@ public class PerfRunData { this.directory = directory; } + /** + * @return Returns the taxonomy directory + */ + public Directory getTaxonomyDir() { + return taxonomyDir; + } + + /** + * Set the taxonomy reader. Takes ownership of that taxonomy reader, that is, + * internally performs taxoReader.incRef() (If caller no longer needs that + * reader it should decRef()/close() it after calling this method, otherwise, + * the reader will remain open). + * @param taxoReader The taxonomy reader to set. 
+ */ + public synchronized void setTaxonomyReader(TaxonomyReader taxoReader) throws IOException { + if (taxoReader == this.taxonomyReader) { + return; + } + if (taxonomyReader != null) { + taxonomyReader.decRef(); + } + + if (taxoReader != null) { + taxoReader.incRef(); + } + this.taxonomyReader = taxoReader; + } + + /** + * @return Returns the taxonomyReader. NOTE: this returns a + * reference. You must call TaxonomyReader.decRef() when + * you're done. + */ + public synchronized TaxonomyReader getTaxonomyReader() { + if (taxonomyReader != null) { + taxonomyReader.incRef(); + } + return taxonomyReader; + } + + /** + * @param taxoWriter The taxonomy writer to set. + */ + public void setTaxonomyWriter(TaxonomyWriter taxoWriter) { + this.taxonomyWriter = taxoWriter; + } + + public TaxonomyWriter getTaxonomyWriter() { + return taxonomyWriter; + } + /** * @return Returns the indexReader. NOTE: this returns a * reference. You must call IndexReader.decRef() when @@ -198,13 +275,22 @@ public class PerfRunData { } /** + * Set the index reader. Takes ownership of that index reader, that is, + * internally performs indexReader.incRef() (If caller no longer needs that + * reader it should decRef()/close() it after calling this method, otherwise, + * the reader will remain open). * @param indexReader The indexReader to set. */ public synchronized void setIndexReader(IndexReader indexReader) throws IOException { + if (indexReader == this.indexReader) { + return; + } + if (this.indexReader != null) { // Release current IR this.indexReader.decRef(); } + this.indexReader = indexReader; if (indexReader != null) { // Hold reference to new IR @@ -246,6 +332,11 @@ public class PerfRunData { return docMaker; } + /** Returns the facet source. 
*/ + public FacetSource getFacetSource() { + return facetSource; + } + /** * @return the locale */ @@ -269,6 +360,7 @@ public class PerfRunData { public void resetInputs() throws IOException { docMaker.resetInputs(); + facetSource.resetInputs(); for (final QueryMaker queryMaker : readTaskQueryMaker.values()) { queryMaker.resetInputs(); } diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java new file mode 100644 index 00000000000..d89d88875a9 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java @@ -0,0 +1,180 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; + +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.benchmark.byTask.utils.Format; + +/** + * Base class for source of data for benchmarking + *

+ * Keeps track of various statistics, such as how many data items were generated, + * size in bytes etc. + *

+ * Supports the following configuration parameters: + *

    + *
  • content.source.forever - specifies whether to generate items + * forever (default=true). + *
  • content.source.verbose - specifies whether messages should be + * output by the content source (default=false). + *
  • content.source.encoding - specifies which encoding to use when + * reading the files of that content source. Certain implementations may define + * a default value if this parameter is not specified. (default=null). + *
  • content.source.log.step - specifies for how many items a + * message should be logged. If set to 0 it means no logging should occur. + * NOTE: if verbose is set to false, logging should not occur even if + * logStep is not 0 (default=0). + *
+ */ +public abstract class ContentItemsSource { + + private long bytesCount; + private long totalBytesCount; + private int itemCount; + private int totalItemCount; + private Config config; + + private int lastPrintedNumUniqueTexts = 0; + private long lastPrintedNumUniqueBytes = 0; + private int printNum = 0; + + protected boolean forever; + protected int logStep; + protected boolean verbose; + protected String encoding; + + /** update count of bytes generated by this source */ + protected final synchronized void addBytes(long numBytes) { + bytesCount += numBytes; + totalBytesCount += numBytes; + } + + /** update count of items generated by this source */ + protected final synchronized void addItem() { + ++itemCount; + ++totalItemCount; + } + + /** + * A convenience method for collecting all the files of a content source from + * a given directory. The collected {@link File} instances are stored in the + * given files. + */ + protected final void collectFiles(File dir, ArrayList files) { + if (!dir.canRead()) { + return; + } + + File[] dirFiles = dir.listFiles(); + Arrays.sort(dirFiles); + for (int i = 0; i < dirFiles.length; i++) { + File file = dirFiles[i]; + if (file.isDirectory()) { + collectFiles(file, files); + } else if (file.canRead()) { + files.add(file); + } + } + } + + /** + * Returns true whether it's time to log a message (depending on verbose and + * the number of items generated). + */ + protected final boolean shouldLog() { + return verbose && logStep > 0 && itemCount % logStep == 0; + } + + /** Called when reading from this content source is no longer required. */ + public abstract void close() throws IOException; + + /** Returns the number of bytes generated since last reset. */ + public final long getBytesCount() { return bytesCount; } + + /** Returns the number of generated items since last reset. 
*/ + public final int getItemsCount() { return itemCount; } + + public final Config getConfig() { return config; } + + /** Returns the total number of bytes that were generated by this source. */ + public final long getTotalBytesCount() { return totalBytesCount; } + + /** Returns the total number of generated items. */ + public final int getTotalItemsCount() { return totalItemCount; } + + /** + * Resets the input for this content source, so that the test would behave as + * if it was just started, input-wise. + *

+ * NOTE: the default implementation resets the number of bytes and + * items generated since the last reset, so it's important to call + * super.resetInputs in case you override this method. + */ + @SuppressWarnings("unused") + public void resetInputs() throws IOException { + bytesCount = 0; + itemCount = 0; + } + + /** + * Sets the {@link Config} for this content source. If you override this + * method, you must call super.setConfig. + */ + public void setConfig(Config config) { + this.config = config; + forever = config.get("content.source.forever", true); + logStep = config.get("content.source.log.step", 0); + verbose = config.get("content.source.verbose", false); + encoding = config.get("content.source.encoding", null); + } + + public void printStatistics(String itemsName) { + boolean print = false; + String col = " "; + StringBuilder sb = new StringBuilder(); + String newline = System.getProperty("line.separator"); + sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); + int nut = getTotalItemsCount(); + if (nut > lastPrintedNumUniqueTexts) { + print = true; + sb.append("total count of "+itemsName+": ").append(Format.format(0,nut,col)).append(newline); + lastPrintedNumUniqueTexts = nut; + } + long nub = getTotalBytesCount(); + if (nub > lastPrintedNumUniqueBytes) { + print = true; + sb.append("total bytes of "+itemsName+": ").append(Format.format(0,nub,col)).append(newline); + lastPrintedNumUniqueBytes = nub; + } + if (getItemsCount() > 0) { + print = true; + sb.append("num "+itemsName+" added since last inputs reset: ").append(Format.format(0,getItemsCount(),col)).append(newline); + sb.append("total bytes added for "+itemsName+" since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline); + } + if (print) { + System.out.println(sb.append(newline).toString()); + printNum++; + } + } + +} diff --git 
a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java index cfe377c3ffe..cb671c88f82 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java @@ -17,12 +17,7 @@ package org.apache.lucene.benchmark.byTask.feeds; * limitations under the License. */ -import java.io.File; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; - -import org.apache.lucene.benchmark.byTask.utils.Config; /** * Represents content from a specified source, such as TREC, Reuters etc. A @@ -31,119 +26,13 @@ import org.apache.lucene.benchmark.byTask.utils.Config; * of various statistics, such as how many documents were generated, size in * bytes etc. *

- * Supports the following configuration parameters: - *

    - *
  • content.source.forever - specifies whether to generate documents - * forever (default=true). - *
  • content.source.verbose - specifies whether messages should be - * output by the content source (default=false). - *
  • content.source.encoding - specifies which encoding to use when - * reading the files of that content source. Certain implementations may define - * a default value if this parameter is not specified. (default=null). - *
  • content.source.log.step - specifies for how many documents a - * message should be logged. If set to 0 it means no logging should occur. - * NOTE: if verbose is set to false, logging should not occur even if - * logStep is not 0 (default=0). - *
+ * For supported configuration parameters see {@link ContentItemsSource}. */ -public abstract class ContentSource { +public abstract class ContentSource extends ContentItemsSource { - private long bytesCount; - private long totalBytesCount; - private int docsCount; - private int totalDocsCount; - private Config config; - - protected boolean forever; - protected int logStep; - protected boolean verbose; - protected String encoding; - - /** update count of bytes generated by this source */ - protected final synchronized void addBytes(long numBytes) { - bytesCount += numBytes; - totalBytesCount += numBytes; - } - - /** update count of documents generated by this source */ - protected final synchronized void addDoc() { - ++docsCount; - ++totalDocsCount; - } - - /** - * A convenience method for collecting all the files of a content source from - * a given directory. The collected {@link File} instances are stored in the - * given files. - */ - protected final void collectFiles(File dir, ArrayList files) { - if (!dir.canRead()) { - return; - } - - File[] dirFiles = dir.listFiles(); - Arrays.sort(dirFiles); - for (int i = 0; i < dirFiles.length; i++) { - File file = dirFiles[i]; - if (file.isDirectory()) { - collectFiles(file, files); - } else if (file.canRead()) { - files.add(file); - } - } - } - - /** - * Returns true whether it's time to log a message (depending on verbose and - * the number of documents generated). - */ - protected final boolean shouldLog() { - return verbose && logStep > 0 && docsCount % logStep == 0; - } - - /** Called when reading from this content source is no longer required. */ - public abstract void close() throws IOException; - - /** Returns the number of bytes generated since last reset. */ - public final long getBytesCount() { return bytesCount; } - - /** Returns the number of generated documents since last reset. 
*/ - public final int getDocsCount() { return docsCount; } - - public final Config getConfig() { return config; } - - /** Returns the next {@link DocData} from the content source. */ + /** Returns the next {@link DocData} from the content source. + * Implementations must account for multi-threading, as multiple threads + * can call this method simultaneously. */ public abstract DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException; - /** Returns the total number of bytes that were generated by this source. */ - public final long getTotalBytesCount() { return totalBytesCount; } - - /** Returns the total number of generated documents. */ - public final int getTotalDocsCount() { return totalDocsCount; } - - /** - * Resets the input for this content source, so that the test would behave as - * if it was just started, input-wise. - *

- * NOTE: the default implementation resets the number of bytes and - * documents generated since the last reset, so it's important to call - * super.resetInputs in case you override this method. - */ - public void resetInputs() throws IOException { - bytesCount = 0; - docsCount = 0; - } - - /** - * Sets the {@link Config} for this content source. If you override this - * method, you must call super.setConfig. - */ - public void setConfig(Config config) { - this.config = config; - forever = config.get("content.source.forever", true); - logStep = config.get("content.source.log.step", 0); - verbose = config.get("content.source.verbose", false); - encoding = config.get("content.source.encoding", null); - } - } diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java index ba4b83c6ee4..a324cdcf491 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java @@ -31,7 +31,6 @@ import java.text.SimpleDateFormat; import java.text.ParsePosition; import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.Format; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -186,13 +185,8 @@ public class DocMaker { protected boolean reuseFields; protected boolean indexProperties; - private int lastPrintedNumUniqueTexts = 0; - - private long lastPrintedNumUniqueBytes = 0; private final AtomicInteger numDocsCreated = new AtomicInteger(); - private int printNum = 0; - public DocMaker() { } @@ -400,38 +394,9 @@ public class DocMaker { return doc; } - public void printDocStatistics() { - boolean print = false; - String col = " "; - StringBuilder sb = new StringBuilder(); - String newline = System.getProperty("line.separator"); 
- sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); - int nut = source.getTotalDocsCount(); - if (nut > lastPrintedNumUniqueTexts) { - print = true; - sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline); - lastPrintedNumUniqueTexts = nut; - } - long nub = getTotalBytesCount(); - if (nub > lastPrintedNumUniqueBytes) { - print = true; - sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline); - lastPrintedNumUniqueBytes = nub; - } - if (source.getDocsCount() > 0) { - print = true; - sb.append("num docs added since last inputs reset: ").append(Format.format(0,source.getDocsCount(),col)).append(newline); - sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline); - } - if (print) { - System.out.println(sb.append(newline).toString()); - printNum++; - } - } - /** Reset inputs so that the test run would behave, input wise, as if it just started. */ public synchronized void resetInputs() throws IOException { - printDocStatistics(); + source.printStatistics("docs"); // re-initiate since properties by round may have changed. setConfig(config); source.resetInputs(); diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java new file mode 100644 index 00000000000..1cfa0506059 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java @@ -0,0 +1,45 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.facet.index.CategoryContainer; + +/** + * Source items for facets. + *

+ * For supported configuration parameters see {@link ContentItemsSource}. + */ +public abstract class FacetSource extends ContentItemsSource { + + /** Returns the next {@link CategoryContainer facets content item}. + * Implementations must account for multi-threading, as multiple threads + * can call this method simultaneously. + */ + public abstract CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException; + + @Override + public void resetInputs() throws IOException { + printStatistics("facets"); + // re-initiate since properties by round may have changed. + setConfig(getConfig()); + super.resetInputs(); + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java new file mode 100644 index 00000000000..0254682c1ef --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java @@ -0,0 +1,81 @@ +package org.apache.lucene.benchmark.byTask.feeds; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.Random; + +import org.apache.lucene.benchmark.byTask.utils.Config; +import org.apache.lucene.facet.index.CategoryContainer; +import org.apache.lucene.facet.taxonomy.CategoryPath; + +/** + * Simple implementation of a random facet source + *

+ * Supports the following parameters: + *

    + *
  <li>rand.seed - defines the seed to initialize Random with (default: 13). + *
  <li>max.doc.facets - maximal #facets per doc (default: 10). + * Actual number of facets in a certain doc would be anything between 1 and that number. + *
  <li>max.facet.depth - maximal #components in a facet (default: 3). + * Actual number of components in a certain facet would be anything between 1 and that number. + *
+ */ +public class RandomFacetSource extends FacetSource { + + Random random; + + private int maxDocFacets = 10; + private int maxFacetDepth = 3; + private int maxValue = maxDocFacets * maxFacetDepth; + + @Override + public CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException { + if (facets == null) { + facets = new CategoryContainer(); + } else { + facets.clear(); + } + int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc + for (int i=0; iOther side effects: none. *
Takes optional param: document size. */ @@ -34,9 +36,12 @@ public class AddDocTask extends PerfTask { private int docSize = 0; - // volatile data passed between setup(), doLogic(), tearDown(). - private Document doc = null; - + /** + * volatile data passed between setup(), doLogic(), tearDown(). + * the doc is created at setup() and added at doLogic(). + */ + protected Document doc = null; + @Override public void setup() throws Exception { super.setup(); @@ -56,7 +61,7 @@ public class AddDocTask extends PerfTask { @Override protected String getLogMessage(int recsCount) { - return "added " + recsCount + " docs"; + return String.format("added %9d docs",recsCount); } @Override diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java new file mode 100644 index 00000000000..2df7cac3729 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java @@ -0,0 +1,77 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.benchmark.byTask.feeds.FacetSource; +import org.apache.lucene.facet.index.CategoryContainer; +import org.apache.lucene.facet.index.CategoryDocumentBuilder; + +/** + * Add a faceted document. + *

+ * Config properties: + *

    + *
  <li>with.facets=&lt;tells whether to actually add any facets to the document| Default: true&gt; + *
    This config property allows to easily compare the performance of adding docs with and without facets. + * Note that facets are created even when this is false, just that they are not added to the document (nor to the taxonomy). + *
+ *

+ * See {@link AddDocTask} for general document parameters and configuration. + *

+ * Makes use of the {@link FacetSource} in effect - see {@link PerfRunData} for facet source settings. + */ +public class AddFacetedDocTask extends AddDocTask { + + public AddFacetedDocTask(PerfRunData runData) { + super(runData); + } + + private CategoryContainer facets = null; + private CategoryDocumentBuilder categoryDocBuilder = null; + private boolean withFacets = true; + + @Override + public void setup() throws Exception { + super.setup(); + // create the facets even if they should not be added - allows to measure the effect of just adding facets + facets = getRunData().getFacetSource().getNextFacets(facets); + withFacets = getRunData().getConfig().get("with.facets", true); + if (withFacets) { + categoryDocBuilder = new CategoryDocumentBuilder(getRunData().getTaxonomyWriter()); + categoryDocBuilder.setCategories(facets); + } + } + + @Override + protected String getLogMessage(int recsCount) { + if (!withFacets) { + return super.getLogMessage(recsCount); + } + return super.getLogMessage(recsCount)+ " with facets"; + } + + @Override + public int doLogic() throws Exception { + if (withFacets) { + categoryDocBuilder.build(doc); + } + return super.doLogic(); + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java new file mode 100644 index 00000000000..e67d2055957 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java @@ -0,0 +1,43 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.util.IOUtils; + +/** + * Close taxonomy index. + *
Other side effects: taxonomy writer object in perfRunData is nullified. + */ +public class CloseTaxonomyIndexTask extends PerfTask { + + public CloseTaxonomyIndexTask(PerfRunData runData) { + super(runData); + } + + @Override + public int doLogic() throws IOException { + IOUtils.close(getRunData().getTaxonomyWriter()); + getRunData().setTaxonomyWriter(null); + + return 1; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java new file mode 100644 index 00000000000..82e9265c709 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java @@ -0,0 +1,46 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/** + * Close taxonomy reader. + *
Other side effects: taxonomy reader in perfRunData is nullified. + */ +public class CloseTaxonomyReaderTask extends PerfTask { + + public CloseTaxonomyReaderTask(PerfRunData runData) { + super(runData); + } + + @Override + public int doLogic() throws IOException { + TaxonomyReader taxoReader = getRunData().getTaxonomyReader(); + getRunData().setTaxonomyReader(null); + if (taxoReader.getRefCount() != 1) { + System.out.println("WARNING: CloseTaxonomyReader: reference count is currently " + taxoReader.getRefCount()); + } + taxoReader.close(); + return 1; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java new file mode 100644 index 00000000000..95cb01db6dc --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java @@ -0,0 +1,41 @@ +package org.apache.lucene.benchmark.byTask.tasks; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; + +/** + * Commits the Taxonomy Index. 
+ */ +public class CommitTaxonomyIndexTask extends PerfTask { + public CommitTaxonomyIndexTask(PerfRunData runData) { + super(runData); + } + + @Override + public int doLogic() throws Exception { + TaxonomyWriter taxonomyWriter = getRunData().getTaxonomyWriter(); + if (taxonomyWriter != null) { + taxonomyWriter.commit(); + } else { + throw new IllegalStateException("TaxonomyWriter is not currently open"); + } + + return 1; + } +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java new file mode 100644 index 00000000000..3752f498182 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java @@ -0,0 +1,44 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; + +import java.io.IOException; + + +/** + * Create a taxonomy index. + *
Other side effects: taxonomy writer object in perfRunData is set. + */ +public class CreateTaxonomyIndexTask extends PerfTask { + + public CreateTaxonomyIndexTask(PerfRunData runData) { + super(runData); + } + + @Override + public int doLogic() throws IOException { + PerfRunData runData = getRunData(); + runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir(), OpenMode.CREATE)); + return 1; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java new file mode 100644 index 00000000000..613578dd63a --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java @@ -0,0 +1,42 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter; +import java.io.IOException; + + +/** + * Open a taxonomy index. + *
Other side effects: taxonomy writer object in perfRunData is set. + */ +public class OpenTaxonomyIndexTask extends PerfTask { + + public OpenTaxonomyIndexTask(PerfRunData runData) { + super(runData); + } + + @Override + public int doLogic() throws IOException { + PerfRunData runData = getRunData(); + runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir())); + return 1; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java new file mode 100644 index 00000000000..03268f06064 --- /dev/null +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java @@ -0,0 +1,45 @@ +package org.apache.lucene.benchmark.byTask.tasks; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.benchmark.byTask.PerfRunData; +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader; + +/** + * Open a taxonomy index reader. + *
Other side effects: taxonomy reader object in perfRunData is set. + */ +public class OpenTaxonomyReaderTask extends PerfTask { + + public OpenTaxonomyReaderTask(PerfRunData runData) { + super(runData); + } + + @Override + public int doLogic() throws IOException { + PerfRunData runData = getRunData(); + LuceneTaxonomyReader taxoReader = new LuceneTaxonomyReader(runData.getTaxonomyDir()); + runData.setTaxonomyReader(taxoReader); + // We transfer reference to the run data + taxoReader.decRef(); + return 1; + } + +} diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java index 7ac051b6ece..54ed1b92a9f 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java @@ -17,8 +17,6 @@ package org.apache.lucene.benchmark.byTask.tasks; * limitations under the License. 
*/ -import java.text.NumberFormat; - import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.stats.Points; import org.apache.lucene.benchmark.byTask.stats.TaskStats; @@ -270,9 +268,7 @@ public abstract class PerfTask implements Cloneable { public void tearDown() throws Exception { if (++logStepCount % logStep == 0) { double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0; - NumberFormat nf = NumberFormat.getInstance(); - nf.setMaximumFractionDigits(2); - System.out.println(nf.format(time) + " sec --> " + System.out.println(String.format("%7.2f",time) + " sec --> " + Thread.currentThread().getName() + " " + getLogMessage(logStepCount)); } } diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index 9b63fef1254..848ac1fe400 100755 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -40,6 +40,7 @@ import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask; import org.apache.lucene.collation.CollationKeyAnalyzer; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldsEnum; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -780,6 +781,42 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { reader.close(); } + /** + * Test indexing with facets tasks. + */ + public void testIndexingWithFacets() throws Exception { + // 1. 
 alg definition (required in every "logic" test) + String algLines[] = { + "# ----- properties ", + "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", + "docs.file=" + getReuters20LinesFile(), + "content.source.log.step=100", + "content.source.forever=false", + "directory=RAMDirectory", + "doc.stored=false", + "merge.factor=3", + "doc.tokenized=false", + "debug.level=1", + "# ----- alg ", + "ResetSystemErase", + "CreateIndex", + "CreateTaxonomyIndex", + "{ \"AddDocs\" AddFacetedDoc > : * ", + "CloseIndex", + "CloseTaxonomyIndex", + "OpenTaxonomyReader", + }; + + // 2. execute the algorithm (required in every "logic" test) + Benchmark benchmark = execBenchmark(algLines); + PerfRunData runData = benchmark.getRunData(); + assertNull("taxo writer was not properly closed",runData.getTaxonomyWriter()); + TaxonomyReader taxoReader = runData.getTaxonomyReader(); + assertNotNull("taxo reader was not opened", taxoReader); + assertTrue("nothing was added to the taxonomy (expecting root and at least one additional category)",taxoReader.getSize()>1); + taxoReader.close(); + } + /** * Test that we can call optimize(maxNumSegments). */