LUCENE-1778: Add support for log.step setting per task type. Perviously, if you included a log.step line in the .alg file, it had been applied to all tasks. Now, you can include a log.step.AddDoc, or log.step.DeleteDoc (for example) to control logging for just these tasks. If you want to ommit logging for any other task, include log.step=-1. The syntax is "log.step." together with the Task's 'short' name (i.e., without the 'Task' part).

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@800768 13f79535-47bb-0310-9956-ffa450edef68
2009-08-04 13:05:03 +00:00 · 2009-08-04 13:05:03 +00:00 · b155258203
parent 4054b4ebf3
commit b155258203
7 changed files with 151 additions and 28 deletions
--- a/contrib/benchmark/CHANGES.txt
+++ b/contrib/benchmark/CHANGES.txt
@ -4,6 +4,15 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety

 $Id:$

+8/03/2009
+  LUCENE-1778: Add support for log.step setting per task type. Perviously, if
+  you included a log.step line in the .alg file, it had been applied to all
+  tasks. Now, you can include a log.step.AddDoc, or log.step.DeleteDoc (for 
+  example) to control logging for just these tasks. If you want to ommit logging
+  for any other task, include log.step=-1. The syntax is "log.step." together
+  with the Task's 'short' name (i.e., without the 'Task' part).
+  (Shai Erera via Mark Miller)
+
 7/24/2009
  LUCENE-1595: Deprecate LineDocMaker and EnwikiDocMaker in favor of
  using DocMaker directly, with content.source = LineDocSource or
--- a/contrib/benchmark/conf/compound-penalty.alg
+++ b/contrib/benchmark/conf/compound-penalty.alg
@ -35,7 +35,7 @@ doc.stored=stored:true:true:false:false
 doc.tokenized=true
 doc.term.vector=vector:true:true:false:false
 log.step=500
-delete.log.step=100
+log.step.DeleteDoc=100

 docs.dir=reuters-out
 #docs.dir=reuters-111
--- a/contrib/benchmark/conf/deletes.alg
+++ b/contrib/benchmark/conf/deletes.alg
@ -33,7 +33,7 @@ doc.stored=true
 doc.tokenized=true
 doc.term.vector=false
 log.step=10000
-delete.log.step=100
+log.step.DeleteDoc=100

 docs.dir=reuters-out
 #docs.dir=reuters-111
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/package.html
@ -116,8 +116,17 @@ herein.
 Each benchmark run has a DocMaker and a QueryMaker. These two should usually
 match, so that "meaningful" queries are used for a certain collection.
 Properties set at the header of the alg file define which "makers" should be
-used. You can also specify your own makers, implementing the DocMaker and
-QureyMaker interfaces.
+used. You can also specify your own makers, extending DocMaker and implementing
+QureyMaker.
+	<blockquote>
+		<b>Note:</b> since 2.9, DocMaker is a concrete class which accepts a 
+		ContentSource. In most cases, you can use the DocMaker class to create 
+		Documents, while providing your own ContentSource implementation. For 
+		example, the current Benchmark package includes ContentSource 
+		implementations for TREC, Enwiki and Reuters collections, as well as 
+		others like LineDocSource which reads a 'line' file produced by 
+		WriteLineDocTask.
+	</blockquote>
 </p>

 <p>
@ -436,7 +445,6 @@ regular index/search work tasks, report tasks, and control tasks.
     <font color="#FF0066">OpenIndex</font> both leave the
     index open for later update operations.
     <font color="#FF0066">CloseIndex</font> would close it.
-     </li>
     <li>
     <font color="#FF0066">OpenReader</font>, similarly, would
     leave an index reader open for later search operations.
@ -455,7 +463,24 @@ regular index/search work tasks, report tasks, and control tasks.
     and traverse and retrieve).
     Notice that each of the 3 search task types maintains
     its own queryMaker instance.
-     </li>
+	 <li>
+	 <font color="#FF0066">CommitIndex</font> and 
+	 <font color="#FF0066">Optimize</font> can be used to commit
+	 changes to the index and/or optimize the index created thus
+	 far.
+	 <li>
+	 <font color="#FF0066">WriteLineDoc</font> prepares a 'line'
+	 file where each line holds a document with <i>title</i>, 
+	 <i>date</i> and <i>body</i> elements, seperated by [TAB].
+	 A line file is useful if one wants to measure pure indexing
+	 performance, without the overhead of parsing the data.<br>
+	 You can use LineDocSource as a ContentSource over a 'line'
+	 file.
+	 <li>
+	 <font color="#FF0066">ConsumeContentSource</font> consumes
+	 a ContentSource. Useful for e.g. testing a ContentSource
+	 performance, without the overhead of preparing a Document
+	 out of it.
 </ul
 </li>
 </ol>
@ -544,11 +569,10 @@ Here is a list of currently defined properties:
  </li>

  <li><b>Logging</b>:
-    <ul><li>doc.add.log.step
-    </li><li>doc.delete.log.step
+    <ul><li>log.step
+	</li><li>log.step.[class name]Task ie log.step.DeleteDoc (e.g. log.step.Wonderful for the WonderfulTask example above).
    </li><li>log.queries
    </li><li>task.max.depth.log
-    </li><li>doc.tokenize.log.step
    </li></ul>
  </li>

--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/DeleteDocTask.java
@ -1,7 +1,5 @@
 package org.apache.lucene.benchmark.byTask.tasks;

-import org.apache.lucene.benchmark.byTask.PerfRunData;
-
 /**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@ -19,12 +17,11 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
 * limitations under the License.
 */

+import org.apache.lucene.benchmark.byTask.PerfRunData;
+
 /**
- * Delete a document by docid.
- * <br>Other side effects: none.
- * <br>Relevant properties: <code>doc.delete.step, delete.log.step</code>.
- * <br>If no docid param is supplied, deletes doc with <code>id = last-deleted-doc + doc.delete.step</code>. 
- * <br>Takes optional param: document id. 
+ * Delete a document by docid. If no docid param is supplied, deletes doc with
+ * <code>id = last-deleted-doc + doc.delete.step</code>.
 */
 public class DeleteDocTask extends PerfTask {

@ -35,11 +32,6 @@ public class DeleteDocTask extends PerfTask {
  
  public DeleteDocTask(PerfRunData runData) {
    super(runData);
-    // Override log.step, which is read by PerfTask
-    int deleteLogStep = runData.getConfig().get("delete.log.step", -1);
-    if (deleteLogStep != -1) {
-      logStep = deleteLogStep;
-    }
  }

  private int deleteStep = -1;
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
@ -31,11 +31,23 @@ import org.apache.lucene.benchmark.byTask.utils.Format;
 * {@link #doLogic()} method, which performss the actual task. <br>
 * Tasks performing some work that should be measured for the task, can overide
 * {@link #setup()} and/or {@link #tearDown()} and place that work there. <br>
- * Relevant properties: <code>task.max.depth.log</code>.
+ * Relevant properties: <code>task.max.depth.log</code>.<br>
+ * Also supports the following logging attributes:
+ * <ul>
+ * <li>log.step - specifies how often to log messages about the current running
+ * task. Default is 1000 {@link #doLogic()} invocations. Set to -1 to disable
+ * logging.
+ * <li>log.step.[class Task Name] - specifies the same as 'log.step', only for a
+ * particular task name. For example, log.step.AddDoc will be applied only for
+ * {@link AddDocTask}, but not for {@link DeleteDocTask}. It's a way to control
+ * per task logging settings. If you want to ommit logging for any other task,
+ * include log.step=-1. The syntax is "log.step." together with the Task's
+ * 'short' name (i.e., without the 'Task' part).
+ * </ul>
 */
 public abstract class PerfTask implements Cloneable {

-  private static final int DEFAULT_LOG_STEP = 1000;
+  static final int DEFAULT_LOG_STEP = 1000;
  
  private PerfRunData runData;
  
@ -66,14 +78,14 @@ public abstract class PerfTask implements Cloneable {
  private void checkObsoleteSettings(Config config) {
    if (config.get("doc.add.log.step", null) != null) {
      throw new RuntimeException("doc.add.log.step is not supported anymore. " +
-      		"Use log.step and refer to CHANGES to read on the recent API changes " +
-      		"done to Benchmark's DocMaker and Task-based logging.");
+      		"Use log.step.AddDoc and refer to CHANGES to read on the recent " +
+      		"API changes done to Benchmark's DocMaker and Task-based logging.");
    }
    
    if (config.get("doc.delete.log.step", null) != null) {
      throw new RuntimeException("doc.delete.log.step is not supported anymore. " +
-          "Use delete.log.step and refer to CHANGES to read on the recent API changes " +
-          "done to Benchmark's DocMaker and Task-based logging.");
+          "Use log.step.DeleteDoc and refer to CHANGES to read on the recent " +
+          "API changes done to Benchmark's DocMaker and Task-based logging.");
    }
  }
  
@ -82,7 +94,21 @@ public abstract class PerfTask implements Cloneable {
    this.runData = runData;
    Config config = runData.getConfig();
    this.maxDepthLogStart = config.get("task.max.depth.log",0);
-    logStep = config.get("log.step", DEFAULT_LOG_STEP);
+
+    String logStepAtt = "log.step";
+    // TODO (1.5): call getClass().getSimpleName() instead.
+    String taskName = getClass().getName();
+    int idx = taskName.lastIndexOf('.');
+    // To support test internal classes. when we move to getSimpleName, this can be removed.
+    int idx2 = taskName.indexOf('$', idx);
+    if (idx2 != -1) idx = idx2;
+    String taskLogStepAtt = "log.step." + taskName.substring(idx + 1, taskName.length() - 4 /* w/o the 'Task' part */);
+    if (config.get(taskLogStepAtt, null) != null) {
+      logStepAtt = taskLogStepAtt;
+    }
+
+    // It's important to read this from Config, to support vals-by-round.
+    logStep = config.get(logStepAtt, DEFAULT_LOG_STEP);
    // To avoid the check 'if (logStep > 0)' in tearDown(). This effectively
    // turns logging off.
    if (logStep <= 0) {
--- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java
+++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java
@ -0,0 +1,72 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Properties;
+
+import org.apache.lucene.benchmark.BenchmarkTestCase;
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+/** Tests the functionality of the abstract {@link PerfTask}. */
+public class PerfTaskTest extends BenchmarkTestCase {
+
+  private static final class MyPerfTask extends PerfTask {
+
+    public MyPerfTask(PerfRunData runData) {
+      super(runData);
+    }
+
+    public int doLogic() throws Exception {
+      return 0;
+    }
+
+    public int getLogStep() { return logStep; }
+    
+  }
+  
+  private PerfRunData createPerfRunData(boolean setLogStep, int logStepVal,
+      boolean setTaskLogStep, int taskLogStepVal) throws Exception {
+    Properties props = new Properties();
+    if (setLogStep) {
+      props.setProperty("log.step", Integer.toString(logStepVal));
+    }
+    if (setTaskLogStep) {
+      props.setProperty("log.step.MyPerf", Integer.toString(taskLogStepVal));
+    }
+    props.setProperty("directory", "RAMDirectory"); // no accidental FS dir.
+    Config config = new Config(props);
+    return new PerfRunData(config);
+  }
+  
+  private void doLogStepTest(boolean setLogStep, int logStepVal,
+      boolean setTaskLogStep, int taskLogStepVal, int expLogStepValue) throws Exception {
+    PerfRunData runData = createPerfRunData(setLogStep, logStepVal, setTaskLogStep, taskLogStepVal);
+    MyPerfTask mpt = new MyPerfTask(runData);
+    assertEquals(expLogStepValue, mpt.getLogStep());
+  }
+  
+  public void testLogStep() throws Exception {
+    doLogStepTest(false, -1, false, -1, PerfTask.DEFAULT_LOG_STEP);
+    doLogStepTest(true, -1, false, -1, Integer.MAX_VALUE);
+    doLogStepTest(true, 100, false, -1, 100);
+    doLogStepTest(false, -1, true, -1, Integer.MAX_VALUE);
+    doLogStepTest(false, -1, true, 100, 100);
+  }
+  
+}