LUCENE-981 and LUCENE-980: Added new AnalyzerTask and fixed issue with long strings in Format.java

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@567262 13f79535-47bb-0310-9956-ffa450edef68
2007-08-18 12:24:21 +00:00 · 2007-08-18 12:24:21 +00:00 · c67fd79a83
parent c1a76d9dbe
commit c67fd79a83
5 changed files with 180 additions and 7 deletions
--- a/contrib/benchmark/CHANGES.txt
+++ b/contrib/benchmark/CHANGES.txt
@ -4,6 +4,9 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety

 $Id:$

+8/15/07
+  LUCENE-
+
 8/9/07
  LUCENE-971: Change enwiki tasks to a doc maker (extending
  LineDocMaker) that directly processes the Wikipedia XML and produces
--- a/contrib/benchmark/conf/analyzer.alg
+++ b/contrib/benchmark/conf/analyzer.alg
@ -0,0 +1,80 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# multi val params are iterated by NewRound's, added to reports, start with column name.
+
+merge.factor=mrg:10
+#:100:10:100
+max.buffered=buf:10
+#:10:100:100
+compound=true
+
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+doc.add.log.step=500
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=2
+
+log.queries=true
+# -------------------------------------------------------------------------------------
+
+{ "Rounds"
+
+    ResetSystemErase
+#If the analyzer is in o.a.l.analysis, then just the classname can be used, otherwise the FQN must be used
+#Standard Analyzer can be shortened to standard.StandardAnalyzer
+    {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) > 
+    { "Populate"
+        CreateIndex
+        { "MAddDocs" AddDoc > : 2000
+        Optimize
+        CloseIndex
+    }
+
+    OpenReader  
+    { "SearchSameRdr" Search > : 5000
+    CloseReader 
+                
+    { "WarmNewRdr" Warm > : 50
+                
+    { "SrchNewRdr" Search > : 500
+                
+    { "SrchTrvNewRdr" SearchTrav(1000) > : 300
+                
+    { "SrchTrvRetNewRdr" SearchTravRet(2000) > : 100
+                
+    NewRound
+
+} : 4
+
+RepSumByName
+RepSumByPrefRound MAddDocs
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
@ -17,10 +17,6 @@ package org.apache.lucene.benchmark.byTask;
 * limitations under the License.
 */

-import java.io.File;
-import java.util.HashMap;
-import java.util.Iterator;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.benchmark.byTask.feeds.HTMLParser;
@ -28,13 +24,17 @@ import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
 import org.apache.lucene.benchmark.byTask.stats.Points;
 import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
 import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.FileUtils;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.FileUtils;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Iterator;


 /**
@ -207,6 +207,11 @@ public class PerfRunData {
    return analyzer;
  }

+
+  public void setAnalyzer(Analyzer analyzer) {
+    this.analyzer = analyzer;
+  }
+
  /**
   * @return Returns the docMaker.
   */
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/NewAnalyzerTask.java
@ -0,0 +1,84 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringTokenizer;
+
+/**
+ * Create a new {@link org.apache.lucene.analysis.Analyzer} and set it it in the getRunData() for use by all future tasks.
+ *
+ */
+public class NewAnalyzerTask extends PerfTask {
+  private List/*<String>*/ analyzerClassNames;
+  private int current;
+
+  public NewAnalyzerTask(PerfRunData runData) {
+    super(runData);
+    analyzerClassNames = new ArrayList();
+  }
+
+  public int doLogic() throws IOException {
+    String className = null;
+    try {
+      if (current >= analyzerClassNames.size())
+      {
+        current = 0;
+      }
+      className = (String) analyzerClassNames.get(current++);
+      if (className == null || className.equals(""))
+      {
+        className = "org.apache.lucene.analysis.standard.StandardAnalyzer"; 
+      }
+      if (className.indexOf(".") == -1  || className.startsWith("standard."))//there is no package name, assume o.a.l.analysis
+      {
+        className = "org.apache.lucene.analysis." + className;
+      }
+      getRunData().setAnalyzer((Analyzer) Class.forName(className).newInstance());
+      System.out.println("Changed Analyzer to: " + className);
+    } catch (Exception e) {
+      throw new RuntimeException("Error creating Analyzer: " + className, e);
+    }
+    return 1;
+  }
+
+  /**
+   * Set the params (analyzerClassName only),  Comma-separate list of Analyzer class names.  If the Analyzer lives in
+   * org.apache.lucene.analysis, the name can be shortened by dropping the o.a.l.a part of the Fully Qualified Class Name.
+   * <p/>
+   * Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
+   * @param params analyzerClassName, or empty for the StandardAnalyzer
+   */
+  public void setParams(String params) {
+    super.setParams(params);
+    for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) {
+      String s = tokenizer.nextToken();
+      analyzerClassNames.add(s.trim());
+    }
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
+   */
+  public boolean supportsParams() {
+    return true;
+  }
+}
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java
@ -82,7 +82,8 @@ public class Format {
   * @return formatted string.
   */
  public static String format(String s, String col) {
-    return (s + padd).substring(0, col.length());
+    String s1 = (s + padd);
+    return s1.substring(0, Math.min(col.length(), s1.length()));
  }

  /**