diff --git a/lucene/contrib/benchmark/CHANGES.txt b/lucene/contrib/benchmark/CHANGES.txt
index 9cc34224110..3ba25a272c0 100644
--- a/lucene/contrib/benchmark/CHANGES.txt
+++ b/lucene/contrib/benchmark/CHANGES.txt
@@ -2,6 +2,12 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
+4/27/2010: WriteLineDocTask now supports multi-threading. Also,
+ StringBufferReader was renamed to StringBuilderReader and works on
+ StringBuilder now. In addition, LongToEnglishCountentSource starts from 0
+ (instead of Long.MIN_VAL+10) and wraps around to MIN_VAL (if you ever hit
+ Long.MAX_VAL). (Shai Erera)
+
4/07/2010
LUCENE-2377: Enable the use of NoMergePolicy and NoMergeScheduler by
CreateIndexTask. (Shai Erera)
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java
index 2e2626d90c9..f59d931edb0 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java
@@ -17,7 +17,6 @@ package org.apache.lucene.benchmark.byTask.feeds;
import org.apache.lucene.search.Query;
import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.Format;
/**
* Abstract base query maker.
@@ -45,7 +44,7 @@ public abstract class AbstractQueryMaker implements QueryMaker {
StringBuffer sb = new StringBuffer();
if (queries != null) {
for (int i = 0; i < queries.length; i++) {
- sb.append(i+". "+ Format.simpleName(queries[i].getClass())+" - "+queries[i].toString());
+ sb.append(i+". "+ queries[i].getClass().getSimpleName()+" - "+queries[i].toString());
sb.append(newline);
}
}
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java
index 8d2a193b623..2fd72170392 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java
@@ -40,8 +40,6 @@ import java.util.Stack;
* denotes a relative path (default=work).
*
docs.dir - specifies the directory the Dir collection. Can be set
* to a relative path if "work.dir" is also specified (default=dir-out).
- * html.parser - specifies the {@link HTMLParser} class to use for
- * parsing the TREC documents content (default=DemoHTMLParser).
*
*/
public class DirContentSource extends ContentSource {
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
index b26c499aa39..191e1ac34f7 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
@@ -333,7 +333,7 @@ public class DocMaker {
String col = " ";
StringBuffer sb = new StringBuffer();
String newline = System.getProperty("line.separator");
- sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline);
+ sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
int nut = source.getTotalDocsCount();
if (nut > lastPrintedNumUniqueTexts) {
print = true;
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java
index c53ff8eb3bc..4d20e91b3fd 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java
@@ -1,34 +1,55 @@
package org.apache.lucene.benchmark.byTask.feeds;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
import org.apache.lucene.util.English;
import java.io.IOException;
import java.util.Date;
-
/**
- *
- *
- **/
+ * Creates documents whose content is a long
number starting from
+ * {@link Long#MIN_VALUE} + 10
.
+ */
public class LongToEnglishContentSource extends ContentSource{
- private long counter = Long.MIN_VALUE + 10;
+ private long counter = 0;
@Override
public void close() throws IOException {
-
}
- //TODO: reduce/clean up synchonization
+
@Override
public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
docData.clear();
- docData.setBody(English.longToEnglish(counter));
- docData.setName("doc_" + String.valueOf(counter));
- docData.setTitle("title_" + String.valueOf(counter));
+ // store the current counter to avoid synchronization later on
+ long curCounter;
+ synchronized (this) {
+ curCounter = counter;
+ if (counter == Long.MAX_VALUE){
+ counter = Long.MIN_VALUE;//loop around
+ } else {
+ ++counter;
+ }
+ }
+ docData.setBody(English.longToEnglish(curCounter));
+ docData.setName("doc_" + String.valueOf(curCounter));
+ docData.setTitle("title_" + String.valueOf(curCounter));
docData.setDate(new Date());
- if (counter == Long.MAX_VALUE){
- counter = Long.MIN_VALUE + 10;//loop around
- }
- counter++;
return docData;
}
@@ -36,4 +57,5 @@ public class LongToEnglishContentSource extends ContentSource{
public void resetInputs() throws IOException {
counter = Long.MIN_VALUE + 10;
}
+
}
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
index dba774b3e8d..1101e661c91 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
@@ -32,7 +32,7 @@ import java.util.Locale;
import java.util.zip.GZIPInputStream;
import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.StringBufferReader;
+import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader;
import org.apache.lucene.util.ThreadInterruptedException;
/**
@@ -53,7 +53,6 @@ import org.apache.lucene.util.ThreadInterruptedException;
*
*/
public class TrecContentSource extends ContentSource {
- // TODO (3.0): change StringBuffer to StringBuilder
private static final class DateFormatInfo {
DateFormat[] dfs;
@@ -79,8 +78,8 @@ public class TrecContentSource extends ContentSource {
};
private ThreadLocal dateFormats = new ThreadLocal();
- private ThreadLocal trecDocReader = new ThreadLocal();
- private ThreadLocal trecDocBuffer = new ThreadLocal();
+ private ThreadLocal trecDocReader = new ThreadLocal();
+ private ThreadLocal trecDocBuffer = new ThreadLocal();
private File dataDir = null;
private ArrayList inputFiles = new ArrayList();
private int nextFile = 0;
@@ -110,19 +109,19 @@ public class TrecContentSource extends ContentSource {
return dfi;
}
- private StringBuffer getDocBuffer() {
- StringBuffer sb = trecDocBuffer.get();
+ private StringBuilder getDocBuffer() {
+ StringBuilder sb = trecDocBuffer.get();
if (sb == null) {
- sb = new StringBuffer();
+ sb = new StringBuilder();
trecDocBuffer.set(sb);
}
return sb;
}
- private Reader getTrecDocReader(StringBuffer docBuffer) {
- StringBufferReader r = trecDocReader.get();
+ private Reader getTrecDocReader(StringBuilder docBuffer) {
+ StringBuilderReader r = trecDocReader.get();
if (r == null) {
- r = new StringBufferReader(docBuffer);
+ r = new StringBuilderReader(docBuffer);
trecDocReader.set(r);
} else {
r.set(docBuffer);
@@ -131,7 +130,7 @@ public class TrecContentSource extends ContentSource {
}
// read until finding a line that starts with the specified prefix, or a terminating tag has been found.
- private void read(StringBuffer buf, String prefix, boolean collectMatchLine,
+ private void read(StringBuilder buf, String prefix, boolean collectMatchLine,
boolean collectAll, String terminatingTag)
throws IOException, NoMoreDataException {
String sep = "";
@@ -248,7 +247,7 @@ public class TrecContentSource extends ContentSource {
openNextFile();
}
- StringBuffer docBuf = getDocBuffer();
+ StringBuilder docBuf = getDocBuffer();
// 1. skip until doc start
docBuf.setLength(0);
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
index 024800c4069..6424673fc98 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
@@ -23,7 +23,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.Format;
/**
* An abstract task to be tested for performance.
@@ -67,7 +66,7 @@ public abstract class PerfTask implements Cloneable {
/** Should not be used externally */
private PerfTask() {
- name = Format.simpleName(getClass());
+ name = getClass().getSimpleName();
if (name.endsWith("Task")) {
name = name.substring(0, name.length() - 4);
}
@@ -99,13 +98,7 @@ public abstract class PerfTask implements Cloneable {
this.maxDepthLogStart = config.get("task.max.depth.log",0);
String logStepAtt = "log.step";
- // TODO (1.5): call getClass().getSimpleName() instead.
- String taskName = getClass().getName();
- int idx = taskName.lastIndexOf('.');
- // To support test internal classes. when we move to getSimpleName, this can be removed.
- int idx2 = taskName.indexOf('$', idx);
- if (idx2 != -1) idx = idx2;
- String taskLogStepAtt = "log.step." + taskName.substring(idx + 1, taskName.length() - 4 /* w/o the 'Task' part */);
+ String taskLogStepAtt = "log.step." + name;
if (config.get(taskLogStepAtt, null) != null) {
logStepAtt = taskLogStepAtt;
}
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
index 84798f5796a..f815153a493 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java
@@ -22,6 +22,7 @@ import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -56,8 +57,9 @@ public class WriteLineDocTask extends PerfTask {
private static final Matcher NORMALIZER = Pattern.compile("[\t\r\n]+").matcher("");
private int docSize = 0;
- private BufferedWriter lineFileOut = null;
+ private PrintWriter lineFileOut = null;
private DocMaker docMaker;
+ private ThreadLocal threadBuffer = new ThreadLocal();
public WriteLineDocTask(PerfRunData runData) throws Exception {
super(runData);
@@ -85,7 +87,7 @@ public class WriteLineDocTask extends PerfTask {
out = new BufferedOutputStream(out, 1 << 16);
out = new CompressorStreamFactory().createCompressorOutputStream("bzip2", out);
}
- lineFileOut = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16);
+ lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16));
docMaker = runData.getDocMaker();
}
@@ -109,12 +111,15 @@ public class WriteLineDocTask extends PerfTask {
f = doc.getField(DocMaker.DATE_FIELD);
String date = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";
- lineFileOut.write(title, 0, title.length());
- lineFileOut.write(SEP);
- lineFileOut.write(date, 0, date.length());
- lineFileOut.write(SEP);
- lineFileOut.write(body, 0, body.length());
- lineFileOut.newLine();
+ StringBuilder sb = threadBuffer.get();
+ if (sb == null) {
+ sb = new StringBuilder();
+ threadBuffer.set(sb);
+ }
+ sb.setLength(0);
+ sb.append(title).append(SEP).append(date).append(SEP).append(body);
+ // lineFileOut is a PrintWriter, which synchronizes internally in println.
+ lineFileOut.println(sb.toString());
}
return 1;
}
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java
index 811906515dc..a571fb5b314 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java
@@ -107,19 +107,4 @@ public class Format {
return res.substring(res.length() - col.length());
}
- /**
- * Extract simple class name
- * @param cls class whose simple name is required
- * @return simple class name
- */
- public static String simpleName (Class> cls) {
- String c = cls.getName();
- String p = cls.getPackage().getName();
- int k = c.lastIndexOf(p+".");
- if (k<0) {
- return c;
- }
- return c.substring(k+1+p.length());
- }
-
}
diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBufferReader.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
similarity index 85%
rename from lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBufferReader.java
rename to lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
index d8f56fb3302..c6e9510e01d 100644
--- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBufferReader.java
+++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java
@@ -21,22 +21,22 @@ import java.io.IOException;
import java.io.Reader;
/**
- * Implements a {@link Reader} over a {@link StringBuffer} instance. Although
+ * Implements a {@link Reader} over a {@link StringBuilder} instance. Although
* one can use {@link java.io.StringReader} by passing it
- * {@link StringBuffer#toString()}, it is better to use this class, as it
- * doesn't mark the passed-in {@link StringBuffer} as shared (which will cause
+ * {@link StringBuilder#toString()}, it is better to use this class, as it
+ * doesn't mark the passed-in {@link StringBuilder} as shared (which will cause
* inner char[] allocations at the next append() attempt).
* Notes:
*
- * - This implementation assumes the underlying {@link StringBuffer} is not
+ *
- This implementation assumes the underlying {@link StringBuilder} is not
* changed during the use of this {@link Reader} implementation.
*
- This implementation is thread-safe.
*
- The implementation looks very much like {@link java.io.StringReader} (for
* the right reasons).
*
- If one wants to reuse that instance, then the following needs to be done:
*
- * StringBuffer sb = new StringBuffer("some text");
- * Reader reader = new StringBufferReader(sb);
+ * StringBuilder sb = new StringBuilder("some text");
+ * Reader reader = new StringBuilderReader(sb);
* ... read from reader - don't close it ! ...
* sb.setLength(0);
* sb.append("some new text");
@@ -45,23 +45,21 @@ import java.io.Reader;
*
*
*/
-public class StringBufferReader extends Reader {
+public class StringBuilderReader extends Reader {
- // TODO (3.0): change to StringBuffer (including the name of the class)
-
- // The StringBuffer to read from.
- private StringBuffer sb;
+ // The StringBuilder to read from.
+ private StringBuilder sb;
// The length of 'sb'.
private int length;
- // The next position to read from the StringBuffer.
+ // The next position to read from the StringBuilder.
private int next = 0;
// The mark position. The default value 0 means the start of the text.
private int mark = 0;
- public StringBufferReader(StringBuffer sb) {
+ public StringBuilderReader(StringBuilder sb) {
set(sb);
}
@@ -85,7 +83,7 @@ public class StringBufferReader extends Reader {
*
* @param readAheadLimit Limit on the number of characters that may be read
* while still preserving the mark. Because the stream's input comes
- * from a StringBuffer, there is no actual limit, so this argument
+ * from a StringBuilder, there is no actual limit, so this argument
* must not be negative, but is otherwise ignored.
* @exception IllegalArgumentException If readAheadLimit is < 0
* @exception IOException If an I/O error occurs
@@ -156,7 +154,7 @@ public class StringBufferReader extends Reader {
}
}
- public void set(StringBuffer sb) {
+ public void set(StringBuilder sb) {
synchronized (lock) {
this.sb = sb;
length = sb.length();
diff --git a/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java b/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
index ee93b968f84..f2121164bf2 100644
--- a/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
+++ b/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java
@@ -22,7 +22,9 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.util.HashSet;
import java.util.Properties;
+import java.util.Set;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.BenchmarkTestCase;
@@ -97,6 +99,21 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
}
}
+ // class has to be public so that Class.forName.newInstance() will work
+ public static final class ThreadingDocMaker extends DocMaker {
+
+ @Override
+ public Document makeDocument() throws Exception {
+ Document doc = new Document();
+ String name = Thread.currentThread().getName();
+ doc.add(new Field(BODY_FIELD, "body_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+ doc.add(new Field(TITLE_FIELD, "title_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+ doc.add(new Field(DATE_FIELD, "date_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+ return doc;
+ }
+
+ }
+
private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
private PerfRunData createPerfRunData(File file, boolean setBZCompress,
@@ -225,5 +242,49 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
br.close();
}
}
-
+
+ public void testMultiThreaded() throws Exception {
+ File file = new File(getWorkDir(), "one-line");
+ PerfRunData runData = createPerfRunData(file, false, null, ThreadingDocMaker.class.getName());
+ final WriteLineDocTask wldt = new WriteLineDocTask(runData);
+ Thread[] threads = new Thread[10];
+ for (int i = 0; i < threads.length; i++) {
+ threads[i] = new Thread("t" + i) {
+ @Override
+ public void run() {
+ try {
+ wldt.doLogic();
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+ }
+
+ for (Thread t : threads) t.start();
+ for (Thread t : threads) t.join();
+
+ wldt.close();
+
+ Set ids = new HashSet();
+ BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
+ try {
+ for (int i = 0; i < threads.length; i++) {
+ String line = br.readLine();
+ String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
+ assertEquals(3, parts.length);
+ // check that all thread names written are the same in the same line
+ String tname = parts[0].substring(parts[0].indexOf('_'));
+ ids.add(tname);
+ assertEquals(tname, parts[1].substring(parts[1].indexOf('_')));
+ assertEquals(tname, parts[2].substring(parts[2].indexOf('_')));
+ }
+ // only threads.length lines should exist
+ assertNull(br.readLine());
+ assertEquals(threads.length, ids.size());
+ } finally {
+ br.close();
+ }
+ }
+
}