LUCENE-2416: Some improvements to Benchmark

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@938582 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2010-04-27 17:59:45 +00:00
parent af96066c27
commit 8dccdd7679
11 changed files with 145 additions and 79 deletions

View File

@ -2,6 +2,12 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
4/27/2010: WriteLineDocTask now supports multi-threading. Also,
StringBufferReader was renamed to StringBuilderReader and works on
StringBuilder now. In addition, LongToEnglishContentSource starts from 0
(instead of Long.MIN_VALUE+10) and wraps around to Long.MIN_VALUE (if you ever hit
Long.MAX_VALUE). (Shai Erera)
4/07/2010 4/07/2010
LUCENE-2377: Enable the use of NoMergePolicy and NoMergeScheduler by LUCENE-2377: Enable the use of NoMergePolicy and NoMergeScheduler by
CreateIndexTask. (Shai Erera) CreateIndexTask. (Shai Erera)

View File

@ -17,7 +17,6 @@ package org.apache.lucene.benchmark.byTask.feeds;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;
/** /**
* Abstract base query maker. * Abstract base query maker.
@ -45,7 +44,7 @@ public abstract class AbstractQueryMaker implements QueryMaker {
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
if (queries != null) { if (queries != null) {
for (int i = 0; i < queries.length; i++) { for (int i = 0; i < queries.length; i++) {
sb.append(i+". "+ Format.simpleName(queries[i].getClass())+" - "+queries[i].toString()); sb.append(i+". "+ queries[i].getClass().getSimpleName()+" - "+queries[i].toString());
sb.append(newline); sb.append(newline);
} }
} }

View File

@ -40,8 +40,6 @@ import java.util.Stack;
* denotes a relative path (<b>default=work</b>). * denotes a relative path (<b>default=work</b>).
* <li><b>docs.dir</b> - specifies the directory the Dir collection. Can be set * <li><b>docs.dir</b> - specifies the directory the Dir collection. Can be set
* to a relative path if "work.dir" is also specified (<b>default=dir-out</b>). * to a relative path if "work.dir" is also specified (<b>default=dir-out</b>).
* <li><b>html.parser</b> - specifies the {@link HTMLParser} class to use for
* parsing the TREC documents content (<b>default=DemoHTMLParser</b>).
* </ul> * </ul>
*/ */
public class DirContentSource extends ContentSource { public class DirContentSource extends ContentSource {

View File

@ -333,7 +333,7 @@ public class DocMaker {
String col = " "; String col = " ";
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
String newline = System.getProperty("line.separator"); String newline = System.getProperty("line.separator");
sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline); sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
int nut = source.getTotalDocsCount(); int nut = source.getTotalDocsCount();
if (nut > lastPrintedNumUniqueTexts) { if (nut > lastPrintedNumUniqueTexts) {
print = true; print = true;

View File

@ -1,34 +1,55 @@
package org.apache.lucene.benchmark.byTask.feeds; package org.apache.lucene.benchmark.byTask.feeds;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.English; import org.apache.lucene.util.English;
import java.io.IOException; import java.io.IOException;
import java.util.Date; import java.util.Date;
/** /**
* * Creates documents whose content is a <code>long</code> number starting from
* * <code>{@link Long#MIN_VALUE} + 10</code>.
**/ */
public class LongToEnglishContentSource extends ContentSource{ public class LongToEnglishContentSource extends ContentSource{
private long counter = Long.MIN_VALUE + 10; private long counter = 0;
@Override @Override
public void close() throws IOException { public void close() throws IOException {
} }
//TODO: reduce/clean up synchronization
@Override @Override
public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
docData.clear(); docData.clear();
docData.setBody(English.longToEnglish(counter)); // store the current counter to avoid synchronization later on
docData.setName("doc_" + String.valueOf(counter)); long curCounter;
docData.setTitle("title_" + String.valueOf(counter)); synchronized (this) {
curCounter = counter;
if (counter == Long.MAX_VALUE){
counter = Long.MIN_VALUE;//loop around
} else {
++counter;
}
}
docData.setBody(English.longToEnglish(curCounter));
docData.setName("doc_" + String.valueOf(curCounter));
docData.setTitle("title_" + String.valueOf(curCounter));
docData.setDate(new Date()); docData.setDate(new Date());
if (counter == Long.MAX_VALUE){
counter = Long.MIN_VALUE + 10;//loop around
}
counter++;
return docData; return docData;
} }
@ -36,4 +57,5 @@ public class LongToEnglishContentSource extends ContentSource{
public void resetInputs() throws IOException { public void resetInputs() throws IOException {
counter = Long.MIN_VALUE + 10; counter = Long.MIN_VALUE + 10;
} }
} }

View File

@ -32,7 +32,7 @@ import java.util.Locale;
import java.util.zip.GZIPInputStream; import java.util.zip.GZIPInputStream;
import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.StringBufferReader; import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader;
import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util.ThreadInterruptedException;
/** /**
@ -53,7 +53,6 @@ import org.apache.lucene.util.ThreadInterruptedException;
* </ul> * </ul>
*/ */
public class TrecContentSource extends ContentSource { public class TrecContentSource extends ContentSource {
// TODO (3.0): change StringBuffer to StringBuilder
private static final class DateFormatInfo { private static final class DateFormatInfo {
DateFormat[] dfs; DateFormat[] dfs;
@ -79,8 +78,8 @@ public class TrecContentSource extends ContentSource {
}; };
private ThreadLocal<DateFormatInfo> dateFormats = new ThreadLocal<DateFormatInfo>(); private ThreadLocal<DateFormatInfo> dateFormats = new ThreadLocal<DateFormatInfo>();
private ThreadLocal<StringBufferReader> trecDocReader = new ThreadLocal<StringBufferReader>(); private ThreadLocal<StringBuilderReader> trecDocReader = new ThreadLocal<StringBuilderReader>();
private ThreadLocal<StringBuffer> trecDocBuffer = new ThreadLocal<StringBuffer>(); private ThreadLocal<StringBuilder> trecDocBuffer = new ThreadLocal<StringBuilder>();
private File dataDir = null; private File dataDir = null;
private ArrayList<File> inputFiles = new ArrayList<File>(); private ArrayList<File> inputFiles = new ArrayList<File>();
private int nextFile = 0; private int nextFile = 0;
@ -110,19 +109,19 @@ public class TrecContentSource extends ContentSource {
return dfi; return dfi;
} }
private StringBuffer getDocBuffer() { private StringBuilder getDocBuffer() {
StringBuffer sb = trecDocBuffer.get(); StringBuilder sb = trecDocBuffer.get();
if (sb == null) { if (sb == null) {
sb = new StringBuffer(); sb = new StringBuilder();
trecDocBuffer.set(sb); trecDocBuffer.set(sb);
} }
return sb; return sb;
} }
private Reader getTrecDocReader(StringBuffer docBuffer) { private Reader getTrecDocReader(StringBuilder docBuffer) {
StringBufferReader r = trecDocReader.get(); StringBuilderReader r = trecDocReader.get();
if (r == null) { if (r == null) {
r = new StringBufferReader(docBuffer); r = new StringBuilderReader(docBuffer);
trecDocReader.set(r); trecDocReader.set(r);
} else { } else {
r.set(docBuffer); r.set(docBuffer);
@ -131,7 +130,7 @@ public class TrecContentSource extends ContentSource {
} }
// read until finding a line that starts with the specified prefix, or a terminating tag has been found. // read until finding a line that starts with the specified prefix, or a terminating tag has been found.
private void read(StringBuffer buf, String prefix, boolean collectMatchLine, private void read(StringBuilder buf, String prefix, boolean collectMatchLine,
boolean collectAll, String terminatingTag) boolean collectAll, String terminatingTag)
throws IOException, NoMoreDataException { throws IOException, NoMoreDataException {
String sep = ""; String sep = "";
@ -248,7 +247,7 @@ public class TrecContentSource extends ContentSource {
openNextFile(); openNextFile();
} }
StringBuffer docBuf = getDocBuffer(); StringBuilder docBuf = getDocBuffer();
// 1. skip until doc start // 1. skip until doc start
docBuf.setLength(0); docBuf.setLength(0);

View File

@ -23,7 +23,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.stats.Points; import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.stats.TaskStats; import org.apache.lucene.benchmark.byTask.stats.TaskStats;
import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;
/** /**
* An abstract task to be tested for performance. <br> * An abstract task to be tested for performance. <br>
@ -67,7 +66,7 @@ public abstract class PerfTask implements Cloneable {
/** Should not be used externally */ /** Should not be used externally */
private PerfTask() { private PerfTask() {
name = Format.simpleName(getClass()); name = getClass().getSimpleName();
if (name.endsWith("Task")) { if (name.endsWith("Task")) {
name = name.substring(0, name.length() - 4); name = name.substring(0, name.length() - 4);
} }
@ -99,13 +98,7 @@ public abstract class PerfTask implements Cloneable {
this.maxDepthLogStart = config.get("task.max.depth.log",0); this.maxDepthLogStart = config.get("task.max.depth.log",0);
String logStepAtt = "log.step"; String logStepAtt = "log.step";
// TODO (1.5): call getClass().getSimpleName() instead. String taskLogStepAtt = "log.step." + name;
String taskName = getClass().getName();
int idx = taskName.lastIndexOf('.');
// To support test internal classes. when we move to getSimpleName, this can be removed.
int idx2 = taskName.indexOf('$', idx);
if (idx2 != -1) idx = idx2;
String taskLogStepAtt = "log.step." + taskName.substring(idx + 1, taskName.length() - 4 /* w/o the 'Task' part */);
if (config.get(taskLogStepAtt, null) != null) { if (config.get(taskLogStepAtt, null) != null) {
logStepAtt = taskLogStepAtt; logStepAtt = taskLogStepAtt;
} }

View File

@ -22,6 +22,7 @@ import java.io.BufferedWriter;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.OutputStreamWriter; import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -56,8 +57,9 @@ public class WriteLineDocTask extends PerfTask {
private static final Matcher NORMALIZER = Pattern.compile("[\t\r\n]+").matcher(""); private static final Matcher NORMALIZER = Pattern.compile("[\t\r\n]+").matcher("");
private int docSize = 0; private int docSize = 0;
private BufferedWriter lineFileOut = null; private PrintWriter lineFileOut = null;
private DocMaker docMaker; private DocMaker docMaker;
private ThreadLocal<StringBuilder> threadBuffer = new ThreadLocal<StringBuilder>();
public WriteLineDocTask(PerfRunData runData) throws Exception { public WriteLineDocTask(PerfRunData runData) throws Exception {
super(runData); super(runData);
@ -85,7 +87,7 @@ public class WriteLineDocTask extends PerfTask {
out = new BufferedOutputStream(out, 1 << 16); out = new BufferedOutputStream(out, 1 << 16);
out = new CompressorStreamFactory().createCompressorOutputStream("bzip2", out); out = new CompressorStreamFactory().createCompressorOutputStream("bzip2", out);
} }
lineFileOut = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16); lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16));
docMaker = runData.getDocMaker(); docMaker = runData.getDocMaker();
} }
@ -109,12 +111,15 @@ public class WriteLineDocTask extends PerfTask {
f = doc.getField(DocMaker.DATE_FIELD); f = doc.getField(DocMaker.DATE_FIELD);
String date = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : ""; String date = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";
lineFileOut.write(title, 0, title.length()); StringBuilder sb = threadBuffer.get();
lineFileOut.write(SEP); if (sb == null) {
lineFileOut.write(date, 0, date.length()); sb = new StringBuilder();
lineFileOut.write(SEP); threadBuffer.set(sb);
lineFileOut.write(body, 0, body.length()); }
lineFileOut.newLine(); sb.setLength(0);
sb.append(title).append(SEP).append(date).append(SEP).append(body);
// lineFileOut is a PrintWriter, which synchronizes internally in println.
lineFileOut.println(sb.toString());
} }
return 1; return 1;
} }

View File

@ -107,19 +107,4 @@ public class Format {
return res.substring(res.length() - col.length()); return res.substring(res.length() - col.length());
} }
/**
 * Extracts the simple name of a class: its fully qualified name with the
 * package prefix removed. Nested classes keep their enclosing-class prefix
 * (for example <code>Map$Entry</code>).
 * @param cls class whose simple name is required
 * @return the class name without its package prefix; the unmodified class
 *         name when no package is available for the class (for example
 *         primitives, arrays and <code>void</code>) or when the name does
 *         not contain the package prefix
 */
public static String simpleName (Class<?> cls) {
  String c = cls.getName();
  // getPackage() returns null for primitives, arrays and void; there is
  // no prefix to strip in that case, so fall back to the full name.
  Package pkg = cls.getPackage();
  if (pkg == null) {
    return c;
  }
  String p = pkg.getName();
  int k = c.lastIndexOf(p+".");
  if (k<0) {
    return c;
  }
  // Skip the package name and the '.' that follows it.
  return c.substring(k+1+p.length());
}
} }

View File

@ -21,22 +21,22 @@ import java.io.IOException;
import java.io.Reader; import java.io.Reader;
/** /**
* Implements a {@link Reader} over a {@link StringBuffer} instance. Although * Implements a {@link Reader} over a {@link StringBuilder} instance. Although
* one can use {@link java.io.StringReader} by passing it * one can use {@link java.io.StringReader} by passing it
* {@link StringBuffer#toString()}, it is better to use this class, as it * {@link StringBuilder#toString()}, it is better to use this class, as it
* doesn't mark the passed-in {@link StringBuffer} as shared (which will cause * doesn't mark the passed-in {@link StringBuilder} as shared (which will cause
* inner char[] allocations at the next append() attempt).<br> * inner char[] allocations at the next append() attempt).<br>
* Notes: * Notes:
* <ul> * <ul>
* <li>This implementation assumes the underlying {@link StringBuffer} is not * <li>This implementation assumes the underlying {@link StringBuilder} is not
* changed during the use of this {@link Reader} implementation. * changed during the use of this {@link Reader} implementation.
* <li>This implementation is thread-safe. * <li>This implementation is thread-safe.
* <li>The implementation looks very much like {@link java.io.StringReader} (for * <li>The implementation looks very much like {@link java.io.StringReader} (for
* the right reasons). * the right reasons).
* <li>If one wants to reuse that instance, then the following needs to be done: * <li>If one wants to reuse that instance, then the following needs to be done:
* <pre> * <pre>
* StringBuffer sb = new StringBuffer("some text"); * StringBuilder sb = new StringBuilder("some text");
* Reader reader = new StringBufferReader(sb); * Reader reader = new StringBuilderReader(sb);
* ... read from reader - don't close it ! ... * ... read from reader - don't close it ! ...
* sb.setLength(0); * sb.setLength(0);
* sb.append("some new text"); * sb.append("some new text");
@ -45,23 +45,21 @@ import java.io.Reader;
* </pre> * </pre>
* </ul> * </ul>
*/ */
public class StringBufferReader extends Reader { public class StringBuilderReader extends Reader {
// TODO (3.0): change to StringBuffer (including the name of the class) // The StringBuilder to read from.
private StringBuilder sb;
// The StringBuffer to read from.
private StringBuffer sb;
// The length of 'sb'. // The length of 'sb'.
private int length; private int length;
// The next position to read from the StringBuffer. // The next position to read from the StringBuilder.
private int next = 0; private int next = 0;
// The mark position. The default value 0 means the start of the text. // The mark position. The default value 0 means the start of the text.
private int mark = 0; private int mark = 0;
public StringBufferReader(StringBuffer sb) { public StringBuilderReader(StringBuilder sb) {
set(sb); set(sb);
} }
@ -85,7 +83,7 @@ public class StringBufferReader extends Reader {
* *
* @param readAheadLimit Limit on the number of characters that may be read * @param readAheadLimit Limit on the number of characters that may be read
* while still preserving the mark. Because the stream's input comes * while still preserving the mark. Because the stream's input comes
* from a StringBuffer, there is no actual limit, so this argument * from a StringBuilder, there is no actual limit, so this argument
* must not be negative, but is otherwise ignored. * must not be negative, but is otherwise ignored.
* @exception IllegalArgumentException If readAheadLimit is < 0 * @exception IllegalArgumentException If readAheadLimit is < 0
* @exception IOException If an I/O error occurs * @exception IOException If an I/O error occurs
@ -156,7 +154,7 @@ public class StringBufferReader extends Reader {
} }
} }
public void set(StringBuffer sb) { public void set(StringBuilder sb) {
synchronized (lock) { synchronized (lock) {
this.sb = sb; this.sb = sb;
length = sb.length(); length = sb.length();

View File

@ -22,7 +22,9 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Properties; import java.util.Properties;
import java.util.Set;
import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.BenchmarkTestCase;
@ -97,6 +99,21 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
} }
} }
// Must remain public so that Class.forName.newInstance() will work.
public static final class ThreadingDocMaker extends DocMaker {

  /**
   * Creates a document whose body, title and date fields each hold a fixed
   * prefix followed by the name of the calling thread.
   */
  @Override
  public Document makeDocument() throws Exception {
    final String threadName = Thread.currentThread().getName();
    final String[] fieldNames = { BODY_FIELD, TITLE_FIELD, DATE_FIELD };
    final String[] prefixes = { "body_", "title_", "date_" };
    Document result = new Document();
    // Fields are added in the order body, title, date.
    for (int i = 0; i < fieldNames.length; i++) {
      result.add(new Field(fieldNames[i], prefixes[i] + threadName, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
    }
    return result;
  }

}
private static final CompressorStreamFactory csFactory = new CompressorStreamFactory(); private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
private PerfRunData createPerfRunData(File file, boolean setBZCompress, private PerfRunData createPerfRunData(File file, boolean setBZCompress,
@ -225,5 +242,49 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
br.close(); br.close();
} }
} }
/**
 * Runs one {@link WriteLineDocTask} from several threads at once and checks
 * that every written line is intact: exactly one line per thread, each with
 * three fields that all carry the same thread name.
 */
public void testMultiThreaded() throws Exception {
  File file = new File(getWorkDir(), "one-line");
  PerfRunData runData = createPerfRunData(file, false, null, ThreadingDocMaker.class.getName());
  final WriteLineDocTask wldt = new WriteLineDocTask(runData);
  Thread[] threads = new Thread[10];
  // One slot per worker. Each worker writes only its own slot, and the slots
  // are read only after join(), so no extra synchronization is needed.
  final Throwable[] failures = new Throwable[threads.length];
  for (int i = 0; i < threads.length; i++) {
    final int slot = i;
    threads[i] = new Thread("t" + i) {
      @Override
      public void run() {
        try {
          wldt.doLogic();
        } catch (Throwable t) {
          // Record instead of rethrowing: an exception thrown here would
          // never reach the test thread.
          failures[slot] = t;
        }
      }
    };
  }

  for (Thread t : threads) t.start();
  for (Thread t : threads) t.join();

  wldt.close();

  // Surface the first worker failure, keeping the original cause.
  for (int i = 0; i < failures.length; i++) {
    if (failures[i] != null) {
      throw new RuntimeException("worker thread " + threads[i].getName() + " failed", failures[i]);
    }
  }

  Set<String> ids = new HashSet<String>();
  BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
  try {
    for (int i = 0; i < threads.length; i++) {
      String line = br.readLine();
      // Fail with a clear message rather than a NullPointerException when
      // fewer lines than threads were written.
      if (line == null) {
        throw new AssertionError("expected " + threads.length + " lines but found only " + i);
      }
      String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
      assertEquals(3, parts.length);
      // check that all thread names written are the same in the same line
      String tname = parts[0].substring(parts[0].indexOf('_'));
      ids.add(tname);
      assertEquals(tname, parts[1].substring(parts[1].indexOf('_')));
      assertEquals(tname, parts[2].substring(parts[2].indexOf('_')));
    }
    // only threads.length lines should exist
    assertNull(br.readLine());
    // every thread must have contributed exactly one distinct line
    assertEquals(threads.length, ids.size());
  } finally {
    br.close();
  }
}
} }