LUCENE-2416: Some improvements to Benchmark

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@938582 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2010-04-27 17:59:45 +00:00
parent af96066c27
commit 8dccdd7679
11 changed files with 145 additions and 79 deletions

View File

@ -2,6 +2,12 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
4/27/2010: WriteLineDocTask now supports multi-threading. Also,
StringBufferReader was renamed to StringBuilderReader and works on
StringBuilder now. In addition, LongToEnglishCountentSource starts from 0
(instead of Long.MIN_VAL+10) and wraps around to MIN_VAL (if you ever hit
Long.MAX_VAL). (Shai Erera)
4/07/2010
LUCENE-2377: Enable the use of NoMergePolicy and NoMergeScheduler by
CreateIndexTask. (Shai Erera)

View File

@ -17,7 +17,6 @@ package org.apache.lucene.benchmark.byTask.feeds;
import org.apache.lucene.search.Query;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;
/**
* Abstract base query maker.
@ -45,7 +44,7 @@ public abstract class AbstractQueryMaker implements QueryMaker {
StringBuffer sb = new StringBuffer();
if (queries != null) {
for (int i = 0; i < queries.length; i++) {
sb.append(i+". "+ Format.simpleName(queries[i].getClass())+" - "+queries[i].toString());
sb.append(i+". "+ queries[i].getClass().getSimpleName()+" - "+queries[i].toString());
sb.append(newline);
}
}

View File

@ -40,8 +40,6 @@ import java.util.Stack;
* denotes a relative path (<b>default=work</b>).
* <li><b>docs.dir</b> - specifies the directory the Dir collection. Can be set
* to a relative path if "work.dir" is also specified (<b>default=dir-out</b>).
* <li><b>html.parser</b> - specifies the {@link HTMLParser} class to use for
* parsing the TREC documents content (<b>default=DemoHTMLParser</b>).
* </ul>
*/
public class DirContentSource extends ContentSource {

View File

@ -333,7 +333,7 @@ public class DocMaker {
String col = " ";
StringBuffer sb = new StringBuffer();
String newline = System.getProperty("line.separator");
sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline);
sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
int nut = source.getTotalDocsCount();
if (nut > lastPrintedNumUniqueTexts) {
print = true;

View File

@ -1,34 +1,55 @@
package org.apache.lucene.benchmark.byTask.feeds;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.English;
import java.io.IOException;
import java.util.Date;
/**
*
*
**/
* Creates documents whose content is a <code>long</code> number starting from
* <code>{@link Long#MIN_VALUE} + 10</code>.
*/
public class LongToEnglishContentSource extends ContentSource{
private long counter = Long.MIN_VALUE + 10;
private long counter = 0;
@Override
public void close() throws IOException {
}
//TODO: reduce/clean up synchonization
@Override
public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
docData.clear();
docData.setBody(English.longToEnglish(counter));
docData.setName("doc_" + String.valueOf(counter));
docData.setTitle("title_" + String.valueOf(counter));
docData.setDate(new Date());
// store the current counter to avoid synchronization later on
long curCounter;
synchronized (this) {
curCounter = counter;
if (counter == Long.MAX_VALUE){
counter = Long.MIN_VALUE + 10;//loop around
counter = Long.MIN_VALUE;//loop around
} else {
++counter;
}
counter++;
}
docData.setBody(English.longToEnglish(curCounter));
docData.setName("doc_" + String.valueOf(curCounter));
docData.setTitle("title_" + String.valueOf(curCounter));
docData.setDate(new Date());
return docData;
}
@ -36,4 +57,5 @@ public class LongToEnglishContentSource extends ContentSource{
public void resetInputs() throws IOException {
counter = Long.MIN_VALUE + 10;
}
}

View File

@ -32,7 +32,7 @@ import java.util.Locale;
import java.util.zip.GZIPInputStream;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.StringBufferReader;
import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader;
import org.apache.lucene.util.ThreadInterruptedException;
/**
@ -53,7 +53,6 @@ import org.apache.lucene.util.ThreadInterruptedException;
* </ul>
*/
public class TrecContentSource extends ContentSource {
// TODO (3.0): change StringBuffer to StringBuilder
private static final class DateFormatInfo {
DateFormat[] dfs;
@ -79,8 +78,8 @@ public class TrecContentSource extends ContentSource {
};
private ThreadLocal<DateFormatInfo> dateFormats = new ThreadLocal<DateFormatInfo>();
private ThreadLocal<StringBufferReader> trecDocReader = new ThreadLocal<StringBufferReader>();
private ThreadLocal<StringBuffer> trecDocBuffer = new ThreadLocal<StringBuffer>();
private ThreadLocal<StringBuilderReader> trecDocReader = new ThreadLocal<StringBuilderReader>();
private ThreadLocal<StringBuilder> trecDocBuffer = new ThreadLocal<StringBuilder>();
private File dataDir = null;
private ArrayList<File> inputFiles = new ArrayList<File>();
private int nextFile = 0;
@ -110,19 +109,19 @@ public class TrecContentSource extends ContentSource {
return dfi;
}
private StringBuffer getDocBuffer() {
StringBuffer sb = trecDocBuffer.get();
private StringBuilder getDocBuffer() {
StringBuilder sb = trecDocBuffer.get();
if (sb == null) {
sb = new StringBuffer();
sb = new StringBuilder();
trecDocBuffer.set(sb);
}
return sb;
}
private Reader getTrecDocReader(StringBuffer docBuffer) {
StringBufferReader r = trecDocReader.get();
private Reader getTrecDocReader(StringBuilder docBuffer) {
StringBuilderReader r = trecDocReader.get();
if (r == null) {
r = new StringBufferReader(docBuffer);
r = new StringBuilderReader(docBuffer);
trecDocReader.set(r);
} else {
r.set(docBuffer);
@ -131,7 +130,7 @@ public class TrecContentSource extends ContentSource {
}
// read until finding a line that starts with the specified prefix, or a terminating tag has been found.
private void read(StringBuffer buf, String prefix, boolean collectMatchLine,
private void read(StringBuilder buf, String prefix, boolean collectMatchLine,
boolean collectAll, String terminatingTag)
throws IOException, NoMoreDataException {
String sep = "";
@ -248,7 +247,7 @@ public class TrecContentSource extends ContentSource {
openNextFile();
}
StringBuffer docBuf = getDocBuffer();
StringBuilder docBuf = getDocBuffer();
// 1. skip until doc start
docBuf.setLength(0);

View File

@ -23,7 +23,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;
/**
* An abstract task to be tested for performance. <br>
@ -67,7 +66,7 @@ public abstract class PerfTask implements Cloneable {
/** Should not be used externally */
private PerfTask() {
name = Format.simpleName(getClass());
name = getClass().getSimpleName();
if (name.endsWith("Task")) {
name = name.substring(0, name.length() - 4);
}
@ -99,13 +98,7 @@ public abstract class PerfTask implements Cloneable {
this.maxDepthLogStart = config.get("task.max.depth.log",0);
String logStepAtt = "log.step";
// TODO (1.5): call getClass().getSimpleName() instead.
String taskName = getClass().getName();
int idx = taskName.lastIndexOf('.');
// To support test internal classes. when we move to getSimpleName, this can be removed.
int idx2 = taskName.indexOf('$', idx);
if (idx2 != -1) idx = idx2;
String taskLogStepAtt = "log.step." + taskName.substring(idx + 1, taskName.length() - 4 /* w/o the 'Task' part */);
String taskLogStepAtt = "log.step." + name;
if (config.get(taskLogStepAtt, null) != null) {
logStepAtt = taskLogStepAtt;
}

View File

@ -22,6 +22,7 @@ import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@ -56,8 +57,9 @@ public class WriteLineDocTask extends PerfTask {
private static final Matcher NORMALIZER = Pattern.compile("[\t\r\n]+").matcher("");
private int docSize = 0;
private BufferedWriter lineFileOut = null;
private PrintWriter lineFileOut = null;
private DocMaker docMaker;
private ThreadLocal<StringBuilder> threadBuffer = new ThreadLocal<StringBuilder>();
public WriteLineDocTask(PerfRunData runData) throws Exception {
super(runData);
@ -85,7 +87,7 @@ public class WriteLineDocTask extends PerfTask {
out = new BufferedOutputStream(out, 1 << 16);
out = new CompressorStreamFactory().createCompressorOutputStream("bzip2", out);
}
lineFileOut = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16);
lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16));
docMaker = runData.getDocMaker();
}
@ -109,12 +111,15 @@ public class WriteLineDocTask extends PerfTask {
f = doc.getField(DocMaker.DATE_FIELD);
String date = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";
lineFileOut.write(title, 0, title.length());
lineFileOut.write(SEP);
lineFileOut.write(date, 0, date.length());
lineFileOut.write(SEP);
lineFileOut.write(body, 0, body.length());
lineFileOut.newLine();
StringBuilder sb = threadBuffer.get();
if (sb == null) {
sb = new StringBuilder();
threadBuffer.set(sb);
}
sb.setLength(0);
sb.append(title).append(SEP).append(date).append(SEP).append(body);
// lineFileOut is a PrintWriter, which synchronizes internally in println.
lineFileOut.println(sb.toString());
}
return 1;
}

View File

@ -107,19 +107,4 @@ public class Format {
return res.substring(res.length() - col.length());
}
/**
* Extract simple class name
* @param cls class whose simple name is required
* @return simple class name
*/
public static String simpleName (Class<?> cls) {
String c = cls.getName();
String p = cls.getPackage().getName();
int k = c.lastIndexOf(p+".");
if (k<0) {
return c;
}
return c.substring(k+1+p.length());
}
}

View File

@ -21,22 +21,22 @@ import java.io.IOException;
import java.io.Reader;
/**
* Implements a {@link Reader} over a {@link StringBuffer} instance. Although
* Implements a {@link Reader} over a {@link StringBuilder} instance. Although
* one can use {@link java.io.StringReader} by passing it
* {@link StringBuffer#toString()}, it is better to use this class, as it
* doesn't mark the passed-in {@link StringBuffer} as shared (which will cause
* {@link StringBuilder#toString()}, it is better to use this class, as it
* doesn't mark the passed-in {@link StringBuilder} as shared (which will cause
* inner char[] allocations at the next append() attempt).<br>
* Notes:
* <ul>
* <li>This implementation assumes the underlying {@link StringBuffer} is not
* <li>This implementation assumes the underlying {@link StringBuilder} is not
* changed during the use of this {@link Reader} implementation.
* <li>This implementation is thread-safe.
* <li>The implementation looks very much like {@link java.io.StringReader} (for
* the right reasons).
* <li>If one wants to reuse that instance, then the following needs to be done:
* <pre>
* StringBuffer sb = new StringBuffer("some text");
* Reader reader = new StringBufferReader(sb);
* StringBuilder sb = new StringBuilder("some text");
* Reader reader = new StringBuilderReader(sb);
* ... read from reader - don't close it ! ...
* sb.setLength(0);
* sb.append("some new text");
@ -45,23 +45,21 @@ import java.io.Reader;
* </pre>
* </ul>
*/
public class StringBufferReader extends Reader {
public class StringBuilderReader extends Reader {
// TODO (3.0): change to StringBuffer (including the name of the class)
// The StringBuffer to read from.
private StringBuffer sb;
// The StringBuilder to read from.
private StringBuilder sb;
// The length of 'sb'.
private int length;
// The next position to read from the StringBuffer.
// The next position to read from the StringBuilder.
private int next = 0;
// The mark position. The default value 0 means the start of the text.
private int mark = 0;
public StringBufferReader(StringBuffer sb) {
public StringBuilderReader(StringBuilder sb) {
set(sb);
}
@ -85,7 +83,7 @@ public class StringBufferReader extends Reader {
*
* @param readAheadLimit Limit on the number of characters that may be read
* while still preserving the mark. Because the stream's input comes
* from a StringBuffer, there is no actual limit, so this argument
* from a StringBuilder, there is no actual limit, so this argument
* must not be negative, but is otherwise ignored.
* @exception IllegalArgumentException If readAheadLimit is < 0
* @exception IOException If an I/O error occurs
@ -156,7 +154,7 @@ public class StringBufferReader extends Reader {
}
}
public void set(StringBuffer sb) {
public void set(StringBuilder sb) {
synchronized (lock) {
this.sb = sb;
length = sb.length();

View File

@ -22,7 +22,9 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.BenchmarkTestCase;
@ -97,6 +99,21 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
}
}
// class has to be public so that Class.forName.newInstance() will work
public static final class ThreadingDocMaker extends DocMaker {
@Override
public Document makeDocument() throws Exception {
Document doc = new Document();
String name = Thread.currentThread().getName();
doc.add(new Field(BODY_FIELD, "body_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field(TITLE_FIELD, "title_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field(DATE_FIELD, "date_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
return doc;
}
}
private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
private PerfRunData createPerfRunData(File file, boolean setBZCompress,
@ -226,4 +243,48 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
}
}
public void testMultiThreaded() throws Exception {
File file = new File(getWorkDir(), "one-line");
PerfRunData runData = createPerfRunData(file, false, null, ThreadingDocMaker.class.getName());
final WriteLineDocTask wldt = new WriteLineDocTask(runData);
Thread[] threads = new Thread[10];
for (int i = 0; i < threads.length; i++) {
threads[i] = new Thread("t" + i) {
@Override
public void run() {
try {
wldt.doLogic();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
};
}
for (Thread t : threads) t.start();
for (Thread t : threads) t.join();
wldt.close();
Set<String> ids = new HashSet<String>();
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
try {
for (int i = 0; i < threads.length; i++) {
String line = br.readLine();
String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
assertEquals(3, parts.length);
// check that all thread names written are the same in the same line
String tname = parts[0].substring(parts[0].indexOf('_'));
ids.add(tname);
assertEquals(tname, parts[1].substring(parts[1].indexOf('_')));
assertEquals(tname, parts[2].substring(parts[2].indexOf('_')));
}
// only threads.length lines should exist
assertNull(br.readLine());
assertEquals(threads.length, ids.size());
} finally {
br.close();
}
}
}