mirror of https://github.com/apache/lucene.git
LUCENE-2416: Some improvements to Benchmark
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@938582 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
af96066c27
commit
8dccdd7679
|
@ -2,6 +2,12 @@ Lucene Benchmark Contrib Change Log
|
|||
|
||||
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
|
||||
|
||||
4/27/2010: WriteLineDocTask now supports multi-threading. Also,
|
||||
StringBufferReader was renamed to StringBuilderReader and works on
|
||||
StringBuilder now. In addition, LongToEnglishContentSource starts from 0
|
||||
(instead of Long.MIN_VALUE+10) and wraps around to Long.MIN_VALUE (if you ever hit
|
||||
Long.MAX_VALUE). (Shai Erera)
|
||||
|
||||
4/07/2010
|
||||
LUCENE-2377: Enable the use of NoMergePolicy and NoMergeScheduler by
|
||||
CreateIndexTask. (Shai Erera)
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.benchmark.byTask.feeds;
|
|||
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Format;
|
||||
|
||||
/**
|
||||
* Abstract base query maker.
|
||||
|
@ -45,7 +44,7 @@ public abstract class AbstractQueryMaker implements QueryMaker {
|
|||
StringBuffer sb = new StringBuffer();
|
||||
if (queries != null) {
|
||||
for (int i = 0; i < queries.length; i++) {
|
||||
sb.append(i+". "+ Format.simpleName(queries[i].getClass())+" - "+queries[i].toString());
|
||||
sb.append(i+". "+ queries[i].getClass().getSimpleName()+" - "+queries[i].toString());
|
||||
sb.append(newline);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -40,8 +40,6 @@ import java.util.Stack;
|
|||
* denotes a relative path (<b>default=work</b>).
|
||||
* <li><b>docs.dir</b> - specifies the directory of the Dir collection. Can be set
|
||||
* to a relative path if "work.dir" is also specified (<b>default=dir-out</b>).
|
||||
* <li><b>html.parser</b> - specifies the {@link HTMLParser} class to use for
|
||||
* parsing the TREC documents content (<b>default=DemoHTMLParser</b>).
|
||||
* </ul>
|
||||
*/
|
||||
public class DirContentSource extends ContentSource {
|
||||
|
|
|
@ -333,7 +333,7 @@ public class DocMaker {
|
|||
String col = " ";
|
||||
StringBuffer sb = new StringBuffer();
|
||||
String newline = System.getProperty("line.separator");
|
||||
sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline);
|
||||
sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
|
||||
int nut = source.getTotalDocsCount();
|
||||
if (nut > lastPrintedNumUniqueTexts) {
|
||||
print = true;
|
||||
|
|
|
@ -1,34 +1,55 @@
|
|||
package org.apache.lucene.benchmark.byTask.feeds;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.English;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Date;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
**/
|
||||
* Creates documents whose content is a <code>long</code> number starting from
|
||||
* <code>0</code> ({@link #resetInputs()} restarts it at <code>{@link Long#MIN_VALUE} + 10</code>).
|
||||
*/
|
||||
public class LongToEnglishContentSource extends ContentSource{
|
||||
private long counter = Long.MIN_VALUE + 10;
|
||||
private long counter = 0;
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
|
||||
}
|
||||
//TODO: reduce/clean up synchronization
|
||||
|
||||
@Override
|
||||
public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
|
||||
docData.clear();
|
||||
docData.setBody(English.longToEnglish(counter));
|
||||
docData.setName("doc_" + String.valueOf(counter));
|
||||
docData.setTitle("title_" + String.valueOf(counter));
|
||||
docData.setDate(new Date());
|
||||
// store the current counter to avoid synchronization later on
|
||||
long curCounter;
|
||||
synchronized (this) {
|
||||
curCounter = counter;
|
||||
if (counter == Long.MAX_VALUE){
|
||||
counter = Long.MIN_VALUE + 10;//loop around
|
||||
counter = Long.MIN_VALUE;//loop around
|
||||
} else {
|
||||
++counter;
|
||||
}
|
||||
counter++;
|
||||
}
|
||||
docData.setBody(English.longToEnglish(curCounter));
|
||||
docData.setName("doc_" + String.valueOf(curCounter));
|
||||
docData.setTitle("title_" + String.valueOf(curCounter));
|
||||
docData.setDate(new Date());
|
||||
return docData;
|
||||
}
|
||||
|
||||
|
@ -36,4 +57,5 @@ public class LongToEnglishContentSource extends ContentSource{
|
|||
public void resetInputs() throws IOException {
|
||||
counter = Long.MIN_VALUE + 10;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -32,7 +32,7 @@ import java.util.Locale;
|
|||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.benchmark.byTask.utils.StringBufferReader;
|
||||
import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
|
||||
/**
|
||||
|
@ -53,7 +53,6 @@ import org.apache.lucene.util.ThreadInterruptedException;
|
|||
* </ul>
|
||||
*/
|
||||
public class TrecContentSource extends ContentSource {
|
||||
// TODO (3.0): change StringBuffer to StringBuilder
|
||||
|
||||
private static final class DateFormatInfo {
|
||||
DateFormat[] dfs;
|
||||
|
@ -79,8 +78,8 @@ public class TrecContentSource extends ContentSource {
|
|||
};
|
||||
|
||||
private ThreadLocal<DateFormatInfo> dateFormats = new ThreadLocal<DateFormatInfo>();
|
||||
private ThreadLocal<StringBufferReader> trecDocReader = new ThreadLocal<StringBufferReader>();
|
||||
private ThreadLocal<StringBuffer> trecDocBuffer = new ThreadLocal<StringBuffer>();
|
||||
private ThreadLocal<StringBuilderReader> trecDocReader = new ThreadLocal<StringBuilderReader>();
|
||||
private ThreadLocal<StringBuilder> trecDocBuffer = new ThreadLocal<StringBuilder>();
|
||||
private File dataDir = null;
|
||||
private ArrayList<File> inputFiles = new ArrayList<File>();
|
||||
private int nextFile = 0;
|
||||
|
@ -110,19 +109,19 @@ public class TrecContentSource extends ContentSource {
|
|||
return dfi;
|
||||
}
|
||||
|
||||
private StringBuffer getDocBuffer() {
|
||||
StringBuffer sb = trecDocBuffer.get();
|
||||
private StringBuilder getDocBuffer() {
|
||||
StringBuilder sb = trecDocBuffer.get();
|
||||
if (sb == null) {
|
||||
sb = new StringBuffer();
|
||||
sb = new StringBuilder();
|
||||
trecDocBuffer.set(sb);
|
||||
}
|
||||
return sb;
|
||||
}
|
||||
|
||||
private Reader getTrecDocReader(StringBuffer docBuffer) {
|
||||
StringBufferReader r = trecDocReader.get();
|
||||
private Reader getTrecDocReader(StringBuilder docBuffer) {
|
||||
StringBuilderReader r = trecDocReader.get();
|
||||
if (r == null) {
|
||||
r = new StringBufferReader(docBuffer);
|
||||
r = new StringBuilderReader(docBuffer);
|
||||
trecDocReader.set(r);
|
||||
} else {
|
||||
r.set(docBuffer);
|
||||
|
@ -131,7 +130,7 @@ public class TrecContentSource extends ContentSource {
|
|||
}
|
||||
|
||||
// read until finding a line that starts with the specified prefix, or a terminating tag has been found.
|
||||
private void read(StringBuffer buf, String prefix, boolean collectMatchLine,
|
||||
private void read(StringBuilder buf, String prefix, boolean collectMatchLine,
|
||||
boolean collectAll, String terminatingTag)
|
||||
throws IOException, NoMoreDataException {
|
||||
String sep = "";
|
||||
|
@ -248,7 +247,7 @@ public class TrecContentSource extends ContentSource {
|
|||
openNextFile();
|
||||
}
|
||||
|
||||
StringBuffer docBuf = getDocBuffer();
|
||||
StringBuilder docBuf = getDocBuffer();
|
||||
|
||||
// 1. skip until doc start
|
||||
docBuf.setLength(0);
|
||||
|
|
|
@ -23,7 +23,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
|
|||
import org.apache.lucene.benchmark.byTask.stats.Points;
|
||||
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Format;
|
||||
|
||||
/**
|
||||
* An abstract task to be tested for performance. <br>
|
||||
|
@ -67,7 +66,7 @@ public abstract class PerfTask implements Cloneable {
|
|||
|
||||
/** Should not be used externally */
|
||||
private PerfTask() {
|
||||
name = Format.simpleName(getClass());
|
||||
name = getClass().getSimpleName();
|
||||
if (name.endsWith("Task")) {
|
||||
name = name.substring(0, name.length() - 4);
|
||||
}
|
||||
|
@ -99,13 +98,7 @@ public abstract class PerfTask implements Cloneable {
|
|||
this.maxDepthLogStart = config.get("task.max.depth.log",0);
|
||||
|
||||
String logStepAtt = "log.step";
|
||||
// TODO (1.5): call getClass().getSimpleName() instead.
|
||||
String taskName = getClass().getName();
|
||||
int idx = taskName.lastIndexOf('.');
|
||||
// To support test internal classes. when we move to getSimpleName, this can be removed.
|
||||
int idx2 = taskName.indexOf('$', idx);
|
||||
if (idx2 != -1) idx = idx2;
|
||||
String taskLogStepAtt = "log.step." + taskName.substring(idx + 1, taskName.length() - 4 /* w/o the 'Task' part */);
|
||||
String taskLogStepAtt = "log.step." + name;
|
||||
if (config.get(taskLogStepAtt, null) != null) {
|
||||
logStepAtt = taskLogStepAtt;
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.BufferedWriter;
|
|||
import java.io.FileOutputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
|
@ -56,8 +57,9 @@ public class WriteLineDocTask extends PerfTask {
|
|||
private static final Matcher NORMALIZER = Pattern.compile("[\t\r\n]+").matcher("");
|
||||
|
||||
private int docSize = 0;
|
||||
private BufferedWriter lineFileOut = null;
|
||||
private PrintWriter lineFileOut = null;
|
||||
private DocMaker docMaker;
|
||||
private ThreadLocal<StringBuilder> threadBuffer = new ThreadLocal<StringBuilder>();
|
||||
|
||||
public WriteLineDocTask(PerfRunData runData) throws Exception {
|
||||
super(runData);
|
||||
|
@ -85,7 +87,7 @@ public class WriteLineDocTask extends PerfTask {
|
|||
out = new BufferedOutputStream(out, 1 << 16);
|
||||
out = new CompressorStreamFactory().createCompressorOutputStream("bzip2", out);
|
||||
}
|
||||
lineFileOut = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16);
|
||||
lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16));
|
||||
docMaker = runData.getDocMaker();
|
||||
}
|
||||
|
||||
|
@ -109,12 +111,15 @@ public class WriteLineDocTask extends PerfTask {
|
|||
f = doc.getField(DocMaker.DATE_FIELD);
|
||||
String date = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";
|
||||
|
||||
lineFileOut.write(title, 0, title.length());
|
||||
lineFileOut.write(SEP);
|
||||
lineFileOut.write(date, 0, date.length());
|
||||
lineFileOut.write(SEP);
|
||||
lineFileOut.write(body, 0, body.length());
|
||||
lineFileOut.newLine();
|
||||
StringBuilder sb = threadBuffer.get();
|
||||
if (sb == null) {
|
||||
sb = new StringBuilder();
|
||||
threadBuffer.set(sb);
|
||||
}
|
||||
sb.setLength(0);
|
||||
sb.append(title).append(SEP).append(date).append(SEP).append(body);
|
||||
// lineFileOut is a PrintWriter, which synchronizes internally in println.
|
||||
lineFileOut.println(sb.toString());
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -107,19 +107,4 @@ public class Format {
|
|||
return res.substring(res.length() - col.length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract simple class name
|
||||
* @param cls class whose simple name is required
|
||||
* @return simple class name
|
||||
*/
|
||||
public static String simpleName (Class<?> cls) {
|
||||
String c = cls.getName();
|
||||
String p = cls.getPackage().getName();
|
||||
int k = c.lastIndexOf(p+".");
|
||||
if (k<0) {
|
||||
return c;
|
||||
}
|
||||
return c.substring(k+1+p.length());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -21,22 +21,22 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* Implements a {@link Reader} over a {@link StringBuffer} instance. Although
|
||||
* Implements a {@link Reader} over a {@link StringBuilder} instance. Although
|
||||
* one can use {@link java.io.StringReader} by passing it
|
||||
* {@link StringBuffer#toString()}, it is better to use this class, as it
|
||||
* doesn't mark the passed-in {@link StringBuffer} as shared (which will cause
|
||||
* {@link StringBuilder#toString()}, it is better to use this class, as it
|
||||
* doesn't mark the passed-in {@link StringBuilder} as shared (which will cause
|
||||
* inner char[] allocations at the next append() attempt).<br>
|
||||
* Notes:
|
||||
* <ul>
|
||||
* <li>This implementation assumes the underlying {@link StringBuffer} is not
|
||||
* <li>This implementation assumes the underlying {@link StringBuilder} is not
|
||||
* changed during the use of this {@link Reader} implementation.
|
||||
* <li>This implementation is thread-safe.
|
||||
* <li>The implementation looks very much like {@link java.io.StringReader} (for
|
||||
* the right reasons).
|
||||
* <li>If one wants to reuse that instance, then the following needs to be done:
|
||||
* <pre>
|
||||
* StringBuffer sb = new StringBuffer("some text");
|
||||
* Reader reader = new StringBufferReader(sb);
|
||||
* StringBuilder sb = new StringBuilder("some text");
|
||||
* Reader reader = new StringBuilderReader(sb);
|
||||
* ... read from reader - don't close it ! ...
|
||||
* sb.setLength(0);
|
||||
* sb.append("some new text");
|
||||
|
@ -45,23 +45,21 @@ import java.io.Reader;
|
|||
* </pre>
|
||||
* </ul>
|
||||
*/
|
||||
public class StringBufferReader extends Reader {
|
||||
public class StringBuilderReader extends Reader {
|
||||
|
||||
// TODO (3.0): change to StringBuffer (including the name of the class)
|
||||
|
||||
// The StringBuffer to read from.
|
||||
private StringBuffer sb;
|
||||
// The StringBuilder to read from.
|
||||
private StringBuilder sb;
|
||||
|
||||
// The length of 'sb'.
|
||||
private int length;
|
||||
|
||||
// The next position to read from the StringBuffer.
|
||||
// The next position to read from the StringBuilder.
|
||||
private int next = 0;
|
||||
|
||||
// The mark position. The default value 0 means the start of the text.
|
||||
private int mark = 0;
|
||||
|
||||
public StringBufferReader(StringBuffer sb) {
|
||||
public StringBuilderReader(StringBuilder sb) {
|
||||
set(sb);
|
||||
}
|
||||
|
||||
|
@ -85,7 +83,7 @@ public class StringBufferReader extends Reader {
|
|||
*
|
||||
* @param readAheadLimit Limit on the number of characters that may be read
|
||||
* while still preserving the mark. Because the stream's input comes
|
||||
* from a StringBuffer, there is no actual limit, so this argument
|
||||
* from a StringBuilder, there is no actual limit, so this argument
|
||||
* must not be negative, but is otherwise ignored.
|
||||
* @exception IllegalArgumentException If readAheadLimit is &lt; 0
|
||||
* @exception IOException If an I/O error occurs
|
||||
|
@ -156,7 +154,7 @@ public class StringBufferReader extends Reader {
|
|||
}
|
||||
}
|
||||
|
||||
public void set(StringBuffer sb) {
|
||||
public void set(StringBuilder sb) {
|
||||
synchronized (lock) {
|
||||
this.sb = sb;
|
||||
length = sb.length();
|
|
@ -22,7 +22,9 @@ import java.io.File;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.HashSet;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.compress.compressors.CompressorStreamFactory;
|
||||
import org.apache.lucene.benchmark.BenchmarkTestCase;
|
||||
|
@ -97,6 +99,21 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// class has to be public so that Class.forName.newInstance() will work
|
||||
public static final class ThreadingDocMaker extends DocMaker {
|
||||
|
||||
@Override
|
||||
public Document makeDocument() throws Exception {
|
||||
Document doc = new Document();
|
||||
String name = Thread.currentThread().getName();
|
||||
doc.add(new Field(BODY_FIELD, "body_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
|
||||
doc.add(new Field(TITLE_FIELD, "title_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
|
||||
doc.add(new Field(DATE_FIELD, "date_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS));
|
||||
return doc;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
|
||||
|
||||
private PerfRunData createPerfRunData(File file, boolean setBZCompress,
|
||||
|
@ -226,4 +243,48 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testMultiThreaded() throws Exception {
|
||||
File file = new File(getWorkDir(), "one-line");
|
||||
PerfRunData runData = createPerfRunData(file, false, null, ThreadingDocMaker.class.getName());
|
||||
final WriteLineDocTask wldt = new WriteLineDocTask(runData);
|
||||
Thread[] threads = new Thread[10];
|
||||
for (int i = 0; i < threads.length; i++) {
|
||||
threads[i] = new Thread("t" + i) {
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
wldt.doLogic();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for (Thread t : threads) t.start();
|
||||
for (Thread t : threads) t.join();
|
||||
|
||||
wldt.close();
|
||||
|
||||
Set<String> ids = new HashSet<String>();
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
|
||||
try {
|
||||
for (int i = 0; i < threads.length; i++) {
|
||||
String line = br.readLine();
|
||||
String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
|
||||
assertEquals(3, parts.length);
|
||||
// check that all thread names written are the same in the same line
|
||||
String tname = parts[0].substring(parts[0].indexOf('_'));
|
||||
ids.add(tname);
|
||||
assertEquals(tname, parts[1].substring(parts[1].indexOf('_')));
|
||||
assertEquals(tname, parts[2].substring(parts[2].indexOf('_')));
|
||||
}
|
||||
// only threads.length lines should exist
|
||||
assertNull(br.readLine());
|
||||
assertEquals(threads.length, ids.size());
|
||||
} finally {
|
||||
br.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue