mirror of https://github.com/apache/lucene.git
Applied 788 and 790 from Doron Cohen. Ran both the micro-standard and the task runs and results look reasonable.
Thanks, Doron git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@506093 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
881ab33b81
commit
bb66099414
|
@ -9,3 +9,6 @@ $Id:$
|
||||||
1. Committed Doron Cohen's benchmarking contribution, which provides an easily expandable task based approach to benchmarking. See the javadocs for information. (Doron Cohen via Grant Ingersoll)
|
1. Committed Doron Cohen's benchmarking contribution, which provides an easily expandable task based approach to benchmarking. See the javadocs for information. (Doron Cohen via Grant Ingersoll)
|
||||||
|
|
||||||
2. Added this file.
|
2. Added this file.
|
||||||
|
|
||||||
|
3. 2/11/07: LUCENE-790 and 788: Fixed Locale issue with date formatter. Fixed some minor issues with benchmarking by task. Added a dependency
|
||||||
|
on the Lucene demo to the build classpath. (Doron Cohen, Grant Ingersoll)
|
|
@ -8,6 +8,7 @@
|
||||||
<import file="../contrib-build.xml"/>
|
<import file="../contrib-build.xml"/>
|
||||||
<property name="working.dir" value="work"/>
|
<property name="working.dir" value="work"/>
|
||||||
|
|
||||||
|
|
||||||
<target name="check-files">
|
<target name="check-files">
|
||||||
|
|
||||||
<available file="temp/news20.tar.gz" property="news20.exists"/>
|
<available file="temp/news20.tar.gz" property="news20.exists"/>
|
||||||
|
@ -84,8 +85,11 @@
|
||||||
<property name="collections.jar" value="commons-collections-3.1.jar"/>
|
<property name="collections.jar" value="commons-collections-3.1.jar"/>
|
||||||
<property name="logging.jar" value="commons-logging-1.0.4.jar"/>
|
<property name="logging.jar" value="commons-logging-1.0.4.jar"/>
|
||||||
<property name="bean-utils.jar" value="commons-beanutils-1.7.0.jar"/>
|
<property name="bean-utils.jar" value="commons-beanutils-1.7.0.jar"/>
|
||||||
|
<property name="lucene-demos.jar" location="${common.dir}/build/lucene-demos-${version}.jar"/>
|
||||||
|
|
||||||
<path id="classpath">
|
<path id="classpath">
|
||||||
<pathelement path="${lucene.jar}"/>
|
<pathelement path="${lucene.jar}"/>
|
||||||
|
<pathelement path="${lucene-demos.jar}"/>
|
||||||
<pathelement path="${basedir}/lib/${digester.jar}"/>
|
<pathelement path="${basedir}/lib/${digester.jar}"/>
|
||||||
<pathelement path="${basedir}/lib/${collections.jar}"/>
|
<pathelement path="${basedir}/lib/${collections.jar}"/>
|
||||||
<pathelement path="${basedir}/lib/${logging.jar}"/>
|
<pathelement path="${basedir}/lib/${logging.jar}"/>
|
||||||
|
|
|
@ -27,8 +27,17 @@ import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||||
* Run the benchmark algorithm.
|
* Run the benchmark algorithm.
|
||||||
* <p>Usage: java Benchmark algorithm-file
|
* <p>Usage: java Benchmark algorithm-file
|
||||||
* <ol>
|
* <ol>
|
||||||
* <li>Read algorithm.
|
* <li>Read algorithm.</li>
|
||||||
* <li> Run the algorithm.
|
* <li> Run the algorithm.</li>
|
||||||
|
* </ol>
|
||||||
|
* Things to be added/fixed in "Benchmarking by tasks":
|
||||||
|
* <ol>
|
||||||
|
* <li>TODO - report into Excel and/or graphed view.</li>
|
||||||
|
* <li>TODO - perf comparison between Lucene releases over the years.</li>
|
||||||
|
* <li>TODO - perf report adequate to include in Lucene nightly build site? (so we can easily track performance changes.)</li>
|
||||||
|
* <li>TODO - add overall time control for repeated execution (vs. current by-count only).</li>
|
||||||
|
* <li>TODO - query maker that is based on index statistics.</li>
|
||||||
|
* <li>TODO - prpoerties documentation - each task should document the properties it relies on.</li>
|
||||||
* </ol>
|
* </ol>
|
||||||
*/
|
*/
|
||||||
public class Benchmark {
|
public class Benchmark {
|
||||||
|
|
|
@ -23,24 +23,21 @@ import java.io.FileReader;
|
||||||
import java.text.DateFormat;
|
import java.text.DateFormat;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Date;
|
import java.util.Locale;
|
||||||
import org.apache.lucene.document.DateTools;
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A DocMaker using the Reuters collection for its input.
|
* A DocMaker using the Reuters collection for its input.
|
||||||
*/
|
*/
|
||||||
public class ReutersDocMaker extends SimpleDocMaker {
|
public class ReutersDocMaker extends BasicDocMaker {
|
||||||
|
|
||||||
private DateFormat dateFormat;
|
private DateFormat dateFormat;
|
||||||
private File dataDir = null;
|
private File dataDir = null;
|
||||||
private ArrayList txtFiles = new ArrayList();
|
private ArrayList inputFiles = new ArrayList();
|
||||||
private int nextFile = 0;
|
private int nextFile = 0;
|
||||||
private int round=0;
|
private int iteration=0;
|
||||||
private int count = 0;
|
|
||||||
|
|
||||||
/* (non-Javadoc)
|
/* (non-Javadoc)
|
||||||
* @see SimpleDocMaker#setConfig(java.util.Properties)
|
* @see SimpleDocMaker#setConfig(java.util.Properties)
|
||||||
|
@ -49,48 +46,28 @@ public class ReutersDocMaker extends SimpleDocMaker {
|
||||||
super.setConfig(config);
|
super.setConfig(config);
|
||||||
String d = config.get("docs.dir","reuters-out");
|
String d = config.get("docs.dir","reuters-out");
|
||||||
dataDir = new File(new File("work"),d);
|
dataDir = new File(new File("work"),d);
|
||||||
addFiles(dataDir);
|
collectFiles(dataDir,inputFiles);
|
||||||
if (txtFiles.size()==0) {
|
if (inputFiles.size()==0) {
|
||||||
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
|
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
|
||||||
}
|
}
|
||||||
// date format: 30-MAR-1987 14:22:36.87
|
// date format: 30-MAR-1987 14:22:36.87
|
||||||
dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS");
|
dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
|
||||||
dateFormat.setLenient(true);
|
dateFormat.setLenient(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addFiles(File f) {
|
protected DocData getNextDocData() throws Exception {
|
||||||
if (!f.canRead()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (f.isDirectory()) {
|
|
||||||
File files[] = f.listFiles();
|
|
||||||
for (int i = 0; i < files.length; i++) {
|
|
||||||
addFiles(files[i]);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
txtFiles.add(f);
|
|
||||||
addUniqueBytes(f.length());
|
|
||||||
}
|
|
||||||
|
|
||||||
/* (non-Javadoc)
|
|
||||||
* @see SimpleDocMaker#makeDocument()
|
|
||||||
*/
|
|
||||||
public Document makeDocument() throws Exception {
|
|
||||||
File f = null;
|
File f = null;
|
||||||
String name = null;
|
String name = null;
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
f = (File) txtFiles.get(nextFile++);
|
f = (File) inputFiles.get(nextFile++);
|
||||||
name = f.getCanonicalPath()+"_"+round;
|
name = f.getCanonicalPath()+"_"+iteration;
|
||||||
if (nextFile >= txtFiles.size()) {
|
if (nextFile >= inputFiles.size()) {
|
||||||
// exhausted files, start a new round
|
// exhausted files, start a new round
|
||||||
nextFile = 0;
|
nextFile = 0;
|
||||||
round++;
|
iteration++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Document doc = new Document();
|
|
||||||
doc.add(new Field("name",name,storeVal,indexVal,termVecVal));
|
|
||||||
BufferedReader reader = new BufferedReader(new FileReader(f));
|
BufferedReader reader = new BufferedReader(new FileReader(f));
|
||||||
String line = null;
|
String line = null;
|
||||||
//First line is the date, 3rd is the title, rest is body
|
//First line is the date, 3rd is the title, rest is body
|
||||||
|
@ -98,27 +75,23 @@ public class ReutersDocMaker extends SimpleDocMaker {
|
||||||
reader.readLine();//skip an empty line
|
reader.readLine();//skip an empty line
|
||||||
String title = reader.readLine();
|
String title = reader.readLine();
|
||||||
reader.readLine();//skip an empty line
|
reader.readLine();//skip an empty line
|
||||||
StringBuffer body = new StringBuffer(1024);
|
StringBuffer bodyBuf = new StringBuffer(1024);
|
||||||
while ((line = reader.readLine()) != null) {
|
while ((line = reader.readLine()) != null) {
|
||||||
body.append(line).append(' ');
|
bodyBuf.append(line).append(' ');
|
||||||
}
|
|
||||||
Date date = dateFormat.parse(dateStr.trim());
|
|
||||||
doc.add(new Field("date", DateTools.dateToString(date, DateTools.Resolution.SECOND),
|
|
||||||
Field.Store.YES, Field.Index.UN_TOKENIZED));
|
|
||||||
|
|
||||||
if (title != null) {
|
|
||||||
doc.add(new Field("title", title, storeVal,indexVal,termVecVal));
|
|
||||||
}
|
|
||||||
if (body.length() > 0) {
|
|
||||||
doc.add(new Field("body", body.toString(), storeVal,indexVal,termVecVal));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
count++;
|
|
||||||
addBytes(f.length());
|
addBytes(f.length());
|
||||||
|
|
||||||
return doc;
|
DocData dd = new DocData();
|
||||||
|
|
||||||
|
dd.date = dateFormat.parse(dateStr.trim());
|
||||||
|
dd.name = name;
|
||||||
|
dd.title = title;
|
||||||
|
dd.body = bodyBuf.toString();
|
||||||
|
return dd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* (non-Javadoc)
|
* (non-Javadoc)
|
||||||
* @see DocMaker#resetIinputs()
|
* @see DocMaker#resetIinputs()
|
||||||
|
@ -126,8 +99,7 @@ public class ReutersDocMaker extends SimpleDocMaker {
|
||||||
public synchronized void resetInputs() {
|
public synchronized void resetInputs() {
|
||||||
super.resetInputs();
|
super.resetInputs();
|
||||||
nextFile = 0;
|
nextFile = 0;
|
||||||
round = 0;
|
iteration = 0;
|
||||||
count = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -135,22 +107,7 @@ public class ReutersDocMaker extends SimpleDocMaker {
|
||||||
* @see DocMaker#numUniqueTexts()
|
* @see DocMaker#numUniqueTexts()
|
||||||
*/
|
*/
|
||||||
public int numUniqueTexts() {
|
public int numUniqueTexts() {
|
||||||
return txtFiles.size();
|
return inputFiles.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
* @see DocMaker#getCount()
|
|
||||||
*/
|
|
||||||
public int getCount() {
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
* @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int)
|
|
||||||
*/
|
|
||||||
public Document makeDocument(int size) throws Exception {
|
|
||||||
throw new Exception(this+".makeDocument (int size) is not supported!");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,28 +17,12 @@ package org.apache.lucene.benchmark.byTask.feeds;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
|
||||||
import org.apache.lucene.benchmark.byTask.utils.Format;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create documents for the test
|
* Create documents for the test
|
||||||
*/
|
*/
|
||||||
public class SimpleDocMaker implements DocMaker {
|
public class SimpleDocMaker extends BasicDocMaker {
|
||||||
|
|
||||||
static final String BODY_FIELD = "body";
|
|
||||||
private int docID = 0;
|
private int docID = 0;
|
||||||
private long numBytes = 0;
|
|
||||||
private long numUniqueBytes = 0;
|
|
||||||
|
|
||||||
protected Config config;
|
|
||||||
private int nextDocTextPosition = 0; // for creating docs of fixed size.
|
|
||||||
|
|
||||||
protected Field.Store storeVal = Field.Store.NO;
|
|
||||||
protected Field.Index indexVal = Field.Index.TOKENIZED;
|
|
||||||
protected Field.TermVector termVecVal = Field.TermVector.NO;
|
|
||||||
|
|
||||||
static final String DOC_TEXT = // from a public first aid info at http://firstaid.ie.eu.org
|
static final String DOC_TEXT = // from a public first aid info at http://firstaid.ie.eu.org
|
||||||
"Well it may be a little dramatic but sometimes it true. " +
|
"Well it may be a little dramatic but sometimes it true. " +
|
||||||
|
@ -52,100 +36,18 @@ public class SimpleDocMaker implements DocMaker {
|
||||||
"ones and the stranger whose life may depend on you being in the " +
|
"ones and the stranger whose life may depend on you being in the " +
|
||||||
"right place at the right time with the right knowledge.";
|
"right place at the right time with the right knowledge.";
|
||||||
|
|
||||||
private static int DOC_TEXT_LENGTH = DOC_TEXT.length();
|
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
* @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument()
|
|
||||||
*/
|
|
||||||
public Document makeDocument () throws Exception {
|
|
||||||
return makeDocument(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
* @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#makeDocument(int)
|
|
||||||
*/
|
|
||||||
public Document makeDocument(int size) throws Exception {
|
|
||||||
int docid = newdocid();
|
|
||||||
Document doc = new Document();
|
|
||||||
doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal));
|
|
||||||
String docText = createDocText(size);
|
|
||||||
doc.add(new Field(BODY_FIELD, "synthetic body text"+docid+" "+docText, storeVal, indexVal, termVecVal));
|
|
||||||
addBytes(docText.length()); // should multiply by 2 here?
|
|
||||||
return doc;
|
|
||||||
}
|
|
||||||
|
|
||||||
private synchronized int[] nextDocText(int fixedDocSize) {
|
|
||||||
int from = nextDocTextPosition;
|
|
||||||
int to = nextDocTextPosition;
|
|
||||||
int wraps = 0;
|
|
||||||
int size = 0;
|
|
||||||
|
|
||||||
while (size<fixedDocSize) {
|
|
||||||
int added = DOC_TEXT_LENGTH - to;
|
|
||||||
if (size+added <= fixedDocSize) {
|
|
||||||
to = 0;
|
|
||||||
size += added;
|
|
||||||
wraps ++;
|
|
||||||
} else {
|
|
||||||
added = fixedDocSize - size;
|
|
||||||
size += added;
|
|
||||||
to += added;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
nextDocTextPosition = to;
|
|
||||||
|
|
||||||
return new int[]{from,to,wraps};
|
|
||||||
}
|
|
||||||
|
|
||||||
private String createDocText(int fixedDocSize) {
|
|
||||||
if (fixedDocSize<=0) {
|
|
||||||
//no fixed doc size requirement
|
|
||||||
return DOC_TEXT;
|
|
||||||
}
|
|
||||||
|
|
||||||
// create a document wit fixed doc size
|
|
||||||
int fromToWraps[] = nextDocText(fixedDocSize);
|
|
||||||
int from = fromToWraps[0];
|
|
||||||
int to = fromToWraps[1];
|
|
||||||
int wraps = fromToWraps[2];
|
|
||||||
StringBuffer sb = new StringBuffer();
|
|
||||||
while (wraps-- > 0) {
|
|
||||||
sb.append(DOC_TEXT.substring(from));
|
|
||||||
from = 0;
|
|
||||||
}
|
|
||||||
sb.append(DOC_TEXT.substring(from,to));
|
|
||||||
return sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
// return a new docid
|
// return a new docid
|
||||||
private synchronized int newdocid() {
|
private synchronized int newdocid() {
|
||||||
return docID++;
|
return docID++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* (non-Javadoc)
|
|
||||||
* @see DocMaker#setConfig(java.util.Properties)
|
|
||||||
*/
|
|
||||||
public void setConfig(Config config) {
|
|
||||||
this.config = config;
|
|
||||||
boolean stored = config.get("doc.stored",false);
|
|
||||||
boolean tokenized = config.get("doc.tokenized",true);
|
|
||||||
boolean termVec = config.get("doc.term.vector",false);
|
|
||||||
storeVal = (stored ? Field.Store.YES : Field.Store.NO);
|
|
||||||
indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
|
|
||||||
termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* (non-Javadoc)
|
* (non-Javadoc)
|
||||||
* @see DocMaker#resetIinputs()
|
* @see DocMaker#resetIinputs()
|
||||||
*/
|
*/
|
||||||
public synchronized void resetInputs() {
|
public synchronized void resetInputs() {
|
||||||
printDocStatistics();
|
super.resetInputs();
|
||||||
docID = 0;
|
docID = 0;
|
||||||
numBytes = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -156,72 +58,12 @@ public class SimpleDocMaker implements DocMaker {
|
||||||
return 0; // not applicable
|
return 0; // not applicable
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
protected DocData getNextDocData() {
|
||||||
* (non-Javadoc)
|
DocData dd = new DocData();
|
||||||
* @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#numUniqueBytes()
|
dd.body = DOC_TEXT;
|
||||||
*/
|
dd.name = "doc"+newdocid();
|
||||||
public long numUniqueBytes() {
|
addBytes(DOC_TEXT.length());
|
||||||
return numUniqueBytes;
|
return dd;
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
* @see DocMaker#getCount()
|
|
||||||
*/
|
|
||||||
public int getCount() {
|
|
||||||
return docID;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
* @see DocMaker#getByteCount()
|
|
||||||
*/
|
|
||||||
public long getByteCount() {
|
|
||||||
return numBytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void addUniqueBytes (long n) {
|
|
||||||
numUniqueBytes += n;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void addBytes (long n) {
|
|
||||||
numBytes += n;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* (non-Javadoc)
|
|
||||||
* @see org.apache.lucene.benchmark.byTask.feeds.DocMaker#printDocStatistics()
|
|
||||||
*/
|
|
||||||
private int lastPrintedNumUniqueTexts = 0;
|
|
||||||
private long lastPrintedNumUniqueBytes = 0;
|
|
||||||
private int printNum = 0;
|
|
||||||
public void printDocStatistics() {
|
|
||||||
boolean print = false;
|
|
||||||
String col = " ";
|
|
||||||
StringBuffer sb = new StringBuffer();
|
|
||||||
String newline = System.getProperty("line.separator");
|
|
||||||
sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline);
|
|
||||||
int nut = numUniqueTexts();
|
|
||||||
if (nut > lastPrintedNumUniqueTexts) {
|
|
||||||
print = true;
|
|
||||||
sb.append("total bytes of unique texts: ").append(Format.format(0,nut,col)).append(newline);
|
|
||||||
lastPrintedNumUniqueTexts = nut;
|
|
||||||
}
|
|
||||||
long nub = numUniqueBytes();
|
|
||||||
if (nub > lastPrintedNumUniqueBytes) {
|
|
||||||
print = true;
|
|
||||||
sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
|
|
||||||
lastPrintedNumUniqueBytes = nub;
|
|
||||||
}
|
|
||||||
if (getCount()>0) {
|
|
||||||
print = true;
|
|
||||||
sb.append("num files added since last inputs reset: ").append(Format.format(0,getCount(),col)).append(newline);
|
|
||||||
sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getByteCount(),col)).append(newline);
|
|
||||||
}
|
|
||||||
if (print) {
|
|
||||||
System.out.println(sb.append(newline).toString());
|
|
||||||
printNum++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,7 +44,7 @@ Contained packages:
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><a href="feeds/package-summary.html">feeds</a></td>
|
<td><a href="feeds/package-summary.html">feeds</a></td>
|
||||||
<td>Sources foe benchmark inputs: documents and queries.</td>
|
<td>Sources for benchmark inputs: documents and queries.</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr>
|
<tr>
|
||||||
<td><a href="utils/package-summary.html">utils</a></td>
|
<td><a href="utils/package-summary.html">utils</a></td>
|
||||||
|
@ -92,7 +92,7 @@ Easiest way to run a benchmarks is using the predefined ant task:
|
||||||
<br>- would run the <code>compound-penalty.alg</code> "algorithm".
|
<br>- would run the <code>compound-penalty.alg</code> "algorithm".
|
||||||
</li>
|
</li>
|
||||||
<li>ant run-task -Dtask.alg=[full-path-to-your-alg-file]
|
<li>ant run-task -Dtask.alg=[full-path-to-your-alg-file]
|
||||||
<br>- would run the <code>your perf test</code> "algorithm".
|
<br>- would run <code>your perf test</code> "algorithm".
|
||||||
</li>
|
</li>
|
||||||
<li>java org.apache.lucene.benchmark.byTask.programmatic.Sample
|
<li>java org.apache.lucene.benchmark.byTask.programmatic.Sample
|
||||||
<br>- would run a performance test programmatically - without using an alg file.
|
<br>- would run a performance test programmatically - without using an alg file.
|
||||||
|
@ -109,7 +109,7 @@ otherwise, you can extend the framework to meet your needs, as explained herein.
|
||||||
<p>
|
<p>
|
||||||
Each benchmark run has a DocMaker and a QueryMaker. These two should usually match, so
|
Each benchmark run has a DocMaker and a QueryMaker. These two should usually match, so
|
||||||
that "meaningful" queries are used for a certain collection.
|
that "meaningful" queries are used for a certain collection.
|
||||||
Properties defined at the header of the alg file define which "makers" should be used.
|
Properties set at the header of the alg file define which "makers" should be used.
|
||||||
You can also specify your own makers, implementing the DocMaker and QureyMaker interfaces.
|
You can also specify your own makers, implementing the DocMaker and QureyMaker interfaces.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
@ -275,8 +275,8 @@ regular index/search work tasks, report tasks, and control tasks.
|
||||||
<br>This increments a global "round counter". All task runs that would start now would
|
<br>This increments a global "round counter". All task runs that would start now would
|
||||||
record the new, updated round counter as their round number. This would appear in reports.
|
record the new, updated round counter as their round number. This would appear in reports.
|
||||||
In particular, see <font color="#FF0066">RepSumByNameRound</font> above.
|
In particular, see <font color="#FF0066">RepSumByNameRound</font> above.
|
||||||
<br>An additional effect of NewRound, is that numeric and boolean properties defined in the
|
<br>An additional effect of NewRound, is that numeric and boolean properties defined (at the head
|
||||||
.properties file as a sequence of values, e.g. <font color="#FF0066">merge.factor=mrg:10:100:10:100</font> would
|
of the .alg file) as a sequence of values, e.g. <font color="#FF0066">merge.factor=mrg:10:100:10:100</font> would
|
||||||
increment (cyclic) to the next value.
|
increment (cyclic) to the next value.
|
||||||
Note: this would also be reflected in the reports, in this case under a column that would be named "mrg".
|
Note: this would also be reflected in the reports, in this case under a column that would be named "mrg".
|
||||||
</li>
|
</li>
|
||||||
|
@ -368,7 +368,7 @@ Some of the currently defined properties are:
|
||||||
(Make sure it is no shorter than any value in the sequence).
|
(Make sure it is no shorter than any value in the sequence).
|
||||||
<ul>
|
<ul>
|
||||||
<li><font color="#FF0066">max.buffered</font>
|
<li><font color="#FF0066">max.buffered</font>
|
||||||
<br>Example: buffered=buf.10.10.100.100 -
|
<br>Example: max.buffered=buf:10:10:100:100 -
|
||||||
this would define using maxBufferedDocs of 10 in iterations 0 and 1,
|
this would define using maxBufferedDocs of 10 in iterations 0 and 1,
|
||||||
and 100 in iterations 2 and 3.
|
and 100 in iterations 2 and 3.
|
||||||
</li>
|
</li>
|
||||||
|
|
|
@ -51,6 +51,10 @@ public class Sample {
|
||||||
// task to report
|
// task to report
|
||||||
RepSumByNameTask rep = new RepSumByNameTask(runData);
|
RepSumByNameTask rep = new RepSumByNameTask(runData);
|
||||||
top.addTask(rep);
|
top.addTask(rep);
|
||||||
|
|
||||||
|
// print algorithm
|
||||||
|
System.out.println(top.toString());
|
||||||
|
|
||||||
// execute
|
// execute
|
||||||
top.doLogic();
|
top.doLogic();
|
||||||
}
|
}
|
||||||
|
|
|
@ -210,6 +210,12 @@ public class TaskSequence extends PerfTask {
|
||||||
*/
|
*/
|
||||||
public void setNoChildReport() {
|
public void setNoChildReport() {
|
||||||
letChildReport = false;
|
letChildReport = false;
|
||||||
|
for (Iterator it = tasks.iterator(); it.hasNext();) {
|
||||||
|
PerfTask task = (PerfTask) it.next();
|
||||||
|
if (task instanceof TaskSequence) {
|
||||||
|
((TaskSequence)task).setNoChildReport();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1,19 +1,5 @@
|
||||||
package org.apache.lucene.benchmark.standard;
|
package org.apache.lucene.benchmark.standard;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileFilter;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.FileReader;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.text.DateFormat;
|
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Date;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.benchmark.AbstractBenchmarker;
|
import org.apache.lucene.benchmark.AbstractBenchmarker;
|
||||||
|
@ -33,6 +19,11 @@ import org.apache.lucene.search.Hits;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.text.DateFormat;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.*;
|
||||||
/**
|
/**
|
||||||
* Copyright 2005 The Apache Software Foundation
|
* Copyright 2005 The Apache Software Foundation
|
||||||
*
|
*
|
||||||
|
@ -66,7 +57,7 @@ public class StandardBenchmarker extends AbstractBenchmarker implements Benchmar
|
||||||
|
|
||||||
public static final String INDEX_DIR = "index";
|
public static final String INDEX_DIR = "index";
|
||||||
//30-MAR-1987 14:22:36.87
|
//30-MAR-1987 14:22:36.87
|
||||||
private static DateFormat format = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS");
|
private static DateFormat format = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
|
||||||
//DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.SHORT);
|
//DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.SHORT);
|
||||||
static{
|
static{
|
||||||
format.setLenient(true);
|
format.setLenient(true);
|
||||||
|
|
Loading…
Reference in New Issue