LUCENE-940: Multi-threaded issues fixed: SimpleDateFormat;

logging for addDoc/deleteDoc tasks;


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@550905 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2007-06-26 18:27:21 +00:00
parent 3e39b0a9a0
commit e6c659269a
8 changed files with 81 additions and 34 deletions

View File

@ -4,6 +4,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
$Id:$
6/25/07
- LUCENE-940: Multi-threaded issues fixed: SimpleDateFormat;
logging for addDoc/deleteDoc tasks. (Doron Cohen)
4/17/07
- LUCENE-863: Deprecated StandardBenchmarker in favour of byTask code.
(Otis Gospodnetic)

View File

@ -22,9 +22,7 @@ import java.io.Reader;
import java.io.StringReader;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Properties;
/**
@ -32,11 +30,7 @@ import java.util.Properties;
*/
public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.HTMLParser {
DateFormat dateFormat;
public DemoHTMLParser () {
dateFormat = new SimpleDateFormat("EEE, dd MMM yyyy kk:mm:ss ",Locale.US); //Tue, 09 Dec 2003 22:39:08 GMT
dateFormat.setLenient(true);
}
/*
@ -74,8 +68,11 @@ public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.
return new DocData(name, bodyBuf.toString(), title, props, date);
}
/*
* (non-Javadoc)
* @see org.apache.lucene.benchmark.byTask.feeds.HTMLParser#parse(java.lang.String, java.util.Date, java.lang.StringBuffer, java.text.DateFormat)
*/
public DocData parse(String name, Date date, StringBuffer inputText, DateFormat dateFormat) throws IOException, InterruptedException {
// TODO Auto-generated method stub
return parse(name, date, new StringReader(inputText.toString()), dateFormat);
}

View File

@ -39,7 +39,7 @@ import java.util.Locale;
*/
public class ReutersDocMaker extends BasicDocMaker {
private DateFormat dateFormat;
private ThreadLocal dateFormat = new ThreadLocal();
private File dataDir = null;
private ArrayList inputFiles = new ArrayList();
private int nextFile = 0;
@ -58,11 +58,21 @@ public class ReutersDocMaker extends BasicDocMaker {
if (inputFiles.size()==0) {
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
}
// date format: 30-MAR-1987 14:22:36.87
dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
dateFormat.setLenient(true);
}
// get/initiate a thread-local simple date format (must do so
// because SimpleDateFormat is not thread-safe.
protected synchronized DateFormat getDateFormat () {
DateFormat df = (DateFormat) dateFormat.get();
if (df == null) {
// date format: 30-MAR-1987 14:22:36.87
df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
df.setLenient(true);
dateFormat.set(df);
}
return df;
}
protected DocData getNextDocData() throws Exception {
File f = null;
String name = null;
@ -95,7 +105,7 @@ public class ReutersDocMaker extends BasicDocMaker {
addBytes(f.length());
Date date = dateFormat.parse(dateStr.trim());
Date date = getDateFormat().parse(dateStr.trim());
return new DocData(name, bodyBuf.toString(), title, null, date);
}

View File

@ -37,7 +37,10 @@ public class SimpleDocMaker extends BasicDocMaker {
"right place at the right time with the right knowledge.";
// return a new docid
private synchronized int newdocid() {
private synchronized int newdocid() throws NoMoreDataException {
if (docID>0 && !forever) {
throw new NoMoreDataException();
}
return docID++;
}
@ -59,11 +62,9 @@ public class SimpleDocMaker extends BasicDocMaker {
}
protected DocData getNextDocData() throws NoMoreDataException {
if (docID>0 && !forever) {
throw new NoMoreDataException();
}
int id = newdocid();
addBytes(DOC_TEXT.length());
return new DocData("doc"+newdocid(),DOC_TEXT, null, null, null);
return new DocData("doc"+id, DOC_TEXT, null, null, null);
}
}

View File

@ -41,7 +41,7 @@ public class TrecDocMaker extends BasicDocMaker {
private static final String newline = System.getProperty("line.separator");
private DateFormat dateFormat [];
private ThreadLocal dateFormat = new ThreadLocal();
private File dataDir = null;
private ArrayList inputFiles = new ArrayList();
private int nextFile = 0;
@ -67,12 +67,6 @@ public class TrecDocMaker extends BasicDocMaker {
if (inputFiles.size()==0) {
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
}
// date format: 30-MAR-1987 14:22:36.87
dateFormat = new SimpleDateFormat[DATE_FORMATS.length];
for (int i = 0; i < dateFormat.length; i++) {
dateFormat[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US);
dateFormat[i].setLenient(true);
}
}
private void openNextFile() throws NoMoreDataException, Exception {
@ -177,17 +171,30 @@ public class TrecDocMaker extends BasicDocMaker {
// this is the next document, so parse it
Date date = parseDate(dateStr);
HTMLParser p = getHtmlParser();
DocData docData = p.parse(name, date, sb, dateFormat[0]);
DocData docData = p.parse(name, date, sb, getDateFormat(0));
addBytes(sb.length()); // count char length of parsed html text (larger than the plain doc body text).
return docData;
}
private DateFormat getDateFormat(int n) {
DateFormat df[] = (DateFormat[]) dateFormat.get();
if (df == null) {
df = new SimpleDateFormat[DATE_FORMATS.length];
for (int i = 0; i < df.length; i++) {
df[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US);
df[i].setLenient(true);
}
dateFormat.set(df);
}
return df[n];
}
private Date parseDate(String dateStr) {
Date date = null;
for (int i=0; i<dateFormat.length; i++) {
for (int i=0; i<DATE_FORMATS.length; i++) {
try {
date = dateFormat[i].parse(dateStr.trim());
date = getDateFormat(i).parse(dateStr.trim());
return date;
} catch (ParseException e) {
}

View File

@ -40,8 +40,9 @@ public class AddDocTask extends PerfTask {
super(runData);
}
private static int logStep = -1;
private int logStep = -1;
private int docSize = 0;
int count = 0;
// volatile data passed between setup(), doLogic(), tearDown().
private Document doc = null;
@ -64,8 +65,7 @@ public class AddDocTask extends PerfTask {
* @see PerfTask#tearDown()
*/
public void tearDown() throws Exception {
DocMaker docMaker = getRunData().getDocMaker();
log(docMaker.getCount());
log(++count);
doc = null;
super.tearDown();
}
@ -77,11 +77,11 @@ public class AddDocTask extends PerfTask {
private void log (int count) {
if (logStep<0) {
// avoid sync although race possible here
// init once per instance
logStep = getRunData().getConfig().get("doc.add.log.step",DEFAULT_ADD_DOC_LOG_STEP);
}
if (logStep>0 && (count%logStep)==0) {
System.out.println("--> processed (add) "+count+" docs");
System.out.println("--> "+Thread.currentThread().getName()+" processed (add) "+count+" docs");
}
}

View File

@ -43,8 +43,8 @@ public class DeleteDocTask extends PerfTask {
super(runData);
}
private static int logStep = -1;
private static int deleteStep = -1;
private int logStep = -1;
private int deleteStep = -1;
private static int numDeleted = 0;
private static int lastDeleted = -1;

View File

@ -123,6 +123,34 @@ public class TestPerfTasksLogic extends TestCase {
assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
}
/**
* Test Parallel Doc Maker logic (for LUCENE-940)
*/
public void testParallelDocMaker() throws Exception {
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"# ----- properties ",
"doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
"doc.add.log.step=2697",
"doc.term.vector=false",
"doc.maker.forever=false",
"directory=FSDirectory",
"doc.stored=false",
"doc.tokenized=false",
"# ----- alg ",
"CreateIndex",
"[ { AddDoc } : * ] : 4 ",
"CloseIndex",
};
// 2. execute the algorithm (required in every "logic" test)
Benchmark benchmark = execBenchmark(algLines);
// 3. test number of docs in the index
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
int ndocsExpected = 21578; // that's how many docs there are in the Reuters collecton.
assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
}
// create the benchmark and execute it.
private Benchmark execBenchmark(String[] algLines) throws Exception {