mirror of https://github.com/apache/lucene.git
LUCENE-940: Multi-threaded issues fixed: SimpleDateFormat;
logging for addDoc/deleteDoc tasks; git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@550905 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3e39b0a9a0
commit
e6c659269a
|
@ -4,6 +4,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
|
|||
|
||||
$Id:$
|
||||
|
||||
6/25/07
|
||||
- LUCENE-940: Multi-threaded issues fixed: SimpleDateFormat;
|
||||
logging for addDoc/deleteDoc tasks. (Doron Cohen)
|
||||
|
||||
4/17/07
|
||||
- LUCENE-863: Deprecated StandardBenchmarker in favour of byTask code.
|
||||
(Otis Gospodnetic)
|
||||
|
|
|
@ -22,9 +22,7 @@ import java.io.Reader;
|
|||
import java.io.StringReader;
|
||||
import java.text.DateFormat;
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
import java.util.Locale;
|
||||
import java.util.Properties;
|
||||
|
||||
/**
|
||||
|
@ -32,11 +30,7 @@ import java.util.Properties;
|
|||
*/
|
||||
public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.HTMLParser {
|
||||
|
||||
DateFormat dateFormat;
|
||||
|
||||
public DemoHTMLParser () {
|
||||
dateFormat = new SimpleDateFormat("EEE, dd MMM yyyy kk:mm:ss ",Locale.US); //Tue, 09 Dec 2003 22:39:08 GMT
|
||||
dateFormat.setLenient(true);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -74,8 +68,11 @@ public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.
|
|||
return new DocData(name, bodyBuf.toString(), title, props, date);
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
* @see org.apache.lucene.benchmark.byTask.feeds.HTMLParser#parse(java.lang.String, java.util.Date, java.lang.StringBuffer, java.text.DateFormat)
|
||||
*/
|
||||
public DocData parse(String name, Date date, StringBuffer inputText, DateFormat dateFormat) throws IOException, InterruptedException {
|
||||
// TODO Auto-generated method stub
|
||||
return parse(name, date, new StringReader(inputText.toString()), dateFormat);
|
||||
}
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ import java.util.Locale;
|
|||
*/
|
||||
public class ReutersDocMaker extends BasicDocMaker {
|
||||
|
||||
private DateFormat dateFormat;
|
||||
private ThreadLocal dateFormat = new ThreadLocal();
|
||||
private File dataDir = null;
|
||||
private ArrayList inputFiles = new ArrayList();
|
||||
private int nextFile = 0;
|
||||
|
@ -58,11 +58,21 @@ public class ReutersDocMaker extends BasicDocMaker {
|
|||
if (inputFiles.size()==0) {
|
||||
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
|
||||
}
|
||||
// date format: 30-MAR-1987 14:22:36.87
|
||||
dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
|
||||
dateFormat.setLenient(true);
|
||||
}
|
||||
|
||||
// get/initiate a thread-local simple date format (must do so
|
||||
// because SimpleDateFormat is not thread-safe.
|
||||
protected synchronized DateFormat getDateFormat () {
|
||||
DateFormat df = (DateFormat) dateFormat.get();
|
||||
if (df == null) {
|
||||
// date format: 30-MAR-1987 14:22:36.87
|
||||
df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
|
||||
df.setLenient(true);
|
||||
dateFormat.set(df);
|
||||
}
|
||||
return df;
|
||||
}
|
||||
|
||||
protected DocData getNextDocData() throws Exception {
|
||||
File f = null;
|
||||
String name = null;
|
||||
|
@ -95,7 +105,7 @@ public class ReutersDocMaker extends BasicDocMaker {
|
|||
addBytes(f.length());
|
||||
|
||||
|
||||
Date date = dateFormat.parse(dateStr.trim());
|
||||
Date date = getDateFormat().parse(dateStr.trim());
|
||||
return new DocData(name, bodyBuf.toString(), title, null, date);
|
||||
}
|
||||
|
||||
|
|
|
@ -37,7 +37,10 @@ public class SimpleDocMaker extends BasicDocMaker {
|
|||
"right place at the right time with the right knowledge.";
|
||||
|
||||
// return a new docid
|
||||
private synchronized int newdocid() {
|
||||
private synchronized int newdocid() throws NoMoreDataException {
|
||||
if (docID>0 && !forever) {
|
||||
throw new NoMoreDataException();
|
||||
}
|
||||
return docID++;
|
||||
}
|
||||
|
||||
|
@ -59,11 +62,9 @@ public class SimpleDocMaker extends BasicDocMaker {
|
|||
}
|
||||
|
||||
protected DocData getNextDocData() throws NoMoreDataException {
|
||||
if (docID>0 && !forever) {
|
||||
throw new NoMoreDataException();
|
||||
}
|
||||
int id = newdocid();
|
||||
addBytes(DOC_TEXT.length());
|
||||
return new DocData("doc"+newdocid(),DOC_TEXT, null, null, null);
|
||||
return new DocData("doc"+id, DOC_TEXT, null, null, null);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ public class TrecDocMaker extends BasicDocMaker {
|
|||
|
||||
private static final String newline = System.getProperty("line.separator");
|
||||
|
||||
private DateFormat dateFormat [];
|
||||
private ThreadLocal dateFormat = new ThreadLocal();
|
||||
private File dataDir = null;
|
||||
private ArrayList inputFiles = new ArrayList();
|
||||
private int nextFile = 0;
|
||||
|
@ -67,12 +67,6 @@ public class TrecDocMaker extends BasicDocMaker {
|
|||
if (inputFiles.size()==0) {
|
||||
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
|
||||
}
|
||||
// date format: 30-MAR-1987 14:22:36.87
|
||||
dateFormat = new SimpleDateFormat[DATE_FORMATS.length];
|
||||
for (int i = 0; i < dateFormat.length; i++) {
|
||||
dateFormat[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US);
|
||||
dateFormat[i].setLenient(true);
|
||||
}
|
||||
}
|
||||
|
||||
private void openNextFile() throws NoMoreDataException, Exception {
|
||||
|
@ -177,17 +171,30 @@ public class TrecDocMaker extends BasicDocMaker {
|
|||
// this is the next document, so parse it
|
||||
Date date = parseDate(dateStr);
|
||||
HTMLParser p = getHtmlParser();
|
||||
DocData docData = p.parse(name, date, sb, dateFormat[0]);
|
||||
DocData docData = p.parse(name, date, sb, getDateFormat(0));
|
||||
addBytes(sb.length()); // count char length of parsed html text (larger than the plain doc body text).
|
||||
|
||||
return docData;
|
||||
}
|
||||
|
||||
private DateFormat getDateFormat(int n) {
|
||||
DateFormat df[] = (DateFormat[]) dateFormat.get();
|
||||
if (df == null) {
|
||||
df = new SimpleDateFormat[DATE_FORMATS.length];
|
||||
for (int i = 0; i < df.length; i++) {
|
||||
df[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US);
|
||||
df[i].setLenient(true);
|
||||
}
|
||||
dateFormat.set(df);
|
||||
}
|
||||
return df[n];
|
||||
}
|
||||
|
||||
private Date parseDate(String dateStr) {
|
||||
Date date = null;
|
||||
for (int i=0; i<dateFormat.length; i++) {
|
||||
for (int i=0; i<DATE_FORMATS.length; i++) {
|
||||
try {
|
||||
date = dateFormat[i].parse(dateStr.trim());
|
||||
date = getDateFormat(i).parse(dateStr.trim());
|
||||
return date;
|
||||
} catch (ParseException e) {
|
||||
}
|
||||
|
|
|
@ -40,8 +40,9 @@ public class AddDocTask extends PerfTask {
|
|||
super(runData);
|
||||
}
|
||||
|
||||
private static int logStep = -1;
|
||||
private int logStep = -1;
|
||||
private int docSize = 0;
|
||||
int count = 0;
|
||||
|
||||
// volatile data passed between setup(), doLogic(), tearDown().
|
||||
private Document doc = null;
|
||||
|
@ -64,8 +65,7 @@ public class AddDocTask extends PerfTask {
|
|||
* @see PerfTask#tearDown()
|
||||
*/
|
||||
public void tearDown() throws Exception {
|
||||
DocMaker docMaker = getRunData().getDocMaker();
|
||||
log(docMaker.getCount());
|
||||
log(++count);
|
||||
doc = null;
|
||||
super.tearDown();
|
||||
}
|
||||
|
@ -77,11 +77,11 @@ public class AddDocTask extends PerfTask {
|
|||
|
||||
private void log (int count) {
|
||||
if (logStep<0) {
|
||||
// avoid sync although race possible here
|
||||
// init once per instance
|
||||
logStep = getRunData().getConfig().get("doc.add.log.step",DEFAULT_ADD_DOC_LOG_STEP);
|
||||
}
|
||||
if (logStep>0 && (count%logStep)==0) {
|
||||
System.out.println("--> processed (add) "+count+" docs");
|
||||
System.out.println("--> "+Thread.currentThread().getName()+" processed (add) "+count+" docs");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -43,8 +43,8 @@ public class DeleteDocTask extends PerfTask {
|
|||
super(runData);
|
||||
}
|
||||
|
||||
private static int logStep = -1;
|
||||
private static int deleteStep = -1;
|
||||
private int logStep = -1;
|
||||
private int deleteStep = -1;
|
||||
private static int numDeleted = 0;
|
||||
private static int lastDeleted = -1;
|
||||
|
||||
|
|
|
@ -123,6 +123,34 @@ public class TestPerfTasksLogic extends TestCase {
|
|||
assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test Parallel Doc Maker logic (for LUCENE-940)
|
||||
*/
|
||||
public void testParallelDocMaker() throws Exception {
|
||||
// 1. alg definition (required in every "logic" test)
|
||||
String algLines[] = {
|
||||
"# ----- properties ",
|
||||
"doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
|
||||
"doc.add.log.step=2697",
|
||||
"doc.term.vector=false",
|
||||
"doc.maker.forever=false",
|
||||
"directory=FSDirectory",
|
||||
"doc.stored=false",
|
||||
"doc.tokenized=false",
|
||||
"# ----- alg ",
|
||||
"CreateIndex",
|
||||
"[ { AddDoc } : * ] : 4 ",
|
||||
"CloseIndex",
|
||||
};
|
||||
|
||||
// 2. execute the algorithm (required in every "logic" test)
|
||||
Benchmark benchmark = execBenchmark(algLines);
|
||||
|
||||
// 3. test number of docs in the index
|
||||
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
|
||||
int ndocsExpected = 21578; // that's how many docs there are in the Reuters collecton.
|
||||
assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
|
||||
}
|
||||
|
||||
// create the benchmark and execute it.
|
||||
private Benchmark execBenchmark(String[] algLines) throws Exception {
|
||||
|
|
Loading…
Reference in New Issue