mirror of https://github.com/apache/lucene.git
LUCENE-940: Multi-threaded issues fixed: SimpleDateFormat;
logging for addDoc/deleteDoc tasks; git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@550905 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3e39b0a9a0
commit
e6c659269a
|
@ -4,6 +4,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
|
||||||
|
|
||||||
$Id:$
|
$Id:$
|
||||||
|
|
||||||
|
6/25/07
|
||||||
|
- LUCENE-940: Multi-threaded issues fixed: SimpleDateFormat;
|
||||||
|
logging for addDoc/deleteDoc tasks. (Doron Cohen)
|
||||||
|
|
||||||
4/17/07
|
4/17/07
|
||||||
- LUCENE-863: Deprecated StandardBenchmarker in favour of byTask code.
|
- LUCENE-863: Deprecated StandardBenchmarker in favour of byTask code.
|
||||||
(Otis Gospodnetic)
|
(Otis Gospodnetic)
|
||||||
|
|
|
@ -22,9 +22,7 @@ import java.io.Reader;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
import java.text.DateFormat;
|
import java.text.DateFormat;
|
||||||
import java.text.ParseException;
|
import java.text.ParseException;
|
||||||
import java.text.SimpleDateFormat;
|
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.Locale;
|
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -32,11 +30,7 @@ import java.util.Properties;
|
||||||
*/
|
*/
|
||||||
public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.HTMLParser {
|
public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.HTMLParser {
|
||||||
|
|
||||||
DateFormat dateFormat;
|
|
||||||
|
|
||||||
public DemoHTMLParser () {
|
public DemoHTMLParser () {
|
||||||
dateFormat = new SimpleDateFormat("EEE, dd MMM yyyy kk:mm:ss ",Locale.US); //Tue, 09 Dec 2003 22:39:08 GMT
|
|
||||||
dateFormat.setLenient(true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -74,8 +68,11 @@ public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.
|
||||||
return new DocData(name, bodyBuf.toString(), title, props, date);
|
return new DocData(name, bodyBuf.toString(), title, props, date);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* (non-Javadoc)
|
||||||
|
* @see org.apache.lucene.benchmark.byTask.feeds.HTMLParser#parse(java.lang.String, java.util.Date, java.lang.StringBuffer, java.text.DateFormat)
|
||||||
|
*/
|
||||||
public DocData parse(String name, Date date, StringBuffer inputText, DateFormat dateFormat) throws IOException, InterruptedException {
|
public DocData parse(String name, Date date, StringBuffer inputText, DateFormat dateFormat) throws IOException, InterruptedException {
|
||||||
// TODO Auto-generated method stub
|
|
||||||
return parse(name, date, new StringReader(inputText.toString()), dateFormat);
|
return parse(name, date, new StringReader(inputText.toString()), dateFormat);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ import java.util.Locale;
|
||||||
*/
|
*/
|
||||||
public class ReutersDocMaker extends BasicDocMaker {
|
public class ReutersDocMaker extends BasicDocMaker {
|
||||||
|
|
||||||
private DateFormat dateFormat;
|
private ThreadLocal dateFormat = new ThreadLocal();
|
||||||
private File dataDir = null;
|
private File dataDir = null;
|
||||||
private ArrayList inputFiles = new ArrayList();
|
private ArrayList inputFiles = new ArrayList();
|
||||||
private int nextFile = 0;
|
private int nextFile = 0;
|
||||||
|
@ -58,11 +58,21 @@ public class ReutersDocMaker extends BasicDocMaker {
|
||||||
if (inputFiles.size()==0) {
|
if (inputFiles.size()==0) {
|
||||||
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
|
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
|
||||||
}
|
}
|
||||||
// date format: 30-MAR-1987 14:22:36.87
|
|
||||||
dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
|
|
||||||
dateFormat.setLenient(true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// get/initiate a thread-local simple date format (must do so
|
||||||
|
// because SimpleDateFormat is not thread-safe.
|
||||||
|
protected synchronized DateFormat getDateFormat () {
|
||||||
|
DateFormat df = (DateFormat) dateFormat.get();
|
||||||
|
if (df == null) {
|
||||||
|
// date format: 30-MAR-1987 14:22:36.87
|
||||||
|
df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
|
||||||
|
df.setLenient(true);
|
||||||
|
dateFormat.set(df);
|
||||||
|
}
|
||||||
|
return df;
|
||||||
|
}
|
||||||
|
|
||||||
protected DocData getNextDocData() throws Exception {
|
protected DocData getNextDocData() throws Exception {
|
||||||
File f = null;
|
File f = null;
|
||||||
String name = null;
|
String name = null;
|
||||||
|
@ -95,7 +105,7 @@ public class ReutersDocMaker extends BasicDocMaker {
|
||||||
addBytes(f.length());
|
addBytes(f.length());
|
||||||
|
|
||||||
|
|
||||||
Date date = dateFormat.parse(dateStr.trim());
|
Date date = getDateFormat().parse(dateStr.trim());
|
||||||
return new DocData(name, bodyBuf.toString(), title, null, date);
|
return new DocData(name, bodyBuf.toString(), title, null, date);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,10 @@ public class SimpleDocMaker extends BasicDocMaker {
|
||||||
"right place at the right time with the right knowledge.";
|
"right place at the right time with the right knowledge.";
|
||||||
|
|
||||||
// return a new docid
|
// return a new docid
|
||||||
private synchronized int newdocid() {
|
private synchronized int newdocid() throws NoMoreDataException {
|
||||||
|
if (docID>0 && !forever) {
|
||||||
|
throw new NoMoreDataException();
|
||||||
|
}
|
||||||
return docID++;
|
return docID++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,11 +62,9 @@ public class SimpleDocMaker extends BasicDocMaker {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected DocData getNextDocData() throws NoMoreDataException {
|
protected DocData getNextDocData() throws NoMoreDataException {
|
||||||
if (docID>0 && !forever) {
|
int id = newdocid();
|
||||||
throw new NoMoreDataException();
|
|
||||||
}
|
|
||||||
addBytes(DOC_TEXT.length());
|
addBytes(DOC_TEXT.length());
|
||||||
return new DocData("doc"+newdocid(),DOC_TEXT, null, null, null);
|
return new DocData("doc"+id, DOC_TEXT, null, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,7 +41,7 @@ public class TrecDocMaker extends BasicDocMaker {
|
||||||
|
|
||||||
private static final String newline = System.getProperty("line.separator");
|
private static final String newline = System.getProperty("line.separator");
|
||||||
|
|
||||||
private DateFormat dateFormat [];
|
private ThreadLocal dateFormat = new ThreadLocal();
|
||||||
private File dataDir = null;
|
private File dataDir = null;
|
||||||
private ArrayList inputFiles = new ArrayList();
|
private ArrayList inputFiles = new ArrayList();
|
||||||
private int nextFile = 0;
|
private int nextFile = 0;
|
||||||
|
@ -67,12 +67,6 @@ public class TrecDocMaker extends BasicDocMaker {
|
||||||
if (inputFiles.size()==0) {
|
if (inputFiles.size()==0) {
|
||||||
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
|
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
|
||||||
}
|
}
|
||||||
// date format: 30-MAR-1987 14:22:36.87
|
|
||||||
dateFormat = new SimpleDateFormat[DATE_FORMATS.length];
|
|
||||||
for (int i = 0; i < dateFormat.length; i++) {
|
|
||||||
dateFormat[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US);
|
|
||||||
dateFormat[i].setLenient(true);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void openNextFile() throws NoMoreDataException, Exception {
|
private void openNextFile() throws NoMoreDataException, Exception {
|
||||||
|
@ -177,17 +171,30 @@ public class TrecDocMaker extends BasicDocMaker {
|
||||||
// this is the next document, so parse it
|
// this is the next document, so parse it
|
||||||
Date date = parseDate(dateStr);
|
Date date = parseDate(dateStr);
|
||||||
HTMLParser p = getHtmlParser();
|
HTMLParser p = getHtmlParser();
|
||||||
DocData docData = p.parse(name, date, sb, dateFormat[0]);
|
DocData docData = p.parse(name, date, sb, getDateFormat(0));
|
||||||
addBytes(sb.length()); // count char length of parsed html text (larger than the plain doc body text).
|
addBytes(sb.length()); // count char length of parsed html text (larger than the plain doc body text).
|
||||||
|
|
||||||
return docData;
|
return docData;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private DateFormat getDateFormat(int n) {
|
||||||
|
DateFormat df[] = (DateFormat[]) dateFormat.get();
|
||||||
|
if (df == null) {
|
||||||
|
df = new SimpleDateFormat[DATE_FORMATS.length];
|
||||||
|
for (int i = 0; i < df.length; i++) {
|
||||||
|
df[i] = new SimpleDateFormat(DATE_FORMATS[i],Locale.US);
|
||||||
|
df[i].setLenient(true);
|
||||||
|
}
|
||||||
|
dateFormat.set(df);
|
||||||
|
}
|
||||||
|
return df[n];
|
||||||
|
}
|
||||||
|
|
||||||
private Date parseDate(String dateStr) {
|
private Date parseDate(String dateStr) {
|
||||||
Date date = null;
|
Date date = null;
|
||||||
for (int i=0; i<dateFormat.length; i++) {
|
for (int i=0; i<DATE_FORMATS.length; i++) {
|
||||||
try {
|
try {
|
||||||
date = dateFormat[i].parse(dateStr.trim());
|
date = getDateFormat(i).parse(dateStr.trim());
|
||||||
return date;
|
return date;
|
||||||
} catch (ParseException e) {
|
} catch (ParseException e) {
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,8 +40,9 @@ public class AddDocTask extends PerfTask {
|
||||||
super(runData);
|
super(runData);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int logStep = -1;
|
private int logStep = -1;
|
||||||
private int docSize = 0;
|
private int docSize = 0;
|
||||||
|
int count = 0;
|
||||||
|
|
||||||
// volatile data passed between setup(), doLogic(), tearDown().
|
// volatile data passed between setup(), doLogic(), tearDown().
|
||||||
private Document doc = null;
|
private Document doc = null;
|
||||||
|
@ -64,8 +65,7 @@ public class AddDocTask extends PerfTask {
|
||||||
* @see PerfTask#tearDown()
|
* @see PerfTask#tearDown()
|
||||||
*/
|
*/
|
||||||
public void tearDown() throws Exception {
|
public void tearDown() throws Exception {
|
||||||
DocMaker docMaker = getRunData().getDocMaker();
|
log(++count);
|
||||||
log(docMaker.getCount());
|
|
||||||
doc = null;
|
doc = null;
|
||||||
super.tearDown();
|
super.tearDown();
|
||||||
}
|
}
|
||||||
|
@ -77,11 +77,11 @@ public class AddDocTask extends PerfTask {
|
||||||
|
|
||||||
private void log (int count) {
|
private void log (int count) {
|
||||||
if (logStep<0) {
|
if (logStep<0) {
|
||||||
// avoid sync although race possible here
|
// init once per instance
|
||||||
logStep = getRunData().getConfig().get("doc.add.log.step",DEFAULT_ADD_DOC_LOG_STEP);
|
logStep = getRunData().getConfig().get("doc.add.log.step",DEFAULT_ADD_DOC_LOG_STEP);
|
||||||
}
|
}
|
||||||
if (logStep>0 && (count%logStep)==0) {
|
if (logStep>0 && (count%logStep)==0) {
|
||||||
System.out.println("--> processed (add) "+count+" docs");
|
System.out.println("--> "+Thread.currentThread().getName()+" processed (add) "+count+" docs");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,8 +43,8 @@ public class DeleteDocTask extends PerfTask {
|
||||||
super(runData);
|
super(runData);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int logStep = -1;
|
private int logStep = -1;
|
||||||
private static int deleteStep = -1;
|
private int deleteStep = -1;
|
||||||
private static int numDeleted = 0;
|
private static int numDeleted = 0;
|
||||||
private static int lastDeleted = -1;
|
private static int lastDeleted = -1;
|
||||||
|
|
||||||
|
|
|
@ -123,6 +123,34 @@ public class TestPerfTasksLogic extends TestCase {
|
||||||
assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
|
assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test Parallel Doc Maker logic (for LUCENE-940)
|
||||||
|
*/
|
||||||
|
public void testParallelDocMaker() throws Exception {
|
||||||
|
// 1. alg definition (required in every "logic" test)
|
||||||
|
String algLines[] = {
|
||||||
|
"# ----- properties ",
|
||||||
|
"doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
|
||||||
|
"doc.add.log.step=2697",
|
||||||
|
"doc.term.vector=false",
|
||||||
|
"doc.maker.forever=false",
|
||||||
|
"directory=FSDirectory",
|
||||||
|
"doc.stored=false",
|
||||||
|
"doc.tokenized=false",
|
||||||
|
"# ----- alg ",
|
||||||
|
"CreateIndex",
|
||||||
|
"[ { AddDoc } : * ] : 4 ",
|
||||||
|
"CloseIndex",
|
||||||
|
};
|
||||||
|
|
||||||
|
// 2. execute the algorithm (required in every "logic" test)
|
||||||
|
Benchmark benchmark = execBenchmark(algLines);
|
||||||
|
|
||||||
|
// 3. test number of docs in the index
|
||||||
|
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
|
||||||
|
int ndocsExpected = 21578; // that's how many docs there are in the Reuters collecton.
|
||||||
|
assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
|
||||||
|
}
|
||||||
|
|
||||||
// create the benchmark and execute it.
|
// create the benchmark and execute it.
|
||||||
private Benchmark execBenchmark(String[] algLines) throws Exception {
|
private Benchmark execBenchmark(String[] algLines) throws Exception {
|
||||||
|
|
Loading…
Reference in New Issue