LUCENE-1994: fix thread safety of EnwikContentSource and DocMaker

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@830488 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-10-28 10:06:23 +00:00
parent 578761c635
commit 66d8e773b1
4 changed files with 53 additions and 14 deletions

View File

@ -4,6 +4,11 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
$Id:$ $Id:$
10/28/2009
LUCENE-1994: Fix thread safety of EnwikiContentSource and DocMaker
when doc.reuse.fields is false. Also made docs.reuse.fields=true
thread safe. (Mark Miller, Shai Erera, Mike McCandless)
8/4/2009 8/4/2009
LUCENE-1770: Add EnwikiQueryMaker (Mark Miller) LUCENE-1770: Add EnwikiQueryMaker (Mark Miller)

View File

@ -145,7 +145,6 @@ public class DocMaker {
protected ContentSource source; protected ContentSource source;
protected boolean reuseFields; protected boolean reuseFields;
protected DocState localDocState;
protected boolean indexProperties; protected boolean indexProperties;
private int lastPrintedNumUniqueTexts = 0; private int lastPrintedNumUniqueTexts = 0;
@ -159,7 +158,7 @@ public class DocMaker {
// reset the docdata properties so they are not added more than once. // reset the docdata properties so they are not added more than once.
private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException { private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
final DocState ds = reuseFields ? getDocState() : localDocState; final DocState ds = getDocState();
final Document doc = reuseFields ? ds.doc : new Document(); final Document doc = reuseFields ? ds.doc : new Document();
doc.getFields().clear(); doc.getFields().clear();
@ -242,7 +241,7 @@ public class DocMaker {
protected DocState getDocState() { protected DocState getDocState() {
DocState ds = docState.get(); DocState ds = docState.get();
if (ds == null) { if (ds == null) {
ds = new DocState(true, storeVal, indexVal, bodyIndexVal, termVecVal); ds = new DocState(reuseFields, storeVal, indexVal, bodyIndexVal, termVecVal);
docState.set(ds); docState.set(ds);
} }
return ds; return ds;
@ -286,7 +285,7 @@ public class DocMaker {
*/ */
public Document makeDocument() throws Exception { public Document makeDocument() throws Exception {
resetLeftovers(); resetLeftovers();
DocData docData = source.getNextDocData(reuseFields ? getDocState().docData : localDocState.docData); DocData docData = source.getNextDocData(getDocState().docData);
Document doc = createDocument(docData, 0, -1); Document doc = createDocument(docData, 0, -1);
return doc; return doc;
} }
@ -301,7 +300,7 @@ public class DocMaker {
|| lvr.docdata.getBody().length() == 0) { || lvr.docdata.getBody().length() == 0) {
resetLeftovers(); resetLeftovers();
} }
DocData docData = reuseFields ? getDocState().docData : localDocState.docData; DocData docData = getDocState().docData;
DocData dd = (lvr == null ? source.getNextDocData(docData) : lvr.docdata); DocData dd = (lvr == null ? source.getNextDocData(docData) : lvr.docdata);
int cnt = (lvr == null ? 0 : lvr.cnt); int cnt = (lvr == null ? 0 : lvr.cnt);
while (dd.getBody() == null || dd.getBody().length() < size) { while (dd.getBody() == null || dd.getBody().length() < size) {
@ -404,14 +403,11 @@ public class DocMaker {
storeBytes = config.get("doc.store.body.bytes", false); storeBytes = config.get("doc.store.body.bytes", false);
reuseFields = config.get("doc.reuse.fields", true); reuseFields = config.get("doc.reuse.fields", true);
if (!reuseFields) {
localDocState = new DocState(false, storeVal, indexVal, bodyIndexVal, termVecVal); // In a multi-rounds run, it is important to reset DocState since settings
} else { // of fields may change between rounds, and this is the only way to reset
// In a multi-rounds run, it is important to reset DocState since settings // the cache of all threads.
// of fields may change between rounds, and this is the only way to reset docState = new ThreadLocal<DocState>();
// the cache of all threads.
docState = new ThreadLocal<DocState>();
}
indexProperties = config.get("doc.index.props", false); indexProperties = config.get("doc.index.props", false);

View File

@ -266,7 +266,7 @@ public class EnwikiContentSource extends ContentSource {
} }
} }
public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
String[] tuple = parser.next(); String[] tuple = parser.next();
docData.clear(); docData.clear();
docData.setName(tuple[ID]); docData.setName(tuple[ID]);

View File

@ -40,6 +40,8 @@ import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.LogDocMergePolicy; import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.TermFreqVector; import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.search.FieldCache.StringIndex;
import org.apache.lucene.search.FieldCache;
import junit.framework.TestCase; import junit.framework.TestCase;
@ -268,6 +270,42 @@ public class TestPerfTasksLogic extends TestCase {
ir.close(); ir.close();
} }
// LUCENE-1994: test thread safety of SortableSingleDocMaker
public void testDocMakerThreadSafety() throws Exception {
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"# ----- properties ",
"content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource",
"doc.term.vector=false",
"log.step.AddDoc=10000",
"content.source.forever=true",
"directory=RAMDirectory",
"doc.reuse.fields=false",
"doc.stored=false",
"doc.tokenized=false",
"doc.index.props=true",
"# ----- alg ",
"CreateIndex",
"[ { AddDoc > : 2500 ] : 4",
"CloseIndex",
};
// 2. we test this value later
CountingSearchTestTask.numSearches = 0;
// 3. execute the algorithm (required in every "logic" test)
Benchmark benchmark = execBenchmark(algLines);
IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true);
StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country");
final int maxDoc = r.maxDoc();
assertEquals(10000, maxDoc);
for(int i=0;i<10000;i++) {
assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]);
}
r.close();
}
/** /**
* Test Parallel Doc Maker logic (for LUCENE-940) * Test Parallel Doc Maker logic (for LUCENE-940)
*/ */