mirror of https://github.com/apache/lucene.git
LUCENE-1994: fix thread safety of EnwikiContentSource and DocMaker
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@830488 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 578761c635
commit 66d8e773b1
CHANGES.txt (benchmark contrib):

@@ -4,6 +4,11 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
 $Id:$
 
+10/28/2009
+LUCENE-1994: Fix thread safety of EnwikiContentSource and DocMaker
+when doc.reuse.fields is false. Also made docs.reuse.fields=true
+thread safe. (Mark Miller, Shai Erera, Mike McCandless)
+
 8/4/2009
 
 LUCENE-1770: Add EnwikiQueryMaker (Mark Miller)
 
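The entry above describes a classic shared-mutable-state race: with doc.reuse.fields=false, several indexing threads previously funnelled through one shared scratch object, so documents built concurrently could pick up each other's field values. The fix, visible in the DocMaker and EnwikiContentSource diffs below, routes every thread through its own per-thread state instead. A minimal, self-contained sketch of that ThreadLocal pattern, using hypothetical names rather than the benchmark API:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.TimeUnit;

    public class PerThreadStateDemo {

      // Hypothetical stand-in for DocMaker's DocState: a mutable scratch object.
      static class Scratch {
        final StringBuilder body = new StringBuilder();
      }

      // Each thread lazily gets its own Scratch, so no two threads ever share one.
      static final ThreadLocal<Scratch> SCRATCH = new ThreadLocal<Scratch>() {
        @Override protected Scratch initialValue() { return new Scratch(); }
      };

      public static void main(String[] args) throws InterruptedException {
        ExecutorService pool = Executors.newFixedThreadPool(4);
        for (int t = 0; t < 4; t++) {
          final int id = t;
          pool.submit(new Runnable() {
            public void run() {
              Scratch s = SCRATCH.get();   // this thread's private copy
              s.body.setLength(0);         // safe to reset: no other thread sees it
              s.body.append("doc built by thread ").append(id);
              System.out.println(s.body);
            }
          });
        }
        pool.shutdown();
        pool.awaitTermination(10, TimeUnit.SECONDS);
      }
    }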
DocMaker.java:

@@ -145,7 +145,6 @@ public class DocMaker {
 
   protected ContentSource source;
   protected boolean reuseFields;
-  protected DocState localDocState;
   protected boolean indexProperties;
 
   private int lastPrintedNumUniqueTexts = 0;
@@ -159,7 +158,7 @@ public class DocMaker {
   // reset the docdata properties so they are not added more than once.
   private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
 
-    final DocState ds = reuseFields ? getDocState() : localDocState;
+    final DocState ds = getDocState();
     final Document doc = reuseFields ? ds.doc : new Document();
     doc.getFields().clear();
 
@@ -242,7 +241,7 @@ public class DocMaker {
   protected DocState getDocState() {
     DocState ds = docState.get();
     if (ds == null) {
-      ds = new DocState(true, storeVal, indexVal, bodyIndexVal, termVecVal);
+      ds = new DocState(reuseFields, storeVal, indexVal, bodyIndexVal, termVecVal);
       docState.set(ds);
     }
     return ds;
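The get / null-check / set sequence above is the standard lazy-initialization idiom for a ThreadLocal, and the change also makes the cached DocState honour the configured reuseFields flag instead of a hard-coded true. On Java 8 and later the same idiom can be written more compactly with ThreadLocal.withInitial; this is shown only as a comparison, not something available to this 2009 code:

    public class WithInitialDemo {
      // Compact modern equivalent of the get / null-check / set idiom (Java 8+).
      static final ThreadLocal<StringBuilder> BUF =
          ThreadLocal.withInitial(StringBuilder::new);

      public static void main(String[] args) {
        StringBuilder b = BUF.get();   // first get() on a thread runs the supplier
        b.append("per-thread buffer");
        System.out.println(b);
      }
    }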
@@ -286,7 +285,7 @@ public class DocMaker {
    */
   public Document makeDocument() throws Exception {
     resetLeftovers();
-    DocData docData = source.getNextDocData(reuseFields ? getDocState().docData : localDocState.docData);
+    DocData docData = source.getNextDocData(getDocState().docData);
     Document doc = createDocument(docData, 0, -1);
     return doc;
   }
@@ -301,7 +300,7 @@ public class DocMaker {
         || lvr.docdata.getBody().length() == 0) {
       resetLeftovers();
     }
-    DocData docData = reuseFields ? getDocState().docData : localDocState.docData;
+    DocData docData = getDocState().docData;
     DocData dd = (lvr == null ? source.getNextDocData(docData) : lvr.docdata);
     int cnt = (lvr == null ? 0 : lvr.cnt);
     while (dd.getBody() == null || dd.getBody().length() < size) {
@@ -404,14 +403,11 @@ public class DocMaker {
     storeBytes = config.get("doc.store.body.bytes", false);
 
     reuseFields = config.get("doc.reuse.fields", true);
-    if (!reuseFields) {
-      localDocState = new DocState(false, storeVal, indexVal, bodyIndexVal, termVecVal);
-    } else {
-      // In a multi-rounds run, it is important to reset DocState since settings
-      // of fields may change between rounds, and this is the only way to reset
-      // the cache of all threads.
-      docState = new ThreadLocal<DocState>();
-    }
+
+    // In a multi-rounds run, it is important to reset DocState since settings
+    // of fields may change between rounds, and this is the only way to reset
+    // the cache of all threads.
+    docState = new ThreadLocal<DocState>();
 
     indexProperties = config.get("doc.index.props", false);
 
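The retained comment points at the one practical way to invalidate every thread's cached DocState between rounds: replace the ThreadLocal instance itself, since there is no API for clearing other threads' values. A small self-contained illustration of why reassignment behaves like a reset (hypothetical names, not benchmark code):

    public class ThreadLocalResetDemo {

      static ThreadLocal<int[]> state = newState();

      static ThreadLocal<int[]> newState() {
        return new ThreadLocal<int[]>() {
          @Override protected int[] initialValue() { return new int[] { 0 }; }
        };
      }

      public static void main(String[] args) {
        state.get()[0] = 42;       // this thread caches a value in the old instance
        state = newState();        // "reset": every thread re-initializes on its next get()
        System.out.println(state.get()[0]);   // prints 0: fresh per-thread state
        // Values cached against the old instance are weakly keyed and are reclaimed
        // once the old ThreadLocal itself becomes garbage.
      }
    }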
EnwikiContentSource.java:

@@ -266,7 +266,7 @@ public class EnwikiContentSource extends ContentSource {
     }
   }
 
-  public DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
+  public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
     String[] tuple = parser.next();
     docData.clear();
     docData.setName(tuple[ID]);
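Marking getNextDocData as synchronized serializes the indexing threads that pull from the single shared Wikipedia parser, so no caller can observe fields from two different articles mixed into one DocData. A generic sketch of the same pattern against a hypothetical record source (not the Enwiki parser API):

    public class SynchronizedSourceDemo {

      private int nextId;                        // shared cursor, guarded by "synchronized"
      private final String[] scratch = new String[2];

      // Without "synchronized", two threads could interleave the writes below and one
      // of them would receive the id of one record paired with the title of another.
      public synchronized String[] nextRecord() {
        int id = nextId++;
        scratch[0] = "id-" + id;
        scratch[1] = "title-" + id;
        return new String[] { scratch[0], scratch[1] };
      }

      public static void main(String[] args) throws InterruptedException {
        final SynchronizedSourceDemo src = new SynchronizedSourceDemo();
        Runnable consumer = new Runnable() {
          public void run() {
            for (int i = 0; i < 100000; i++) {
              String[] rec = src.nextRecord();
              if (!rec[0].substring(3).equals(rec[1].substring(6))) {
                throw new AssertionError("torn record: " + rec[0] + " / " + rec[1]);
              }
            }
          }
        };
        Thread a = new Thread(consumer), b = new Thread(consumer);
        a.start(); b.start();
        a.join(); b.join();
        System.out.println("no torn records observed");
      }
    }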
TestPerfTasksLogic.java:

@@ -40,6 +40,8 @@ import org.apache.lucene.index.SerialMergeScheduler;
 import org.apache.lucene.index.LogDocMergePolicy;
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.search.FieldCache.StringIndex;
+import org.apache.lucene.search.FieldCache;
 
 import junit.framework.TestCase;
 
@@ -268,6 +270,42 @@ public class TestPerfTasksLogic extends TestCase {
     ir.close();
   }
 
+  // LUCENE-1994: test thread safety of SortableSingleDocMaker
+  public void testDocMakerThreadSafety() throws Exception {
+    // 1. alg definition (required in every "logic" test)
+    String algLines[] = {
+        "# ----- properties ",
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource",
+        "doc.term.vector=false",
+        "log.step.AddDoc=10000",
+        "content.source.forever=true",
+        "directory=RAMDirectory",
+        "doc.reuse.fields=false",
+        "doc.stored=false",
+        "doc.tokenized=false",
+        "doc.index.props=true",
+        "# ----- alg ",
+        "CreateIndex",
+        "[ { AddDoc > : 2500 ] : 4",
+        "CloseIndex",
+    };
+
+    // 2. we test this value later
+    CountingSearchTestTask.numSearches = 0;
+
+    // 3. execute the algorithm (required in every "logic" test)
+    Benchmark benchmark = execBenchmark(algLines);
+
+    IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true);
+    StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country");
+    final int maxDoc = r.maxDoc();
+    assertEquals(10000, maxDoc);
+    for(int i=0;i<10000;i++) {
+      assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]);
+    }
+    r.close();
+  }
+
   /**
    * Test Parallel Doc Maker logic (for LUCENE-940)
    */
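As I read the benchmark alg syntax, "[ { AddDoc > : 2500 ] : 4" repeats the AddDoc task 2500 times inside a parallel block run by 4 threads, so the run adds 4 x 2500 = 10,000 documents, which is exactly what the assertEquals(10000, maxDoc) line expects. The FieldCache lookup on the country field then checks that every one of those documents kept its own value: with the pre-fix shared DocData, concurrent AddDoc threads could clobber each other and leave some documents with a null country.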