LUCENE-1209: Fixed DocMaker settings by round. Prior to this fix, the DocMaker settings of

the first round were used in all rounds (e.g. term vectors).


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@635280 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2008-03-09 16:43:32 +00:00
parent ce0309ed5f
commit 25f80c71c9
5 changed files with 30 additions and 7 deletions

View File

@ -3,6 +3,11 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
$Id:$
3/9/08
LUCENE-1209: Fixed DocMaker settings by round. Prior to this fix, the DocMaker settings of
the first round were used in all rounds (e.g. term vectors).
(Mark Miller via Doron Cohen)
1/30/08
LUCENE-1156: Fixed redirect problem in EnwikiDocMaker. Refactored ExtractWikipedia to use EnwikiDocMaker. Added property to EnwikiDocMaker to allow
for skipping image only documents.

View File

@ -219,6 +219,7 @@ public abstract class BasicDocMaker implements DocMaker {
*/
public synchronized void resetInputs() {
printDocStatistics();
setConfig(config); //re-initiate since properties by round may have changed.
numBytes = 0;
numDocsCreated = 0;
resetLeftovers();
@ -252,6 +253,10 @@ public abstract class BasicDocMaker implements DocMaker {
numUniqueBytes += n;
}
/**
 * Resets the {@code numUniqueBytes} counter to zero.
 *
 * <p>Declared {@code synchronized} so the reset is performed while holding
 * this object's monitor, the same way {@code addBytes} updates its counter.
 * Monitors are reentrant, so calling this from code that already holds the
 * lock on this object is safe.
 */
protected synchronized void resetUniqueBytes () {
  numUniqueBytes = 0;
}
/**
 * Adds {@code n} to the running {@code numBytes} total.
 *
 * <p>The method is {@code synchronized}, so the update happens while
 * holding this object's monitor.
 *
 * @param n amount to add to {@code numBytes}
 */
protected synchronized void addBytes (long n) {
  this.numBytes += n;
}

View File

@ -56,7 +56,8 @@ public class ReutersDocMaker extends BasicDocMaker {
if (!dataDir.isAbsolute()) {
dataDir = new File(workDir, d);
}
resetUniqueBytes();
inputFiles.clear();
collectFiles(dataDir,inputFiles);
if (inputFiles.size()==0) {
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());

View File

@ -72,6 +72,8 @@ public class TrecDocMaker extends BasicDocMaker {
if (!dataDir.isAbsolute()) {
dataDir = new File(workDir, d);
}
resetUniqueBytes();
inputFiles.clear();
collectFiles(dataDir,inputFiles);
if (inputFiles.size()==0) {
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());

View File

@ -38,6 +38,8 @@ import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.store.Directory;
import junit.framework.TestCase;
@ -165,7 +167,7 @@ public class TestPerfTasksLogic extends TestCase {
assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
// now we should be able to open the index for write.
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false,IndexWriter.MaxFieldLength.UNLIMITED);
iw.close();
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
@ -237,7 +239,7 @@ public class TestPerfTasksLogic extends TestCase {
assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches);
assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
// now we should be able to open the index for write.
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false,IndexWriter.MaxFieldLength.UNLIMITED);
iw.close();
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
@ -327,7 +329,7 @@ public class TestPerfTasksLogic extends TestCase {
benchmark = execBenchmark(algLines2);
// now we should be able to open the index for write.
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false,IndexWriter.MaxFieldLength.UNLIMITED);
iw.close();
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
@ -639,8 +641,8 @@ public class TestPerfTasksLogic extends TestCase {
"doc.add.log.step=3",
"ram.flush.mb=-1",
"max.buffered=2",
"compound=false",
"doc.term.vector=false",
"compound=cmpnd:true:false",
"doc.term.vector=vector:false:true",
"doc.maker.forever=false",
"directory=RAMDirectory",
"doc.stored=false",
@ -652,6 +654,7 @@ public class TestPerfTasksLogic extends TestCase {
" ResetSystemErase",
" CreateIndex",
" { \"AddDocs\" AddDoc > : * ",
" NewRound",
"} : 2",
};
@ -661,7 +664,14 @@ public class TestPerfTasksLogic extends TestCase {
assertEquals(2, writer.getMaxBufferedDocs());
assertEquals(IndexWriter.DISABLE_AUTO_FLUSH, (int) writer.getRAMBufferSizeMB());
assertEquals(3, writer.getMergeFactor());
assertEquals(false, writer.getUseCompoundFile());
assertFalse(writer.getUseCompoundFile());
writer.close();
Directory dir = benchmark.getRunData().getDirectory();
IndexReader reader = IndexReader.open(dir);
TermFreqVector [] tfv = reader.getTermFreqVectors(0);
assertNotNull(tfv);
assertTrue(tfv.length > 0);
reader.close();
}
/**