mirror of https://github.com/apache/lucene.git
LUCENE-1209: Fixed DocMaker settings by round. Prior to this fix, DocMaker settings of
first round were used in all rounds. (E.g. term vectors.) git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@635280 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ce0309ed5f
commit
25f80c71c9
|
@ -3,6 +3,11 @@ Lucene Benchmark Contrib Change Log
|
|||
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
|
||||
|
||||
$Id:$
|
||||
3/9/08
|
||||
LUCENE-1209: Fixed DocMaker settings by round. Prior to this fix, DocMaker settings of
|
||||
first round were used in all rounds. (E.g. term vectors.)
|
||||
(Mark Miller via Doron Cohen)
|
||||
|
||||
1/30/08
|
||||
LUCENE-1156: Fixed redirect problem in EnwikiDocMaker. Refactored ExtractWikipedia to use EnwikiDocMaker. Added property to EnwikiDocMaker to allow
|
||||
for skipping image only documents.
|
||||
|
|
|
@ -219,6 +219,7 @@ public abstract class BasicDocMaker implements DocMaker {
|
|||
*/
|
||||
public synchronized void resetInputs() {
|
||||
printDocStatistics();
|
||||
setConfig(config); //re-initiate since properties by round may have changed.
|
||||
numBytes = 0;
|
||||
numDocsCreated = 0;
|
||||
resetLeftovers();
|
||||
|
@ -252,6 +253,10 @@ public abstract class BasicDocMaker implements DocMaker {
|
|||
numUniqueBytes += n;
|
||||
}
|
||||
|
||||
protected void resetUniqueBytes () {
|
||||
numUniqueBytes = 0;
|
||||
}
|
||||
|
||||
protected synchronized void addBytes (long n) {
|
||||
numBytes += n;
|
||||
}
|
||||
|
|
|
@ -56,7 +56,8 @@ public class ReutersDocMaker extends BasicDocMaker {
|
|||
if (!dataDir.isAbsolute()) {
|
||||
dataDir = new File(workDir, d);
|
||||
}
|
||||
|
||||
resetUniqueBytes();
|
||||
inputFiles.clear();
|
||||
collectFiles(dataDir,inputFiles);
|
||||
if (inputFiles.size()==0) {
|
||||
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
|
||||
|
|
|
@ -72,6 +72,8 @@ public class TrecDocMaker extends BasicDocMaker {
|
|||
if (!dataDir.isAbsolute()) {
|
||||
dataDir = new File(workDir, d);
|
||||
}
|
||||
resetUniqueBytes();
|
||||
inputFiles.clear();
|
||||
collectFiles(dataDir,inputFiles);
|
||||
if (inputFiles.size()==0) {
|
||||
throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
|
||||
|
|
|
@ -38,6 +38,8 @@ import org.apache.lucene.index.TermEnum;
|
|||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.SerialMergeScheduler;
|
||||
import org.apache.lucene.index.LogDocMergePolicy;
|
||||
import org.apache.lucene.index.TermFreqVector;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
|
@ -165,7 +167,7 @@ public class TestPerfTasksLogic extends TestCase {
|
|||
|
||||
assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
|
||||
// now we should be able to open the index for write.
|
||||
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
|
||||
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false,IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
iw.close();
|
||||
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
|
||||
assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
|
||||
|
@ -237,7 +239,7 @@ public class TestPerfTasksLogic extends TestCase {
|
|||
assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches);
|
||||
assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
|
||||
// now we should be able to open the index for write.
|
||||
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
|
||||
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false,IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
iw.close();
|
||||
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
|
||||
assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
|
||||
|
@ -327,7 +329,7 @@ public class TestPerfTasksLogic extends TestCase {
|
|||
benchmark = execBenchmark(algLines2);
|
||||
|
||||
// now we should be able to open the index for write.
|
||||
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
|
||||
IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false,IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
iw.close();
|
||||
|
||||
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
|
||||
|
@ -639,8 +641,8 @@ public class TestPerfTasksLogic extends TestCase {
|
|||
"doc.add.log.step=3",
|
||||
"ram.flush.mb=-1",
|
||||
"max.buffered=2",
|
||||
"compound=false",
|
||||
"doc.term.vector=false",
|
||||
"compound=cmpnd:true:false",
|
||||
"doc.term.vector=vector:false:true",
|
||||
"doc.maker.forever=false",
|
||||
"directory=RAMDirectory",
|
||||
"doc.stored=false",
|
||||
|
@ -652,6 +654,7 @@ public class TestPerfTasksLogic extends TestCase {
|
|||
" ResetSystemErase",
|
||||
" CreateIndex",
|
||||
" { \"AddDocs\" AddDoc > : * ",
|
||||
" NewRound",
|
||||
"} : 2",
|
||||
};
|
||||
|
||||
|
@ -661,7 +664,14 @@ public class TestPerfTasksLogic extends TestCase {
|
|||
assertEquals(2, writer.getMaxBufferedDocs());
|
||||
assertEquals(IndexWriter.DISABLE_AUTO_FLUSH, (int) writer.getRAMBufferSizeMB());
|
||||
assertEquals(3, writer.getMergeFactor());
|
||||
assertEquals(false, writer.getUseCompoundFile());
|
||||
assertFalse(writer.getUseCompoundFile());
|
||||
writer.close();
|
||||
Directory dir = benchmark.getRunData().getDirectory();
|
||||
IndexReader reader = IndexReader.open(dir);
|
||||
TermFreqVector [] tfv = reader.getTermFreqVectors(0);
|
||||
assertNotNull(tfv);
|
||||
assertTrue(tfv.length > 0);
|
||||
reader.close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue