From 25f80c71c98be4a5901109d07e877cb9dcae567b Mon Sep 17 00:00:00 2001 From: Doron Cohen Date: Sun, 9 Mar 2008 16:43:32 +0000 Subject: [PATCH] LUCENE-1209: Fixed DocMaker settings by round. Prior to this fix, DocMaker settings of first round were used in all rounds. (E.g. term vectors.) git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@635280 13f79535-47bb-0310-9956-ffa450edef68 --- contrib/benchmark/CHANGES.txt | 5 +++++ .../benchmark/byTask/feeds/BasicDocMaker.java | 5 +++++ .../byTask/feeds/ReutersDocMaker.java | 3 ++- .../benchmark/byTask/feeds/TrecDocMaker.java | 2 ++ .../benchmark/byTask/TestPerfTasksLogic.java | 22 ++++++++++++++----- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/contrib/benchmark/CHANGES.txt b/contrib/benchmark/CHANGES.txt index dc9c986eca0..aa586ecab1b 100644 --- a/contrib/benchmark/CHANGES.txt +++ b/contrib/benchmark/CHANGES.txt @@ -3,6 +3,11 @@ Lucene Benchmark Contrib Change Log The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. $Id:$ +3/9/08 + LUCENE-1209: Fixed DocMaker settings by round. Prior to this fix, DocMaker settings of + first round were used in all rounds. (E.g. term vectors.) + (Mark Miller via Doron Cohen) + 1/30/08 LUCENE-1156: Fixed redirect problem in EnwikiDocMaker. Refactored ExtractWikipedia to use EnwikiDocMaker. Added property to EnwikiDocMaker to allow for skipping image only documents. diff --git a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java index 2b6c14fd8e0..bf5cf5ae57a 100644 --- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java +++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/BasicDocMaker.java @@ -219,6 +219,7 @@ public abstract class BasicDocMaker implements DocMaker { */ public synchronized void resetInputs() { printDocStatistics(); + setConfig(config); //re-initiate since properties by round may have changed. numBytes = 0; numDocsCreated = 0; resetLeftovers(); @@ -252,6 +253,10 @@ public abstract class BasicDocMaker implements DocMaker { numUniqueBytes += n; } + protected void resetUniqueBytes () { + numUniqueBytes = 0; + } + protected synchronized void addBytes (long n) { numBytes += n; } diff --git a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java index 2665c6cfcc4..4e13ddd6e8c 100644 --- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java +++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersDocMaker.java @@ -56,7 +56,8 @@ public class ReutersDocMaker extends BasicDocMaker { if (!dataDir.isAbsolute()) { dataDir = new File(workDir, d); } - + resetUniqueBytes(); + inputFiles.clear(); collectFiles(dataDir,inputFiles); if (inputFiles.size()==0) { throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath()); diff --git a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java index 4e56a889eb2..566020addb0 100644 --- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java +++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocMaker.java @@ -72,6 +72,8 @@ public class TrecDocMaker extends BasicDocMaker { if (!dataDir.isAbsolute()) { dataDir = new File(workDir, d); } + resetUniqueBytes(); + inputFiles.clear(); collectFiles(dataDir,inputFiles); if (inputFiles.size()==0) { throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath()); diff --git a/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index cb8b8f4706e..32dbcd343df 100755 --- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -38,6 +38,8 @@ import org.apache.lucene.index.TermEnum; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.LogDocMergePolicy; +import org.apache.lucene.index.TermFreqVector; +import org.apache.lucene.store.Directory; import junit.framework.TestCase; @@ -165,7 +167,7 @@ public class TestPerfTasksLogic extends TestCase { assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); // now we should be able to open the index for write. - IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false); + IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false,IndexWriter.MaxFieldLength.UNLIMITED); iw.close(); IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory()); assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs()); @@ -237,7 +239,7 @@ public class TestPerfTasksLogic extends TestCase { assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches); assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); // now we should be able to open the index for write. - IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false); + IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false,IndexWriter.MaxFieldLength.UNLIMITED); iw.close(); IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory()); assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs()); @@ -327,7 +329,7 @@ public class TestPerfTasksLogic extends TestCase { benchmark = execBenchmark(algLines2); // now we should be able to open the index for write. - IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false); + IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false,IndexWriter.MaxFieldLength.UNLIMITED); iw.close(); IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory()); @@ -639,8 +641,8 @@ public class TestPerfTasksLogic extends TestCase { "doc.add.log.step=3", "ram.flush.mb=-1", "max.buffered=2", - "compound=false", - "doc.term.vector=false", + "compound=cmpnd:true:false", + "doc.term.vector=vector:false:true", "doc.maker.forever=false", "directory=RAMDirectory", "doc.stored=false", @@ -652,6 +654,7 @@ public class TestPerfTasksLogic extends TestCase { " ResetSystemErase", " CreateIndex", " { \"AddDocs\" AddDoc > : * ", + " NewRound", "} : 2", }; @@ -661,7 +664,14 @@ public class TestPerfTasksLogic extends TestCase { assertEquals(2, writer.getMaxBufferedDocs()); assertEquals(IndexWriter.DISABLE_AUTO_FLUSH, (int) writer.getRAMBufferSizeMB()); assertEquals(3, writer.getMergeFactor()); - assertEquals(false, writer.getUseCompoundFile()); + assertFalse(writer.getUseCompoundFile()); + writer.close(); + Directory dir = benchmark.getRunData().getDirectory(); + IndexReader reader = IndexReader.open(dir); + TermFreqVector [] tfv = reader.getTermFreqVectors(0); + assertNotNull(tfv); + assertTrue(tfv.length > 0); + reader.close(); } /**