LUCENE-4064: Move ContentSource out of DocMaker and into PerfRunData

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339555 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2012-05-17 11:22:10 +00:00
parent 422828cfd6
commit 63da7ea3fd
6 changed files with 45 additions and 74 deletions

View File

@ -25,20 +25,20 @@ import java.util.HashMap;
import java.util.Locale; import java.util.Locale;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.FacetSource; import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.benchmark.byTask.stats.Points; import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.benchmark.byTask.tasks.PerfTask; import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask; import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
import org.apache.lucene.benchmark.byTask.tasks.SearchTask; import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils; import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
@ -80,6 +80,7 @@ public class PerfRunData implements Closeable {
private Directory directory; private Directory directory;
private Analyzer analyzer; private Analyzer analyzer;
private DocMaker docMaker; private DocMaker docMaker;
private ContentSource contentSource;
private FacetSource facetSource; private FacetSource facetSource;
private Locale locale; private Locale locale;
@ -105,10 +106,16 @@ public class PerfRunData implements Closeable {
// analyzer (default is standard analyzer) // analyzer (default is standard analyzer)
analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer", analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
"org.apache.lucene.analysis.standard.StandardAnalyzer")); "org.apache.lucene.analysis.standard.StandardAnalyzer"));
// content source
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
contentSource = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
contentSource.setConfig(config);
// doc maker // doc maker
docMaker = Class.forName(config.get("doc.maker", docMaker = Class.forName(config.get("doc.maker",
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance(); "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
docMaker.setConfig(config); docMaker.setConfig(config, contentSource);
// facet source // facet source
facetSource = Class.forName(config.get("facet.source", facetSource = Class.forName(config.get("facet.source",
"org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance(); "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
@ -129,10 +136,11 @@ public class PerfRunData implements Closeable {
} }
} }
@Override
public void close() throws IOException { public void close() throws IOException {
IOUtils.close(indexWriter, indexReader, directory, IOUtils.close(indexWriter, indexReader, directory,
taxonomyWriter, taxonomyReader, taxonomyDir, taxonomyWriter, taxonomyReader, taxonomyDir,
docMaker, facetSource); docMaker, facetSource, contentSource);
// close all perf objects that are closeable. // close all perf objects that are closeable.
ArrayList<Closeable> perfObjectsToClose = new ArrayList<Closeable>(); ArrayList<Closeable> perfObjectsToClose = new ArrayList<Closeable>();
@ -361,7 +369,12 @@ public class PerfRunData implements Closeable {
this.analyzer = analyzer; this.analyzer = analyzer;
} }
/** Returns the docMaker. */ /** Returns the ContentSource. */
public ContentSource getContentSource() {
return contentSource;
}
/** Returns the DocMaker. */
public DocMaker getDocMaker() { public DocMaker getDocMaker() {
return docMaker; return docMaker;
} }
@ -393,6 +406,7 @@ public class PerfRunData implements Closeable {
} }
public void resetInputs() throws IOException { public void resetInputs() throws IOException {
contentSource.resetInputs();
docMaker.resetInputs(); docMaker.resetInputs();
facetSource.resetInputs(); facetSource.resetInputs();
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) { for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {

View File

@ -131,7 +131,6 @@ public abstract class ContentItemsSource implements Closeable {
* items generated since the last reset, so it's important to call * items generated since the last reset, so it's important to call
* super.resetInputs in case you override this method. * super.resetInputs in case you override this method.
*/ */
@SuppressWarnings("unused")
public void resetInputs() throws IOException { public void resetInputs() throws IOException {
bytesCount = 0; bytesCount = 0;
itemCount = 0; itemCount = 0;

View File

@ -355,26 +355,11 @@ public class DocMaker implements Closeable {
* {@link ContentSource}, and it can be overridden to do more work (but make * {@link ContentSource}, and it can be overridden to do more work (but make
* sure to call super.close()). * sure to call super.close()).
*/ */
@Override
public void close() throws IOException { public void close() throws IOException {
source.close(); source.close();
} }
/**
* Returns the number of bytes generated by the content source since last
* reset.
*/
public synchronized long getBytesCount() {
return source.getBytesCount();
}
/**
* Returns the total number of bytes that were generated by the content source
* defined to that doc maker.
*/
public long getTotalBytesCount() {
return source.getTotalBytesCount();
}
/** /**
* Creates a {@link Document} object ready for indexing. This method uses the * Creates a {@link Document} object ready for indexing. This method uses the
* {@link ContentSource} to get the next document from the source, and creates * {@link ContentSource} to get the next document from the source, and creates
@ -426,26 +411,16 @@ public class DocMaker implements Closeable {
public synchronized void resetInputs() throws IOException { public synchronized void resetInputs() throws IOException {
source.printStatistics("docs"); source.printStatistics("docs");
// re-initiate since properties by round may have changed. // re-initiate since properties by round may have changed.
setConfig(config); setConfig(config, source);
source.resetInputs(); source.resetInputs();
numDocsCreated.set(0); numDocsCreated.set(0);
resetLeftovers(); resetLeftovers();
} }
/** Set the configuration parameters of this doc maker. */ /** Set the configuration parameters of this doc maker. */
public void setConfig(Config config) { public void setConfig(Config config, ContentSource source) {
this.config = config; this.config = config;
try { this.source = source;
if (source != null) {
source.close();
}
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
source.setConfig(config);
} catch (Exception e) {
// Should not get here. Throw runtime exception.
throw new RuntimeException(e);
}
boolean stored = config.get("doc.stored", false); boolean stored = config.get("doc.stored", false);
boolean bodyStored = config.get("doc.body.stored", stored); boolean bodyStored = config.get("doc.body.stored", stored);

View File

@ -20,34 +20,16 @@ package org.apache.lucene.benchmark.byTask.tasks;
import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource; import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocData; import org.apache.lucene.benchmark.byTask.feeds.DocData;
import org.apache.lucene.benchmark.byTask.utils.Config;
/** /** Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}. */
* Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}.
* Supports the following parameters:
* <ul>
* <li>content.source - the content source to use. (mandatory)
* </ul>
*/
public class ConsumeContentSourceTask extends PerfTask { public class ConsumeContentSourceTask extends PerfTask {
private ContentSource source; private final ContentSource source;
private DocData dd = new DocData(); private ThreadLocal<DocData> dd = new ThreadLocal<DocData>();
public ConsumeContentSourceTask(PerfRunData runData) { public ConsumeContentSourceTask(PerfRunData runData) {
super(runData); super(runData);
Config config = runData.getConfig(); source = runData.getContentSource();
String sourceClass = config.get("content.source", null);
if (sourceClass == null) {
throw new IllegalArgumentException("content.source must be defined");
}
try {
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
source.setConfig(config);
source.resetInputs();
} catch (Exception e) {
throw new RuntimeException(e);
}
} }
@Override @Override
@ -55,15 +37,9 @@ public class ConsumeContentSourceTask extends PerfTask {
return "read " + recsCount + " documents from the content source"; return "read " + recsCount + " documents from the content source";
} }
@Override
public void close() throws Exception {
source.close();
super.close();
}
@Override @Override
public int doLogic() throws Exception { public int doLogic() throws Exception {
dd = source.getNextDocData(dd); dd.set(source.getNextDocData(dd.get()));
return 1; return 1;
} }

View File

@ -22,7 +22,9 @@ import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.util.Properties; import java.util.Properties;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException; import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -122,15 +124,19 @@ public class ExtractWikipedia {
} else if (arg.equals("--discardImageOnlyDocs") || arg.equals("-d")) { } else if (arg.equals("--discardImageOnlyDocs") || arg.equals("-d")) {
keepImageOnlyDocs = false; keepImageOnlyDocs = false;
} }
} }
DocMaker docMaker = new DocMaker();
Properties properties = new Properties(); Properties properties = new Properties();
properties.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource");
properties.setProperty("docs.file", wikipedia.getAbsolutePath()); properties.setProperty("docs.file", wikipedia.getAbsolutePath());
properties.setProperty("content.source.forever", "false"); properties.setProperty("content.source.forever", "false");
properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs)); properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
docMaker.setConfig(new Config(properties)); Config config = new Config(properties);
ContentSource source = new EnwikiContentSource();
source.setConfig(config);
DocMaker docMaker = new DocMaker();
docMaker.setConfig(config, source);
docMaker.resetInputs(); docMaker.resetInputs();
if (wikipedia.exists()) { if (wikipedia.exists()) {
System.out.println("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource"); System.out.println("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");

View File

@ -28,7 +28,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.tasks.AddDocTask; import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask; import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask; import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.ResetInputsTask;
import org.apache.lucene.benchmark.byTask.tasks.TaskSequence; import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -42,7 +41,7 @@ import org.apache.lucene.search.TopDocs;
/** Tests the functionality of {@link DocMaker}. */ /** Tests the functionality of {@link DocMaker}. */
public class DocMakerTest extends BenchmarkTestCase { public class DocMakerTest extends BenchmarkTestCase {
static final class OneDocSource extends ContentSource { public static final class OneDocSource extends ContentSource {
private boolean finish = false; private boolean finish = false;
@ -106,7 +105,6 @@ public class DocMakerTest extends BenchmarkTestCase {
// Indexing configuration. // Indexing configuration.
props.setProperty("analyzer", WhitespaceAnalyzer.class.getName()); props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
props.setProperty("content.source", OneDocSource.class.getName());
props.setProperty("directory", "RAMDirectory"); props.setProperty("directory", "RAMDirectory");
if (setNormsProp) { if (setNormsProp) {
props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal)); props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
@ -119,7 +117,7 @@ public class DocMakerTest extends BenchmarkTestCase {
Config config = new Config(props); Config config = new Config(props);
DocMaker dm = new DocMaker(); DocMaker dm = new DocMaker();
dm.setConfig(config); dm.setConfig(config, new OneDocSource());
return dm.makeDocument(); return dm.makeDocument();
} }
@ -175,12 +173,15 @@ public class DocMakerTest extends BenchmarkTestCase {
ps.close(); ps.close();
Properties props = new Properties(); Properties props = new Properties();
props.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.LineDocSource");
props.setProperty("docs.file", f.getAbsolutePath()); props.setProperty("docs.file", f.getAbsolutePath());
props.setProperty("content.source.forever", "false"); props.setProperty("content.source.forever", "false");
Config config = new Config(props); Config config = new Config(props);
ContentSource source = new LineDocSource();
source.setConfig(config);
DocMaker dm = new DocMaker(); DocMaker dm = new DocMaker();
dm.setConfig(config); dm.setConfig(config, source);
dm.resetInputs(); dm.resetInputs();
dm.resetInputs(); dm.resetInputs();
dm.close(); dm.close();