LUCENE-4064: Move ContentSource to PerfRunData out of DocMaker

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339555 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2012-05-17 11:22:10 +00:00
parent 422828cfd6
commit 63da7ea3fd
6 changed files with 45 additions and 74 deletions

View File

@ -25,20 +25,20 @@ import java.util.HashMap;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
@ -80,6 +80,7 @@ public class PerfRunData implements Closeable {
private Directory directory;
private Analyzer analyzer;
private DocMaker docMaker;
private ContentSource contentSource;
private FacetSource facetSource;
private Locale locale;
@ -105,10 +106,16 @@ public class PerfRunData implements Closeable {
// analyzer (default is standard analyzer)
analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
// content source
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
contentSource = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
contentSource.setConfig(config);
// doc maker
docMaker = Class.forName(config.get("doc.maker",
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
docMaker.setConfig(config);
docMaker.setConfig(config, contentSource);
// facet source
facetSource = Class.forName(config.get("facet.source",
"org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
@ -129,10 +136,11 @@ public class PerfRunData implements Closeable {
}
}
@Override
public void close() throws IOException {
IOUtils.close(indexWriter, indexReader, directory,
taxonomyWriter, taxonomyReader, taxonomyDir,
docMaker, facetSource);
docMaker, facetSource, contentSource);
// close all perf objects that are closeable.
ArrayList<Closeable> perfObjectsToClose = new ArrayList<Closeable>();
@ -361,7 +369,12 @@ public class PerfRunData implements Closeable {
this.analyzer = analyzer;
}
/** Returns the docMaker. */
/** Returns the ContentSource. */
public ContentSource getContentSource() {
return contentSource;
}
/** Returns the DocMaker. */
public DocMaker getDocMaker() {
return docMaker;
}
@ -393,6 +406,7 @@ public class PerfRunData implements Closeable {
}
public void resetInputs() throws IOException {
contentSource.resetInputs();
docMaker.resetInputs();
facetSource.resetInputs();
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {

View File

@ -131,7 +131,6 @@ public abstract class ContentItemsSource implements Closeable {
* items generated since the last reset, so it's important to call
* super.resetInputs in case you override this method.
*/
@SuppressWarnings("unused")
public void resetInputs() throws IOException {
bytesCount = 0;
itemCount = 0;

View File

@ -355,26 +355,11 @@ public class DocMaker implements Closeable {
* {@link ContentSource}, and it can be overridden to do more work (but make
* sure to call super.close()).
*/
@Override
public void close() throws IOException {
source.close();
}
/**
* Returns the number of bytes generated by the content source since last
* reset.
*/
public synchronized long getBytesCount() {
return source.getBytesCount();
}
/**
* Returns the total number of bytes that were generated by the content source
* defined to that doc maker.
*/
public long getTotalBytesCount() {
return source.getTotalBytesCount();
}
/**
* Creates a {@link Document} object ready for indexing. This method uses the
* {@link ContentSource} to get the next document from the source, and creates
@ -426,26 +411,16 @@ public class DocMaker implements Closeable {
public synchronized void resetInputs() throws IOException {
source.printStatistics("docs");
// re-initiate since properties by round may have changed.
setConfig(config);
setConfig(config, source);
source.resetInputs();
numDocsCreated.set(0);
resetLeftovers();
}
/** Set the configuration parameters of this doc maker. */
public void setConfig(Config config) {
public void setConfig(Config config, ContentSource source) {
this.config = config;
try {
if (source != null) {
source.close();
}
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
source.setConfig(config);
} catch (Exception e) {
// Should not get here. Throw runtime exception.
throw new RuntimeException(e);
}
this.source = source;
boolean stored = config.get("doc.stored", false);
boolean bodyStored = config.get("doc.body.stored", stored);

View File

@ -20,34 +20,16 @@ package org.apache.lucene.benchmark.byTask.tasks;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocData;
import org.apache.lucene.benchmark.byTask.utils.Config;
/**
* Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}.
* Supports the following parameters:
* <ul>
* <li>content.source - the content source to use. (mandatory)
* </ul>
*/
/** Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}. */
public class ConsumeContentSourceTask extends PerfTask {
private ContentSource source;
private DocData dd = new DocData();
private final ContentSource source;
private ThreadLocal<DocData> dd = new ThreadLocal<DocData>();
public ConsumeContentSourceTask(PerfRunData runData) {
super(runData);
Config config = runData.getConfig();
String sourceClass = config.get("content.source", null);
if (sourceClass == null) {
throw new IllegalArgumentException("content.source must be defined");
}
try {
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
source.setConfig(config);
source.resetInputs();
} catch (Exception e) {
throw new RuntimeException(e);
}
source = runData.getContentSource();
}
@Override
@ -55,15 +37,9 @@ public class ConsumeContentSourceTask extends PerfTask {
return "read " + recsCount + " documents from the content source";
}
@Override
public void close() throws Exception {
source.close();
super.close();
}
@Override
public int doLogic() throws Exception {
dd = source.getNextDocData(dd);
dd.set(source.getNextDocData(dd.get()));
return 1;
}

View File

@ -22,7 +22,9 @@ import java.io.FileWriter;
import java.io.IOException;
import java.util.Properties;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
@ -122,15 +124,19 @@ public class ExtractWikipedia {
} else if (arg.equals("--discardImageOnlyDocs") || arg.equals("-d")) {
keepImageOnlyDocs = false;
}
}
DocMaker docMaker = new DocMaker();
Properties properties = new Properties();
properties.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource");
properties.setProperty("docs.file", wikipedia.getAbsolutePath());
properties.setProperty("content.source.forever", "false");
properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
docMaker.setConfig(new Config(properties));
Config config = new Config(properties);
ContentSource source = new EnwikiContentSource();
source.setConfig(config);
DocMaker docMaker = new DocMaker();
docMaker.setConfig(config, source);
docMaker.resetInputs();
if (wikipedia.exists()) {
System.out.println("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");

View File

@ -28,7 +28,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.ResetInputsTask;
import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
@ -42,7 +41,7 @@ import org.apache.lucene.search.TopDocs;
/** Tests the functionality of {@link DocMaker}. */
public class DocMakerTest extends BenchmarkTestCase {
static final class OneDocSource extends ContentSource {
public static final class OneDocSource extends ContentSource {
private boolean finish = false;
@ -106,7 +105,6 @@ public class DocMakerTest extends BenchmarkTestCase {
// Indexing configuration.
props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
props.setProperty("content.source", OneDocSource.class.getName());
props.setProperty("directory", "RAMDirectory");
if (setNormsProp) {
props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
@ -119,7 +117,7 @@ public class DocMakerTest extends BenchmarkTestCase {
Config config = new Config(props);
DocMaker dm = new DocMaker();
dm.setConfig(config);
dm.setConfig(config, new OneDocSource());
return dm.makeDocument();
}
@ -175,12 +173,15 @@ public class DocMakerTest extends BenchmarkTestCase {
ps.close();
Properties props = new Properties();
props.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.LineDocSource");
props.setProperty("docs.file", f.getAbsolutePath());
props.setProperty("content.source.forever", "false");
Config config = new Config(props);
ContentSource source = new LineDocSource();
source.setConfig(config);
DocMaker dm = new DocMaker();
dm.setConfig(config);
dm.setConfig(config, source);
dm.resetInputs();
dm.resetInputs();
dm.close();