mirror of https://github.com/apache/lucene.git
LUCENE-4064: Move ContentSource to PerfRunData out of DocMaker
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339555 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
422828cfd6
commit
63da7ea3fd
|
@ -25,20 +25,20 @@ import java.util.HashMap;
|
|||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
|
||||
import org.apache.lucene.benchmark.byTask.stats.Points;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -80,6 +80,7 @@ public class PerfRunData implements Closeable {
|
|||
private Directory directory;
|
||||
private Analyzer analyzer;
|
||||
private DocMaker docMaker;
|
||||
private ContentSource contentSource;
|
||||
private FacetSource facetSource;
|
||||
private Locale locale;
|
||||
|
||||
|
@ -105,10 +106,16 @@ public class PerfRunData implements Closeable {
|
|||
// analyzer (default is standard analyzer)
|
||||
analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
|
||||
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
|
||||
|
||||
// content source
|
||||
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
|
||||
contentSource = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
|
||||
contentSource.setConfig(config);
|
||||
|
||||
// doc maker
|
||||
docMaker = Class.forName(config.get("doc.maker",
|
||||
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
|
||||
docMaker.setConfig(config);
|
||||
docMaker.setConfig(config, contentSource);
|
||||
// facet source
|
||||
facetSource = Class.forName(config.get("facet.source",
|
||||
"org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
|
||||
|
@ -129,10 +136,11 @@ public class PerfRunData implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
IOUtils.close(indexWriter, indexReader, directory,
|
||||
taxonomyWriter, taxonomyReader, taxonomyDir,
|
||||
docMaker, facetSource);
|
||||
docMaker, facetSource, contentSource);
|
||||
|
||||
// close all perf objects that are closeable.
|
||||
ArrayList<Closeable> perfObjectsToClose = new ArrayList<Closeable>();
|
||||
|
@ -361,7 +369,12 @@ public class PerfRunData implements Closeable {
|
|||
this.analyzer = analyzer;
|
||||
}
|
||||
|
||||
/** Returns the docMaker. */
|
||||
/** Returns the ContentSource. */
|
||||
public ContentSource getContentSource() {
|
||||
return contentSource;
|
||||
}
|
||||
|
||||
/** Returns the DocMaker. */
|
||||
public DocMaker getDocMaker() {
|
||||
return docMaker;
|
||||
}
|
||||
|
@ -393,6 +406,7 @@ public class PerfRunData implements Closeable {
|
|||
}
|
||||
|
||||
public void resetInputs() throws IOException {
|
||||
contentSource.resetInputs();
|
||||
docMaker.resetInputs();
|
||||
facetSource.resetInputs();
|
||||
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
|
||||
|
|
|
@ -131,7 +131,6 @@ public abstract class ContentItemsSource implements Closeable {
|
|||
* items generated since the last reset, so it's important to call
|
||||
* super.resetInputs in case you override this method.
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public void resetInputs() throws IOException {
|
||||
bytesCount = 0;
|
||||
itemCount = 0;
|
||||
|
|
|
@ -355,26 +355,11 @@ public class DocMaker implements Closeable {
|
|||
* {@link ContentSource}, and it can be overridden to do more work (but make
|
||||
* sure to call super.close()).
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
source.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of bytes generated by the content source since last
|
||||
* reset.
|
||||
*/
|
||||
public synchronized long getBytesCount() {
|
||||
return source.getBytesCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the total number of bytes that were generated by the content source
|
||||
* defined to that doc maker.
|
||||
*/
|
||||
public long getTotalBytesCount() {
|
||||
return source.getTotalBytesCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link Document} object ready for indexing. This method uses the
|
||||
* {@link ContentSource} to get the next document from the source, and creates
|
||||
|
@ -426,26 +411,16 @@ public class DocMaker implements Closeable {
|
|||
public synchronized void resetInputs() throws IOException {
|
||||
source.printStatistics("docs");
|
||||
// re-initiate since properties by round may have changed.
|
||||
setConfig(config);
|
||||
setConfig(config, source);
|
||||
source.resetInputs();
|
||||
numDocsCreated.set(0);
|
||||
resetLeftovers();
|
||||
}
|
||||
|
||||
/** Set the configuration parameters of this doc maker. */
|
||||
public void setConfig(Config config) {
|
||||
public void setConfig(Config config, ContentSource source) {
|
||||
this.config = config;
|
||||
try {
|
||||
if (source != null) {
|
||||
source.close();
|
||||
}
|
||||
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
|
||||
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
|
||||
source.setConfig(config);
|
||||
} catch (Exception e) {
|
||||
// Should not get here. Throw runtime exception.
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
this.source = source;
|
||||
|
||||
boolean stored = config.get("doc.stored", false);
|
||||
boolean bodyStored = config.get("doc.body.stored", stored);
|
||||
|
|
|
@ -20,34 +20,16 @@ package org.apache.lucene.benchmark.byTask.tasks;
|
|||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.DocData;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
|
||||
/**
|
||||
* Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}.
|
||||
* Supports the following parameters:
|
||||
* <ul>
|
||||
* <li>content.source - the content source to use. (mandatory)
|
||||
* </ul>
|
||||
*/
|
||||
/** Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}. */
|
||||
public class ConsumeContentSourceTask extends PerfTask {
|
||||
|
||||
private ContentSource source;
|
||||
private DocData dd = new DocData();
|
||||
private final ContentSource source;
|
||||
private ThreadLocal<DocData> dd = new ThreadLocal<DocData>();
|
||||
|
||||
public ConsumeContentSourceTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
Config config = runData.getConfig();
|
||||
String sourceClass = config.get("content.source", null);
|
||||
if (sourceClass == null) {
|
||||
throw new IllegalArgumentException("content.source must be defined");
|
||||
}
|
||||
try {
|
||||
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
|
||||
source.setConfig(config);
|
||||
source.resetInputs();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
source = runData.getContentSource();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -55,15 +37,9 @@ public class ConsumeContentSourceTask extends PerfTask {
|
|||
return "read " + recsCount + " documents from the content source";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws Exception {
|
||||
source.close();
|
||||
super.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doLogic() throws Exception {
|
||||
dd = source.getNextDocData(dd);
|
||||
dd.set(source.getNextDocData(dd.get()));
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,9 @@ import java.io.FileWriter;
|
|||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -122,15 +124,19 @@ public class ExtractWikipedia {
|
|||
} else if (arg.equals("--discardImageOnlyDocs") || arg.equals("-d")) {
|
||||
keepImageOnlyDocs = false;
|
||||
}
|
||||
|
||||
}
|
||||
DocMaker docMaker = new DocMaker();
|
||||
|
||||
Properties properties = new Properties();
|
||||
properties.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource");
|
||||
properties.setProperty("docs.file", wikipedia.getAbsolutePath());
|
||||
properties.setProperty("content.source.forever", "false");
|
||||
properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
|
||||
docMaker.setConfig(new Config(properties));
|
||||
Config config = new Config(properties);
|
||||
|
||||
ContentSource source = new EnwikiContentSource();
|
||||
source.setConfig(config);
|
||||
|
||||
DocMaker docMaker = new DocMaker();
|
||||
docMaker.setConfig(config, source);
|
||||
docMaker.resetInputs();
|
||||
if (wikipedia.exists()) {
|
||||
System.out.println("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");
|
||||
|
|
|
@ -28,7 +28,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
|
|||
import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.ResetInputsTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -42,7 +41,7 @@ import org.apache.lucene.search.TopDocs;
|
|||
/** Tests the functionality of {@link DocMaker}. */
|
||||
public class DocMakerTest extends BenchmarkTestCase {
|
||||
|
||||
static final class OneDocSource extends ContentSource {
|
||||
public static final class OneDocSource extends ContentSource {
|
||||
|
||||
private boolean finish = false;
|
||||
|
||||
|
@ -106,7 +105,6 @@ public class DocMakerTest extends BenchmarkTestCase {
|
|||
|
||||
// Indexing configuration.
|
||||
props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
|
||||
props.setProperty("content.source", OneDocSource.class.getName());
|
||||
props.setProperty("directory", "RAMDirectory");
|
||||
if (setNormsProp) {
|
||||
props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
|
||||
|
@ -119,7 +117,7 @@ public class DocMakerTest extends BenchmarkTestCase {
|
|||
Config config = new Config(props);
|
||||
|
||||
DocMaker dm = new DocMaker();
|
||||
dm.setConfig(config);
|
||||
dm.setConfig(config, new OneDocSource());
|
||||
return dm.makeDocument();
|
||||
}
|
||||
|
||||
|
@ -175,12 +173,15 @@ public class DocMakerTest extends BenchmarkTestCase {
|
|||
ps.close();
|
||||
|
||||
Properties props = new Properties();
|
||||
props.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.LineDocSource");
|
||||
props.setProperty("docs.file", f.getAbsolutePath());
|
||||
props.setProperty("content.source.forever", "false");
|
||||
Config config = new Config(props);
|
||||
|
||||
ContentSource source = new LineDocSource();
|
||||
source.setConfig(config);
|
||||
|
||||
DocMaker dm = new DocMaker();
|
||||
dm.setConfig(config);
|
||||
dm.setConfig(config, source);
|
||||
dm.resetInputs();
|
||||
dm.resetInputs();
|
||||
dm.close();
|
||||
|
|
Loading…
Reference in New Issue