mirror of https://github.com/apache/lucene.git
LUCENE-4064: Move ContentSource to PerfRunData out of DocMaker
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339555 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
422828cfd6
commit
63da7ea3fd
|
@ -25,20 +25,20 @@ import java.util.HashMap;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
|
||||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||||
import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
|
import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
|
||||||
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
|
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
|
||||||
import org.apache.lucene.benchmark.byTask.stats.Points;
|
import org.apache.lucene.benchmark.byTask.stats.Points;
|
||||||
|
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
|
||||||
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
|
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
|
||||||
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
|
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
|
||||||
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
|
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
|
||||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||||
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
|
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
|
||||||
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
|
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||||
import org.apache.lucene.index.DirectoryReader;
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
@ -80,6 +80,7 @@ public class PerfRunData implements Closeable {
|
||||||
private Directory directory;
|
private Directory directory;
|
||||||
private Analyzer analyzer;
|
private Analyzer analyzer;
|
||||||
private DocMaker docMaker;
|
private DocMaker docMaker;
|
||||||
|
private ContentSource contentSource;
|
||||||
private FacetSource facetSource;
|
private FacetSource facetSource;
|
||||||
private Locale locale;
|
private Locale locale;
|
||||||
|
|
||||||
|
@ -105,10 +106,16 @@ public class PerfRunData implements Closeable {
|
||||||
// analyzer (default is standard analyzer)
|
// analyzer (default is standard analyzer)
|
||||||
analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
|
analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
|
||||||
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
|
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
|
||||||
|
|
||||||
|
// content source
|
||||||
|
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
|
||||||
|
contentSource = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
|
||||||
|
contentSource.setConfig(config);
|
||||||
|
|
||||||
// doc maker
|
// doc maker
|
||||||
docMaker = Class.forName(config.get("doc.maker",
|
docMaker = Class.forName(config.get("doc.maker",
|
||||||
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
|
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
|
||||||
docMaker.setConfig(config);
|
docMaker.setConfig(config, contentSource);
|
||||||
// facet source
|
// facet source
|
||||||
facetSource = Class.forName(config.get("facet.source",
|
facetSource = Class.forName(config.get("facet.source",
|
||||||
"org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
|
"org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
|
||||||
|
@ -129,10 +136,11 @@ public class PerfRunData implements Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
IOUtils.close(indexWriter, indexReader, directory,
|
IOUtils.close(indexWriter, indexReader, directory,
|
||||||
taxonomyWriter, taxonomyReader, taxonomyDir,
|
taxonomyWriter, taxonomyReader, taxonomyDir,
|
||||||
docMaker, facetSource);
|
docMaker, facetSource, contentSource);
|
||||||
|
|
||||||
// close all perf objects that are closeable.
|
// close all perf objects that are closeable.
|
||||||
ArrayList<Closeable> perfObjectsToClose = new ArrayList<Closeable>();
|
ArrayList<Closeable> perfObjectsToClose = new ArrayList<Closeable>();
|
||||||
|
@ -361,7 +369,12 @@ public class PerfRunData implements Closeable {
|
||||||
this.analyzer = analyzer;
|
this.analyzer = analyzer;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the docMaker. */
|
/** Returns the ContentSource. */
|
||||||
|
public ContentSource getContentSource() {
|
||||||
|
return contentSource;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the DocMaker. */
|
||||||
public DocMaker getDocMaker() {
|
public DocMaker getDocMaker() {
|
||||||
return docMaker;
|
return docMaker;
|
||||||
}
|
}
|
||||||
|
@ -393,6 +406,7 @@ public class PerfRunData implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void resetInputs() throws IOException {
|
public void resetInputs() throws IOException {
|
||||||
|
contentSource.resetInputs();
|
||||||
docMaker.resetInputs();
|
docMaker.resetInputs();
|
||||||
facetSource.resetInputs();
|
facetSource.resetInputs();
|
||||||
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
|
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
|
||||||
|
|
|
@ -131,7 +131,6 @@ public abstract class ContentItemsSource implements Closeable {
|
||||||
* items generated since the last reset, so it's important to call
|
* items generated since the last reset, so it's important to call
|
||||||
* super.resetInputs in case you override this method.
|
* super.resetInputs in case you override this method.
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("unused")
|
|
||||||
public void resetInputs() throws IOException {
|
public void resetInputs() throws IOException {
|
||||||
bytesCount = 0;
|
bytesCount = 0;
|
||||||
itemCount = 0;
|
itemCount = 0;
|
||||||
|
|
|
@ -355,26 +355,11 @@ public class DocMaker implements Closeable {
|
||||||
* {@link ContentSource}, and it can be overridden to do more work (but make
|
* {@link ContentSource}, and it can be overridden to do more work (but make
|
||||||
* sure to call super.close()).
|
* sure to call super.close()).
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
source.close();
|
source.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the number of bytes generated by the content source since last
|
|
||||||
* reset.
|
|
||||||
*/
|
|
||||||
public synchronized long getBytesCount() {
|
|
||||||
return source.getBytesCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the total number of bytes that were generated by the content source
|
|
||||||
* defined to that doc maker.
|
|
||||||
*/
|
|
||||||
public long getTotalBytesCount() {
|
|
||||||
return source.getTotalBytesCount();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a {@link Document} object ready for indexing. This method uses the
|
* Creates a {@link Document} object ready for indexing. This method uses the
|
||||||
* {@link ContentSource} to get the next document from the source, and creates
|
* {@link ContentSource} to get the next document from the source, and creates
|
||||||
|
@ -426,26 +411,16 @@ public class DocMaker implements Closeable {
|
||||||
public synchronized void resetInputs() throws IOException {
|
public synchronized void resetInputs() throws IOException {
|
||||||
source.printStatistics("docs");
|
source.printStatistics("docs");
|
||||||
// re-initiate since properties by round may have changed.
|
// re-initiate since properties by round may have changed.
|
||||||
setConfig(config);
|
setConfig(config, source);
|
||||||
source.resetInputs();
|
source.resetInputs();
|
||||||
numDocsCreated.set(0);
|
numDocsCreated.set(0);
|
||||||
resetLeftovers();
|
resetLeftovers();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Set the configuration parameters of this doc maker. */
|
/** Set the configuration parameters of this doc maker. */
|
||||||
public void setConfig(Config config) {
|
public void setConfig(Config config, ContentSource source) {
|
||||||
this.config = config;
|
this.config = config;
|
||||||
try {
|
this.source = source;
|
||||||
if (source != null) {
|
|
||||||
source.close();
|
|
||||||
}
|
|
||||||
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
|
|
||||||
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
|
|
||||||
source.setConfig(config);
|
|
||||||
} catch (Exception e) {
|
|
||||||
// Should not get here. Throw runtime exception.
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
boolean stored = config.get("doc.stored", false);
|
boolean stored = config.get("doc.stored", false);
|
||||||
boolean bodyStored = config.get("doc.body.stored", stored);
|
boolean bodyStored = config.get("doc.body.stored", stored);
|
||||||
|
|
|
@ -20,34 +20,16 @@ package org.apache.lucene.benchmark.byTask.tasks;
|
||||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||||
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
|
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
|
||||||
import org.apache.lucene.benchmark.byTask.feeds.DocData;
|
import org.apache.lucene.benchmark.byTask.feeds.DocData;
|
||||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
|
||||||
|
|
||||||
/**
|
/** Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}. */
|
||||||
* Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}.
|
|
||||||
* Supports the following parameters:
|
|
||||||
* <ul>
|
|
||||||
* <li>content.source - the content source to use. (mandatory)
|
|
||||||
* </ul>
|
|
||||||
*/
|
|
||||||
public class ConsumeContentSourceTask extends PerfTask {
|
public class ConsumeContentSourceTask extends PerfTask {
|
||||||
|
|
||||||
private ContentSource source;
|
private final ContentSource source;
|
||||||
private DocData dd = new DocData();
|
private ThreadLocal<DocData> dd = new ThreadLocal<DocData>();
|
||||||
|
|
||||||
public ConsumeContentSourceTask(PerfRunData runData) {
|
public ConsumeContentSourceTask(PerfRunData runData) {
|
||||||
super(runData);
|
super(runData);
|
||||||
Config config = runData.getConfig();
|
source = runData.getContentSource();
|
||||||
String sourceClass = config.get("content.source", null);
|
|
||||||
if (sourceClass == null) {
|
|
||||||
throw new IllegalArgumentException("content.source must be defined");
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
|
|
||||||
source.setConfig(config);
|
|
||||||
source.resetInputs();
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -55,15 +37,9 @@ public class ConsumeContentSourceTask extends PerfTask {
|
||||||
return "read " + recsCount + " documents from the content source";
|
return "read " + recsCount + " documents from the content source";
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws Exception {
|
|
||||||
source.close();
|
|
||||||
super.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int doLogic() throws Exception {
|
public int doLogic() throws Exception {
|
||||||
dd = source.getNextDocData(dd);
|
dd.set(source.getNextDocData(dd.get()));
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,9 @@ import java.io.FileWriter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
|
||||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||||
|
import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
|
||||||
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
|
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
|
||||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -122,15 +124,19 @@ public class ExtractWikipedia {
|
||||||
} else if (arg.equals("--discardImageOnlyDocs") || arg.equals("-d")) {
|
} else if (arg.equals("--discardImageOnlyDocs") || arg.equals("-d")) {
|
||||||
keepImageOnlyDocs = false;
|
keepImageOnlyDocs = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
DocMaker docMaker = new DocMaker();
|
|
||||||
Properties properties = new Properties();
|
Properties properties = new Properties();
|
||||||
properties.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource");
|
|
||||||
properties.setProperty("docs.file", wikipedia.getAbsolutePath());
|
properties.setProperty("docs.file", wikipedia.getAbsolutePath());
|
||||||
properties.setProperty("content.source.forever", "false");
|
properties.setProperty("content.source.forever", "false");
|
||||||
properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
|
properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
|
||||||
docMaker.setConfig(new Config(properties));
|
Config config = new Config(properties);
|
||||||
|
|
||||||
|
ContentSource source = new EnwikiContentSource();
|
||||||
|
source.setConfig(config);
|
||||||
|
|
||||||
|
DocMaker docMaker = new DocMaker();
|
||||||
|
docMaker.setConfig(config, source);
|
||||||
docMaker.resetInputs();
|
docMaker.resetInputs();
|
||||||
if (wikipedia.exists()) {
|
if (wikipedia.exists()) {
|
||||||
System.out.println("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");
|
System.out.println("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");
|
||||||
|
|
|
@ -28,7 +28,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||||
import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
|
import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
|
||||||
import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
|
import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
|
||||||
import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
|
import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
|
||||||
import org.apache.lucene.benchmark.byTask.tasks.ResetInputsTask;
|
|
||||||
import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
|
import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
|
||||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -42,7 +41,7 @@ import org.apache.lucene.search.TopDocs;
|
||||||
/** Tests the functionality of {@link DocMaker}. */
|
/** Tests the functionality of {@link DocMaker}. */
|
||||||
public class DocMakerTest extends BenchmarkTestCase {
|
public class DocMakerTest extends BenchmarkTestCase {
|
||||||
|
|
||||||
static final class OneDocSource extends ContentSource {
|
public static final class OneDocSource extends ContentSource {
|
||||||
|
|
||||||
private boolean finish = false;
|
private boolean finish = false;
|
||||||
|
|
||||||
|
@ -106,7 +105,6 @@ public class DocMakerTest extends BenchmarkTestCase {
|
||||||
|
|
||||||
// Indexing configuration.
|
// Indexing configuration.
|
||||||
props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
|
props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
|
||||||
props.setProperty("content.source", OneDocSource.class.getName());
|
|
||||||
props.setProperty("directory", "RAMDirectory");
|
props.setProperty("directory", "RAMDirectory");
|
||||||
if (setNormsProp) {
|
if (setNormsProp) {
|
||||||
props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
|
props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
|
||||||
|
@ -119,7 +117,7 @@ public class DocMakerTest extends BenchmarkTestCase {
|
||||||
Config config = new Config(props);
|
Config config = new Config(props);
|
||||||
|
|
||||||
DocMaker dm = new DocMaker();
|
DocMaker dm = new DocMaker();
|
||||||
dm.setConfig(config);
|
dm.setConfig(config, new OneDocSource());
|
||||||
return dm.makeDocument();
|
return dm.makeDocument();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -175,12 +173,15 @@ public class DocMakerTest extends BenchmarkTestCase {
|
||||||
ps.close();
|
ps.close();
|
||||||
|
|
||||||
Properties props = new Properties();
|
Properties props = new Properties();
|
||||||
props.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.LineDocSource");
|
|
||||||
props.setProperty("docs.file", f.getAbsolutePath());
|
props.setProperty("docs.file", f.getAbsolutePath());
|
||||||
props.setProperty("content.source.forever", "false");
|
props.setProperty("content.source.forever", "false");
|
||||||
Config config = new Config(props);
|
Config config = new Config(props);
|
||||||
|
|
||||||
|
ContentSource source = new LineDocSource();
|
||||||
|
source.setConfig(config);
|
||||||
|
|
||||||
DocMaker dm = new DocMaker();
|
DocMaker dm = new DocMaker();
|
||||||
dm.setConfig(config);
|
dm.setConfig(config, source);
|
||||||
dm.resetInputs();
|
dm.resetInputs();
|
||||||
dm.resetInputs();
|
dm.resetInputs();
|
||||||
dm.close();
|
dm.close();
|
||||||
|
|
Loading…
Reference in New Issue