mirror of https://github.com/apache/lucene.git
LUCENE-3261: Facet benchmarking - indexing support - ported from 3x.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1180674 13f79535-47bb-0310-9956-ffa450edef68
commit 664a7191dd
parent 1912c6c9c6
@@ -141,6 +141,17 @@
     <property name="analyzers-common.uptodate" value="true"/>
   </target>
 
+  <property name="facet.jar" value="${common.dir}/../modules/facet/build/lucene-facet-${version}.jar"/>
+  <target name="check-facet-uptodate" unless="facet.uptodate">
+    <module-uptodate name="facet" jarfile="${facet.jar}" property="facet.uptodate"/>
+  </target>
+  <target name="jar-facet" unless="facet.uptodate" depends="check-facet-uptodate">
+    <ant dir="${common.dir}/../modules/facet" target="jar-core" inheritall="false">
+      <propertyset refid="uptodate.and.compiled.properties"/>
+    </ant>
+    <property name="facet.uptodate" value="true"/>
+  </target>
+
   <property name="analyzers-icu.jar" value="${common.dir}/../modules/analysis/build/icu/lucene-analyzers-icu-${version}.jar"/>
   <target name="check-analyzers-icu-uptodate" unless="analyzers-icu.uptodate">
     <module-uptodate name="analysis/icu" jarfile="${analyzers-icu.jar}" property="analyzers-icu.uptodate"/>
@@ -5,6 +5,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
 For more information on past and future Lucene versions, please see:
 http://s.apache.org/luceneversions
 
+10/07/2011
+  LUCENE-3262: Facet benchmarking - Benchmark tasks and sources were added for indexing
+  with facets, demonstrated in facets.alg. (Gilad Barkai, Doron Cohen)
+
 09/25/2011
   LUCENE-3457: Upgrade commons-compress to 1.2 (and undo LUCENE-2980's workaround).
   (Doron Cohen)
@@ -153,6 +153,7 @@
       <pathelement path="${highlighter.jar}"/>
       <pathelement path="${analyzers-common.jar}"/>
       <pathelement path="${queryparser.jar}"/>
+      <pathelement path="${facet.jar}"/>
       <path refid="base.classpath"/>
       <fileset dir="lib">
         <include name="**/*.jar"/>
@@ -241,7 +242,7 @@
     <echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
   </target>
 
-  <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser"/>
+  <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
 
   <target name="clean-javacc">
     <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
@@ -0,0 +1,72 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+
+with.facets=facets:true:false
+
+content.source.forever=false
+
+compound=true
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+taxonomy.directory=FSDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+log.step=1000
+
+docs.dir=reuters-out
+
+content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
+
+facet.source=org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource
+rand.seed=10
+max.doc.facets=20
+max.facet.depth=3
+
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+task.max.depth.log=2
+
+#log.queries=true
+# -------------------------------------------------------------------------------------
+
+{ "Rounds"
+    ResetSystemErase
+    { "Populate"
+        -CreateIndex
+        -CreateTaxonomyIndex
+        { "MAddDocs" AddFacetedDoc > : *
+        -Optimize
+        -CloseIndex
+        -CloseTaxonomyIndex
+    }
+
+    OpenReader
+    { "SearchSameRdr" Search > : 40
+    CloseReader
+
+    #RepSumByNameRound
+    ResetSystemErase
+    NewRound
+} : 4
+
+RepSumByPrefRound Search
+RepSumByPrefRound Populate
+RepSumByPrefRound MAddDocs
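A note on the new facets.alg: "with.facets=facets:true:false" uses the benchmark's per-round property syntax, where "facets" is the column label shown in reports and the values alternate on each NewRound, so the four rounds compare indexing with and without facets. Such a file is normally run with "ant run-task -Dtask.alg=conf/facets.alg"; a minimal sketch (assuming the byTask Benchmark API) of driving it from Java instead:

import java.io.FileReader;
import org.apache.lucene.benchmark.byTask.Benchmark;

public class RunFacetsAlg {
  public static void main(String[] args) throws Exception {
    // Parse the algorithm file and execute its task sequence.
    Benchmark benchmark = new Benchmark(new FileReader("conf/facets.alg"));
    benchmark.execute();
  }
}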
@@ -24,6 +24,7 @@ import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
 import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
 import org.apache.lucene.benchmark.byTask.stats.Points;
 import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
@@ -31,12 +32,15 @@ import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.benchmark.byTask.utils.FileUtils;
 import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Data maintained by a performance test run.
@@ -45,11 +49,21 @@ import org.apache.lucene.store.RAMDirectory;
  * <ul>
  * <li>Configuration.
  * <li>Directory, Writer, Reader.
- * <li>Docmaker and a few instances of QueryMaker.
+ * <li>Taxonomy Directory, Writer, Reader.
+ * <li>DocMaker, FacetSource and a few instances of QueryMaker.
  * <li>Analyzer.
  * <li>Statistics data which updated during the run.
  * </ul>
- * Config properties: work.dir=<path to root of docs and index dirs| Default: work>
+ * Config properties:
+ * <ul>
+ * <li><b>work.dir</b>=<path to root of docs and index dirs| Default: work>
+ * <li><b>analyzer</b>=<class name for analyzer| Default: StandardAnalyzer>
+ * <li><b>doc.maker</b>=<class name for doc-maker| Default: DocMaker>
+ * <li><b>facet.source</b>=<class name for facet-source| Default: RandomFacetSource>
+ * <li><b>query.maker</b>=<class name for query-maker| Default: SimpleQueryMaker>
+ * <li><b>log.queries</b>=<whether queries should be printed| Default: false>
+ * <li><b>directory</b>=<type of directory to use for the index| Default: RAMDirectory>
+ * <li><b>taxonomy.directory</b>=<type of directory for taxonomy index| Default: RAMDirectory>
+ * </ul>
  */
 public class PerfRunData {
@@ -62,7 +76,12 @@ public class PerfRunData {
   private Directory directory;
   private Analyzer analyzer;
   private DocMaker docMaker;
+  private FacetSource facetSource;
   private Locale locale;
 
+  private Directory taxonomyDir;
+  private TaxonomyWriter taxonomyWriter;
+  private TaxonomyReader taxonomyReader;
+
   // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately.
   private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker;
@@ -73,6 +92,7 @@ public class PerfRunData {
   private IndexWriter indexWriter;
   private Config config;
   private long startTimeMillis;
 
+
   // constructor
   public PerfRunData (Config config) throws Exception {
@@ -84,6 +104,10 @@ public class PerfRunData {
     docMaker = Class.forName(config.get("doc.maker",
         "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
     docMaker.setConfig(config);
+    // facet source
+    facetSource = Class.forName(config.get("facet.source",
+        "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
+    facetSource.setConfig(config);
     // query makers
     readTaskQueryMaker = new HashMap<Class<? extends ReadTask>,QueryMaker>();
     qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")).asSubclass(QueryMaker.class);
@@ -104,30 +128,17 @@ public class PerfRunData {
   public void reinit(boolean eraseIndex) throws Exception {
 
     // cleanup index
-    if (indexWriter!=null) {
-      indexWriter.close();
-      indexWriter = null;
-    }
-    if (indexReader!=null) {
-      indexReader.close();
-      indexReader = null;
-    }
-    if (directory!=null) {
-      directory.close();
-    }
+    IOUtils.close(indexWriter, indexReader, directory);
+    indexWriter = null;
+    indexReader = null;
+
+    IOUtils.close(taxonomyWriter, taxonomyReader, taxonomyDir);
+    taxonomyWriter = null;
+    taxonomyReader = null;
 
     // directory (default is ram-dir).
-    if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) {
-      File workDir = new File(config.get("work.dir","work"));
-      File indexDir = new File(workDir,"index");
-      if (eraseIndex && indexDir.exists()) {
-        FileUtils.fullyDelete(indexDir);
-      }
-      indexDir.mkdirs();
-      directory = FSDirectory.open(indexDir);
-    } else {
-      directory = new RAMDirectory();
-    }
+    directory = createDirectory(eraseIndex, "index", "directory");
+    taxonomyDir = createDirectory(eraseIndex, "taxo", "taxonomy.directory");
 
     // inputs
     resetInputs();
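The refactored reinit() replaces the manual null-checked close() calls with IOUtils.close(Closeable...), which (with the trunk-era semantics assumed here) skips null arguments and attempts to close every argument before rethrowing the first failure. A small self-contained sketch of the idiom:

import java.io.Closeable;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.util.IOUtils;

public class CloseAllExample {
  public static void main(String[] args) throws IOException {
    Closeable open = new StringReader("data");
    Closeable notOpened = null;
    // Closes "open" and skips the null; any exception is rethrown only
    // after all arguments have been attempted.
    IOUtils.close(open, notOpened);
  }
}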
@@ -139,6 +150,21 @@ public class PerfRunData {
     // Re-init clock
     setStartTimeMillis();
   }
+
+  private Directory createDirectory(boolean eraseIndex, String dirName,
+      String dirParam) throws IOException {
+    if ("FSDirectory".equals(config.get(dirParam,"RAMDirectory"))) {
+      File workDir = new File(config.get("work.dir","work"));
+      File indexDir = new File(workDir,dirName);
+      if (eraseIndex && indexDir.exists()) {
+        FileUtils.fullyDelete(indexDir);
+      }
+      indexDir.mkdirs();
+      return FSDirectory.open(indexDir);
+    }
+
+    return new RAMDirectory();
+  }
 
   public long setStartTimeMillis() {
     startTimeMillis = System.currentTimeMillis();
@@ -173,6 +199,57 @@ public class PerfRunData {
     this.directory = directory;
   }
 
+  /**
+   * @return Returns the taxonomy directory
+   */
+  public Directory getTaxonomyDir() {
+    return taxonomyDir;
+  }
+
+  /**
+   * Set the taxonomy reader. Takes ownership of that taxonomy reader, that is,
+   * internally performs taxoReader.incRef() (If caller no longer needs that
+   * reader it should decRef()/close() it after calling this method, otherwise,
+   * the reader will remain open).
+   * @param taxoReader The taxonomy reader to set.
+   */
+  public synchronized void setTaxonomyReader(TaxonomyReader taxoReader) throws IOException {
+    if (taxoReader == this.taxonomyReader) {
+      return;
+    }
+    if (taxonomyReader != null) {
+      taxonomyReader.decRef();
+    }
+
+    if (taxoReader != null) {
+      taxoReader.incRef();
+    }
+    this.taxonomyReader = taxoReader;
+  }
+
+  /**
+   * @return Returns the taxonomyReader.  NOTE: this returns a
+   * reference.  You must call TaxonomyReader.decRef() when
+   * you're done.
+   */
+  public synchronized TaxonomyReader getTaxonomyReader() {
+    if (taxonomyReader != null) {
+      taxonomyReader.incRef();
+    }
+    return taxonomyReader;
+  }
+
+  /**
+   * @param taxoWriter The taxonomy writer to set.
+   */
+  public void setTaxonomyWriter(TaxonomyWriter taxoWriter) {
+    this.taxonomyWriter = taxoWriter;
+  }
+
+  public TaxonomyWriter getTaxonomyWriter() {
+    return taxonomyWriter;
+  }
+
   /**
    * @return Returns the indexReader.  NOTE: this returns a
    * reference.  You must call IndexReader.decRef() when
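A hypothetical caller-side sketch of the reference-counting contract documented above (the helper class and method names are illustrative, not part of the commit): getTaxonomyReader() incRef()s on the caller's behalf, so the caller must decRef() when done while PerfRunData keeps its own reference.

import java.io.IOException;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;

class TaxoSizeProbe {
  static int taxonomySize(PerfRunData runData) throws IOException {
    TaxonomyReader r = runData.getTaxonomyReader(); // incRef()'d on our behalf
    if (r == null) {
      return 0; // no taxonomy reader is currently set
    }
    try {
      return r.getSize(); // safe to use while we hold a reference
    } finally {
      r.decRef(); // release our reference; runData still holds its own
    }
  }
}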
@@ -198,13 +275,22 @@ public class PerfRunData {
   }
 
   /**
+   * Set the index reader. Takes ownership of that index reader, that is,
+   * internally performs indexReader.incRef() (If caller no longer needs that
+   * reader it should decRef()/close() it after calling this method, otherwise,
+   * the reader will remain open).
+   * @param indexReader The indexReader to set.
+   */
+  public synchronized void setIndexReader(IndexReader indexReader) throws IOException {
+    if (indexReader == this.indexReader) {
+      return;
+    }
+
-   * @param indexReader The indexReader to set.
-   */
-  public synchronized void setIndexReader(IndexReader indexReader) throws IOException {
     if (this.indexReader != null) {
       // Release current IR
       this.indexReader.decRef();
     }
+
     this.indexReader = indexReader;
     if (indexReader != null) {
       // Hold reference to new IR
@@ -246,6 +332,11 @@ public class PerfRunData {
     return docMaker;
   }
 
+  /** Returns the facet source. */
+  public FacetSource getFacetSource() {
+    return facetSource;
+  }
+
   /**
    * @return the locale
    */
@@ -269,6 +360,7 @@ public class PerfRunData {
 
   public void resetInputs() throws IOException {
     docMaker.resetInputs();
+    facetSource.resetInputs();
     for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
       queryMaker.resetInputs();
     }
@@ -0,0 +1,180 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.Format;
+
+/**
+ * Base class for source of data for benchmarking
+ * <p>
+ * Keeps track of various statistics, such as how many data items were generated,
+ * size in bytes etc.
+ * <p>
+ * Supports the following configuration parameters:
+ * <ul>
+ * <li><b>content.source.forever</b> - specifies whether to generate items
+ * forever (<b>default=true</b>).
+ * <li><b>content.source.verbose</b> - specifies whether messages should be
+ * output by the content source (<b>default=false</b>).
+ * <li><b>content.source.encoding</b> - specifies which encoding to use when
+ * reading the files of that content source. Certain implementations may define
+ * a default value if this parameter is not specified. (<b>default=null</b>).
+ * <li><b>content.source.log.step</b> - specifies for how many items a
+ * message should be logged. If set to 0 it means no logging should occur.
+ * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
+ * logStep is not 0 (<b>default=0</b>).
+ * </ul>
+ */
+public abstract class ContentItemsSource {
+
+  private long bytesCount;
+  private long totalBytesCount;
+  private int itemCount;
+  private int totalItemCount;
+  private Config config;
+
+  private int lastPrintedNumUniqueTexts = 0;
+  private long lastPrintedNumUniqueBytes = 0;
+  private int printNum = 0;
+
+  protected boolean forever;
+  protected int logStep;
+  protected boolean verbose;
+  protected String encoding;
+
+  /** update count of bytes generated by this source */
+  protected final synchronized void addBytes(long numBytes) {
+    bytesCount += numBytes;
+    totalBytesCount += numBytes;
+  }
+
+  /** update count of items generated by this source */
+  protected final synchronized void addItem() {
+    ++itemCount;
+    ++totalItemCount;
+  }
+
+  /**
+   * A convenience method for collecting all the files of a content source from
+   * a given directory. The collected {@link File} instances are stored in the
+   * given <code>files</code>.
+   */
+  protected final void collectFiles(File dir, ArrayList<File> files) {
+    if (!dir.canRead()) {
+      return;
+    }
+
+    File[] dirFiles = dir.listFiles();
+    Arrays.sort(dirFiles);
+    for (int i = 0; i < dirFiles.length; i++) {
+      File file = dirFiles[i];
+      if (file.isDirectory()) {
+        collectFiles(file, files);
+      } else if (file.canRead()) {
+        files.add(file);
+      }
+    }
+  }
+
+  /**
+   * Returns true whether it's time to log a message (depending on verbose and
+   * the number of items generated).
+   */
+  protected final boolean shouldLog() {
+    return verbose && logStep > 0 && itemCount % logStep == 0;
+  }
+
+  /** Called when reading from this content source is no longer required. */
+  public abstract void close() throws IOException;
+
+  /** Returns the number of bytes generated since last reset. */
+  public final long getBytesCount() { return bytesCount; }
+
+  /** Returns the number of generated items since last reset. */
+  public final int getItemsCount() { return itemCount; }
+
+  public final Config getConfig() { return config; }
+
+  /** Returns the total number of bytes that were generated by this source. */
+  public final long getTotalBytesCount() { return totalBytesCount; }
+
+  /** Returns the total number of generated items. */
+  public final int getTotalItemsCount() { return totalItemCount; }
+
+  /**
+   * Resets the input for this content source, so that the test would behave as
+   * if it was just started, input-wise.
+   * <p>
+   * <b>NOTE:</b> the default implementation resets the number of bytes and
+   * items generated since the last reset, so it's important to call
+   * super.resetInputs in case you override this method.
+   */
+  @SuppressWarnings("unused")
+  public void resetInputs() throws IOException {
+    bytesCount = 0;
+    itemCount = 0;
+  }
+
+  /**
+   * Sets the {@link Config} for this content source. If you override this
+   * method, you must call super.setConfig.
+   */
+  public void setConfig(Config config) {
+    this.config = config;
+    forever = config.get("content.source.forever", true);
+    logStep = config.get("content.source.log.step", 0);
+    verbose = config.get("content.source.verbose", false);
+    encoding = config.get("content.source.encoding", null);
+  }
+
+  public void printStatistics(String itemsName) {
+    boolean print = false;
+    String col = " ";
+    StringBuilder sb = new StringBuilder();
+    String newline = System.getProperty("line.separator");
+    sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
+    int nut = getTotalItemsCount();
+    if (nut > lastPrintedNumUniqueTexts) {
+      print = true;
+      sb.append("total count of "+itemsName+": ").append(Format.format(0,nut,col)).append(newline);
+      lastPrintedNumUniqueTexts = nut;
+    }
+    long nub = getTotalBytesCount();
+    if (nub > lastPrintedNumUniqueBytes) {
+      print = true;
+      sb.append("total bytes of "+itemsName+": ").append(Format.format(0,nub,col)).append(newline);
+      lastPrintedNumUniqueBytes = nub;
+    }
+    if (getItemsCount() > 0) {
+      print = true;
+      sb.append("num "+itemsName+" added since last inputs reset: ").append(Format.format(0,getItemsCount(),col)).append(newline);
+      sb.append("total bytes added for "+itemsName+" since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline);
+    }
+    if (print) {
+      System.out.println(sb.append(newline).toString());
+      printNum++;
+    }
+  }
+
+}
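A minimal, hypothetical subclass illustrating the statistics contract of ContentItemsSource (the class and method names are illustrative, not part of the commit): generators call addItem()/addBytes() per produced item, and shouldLog() gates log output based on content.source.verbose and content.source.log.step.

import java.io.IOException;
import org.apache.lucene.benchmark.byTask.feeds.ContentItemsSource;

public class CountingItemsSource extends ContentItemsSource {
  public String next() {
    String item = "item-" + getTotalItemsCount();
    addItem();               // one more item generated since the last reset
    addBytes(item.length()); // rough size accounting
    if (shouldLog()) {
      System.out.println("generated " + getItemsCount() + " items");
    }
    return item;
  }

  @Override
  public void close() throws IOException {
    // nothing to release
  }
}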
@@ -17,12 +17,7 @@ package org.apache.lucene.benchmark.byTask.feeds;
  * limitations under the License.
  */
 
-import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.lucene.benchmark.byTask.utils.Config;
 
 /**
  * Represents content from a specified source, such as TREC, Reuters etc. A
@@ -31,119 +26,13 @@ import org.apache.lucene.benchmark.byTask.utils.Config;
  * of various statistics, such as how many documents were generated, size in
  * bytes etc.
  * <p>
- * Supports the following configuration parameters:
- * <ul>
- * <li><b>content.source.forever</b> - specifies whether to generate documents
- * forever (<b>default=true</b>).
- * <li><b>content.source.verbose</b> - specifies whether messages should be
- * output by the content source (<b>default=false</b>).
- * <li><b>content.source.encoding</b> - specifies which encoding to use when
- * reading the files of that content source. Certain implementations may define
- * a default value if this parameter is not specified. (<b>default=null</b>).
- * <li><b>content.source.log.step</b> - specifies for how many documents a
- * message should be logged. If set to 0 it means no logging should occur.
- * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
- * logStep is not 0 (<b>default=0</b>).
- * </ul>
+ * For supported configuration parameters see {@link ContentItemsSource}.
  */
-public abstract class ContentSource {
+public abstract class ContentSource extends ContentItemsSource {
 
-  private long bytesCount;
-  private long totalBytesCount;
-  private int docsCount;
-  private int totalDocsCount;
-  private Config config;
-
-  protected boolean forever;
-  protected int logStep;
-  protected boolean verbose;
-  protected String encoding;
-
-  /** update count of bytes generated by this source */
-  protected final synchronized void addBytes(long numBytes) {
-    bytesCount += numBytes;
-    totalBytesCount += numBytes;
-  }
-
-  /** update count of documents generated by this source */
-  protected final synchronized void addDoc() {
-    ++docsCount;
-    ++totalDocsCount;
-  }
-
-  /**
-   * A convenience method for collecting all the files of a content source from
-   * a given directory. The collected {@link File} instances are stored in the
-   * given <code>files</code>.
-   */
-  protected final void collectFiles(File dir, ArrayList<File> files) {
-    if (!dir.canRead()) {
-      return;
-    }
-
-    File[] dirFiles = dir.listFiles();
-    Arrays.sort(dirFiles);
-    for (int i = 0; i < dirFiles.length; i++) {
-      File file = dirFiles[i];
-      if (file.isDirectory()) {
-        collectFiles(file, files);
-      } else if (file.canRead()) {
-        files.add(file);
-      }
-    }
-  }
-
-  /**
-   * Returns true whether it's time to log a message (depending on verbose and
-   * the number of documents generated).
-   */
-  protected final boolean shouldLog() {
-    return verbose && logStep > 0 && docsCount % logStep == 0;
-  }
-
-  /** Called when reading from this content source is no longer required. */
-  public abstract void close() throws IOException;
-
-  /** Returns the number of bytes generated since last reset. */
-  public final long getBytesCount() { return bytesCount; }
-
-  /** Returns the number of generated documents since last reset. */
-  public final int getDocsCount() { return docsCount; }
-
-  public final Config getConfig() { return config; }
-
-  /** Returns the next {@link DocData} from the content source. */
+  /** Returns the next {@link DocData} from the content source.
+   * Implementations must account for multi-threading, as multiple threads
+   * can call this method simultaneously. */
   public abstract DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException;
 
-  /** Returns the total number of bytes that were generated by this source. */
-  public final long getTotalBytesCount() { return totalBytesCount; }
-
-  /** Returns the total number of generated documents. */
-  public final int getTotalDocsCount() { return totalDocsCount; }
-
-  /**
-   * Resets the input for this content source, so that the test would behave as
-   * if it was just started, input-wise.
-   * <p>
-   * <b>NOTE:</b> the default implementation resets the number of bytes and
-   * documents generated since the last reset, so it's important to call
-   * super.resetInputs in case you override this method.
-   */
-  public void resetInputs() throws IOException {
-    bytesCount = 0;
-    docsCount = 0;
-  }
-
-  /**
-   * Sets the {@link Config} for this content source. If you override this
-   * method, you must call super.setConfig.
-   */
-  public void setConfig(Config config) {
-    this.config = config;
-    forever = config.get("content.source.forever", true);
-    logStep = config.get("content.source.log.step", 0);
-    verbose = config.get("content.source.verbose", false);
-    encoding = config.get("content.source.encoding", null);
-  }
-
 }
@@ -31,7 +31,6 @@ import java.text.SimpleDateFormat;
 import java.text.ParsePosition;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.Format;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -186,13 +185,8 @@ public class DocMaker {
   protected boolean reuseFields;
   protected boolean indexProperties;
 
-  private int lastPrintedNumUniqueTexts = 0;
-
-  private long lastPrintedNumUniqueBytes = 0;
   private final AtomicInteger numDocsCreated = new AtomicInteger();
 
-  private int printNum = 0;
-
   public DocMaker() {
   }
 
@@ -400,38 +394,9 @@ public class DocMaker {
     return doc;
   }
 
-  public void printDocStatistics() {
-    boolean print = false;
-    String col = " ";
-    StringBuilder sb = new StringBuilder();
-    String newline = System.getProperty("line.separator");
-    sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
-    int nut = source.getTotalDocsCount();
-    if (nut > lastPrintedNumUniqueTexts) {
-      print = true;
-      sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline);
-      lastPrintedNumUniqueTexts = nut;
-    }
-    long nub = getTotalBytesCount();
-    if (nub > lastPrintedNumUniqueBytes) {
-      print = true;
-      sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
-      lastPrintedNumUniqueBytes = nub;
-    }
-    if (source.getDocsCount() > 0) {
-      print = true;
-      sb.append("num docs added since last inputs reset: ").append(Format.format(0,source.getDocsCount(),col)).append(newline);
-      sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline);
-    }
-    if (print) {
-      System.out.println(sb.append(newline).toString());
-      printNum++;
-    }
-  }
-
   /** Reset inputs so that the test run would behave, input wise, as if it just started. */
   public synchronized void resetInputs() throws IOException {
-    printDocStatistics();
+    source.printStatistics("docs");
     // re-initiate since properties by round may have changed.
     setConfig(config);
     source.resetInputs();
@@ -0,0 +1,45 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.facet.index.CategoryContainer;
+
+/**
+ * Source items for facets.
+ * <p>
+ * For supported configuration parameters see {@link ContentItemsSource}.
+ */
+public abstract class FacetSource extends ContentItemsSource {
+
+  /** Returns the next {@link CategoryContainer facets content item}.
+   * Implementations must account for multi-threading, as multiple threads
+   * can call this method simultaneously.
+   */
+  public abstract CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException;
+
+  @Override
+  public void resetInputs() throws IOException {
+    printStatistics("facets");
+    // re-initiate since properties by round may have changed.
+    setConfig(getConfig());
+    super.resetInputs();
+  }
+
+}
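A hypothetical single-category FacetSource, handy for smoke-testing the faceted-indexing tasks without randomness (not part of this commit; it assumes the varargs CategoryPath constructor of the 3.x facet module):

package org.apache.lucene.benchmark.byTask.feeds;

import java.io.IOException;

import org.apache.lucene.facet.index.CategoryContainer;
import org.apache.lucene.facet.taxonomy.CategoryPath;

public class ConstantFacetSource extends FacetSource {

  @Override
  public CategoryContainer getNextFacets(CategoryContainer facets)
      throws NoMoreDataException, IOException {
    if (facets == null) {
      facets = new CategoryContainer();
    } else {
      facets.clear(); // reuse the container, as RandomFacetSource does below
    }
    CategoryPath cp = new CategoryPath("root", "fixed"); // one fixed two-component facet
    facets.addCategory(cp);
    addItem();
    addBytes(cp.toString().length()); // same rough approximation as below
    return facets;
  }

  @Override
  public void close() throws IOException {
    // no resources to release
  }
}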
@@ -0,0 +1,81 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.facet.index.CategoryContainer;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+
+/**
+ * Simple implementation of a random facet source
+ * <p>
+ * Supports the following parameters:
+ * <ul>
+ * <li><b>rand.seed</b> - defines the seed to initialize Random with (default: <b>13</b>).
+ * <li><b>max.doc.facets</b> - maximal #facets per doc (default: <b>10</b>).
+ * Actual number of facets in a certain doc would be anything between 1 and that number.
+ * <li><b>max.facet.depth</b> - maximal #components in a facet (default: <b>3</b>).
+ * Actual number of components in a certain facet would be anything between 1 and that number.
+ * </ul>
+ */
+public class RandomFacetSource extends FacetSource {
+
+  Random random;
+
+  private int maxDocFacets = 10;
+  private int maxFacetDepth = 3;
+  private int maxValue = maxDocFacets * maxFacetDepth;
+
+  @Override
+  public CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException {
+    if (facets == null) {
+      facets = new CategoryContainer();
+    } else {
+      facets.clear();
+    }
+    int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc
+    for (int i=0; i<numFacets; i++) {
+      CategoryPath cp = new CategoryPath();
+      int depth = 1 + random.nextInt(maxFacetDepth-1); // depth 0 is not useful
+      for (int k=0; k<depth; k++) {
+        cp.add(Integer.toString(random.nextInt(maxValue)));
+        addItem();
+      }
+      facets.addCategory(cp);
+      addBytes(cp.toString().length()); // very rough approximation
+    }
+    return facets;
+  }
+
+  @Override
+  public void close() throws IOException {
+    // nothing to do here
+  }
+
+  @Override
+  public void setConfig(Config config) {
+    super.setConfig(config);
+    random = new Random(config.get("rand.seed", 13));
+    maxDocFacets = config.get("max.doc.facets", 200);
+    maxFacetDepth = config.get("max.facet.depth", 10);
+    maxValue = maxDocFacets * maxFacetDepth;
+  }
+}
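A quick empirical check of the sampling ranges used above, with the facets.alg settings (rand.seed=10, max.doc.facets=20, max.facet.depth=3): since Random.nextInt(n) returns 0..n-1, the expression 1 + random.nextInt(max - 1) yields values in [1, max-1], so the configured maximum itself is never drawn.

import java.util.Random;

public class FacetRangeCheck {
  public static void main(String[] args) {
    Random random = new Random(10); // rand.seed=10, as in facets.alg
    int maxDocFacets = 20, maxFacetDepth = 3;
    int minFacets = Integer.MAX_VALUE, maxFacets = 0;
    int minDepth = Integer.MAX_VALUE, maxDepth = 0;
    for (int i = 0; i < 100000; i++) {
      int numFacets = 1 + random.nextInt(maxDocFacets - 1);
      int depth = 1 + random.nextInt(maxFacetDepth - 1);
      minFacets = Math.min(minFacets, numFacets); maxFacets = Math.max(maxFacets, numFacets);
      minDepth = Math.min(minDepth, depth);       maxDepth = Math.max(maxDepth, depth);
    }
    System.out.println("numFacets in [" + minFacets + "," + maxFacets + "]"); // [1,19]
    System.out.println("depth in [" + minDepth + "," + maxDepth + "]");       // [1,2]
  }
}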
@@ -289,7 +289,7 @@ public class TrecContentSource extends ContentSource {
       // here, everything else is already private to that thread, so we're safe.
       try {
         docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType);
-        addDoc();
+        addItem();
       } catch (InterruptedException ie) {
         throw new ThreadInterruptedException(ie);
       }
@@ -17,12 +17,14 @@ package org.apache.lucene.benchmark.byTask.tasks;
  * limitations under the License.
  */
 
+import java.text.NumberFormat;
+
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.document.Document;
 
 /**
- * Add a document, optionally with of a certain size.
+ * Add a document, optionally of a certain size.
  * <br>Other side effects: none.
  * <br>Takes optional param: document size.
  */
@@ -34,9 +36,12 @@ public class AddDocTask extends PerfTask {
 
   private int docSize = 0;
 
-  // volatile data passed between setup(), doLogic(), tearDown().
-  private Document doc = null;
+  /**
+   * volatile data passed between setup(), doLogic(), tearDown().
+   * the doc is created at setup() and added at doLogic().
+   */
+  protected Document doc = null;
 
   @Override
   public void setup() throws Exception {
     super.setup();
@@ -56,7 +61,7 @@ public class AddDocTask extends PerfTask {
 
   @Override
   protected String getLogMessage(int recsCount) {
-    return "added " + recsCount + " docs";
+    return String.format("added %9d docs",recsCount);
   }
 
   @Override
@@ -0,0 +1,77 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
+import org.apache.lucene.facet.index.CategoryContainer;
+import org.apache.lucene.facet.index.CategoryDocumentBuilder;
+
+/**
+ * Add a faceted document.
+ * <p>
+ * Config properties:
+ * <ul>
+ * <li><b>with.facets</b>=<tells whether to actually add any facets to the document| Default: true>
+ * <br>This config property allows to easily compare the performance of adding docs with and without facets.
+ * Note that facets are created even when this is false, just that they are not added to the document (nor to the taxonomy).
+ * </ul>
+ * <p>
+ * See {@link AddDocTask} for general document parameters and configuration.
+ * <p>
+ * Makes use of the {@link FacetSource} in effect - see {@link PerfRunData} for facet source settings.
+ */
+public class AddFacetedDocTask extends AddDocTask {
+
+  public AddFacetedDocTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  private CategoryContainer facets = null;
+  private CategoryDocumentBuilder categoryDocBuilder = null;
+  private boolean withFacets = true;
+
+  @Override
+  public void setup() throws Exception {
+    super.setup();
+    // create the facets even if they should not be added - allows to measure the effect of just adding facets
+    facets = getRunData().getFacetSource().getNextFacets(facets);
+    withFacets = getRunData().getConfig().get("with.facets", true);
+    if (withFacets) {
+      categoryDocBuilder = new CategoryDocumentBuilder(getRunData().getTaxonomyWriter());
+      categoryDocBuilder.setCategories(facets);
+    }
+  }
+
+  @Override
+  protected String getLogMessage(int recsCount) {
+    if (!withFacets) {
+      return super.getLogMessage(recsCount);
+    }
+    return super.getLogMessage(recsCount)+ " with facets";
+  }
+
+  @Override
+  public int doLogic() throws Exception {
+    if (withFacets) {
+      categoryDocBuilder.build(doc);
+    }
+    return super.doLogic();
+  }
+
+}
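For context, a hedged sketch of the facet-indexing API this task drives (the 3.x-era facet module; the category value below is made up): CategoryDocumentBuilder writes the category fields into the Document while the TaxonomyWriter assigns an ordinal to each CategoryPath.

import org.apache.lucene.document.Document;
import org.apache.lucene.facet.index.CategoryContainer;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;

public class FacetedDocSketch {
  static Document buildFacetedDoc(TaxonomyWriter taxoWriter) throws Exception {
    CategoryContainer facets = new CategoryContainer();
    facets.addCategory(new CategoryPath("author", "Gilad Barkai")); // hypothetical category
    Document doc = new Document();
    CategoryDocumentBuilder builder = new CategoryDocumentBuilder(taxoWriter);
    builder.setCategories(facets); // also registers the categories in the taxonomy
    builder.build(doc);            // adds the category fields to the document
    return doc;
  }
}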
@@ -0,0 +1,43 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Close taxonomy index.
+ * <br>Other side effects: taxonomy writer object in perfRunData is nullified.
+ */
+public class CloseTaxonomyIndexTask extends PerfTask {
+
+  public CloseTaxonomyIndexTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  @Override
+  public int doLogic() throws IOException {
+    IOUtils.close(getRunData().getTaxonomyWriter());
+    getRunData().setTaxonomyWriter(null);
+
+    return 1;
+  }
+
+}
@@ -0,0 +1,46 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * Close taxonomy reader.
+ * <br>Other side effects: taxonomy reader in perfRunData is nullified.
+ */
+public class CloseTaxonomyReaderTask extends PerfTask {
+
+  public CloseTaxonomyReaderTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  @Override
+  public int doLogic() throws IOException {
+    TaxonomyReader taxoReader = getRunData().getTaxonomyReader();
+    getRunData().setTaxonomyReader(null);
+    if (taxoReader.getRefCount() != 1) {
+      System.out.println("WARNING: CloseTaxonomyReader: reference count is currently " + taxoReader.getRefCount());
+    }
+    taxoReader.close();
+    return 1;
+  }
+
+}
@@ -0,0 +1,41 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+
+/**
+ * Commits the Taxonomy Index.
+ */
+public class CommitTaxonomyIndexTask extends PerfTask {
+  public CommitTaxonomyIndexTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  @Override
+  public int doLogic() throws Exception {
+    TaxonomyWriter taxonomyWriter = getRunData().getTaxonomyWriter();
+    if (taxonomyWriter != null) {
+      taxonomyWriter.commit();
+    } else {
+      throw new IllegalStateException("TaxonomyWriter is not currently open");
+    }
+
+    return 1;
+  }
+}
@@ -0,0 +1,44 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+
+import java.io.IOException;
+
+
+/**
+ * Create a taxonomy index.
+ * <br>Other side effects: taxonomy writer object in perfRunData is set.
+ */
+public class CreateTaxonomyIndexTask extends PerfTask {
+
+  public CreateTaxonomyIndexTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  @Override
+  public int doLogic() throws IOException {
+    PerfRunData runData = getRunData();
+    runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir(), OpenMode.CREATE));
+    return 1;
+  }
+
+}
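A minimal round-trip with the taxonomy API these tasks wrap (a sketch assuming a RAMDirectory; to my understanding addCategory also adds any missing parent categories):

import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class TaxonomyRoundTrip {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    LuceneTaxonomyWriter writer = new LuceneTaxonomyWriter(dir);
    writer.addCategory(new CategoryPath("a", "b")); // adds "a" and "a/b"
    writer.commit();
    LuceneTaxonomyReader reader = new LuceneTaxonomyReader(dir);
    System.out.println("taxonomy size = " + reader.getSize()); // root + "a" + "a/b"
    System.out.println("ordinal of a/b = " + reader.getOrdinal(new CategoryPath("a", "b")));
    reader.close();
    writer.close();
  }
}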
@@ -0,0 +1,42 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import java.io.IOException;
+
+
+/**
+ * Open a taxonomy index.
+ * <br>Other side effects: taxonomy writer object in perfRunData is set.
+ */
+public class OpenTaxonomyIndexTask extends PerfTask {
+
+  public OpenTaxonomyIndexTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  @Override
+  public int doLogic() throws IOException {
+    PerfRunData runData = getRunData();
+    runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir()));
+    return 1;
+  }
+
+}
@@ -0,0 +1,45 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
+
+/**
+ * Open a taxonomy index reader.
+ * <br>Other side effects: taxonomy reader object in perfRunData is set.
+ */
+public class OpenTaxonomyReaderTask extends PerfTask {
+
+  public OpenTaxonomyReaderTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  @Override
+  public int doLogic() throws IOException {
+    PerfRunData runData = getRunData();
+    LuceneTaxonomyReader taxoReader = new LuceneTaxonomyReader(runData.getTaxonomyDir());
+    runData.setTaxonomyReader(taxoReader);
+    // We transfer reference to the run data
+    taxoReader.decRef();
+    return 1;
+  }
+
+}
@@ -17,8 +17,6 @@ package org.apache.lucene.benchmark.byTask.tasks;
  * limitations under the License.
  */
 
-import java.text.NumberFormat;
-
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.stats.Points;
 import org.apache.lucene.benchmark.byTask.stats.TaskStats;
@@ -270,9 +268,7 @@ public abstract class PerfTask implements Cloneable {
   public void tearDown() throws Exception {
     if (++logStepCount % logStep == 0) {
       double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0;
-      NumberFormat nf = NumberFormat.getInstance();
-      nf.setMaximumFractionDigits(2);
-      System.out.println(nf.format(time) + " sec --> "
+      System.out.println(String.format("%7.2f",time) + " sec --> "
           + Thread.currentThread().getName() + " " + getLogMessage(logStepCount));
     }
   }
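The formatting change above swaps NumberFormat for a fixed-width format string, so elapsed-time columns line up across log lines (as does the "%9d" change in AddDocTask.getLogMessage). A small comparison of the two:

import java.text.NumberFormat;

public class ElapsedFormat {
  public static void main(String[] args) {
    double time = 3.5;
    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(2);
    System.out.println("[" + nf.format(time) + "]");              // [3.5]     - width varies
    System.out.println("[" + String.format("%7.2f", time) + "]"); // [   3.50] - fixed width
  }
}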
@@ -40,6 +40,7 @@ import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
 import org.apache.lucene.collation.CollationKeyAnalyzer;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -780,6 +781,42 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
     reader.close();
   }
 
+  /**
+   * Test indexing with facets tasks.
+   */
+  public void testIndexingWithFacets() throws Exception {
+    // 1. alg definition (required in every "logic" test)
+    String algLines[] = {
+        "# ----- properties ",
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
+        "content.source.log.step=100",
+        "content.source.forever=false",
+        "directory=RAMDirectory",
+        "doc.stored=false",
+        "merge.factor=3",
+        "doc.tokenized=false",
+        "debug.level=1",
+        "# ----- alg ",
+        "ResetSystemErase",
+        "CreateIndex",
+        "CreateTaxonomyIndex",
+        "{ \"AddDocs\" AddFacetedDoc > : * ",
+        "CloseIndex",
+        "CloseTaxonomyIndex",
+        "OpenTaxonomyReader",
+    };
+
+    // 2. execute the algorithm  (required in every "logic" test)
+    Benchmark benchmark = execBenchmark(algLines);
+    PerfRunData runData = benchmark.getRunData();
+    assertNull("taxo writer was not properly closed",runData.getTaxonomyWriter());
+    TaxonomyReader taxoReader = runData.getTaxonomyReader();
+    assertNotNull("taxo reader was not opened", taxoReader);
+    assertTrue("nothing was added to the taxnomy (expecting root and at least one addtional category)",taxoReader.getSize()>1);
+    taxoReader.close();
+  }
+
   /**
    * Test that we can call optimize(maxNumSegments).
    */