LUCENE-1539: add DeleteByPercent, FlushReader tasks, and ability to open reader on a labelled commit point

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@784587 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-06-14 17:07:55 +00:00
parent 26b9dea62b
commit 5b472b2d62
12 changed files with 436 additions and 13 deletions

View File

@ -3,6 +3,15 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
$Id:$
6/12/09
LUCENE-1539: Added DeleteByPercentTask which enables deleting a
percentage of documents and searching on them. Changed CommitIndex
to optionally accept a label (recorded as userData=<label> in the
commit point). Added FlushReaderTask, and modified OpenReaderTask
to also optionally take a label referencing a commit point to open.
Also changed default autoCommit (when IndexWriter is opened) to
true. (Jason Rutherglen via Mike McCandless)
12/20/08
LUCENE-1495: Allow task sequence to run for specfied number of seconds by adding ": 2.7s" (for example).

View File

@ -0,0 +1,105 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
autocommit=false
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
doc.add.log.step=500
docs.dir=reuters-out
#docs.dir=reuters-111
#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
deletion.policy=org.apache.lucene.benchmark.utils.NoDeletionPolicy
# task at this depth or less would print when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
-CreateIndex
{ "MAddDocs" AddDoc > : 1000
CommitIndex(original)
CloseIndex
}
OpenReader(false,original)
DeleteByPercent(5)
{ "SearchSameRdr5" Search > : 500
FlushReader(5%)
CloseReader
PrintReader(5%)
OpenReader(false,5%)
DeleteByPercent(10)
{ "SearchSameRdr10" Search > : 500
FlushReader(10%)
CloseReader
PrintReader(10%)
OpenReader(false,10%)
DeleteByPercent(20)
{ "SearchSameRdr20" Search > : 500
FlushReader(20%)
CloseReader
PrintReader(20%)
OpenReader(false,20%)
DeleteByPercent(60)
{ "SearchSameRdr60" Search > : 500
FlushReader(60%)
CloseReader
PrintReader(60%)
OpenReader(false,60%)
DeleteByPercent(75)
{ "SearchSameRdr75" Search > : 500
FlushReader(75%)
CloseReader
PrintReader(75%)
# Test lower percentage of deletes (so undeleteAll is used)
OpenReader(false,75%)
DeleteByPercent(7)
{ "SearchSameRdr7" Search > : 500
FlushReader(7%)
CloseReader
PrintReader(7%)
NewRound
} : 1
RepSumByName
RepSumByPrefRound MAddDocs

View File

@ -55,6 +55,7 @@ log.queries=true
CreateIndex
[{ "MAddDocs" AddDoc } : 5000] : 4
Optimize
CommitIndex(commit1)
CloseIndex
}

View File

@ -76,7 +76,7 @@ public class AddDocTask extends PerfTask {
return 1;
}
private void log (int count) {
protected void log (int count) {
if (logStep<0) {
// init once per instance
logStep = getRunData().getConfig().get("doc.add.log.step",DEFAULT_ADD_DOC_LOG_STEP);

View File

@ -16,22 +16,40 @@ package org.apache.lucene.benchmark.byTask.tasks;
* limitations under the License.
*/
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.index.IndexWriter;
/**
* Commits the IndexReader.
* Commits the IndexWriter.
*
*/
public class CommitIndexTask extends PerfTask {
String commitUserData = null;
public CommitIndexTask(PerfRunData runData) {
super(runData);
}
public boolean supportsParams() {
return true;
}
public void setParams(String params) {
commitUserData = params;
}
public int doLogic() throws Exception {
IndexWriter iw = getRunData().getIndexWriter();
if (iw!=null) {
iw.commit();
if (iw != null) {
if (commitUserData == null) iw.commit();
else {
Map map = new HashMap();
map.put(OpenReaderTask.USER_DATA, commitUserData);
iw.commit(map);
}
}
return 1;

View File

@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.MergePolicy;
@ -91,14 +92,38 @@ public class CreateIndexTask extends PerfTask {
writer.setRAMBufferSizeMB(ramBuffer);
}
}
public static IndexDeletionPolicy getIndexDeletionPolicy(Config config) {
String deletionPolicyName = config.get("deletion.policy", "org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy");
IndexDeletionPolicy indexDeletionPolicy = null;
RuntimeException err = null;
try {
indexDeletionPolicy = ((IndexDeletionPolicy) Class.forName(deletionPolicyName).newInstance());
} catch (IllegalAccessException iae) {
err = new RuntimeException("unable to instantiate class '" + deletionPolicyName + "' as IndexDeletionPolicy");
err.initCause(iae);
} catch (InstantiationException ie) {
err = new RuntimeException("unable to instantiate class '" + deletionPolicyName + "' as IndexDeletionPolicy");
err.initCause(ie);
} catch (ClassNotFoundException cnfe) {
err = new RuntimeException("unable to load class '" + deletionPolicyName + "' as IndexDeletionPolicy");
err.initCause(cnfe);
}
if (err != null)
throw err;
return indexDeletionPolicy;
}
public int doLogic() throws IOException {
PerfRunData runData = getRunData();
Config config = runData.getConfig();
IndexDeletionPolicy indexDeletionPolicy = getIndexDeletionPolicy(config);
IndexWriter writer = new IndexWriter(runData.getDirectory(),
runData.getConfig().get("autocommit", OpenIndexTask.DEFAULT_AUTO_COMMIT),
runData.getAnalyzer(),
true);
true, indexDeletionPolicy);
CreateIndexTask.setIndexWriterConfig(writer, config);
runData.setIndexWriter(writer);
return 1;

View File

@ -0,0 +1,82 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Random;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
/**
* Deletes a percentage of documents from an index randomly
* over the number of documents. The parameter, X, is in
* percent. EG 50 means 1/2 of all documents will be
* deleted.
*
* <p><b>NOTE</b>: the param is an absolute percentage of
* maxDoc(). This means if you delete 50%, and then delete
* 50% again, the 2nd delete will do nothing.
*/
public class DeleteByPercentTask extends PerfTask {
double percent;
int numDeleted = 0;
Random random = new Random(System.currentTimeMillis());
public DeleteByPercentTask(PerfRunData runData) {
super(runData);
}
public void setup() throws Exception {
super.setup();
}
public void setParams(String params) {
super.setParams(params);
percent = Double.parseDouble(params)/100;
}
public boolean supportsParams() {
return true;
}
public int doLogic() throws Exception {
IndexReader r = getRunData().getIndexReader();
int maxDoc = r.maxDoc();
int numDeleted = 0;
// percent is an absolute target:
int numToDelete = ((int) (maxDoc * percent)) - r.numDeletedDocs();
if (numToDelete < 0) {
r.undeleteAll();
numToDelete = (int) (maxDoc * percent);
}
while (numDeleted < numToDelete) {
double delRate = ((double) (numToDelete-numDeleted))/r.numDocs();
TermDocs termDocs = r.termDocs(null);
while (termDocs.next() && numDeleted < numToDelete) {
if (random.nextDouble() <= delRate) {
r.deleteDocument(termDocs.doc());
numDeleted++;
}
}
termDocs.close();
}
System.out.println("--> processed (delete) " + numDeleted + " docs");
return numDeleted;
}
}

View File

@ -0,0 +1,54 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.index.IndexReader;
public class FlushReaderTask extends PerfTask {
String userData = null;
public FlushReaderTask(PerfRunData runData) {
super(runData);
}
public boolean supportsParams() {
return true;
}
public void setParams(String params) {
super.setParams(params);
userData = params;
}
public int doLogic() throws IOException {
IndexReader reader = getRunData().getIndexReader();
if (userData != null) {
Map map = new HashMap();
map.put(OpenReaderTask.USER_DATA, userData);
reader.flush(map);
} else {
reader.flush();
}
return 1;
}
}

View File

@ -38,7 +38,7 @@ public class OpenIndexTask extends PerfTask {
public static final int DEFAULT_MAX_FIELD_LENGTH = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
public static final int DEFAULT_MERGE_PFACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
public static final double DEFAULT_RAM_FLUSH_MB = (int) IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB;
public static final boolean DEFAULT_AUTO_COMMIT = true;
public static final boolean DEFAULT_AUTO_COMMIT = false;
public OpenIndexTask(PerfRunData runData) {
super(runData);

View File

@ -18,18 +18,26 @@ package org.apache.lucene.benchmark.byTask.tasks;
*/
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
/**
* Open an index reader.
* <br>Other side effects: index redaer object in perfRunData is set.
* <br> Optional params readOnly,commitUserData eg. OpenReader(false,commit1)
*/
public class OpenReaderTask extends PerfTask {
public static final String USER_DATA = "userData";
private boolean readOnly = true;
private String commitUserData = null;
public OpenReaderTask(PerfRunData runData) {
super(runData);
@ -37,14 +45,51 @@ public class OpenReaderTask extends PerfTask {
public int doLogic() throws IOException {
Directory dir = getRunData().getDirectory();
IndexReader reader = IndexReader.open(dir, readOnly);
getRunData().setIndexReader(reader);
Config config = getRunData().getConfig();
IndexReader r = null;
if (commitUserData != null) {
r = openCommitPoint(commitUserData, dir, config, readOnly);
} else {
IndexDeletionPolicy indexDeletionPolicy = CreateIndexTask.getIndexDeletionPolicy(config);
r = IndexReader.open(dir, indexDeletionPolicy, readOnly);
}
System.out.println("--> openReader: "+r.getCommitUserData());
getRunData().setIndexReader(r);
return 1;
}
public static IndexReader openCommitPoint(String userData, Directory dir, Config config, boolean readOnly) throws IOException {
IndexReader r = null;
Collection commits = IndexReader.listCommits(dir);
Iterator i = commits.iterator();
while (i.hasNext()) {
IndexCommit ic = (IndexCommit)i.next();
Map map = ic.getUserData();
String ud = null;
if (map != null) {
ud = (String)map.get(USER_DATA);
}
if (ud != null && ud.equals(userData)) {
IndexDeletionPolicy indexDeletionPolicy = CreateIndexTask.getIndexDeletionPolicy(config);
r = IndexReader.open(ic, indexDeletionPolicy, readOnly);
break;
}
}
if (r == null) throw new IOException("cannot find commitPoint userData:"+userData);
return r;
}
public void setParams(String params) {
super.setParams(params);
readOnly = Boolean.valueOf(params).booleanValue();
if (params != null) {
String[] split = params.split(",");
if (split.length > 0) {
readOnly = Boolean.valueOf(split[0]).booleanValue();
}
if (split.length > 1) {
commitUserData = split[1];
}
}
}
public boolean supportsParams() {

View File

@ -0,0 +1,53 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
public class PrintReaderTask extends PerfTask {
private String userData = null;
public PrintReaderTask(PerfRunData runData) {
super(runData);
}
public void setParams(String params) {
super.setParams(params);
userData = params;
}
public boolean supportsParams() {
return true;
}
public int doLogic() throws Exception {
Directory dir = getRunData().getDirectory();
Config config = getRunData().getConfig();
IndexReader r = null;
if (userData == null)
r = IndexReader.open(dir);
else
r = OpenReaderTask.openCommitPoint(userData, dir, config, true);
System.out.println("--> numDocs:"+r.numDocs()+" dels:"+r.numDeletedDocs());
r.close();
return 1;
}
}

View File

@ -0,0 +1,31 @@
package org.apache.lucene.benchmark.utils;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.IndexDeletionPolicy;
public class NoDeletionPolicy implements IndexDeletionPolicy {
public void onCommit(List commits) throws IOException {
}
public void onInit(List commits) throws IOException {
}
}