mirror of https://github.com/apache/lucene.git
LUCENE-1539: add DeleteByPercent, FlushReader tasks, and ability to open reader on a labelled commit point
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@784587 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
26b9dea62b
commit
5b472b2d62
|
@ -3,6 +3,15 @@ Lucene Benchmark Contrib Change Log
|
|||
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
|
||||
|
||||
$Id:$
|
||||
6/12/09
|
||||
LUCENE-1539: Added DeleteByPercentTask which enables deleting a
|
||||
percentage of documents and searching on them. Changed CommitIndex
|
||||
to optionally accept a label (recorded as userData=<label> in the
|
||||
commit point). Added FlushReaderTask, and modified OpenReaderTask
|
||||
to also optionally take a label referencing a commit point to open.
|
||||
Also changed default autoCommit (when IndexWriter is opened) to
|
||||
true. (Jason Rutherglen via Mike McCandless)
|
||||
|
||||
12/20/08
|
||||
LUCENE-1495: Allow task sequence to run for specfied number of seconds by adding ": 2.7s" (for example).
|
||||
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
#/**
|
||||
# * Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# * contributor license agreements. See the NOTICE file distributed with
|
||||
# * this work for additional information regarding copyright ownership.
|
||||
# * The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# * (the "License"); you may not use this file except in compliance with
|
||||
# * the License. You may obtain a copy of the License at
|
||||
# *
|
||||
# * http://www.apache.org/licenses/LICENSE-2.0
|
||||
# *
|
||||
# * Unless required by applicable law or agreed to in writing, software
|
||||
# * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# * See the License for the specific language governing permissions and
|
||||
# * limitations under the License.
|
||||
# */
|
||||
# -------------------------------------------------------------------------------------
|
||||
# multi val params are iterated by NewRound's, added to reports, start with column name.
|
||||
|
||||
autocommit=false
|
||||
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
|
||||
directory=FSDirectory
|
||||
#directory=RamDirectory
|
||||
|
||||
doc.stored=true
|
||||
doc.tokenized=true
|
||||
doc.term.vector=false
|
||||
doc.add.log.step=500
|
||||
|
||||
docs.dir=reuters-out
|
||||
#docs.dir=reuters-111
|
||||
|
||||
#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
|
||||
doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
|
||||
|
||||
#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
|
||||
query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
|
||||
deletion.policy=org.apache.lucene.benchmark.utils.NoDeletionPolicy
|
||||
|
||||
# task at this depth or less would print when they start
|
||||
task.max.depth.log=2
|
||||
|
||||
log.queries=true
|
||||
# -------------------------------------------------------------------------------------
|
||||
|
||||
{ "Rounds"
|
||||
|
||||
ResetSystemErase
|
||||
|
||||
{ "Populate"
|
||||
-CreateIndex
|
||||
{ "MAddDocs" AddDoc > : 1000
|
||||
CommitIndex(original)
|
||||
CloseIndex
|
||||
}
|
||||
|
||||
OpenReader(false,original)
|
||||
DeleteByPercent(5)
|
||||
{ "SearchSameRdr5" Search > : 500
|
||||
FlushReader(5%)
|
||||
CloseReader
|
||||
PrintReader(5%)
|
||||
|
||||
OpenReader(false,5%)
|
||||
DeleteByPercent(10)
|
||||
{ "SearchSameRdr10" Search > : 500
|
||||
FlushReader(10%)
|
||||
CloseReader
|
||||
PrintReader(10%)
|
||||
|
||||
OpenReader(false,10%)
|
||||
DeleteByPercent(20)
|
||||
{ "SearchSameRdr20" Search > : 500
|
||||
FlushReader(20%)
|
||||
CloseReader
|
||||
PrintReader(20%)
|
||||
|
||||
OpenReader(false,20%)
|
||||
DeleteByPercent(60)
|
||||
{ "SearchSameRdr60" Search > : 500
|
||||
FlushReader(60%)
|
||||
CloseReader
|
||||
PrintReader(60%)
|
||||
|
||||
OpenReader(false,60%)
|
||||
DeleteByPercent(75)
|
||||
{ "SearchSameRdr75" Search > : 500
|
||||
FlushReader(75%)
|
||||
CloseReader
|
||||
PrintReader(75%)
|
||||
|
||||
# Test lower percentage of deletes (so undeleteAll is used)
|
||||
OpenReader(false,75%)
|
||||
DeleteByPercent(7)
|
||||
{ "SearchSameRdr7" Search > : 500
|
||||
FlushReader(7%)
|
||||
CloseReader
|
||||
PrintReader(7%)
|
||||
|
||||
NewRound
|
||||
|
||||
} : 1
|
||||
|
||||
RepSumByName
|
||||
RepSumByPrefRound MAddDocs
|
|
@ -55,6 +55,7 @@ log.queries=true
|
|||
CreateIndex
|
||||
[{ "MAddDocs" AddDoc } : 5000] : 4
|
||||
Optimize
|
||||
CommitIndex(commit1)
|
||||
CloseIndex
|
||||
}
|
||||
|
||||
|
|
|
@ -76,7 +76,7 @@ public class AddDocTask extends PerfTask {
|
|||
return 1;
|
||||
}
|
||||
|
||||
private void log (int count) {
|
||||
protected void log (int count) {
|
||||
if (logStep<0) {
|
||||
// init once per instance
|
||||
logStep = getRunData().getConfig().get("doc.add.log.step",DEFAULT_ADD_DOC_LOG_STEP);
|
||||
|
|
|
@ -16,22 +16,40 @@ package org.apache.lucene.benchmark.byTask.tasks;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
||||
/**
|
||||
* Commits the IndexReader.
|
||||
* Commits the IndexWriter.
|
||||
*
|
||||
*/
|
||||
public class CommitIndexTask extends PerfTask {
|
||||
String commitUserData = null;
|
||||
|
||||
public CommitIndexTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
}
|
||||
|
||||
public boolean supportsParams() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public void setParams(String params) {
|
||||
commitUserData = params;
|
||||
}
|
||||
|
||||
public int doLogic() throws Exception {
|
||||
IndexWriter iw = getRunData().getIndexWriter();
|
||||
if (iw!=null) {
|
||||
iw.commit();
|
||||
if (iw != null) {
|
||||
if (commitUserData == null) iw.commit();
|
||||
else {
|
||||
Map map = new HashMap();
|
||||
map.put(OpenReaderTask.USER_DATA, commitUserData);
|
||||
iw.commit(map);
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
|
|||
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.MergeScheduler;
|
||||
import org.apache.lucene.index.MergePolicy;
|
||||
|
@ -92,13 +93,37 @@ public class CreateIndexTask extends PerfTask {
|
|||
}
|
||||
}
|
||||
|
||||
public static IndexDeletionPolicy getIndexDeletionPolicy(Config config) {
|
||||
String deletionPolicyName = config.get("deletion.policy", "org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy");
|
||||
IndexDeletionPolicy indexDeletionPolicy = null;
|
||||
RuntimeException err = null;
|
||||
try {
|
||||
indexDeletionPolicy = ((IndexDeletionPolicy) Class.forName(deletionPolicyName).newInstance());
|
||||
} catch (IllegalAccessException iae) {
|
||||
err = new RuntimeException("unable to instantiate class '" + deletionPolicyName + "' as IndexDeletionPolicy");
|
||||
err.initCause(iae);
|
||||
} catch (InstantiationException ie) {
|
||||
err = new RuntimeException("unable to instantiate class '" + deletionPolicyName + "' as IndexDeletionPolicy");
|
||||
err.initCause(ie);
|
||||
} catch (ClassNotFoundException cnfe) {
|
||||
err = new RuntimeException("unable to load class '" + deletionPolicyName + "' as IndexDeletionPolicy");
|
||||
err.initCause(cnfe);
|
||||
}
|
||||
if (err != null)
|
||||
throw err;
|
||||
return indexDeletionPolicy;
|
||||
}
|
||||
|
||||
public int doLogic() throws IOException {
|
||||
PerfRunData runData = getRunData();
|
||||
Config config = runData.getConfig();
|
||||
|
||||
IndexDeletionPolicy indexDeletionPolicy = getIndexDeletionPolicy(config);
|
||||
|
||||
IndexWriter writer = new IndexWriter(runData.getDirectory(),
|
||||
runData.getConfig().get("autocommit", OpenIndexTask.DEFAULT_AUTO_COMMIT),
|
||||
runData.getAnalyzer(),
|
||||
true);
|
||||
true, indexDeletionPolicy);
|
||||
CreateIndexTask.setIndexWriterConfig(writer, config);
|
||||
runData.setIndexWriter(writer);
|
||||
return 1;
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
package org.apache.lucene.benchmark.byTask.tasks;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
|
||||
/**
|
||||
* Deletes a percentage of documents from an index randomly
|
||||
* over the number of documents. The parameter, X, is in
|
||||
* percent. EG 50 means 1/2 of all documents will be
|
||||
* deleted.
|
||||
*
|
||||
* <p><b>NOTE</b>: the param is an absolute percentage of
|
||||
* maxDoc(). This means if you delete 50%, and then delete
|
||||
* 50% again, the 2nd delete will do nothing.
|
||||
*/
|
||||
public class DeleteByPercentTask extends PerfTask {
|
||||
double percent;
|
||||
int numDeleted = 0;
|
||||
Random random = new Random(System.currentTimeMillis());
|
||||
|
||||
public DeleteByPercentTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
}
|
||||
|
||||
public void setup() throws Exception {
|
||||
super.setup();
|
||||
}
|
||||
|
||||
public void setParams(String params) {
|
||||
super.setParams(params);
|
||||
percent = Double.parseDouble(params)/100;
|
||||
}
|
||||
|
||||
public boolean supportsParams() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public int doLogic() throws Exception {
|
||||
IndexReader r = getRunData().getIndexReader();
|
||||
int maxDoc = r.maxDoc();
|
||||
int numDeleted = 0;
|
||||
// percent is an absolute target:
|
||||
int numToDelete = ((int) (maxDoc * percent)) - r.numDeletedDocs();
|
||||
if (numToDelete < 0) {
|
||||
r.undeleteAll();
|
||||
numToDelete = (int) (maxDoc * percent);
|
||||
}
|
||||
while (numDeleted < numToDelete) {
|
||||
double delRate = ((double) (numToDelete-numDeleted))/r.numDocs();
|
||||
TermDocs termDocs = r.termDocs(null);
|
||||
while (termDocs.next() && numDeleted < numToDelete) {
|
||||
if (random.nextDouble() <= delRate) {
|
||||
r.deleteDocument(termDocs.doc());
|
||||
numDeleted++;
|
||||
}
|
||||
}
|
||||
termDocs.close();
|
||||
}
|
||||
System.out.println("--> processed (delete) " + numDeleted + " docs");
|
||||
return numDeleted;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,54 @@
|
|||
package org.apache.lucene.benchmark.byTask.tasks;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
||||
public class FlushReaderTask extends PerfTask {
|
||||
String userData = null;
|
||||
|
||||
public FlushReaderTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
}
|
||||
|
||||
public boolean supportsParams() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public void setParams(String params) {
|
||||
super.setParams(params);
|
||||
userData = params;
|
||||
}
|
||||
|
||||
public int doLogic() throws IOException {
|
||||
IndexReader reader = getRunData().getIndexReader();
|
||||
if (userData != null) {
|
||||
Map map = new HashMap();
|
||||
map.put(OpenReaderTask.USER_DATA, userData);
|
||||
reader.flush(map);
|
||||
} else {
|
||||
reader.flush();
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
|
@ -38,7 +38,7 @@ public class OpenIndexTask extends PerfTask {
|
|||
public static final int DEFAULT_MAX_FIELD_LENGTH = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
|
||||
public static final int DEFAULT_MERGE_PFACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR;
|
||||
public static final double DEFAULT_RAM_FLUSH_MB = (int) IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB;
|
||||
public static final boolean DEFAULT_AUTO_COMMIT = true;
|
||||
public static final boolean DEFAULT_AUTO_COMMIT = false;
|
||||
|
||||
public OpenIndexTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
|
|
|
@ -18,18 +18,26 @@ package org.apache.lucene.benchmark.byTask.tasks;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
/**
|
||||
* Open an index reader.
|
||||
* <br>Other side effects: index redaer object in perfRunData is set.
|
||||
* <br> Optional params readOnly,commitUserData eg. OpenReader(false,commit1)
|
||||
*/
|
||||
public class OpenReaderTask extends PerfTask {
|
||||
|
||||
public static final String USER_DATA = "userData";
|
||||
private boolean readOnly = true;
|
||||
private String commitUserData = null;
|
||||
|
||||
public OpenReaderTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
|
@ -37,14 +45,51 @@ public class OpenReaderTask extends PerfTask {
|
|||
|
||||
public int doLogic() throws IOException {
|
||||
Directory dir = getRunData().getDirectory();
|
||||
IndexReader reader = IndexReader.open(dir, readOnly);
|
||||
getRunData().setIndexReader(reader);
|
||||
Config config = getRunData().getConfig();
|
||||
IndexReader r = null;
|
||||
if (commitUserData != null) {
|
||||
r = openCommitPoint(commitUserData, dir, config, readOnly);
|
||||
} else {
|
||||
IndexDeletionPolicy indexDeletionPolicy = CreateIndexTask.getIndexDeletionPolicy(config);
|
||||
r = IndexReader.open(dir, indexDeletionPolicy, readOnly);
|
||||
}
|
||||
System.out.println("--> openReader: "+r.getCommitUserData());
|
||||
getRunData().setIndexReader(r);
|
||||
return 1;
|
||||
}
|
||||
|
||||
public static IndexReader openCommitPoint(String userData, Directory dir, Config config, boolean readOnly) throws IOException {
|
||||
IndexReader r = null;
|
||||
Collection commits = IndexReader.listCommits(dir);
|
||||
Iterator i = commits.iterator();
|
||||
while (i.hasNext()) {
|
||||
IndexCommit ic = (IndexCommit)i.next();
|
||||
Map map = ic.getUserData();
|
||||
String ud = null;
|
||||
if (map != null) {
|
||||
ud = (String)map.get(USER_DATA);
|
||||
}
|
||||
if (ud != null && ud.equals(userData)) {
|
||||
IndexDeletionPolicy indexDeletionPolicy = CreateIndexTask.getIndexDeletionPolicy(config);
|
||||
r = IndexReader.open(ic, indexDeletionPolicy, readOnly);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (r == null) throw new IOException("cannot find commitPoint userData:"+userData);
|
||||
return r;
|
||||
}
|
||||
|
||||
public void setParams(String params) {
|
||||
super.setParams(params);
|
||||
readOnly = Boolean.valueOf(params).booleanValue();
|
||||
if (params != null) {
|
||||
String[] split = params.split(",");
|
||||
if (split.length > 0) {
|
||||
readOnly = Boolean.valueOf(split[0]).booleanValue();
|
||||
}
|
||||
if (split.length > 1) {
|
||||
commitUserData = split[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public boolean supportsParams() {
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
package org.apache.lucene.benchmark.byTask.tasks;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
public class PrintReaderTask extends PerfTask {
|
||||
private String userData = null;
|
||||
|
||||
public PrintReaderTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
}
|
||||
|
||||
public void setParams(String params) {
|
||||
super.setParams(params);
|
||||
userData = params;
|
||||
}
|
||||
|
||||
public boolean supportsParams() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public int doLogic() throws Exception {
|
||||
Directory dir = getRunData().getDirectory();
|
||||
Config config = getRunData().getConfig();
|
||||
IndexReader r = null;
|
||||
if (userData == null)
|
||||
r = IndexReader.open(dir);
|
||||
else
|
||||
r = OpenReaderTask.openCommitPoint(userData, dir, config, true);
|
||||
System.out.println("--> numDocs:"+r.numDocs()+" dels:"+r.numDeletedDocs());
|
||||
r.close();
|
||||
return 1;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
package org.apache.lucene.benchmark.utils;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
||||
|
||||
public class NoDeletionPolicy implements IndexDeletionPolicy {
|
||||
public void onCommit(List commits) throws IOException {
|
||||
}
|
||||
|
||||
public void onInit(List commits) throws IOException {
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue