LUCENE-3606: remove indexreader modification tasks from benchmark

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3606@1210486 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-12-05 14:57:44 +00:00
parent f05679bb5f
commit 1dc3a251c4
10 changed files with 10 additions and 490 deletions

View File

@ -1,105 +0,0 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
log.step=500
docs.dir=reuters-out
#docs.dir=reuters-111
#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
#doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
deletion.policy=org.apache.lucene.index.NoDeletionPolicy
# task at this depth or less would print when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
-CreateIndex
{ "MAddDocs" AddDoc > : 1000
CommitIndex(original)
CloseIndex
}
OpenReader(false,original)
DeleteByPercent(5)
{ "SearchSameRdr5" Search > : 500
FlushReader(5%)
CloseReader
PrintReader(5%)
OpenReader(false,5%)
DeleteByPercent(10)
{ "SearchSameRdr10" Search > : 500
FlushReader(10%)
CloseReader
PrintReader(10%)
OpenReader(false,10%)
DeleteByPercent(20)
{ "SearchSameRdr20" Search > : 500
FlushReader(20%)
CloseReader
PrintReader(20%)
OpenReader(false,20%)
DeleteByPercent(60)
{ "SearchSameRdr60" Search > : 500
FlushReader(60%)
CloseReader
PrintReader(60%)
OpenReader(false,60%)
DeleteByPercent(75)
{ "SearchSameRdr75" Search > : 500
FlushReader(75%)
CloseReader
PrintReader(75%)
# Test lower percentage of deletes (so undeleteAll is used)
OpenReader(false,75%)
DeleteByPercent(7)
{ "SearchSameRdr7" Search > : 500
FlushReader(7%)
CloseReader
PrintReader(7%)
NewRound
} : 1
RepSumByName
RepSumByPrefRound MAddDocs

View File

@ -1,70 +0,0 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# --------------------------------------------------------
# Deletes: what is the cost of deleting documents?
# --------------------------------------------------------
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
merge.factor=mrg:10
max.buffered=buf:100
compound=true
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
log.step=10000
log.step.DeleteDoc=100
docs.dir=reuters-out
#docs.dir=reuters-111
content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource
#content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
#query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
# task at this depth or less would print when they start
task.max.depth.log=1
log.queries=false
# -------------------------------------------------------------------------------------
ResetSystemErase
CreateIndex
CloseIndex
{ "Populate"
OpenIndex
{ AddDoc(10) > : 200000
ForcMerge(1)
CloseIndex
>
{ "Deletions"
OpenReader(false) DeleteDoc CloseReader
} : 4000
RepSumByName

View File

@ -21,7 +21,6 @@ import java.util.Map;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
/**
* Commits the IndexWriter.
@ -51,14 +50,6 @@ public class CommitIndexTask extends PerfTask {
IndexWriter iw = getRunData().getIndexWriter();
if (iw != null) {
iw.commit(commitUserData);
} else {
IndexReader r = getRunData().getIndexReader();
if (r != null) {
r.commit(commitUserData);
r.decRef();
} else {
throw new IllegalStateException("neither IndexWriter nor IndexReader is currently open");
}
}
return 1;

View File

@ -1,95 +0,0 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Random;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;
/**
* Deletes a percentage of documents from an index randomly
* over the number of documents. The parameter, X, is in
* percent. EG 50 means 1/2 of all documents will be
* deleted.
*
* <p><b>NOTE</b>: the param is an absolute percentage of
* maxDoc(). This means if you delete 50%, and then delete
* 50% again, the 2nd delete will do nothing.
*
* <p> Parameters:
* <ul>
* <li> delete.percent.rand.seed - defines the seed to
* initialize Random (default 1717)
* </ul>
*/
public class DeleteByPercentTask extends PerfTask {
double percent;
int numDeleted = 0;
final Random random;
public DeleteByPercentTask(PerfRunData runData) {
super(runData);
random = new Random(runData.getConfig().get("delete.percent.rand.seed", 1717));
}
@Override
public void setParams(String params) {
super.setParams(params);
percent = Double.parseDouble(params)/100;
}
@Override
public boolean supportsParams() {
return true;
}
@Override
public int doLogic() throws Exception {
IndexReader r = getRunData().getIndexReader();
int maxDoc = r.maxDoc();
int numDeleted = 0;
// percent is an absolute target:
int numToDelete = ((int) (maxDoc * percent)) - r.numDeletedDocs();
if (numToDelete < 0) {
r.undeleteAll();
numToDelete = (int) (maxDoc * percent);
}
while (numDeleted < numToDelete) {
double delRate = ((double) (numToDelete-numDeleted))/r.numDocs();
Bits liveDocs = MultiFields.getLiveDocs(r);
int doc = 0;
while (doc < maxDoc && numDeleted < numToDelete) {
if ((liveDocs == null || liveDocs.get(doc)) && random.nextDouble() <= delRate) {
r.deleteDocument(doc);
numDeleted++;
if (liveDocs == null) {
liveDocs = MultiFields.getLiveDocs(r);
assert liveDocs != null;
}
}
doc++;
}
}
System.out.println("--> processed (delete) " + numDeleted + " docs");
r.decRef();
return numDeleted;
}
}

View File

@ -1,90 +0,0 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.index.IndexReader;
/**
* Delete a document by docid. If no docid param is supplied, deletes doc with
* <code>id = last-deleted-doc + doc.delete.step</code>.
*/
public class DeleteDocTask extends PerfTask {
/**
* Gap between ids of deleted docs, applies when no docid param is provided.
*/
public static final int DEFAULT_DOC_DELETE_STEP = 8;
public DeleteDocTask(PerfRunData runData) {
super(runData);
}
private int deleteStep = -1;
private static int lastDeleted = -1;
private int docid = -1;
private boolean byStep = true;
@Override
public int doLogic() throws Exception {
IndexReader r = getRunData().getIndexReader();
r.deleteDocument(docid);
lastDeleted = docid;
r.decRef();
return 1; // one work item done here
}
/* (non-Javadoc)
* @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#setup()
*/
@Override
public void setup() throws Exception {
super.setup();
if (deleteStep<0) {
deleteStep = getRunData().getConfig().get("doc.delete.step",DEFAULT_DOC_DELETE_STEP);
}
// set the docid to be deleted
docid = (byStep ? lastDeleted + deleteStep : docid);
}
@Override
protected String getLogMessage(int recsCount) {
return "deleted " + recsCount + " docs, last deleted: " + lastDeleted;
}
/**
* Set the params (docid only)
* @param params docid to delete, or -1 for deleting by delete gap settings.
*/
@Override
public void setParams(String params) {
super.setParams(params);
docid = (int) Float.parseFloat(params);
byStep = (docid < 0);
}
/* (non-Javadoc)
* @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
*/
@Override
public boolean supportsParams() {
return true;
}
}

View File

@ -1,58 +0,0 @@
package org.apache.lucene.benchmark.byTask.tasks;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.index.IndexReader;
public class FlushReaderTask extends PerfTask {
String userData = null;
public FlushReaderTask(PerfRunData runData) {
super(runData);
}
@Override
public boolean supportsParams() {
return true;
}
@Override
public void setParams(String params) {
super.setParams(params);
userData = params;
}
@Override
public int doLogic() throws IOException {
IndexReader reader = getRunData().getIndexReader();
if (userData != null) {
Map<String,String> map = new HashMap<String,String>();
map.put(OpenReaderTask.USER_DATA, userData);
reader.flush(map);
} else {
reader.flush();
}
reader.decRef();
return 1;
}
}

View File

@ -22,20 +22,17 @@ import java.util.Collection;
import java.util.Map;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
/**
* Open an index reader.
* <br>Other side effects: index reader object in perfRunData is set.
* <br> Optional params readOnly,commitUserData eg. OpenReader(false,commit1)
* <br> Optional params commitUserData eg. OpenReader(false,commit1)
*/
public class OpenReaderTask extends PerfTask {
public static final String USER_DATA = "userData";
private boolean readOnly = true;
private String commitUserData = null;
public OpenReaderTask(PerfRunData runData) {
@ -45,22 +42,11 @@ public class OpenReaderTask extends PerfTask {
@Override
public int doLogic() throws IOException {
Directory dir = getRunData().getDirectory();
Config config = getRunData().getConfig();
IndexReader r = null;
final IndexDeletionPolicy deletionPolicy;
if (readOnly) {
deletionPolicy = null;
} else {
deletionPolicy = CreateIndexTask.getIndexDeletionPolicy(config);
}
if (commitUserData != null) {
r = IndexReader.open(OpenReaderTask.findIndexCommit(dir, commitUserData),
deletionPolicy,
readOnly);
r = IndexReader.open(OpenReaderTask.findIndexCommit(dir, commitUserData));
} else {
r = IndexReader.open(dir,
deletionPolicy,
readOnly);
r = IndexReader.open(dir);
}
getRunData().setIndexReader(r);
// We transfer reference to the run data
@ -74,10 +60,7 @@ public class OpenReaderTask extends PerfTask {
if (params != null) {
String[] split = params.split(",");
if (split.length > 0) {
readOnly = Boolean.valueOf(split[0]).booleanValue();
}
if (split.length > 1) {
commitUserData = split[1];
commitUserData = split[0];
}
}
}

View File

@ -44,11 +44,9 @@ public class PrintReaderTask extends PerfTask {
Directory dir = getRunData().getDirectory();
IndexReader r = null;
if (userData == null)
r = IndexReader.open(dir, true);
r = IndexReader.open(dir);
else
r = IndexReader.open(OpenReaderTask.findIndexCommit(dir, userData),
null,
true);
r = IndexReader.open(OpenReaderTask.findIndexCommit(dir, userData));
System.out.println("--> numDocs:"+r.numDocs()+" dels:"+r.numDeletedDocs());
r.close();
return 1;

View File

@ -167,7 +167,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
"{ AddDoc } : 100",
"ForceMerge(1)",
"CloseIndex",
"OpenReader(true)",
"OpenReader",
"{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
"CloseReader",
};
@ -206,7 +206,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
"{ AddDoc } : 1000",
"ForceMerge(1)",
"CloseIndex",
"OpenReader(false)",
"OpenReader",
"{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
"CloseReader",
};
@ -623,40 +623,6 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
}
}
public void testDeleteByPercent() throws Exception {
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"# ----- properties ",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
"ram.flush.mb=-1",
"max.buffered=2",
"content.source.log.step=3",
"doc.term.vector=false",
"content.source.forever=false",
"directory=RAMDirectory",
"doc.stored=false",
"doc.tokenized=false",
"debug.level=1",
"# ----- alg ",
"CreateIndex",
"{ \"AddDocs\" AddDoc > : * ",
"CloseIndex()",
"OpenReader(false)",
"DeleteByPercent(20)",
"CloseReader"
};
// 2. execute the algorithm (required in every "logic" test)
Benchmark benchmark = execBenchmark(algLines);
// 3. test number of docs in the index
IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
int ndocsExpected = 16; // first 20 reuters docs, minus 20%
assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
ir.close();
}
/**
* Test that we can set merge scheduler".
*/

View File

@ -57,7 +57,7 @@ public class SimpleMain {
// open readers
TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
IndexReader indexReader = IndexReader.open(indexDir, true);
IndexReader indexReader = IndexReader.open(indexDir);
ExampleUtils.log("search the sample documents...");
List<FacetResult> facetRes = SimpleSearcher.searchWithFacets(indexReader, taxo);
@ -82,7 +82,7 @@ public class SimpleMain {
// open readers
TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
IndexReader indexReader = IndexReader.open(indexDir, true);
IndexReader indexReader = IndexReader.open(indexDir);
ExampleUtils.log("search the sample documents...");
List<FacetResult> facetRes = SimpleSearcher.searchWithDrillDown(indexReader, taxo);