Added some more algorithms for testing things out, implemented basic TREC query driver based on the sample in the javadocs.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@585677 13f79535-47bb-0310-9956-ffa450edef68
2007-10-17 20:36:20 +00:00 · 2007-10-17 20:36:20 +00:00 · a614f0d99a
parent 112f227ecc
commit a614f0d99a
5 changed files with 336 additions and 0 deletions
--- a/contrib/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
+++ b/contrib/benchmark/conf/indexing-flush-by-RAM-multithreaded.alg
@ -0,0 +1,70 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# multi val params are iterated by NewRound's, added to reports, start with column name.
+
+#merge.factor=mrg:10:100:10:100:10:100:10:100
+#max.buffered=buf:10:10:100:100:10:10:100:100
+ram.flush.mb=flush:32:40:48:56:32:40:48:56
+compound=cmpnd:true:true:true:true:false:false:false:false
+
+autocommit=false
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+doc.add.log.step=2000
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=2
+
+log.queries=true
+# -------------------------------------------------------------------------------------
+
+{ "Rounds"
+
+    ResetSystemErase
+
+    { "Populate"
+        CreateIndex
+        [{ "MAddDocs" AddDoc } : 5000] : 4
+        Optimize
+        CloseIndex
+    }
+
+
+    RepSumByPref MAddDocs
+
+    NewRound
+
+} : 8
+
+RepSumByNameRound
+RepSumByName
+RepSumByPrefRound MAddDocs
--- a/contrib/benchmark/conf/indexing-flush-by-RAM.alg
+++ b/contrib/benchmark/conf/indexing-flush-by-RAM.alg
@ -0,0 +1,70 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# multi val params are iterated by NewRound's, added to reports, start with column name.
+
+#merge.factor=mrg:10:100:10:100:10:100:10:100
+#max.buffered=buf:10:10:100:100:10:10:100:100
+ram.flush.mb=flush:32:40:48:56:32:40:48:56
+compound=cmpnd:true:true:true:true:false:false:false:false
+
+autocommit=false
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+doc.add.log.step=2000
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=2
+
+log.queries=true
+# -------------------------------------------------------------------------------------
+
+{ "Rounds"
+
+    ResetSystemErase
+
+    { "Populate"
+        CreateIndex
+        { "MAddDocs" AddDoc } : 20000
+        Optimize
+        CloseIndex
+    }
+
+
+    RepSumByPref MAddDocs
+
+    NewRound
+
+} : 8
+
+RepSumByNameRound
+RepSumByName
+RepSumByPrefRound MAddDocs
--- a/contrib/benchmark/conf/indexing-multithreaded.alg
+++ b/contrib/benchmark/conf/indexing-multithreaded.alg
@ -0,0 +1,70 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# multi val params are iterated by NewRound's, added to reports, start with column name.
+
+merge.factor=mrg:10:100:10:100:10:100:10:100
+max.buffered=buf:10:10:100:100:10:10:100:100
+#ram.flush.mb=flush:32:40:48:56:32:40:48:56
+compound=cmpnd:true:true:true:true:false:false:false:false
+
+autocommit=false
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+doc.add.log.step=2000
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=2
+
+log.queries=true
+# -------------------------------------------------------------------------------------
+
+{ "Rounds"
+
+    ResetSystemErase
+
+    { "Populate"
+        CreateIndex
+        [{ "MAddDocs" AddDoc } : 5000] : 4
+        Optimize
+        CloseIndex
+    }
+
+
+    RepSumByPref MAddDocs
+
+    NewRound
+
+} : 8
+
+RepSumByNameRound
+RepSumByName
+RepSumByPrefRound MAddDocs
--- a/contrib/benchmark/conf/indexing.alg
+++ b/contrib/benchmark/conf/indexing.alg
@ -0,0 +1,70 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# multi val params are iterated by NewRound's, added to reports, start with column name.
+
+merge.factor=mrg:10:100:10:100:10:100:10:100
+max.buffered=buf:10:10:100:100:10:10:100:100
+#ram.flush.mb=flush:32:40:48:56:32:40:48:56
+compound=cmpnd:true:true:true:true:false:false:false:false
+
+autocommit=false
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+#directory=RamDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+doc.add.log.step=2000
+
+docs.dir=reuters-out
+#docs.dir=reuters-111
+
+#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=2
+
+log.queries=true
+# -------------------------------------------------------------------------------------
+
+{ "Rounds"
+
+    ResetSystemErase
+
+    { "Populate"
+        CreateIndex
+        { "MAddDocs" AddDoc } : 20000
+        Optimize
+        CloseIndex
+    }
+
+
+    RepSumByPref MAddDocs
+
+    NewRound
+
+} : 8
+
+RepSumByNameRound
+RepSumByName
+RepSumByPrefRound MAddDocs
--- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
+++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
@ -0,0 +1,56 @@
+package org.apache.lucene.benchmark.quality.trec;
+
+import org.apache.lucene.benchmark.quality.trec.TrecJudge;
+import org.apache.lucene.benchmark.quality.trec.TrecTopicsReader;
+import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
+import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
+import org.apache.lucene.benchmark.quality.*;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Searcher;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.PrintWriter;
+
+
+/**
+ *
+ *
+ **/
+public class QueryDriver {
+  public static void main(String[] args) throws Exception {
+
+    
+    File topicsFile = new File(args[0]);
+    File qrelsFile = new File(args[1]);
+    Searcher searcher = new IndexSearcher(args[3]);
+
+    int maxResults = 1000;
+    String docNameField = "docname";
+
+    PrintWriter logger = new PrintWriter(System.out, true);
+
+    // use trec utilities to read trec topics into quality queries
+    TrecTopicsReader qReader = new TrecTopicsReader();
+    QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+
+    // prepare judge, with trec utilities that read from a QRels file
+    Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+
+    // validate topics & judgments match each other
+    judge.validateData(qqs, logger);
+
+    // set the parsing of quality queries into Lucene queries.
+    QualityQueryParser qqParser = new SimpleQQParser("title", "body");
+
+    // run the benchmark
+    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
+    SubmissionReport submitLog = null;
+    QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
+
+    // print an avarage sum of the results
+    QualityStats avg = QualityStats.average(stats);
+    avg.log("SUMMARY", 2, logger, "  ");
+  }
+}