Added some more algorithms for testing things out, implemented basic TREC query driver based on the sample in the javadocs.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@585677 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2007-10-17 20:36:20 +00:00
parent 112f227ecc
commit a614f0d99a
5 changed files with 336 additions and 0 deletions

View File

@ -0,0 +1,70 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
#merge.factor=mrg:10:100:10:100:10:100:10:100
#max.buffered=buf:10:10:100:100:10:10:100:100
ram.flush.mb=flush:32:40:48:56:32:40:48:56
compound=cmpnd:true:true:true:true:false:false:false:false
autocommit=false
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
doc.add.log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
# task at this depth or less would print when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
CreateIndex
[{ "MAddDocs" AddDoc } : 5000] : 4
Optimize
CloseIndex
}
RepSumByPref MAddDocs
NewRound
} : 8
RepSumByNameRound
RepSumByName
RepSumByPrefRound MAddDocs

View File

@ -0,0 +1,70 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
#merge.factor=mrg:10:100:10:100:10:100:10:100
#max.buffered=buf:10:10:100:100:10:10:100:100
ram.flush.mb=flush:32:40:48:56:32:40:48:56
compound=cmpnd:true:true:true:true:false:false:false:false
autocommit=false
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
doc.add.log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
# task at this depth or less would print when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
Optimize
CloseIndex
}
RepSumByPref MAddDocs
NewRound
} : 8
RepSumByNameRound
RepSumByName
RepSumByPrefRound MAddDocs

View File

@ -0,0 +1,70 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
merge.factor=mrg:10:100:10:100:10:100:10:100
max.buffered=buf:10:10:100:100:10:10:100:100
#ram.flush.mb=flush:32:40:48:56:32:40:48:56
compound=cmpnd:true:true:true:true:false:false:false:false
autocommit=false
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
doc.add.log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
# task at this depth or less would print when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
CreateIndex
[{ "MAddDocs" AddDoc } : 5000] : 4
Optimize
CloseIndex
}
RepSumByPref MAddDocs
NewRound
} : 8
RepSumByNameRound
RepSumByName
RepSumByPrefRound MAddDocs

View File

@ -0,0 +1,70 @@
#/**
# * Licensed to the Apache Software Foundation (ASF) under one or more
# * contributor license agreements. See the NOTICE file distributed with
# * this work for additional information regarding copyright ownership.
# * The ASF licenses this file to You under the Apache License, Version 2.0
# * (the "License"); you may not use this file except in compliance with
# * the License. You may obtain a copy of the License at
# *
# * http://www.apache.org/licenses/LICENSE-2.0
# *
# * Unless required by applicable law or agreed to in writing, software
# * distributed under the License is distributed on an "AS IS" BASIS,
# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# * See the License for the specific language governing permissions and
# * limitations under the License.
# */
# -------------------------------------------------------------------------------------
# multi val params are iterated by NewRound's, added to reports, start with column name.
merge.factor=mrg:10:100:10:100:10:100:10:100
max.buffered=buf:10:10:100:100:10:10:100:100
#ram.flush.mb=flush:32:40:48:56:32:40:48:56
compound=cmpnd:true:true:true:true:false:false:false:false
autocommit=false
analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
directory=FSDirectory
#directory=RamDirectory
doc.stored=true
doc.tokenized=true
doc.term.vector=false
doc.add.log.step=2000
docs.dir=reuters-out
#docs.dir=reuters-111
#doc.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleDocMaker
doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
#query.maker=org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker
query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
# task at this depth or less would print when they start
task.max.depth.log=2
log.queries=true
# -------------------------------------------------------------------------------------
{ "Rounds"
ResetSystemErase
{ "Populate"
CreateIndex
{ "MAddDocs" AddDoc } : 20000
Optimize
CloseIndex
}
RepSumByPref MAddDocs
NewRound
} : 8
RepSumByNameRound
RepSumByName
RepSumByPrefRound MAddDocs

View File

@ -0,0 +1,56 @@
package org.apache.lucene.benchmark.quality.trec;
import org.apache.lucene.benchmark.quality.trec.TrecJudge;
import org.apache.lucene.benchmark.quality.trec.TrecTopicsReader;
import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.benchmark.quality.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.PrintWriter;
/**
*
*
**/
public class QueryDriver {
public static void main(String[] args) throws Exception {
File topicsFile = new File(args[0]);
File qrelsFile = new File(args[1]);
Searcher searcher = new IndexSearcher(args[3]);
int maxResults = 1000;
String docNameField = "docname";
PrintWriter logger = new PrintWriter(System.out, true);
// use trec utilities to read trec topics into quality queries
TrecTopicsReader qReader = new TrecTopicsReader();
QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
// prepare judge, with trec utilities that read from a QRels file
Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
// validate topics & judgments match each other
judge.validateData(qqs, logger);
// set the parsing of quality queries into Lucene queries.
QualityQueryParser qqParser = new SimpleQQParser("title", "body");
// run the benchmark
QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
SubmissionReport submitLog = null;
QualityStats stats[] = qrun.execute(maxResults, judge, submitLog, logger);
// print an avarage sum of the results
QualityStats avg = QualityStats.average(stats);
avg.log("SUMMARY", 2, logger, " ");
}
}