mirror of https://github.com/apache/lucene.git

LUCENE-1128 and LUCENE-1129: Add highlighting support to benchmarking, fix a minor traversalSize bug in ReadTask, and add a few new algorithms to try out.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@614885 13f79535-47bb-0310-9956-ffa450edef68

This commit is contained in:
parent f75f490eb9
commit 1183763dbe
@@ -4,6 +4,11 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
 
 $Id:$
 
+1/23/2008
+  LUCENE-1129: ReadTask properly uses the traversalSize value
+
+  LUCENE-1128: Added support for benchmarking the highlighter
+
 01/20/08
 LUCENE-1139: various fixes
  - add merge.scheduler, merge.policy config properties

@@ -12,6 +17,7 @@ $Id:$
  - OptimizeTask now takes int param to call optimize(int maxNumSegments)
  - CloseIndexTask now takes bool param to call close(false) (abort running merges)
+
 
 01/03/08
 LUCENE-1116: quality package improvements:
  - add MRR computation;
@@ -109,6 +109,7 @@
 <path id="classpath">
   <pathelement path="${common.dir}/build/classes/java"/>
   <pathelement path="${common.dir}/build/classes/demo"/>
+  <pathelement path="${common.dir}/build/contrib/highlighter/classes/java"/>
   <pathelement path="${basedir}/lib/${digester.jar}"/>
   <pathelement path="${basedir}/lib/${collections.jar}"/>
   <pathelement path="${basedir}/lib/${logging.jar}"/>

@@ -163,9 +164,14 @@
 <subant target="compile-demo">
   <fileset dir="${common.dir}" includes="build.xml"/>
 </subant>
 </target>
+<target name="compile-highlighter">
+  <subant target="compile">
+    <fileset dir="${common.dir}/contrib/highlighter" includes="build.xml"/>
+  </subant>
+</target>
 
-<target name="init" depends="common.init,compile-demo,check-files"/>
+<target name="init" depends="common.init,compile-demo,compile-highlighter,check-files"/>
 
 <!-- make sure online collections (reuters) are first downloaded -->
 <target name="test" depends="init,get-files">
@@ -0,0 +1,68 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# multi val params are iterated by NewRound's, added to reports, start with column name.
+
+ram.flush.mb=flush:32:32
+compound=cmpnd:true:false
+
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=true
+doc.term.vector.offsets=true
+doc.term.vector.positions=true
+doc.add.log.step=2000
+
+docs.dir=reuters-out
+
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=2
+
+log.queries=true
+# -------------------------------------------------------------------------------------
+{ "Populate"
+        CreateIndex
+        { "MAddDocs" AddDoc } : 20000
+        Optimize
+        CloseIndex
+}
+{ "Rounds"
+
+    ResetSystemSoft
+
+
+    OpenReader
+    { "SearchHlgtSameRdr" SearchTravRetHighlight(maxFrags[10],fields[body]) > : 1000
+
+    CloseReader
+
+    RepSumByPref MAddDocs
+
+    NewRound
+
+} : 4
+
+RepSumByNameRound
+RepSumByName
+RepSumByPrefRound MAddDocs
@@ -0,0 +1,69 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# multi val params are iterated by NewRound's, added to reports, start with column name.
+
+ram.flush.mb=flush:32:32
+compound=cmpnd:true:false
+
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+doc.term.vector.offsets=false
+doc.term.vector.positions=false
+doc.add.log.step=2000
+
+docs.dir=reuters-out
+
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=2
+
+log.queries=true
+# -------------------------------------------------------------------------------------
+{ "Populate"
+        CreateIndex
+        { "MAddDocs" AddDoc } : 20000
+        Optimize
+        CloseIndex
+}
+{ "Rounds"
+
+    ResetSystemSoft
+    OpenReader
+    { "SrchTrvRetNewRdr" SearchTravRet(10) > : 1000
+    CloseReader
+    OpenReader
+    { "SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
+
+    CloseReader
+
+    RepSumByPref SearchHlgtSameRdr
+
+    NewRound
+
+} : 2
+
+RepSumByNameRound
+RepSumByName
+RepSumByPrefRound MAddDocs
@@ -0,0 +1,69 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+# multi val params are iterated by NewRound's, added to reports, start with column name.
+
+ram.flush.mb=flush:32:32
+compound=cmpnd:true:false
+
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=true
+doc.term.vector.offsets=true
+doc.term.vector.positions=true
+doc.add.log.step=2000
+
+docs.dir=reuters-out
+
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+# task at this depth or less would print when they start
+task.max.depth.log=2
+
+log.queries=true
+# -------------------------------------------------------------------------------------
+{ "Populate"
+        CreateIndex
+        { "MAddDocs" AddDoc } : 20000
+        Optimize
+        CloseIndex
+}
+{ "Rounds"
+
+    ResetSystemSoft
+    OpenReader
+    { "SrchTrvRetNewRdr" SearchTravRet(10) > : 1000
+    CloseReader
+    OpenReader
+    { "SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
+
+    CloseReader
+
+    RepSumByPref SearchHlgtSameRdr
+
+    NewRound
+
+} : 2
+
+RepSumByNameRound
+RepSumByName
+RepSumByPrefRound MAddDocs
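A note on the property syntax shared by these three algorithm files: a value of the form name:v1:v2 is a multi-valued param. As the header comment says, each NewRound task advances to the next value (wrapping around), and the leading name becomes the column header in reports. A hypothetical reading of the two multi-valued properties above (annotated as Java strings purely for the comments):

    String ramFlush = "flush:32:32";      // report column "flush": every round uses 32 MB (constant)
    String compound = "cmpnd:true:false"; // report column "cmpnd": round 1 true, round 2 false, then wraps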
@@ -247,6 +247,10 @@ The following is an informal description of the supported syntax.
 <li><b>SearchTravRetLoadFieldSelectorTask</b> takes a string
     parameter: a comma separated list of Fields to load.
 </li>
+<li><b>SearchTravRetHighlightTask</b> takes a string
+    parameter: a comma separated list of parameters to define highlighting. See that
+    task's javadocs for more information.
+</li>
 </ul>
 <br>Example - <font color="#FF0066">AddDoc(2000)</font> - would add a document
 of size 2000 (~bytes).
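For a concrete sense of that parameter string, here is a hypothetical value annotated with what SearchTravRetHighlightTask.setParams() (added below) derives from each piece; the field names are illustrative:

    // Hypothetical parameter string; each bracketed value maps to one setting:
    String params = "size[10],highlight[5],maxFrags[3],mergeContiguous[true],fields[body;title]";
    // size[10]              -> traverse at most the first 10 hits
    // highlight[5]          -> highlight only the first 5 retrieved documents
    // maxFrags[3]           -> score at most 3 fragments per highlighted field
    // mergeContiguous[true] -> merge adjacent fragments before counting them
    // fields[body;title]    -> restrict highlighting to these stored fields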
@@ -17,26 +17,31 @@ package org.apache.lucene.benchmark.byTask.tasks;
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.highlight.*;
 import org.apache.lucene.store.Directory;
 
 import java.io.IOException;
+import java.util.*;
 
 
 /**
  * Read index (abstract) task.
  * Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve()
  * methods to configure the actual action.
- *
+ * <p/>
  * <p>Note: All ReadTasks reuse the reader if it is already open.
  * Otherwise a reader is opened at start and closed at the end.
- *
+ * <p/>
  * <p>Other side effects: none.
  */
 public abstract class ReadTask extends PerfTask {

@@ -48,7 +53,7 @@ public abstract class ReadTask extends PerfTask {
   public int doLogic() throws Exception {
     int res = 0;
     boolean closeReader = false;
-
+    
     // open reader or use existing one
     IndexReader ir = getRunData().getIndexReader();
     if (ir == null) {

@@ -57,18 +62,18 @@ public abstract class ReadTask extends PerfTask {
       closeReader = true;
       //res++; //this is confusing, comment it out
     }
 
     // optionally warm and add num docs traversed to count
     if (withWarm()) {
       Document doc = null;
       for (int m = 0; m < ir.maxDoc(); m++) {
         if (!ir.isDeleted(m)) {
           doc = ir.document(m);
-          res += (doc==null ? 0 : 1);
+          res += (doc == null ? 0 : 1);
         }
       }
     }
 
     if (withSearch()) {
       res++;
       IndexSearcher searcher = new IndexSearcher(ir);

@@ -76,32 +81,53 @@ public abstract class ReadTask extends PerfTask {
       Query q = queryMaker.makeQuery();
       Hits hits = searcher.search(q);
       //System.out.println("searched: "+q);
 
-      if (withTraverse() && hits!=null) {
+      if (withTraverse() && hits != null) {
         int traversalSize = Math.min(hits.length(), traversalSize());
         if (traversalSize > 0) {
           boolean retrieve = withRetrieve();
-          for (int m = 0; m < hits.length(); m++) {
+          int numHighlight = Math.min(numToHighlight(), hits.length());
+          Analyzer analyzer = getRunData().getAnalyzer();
+          Highlighter highlighter = null;
+          int maxFrags = 1;
+          if (numHighlight > 0) {
+            highlighter = getHighlighter(q);
+            maxFrags = maxNumFragments();
+          }
+          boolean merge = isMergeContiguousFragments();
+          for (int m = 0; m < traversalSize; m++) {
             int id = hits.id(m);
             res++;
             if (retrieve) {
-              res += retrieveDoc(ir, id);
+              Document document = retrieveDoc(ir, id);
+              res += document != null ? 1 : 0;
+              if (numHighlight > 0 && m < numHighlight) {
+                Collection/*<String>*/ fieldsToHighlight = getFieldsToHighlight(document);
+                for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) {
+                  String field = (String) iterator.next();
+                  String text = document.get(field);
+                  TokenStream ts = TokenSources.getAnyTokenStream(ir, id, field, document, analyzer);
+                  res += doHighlight(ts, text, highlighter, merge, maxFrags);
+                }
+              }
             }
           }
         }
       }
 
       searcher.close();
     }
 
     if (closeReader) {
       ir.close();
     }
     return res;
   }
 
-  protected int retrieveDoc(IndexReader ir, int id) throws IOException {
-    return (ir.document(id) == null ? 0 : 1);
+  protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
+    return ir.document(id);
   }
 
   /**

@@ -112,33 +138,82 @@ public abstract class ReadTask extends PerfTask {
   /**
    * Return true if search should be performed.
    */
-  public abstract boolean withSearch ();
+  public abstract boolean withSearch();
 
   /**
    * Return true if warming should be performed.
    */
-  public abstract boolean withWarm ();
+  public abstract boolean withWarm();
 
   /**
    * Return true if, with search, results should be traversed.
    */
-  public abstract boolean withTraverse ();
+  public abstract boolean withTraverse();
 
   /**
    * Specify the number of hits to traverse.  Tasks should override this if they want to restrict the number
    * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
-   *
+   * <p/>
    * Read task calculates the traversal as: Math.min(hits.length(), traversalSize())
+   *
    * @return Integer.MAX_VALUE
    */
-  public int traversalSize()
-  {
+  public int traversalSize() {
     return Integer.MAX_VALUE;
   }
 
   /**
    * Return true if, with search & results traversing, docs should be retrieved.
    */
-  public abstract boolean withRetrieve ();
+  public abstract boolean withRetrieve();
 
+  /**
+   * Set to the number of documents to highlight.
+   *
+   * @return The number of the results to highlight. 0 means no docs will be highlighted.
+   */
+  public int numToHighlight() {
+    return 0;
+  }
+
+  protected Highlighter getHighlighter(Query q) {
+    return new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(q));
+  }
+
+  /**
+   *
+   * @return the maximum number of highlighter fragments
+   */
+  public int maxNumFragments() {
+    return 10;
+  }
+
+  /**
+   *
+   * @return true if the highlighter should merge contiguous fragments
+   */
+  public boolean isMergeContiguousFragments() {
+    return false;
+  }
+
+  protected int doHighlight(TokenStream ts, String text, Highlighter highlighter, boolean mergeContiguous, int maxFragments) throws IOException {
+    TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFragments);
+    return frag != null ? frag.length : 0;
+  }
+
+  /**
+   * Define the fields to highlight.  Base implementation returns all fields.
+   * @param document The Document
+   * @return A Collection of Field names (Strings)
+   */
+  protected Collection/*<String>*/ getFieldsToHighlight(Document document) {
+    List/*<Fieldable>*/ fieldables = document.getFields();
+    Set/*<String>*/ result = new HashSet(fieldables.size());
+    for (Iterator iterator = fieldables.iterator(); iterator.hasNext();) {
+      Fieldable fieldable = (Fieldable) iterator.next();
+      result.add(fieldable.name());
+    }
+    return result;
+  }
+
 }
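To make the new bookkeeping in doLogic() concrete, here is a worked example of how res accumulates under the changed loop (ignoring the one-time res++ for the search itself, and assuming the first 10 hits are all traversed and retrieved, each yielding 2 fragments on a single highlighted field):

    int traversed = 10;      // res++ once per traversed hit
    int retrieved = 10;      // +1 per non-null document returned by retrieveDoc()
    int fragments = 10 * 2;  // doHighlight() returns frag.length for each highlighted field
    int res = traversed + retrieved + fragments;  // 40 records counted by the task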
@@ -0,0 +1,126 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.document.Document;
+
+import java.util.Set;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Collections;
+
+/**
+ * Search and Traverse and Retrieve docs task.  Highlight the fields in the retrieved documents.
+ *
+ * Uses the {@link org.apache.lucene.search.highlight.SimpleHTMLFormatter} for formatting.
+ *
+ * <p>Note: This task reuses the reader if it is already open.
+ * Otherwise a reader is opened at start and closed at the end.
+ * </p>
+ *
+ * <p>Takes optional multivalued, comma separated param string as: size[<traversal size>],highlight[<int>],maxFrags[<int>],mergeContiguous[<boolean>],fields[name1;name2;...]</p>
+ * <ul>
+ * <li>traversal size - The number of hits to traverse, otherwise all will be traversed</li>
+ * <li>highlight - The number of the hits to highlight.  Will always be less than or equal to traversal size.  Default is Integer.MAX_VALUE (i.e. hits.length())</li>
+ * <li>maxFrags - The maximum number of fragments to score by the highlighter</li>
+ * <li>mergeContiguous - true if contiguous fragments should be merged.</li>
+ * <li>fields - The fields to highlight.  If not specified all fields will be highlighted (or at least attempted)</li>
+ * </ul>
+ * Example:
+ * <pre>"SearchHlgtSameRdr" SearchTravRetHighlight(size[10],highlight[10],mergeContiguous[true],maxFrags[3],fields[body]) > : 1000
+ * </pre>
+ *
+ * Documents must be stored in order for this task to work.  Additionally, term vector positions can be used as well.
+ *
+ * <p>Other side effects: counts additional 1 (record) for each traversed hit,
+ * and 1 more for each retrieved (non null) document and 1 for each fragment returned.</p>
+ */
+public class SearchTravRetHighlightTask extends SearchTravTask {
+
+  protected int numToHighlight = Integer.MAX_VALUE;
+  protected boolean mergeContiguous;
+  protected int maxFrags = 2;
+  protected Set paramFields = Collections.EMPTY_SET;
+
+  public SearchTravRetHighlightTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  public void setup() throws Exception {
+    super.setup();
+    //check to make sure the doc is being stored
+    PerfRunData data = getRunData();
+    if (data.getConfig().get("doc.stored", false) == false) {
+      throw new Exception("doc.stored must be set to true");
+    }
+  }
+
+  public boolean withRetrieve() {
+    return true;
+  }
+
+  public int numToHighlight() {
+    return numToHighlight;
+  }
+
+  public boolean isMergeContiguousFragments() {
+    return mergeContiguous;
+  }
+
+  public int maxNumFragments() {
+    return maxFrags;
+  }
+
+  protected Collection/*<String>*/ getFieldsToHighlight(Document document) {
+    Collection result = super.getFieldsToHighlight(document);
+    //if stored is false, then result will be empty, in which case just get all the param fields
+    if (paramFields.isEmpty() == false && result.isEmpty() == false) {
+      result.retainAll(paramFields);
+    } else {
+      result = paramFields;
+    }
+    return result;
+  }
+
+  public void setParams(String params) {
+    String [] splits = params.split(",");
+    for (int i = 0; i < splits.length; i++) {
+      if (splits[i].startsWith("size[") == true) {
+        traversalSize = (int)Float.parseFloat(splits[i].substring("size[".length(),splits[i].length() - 1));
+      } else if (splits[i].startsWith("highlight[") == true) {
+        numToHighlight = (int)Float.parseFloat(splits[i].substring("highlight[".length(),splits[i].length() - 1));
+      } else if (splits[i].startsWith("maxFrags[") == true) {
+        maxFrags = (int)Float.parseFloat(splits[i].substring("maxFrags[".length(),splits[i].length() - 1));
+      } else if (splits[i].startsWith("mergeContiguous[") == true) {
+        mergeContiguous = Boolean.valueOf(splits[i].substring("mergeContiguous[".length(),splits[i].length() - 1)).booleanValue();
+      } else if (splits[i].startsWith("fields[") == true) {
+        paramFields = new HashSet();
+        String fieldNames = splits[i].substring("fields[".length(), splits[i].length() - 1);
+        String [] fieldSplits = fieldNames.split(";");
+        for (int j = 0; j < fieldSplits.length; j++) {
+          paramFields.add(fieldSplits[j]);
+        }
+      }
+    }
+  }
+
+}
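Because the parameters are also exposed as overridable hooks, a subclass can hard-wire them instead of passing a param string. A minimal sketch (the class name and values here are hypothetical; the pattern mirrors CountingHighlighterTestTask later in this commit):

    import org.apache.lucene.benchmark.byTask.PerfRunData;
    import org.apache.lucene.document.Document;

    import java.util.Collection;
    import java.util.Collections;

    // Hypothetical task: highlight only the "body" field of the first 5 hits.
    public class BodyOnlyHighlightTask extends SearchTravRetHighlightTask {
      public BodyOnlyHighlightTask(PerfRunData runData) {
        super(runData);
      }
      public int numToHighlight() {
        return 5;   // highlight at most the first 5 retrieved hits
      }
      public int maxNumFragments() {
        return 2;   // score at most 2 fragments per field
      }
      protected Collection/*<String>*/ getFieldsToHighlight(Document document) {
        return Collections.singletonList("body");  // ignore all other stored fields
      }
    }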
@@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.document.SetBasedFieldSelector;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 
 import java.util.StringTokenizer;

@@ -51,8 +52,8 @@ public class SearchTravRetLoadFieldSelectorTask extends SearchTravTask {
   }
 
 
-  protected int retrieveDoc(IndexReader ir, int id) throws IOException {
-    return (ir.document(id, fieldSelector) == null ? 0 : 1);
+  protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
+    return ir.document(id, fieldSelector);
   }
 
   public void setParams(String params) {
@@ -120,7 +120,7 @@ public class Algorithm {
         if ((char)stok.ttype == '*') {
           ((TaskSequence)prevTask).setRepetitions(TaskSequence.REPEAT_EXHAUST);
         } else {
-          if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expexted repetitions number: - "+stok.toString());
+          if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expected repetitions number: - "+stok.toString());
           ((TaskSequence)prevTask).setRepetitions((int)stok.nval);
         }
         // check for rate specification (ops/min)

@@ -130,7 +130,7 @@ public class Algorithm {
         } else {
           // get rate number
           stok.nextToken();
-          if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expexted rate number: - "+stok.toString());
+          if (stok.ttype!=StreamTokenizer.TT_NUMBER) throw new Exception("expected rate number: - "+stok.toString());
           // check for unit - min or sec, sec is default
           stok.nextToken();
           if (stok.ttype!='/') {

@@ -138,14 +138,14 @@ public class Algorithm {
             ((TaskSequence)prevTask).setRate((int)stok.nval,false); // set rate per sec
           } else {
             stok.nextToken();
-            if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expexted rate unit: 'min' or 'sec' - "+stok.toString());
+            if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString());
             String unit = stok.sval.toLowerCase();
             if ("min".equals(unit)) {
               ((TaskSequence)prevTask).setRate((int)stok.nval,true); // set rate per min
             } else if ("sec".equals(unit)) {
               ((TaskSequence)prevTask).setRate((int)stok.nval,false); // set rate per sec
             } else {
-              throw new Exception("expexted rate unit: 'min' or 'sec' - "+stok.toString());
+              throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString());
             }
           }
         }
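For reference, the repetition and rate syntax whose error messages are corrected here looks like this when written as algorithm lines in the style the tests below use (hypothetical lines; the task names are illustrative):

    // Hypothetical alg lines showing what the repetition/rate parser accepts:
    String algLines[] = {
        "{ \"SearchSameRdr\" Search > : 5000 : 100/sec",  // 5000 repetitions at a target rate of ~100 tasks per second
        "{ \"Warm\" Warm > : * : 60/min",                 // '*' repeats until the data is exhausted, at ~60 tasks per minute
    };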
@@ -27,7 +27,9 @@ import java.util.Iterator;
 import org.apache.lucene.benchmark.byTask.feeds.DocData;
 import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
 import org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
 import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
+import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
 import org.apache.lucene.benchmark.byTask.stats.TaskStats;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;

@@ -94,6 +96,109 @@ public class TestPerfTasksLogic extends TestCase {
     ir.close();
   }
 
+  public void testHighlighting() throws Exception {
+    // 1. alg definition (required in every "logic" test)
+    String algLines[] = {
+        "doc.stored=true",
+        "doc.maker=" + Reuters20DocMaker.class.getName(),
+        "query.maker=" + ReutersQueryMaker.class.getName(),
+        "ResetSystemErase",
+        "CreateIndex",
+        "{ AddDoc } : 1000",
+        "Optimize",
+        "CloseIndex",
+        "OpenReader",
+        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
+        "CloseReader",
+    };
+
+    // 2. we test this value later
+    CountingHighlighterTestTask.numHighlightedResults = 0;
+    CountingHighlighterTestTask.numDocsRetrieved = 0;
+    // 3. execute the algorithm (required in every "logic" test)
+    Benchmark benchmark = execBenchmark(algLines);
+
+    // 4. test specific checks after the benchmark run completed.
+    assertEquals("TestSearchTask was supposed to be called!", 147, CountingHighlighterTestTask.numDocsRetrieved);
+    //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
+    //we probably should use a different doc/query maker, but...
+    assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
+
+    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
+    // now we should be able to open the index for write.
+    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), null, false);
+    iw.close();
+    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
+    assertEquals("1000 docs were added to the index, this is what we expect to find!", 1000, ir.numDocs());
+    ir.close();
+  }
+
+  public void testHighlightingTV() throws Exception {
+    // 1. alg definition (required in every "logic" test)
+    String algLines[] = {
+        "doc.stored=true", //doc storage is required in order to have text to highlight
+        "doc.term.vector.offsets=true",
+        "doc.maker=" + Reuters20DocMaker.class.getName(),
+        "query.maker=" + ReutersQueryMaker.class.getName(),
+        "ResetSystemErase",
+        "CreateIndex",
+        "{ AddDoc } : 1000",
+        "Optimize",
+        "CloseIndex",
+        "OpenReader",
+        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
+        "CloseReader",
+    };
+
+    // 2. we test this value later
+    CountingHighlighterTestTask.numHighlightedResults = 0;
+    CountingHighlighterTestTask.numDocsRetrieved = 0;
+    // 3. execute the algorithm (required in every "logic" test)
+    Benchmark benchmark = execBenchmark(algLines);
+
+    // 4. test specific checks after the benchmark run completed.
+    assertEquals("TestSearchTask was supposed to be called!", 147, CountingHighlighterTestTask.numDocsRetrieved);
+    //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs
+    //we probably should use a different doc/query maker, but...
+    assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0);
+
+    assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory()));
+    // now we should be able to open the index for write.
+    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), null, false);
+    iw.close();
+    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
+    assertEquals("1000 docs were added to the index, this is what we expect to find!", 1000, ir.numDocs());
+    ir.close();
+  }
+
+  public void testHighlightingNoTvNoStore() throws Exception {
+    // 1. alg definition (required in every "logic" test)
+    String algLines[] = {
+        "doc.stored=false",
+        "doc.maker=" + Reuters20DocMaker.class.getName(),
+        "query.maker=" + ReutersQueryMaker.class.getName(),
+        "ResetSystemErase",
+        "CreateIndex",
+        "{ AddDoc } : 1000",
+        "Optimize",
+        "CloseIndex",
+        "OpenReader",
+        "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200",
+        "CloseReader",
+    };
+
+    // 2. we test this value later
+    CountingHighlighterTestTask.numHighlightedResults = 0;
+    CountingHighlighterTestTask.numDocsRetrieved = 0;
+    // 3. execute the algorithm (required in every "logic" test)
+    try {
+      Benchmark benchmark = execBenchmark(algLines);
+      assertTrue("CountingHighlighterTest should have thrown an exception", false);
+    } catch (Exception e) {
+      assertTrue(true);
+    }
+  }
+
 /**
  * Test Exhausting Doc Maker logic
  */
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.benchmark.byTask.tasks;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.TextFragment;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+
+import java.io.IOException;
+
+/**
+ * Test highlighting task which counts the number of documents retrieved and fragments highlighted.
+ */
+public class CountingHighlighterTestTask extends SearchTravRetHighlightTask {
+
+  public static int numHighlightedResults = 0;
+  public static int numDocsRetrieved = 0;
+
+  public CountingHighlighterTestTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
+    Document document = ir.document(id);
+    if (document != null) {
+      numDocsRetrieved++;
+    }
+    return document;
+  }
+
+  protected int doHighlight(TokenStream ts, String text, Highlighter highlighter, boolean mergeContiguous, int maxFragments) throws IOException {
+    TextFragment[] frag = highlighter.getBestTextFragments(ts, text, mergeContiguous, maxFragments);
+    numHighlightedResults += frag != null ? frag.length : 0;
+    return frag != null ? frag.length : 0;
+  }
+
+}