ReadTokensTask now converts tokens to their indexed forms (char[]->byte[]), just as the indexer does. This allows measurement of the conversion process, which is important for analysis components that customize it, e.g. (ICU)CollationKeyFilter.

NB: as a result, benchmarks that incorporate this task will no longer be directly comparable between versions 3.X and 4.0.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1087471 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2011-03-31 22:44:20 +00:00
parent 9cefe60a4b
commit 1caaea77b2
1 changed file with 5 additions and 1 deletion

View File

@ -22,6 +22,7 @@ import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.document.Document;
@ -98,8 +99,11 @@ public class ReadTokensTask extends PerfTask {
// reset the TokenStream to the first token
stream.reset();
while(stream.incrementToken())
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
while(stream.incrementToken()) {
termAtt.fillBytesRef();
tokenCount++;
}
}
totalTokenCount += tokenCount;
return tokenCount;