mirror of https://github.com/apache/lucene.git
ReadTokensTask now converts tokens to their indexed forms (char[]->byte[]), just as the indexer does. This allows measurement of the conversion process, which is important for analysis components that customize it, e.g. (ICU)CollationKeyFilter.
NB: as a result, benchmarks that incorporate this task will no longer be directly comparable between 3.X and 4.0.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1087471 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9cefe60a4b
commit
1caaea77b2
|
@ -22,6 +22,7 @@ import java.util.List;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -98,8 +99,11 @@ public class ReadTokensTask extends PerfTask {
|
|||
// reset the TokenStream to the first token
|
||||
stream.reset();
|
||||
|
||||
while(stream.incrementToken())
|
||||
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
|
||||
while(stream.incrementToken()) {
|
||||
termAtt.fillBytesRef();
|
||||
tokenCount++;
|
||||
}
|
||||
}
|
||||
totalTokenCount += tokenCount;
|
||||
return tokenCount;
|
||||
|
|
Loading…
Reference in New Issue