mirror of https://github.com/apache/lucene.git
ReadTokensTask now converts tokens to their indexed forms (char[]->byte[]), just as the indexer does. This allows measurement of the conversion process, which is important for analysis components that customize it, e.g. (ICU)CollationKeyFilter.
NB: as a result, benchmarks that incorporate this task will no longer be directly comparable between 3.X and 4.0 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1087471 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9cefe60a4b
commit
1caaea77b2
|
@ -22,6 +22,7 @@ import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
@ -98,8 +99,11 @@ public class ReadTokensTask extends PerfTask {
|
||||||
// reset the TokenStream to the first token
|
// reset the TokenStream to the first token
|
||||||
stream.reset();
|
stream.reset();
|
||||||
|
|
||||||
while(stream.incrementToken())
|
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
|
||||||
|
while(stream.incrementToken()) {
|
||||||
|
termAtt.fillBytesRef();
|
||||||
tokenCount++;
|
tokenCount++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
totalTokenCount += tokenCount;
|
totalTokenCount += tokenCount;
|
||||||
return tokenCount;
|
return tokenCount;
|
||||||
|
|
Loading…
Reference in New Issue