ReadTokensTask now converts tokens to their indexed forms (char[]->byte[]), just as the indexer does. This allows measurement of the conversion process, which is important for analysis components that customize it, e.g. (ICU)CollationKeyFilter.

NB: as a result, benchmarks that incorporate this task will no longer be directly comparable between the 3.X and 4.0 versions.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1087471 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Steven Rowe 2011-03-31 22:44:20 +00:00
parent 9cefe60a4b
commit 1caaea77b2
1 changed file with 5 additions and 1 deletion

View File

@ -22,6 +22,7 @@ import java.util.List;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -98,9 +99,12 @@ public class ReadTokensTask extends PerfTask {
// reset the TokenStream to the first token // reset the TokenStream to the first token
stream.reset(); stream.reset();
while(stream.incrementToken()) TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
while(stream.incrementToken()) {
termAtt.fillBytesRef();
tokenCount++; tokenCount++;
} }
}
totalTokenCount += tokenCount; totalTokenCount += tokenCount;
return tokenCount; return tokenCount;
} }