ReadTokensTask now converts tokens to their indexed forms (char[]->byte[]), just as the indexer does. This allows measurement of the conversion process, which is important for analysis components that customize it, e.g. (ICU)CollationKeyFilter.

NB: as a result, benchmarks that incorporate this task will no longer be directly comparable between 3.X and 4.0.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1087471 13f79535-47bb-0310-9956-ffa450edef68
2011-03-31 22:44:20 +00:00 · 2011-03-31 22:44:20 +00:00 · 1caaea77b2
parent 9cefe60a4b
commit 1caaea77b2
1 changed files with 5 additions and 1 deletions
--- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
+++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
@@ -22,6 +22,7 @@ import java.util.List;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.document.Document;
@@ -98,8 +99,11 @@ public class ReadTokensTask extends PerfTask {
      // reset the TokenStream to the first token
      stream.reset();

-      while(stream.incrementToken())
+      TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
+      while(stream.incrementToken()) {
+        termAtt.fillBytesRef();
        tokenCount++;
+      }
    }
    totalTokenCount += tokenCount;
    return tokenCount;