From 1caaea77b2ed7762e323431fea748a545307db60 Mon Sep 17 00:00:00 2001 From: Steven Rowe Date: Thu, 31 Mar 2011 22:44:20 +0000 Subject: [PATCH] ReadTokensTask now converts tokens to their indexed forms (char[]->byte[]), just as the indexer does. This allows measurement of the conversion process, which is important for analysis components that customize it, e.g. (ICU)CollationKeyFilter. NB: as a result, benchmarks that incorporate this task will no longer be directly comparable between 3.X and 4.0 git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1087471 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/benchmark/byTask/tasks/ReadTokensTask.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java index fa0ae999b45..f702cb8ee7f 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java @@ -22,6 +22,7 @@ import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.document.Document; @@ -98,8 +99,11 @@ public class ReadTokensTask extends PerfTask { // reset the TokenStream to the first token stream.reset(); - while(stream.incrementToken()) + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + while(stream.incrementToken()) { + termAtt.fillBytesRef(); tokenCount++; + } } totalTokenCount += tokenCount; return tokenCount;