LUCENE-1441: fix KeywordTokenizer to set start/end offset on its token

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@712232 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2008-11-07 19:36:35 +00:00
parent 24d76157a0
commit 391119f207
2 changed files with 14 additions and 0 deletions

src/java/org/apache/lucene/analysis/KeywordTokenizer.java

@@ -53,6 +53,9 @@ public class KeywordTokenizer extends Tokenizer {
           buffer = reusableToken.resizeTermBuffer(1+buffer.length);
       }
       reusableToken.setTermLength(upto);
+      reusableToken.setStartOffset(0);
+      reusableToken.setEndOffset(upto);
       return reusableToken;
     }
     return null;
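For context, the change lands in KeywordTokenizer's reusable-Token next() method, which buffers the entire input into a single token. Below is a minimal sketch of such a tokenizer with the fix applied, assuming the Lucene 2.4-era Token/Tokenizer API; the class name KeywordLikeTokenizer is illustrative, not the committed source:

    import java.io.IOException;
    import java.io.Reader;

    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.Tokenizer;

    // Emits the whole input as one token, in the style of KeywordTokenizer.
    public class KeywordLikeTokenizer extends Tokenizer {
      private boolean done = false;

      public KeywordLikeTokenizer(Reader input) {
        super(input);
      }

      public Token next(final Token reusableToken) throws IOException {
        if (done) {
          return null;
        }
        done = true;
        reusableToken.clear();
        int upto = 0;
        char[] buffer = reusableToken.termBuffer();
        while (true) {
          // Read the full input into the token's term buffer, growing it as needed.
          final int length = input.read(buffer, upto, buffer.length - upto);
          if (length == -1) break;
          upto += length;
          if (upto == buffer.length)
            buffer = reusableToken.resizeTermBuffer(1 + buffer.length);
        }
        reusableToken.setTermLength(upto);
        // The single token covers the whole input: character 0 up to upto (exclusive).
        reusableToken.setStartOffset(0);
        reusableToken.setEndOffset(upto);
        return reusableToken;
      }
    }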

src/test/org/apache/lucene/analysis/TestKeywordAnalyzer.java

@@ -17,6 +17,8 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
+import java.io.StringReader;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -82,4 +84,13 @@ public class TestKeywordAnalyzer extends LuceneTestCase {
     td = reader.termDocs(new Term("partnum", "Q37"));
     assertTrue(td.next());
   }
+
+  // LUCENE-1441
+  public void testOffsets() throws Exception {
+    TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"));
+    Token token = new Token();
+    assertTrue(stream.next(token) != null);
+    assertEquals(0, token.startOffset());
+    assertEquals(4, token.endOffset());
+  }
 }
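These offsets matter to anything that maps tokens back onto the original text (term-vector offsets, highlighting, and so on); before this fix the tokenizer never set them, so a reused token kept whatever stale values it already carried. A small usage sketch against the 2.4-era API, where KeywordOffsetsDemo, the "partnum" field name, and the input string are all arbitrary illustrations:

    import java.io.StringReader;

    import org.apache.lucene.analysis.KeywordAnalyzer;
    import org.apache.lucene.analysis.Token;
    import org.apache.lucene.analysis.TokenStream;

    public class KeywordOffsetsDemo {
      public static void main(String[] args) throws Exception {
        String text = "Q36-KEYBOARD";
        TokenStream stream = new KeywordAnalyzer().tokenStream("partnum", new StringReader(text));
        Token token = stream.next(new Token());
        // With the fix, startOffset()/endOffset() span the whole input, so slicing
        // the original text by them recovers the exact term, as a highlighter would.
        System.out.println(text.substring(token.startOffset(), token.endOffset()));
      }
    }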