LUCENE-1643: use reusable RawCollationKey for better performance

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@776252 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-05-19 09:50:24 +00:00
parent 6156cc18a4
commit 2dd7d33e86
2 changed files with 10 additions and 2 deletions

View File

@@ -61,6 +61,11 @@ New features
Model to tokenize Chinese words in a more intelligent way.
(Xiaoping Gao via Mike McCandless)
Optimizations
1. LUCENE-1643: ICUCollationKeyFilter now re-uses a single
RawCollationKey instance across tokens, instead of allocating a new
collation key for each token, for better performance. (Robert Muir
via Mike McCandless)
Documentation

View File

@@ -19,6 +19,8 @@ package org.apache.lucene.collation;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RawCollationKey;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
@@ -61,6 +63,7 @@ import java.nio.CharBuffer;
*/
public class ICUCollationKeyFilter extends TokenFilter {
private Collator collator = null;
private RawCollationKey reusableKey = new RawCollationKey();
/**
*
@@ -78,8 +81,8 @@ public class ICUCollationKeyFilter extends TokenFilter {
if (nextToken != null) {
char[] termBuffer = nextToken.termBuffer();
String termText = new String(termBuffer, 0, nextToken.termLength());
byte[] collationKey = collator.getCollationKey(termText).toByteArray();
ByteBuffer collationKeyBuf = ByteBuffer.wrap(collationKey);
collator.getRawCollationKey(termText, reusableKey);
ByteBuffer collationKeyBuf = ByteBuffer.wrap(reusableKey.bytes, 0, reusableKey.size);
int encodedLength
= IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
if (encodedLength > termBuffer.length) {