LUCENE-1643: use reusable RawCollationKey for better performance

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@776252 13f79535-47bb-0310-9956-ffa450edef68
2025-02-06 18:18:38 +00:00 · 2009-05-19 09:50:24 +00:00 · 2009-05-19 09:50:24 +00:00 · 2dd7d33e86
commit 2dd7d33e86
parent 6156cc18a4
2 changed files with 10 additions and 2 deletions
--- a/contrib/CHANGES.txt
+++ b/contrib/CHANGES.txt
@@ -61,6 +61,11 @@ New features
    Model to tokenize Chinese words in a more intelligent way.
    (Xiaoping Gao via Mike McCandless)
  
+Optimizations
+
+  1. LUCENE-1643: Re-use the collation key (RawCollationKey) in
+     ICUCollationKeyFilter for better performance.  (Robert Muir via
+     Mike McCandless)

 Documentation

--- a/contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
+++ b/contrib/collation/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
@@ -19,6 +19,8 @@ package org.apache.lucene.collation;


 import com.ibm.icu.text.Collator;
+import com.ibm.icu.text.RawCollationKey;
+
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
@@ -61,6 +63,7 @@ import java.nio.CharBuffer;
 */
 public class ICUCollationKeyFilter extends TokenFilter {
  private Collator collator = null;
+  private RawCollationKey reusableKey = new RawCollationKey();

  /**
   * 
@@ -78,8 +81,8 @@ public class ICUCollationKeyFilter extends TokenFilter {
    if (nextToken != null) {
      char[] termBuffer = nextToken.termBuffer();
      String termText = new String(termBuffer, 0, nextToken.termLength());
-      byte[] collationKey = collator.getCollationKey(termText).toByteArray();
-      ByteBuffer collationKeyBuf = ByteBuffer.wrap(collationKey);
+      collator.getRawCollationKey(termText, reusableKey);
+      ByteBuffer collationKeyBuf = ByteBuffer.wrap(reusableKey.bytes, 0, reusableKey.size);
      int encodedLength
        = IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
      if (encodedLength > termBuffer.length) {