LUCENE-2084: remove Byte/CharBuffer wrapping for collation key generation

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@895341 13f79535-47bb-0310-9956-ffa450edef68
2010-01-03 09:22:40 +00:00 · 2010-01-03 09:22:40 +00:00 · cdac1f7113
parent a949836869
commit cdac1f7113
6 changed files with 459 additions and 191 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -153,6 +153,11 @@ Optimizations
 * LUCENE-2169: Improved CharArraySet.copy(), if source set is
  also a CharArraySet.  (Simon Willnauer via Uwe Schindler)
 * LUCENE-2084: Change IndexableBinaryStringTools to work on byte[] and char[]
  directly, instead of Byte/CharBuffers, and modify CollationKeyFilter to
  take advantage of this for faster performance.
  (Steven Rowe, Uwe Schindler, Robert Muir)
 Build
 * LUCENE-2124: Moved the JDK-based collation support from contrib/collation 
--- a/contrib/CHANGES.txt
+++ b/contrib/CHANGES.txt
@ -74,6 +74,11 @@ Optimizations
   over itself. Instead it sets only the length. This patch also optimizes
   the logic of the filter and uses NIO for IdentityEncoder. (Uwe Schindler)
 * LUCENE-2084: Change IndexableBinaryStringTools to work on byte[] and char[]
   directly, instead of Byte/CharBuffers, and modify ICUCollationKeyFilter to
   take advantage of this for faster performance.
   (Steven Rowe, Uwe Schindler, Robert Muir)
 Test Cases
 * LUCENE-2115: Cutover contrib tests to use Java5 generics.  (Kay Kay
--- a/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
+++ b/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
@ -23,13 +23,10 @@ import com.ibm.icu.text.RawCollationKey;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.IndexableBinaryStringTools;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 /**
@ -92,15 +89,14 @@ public final class ICUCollationKeyFilter extends TokenFilter {
      char[] termBuffer = termAtt.termBuffer();
      String termText = new String(termBuffer, 0, termAtt.termLength());
      collator.getRawCollationKey(termText, reusableKey);
-      ByteBuffer collationKeyBuf = ByteBuffer.wrap(reusableKey.bytes, 0, reusableKey.size);
+      int encodedLength = IndexableBinaryStringTools.getEncodedLength(
-      int encodedLength
+          reusableKey.bytes, 0, reusableKey.size);
        = IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
      if (encodedLength > termBuffer.length) {
        termAtt.resizeTermBuffer(encodedLength);
      }
      termAtt.setTermLength(encodedLength);
-      CharBuffer wrappedTermBuffer = CharBuffer.wrap(termAtt.termBuffer());
+      IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
-      IndexableBinaryStringTools.encode(collationKeyBuf, wrappedTermBuffer);
+          termAtt.termBuffer(), 0, encodedLength);
      return true;
    } else {
      return false;
--- a/src/java/org/apache/lucene/collation/CollationKeyFilter.java
+++ b/src/java/org/apache/lucene/collation/CollationKeyFilter.java
@ -24,8 +24,6 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.IndexableBinaryStringTools;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.text.Collator;
@ -94,15 +92,14 @@ public final class CollationKeyFilter extends TokenFilter {
      char[] termBuffer = termAtt.termBuffer();
      String termText = new String(termBuffer, 0, termAtt.termLength());
      byte[] collationKey = collator.getCollationKey(termText).toByteArray();
-      ByteBuffer collationKeyBuf = ByteBuffer.wrap(collationKey);
+      int encodedLength = IndexableBinaryStringTools.getEncodedLength(
-      int encodedLength
+          collationKey, 0, collationKey.length);
        = IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
      if (encodedLength > termBuffer.length) {
        termAtt.resizeTermBuffer(encodedLength);
      }
      termAtt.setTermLength(encodedLength);
-      CharBuffer wrappedTermBuffer = CharBuffer.wrap(termAtt.termBuffer());
+      IndexableBinaryStringTools.encode(collationKey, 0, collationKey.length,
-      IndexableBinaryStringTools.encode(collationKeyBuf, wrappedTermBuffer);
+          termAtt.termBuffer(), 0, encodedLength);
      return true;
    } else {
      return false;
--- a/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java
+++ b/src/java/org/apache/lucene/util/IndexableBinaryStringTools.java
@ -23,29 +23,33 @@ import java.nio.ByteBuffer;
 /**
 * Provides support for converting byte sequences to Strings and back again.
 * The resulting Strings preserve the original byte sequences' sort order.
- * 
+ * <p/>
 * The Strings are constructed using a Base 8000h encoding of the original
 * binary data - each char of an encoded String represents a 15-bit chunk
 * from the byte sequence.  Base 8000h was chosen because it allows for all
 * lower 15 bits of char to be used without restriction; the surrogate range 
 * [U+D800-U+DFFF] does not represent valid chars, and would require
 * complicated handling to avoid them and allow use of char's high bit.
- * 
+ * <p/>
 * Although unset bits are used as padding in the final char, the original
 * byte sequence could contain trailing bytes with no set bits (null bytes):
 * padding is indistinguishable from valid information.  To overcome this
 * problem, a char is appended, indicating the number of encoded bytes in the
 * final content char.
- * 
+ * <p/>
- * This class's operations are defined over CharBuffers and ByteBuffers, to
+ * Some methods in this class are defined over CharBuffers and ByteBuffers, but
- * allow for wrapped arrays to be reused, reducing memory allocation costs for
+ * these are deprecated in favor of methods that operate directly on byte[] and
- * repeated operations.  Note that this class calls array() and arrayOffset()
+ * char[] arrays.  Note that this class calls array() and arrayOffset()
 * on the CharBuffers and ByteBuffers it uses, so only wrapped arrays may be
- * used.  This class interprets the arrayOffset() and limit() values returned by
+ * used.  This class interprets the arrayOffset() and limit() values returned 
- * its input buffers as beginning and end+1 positions on the wrapped array,
+ * by its input buffers as beginning and end+1 positions on the wrapped array,
 * respectively; similarly, on the output buffer, arrayOffset() is the first
 * position written to, and limit() is set to one past the final output array
 * position.
 * <p/>
 * WARNING: This means that the deprecated Buffer-based methods 
 * only work correctly with buffers that have an offset of 0. For example, they
 * will not correctly interpret buffers returned by {@link ByteBuffer#slice}.  
 */
 public class IndexableBinaryStringTools {
@ -68,190 +72,259 @@ public class IndexableBinaryStringTools {
  /**
   * Returns the number of chars required to encode the given byte sequence.
   * 
-   * @param original The byte sequence to be encoded.  Must be backed by an array.
+   * @param original The byte sequence to be encoded. Must be backed by an
   *        array.
   * @return The number of chars required to encode the given byte sequence
-   * @throws IllegalArgumentException If the given ByteBuffer is not backed by an array
+   * @throws IllegalArgumentException If the given ByteBuffer is not backed by
   *         an array
   * @deprecated Use {@link #getEncodedLength(byte[], int, int)} instead. This
   *             method will be removed in Lucene 4.0
   */
  @Deprecated
  public static int getEncodedLength(ByteBuffer original)
    throws IllegalArgumentException {
    if (original.hasArray()) {
-      // Use long for intermediaries to protect against overflow
+      return getEncodedLength(original.array(), original.arrayOffset(),
-      long length = (long)(original.limit() - original.arrayOffset());
+          original.limit() - original.arrayOffset());
      return (int)((length * 8L + 14L) / 15L) + 1;
    } else {
      throw new IllegalArgumentException("original argument must have a backing array");
    }
  }
  /**
   * Computes how many chars are needed to hold the encoded form of a run of
   * bytes: one char for every started 15-bit chunk of input, plus one
   * trailing char.
   * 
   * @param inputArray byte sequence to be encoded (its contents are not
   *        inspected by this computation)
   * @param inputOffset initial offset into inputArray (not used by this
   *        computation)
   * @param inputLength number of bytes in inputArray
   * @return The number of chars required to encode inputLength bytes.
   */
  public static int getEncodedLength(byte[] inputArray, int inputOffset,
      int inputLength) {
    // Widen to long before multiplying so large lengths cannot overflow.
    final long totalBits = (long) inputLength * 8L;
    // Adding 14 before dividing by 15 rounds up for non-negative lengths.
    final long contentChars = (totalBits + 14L) / 15L;
    return (int) contentChars + 1;
  }
  /**
   * Returns the number of bytes required to decode the given char sequence.
   * 
-   * @param encoded The char sequence to be encoded.  Must be backed by an array.
+   * @param encoded The char sequence to be decoded. Must be backed by an array.
   * @return The number of bytes required to decode the given char sequence
-   * @throws IllegalArgumentException If the given CharBuffer is not backed by an array
+   * @throws IllegalArgumentException If the given CharBuffer is not backed by
   *         an array
   * @deprecated Use {@link #getDecodedLength(char[], int, int)} instead. This
   *             method will be removed in Lucene 4.0
   */
  @Deprecated
  public static int getDecodedLength(CharBuffer encoded) 
    throws IllegalArgumentException {
    if (encoded.hasArray()) {
-      int numChars = encoded.limit() - encoded.arrayOffset() - 1;
+      return getDecodedLength(encoded.array(), encoded.arrayOffset(), 
-      if (numChars <= 0) {
+          encoded.limit() - encoded.arrayOffset());
        return 0;
      } else {
        int numFullBytesInFinalChar = encoded.charAt(encoded.limit() - 1);
        int numEncodedChars = numChars - 1;
        return (numEncodedChars * 15 + 7) / 8 + numFullBytesInFinalChar;
      }
    } else {
      throw new IllegalArgumentException("encoded argument must have a backing array");
    }
  }
  /**
-   * Encodes the input byte sequence into the output char sequence.  Before
+   * Returns the number of bytes required to decode the given char sequence.
   * 
   * @param encoded char sequence to be decoded
   * @param offset initial offset
   * @param length number of characters
   * @return The number of bytes required to decode the given char sequence
   */
  public static int getDecodedLength(char[] encoded, int offset, int length) {
    // Everything except the trailing count char.
    final int contentChars = length - 1;
    if (contentChars <= 0) {
      // Nothing but (at most) the trailing char: decodes to zero bytes.
      return 0;
    }
    // The last char of the sequence stores how many full bytes are held by
    // the final content char.
    final long bytesInLastContentChar = encoded[offset + length - 1];
    // Content chars that precede the final one.
    final long leadingChars = contentChars - 1;
    // long arithmetic guards the multiplication against overflow
    final long leadingBytes = (leadingChars * 15L + 7L) / 8L;
    return (int) (leadingBytes + bytesInLastContentChar);
  }
  /**
   * Encodes the input byte sequence into the output char sequence. Before
   * calling this method, ensure that the output CharBuffer has sufficient
   * capacity by calling {@link #getEncodedLength(java.nio.ByteBuffer)}.
   * 
   * @param input The byte sequence to encode
-   * @param output Where the char sequence encoding result will go.  The limit
+   * @param output Where the char sequence encoding result will go. The limit is
-   *  is set to one past the position of the final char.
+   *        set to one past the position of the final char.
   * @throws IllegalArgumentException If either the input or the output buffer
-   *  is not backed by an array
+   *         is not backed by an array
   * @deprecated Use {@link #encode(byte[], int, int, char[], int, int)}
   *             instead. This method will be removed in Lucene 4.0
   */
  @Deprecated
  public static void encode(ByteBuffer input, CharBuffer output) {
    if (input.hasArray() && output.hasArray()) {
-      byte[] inputArray = input.array();
+      final int inputOffset = input.arrayOffset();
-      int inputOffset = input.arrayOffset();
+      final int inputLength = input.limit() - inputOffset;
-      int inputLength = input.limit() - inputOffset; 
+      final int outputOffset = output.arrayOffset();
-      char[] outputArray = output.array();
+      final int outputLength = getEncodedLength(input.array(), inputOffset,
-      int outputOffset = output.arrayOffset();
+          inputLength);
-      int outputLength = getEncodedLength(input);
+      output.limit(outputLength + outputOffset);
      output.limit(outputOffset + outputLength); // Set output final pos + 1
      output.position(0);
-      if (inputLength > 0) {
+      encode(input.array(), inputOffset, inputLength, output.array(),
-        int inputByteNum = inputOffset;
+          outputOffset, outputLength);
        int caseNum = 0;
        int outputCharNum = outputOffset;
        CodingCase codingCase;
        for ( ; inputByteNum + CODING_CASES[caseNum].numBytes <= inputLength ;
              ++outputCharNum                                                 ) {
          codingCase = CODING_CASES[caseNum];
          if (2 == codingCase.numBytes) {
            outputArray[outputCharNum]
              = (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
                       + (((inputArray[inputByteNum + 1] & 0xFF) >>> codingCase.finalShift)
                          & codingCase.finalMask)
                       & (short)0x7FFF);
          } else { // numBytes is 3
            outputArray[outputCharNum] 
              = (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
                       + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
                       + (((inputArray[inputByteNum + 2] & 0xFF) >>> codingCase.finalShift) 
                          & codingCase.finalMask)
                       & (short)0x7FFF);          
          }
          inputByteNum += codingCase.advanceBytes;          
          if (++caseNum == CODING_CASES.length) {
            caseNum = 0;
          }
        }
        // Produce final char (if any) and trailing count chars.
        codingCase = CODING_CASES[caseNum];
        if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
          outputArray[outputCharNum++] 
            = (char)((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
                      + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift))
                     & (short)0x7FFF);
          // Add trailing char containing the number of full bytes in final char
          outputArray[outputCharNum++] = (char)1;
        } else if (inputByteNum < inputLength) {
          outputArray[outputCharNum++] 
            = (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
                     & (short)0x7FFF);
          // Add trailing char containing the number of full bytes in final char
          outputArray[outputCharNum++] = caseNum == 0 ? (char)1 : (char)0;
        } else { // No left over bits - last char is completely filled.
          // Add trailing char containing the number of full bytes in final char
          outputArray[outputCharNum++] = (char)1;
        }
      }
    } else {
      throw new IllegalArgumentException("Arguments must have backing arrays");
    }
  }
  /**
-   * Decodes the input char sequence into the output byte sequence.  Before
+   * Encodes the input byte sequence into the output char sequence.  Before
   * calling this method, ensure that the output array has sufficient
   * capacity by calling {@link #getEncodedLength(byte[], int, int)}.
   * 
   * @param inputArray byte sequence to be encoded
   * @param inputOffset initial offset into inputArray
   * @param inputLength number of bytes in inputArray
   * @param outputArray char sequence to store encoded result
   * @param outputOffset initial offset into outputArray
   * @param outputLength length of output, must be getEncodedLength
   */
  public static void encode(byte[] inputArray, int inputOffset,
      int inputLength, char[] outputArray, int outputOffset, int outputLength) {
    assert (outputLength == getEncodedLength(inputArray, inputOffset,
        inputLength));
    // For empty input nothing is written to outputArray.
    if (inputLength > 0) {
      // Exclusive end index of the input range. Every bounds check below
      // compares against this index (not against inputLength), so that a
      // non-zero inputOffset is honored; for inputOffset == 0 the two are equal.
      final int inputEnd = inputOffset + inputLength;
      int inputByteNum = inputOffset;
      int caseNum = 0;
      int outputCharNum = outputOffset;
      CodingCase codingCase;
      // Emit one output char per iteration for as long as the current coding
      // case can read all of the bytes it needs from the input range.
      for (; inputByteNum + CODING_CASES[caseNum].numBytes <= inputEnd; ++outputCharNum) {
        codingCase = CODING_CASES[caseNum];
        if (2 == codingCase.numBytes) {
          outputArray[outputCharNum] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
              + (((inputArray[inputByteNum + 1] & 0xFF) >>> codingCase.finalShift) & codingCase.finalMask) & (short) 0x7FFF);
        } else { // numBytes is 3
          outputArray[outputCharNum] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
              + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
              + (((inputArray[inputByteNum + 2] & 0xFF) >>> codingCase.finalShift) & codingCase.finalMask) & (short) 0x7FFF);
        }
        inputByteNum += codingCase.advanceBytes;
        // Cycle through the coding cases, wrapping back to the first one.
        if (++caseNum == CODING_CASES.length) {
          caseNum = 0;
        }
      }
      // Produce final char (if any) and trailing count chars.
      codingCase = CODING_CASES[caseNum];
      if (inputByteNum + 1 < inputEnd) { // codingCase.numBytes must be 3
        // Two input bytes remain: pack both into one last content char.
        outputArray[outputCharNum++] = (char) ((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
            + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)) & (short) 0x7FFF);
        // Add trailing char containing the number of full bytes in final char
        outputArray[outputCharNum++] = (char) 1;
      } else if (inputByteNum < inputEnd) {
        // One input byte remains: pack it into one last content char.
        outputArray[outputCharNum++] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift) & (short) 0x7FFF);
        // Add trailing char containing the number of full bytes in final char
        outputArray[outputCharNum++] = caseNum == 0 ? (char) 1 : (char) 0;
      } else { // No left over bits - last char is completely filled.
        // Add trailing char containing the number of full bytes in final char
        outputArray[outputCharNum++] = (char) 1;
      }
    }
  }
  /**
   * Decodes the input char sequence into the output byte sequence. Before
   * calling this method, ensure that the output ByteBuffer has sufficient
   * capacity by calling {@link #getDecodedLength(java.nio.CharBuffer)}.
   * 
   * @param input The char sequence to decode
-   * @param output Where the byte sequence decoding result will go.  The limit
+   * @param output Where the byte sequence decoding result will go. The limit is
-   *  is set to one past the position of the final char.
+   *        set to one past the position of the final byte.
   * @throws IllegalArgumentException If either the input or the output buffer
-   *  is not backed by an array
+   *         is not backed by an array
   * @deprecated Use {@link #decode(char[], int, int, byte[], int, int)}
   *             instead. This method will be removed in Lucene 4.0
   */
  @Deprecated
  public static void decode(CharBuffer input, ByteBuffer output) {
    if (input.hasArray() && output.hasArray()) {
-      int numInputChars = input.limit() - input.arrayOffset() - 1;
+      final int inputOffset = input.arrayOffset();
-      int numOutputBytes = getDecodedLength(input);
+      final int inputLength = input.limit() - inputOffset;
-      output.limit(numOutputBytes + output.arrayOffset()); // Set output final pos + 1
+      final int outputOffset = output.arrayOffset();
      final int outputLength = getDecodedLength(input.array(), inputOffset,
          inputLength);
      output.limit(outputLength + outputOffset);
      output.position(0);
-      byte[] outputArray = output.array();
+      decode(input.array(), inputOffset, inputLength, output.array(),
-      char[] inputArray = input.array();
+          outputOffset, outputLength);
-      if (numOutputBytes > 0) {
+    } else {
-        int caseNum = 0;
+      throw new IllegalArgumentException("Arguments must have backing arrays");
-        int outputByteNum = output.arrayOffset();
+    }
-        int inputCharNum = input.arrayOffset();
+  }
-        short inputChar;
+
-        CodingCase codingCase;
+  /**
-        for ( ; inputCharNum < numInputChars - 1 ; ++inputCharNum) {
+   * Decodes the input char sequence into the output byte sequence. Before
-          codingCase = CODING_CASES[caseNum];
+   * calling this method, ensure that the output array has sufficient capacity
-          inputChar = (short)inputArray[inputCharNum];
+   * by calling {@link #getDecodedLength(char[], int, int)}.
-          if (2 == codingCase.numBytes) {
+   * 
-            if (0 == caseNum) {
+   * @param inputArray char sequence to be decoded
-              outputArray[outputByteNum] = (byte)(inputChar >>> codingCase.initialShift);
+   * @param inputOffset initial offset into inputArray
-            } else {
+   * @param inputLength number of chars in inputArray
-              outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
+   * @param outputArray byte sequence to store decoded result
-            }
+   * @param outputOffset initial offset into outputArray
-            outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.finalMask) 
+   * @param outputLength length of output, must be
-                                                    << codingCase.finalShift);
+   *        getDecodedLength(inputArray, inputOffset, inputLength)
-          } else { // numBytes is 3
+   */
-            outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
+  public static void decode(char[] inputArray, int inputOffset,
-            outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.middleMask)
+      int inputLength, byte[] outputArray, int outputOffset, int outputLength) {
-                                                    >>> codingCase.middleShift);
+    assert (outputLength == getDecodedLength(inputArray, inputOffset,
-            outputArray[outputByteNum + 2] = (byte)((inputChar & codingCase.finalMask) 
+        inputLength));
-                                                    << codingCase.finalShift);
+    final int numInputChars = inputLength - 1;
-          }
+    final int numOutputBytes = outputLength;
-          outputByteNum += codingCase.advanceBytes;
+
-          if (++caseNum == CODING_CASES.length) {
+    if (numOutputBytes > 0) {
-            caseNum = 0;
+      int caseNum = 0;
-          }
+      int outputByteNum = outputOffset;
-        }
+      int inputCharNum = inputOffset;
-        // Handle final char
+      short inputChar;
-        inputChar = (short)inputArray[inputCharNum];
+      CodingCase codingCase;
      for (; inputCharNum < numInputChars - 1; ++inputCharNum) {
        codingCase = CODING_CASES[caseNum];
-        if (0 == caseNum) {
+        inputChar = (short) inputArray[inputCharNum];
-          outputArray[outputByteNum] = 0;
+        if (2 == codingCase.numBytes) {
          if (0 == caseNum) {
            outputArray[outputByteNum] = (byte) (inputChar >>> codingCase.initialShift);
          } else {
            outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
          }
          outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
        } else { // numBytes is 3
          outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
          outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.middleMask) >>> codingCase.middleShift);
          outputArray[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
        }
-        outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
+        outputByteNum += codingCase.advanceBytes;
-        int bytesLeft = numOutputBytes - outputByteNum;
+        if (++caseNum == CODING_CASES.length) {
-        if (bytesLeft > 1) {
+          caseNum = 0;
-          if (2 == codingCase.numBytes) {
+        }
-            outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.finalMask) 
+      }
-                                                    >>> codingCase.finalShift);
+      // Handle final char
-          } else { // numBytes is 3
+      inputChar = (short) inputArray[inputCharNum];
-            outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.middleMask)
+      codingCase = CODING_CASES[caseNum];
-                                                    >>> codingCase.middleShift);
+      if (0 == caseNum) {
-            if (bytesLeft > 2) {
+        outputArray[outputByteNum] = 0;
-              outputArray[outputByteNum + 2] = (byte)((inputChar & codingCase.finalMask) 
+      }
-                                                      << codingCase.finalShift);
+      outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
-            }
+      final int bytesLeft = numOutputBytes - outputByteNum;
      if (bytesLeft > 1) {
        if (2 == codingCase.numBytes) {
          outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) >>> codingCase.finalShift);
        } else { // numBytes is 3
          outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.middleMask) >>> codingCase.middleShift);
          if (bytesLeft > 2) {
            outputArray[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
          }
        }
      }
    } else {
      throw new IllegalArgumentException("Arguments must have backing arrays");
    }
  }
@ -261,11 +334,14 @@ public class IndexableBinaryStringTools {
   * {@link #encode(java.nio.ByteBuffer, java.nio.CharBuffer)}.
   * 
   * @param input The char sequence to decode
-   * @return A byte sequence containing the decoding result.  The limit
+   * @return A byte sequence containing the decoding result. The limit is set to
-   *  is set to one past the position of the final char.
+   *         one past the position of the final byte.
   * @throws IllegalArgumentException If the input buffer is not backed by an
-   *  array
+   *         array
   * @deprecated Use {@link #decode(char[], int, int, byte[], int, int)}
   *             instead. This method will be removed in Lucene 4.0
   */
  @Deprecated
  public static ByteBuffer decode(CharBuffer input) {
    byte[] outputArray = new byte[getDecodedLength(input)];
    ByteBuffer output = ByteBuffer.wrap(outputArray);
@ -277,11 +353,14 @@ public class IndexableBinaryStringTools {
   * Encodes the input byte sequence.
   * 
   * @param input The byte sequence to encode
-   * @return A char sequence containing the encoding result.  The limit is set
+   * @return A char sequence containing the encoding result. The limit is set to
-   *  to one past the position of the final char.
+   *         one past the position of the final char.
   * @throws IllegalArgumentException If the input buffer is not backed by an
-   *  array
+   *         array
   * @deprecated Use {@link #encode(byte[], int, int, char[], int, int)}
   *             instead. This method will be removed in Lucene 4.0
   */
  @Deprecated
  public static CharBuffer encode(ByteBuffer input) {
    char[] outputArray = new char[getEncodedLength(input)];
    CharBuffer output = CharBuffer.wrap(outputArray);
--- a/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java
+++ b/src/test/org/apache/lucene/util/TestIndexableBinaryStringTools.java
@ -25,7 +25,9 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
  private static final int NUM_RANDOM_TESTS = 2000;
  private static final int MAX_RANDOM_BINARY_LENGTH = 300;
-  public void testSingleBinaryRoundTrip() {
+  /** @deprecated remove this test for Lucene 4.0 */
  @Deprecated
  public void testSingleBinaryRoundTripNIO() {
    byte[] binary = new byte[] 
      { (byte)0x23, (byte)0x98, (byte)0x13, (byte)0xE4, (byte)0x76, (byte)0x41,
        (byte)0xB2, (byte)0xC9, (byte)0x7F, (byte)0x0A, (byte)0xA6, (byte)0xD8 };
@ -35,15 +37,44 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
    ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded);
    assertEquals("Round trip decode/decode returned different results:"
                 + System.getProperty("line.separator")
-                 + "original: " + binaryDump(binaryBuf)
+                 + "original: " + binaryDumpNIO(binaryBuf)
                 + System.getProperty("line.separator")
-                 + " encoded: " + charArrayDump(encoded)
+                 + " encoded: " + charArrayDumpNIO(encoded)
                 + System.getProperty("line.separator")
-                 + " decoded: " + binaryDump(decoded),
+                 + " decoded: " + binaryDumpNIO(decoded),
                 binaryBuf, decoded);
  }
-  public void testEncodedSortability() {
+  public void testSingleBinaryRoundTrip() {
    byte[] binary = new byte[] { (byte) 0x23, (byte) 0x98, (byte) 0x13,
        (byte) 0xE4, (byte) 0x76, (byte) 0x41, (byte) 0xB2, (byte) 0xC9,
        (byte) 0x7F, (byte) 0x0A, (byte) 0xA6, (byte) 0xD8 };
    int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
        binary.length);
    char encoded[] = new char[encodedLen];
    IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
        encoded.length);
    int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
        encoded.length);
    byte decoded[] = new byte[decodedLen];
    IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
        decoded.length);
    assertEquals("Round trip decode/decode returned different results:"
        + System.getProperty("line.separator") + "original: "
        + binaryDump(binary, binary.length)
        + System.getProperty("line.separator") + " encoded: "
        + charArrayDump(encoded, encoded.length)
        + System.getProperty("line.separator") + " decoded: "
        + binaryDump(decoded, decoded.length),
        binaryDump(binary, binary.length), binaryDump(decoded, decoded.length));
  }
  /** @deprecated remove this test for Lucene 4.0 */
  @Deprecated
  public void testEncodedSortabilityNIO() {
    Random random = newRandom();
    byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH];
    ByteBuffer originalBuf1 = ByteBuffer.wrap(originalArray1);
@ -88,19 +119,85 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
      assertEquals("Test #" + (testNum + 1) 
                   + ": Original bytes and encoded chars compare differently:"
                   + System.getProperty("line.separator")
-                   + " binary 1: " + binaryDump(originalBuf1)
+                   + " binary 1: " + binaryDumpNIO(originalBuf1)
                   + System.getProperty("line.separator")
-                   + " binary 2: " + binaryDump(originalBuf2)
+                   + " binary 2: " + binaryDumpNIO(originalBuf2)
                   + System.getProperty("line.separator")
-                   + "encoded 1: " + charArrayDump(encodedBuf1)
+                   + "encoded 1: " + charArrayDumpNIO(encodedBuf1)
                   + System.getProperty("line.separator")
-                   + "encoded 2: " + charArrayDump(encodedBuf2)
+                   + "encoded 2: " + charArrayDumpNIO(encodedBuf2)
                   + System.getProperty("line.separator"),
                   originalComparison, encodedComparison);
    }
  }
-  public void testEmptyInput() {
+  public void testEncodedSortability() {
    // For NUM_RANDOM_TESTS random pairs of byte sequences, checks that the
    // encoded char sequences compare (as Strings) the same way the original
    // sequences do.
    Random random = newRandom();
    // All buffers are allocated once and reused across iterations; only the
    // first numBytesN / encodedLenN entries are meaningful in a given round.
    byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH];
    char[] originalString1 = new char[MAX_RANDOM_BINARY_LENGTH];
    char[] encoded1 = new char[MAX_RANDOM_BINARY_LENGTH * 10];
    byte[] original2 = new byte[MAX_RANDOM_BINARY_LENGTH];
    char[] originalString2 = new char[MAX_RANDOM_BINARY_LENGTH];
    char[] encoded2 = new char[MAX_RANDOM_BINARY_LENGTH * 10];
    for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) {
      int numBytes1 = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
      for (int byteNum = 0; byteNum < numBytes1; ++byteNum) {
        // The same 0..255 value is stored both as a byte and as a char, so
        // the char copy can serve as an unsigned reference for ordering.
        int randomInt = random.nextInt(0x100);
        originalArray1[byteNum] = (byte) randomInt;
        originalString1[byteNum] = (char) randomInt;
      }
      int numBytes2 = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
      for (int byteNum = 0; byteNum < numBytes2; ++byteNum) {
        int randomInt = random.nextInt(0x100);
        original2[byteNum] = (byte) randomInt;
        originalString2[byteNum] = (char) randomInt;
      }
      // Reference ordering: String comparison of the char copies, reduced
      // to its sign (-1, 0 or 1) so only the direction is compared.
      int originalComparison = new String(originalString1, 0, numBytes1)
          .compareTo(new String(originalString2, 0, numBytes2));
      originalComparison = originalComparison < 0 ? -1
          : originalComparison > 0 ? 1 : 0;
      // Encode the first sequence, growing the reusable output buffer if the
      // required length exceeds its current capacity.
      int encodedLen1 = IndexableBinaryStringTools.getEncodedLength(
          originalArray1, 0, numBytes1);
      if (encodedLen1 > encoded1.length)
        encoded1 = new char[ArrayUtil.getNextSize(encodedLen1)];
      IndexableBinaryStringTools.encode(originalArray1, 0, numBytes1, encoded1,
          0, encodedLen1);
      // Same for the second sequence.
      int encodedLen2 = IndexableBinaryStringTools.getEncodedLength(original2,
          0, numBytes2);
      if (encodedLen2 > encoded2.length)
        encoded2 = new char[ArrayUtil.getNextSize(encodedLen2)];
      IndexableBinaryStringTools.encode(original2, 0, numBytes2, encoded2, 0,
          encodedLen2);
      // Ordering of the encoded forms, again reduced to its sign.
      int encodedComparison = new String(encoded1, 0, encodedLen1)
          .compareTo(new String(encoded2, 0, encodedLen2));
      encodedComparison = encodedComparison < 0 ? -1
          : encodedComparison > 0 ? 1 : 0;
      // Both signs must agree; the message dumps both inputs and both
      // encodings to make a failing pair reproducible by inspection.
      assertEquals("Test #" + (testNum + 1)
          + ": Original bytes and encoded chars compare differently:"
          + System.getProperty("line.separator") + " binary 1: "
          + binaryDump(originalArray1, numBytes1)
          + System.getProperty("line.separator") + " binary 2: "
          + binaryDump(original2, numBytes2)
          + System.getProperty("line.separator") + "encoded 1: "
          + charArrayDump(encoded1, encodedLen1)
          + System.getProperty("line.separator") + "encoded 2: "
          + charArrayDump(encoded2, encodedLen2)
          + System.getProperty("line.separator"), originalComparison,
          encodedComparison);
    }
  }
  /** @deprecated remove this test for Lucene 4.0 */
  @Deprecated
  public void testEmptyInputNIO() {
    byte[] binary = new byte[0];
    CharBuffer encoded = IndexableBinaryStringTools.encode(ByteBuffer.wrap(binary));
    ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded);
@ -108,7 +205,27 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
    assertEquals("decoded empty input was not empty", decoded.limit(), 0);
  }
-  public void testAllNullInput() {
+  public void testEmptyInput() {
    // Encodes and then decodes a zero-length byte array through the
    // array-based API and checks that the decoded result is zero-length too.
    byte[] binary = new byte[0];
    int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
        binary.length);
    char[] encoded = new char[encodedLen];
    IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
        encoded.length);
    int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
        encoded.length);
    byte[] decoded = new byte[decodedLen];
    IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
        decoded.length);
    // Expected value first, actual second, so that a failure is reported as
    // "expected:<0> but was:<n>" rather than the other way round.
    assertEquals("decoded empty input was not empty", 0, decoded.length);
  }
  /** @deprecated remove this test for Lucene 4.0 */
  @Deprecated
  public void testAllNullInputNIO() {
    byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
    CharBuffer encoded = IndexableBinaryStringTools.encode(binaryBuf);
@ -117,13 +234,38 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
    assertNotNull("decode() returned null", decodedBuf);
    assertEquals("Round trip decode/decode returned different results:"
                 + System.getProperty("line.separator")
-                 + "  original: " + binaryDump(binaryBuf)
+                 + "  original: " + binaryDumpNIO(binaryBuf)
                 + System.getProperty("line.separator")
-                 + "decodedBuf: " + binaryDump(decodedBuf),
+                 + "decodedBuf: " + binaryDumpNIO(decodedBuf),
                 binaryBuf, decodedBuf);
  }
-  public void testRandomBinaryRoundTrip() {
+  public void testAllNullInput() {
    // Round-trips an input consisting solely of zero bytes (nine of them)
    // through the array-based encode/decode API.
    byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    // Output arrays are sized exactly from getEncodedLength() /
    // getDecodedLength() before each conversion.
    int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
        binary.length);
    char encoded[] = new char[encodedLen];
    IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
        encoded.length);
    int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
        encoded.length);
    byte[] decoded = new byte[decodedLen];
    IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
        decoded.length);
    // Equality is checked on the binaryDump() string renderings of the
    // original and decoded arrays.
    assertEquals("Round trip decode/decode returned different results:"
        + System.getProperty("line.separator") + "  original: "
        + binaryDump(binary, binary.length)
        + System.getProperty("line.separator") + "decodedBuf: "
        + binaryDump(decoded, decoded.length),
        binaryDump(binary, binary.length), binaryDump(decoded, decoded.length));
  }
  /** @deprecated remove this test for Lucene 4.0 */
  @Deprecated
  public void testRandomBinaryRoundTripNIO() {
    Random random = newRandom();
    byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH];
    ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
@ -142,19 +284,59 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
      assertEquals("Test #" + (testNum + 1) 
                   + ": Round trip decode/decode returned different results:"
                   + System.getProperty("line.separator")
-                   + "  original: " + binaryDump(binaryBuf)
+                   + "  original: " + binaryDumpNIO(binaryBuf)
                   + System.getProperty("line.separator")
-                   + "encodedBuf: " + charArrayDump(encodedBuf)
+                   + "encodedBuf: " + charArrayDumpNIO(encodedBuf)
                   + System.getProperty("line.separator")
-                   + "decodedBuf: " + binaryDump(decodedBuf),
+                   + "decodedBuf: " + binaryDumpNIO(decodedBuf),
                   binaryBuf, decodedBuf);
    }
  }
-  public String binaryDump(ByteBuffer binaryBuf) {
+  public void testRandomBinaryRoundTrip() {
    // Runs NUM_RANDOM_TESTS encode/decode round trips over random byte
    // sequences of random length and checks each decoded result against its
    // input.
    Random random = newRandom();
    // Buffers are allocated once and reused; each iteration only relies on
    // the leading numBytes / encodedLen / decodedLen entries.
    byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH];
    char[] encoded = new char[MAX_RANDOM_BINARY_LENGTH * 10];
    byte[] decoded = new byte[MAX_RANDOM_BINARY_LENGTH];
    for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) {
      int numBytes = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
      for (int byteNum = 0; byteNum < numBytes; ++byteNum) {
        binary[byteNum] = (byte) random.nextInt(0x100);
      }
      // Grow the reusable char buffer if this input needs more room than it
      // currently has.
      int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
          numBytes);
      if (encoded.length < encodedLen)
        encoded = new char[ArrayUtil.getNextSize(encodedLen)];
      IndexableBinaryStringTools.encode(binary, 0, numBytes, encoded, 0,
          encodedLen);
      // NOTE(review): decoded is never resized; this presumably relies on
      // getDecodedLength() not exceeding numBytes for a round trip - confirm.
      int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
          encodedLen);
      IndexableBinaryStringTools.decode(encoded, 0, encodedLen, decoded, 0,
          decodedLen);
      // Compare the binaryDump() renderings of the first numBytes input
      // bytes and the first decodedLen decoded bytes.
      assertEquals("Test #" + (testNum + 1)
          + ": Round trip decode/decode returned different results:"
          + System.getProperty("line.separator") + "  original: "
          + binaryDump(binary, numBytes) + System.getProperty("line.separator")
          + "encodedBuf: " + charArrayDump(encoded, encodedLen)
          + System.getProperty("line.separator") + "decodedBuf: "
          + binaryDump(decoded, decodedLen), binaryDump(binary, numBytes),
          binaryDump(decoded, decodedLen));
    }
  }
  /**
   * Renders the contents of an array-backed {@link ByteBuffer}, from buffer
   * index 0 up to (but excluding) its limit, by delegating to
   * {@link #binaryDump(byte[], int)}.
   * <p>
   * The buffer's position is ignored and left untouched.
   *
   * @param binaryBuf an array-backed buffer; {@code array()} must be supported
   * @return the string produced by {@code binaryDump} for the buffer's contents
   * @deprecated remove this method for Lucene 4.0
   */
  @Deprecated
  public String binaryDumpNIO(ByteBuffer binaryBuf) {
    final int start = binaryBuf.arrayOffset();
    final int length = binaryBuf.limit();
    if (start == 0) {
      // Common case (wrapped or freshly allocated buffer): dump in place.
      return binaryDump(binaryBuf.array(), length);
    }
    // binaryDump() always reads from array index 0, so for a buffer whose
    // first element sits at a non-zero array offset, copy its window to the
    // front of a scratch array instead of dumping unrelated leading bytes.
    byte[] window = new byte[length];
    System.arraycopy(binaryBuf.array(), start, window, 0, length);
    return binaryDump(window, length);
  }
  public String binaryDump(byte[] binary, int numBytes) {
    StringBuilder buf = new StringBuilder();
    int numBytes = binaryBuf.limit() - binaryBuf.arrayOffset();
    byte[] binary = binaryBuf.array();
    for (int byteNum = 0 ; byteNum < numBytes ; ++byteNum) {
      String hex = Integer.toHexString((int)binary[byteNum] & 0xFF);
      if (hex.length() == 1) {
@ -167,11 +349,15 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
    }
    return buf.toString();
  }
  /**
   * Renders the contents of an array-backed {@link CharBuffer}, from buffer
   * index 0 up to (but excluding) its limit, by delegating to
   * {@link #charArrayDump(char[], int)}.
   * <p>
   * The buffer's position is ignored and left untouched.
   *
   * @param charBuf an array-backed buffer; {@code array()} must be supported
   * @return the string produced by {@code charArrayDump} for the buffer's contents
   * @deprecated remove this method for Lucene 4.0
   */
  @Deprecated
  public String charArrayDumpNIO(CharBuffer charBuf) {
    final int start = charBuf.arrayOffset();
    final int length = charBuf.limit();
    if (start == 0) {
      // Common case (wrapped or freshly allocated buffer): dump in place.
      return charArrayDump(charBuf.array(), length);
    }
    // charArrayDump() always reads from array index 0, so for a buffer whose
    // first element sits at a non-zero array offset, copy its window to the
    // front of a scratch array instead of dumping unrelated leading chars.
    char[] window = new char[length];
    System.arraycopy(charBuf.array(), start, window, 0, length);
    return charArrayDump(window, length);
  }
-  public String charArrayDump(CharBuffer charBuf) {
+  public String charArrayDump(char[] charArray, int numBytes) {
    StringBuilder buf = new StringBuilder();
    int numBytes = charBuf.limit() - charBuf.arrayOffset();
    char[] charArray = charBuf.array();
    for (int charNum = 0 ; charNum < numBytes ; ++charNum) {
      String hex = Integer.toHexString((int)charArray[charNum]);
      for (int digit = 0 ; digit < 4 - hex.length() ; ++digit) {