mirror of https://github.com/apache/lucene.git
LUCENE-2084: remove Byte/CharBuffer wrapping for collation key generation
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@895341 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a949836869
commit
cdac1f7113
|
@ -153,6 +153,11 @@ Optimizations
|
||||||
* LUCENE-2169: Improved CharArraySet.copy(), if source set is
|
* LUCENE-2169: Improved CharArraySet.copy(), if source set is
|
||||||
also a CharArraySet. (Simon Willnauer via Uwe Schindler)
|
also a CharArraySet. (Simon Willnauer via Uwe Schindler)
|
||||||
|
|
||||||
|
* LUCENE-2084: Change IndexableBinaryStringTools to work on byte[] and char[]
|
||||||
|
directly, instead of Byte/CharBuffers, and modify CollationKeyFilter to
|
||||||
|
take advantage of this for faster performance.
|
||||||
|
(Steven Rowe, Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
|
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
|
||||||
|
|
|
@ -74,6 +74,11 @@ Optimizations
|
||||||
over itself. Instead it sets only the length. This patch also optimizes
|
over itself. Instead it sets only the length. This patch also optimizes
|
||||||
the logic of the filter and uses NIO for IdentityEncoder. (Uwe Schindler)
|
the logic of the filter and uses NIO for IdentityEncoder. (Uwe Schindler)
|
||||||
|
|
||||||
|
* LUCENE-2084: Change IndexableBinaryStringTools to work on byte[] and char[]
|
||||||
|
directly, instead of Byte/CharBuffers, and modify ICUCollationKeyFilter to
|
||||||
|
take advantage of this for faster performance.
|
||||||
|
(Steven Rowe, Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
Test Cases
|
Test Cases
|
||||||
|
|
||||||
* LUCENE-2115: Cutover contrib tests to use Java5 generics. (Kay Kay
|
* LUCENE-2115: Cutover contrib tests to use Java5 generics. (Kay Kay
|
||||||
|
|
|
@ -23,13 +23,10 @@ import com.ibm.icu.text.RawCollationKey;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Token;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
import org.apache.lucene.util.IndexableBinaryStringTools;
|
import org.apache.lucene.util.IndexableBinaryStringTools;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.CharBuffer;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -92,15 +89,14 @@ public final class ICUCollationKeyFilter extends TokenFilter {
|
||||||
char[] termBuffer = termAtt.termBuffer();
|
char[] termBuffer = termAtt.termBuffer();
|
||||||
String termText = new String(termBuffer, 0, termAtt.termLength());
|
String termText = new String(termBuffer, 0, termAtt.termLength());
|
||||||
collator.getRawCollationKey(termText, reusableKey);
|
collator.getRawCollationKey(termText, reusableKey);
|
||||||
ByteBuffer collationKeyBuf = ByteBuffer.wrap(reusableKey.bytes, 0, reusableKey.size);
|
int encodedLength = IndexableBinaryStringTools.getEncodedLength(
|
||||||
int encodedLength
|
reusableKey.bytes, 0, reusableKey.size);
|
||||||
= IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
|
|
||||||
if (encodedLength > termBuffer.length) {
|
if (encodedLength > termBuffer.length) {
|
||||||
termAtt.resizeTermBuffer(encodedLength);
|
termAtt.resizeTermBuffer(encodedLength);
|
||||||
}
|
}
|
||||||
termAtt.setTermLength(encodedLength);
|
termAtt.setTermLength(encodedLength);
|
||||||
CharBuffer wrappedTermBuffer = CharBuffer.wrap(termAtt.termBuffer());
|
IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
|
||||||
IndexableBinaryStringTools.encode(collationKeyBuf, wrappedTermBuffer);
|
termAtt.termBuffer(), 0, encodedLength);
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -24,8 +24,6 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
import org.apache.lucene.util.IndexableBinaryStringTools;
|
import org.apache.lucene.util.IndexableBinaryStringTools;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.CharBuffer;
|
|
||||||
import java.text.Collator;
|
import java.text.Collator;
|
||||||
|
|
||||||
|
|
||||||
|
@ -94,15 +92,14 @@ public final class CollationKeyFilter extends TokenFilter {
|
||||||
char[] termBuffer = termAtt.termBuffer();
|
char[] termBuffer = termAtt.termBuffer();
|
||||||
String termText = new String(termBuffer, 0, termAtt.termLength());
|
String termText = new String(termBuffer, 0, termAtt.termLength());
|
||||||
byte[] collationKey = collator.getCollationKey(termText).toByteArray();
|
byte[] collationKey = collator.getCollationKey(termText).toByteArray();
|
||||||
ByteBuffer collationKeyBuf = ByteBuffer.wrap(collationKey);
|
int encodedLength = IndexableBinaryStringTools.getEncodedLength(
|
||||||
int encodedLength
|
collationKey, 0, collationKey.length);
|
||||||
= IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
|
|
||||||
if (encodedLength > termBuffer.length) {
|
if (encodedLength > termBuffer.length) {
|
||||||
termAtt.resizeTermBuffer(encodedLength);
|
termAtt.resizeTermBuffer(encodedLength);
|
||||||
}
|
}
|
||||||
termAtt.setTermLength(encodedLength);
|
termAtt.setTermLength(encodedLength);
|
||||||
CharBuffer wrappedTermBuffer = CharBuffer.wrap(termAtt.termBuffer());
|
IndexableBinaryStringTools.encode(collationKey, 0, collationKey.length,
|
||||||
IndexableBinaryStringTools.encode(collationKeyBuf, wrappedTermBuffer);
|
termAtt.termBuffer(), 0, encodedLength);
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -23,29 +23,33 @@ import java.nio.ByteBuffer;
|
||||||
/**
|
/**
|
||||||
* Provides support for converting byte sequences to Strings and back again.
|
* Provides support for converting byte sequences to Strings and back again.
|
||||||
* The resulting Strings preserve the original byte sequences' sort order.
|
* The resulting Strings preserve the original byte sequences' sort order.
|
||||||
*
|
* <p/>
|
||||||
* The Strings are constructed using a Base 8000h encoding of the original
|
* The Strings are constructed using a Base 8000h encoding of the original
|
||||||
* binary data - each char of an encoded String represents a 15-bit chunk
|
* binary data - each char of an encoded String represents a 15-bit chunk
|
||||||
* from the byte sequence. Base 8000h was chosen because it allows for all
|
* from the byte sequence. Base 8000h was chosen because it allows for all
|
||||||
* lower 15 bits of char to be used without restriction; the surrogate range
|
* lower 15 bits of char to be used without restriction; the surrogate range
|
||||||
* [U+D800-U+DFFF] does not represent valid chars, and would require
|
* [U+D800-U+DFFF] does not represent valid chars, and would require
|
||||||
* complicated handling to avoid them and allow use of char's high bit.
|
* complicated handling to avoid them and allow use of char's high bit.
|
||||||
*
|
* <p/>
|
||||||
* Although unset bits are used as padding in the final char, the original
|
* Although unset bits are used as padding in the final char, the original
|
||||||
* byte sequence could contain trailing bytes with no set bits (null bytes):
|
* byte sequence could contain trailing bytes with no set bits (null bytes):
|
||||||
* padding is indistinguishable from valid information. To overcome this
|
* padding is indistinguishable from valid information. To overcome this
|
||||||
* problem, a char is appended, indicating the number of encoded bytes in the
|
* problem, a char is appended, indicating the number of encoded bytes in the
|
||||||
* final content char.
|
* final content char.
|
||||||
*
|
* <p/>
|
||||||
* This class's operations are defined over CharBuffers and ByteBuffers, to
|
* Some methods in this class are defined over CharBuffers and ByteBuffers, but
|
||||||
* allow for wrapped arrays to be reused, reducing memory allocation costs for
|
* these are deprecated in favor of methods that operate directly on byte[] and
|
||||||
* repeated operations. Note that this class calls array() and arrayOffset()
|
* char[] arrays. Note that this class calls array() and arrayOffset()
|
||||||
* on the CharBuffers and ByteBuffers it uses, so only wrapped arrays may be
|
* on the CharBuffers and ByteBuffers it uses, so only wrapped arrays may be
|
||||||
* used. This class interprets the arrayOffset() and limit() values returned by
|
* used. This class interprets the arrayOffset() and limit() values returned
|
||||||
* its input buffers as beginning and end+1 positions on the wrapped array,
|
* by its input buffers as beginning and end+1 positions on the wrapped array,
|
||||||
* respectively; similarly, on the output buffer, arrayOffset() is the first
|
* respectively; similarly, on the output buffer, arrayOffset() is the first
|
||||||
* position written to, and limit() is set to one past the final output array
|
* position written to, and limit() is set to one past the final output array
|
||||||
* position.
|
* position.
|
||||||
|
* <p/>
|
||||||
|
* WARNING: This means that the deprecated Buffer-based methods
|
||||||
|
* only work correctly with buffers that have an offset of 0. For example, they
|
||||||
|
* will not correctly interpret buffers returned by {@link ByteBuffer#slice}.
|
||||||
*/
|
*/
|
||||||
public class IndexableBinaryStringTools {
|
public class IndexableBinaryStringTools {
|
||||||
|
|
||||||
|
@ -68,190 +72,259 @@ public class IndexableBinaryStringTools {
|
||||||
/**
|
/**
|
||||||
* Returns the number of chars required to encode the given byte sequence.
|
* Returns the number of chars required to encode the given byte sequence.
|
||||||
*
|
*
|
||||||
* @param original The byte sequence to be encoded. Must be backed by an array.
|
* @param original The byte sequence to be encoded. Must be backed by an
|
||||||
|
* array.
|
||||||
* @return The number of chars required to encode the given byte sequence
|
* @return The number of chars required to encode the given byte sequence
|
||||||
* @throws IllegalArgumentException If the given ByteBuffer is not backed by an array
|
* @throws IllegalArgumentException If the given ByteBuffer is not backed by
|
||||||
|
* an array
|
||||||
|
* @deprecated Use {@link #getEncodedLength(byte[], int, int)} instead. This
|
||||||
|
* method will be removed in Lucene 4.0
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public static int getEncodedLength(ByteBuffer original)
|
public static int getEncodedLength(ByteBuffer original)
|
||||||
throws IllegalArgumentException {
|
throws IllegalArgumentException {
|
||||||
if (original.hasArray()) {
|
if (original.hasArray()) {
|
||||||
// Use long for intermediaries to protect against overflow
|
return getEncodedLength(original.array(), original.arrayOffset(),
|
||||||
long length = (long)(original.limit() - original.arrayOffset());
|
original.limit() - original.arrayOffset());
|
||||||
return (int)((length * 8L + 14L) / 15L) + 1;
|
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException("original argument must have a backing array");
|
throw new IllegalArgumentException("original argument must have a backing array");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of chars required to encode the given bytes.
|
||||||
|
*
|
||||||
|
* @param inputArray byte sequence to be encoded
|
||||||
|
* @param inputOffset initial offset into inputArray
|
||||||
|
* @param inputLength number of bytes in inputArray
|
||||||
|
* @return The number of chars required to encode the number of bytes.
|
||||||
|
*/
|
||||||
|
public static int getEncodedLength(byte[] inputArray, int inputOffset,
|
||||||
|
int inputLength) {
|
||||||
|
// Use long for intermediaries to protect against overflow
|
||||||
|
return (int)(((long)inputLength * 8L + 14L) / 15L) + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the number of bytes required to decode the given char sequence.
|
* Returns the number of bytes required to decode the given char sequence.
|
||||||
*
|
*
|
||||||
* @param encoded The char sequence to be encoded. Must be backed by an array.
|
* @param encoded The char sequence to be decoded. Must be backed by an array.
|
||||||
* @return The number of bytes required to decode the given char sequence
|
* @return The number of bytes required to decode the given char sequence
|
||||||
* @throws IllegalArgumentException If the given CharBuffer is not backed by an array
|
* @throws IllegalArgumentException If the given CharBuffer is not backed by
|
||||||
|
* an array
|
||||||
|
* @deprecated Use {@link #getDecodedLength(char[], int, int)} instead. This
|
||||||
|
* method will be removed in Lucene 4.0
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public static int getDecodedLength(CharBuffer encoded)
|
public static int getDecodedLength(CharBuffer encoded)
|
||||||
throws IllegalArgumentException {
|
throws IllegalArgumentException {
|
||||||
if (encoded.hasArray()) {
|
if (encoded.hasArray()) {
|
||||||
int numChars = encoded.limit() - encoded.arrayOffset() - 1;
|
return getDecodedLength(encoded.array(), encoded.arrayOffset(),
|
||||||
if (numChars <= 0) {
|
encoded.limit() - encoded.arrayOffset());
|
||||||
return 0;
|
|
||||||
} else {
|
|
||||||
int numFullBytesInFinalChar = encoded.charAt(encoded.limit() - 1);
|
|
||||||
int numEncodedChars = numChars - 1;
|
|
||||||
return (numEncodedChars * 15 + 7) / 8 + numFullBytesInFinalChar;
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException("encoded argument must have a backing array");
|
throw new IllegalArgumentException("encoded argument must have a backing array");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encodes the input byte sequence into the output char sequence. Before
|
* Returns the number of bytes required to decode the given char sequence.
|
||||||
|
*
|
||||||
|
* @param encoded char sequence to be decoded
|
||||||
|
* @param offset initial offset
|
||||||
|
* @param length number of characters
|
||||||
|
* @return The number of bytes required to decode the given char sequence
|
||||||
|
*/
|
||||||
|
public static int getDecodedLength(char[] encoded, int offset, int length) {
|
||||||
|
final int numChars = length - 1;
|
||||||
|
if (numChars <= 0) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
// Use long for intermediaries to protect against overflow
|
||||||
|
final long numFullBytesInFinalChar = encoded[offset + length - 1];
|
||||||
|
final long numEncodedChars = numChars - 1;
|
||||||
|
return (int)((numEncodedChars * 15L + 7L) / 8L + numFullBytesInFinalChar);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encodes the input byte sequence into the output char sequence. Before
|
||||||
* calling this method, ensure that the output CharBuffer has sufficient
|
* calling this method, ensure that the output CharBuffer has sufficient
|
||||||
* capacity by calling {@link #getEncodedLength(java.nio.ByteBuffer)}.
|
* capacity by calling {@link #getEncodedLength(java.nio.ByteBuffer)}.
|
||||||
*
|
*
|
||||||
* @param input The byte sequence to encode
|
* @param input The byte sequence to encode
|
||||||
* @param output Where the char sequence encoding result will go. The limit
|
* @param output Where the char sequence encoding result will go. The limit is
|
||||||
* is set to one past the position of the final char.
|
* set to one past the position of the final char.
|
||||||
* @throws IllegalArgumentException If either the input or the output buffer
|
* @throws IllegalArgumentException If either the input or the output buffer
|
||||||
* is not backed by an array
|
* is not backed by an array
|
||||||
|
* @deprecated Use {@link #encode(byte[], int, int, char[], int, int)}
|
||||||
|
* instead. This method will be removed in Lucene 4.0
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public static void encode(ByteBuffer input, CharBuffer output) {
|
public static void encode(ByteBuffer input, CharBuffer output) {
|
||||||
if (input.hasArray() && output.hasArray()) {
|
if (input.hasArray() && output.hasArray()) {
|
||||||
byte[] inputArray = input.array();
|
final int inputOffset = input.arrayOffset();
|
||||||
int inputOffset = input.arrayOffset();
|
final int inputLength = input.limit() - inputOffset;
|
||||||
int inputLength = input.limit() - inputOffset;
|
final int outputOffset = output.arrayOffset();
|
||||||
char[] outputArray = output.array();
|
final int outputLength = getEncodedLength(input.array(), inputOffset,
|
||||||
int outputOffset = output.arrayOffset();
|
inputLength);
|
||||||
int outputLength = getEncodedLength(input);
|
output.limit(outputLength + outputOffset);
|
||||||
output.limit(outputOffset + outputLength); // Set output final pos + 1
|
|
||||||
output.position(0);
|
output.position(0);
|
||||||
if (inputLength > 0) {
|
encode(input.array(), inputOffset, inputLength, output.array(),
|
||||||
int inputByteNum = inputOffset;
|
outputOffset, outputLength);
|
||||||
int caseNum = 0;
|
|
||||||
int outputCharNum = outputOffset;
|
|
||||||
CodingCase codingCase;
|
|
||||||
for ( ; inputByteNum + CODING_CASES[caseNum].numBytes <= inputLength ;
|
|
||||||
++outputCharNum ) {
|
|
||||||
codingCase = CODING_CASES[caseNum];
|
|
||||||
if (2 == codingCase.numBytes) {
|
|
||||||
outputArray[outputCharNum]
|
|
||||||
= (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
|
||||||
+ (((inputArray[inputByteNum + 1] & 0xFF) >>> codingCase.finalShift)
|
|
||||||
& codingCase.finalMask)
|
|
||||||
& (short)0x7FFF);
|
|
||||||
} else { // numBytes is 3
|
|
||||||
outputArray[outputCharNum]
|
|
||||||
= (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
|
||||||
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
|
|
||||||
+ (((inputArray[inputByteNum + 2] & 0xFF) >>> codingCase.finalShift)
|
|
||||||
& codingCase.finalMask)
|
|
||||||
& (short)0x7FFF);
|
|
||||||
}
|
|
||||||
inputByteNum += codingCase.advanceBytes;
|
|
||||||
if (++caseNum == CODING_CASES.length) {
|
|
||||||
caseNum = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Produce final char (if any) and trailing count chars.
|
|
||||||
codingCase = CODING_CASES[caseNum];
|
|
||||||
|
|
||||||
if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
|
|
||||||
outputArray[outputCharNum++]
|
|
||||||
= (char)((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
|
||||||
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift))
|
|
||||||
& (short)0x7FFF);
|
|
||||||
// Add trailing char containing the number of full bytes in final char
|
|
||||||
outputArray[outputCharNum++] = (char)1;
|
|
||||||
} else if (inputByteNum < inputLength) {
|
|
||||||
outputArray[outputCharNum++]
|
|
||||||
= (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
|
||||||
& (short)0x7FFF);
|
|
||||||
// Add trailing char containing the number of full bytes in final char
|
|
||||||
outputArray[outputCharNum++] = caseNum == 0 ? (char)1 : (char)0;
|
|
||||||
} else { // No left over bits - last char is completely filled.
|
|
||||||
// Add trailing char containing the number of full bytes in final char
|
|
||||||
outputArray[outputCharNum++] = (char)1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException("Arguments must have backing arrays");
|
throw new IllegalArgumentException("Arguments must have backing arrays");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decodes the input char sequence into the output byte sequence. Before
|
* Encodes the input byte sequence into the output char sequence. Before
|
||||||
|
* calling this method, ensure that the output array has sufficient
|
||||||
|
* capacity by calling {@link #getEncodedLength(byte[], int, int)}.
|
||||||
|
*
|
||||||
|
* @param inputArray byte sequence to be encoded
|
||||||
|
* @param inputOffset initial offset into inputArray
|
||||||
|
* @param inputLength number of bytes in inputArray
|
||||||
|
* @param outputArray char sequence to store encoded result
|
||||||
|
* @param outputOffset initial offset into outputArray
|
||||||
|
* @param outputLength length of output, must be getEncodedLength(inputArray, inputOffset, inputLength)
|
||||||
|
*/
|
||||||
|
public static void encode(byte[] inputArray, int inputOffset,
|
||||||
|
int inputLength, char[] outputArray, int outputOffset, int outputLength) {
|
||||||
|
assert (outputLength == getEncodedLength(inputArray, inputOffset,
|
||||||
|
inputLength));
|
||||||
|
if (inputLength > 0) {
|
||||||
|
int inputByteNum = inputOffset;
|
||||||
|
int caseNum = 0;
|
||||||
|
int outputCharNum = outputOffset;
|
||||||
|
CodingCase codingCase;
|
||||||
|
for (; inputByteNum + CODING_CASES[caseNum].numBytes <= inputLength; ++outputCharNum) {
|
||||||
|
codingCase = CODING_CASES[caseNum];
|
||||||
|
if (2 == codingCase.numBytes) {
|
||||||
|
outputArray[outputCharNum] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
||||||
|
+ (((inputArray[inputByteNum + 1] & 0xFF) >>> codingCase.finalShift) & codingCase.finalMask) & (short) 0x7FFF);
|
||||||
|
} else { // numBytes is 3
|
||||||
|
outputArray[outputCharNum] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
||||||
|
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
|
||||||
|
+ (((inputArray[inputByteNum + 2] & 0xFF) >>> codingCase.finalShift) & codingCase.finalMask) & (short) 0x7FFF);
|
||||||
|
}
|
||||||
|
inputByteNum += codingCase.advanceBytes;
|
||||||
|
if (++caseNum == CODING_CASES.length) {
|
||||||
|
caseNum = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Produce final char (if any) and trailing count chars.
|
||||||
|
codingCase = CODING_CASES[caseNum];
|
||||||
|
|
||||||
|
if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
|
||||||
|
outputArray[outputCharNum++] = (char) ((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift) + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)) & (short) 0x7FFF);
|
||||||
|
// Add trailing char containing the number of full bytes in final char
|
||||||
|
outputArray[outputCharNum++] = (char) 1;
|
||||||
|
} else if (inputByteNum < inputLength) {
|
||||||
|
outputArray[outputCharNum++] = (char) (((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift) & (short) 0x7FFF);
|
||||||
|
// Add trailing char containing the number of full bytes in final char
|
||||||
|
outputArray[outputCharNum++] = caseNum == 0 ? (char) 1 : (char) 0;
|
||||||
|
} else { // No left over bits - last char is completely filled.
|
||||||
|
// Add trailing char containing the number of full bytes in final char
|
||||||
|
outputArray[outputCharNum++] = (char) 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decodes the input char sequence into the output byte sequence. Before
|
||||||
* calling this method, ensure that the output ByteBuffer has sufficient
|
* calling this method, ensure that the output ByteBuffer has sufficient
|
||||||
* capacity by calling {@link #getDecodedLength(java.nio.CharBuffer)}.
|
* capacity by calling {@link #getDecodedLength(java.nio.CharBuffer)}.
|
||||||
*
|
*
|
||||||
* @param input The char sequence to decode
|
* @param input The char sequence to decode
|
||||||
* @param output Where the byte sequence decoding result will go. The limit
|
* @param output Where the byte sequence decoding result will go. The limit is
|
||||||
* is set to one past the position of the final char.
|
* set to one past the position of the final byte.
|
||||||
* @throws IllegalArgumentException If either the input or the output buffer
|
* @throws IllegalArgumentException If either the input or the output buffer
|
||||||
* is not backed by an array
|
* is not backed by an array
|
||||||
|
* @deprecated Use {@link #decode(char[], int, int, byte[], int, int)}
|
||||||
|
* instead. This method will be removed in Lucene 4.0
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public static void decode(CharBuffer input, ByteBuffer output) {
|
public static void decode(CharBuffer input, ByteBuffer output) {
|
||||||
if (input.hasArray() && output.hasArray()) {
|
if (input.hasArray() && output.hasArray()) {
|
||||||
int numInputChars = input.limit() - input.arrayOffset() - 1;
|
final int inputOffset = input.arrayOffset();
|
||||||
int numOutputBytes = getDecodedLength(input);
|
final int inputLength = input.limit() - inputOffset;
|
||||||
output.limit(numOutputBytes + output.arrayOffset()); // Set output final pos + 1
|
final int outputOffset = output.arrayOffset();
|
||||||
|
final int outputLength = getDecodedLength(input.array(), inputOffset,
|
||||||
|
inputLength);
|
||||||
|
output.limit(outputLength + outputOffset);
|
||||||
output.position(0);
|
output.position(0);
|
||||||
byte[] outputArray = output.array();
|
decode(input.array(), inputOffset, inputLength, output.array(),
|
||||||
char[] inputArray = input.array();
|
outputOffset, outputLength);
|
||||||
if (numOutputBytes > 0) {
|
} else {
|
||||||
int caseNum = 0;
|
throw new IllegalArgumentException("Arguments must have backing arrays");
|
||||||
int outputByteNum = output.arrayOffset();
|
}
|
||||||
int inputCharNum = input.arrayOffset();
|
}
|
||||||
short inputChar;
|
|
||||||
CodingCase codingCase;
|
/**
|
||||||
for ( ; inputCharNum < numInputChars - 1 ; ++inputCharNum) {
|
* Decodes the input char sequence into the output byte sequence. Before
|
||||||
codingCase = CODING_CASES[caseNum];
|
* calling this method, ensure that the output array has sufficient capacity
|
||||||
inputChar = (short)inputArray[inputCharNum];
|
* by calling {@link #getDecodedLength(char[], int, int)}.
|
||||||
if (2 == codingCase.numBytes) {
|
*
|
||||||
if (0 == caseNum) {
|
* @param inputArray char sequence to be decoded
|
||||||
outputArray[outputByteNum] = (byte)(inputChar >>> codingCase.initialShift);
|
* @param inputOffset initial offset into inputArray
|
||||||
} else {
|
* @param inputLength number of chars in inputArray
|
||||||
outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
|
* @param outputArray byte sequence to store decoded result
|
||||||
}
|
* @param outputOffset initial offset into outputArray
|
||||||
outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.finalMask)
|
* @param outputLength length of output, must be
|
||||||
<< codingCase.finalShift);
|
* getDecodedLength(inputArray, inputOffset, inputLength)
|
||||||
} else { // numBytes is 3
|
*/
|
||||||
outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
|
public static void decode(char[] inputArray, int inputOffset,
|
||||||
outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.middleMask)
|
int inputLength, byte[] outputArray, int outputOffset, int outputLength) {
|
||||||
>>> codingCase.middleShift);
|
assert (outputLength == getDecodedLength(inputArray, inputOffset,
|
||||||
outputArray[outputByteNum + 2] = (byte)((inputChar & codingCase.finalMask)
|
inputLength));
|
||||||
<< codingCase.finalShift);
|
final int numInputChars = inputLength - 1;
|
||||||
}
|
final int numOutputBytes = outputLength;
|
||||||
outputByteNum += codingCase.advanceBytes;
|
|
||||||
if (++caseNum == CODING_CASES.length) {
|
if (numOutputBytes > 0) {
|
||||||
caseNum = 0;
|
int caseNum = 0;
|
||||||
}
|
int outputByteNum = outputOffset;
|
||||||
}
|
int inputCharNum = inputOffset;
|
||||||
// Handle final char
|
short inputChar;
|
||||||
inputChar = (short)inputArray[inputCharNum];
|
CodingCase codingCase;
|
||||||
|
for (; inputCharNum < numInputChars - 1; ++inputCharNum) {
|
||||||
codingCase = CODING_CASES[caseNum];
|
codingCase = CODING_CASES[caseNum];
|
||||||
if (0 == caseNum) {
|
inputChar = (short) inputArray[inputCharNum];
|
||||||
outputArray[outputByteNum] = 0;
|
if (2 == codingCase.numBytes) {
|
||||||
|
if (0 == caseNum) {
|
||||||
|
outputArray[outputByteNum] = (byte) (inputChar >>> codingCase.initialShift);
|
||||||
|
} else {
|
||||||
|
outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
|
||||||
|
}
|
||||||
|
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
|
||||||
|
} else { // numBytes is 3
|
||||||
|
outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
|
||||||
|
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.middleMask) >>> codingCase.middleShift);
|
||||||
|
outputArray[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
|
||||||
}
|
}
|
||||||
outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
|
outputByteNum += codingCase.advanceBytes;
|
||||||
int bytesLeft = numOutputBytes - outputByteNum;
|
if (++caseNum == CODING_CASES.length) {
|
||||||
if (bytesLeft > 1) {
|
caseNum = 0;
|
||||||
if (2 == codingCase.numBytes) {
|
}
|
||||||
outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.finalMask)
|
}
|
||||||
>>> codingCase.finalShift);
|
// Handle final char
|
||||||
} else { // numBytes is 3
|
inputChar = (short) inputArray[inputCharNum];
|
||||||
outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.middleMask)
|
codingCase = CODING_CASES[caseNum];
|
||||||
>>> codingCase.middleShift);
|
if (0 == caseNum) {
|
||||||
if (bytesLeft > 2) {
|
outputArray[outputByteNum] = 0;
|
||||||
outputArray[outputByteNum + 2] = (byte)((inputChar & codingCase.finalMask)
|
}
|
||||||
<< codingCase.finalShift);
|
outputArray[outputByteNum] += (byte) (inputChar >>> codingCase.initialShift);
|
||||||
}
|
final int bytesLeft = numOutputBytes - outputByteNum;
|
||||||
|
if (bytesLeft > 1) {
|
||||||
|
if (2 == codingCase.numBytes) {
|
||||||
|
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.finalMask) >>> codingCase.finalShift);
|
||||||
|
} else { // numBytes is 3
|
||||||
|
outputArray[outputByteNum + 1] = (byte) ((inputChar & codingCase.middleMask) >>> codingCase.middleShift);
|
||||||
|
if (bytesLeft > 2) {
|
||||||
|
outputArray[outputByteNum + 2] = (byte) ((inputChar & codingCase.finalMask) << codingCase.finalShift);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
throw new IllegalArgumentException("Arguments must have backing arrays");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -261,11 +334,14 @@ public class IndexableBinaryStringTools {
|
||||||
* {@link #encode(java.nio.ByteBuffer, java.nio.CharBuffer)}.
|
* {@link #encode(java.nio.ByteBuffer, java.nio.CharBuffer)}.
|
||||||
*
|
*
|
||||||
* @param input The char sequence to decode
|
* @param input The char sequence to decode
|
||||||
* @return A byte sequence containing the decoding result. The limit
|
* @return A byte sequence containing the decoding result. The limit is set to
|
||||||
* is set to one past the position of the final char.
|
* one past the position of the final byte.
|
||||||
* @throws IllegalArgumentException If the input buffer is not backed by an
|
* @throws IllegalArgumentException If the input buffer is not backed by an
|
||||||
* array
|
* array
|
||||||
|
* @deprecated Use {@link #decode(char[], int, int, byte[], int, int)}
|
||||||
|
* instead. This method will be removed in Lucene 4.0
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public static ByteBuffer decode(CharBuffer input) {
|
public static ByteBuffer decode(CharBuffer input) {
|
||||||
byte[] outputArray = new byte[getDecodedLength(input)];
|
byte[] outputArray = new byte[getDecodedLength(input)];
|
||||||
ByteBuffer output = ByteBuffer.wrap(outputArray);
|
ByteBuffer output = ByteBuffer.wrap(outputArray);
|
||||||
|
@ -277,11 +353,14 @@ public class IndexableBinaryStringTools {
|
||||||
* Encodes the input byte sequence.
|
* Encodes the input byte sequence.
|
||||||
*
|
*
|
||||||
* @param input The byte sequence to encode
|
* @param input The byte sequence to encode
|
||||||
* @return A char sequence containing the encoding result. The limit is set
|
* @return A char sequence containing the encoding result. The limit is set to
|
||||||
* to one past the position of the final char.
|
* one past the position of the final char.
|
||||||
* @throws IllegalArgumentException If the input buffer is not backed by an
|
* @throws IllegalArgumentException If the input buffer is not backed by an
|
||||||
* array
|
* array
|
||||||
|
* @deprecated Use {@link #encode(byte[], int, int, char[], int, int)}
|
||||||
|
* instead. This method will be removed in Lucene 4.0
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public static CharBuffer encode(ByteBuffer input) {
|
public static CharBuffer encode(ByteBuffer input) {
|
||||||
char[] outputArray = new char[getEncodedLength(input)];
|
char[] outputArray = new char[getEncodedLength(input)];
|
||||||
CharBuffer output = CharBuffer.wrap(outputArray);
|
CharBuffer output = CharBuffer.wrap(outputArray);
|
||||||
|
|
|
@ -25,7 +25,9 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
|
||||||
private static final int NUM_RANDOM_TESTS = 2000;
|
private static final int NUM_RANDOM_TESTS = 2000;
|
||||||
private static final int MAX_RANDOM_BINARY_LENGTH = 300;
|
private static final int MAX_RANDOM_BINARY_LENGTH = 300;
|
||||||
|
|
||||||
public void testSingleBinaryRoundTrip() {
|
/** @deprecated remove this test for Lucene 4.0 */
|
||||||
|
@Deprecated
|
||||||
|
public void testSingleBinaryRoundTripNIO() {
|
||||||
byte[] binary = new byte[]
|
byte[] binary = new byte[]
|
||||||
{ (byte)0x23, (byte)0x98, (byte)0x13, (byte)0xE4, (byte)0x76, (byte)0x41,
|
{ (byte)0x23, (byte)0x98, (byte)0x13, (byte)0xE4, (byte)0x76, (byte)0x41,
|
||||||
(byte)0xB2, (byte)0xC9, (byte)0x7F, (byte)0x0A, (byte)0xA6, (byte)0xD8 };
|
(byte)0xB2, (byte)0xC9, (byte)0x7F, (byte)0x0A, (byte)0xA6, (byte)0xD8 };
|
||||||
|
@ -35,15 +37,44 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
|
||||||
ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded);
|
ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded);
|
||||||
assertEquals("Round trip decode/decode returned different results:"
|
assertEquals("Round trip decode/decode returned different results:"
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ "original: " + binaryDump(binaryBuf)
|
+ "original: " + binaryDumpNIO(binaryBuf)
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ " encoded: " + charArrayDump(encoded)
|
+ " encoded: " + charArrayDumpNIO(encoded)
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ " decoded: " + binaryDump(decoded),
|
+ " decoded: " + binaryDumpNIO(decoded),
|
||||||
binaryBuf, decoded);
|
binaryBuf, decoded);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testEncodedSortability() {
|
public void testSingleBinaryRoundTrip() {
|
||||||
|
byte[] binary = new byte[] { (byte) 0x23, (byte) 0x98, (byte) 0x13,
|
||||||
|
(byte) 0xE4, (byte) 0x76, (byte) 0x41, (byte) 0xB2, (byte) 0xC9,
|
||||||
|
(byte) 0x7F, (byte) 0x0A, (byte) 0xA6, (byte) 0xD8 };
|
||||||
|
|
||||||
|
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
|
||||||
|
binary.length);
|
||||||
|
char encoded[] = new char[encodedLen];
|
||||||
|
IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
|
||||||
|
encoded.length);
|
||||||
|
|
||||||
|
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
|
||||||
|
encoded.length);
|
||||||
|
byte decoded[] = new byte[decodedLen];
|
||||||
|
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
|
||||||
|
decoded.length);
|
||||||
|
|
||||||
|
assertEquals("Round trip decode/decode returned different results:"
|
||||||
|
+ System.getProperty("line.separator") + "original: "
|
||||||
|
+ binaryDump(binary, binary.length)
|
||||||
|
+ System.getProperty("line.separator") + " encoded: "
|
||||||
|
+ charArrayDump(encoded, encoded.length)
|
||||||
|
+ System.getProperty("line.separator") + " decoded: "
|
||||||
|
+ binaryDump(decoded, decoded.length),
|
||||||
|
binaryDump(binary, binary.length), binaryDump(decoded, decoded.length));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @deprecated remove this test for Lucene 4.0 */
|
||||||
|
@Deprecated
|
||||||
|
public void testEncodedSortabilityNIO() {
|
||||||
Random random = newRandom();
|
Random random = newRandom();
|
||||||
byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH];
|
byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH];
|
||||||
ByteBuffer originalBuf1 = ByteBuffer.wrap(originalArray1);
|
ByteBuffer originalBuf1 = ByteBuffer.wrap(originalArray1);
|
||||||
|
@ -88,19 +119,85 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
|
||||||
assertEquals("Test #" + (testNum + 1)
|
assertEquals("Test #" + (testNum + 1)
|
||||||
+ ": Original bytes and encoded chars compare differently:"
|
+ ": Original bytes and encoded chars compare differently:"
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ " binary 1: " + binaryDump(originalBuf1)
|
+ " binary 1: " + binaryDumpNIO(originalBuf1)
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ " binary 2: " + binaryDump(originalBuf2)
|
+ " binary 2: " + binaryDumpNIO(originalBuf2)
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ "encoded 1: " + charArrayDump(encodedBuf1)
|
+ "encoded 1: " + charArrayDumpNIO(encodedBuf1)
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ "encoded 2: " + charArrayDump(encodedBuf2)
|
+ "encoded 2: " + charArrayDumpNIO(encodedBuf2)
|
||||||
+ System.getProperty("line.separator"),
|
+ System.getProperty("line.separator"),
|
||||||
originalComparison, encodedComparison);
|
originalComparison, encodedComparison);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testEmptyInput() {
|
public void testEncodedSortability() {
|
||||||
|
Random random = newRandom();
|
||||||
|
byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
char[] originalString1 = new char[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
char[] encoded1 = new char[MAX_RANDOM_BINARY_LENGTH * 10];
|
||||||
|
byte[] original2 = new byte[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
char[] originalString2 = new char[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
char[] encoded2 = new char[MAX_RANDOM_BINARY_LENGTH * 10];
|
||||||
|
|
||||||
|
for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) {
|
||||||
|
int numBytes1 = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
|
||||||
|
|
||||||
|
for (int byteNum = 0; byteNum < numBytes1; ++byteNum) {
|
||||||
|
int randomInt = random.nextInt(0x100);
|
||||||
|
originalArray1[byteNum] = (byte) randomInt;
|
||||||
|
originalString1[byteNum] = (char) randomInt;
|
||||||
|
}
|
||||||
|
|
||||||
|
int numBytes2 = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
|
||||||
|
|
||||||
|
for (int byteNum = 0; byteNum < numBytes2; ++byteNum) {
|
||||||
|
int randomInt = random.nextInt(0x100);
|
||||||
|
original2[byteNum] = (byte) randomInt;
|
||||||
|
originalString2[byteNum] = (char) randomInt;
|
||||||
|
}
|
||||||
|
int originalComparison = new String(originalString1, 0, numBytes1)
|
||||||
|
.compareTo(new String(originalString2, 0, numBytes2));
|
||||||
|
originalComparison = originalComparison < 0 ? -1
|
||||||
|
: originalComparison > 0 ? 1 : 0;
|
||||||
|
|
||||||
|
int encodedLen1 = IndexableBinaryStringTools.getEncodedLength(
|
||||||
|
originalArray1, 0, numBytes1);
|
||||||
|
if (encodedLen1 > encoded1.length)
|
||||||
|
encoded1 = new char[ArrayUtil.getNextSize(encodedLen1)];
|
||||||
|
IndexableBinaryStringTools.encode(originalArray1, 0, numBytes1, encoded1,
|
||||||
|
0, encodedLen1);
|
||||||
|
|
||||||
|
int encodedLen2 = IndexableBinaryStringTools.getEncodedLength(original2,
|
||||||
|
0, numBytes2);
|
||||||
|
if (encodedLen2 > encoded2.length)
|
||||||
|
encoded2 = new char[ArrayUtil.getNextSize(encodedLen2)];
|
||||||
|
IndexableBinaryStringTools.encode(original2, 0, numBytes2, encoded2, 0,
|
||||||
|
encodedLen2);
|
||||||
|
|
||||||
|
int encodedComparison = new String(encoded1, 0, encodedLen1)
|
||||||
|
.compareTo(new String(encoded2, 0, encodedLen2));
|
||||||
|
encodedComparison = encodedComparison < 0 ? -1
|
||||||
|
: encodedComparison > 0 ? 1 : 0;
|
||||||
|
|
||||||
|
assertEquals("Test #" + (testNum + 1)
|
||||||
|
+ ": Original bytes and encoded chars compare differently:"
|
||||||
|
+ System.getProperty("line.separator") + " binary 1: "
|
||||||
|
+ binaryDump(originalArray1, numBytes1)
|
||||||
|
+ System.getProperty("line.separator") + " binary 2: "
|
||||||
|
+ binaryDump(original2, numBytes2)
|
||||||
|
+ System.getProperty("line.separator") + "encoded 1: "
|
||||||
|
+ charArrayDump(encoded1, encodedLen1)
|
||||||
|
+ System.getProperty("line.separator") + "encoded 2: "
|
||||||
|
+ charArrayDump(encoded2, encodedLen2)
|
||||||
|
+ System.getProperty("line.separator"), originalComparison,
|
||||||
|
encodedComparison);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @deprecated remove this test for Lucene 4.0 */
|
||||||
|
@Deprecated
|
||||||
|
public void testEmptyInputNIO() {
|
||||||
byte[] binary = new byte[0];
|
byte[] binary = new byte[0];
|
||||||
CharBuffer encoded = IndexableBinaryStringTools.encode(ByteBuffer.wrap(binary));
|
CharBuffer encoded = IndexableBinaryStringTools.encode(ByteBuffer.wrap(binary));
|
||||||
ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded);
|
ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded);
|
||||||
|
@ -108,7 +205,27 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
|
||||||
assertEquals("decoded empty input was not empty", decoded.limit(), 0);
|
assertEquals("decoded empty input was not empty", decoded.limit(), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAllNullInput() {
|
public void testEmptyInput() {
|
||||||
|
byte[] binary = new byte[0];
|
||||||
|
|
||||||
|
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
|
||||||
|
binary.length);
|
||||||
|
char[] encoded = new char[encodedLen];
|
||||||
|
IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
|
||||||
|
encoded.length);
|
||||||
|
|
||||||
|
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
|
||||||
|
encoded.length);
|
||||||
|
byte[] decoded = new byte[decodedLen];
|
||||||
|
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
|
||||||
|
decoded.length);
|
||||||
|
|
||||||
|
assertEquals("decoded empty input was not empty", decoded.length, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @deprecated remove this test for Lucene 4.0 */
|
||||||
|
@Deprecated
|
||||||
|
public void testAllNullInputNIO() {
|
||||||
byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
|
ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
|
||||||
CharBuffer encoded = IndexableBinaryStringTools.encode(binaryBuf);
|
CharBuffer encoded = IndexableBinaryStringTools.encode(binaryBuf);
|
||||||
|
@ -117,13 +234,38 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
|
||||||
assertNotNull("decode() returned null", decodedBuf);
|
assertNotNull("decode() returned null", decodedBuf);
|
||||||
assertEquals("Round trip decode/decode returned different results:"
|
assertEquals("Round trip decode/decode returned different results:"
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ " original: " + binaryDump(binaryBuf)
|
+ " original: " + binaryDumpNIO(binaryBuf)
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ "decodedBuf: " + binaryDump(decodedBuf),
|
+ "decodedBuf: " + binaryDumpNIO(decodedBuf),
|
||||||
binaryBuf, decodedBuf);
|
binaryBuf, decodedBuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandomBinaryRoundTrip() {
|
public void testAllNullInput() {
|
||||||
|
byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
|
||||||
|
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
|
||||||
|
binary.length);
|
||||||
|
char encoded[] = new char[encodedLen];
|
||||||
|
IndexableBinaryStringTools.encode(binary, 0, binary.length, encoded, 0,
|
||||||
|
encoded.length);
|
||||||
|
|
||||||
|
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
|
||||||
|
encoded.length);
|
||||||
|
byte[] decoded = new byte[decodedLen];
|
||||||
|
IndexableBinaryStringTools.decode(encoded, 0, encoded.length, decoded, 0,
|
||||||
|
decoded.length);
|
||||||
|
|
||||||
|
assertEquals("Round trip decode/decode returned different results:"
|
||||||
|
+ System.getProperty("line.separator") + " original: "
|
||||||
|
+ binaryDump(binary, binary.length)
|
||||||
|
+ System.getProperty("line.separator") + "decodedBuf: "
|
||||||
|
+ binaryDump(decoded, decoded.length),
|
||||||
|
binaryDump(binary, binary.length), binaryDump(decoded, decoded.length));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @deprecated remove this test for Lucene 4.0 */
|
||||||
|
@Deprecated
|
||||||
|
public void testRandomBinaryRoundTripNIO() {
|
||||||
Random random = newRandom();
|
Random random = newRandom();
|
||||||
byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH];
|
byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH];
|
||||||
ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
|
ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
|
||||||
|
@ -142,19 +284,59 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
|
||||||
assertEquals("Test #" + (testNum + 1)
|
assertEquals("Test #" + (testNum + 1)
|
||||||
+ ": Round trip decode/decode returned different results:"
|
+ ": Round trip decode/decode returned different results:"
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ " original: " + binaryDump(binaryBuf)
|
+ " original: " + binaryDumpNIO(binaryBuf)
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ "encodedBuf: " + charArrayDump(encodedBuf)
|
+ "encodedBuf: " + charArrayDumpNIO(encodedBuf)
|
||||||
+ System.getProperty("line.separator")
|
+ System.getProperty("line.separator")
|
||||||
+ "decodedBuf: " + binaryDump(decodedBuf),
|
+ "decodedBuf: " + binaryDumpNIO(decodedBuf),
|
||||||
binaryBuf, decodedBuf);
|
binaryBuf, decodedBuf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public String binaryDump(ByteBuffer binaryBuf) {
|
public void testRandomBinaryRoundTrip() {
|
||||||
|
Random random = newRandom();
|
||||||
|
byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
char[] encoded = new char[MAX_RANDOM_BINARY_LENGTH * 10];
|
||||||
|
byte[] decoded = new byte[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
for (int testNum = 0; testNum < NUM_RANDOM_TESTS; ++testNum) {
|
||||||
|
int numBytes = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
|
||||||
|
|
||||||
|
for (int byteNum = 0; byteNum < numBytes; ++byteNum) {
|
||||||
|
binary[byteNum] = (byte) random.nextInt(0x100);
|
||||||
|
}
|
||||||
|
|
||||||
|
int encodedLen = IndexableBinaryStringTools.getEncodedLength(binary, 0,
|
||||||
|
numBytes);
|
||||||
|
if (encoded.length < encodedLen)
|
||||||
|
encoded = new char[ArrayUtil.getNextSize(encodedLen)];
|
||||||
|
IndexableBinaryStringTools.encode(binary, 0, numBytes, encoded, 0,
|
||||||
|
encodedLen);
|
||||||
|
|
||||||
|
int decodedLen = IndexableBinaryStringTools.getDecodedLength(encoded, 0,
|
||||||
|
encodedLen);
|
||||||
|
IndexableBinaryStringTools.decode(encoded, 0, encodedLen, decoded, 0,
|
||||||
|
decodedLen);
|
||||||
|
|
||||||
|
assertEquals("Test #" + (testNum + 1)
|
||||||
|
+ ": Round trip decode/decode returned different results:"
|
||||||
|
+ System.getProperty("line.separator") + " original: "
|
||||||
|
+ binaryDump(binary, numBytes) + System.getProperty("line.separator")
|
||||||
|
+ "encodedBuf: " + charArrayDump(encoded, encodedLen)
|
||||||
|
+ System.getProperty("line.separator") + "decodedBuf: "
|
||||||
|
+ binaryDump(decoded, decodedLen), binaryDump(binary, numBytes),
|
||||||
|
binaryDump(decoded, decodedLen));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @deprecated remove this method for Lucene 4.0 */
|
||||||
|
@Deprecated
|
||||||
|
public String binaryDumpNIO(ByteBuffer binaryBuf) {
|
||||||
|
return binaryDump(binaryBuf.array(),
|
||||||
|
binaryBuf.limit() - binaryBuf.arrayOffset());
|
||||||
|
}
|
||||||
|
|
||||||
|
public String binaryDump(byte[] binary, int numBytes) {
|
||||||
StringBuilder buf = new StringBuilder();
|
StringBuilder buf = new StringBuilder();
|
||||||
int numBytes = binaryBuf.limit() - binaryBuf.arrayOffset();
|
|
||||||
byte[] binary = binaryBuf.array();
|
|
||||||
for (int byteNum = 0 ; byteNum < numBytes ; ++byteNum) {
|
for (int byteNum = 0 ; byteNum < numBytes ; ++byteNum) {
|
||||||
String hex = Integer.toHexString((int)binary[byteNum] & 0xFF);
|
String hex = Integer.toHexString((int)binary[byteNum] & 0xFF);
|
||||||
if (hex.length() == 1) {
|
if (hex.length() == 1) {
|
||||||
|
@ -167,11 +349,15 @@ public class TestIndexableBinaryStringTools extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
return buf.toString();
|
return buf.toString();
|
||||||
}
|
}
|
||||||
|
/** @deprecated remove this method for Lucene 4.0 */
|
||||||
|
@Deprecated
|
||||||
|
public String charArrayDumpNIO(CharBuffer charBuf) {
|
||||||
|
return charArrayDump(charBuf.array(),
|
||||||
|
charBuf.limit() - charBuf.arrayOffset());
|
||||||
|
}
|
||||||
|
|
||||||
public String charArrayDump(CharBuffer charBuf) {
|
public String charArrayDump(char[] charArray, int numBytes) {
|
||||||
StringBuilder buf = new StringBuilder();
|
StringBuilder buf = new StringBuilder();
|
||||||
int numBytes = charBuf.limit() - charBuf.arrayOffset();
|
|
||||||
char[] charArray = charBuf.array();
|
|
||||||
for (int charNum = 0 ; charNum < numBytes ; ++charNum) {
|
for (int charNum = 0 ; charNum < numBytes ; ++charNum) {
|
||||||
String hex = Integer.toHexString((int)charArray[charNum]);
|
String hex = Integer.toHexString((int)charArray[charNum]);
|
||||||
for (int digit = 0 ; digit < 4 - hex.length() ; ++digit) {
|
for (int digit = 0 ; digit < 4 - hex.length() ; ++digit) {
|
||||||
|
|
Loading…
Reference in New Issue