mirror of https://github.com/apache/lucene.git
LUCENE-1434: add IndexableBinaryStringTools
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@755742 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
96863198a5
commit
5d05762169
|
@ -178,6 +178,11 @@ New features
|
||||||
resources with the original reader. (Torin Danil via Mike
|
resources with the original reader. (Torin Danil via Mike
|
||||||
McCandless)
|
McCandless)
|
||||||
|
|
||||||
|
17. LUCENE-1434: Added org.apache.lucene.util.IndexableBinaryStringTools,
|
||||||
|
to encode byte[] as String values that are valid terms, and
|
||||||
|
maintain sort order of the original byte[] when the bytes are
|
||||||
|
interpreted as unsigned. (Steven Rowe via Mike McCandless)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
|
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing
|
||||||
|
|
|
@ -0,0 +1,315 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provides support for converting byte sequences to Strings and back again.
|
||||||
|
* The resulting Strings preserve the original byte sequences' sort order.
|
||||||
|
*
|
||||||
|
* The Strings are constructed using a Base 8000h encoding of the original
|
||||||
|
* binary data - each char of an encoded String represents a 15-bit chunk
|
||||||
|
* from the byte sequence. Base 8000h was chosen because it allows for all
|
||||||
|
* lower 15 bits of char to be used without restriction; the surrogate range
|
||||||
|
* [U+D8000-U+DFFF] does not represent valid chars, and would require
|
||||||
|
* complicated handling to avoid them and allow use of char's high bit.
|
||||||
|
*
|
||||||
|
* Although unset bits are used as padding in the final char, the original
|
||||||
|
* byte sequence could contain trailing bytes with no set bits (null bytes):
|
||||||
|
* padding is indistinguishable from valid information. To overcome this
|
||||||
|
* problem, a char is appended, indicating the number of encoded bytes in the
|
||||||
|
* final content char.
|
||||||
|
*
|
||||||
|
* This class's operations are defined over CharBuffers and ByteBuffers, to
|
||||||
|
* allow for wrapped arrays to be reused, reducing memory allocation costs for
|
||||||
|
* repeated operations. Note that this class calls array() and arrayOffset()
|
||||||
|
* on the CharBuffers and ByteBuffers it uses, so only wrapped arrays may be
|
||||||
|
* used. This class interprets the arrayOffset() and limit() values returned by
|
||||||
|
* its input buffers as beginning and end+1 positions on the wrapped array,
|
||||||
|
* resprectively; similarly, on the output buffer, arrayOffset() is the first
|
||||||
|
* position written to, and limit() is set to one past the final output array
|
||||||
|
* position.
|
||||||
|
*/
|
||||||
|
public class IndexableBinaryStringTools {
|
||||||
|
|
||||||
|
private static final CodingCase[] CODING_CASES = {
|
||||||
|
// CodingCase(int initialShift, int finalShift)
|
||||||
|
new CodingCase( 7, 1 ),
|
||||||
|
// CodingCase(int initialShift, int middleShift, int finalShift)
|
||||||
|
new CodingCase(14, 6, 2),
|
||||||
|
new CodingCase(13, 5, 3),
|
||||||
|
new CodingCase(12, 4, 4),
|
||||||
|
new CodingCase(11, 3, 5),
|
||||||
|
new CodingCase(10, 2, 6),
|
||||||
|
new CodingCase( 9, 1, 7),
|
||||||
|
new CodingCase( 8, 0 )
|
||||||
|
};
|
||||||
|
|
||||||
|
// Export only static methods
|
||||||
|
private IndexableBinaryStringTools() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of chars required to encode the given byte sequence.
|
||||||
|
*
|
||||||
|
* @param original The byte sequence to be encoded. Must be backed by an array.
|
||||||
|
* @return The number of chars required to encode the given byte sequence
|
||||||
|
* @throws IllegalArgumentException If the given ByteBuffer is not backed by an array
|
||||||
|
*/
|
||||||
|
public static int getEncodedLength(ByteBuffer original)
|
||||||
|
throws IllegalArgumentException {
|
||||||
|
if (original.hasArray()) {
|
||||||
|
// Use long for intermediaries to protect against overflow
|
||||||
|
long length = (long)(original.limit() - original.arrayOffset());
|
||||||
|
return (int)((length * 8L + 14L) / 15L) + 1;
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("original argument must have a backing array");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of bytes required to decode the given char sequence.
|
||||||
|
*
|
||||||
|
* @param encoded The char sequence to be encoded. Must be backed by an array.
|
||||||
|
* @return The number of bytes required to decode the given char sequence
|
||||||
|
* @throws IllegalArgumentException If the given CharBuffer is not backed by an array
|
||||||
|
*/
|
||||||
|
public static int getDecodedLength(CharBuffer encoded)
|
||||||
|
throws IllegalArgumentException {
|
||||||
|
if (encoded.hasArray()) {
|
||||||
|
int numChars = encoded.limit() - encoded.arrayOffset() - 1;
|
||||||
|
if (numChars <= 0) {
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
int numFullBytesInFinalChar = encoded.charAt(encoded.limit() - 1);
|
||||||
|
int numEncodedChars = numChars - 1;
|
||||||
|
return (numEncodedChars * 15 + 7) / 8 + numFullBytesInFinalChar;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("encoded argument must have a backing array");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encodes the input byte sequence into the output char sequence. Before
|
||||||
|
* calling this method, ensure that the output CharBuffer has sufficient
|
||||||
|
* capacity by calling {@link #getEncodedLength(java.nio.ByteBuffer)}.
|
||||||
|
*
|
||||||
|
* @param input The byte sequence to encode
|
||||||
|
* @param output Where the char sequence encoding result will go. The limit
|
||||||
|
* is set to one past the position of the final char.
|
||||||
|
* @throws IllegalArgumentException If either the input or the output buffer
|
||||||
|
* is not backed by an array
|
||||||
|
*/
|
||||||
|
public static void encode(ByteBuffer input, CharBuffer output) {
|
||||||
|
if (input.hasArray() && output.hasArray()) {
|
||||||
|
byte[] inputArray = input.array();
|
||||||
|
int inputOffset = input.arrayOffset();
|
||||||
|
int inputLength = input.limit() - inputOffset;
|
||||||
|
char[] outputArray = output.array();
|
||||||
|
int outputOffset = output.arrayOffset();
|
||||||
|
int outputLength = getEncodedLength(input);
|
||||||
|
output.limit(outputOffset + outputLength); // Set output final pos + 1
|
||||||
|
output.position(0);
|
||||||
|
if (inputLength > 0) {
|
||||||
|
int inputByteNum = inputOffset;
|
||||||
|
int caseNum = 0;
|
||||||
|
int outputCharNum = outputOffset;
|
||||||
|
CodingCase codingCase;
|
||||||
|
for ( ; inputByteNum + CODING_CASES[caseNum].numBytes <= inputLength ;
|
||||||
|
++outputCharNum ) {
|
||||||
|
codingCase = CODING_CASES[caseNum];
|
||||||
|
if (2 == codingCase.numBytes) {
|
||||||
|
outputArray[outputCharNum]
|
||||||
|
= (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
||||||
|
+ (((inputArray[inputByteNum + 1] & 0xFF) >>> codingCase.finalShift)
|
||||||
|
& codingCase.finalMask)
|
||||||
|
& (short)0x7FFF);
|
||||||
|
} else { // numBytes is 3
|
||||||
|
outputArray[outputCharNum]
|
||||||
|
= (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
||||||
|
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
|
||||||
|
+ (((inputArray[inputByteNum + 2] & 0xFF) >>> codingCase.finalShift)
|
||||||
|
& codingCase.finalMask)
|
||||||
|
& (short)0x7FFF);
|
||||||
|
}
|
||||||
|
inputByteNum += codingCase.advanceBytes;
|
||||||
|
if (++caseNum == CODING_CASES.length) {
|
||||||
|
caseNum = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Produce final char (if any) and trailing count chars.
|
||||||
|
codingCase = CODING_CASES[caseNum];
|
||||||
|
|
||||||
|
if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
|
||||||
|
outputArray[outputCharNum++]
|
||||||
|
= (char)((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
||||||
|
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift))
|
||||||
|
& (short)0x7FFF);
|
||||||
|
// Add trailing char containing the number of full bytes in final char
|
||||||
|
outputArray[outputCharNum++] = (char)1;
|
||||||
|
} else if (inputByteNum < inputLength) {
|
||||||
|
outputArray[outputCharNum++]
|
||||||
|
= (char)(((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
|
||||||
|
& (short)0x7FFF);
|
||||||
|
// Add trailing char containing the number of full bytes in final char
|
||||||
|
outputArray[outputCharNum++] = caseNum == 0 ? (char)1 : (char)0;
|
||||||
|
} else { // No left over bits - last char is completely filled.
|
||||||
|
// Add trailing char containing the number of full bytes in final char
|
||||||
|
outputArray[outputCharNum++] = (char)1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Arguments must have backing arrays");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decodes the input char sequence into the output byte sequence. Before
|
||||||
|
* calling this method, ensure that the output ByteBuffer has sufficient
|
||||||
|
* capacity by calling {@link #getDecodedLength(java.nio.CharBuffer)}.
|
||||||
|
*
|
||||||
|
* @param input The char sequence to decode
|
||||||
|
* @param output Where the byte sequence decoding result will go. The limit
|
||||||
|
* is set to one past the position of the final char.
|
||||||
|
* @throws IllegalArgumentException If either the input or the output buffer
|
||||||
|
* is not backed by an array
|
||||||
|
*/
|
||||||
|
public static void decode(CharBuffer input, ByteBuffer output) {
|
||||||
|
if (input.hasArray() && output.hasArray()) {
|
||||||
|
int numInputChars = input.limit() - input.arrayOffset() - 1;
|
||||||
|
int numOutputBytes = getDecodedLength(input);
|
||||||
|
output.limit(numOutputBytes + output.arrayOffset()); // Set output final pos + 1
|
||||||
|
output.position(0);
|
||||||
|
byte[] outputArray = output.array();
|
||||||
|
char[] inputArray = input.array();
|
||||||
|
if (numOutputBytes > 0) {
|
||||||
|
int caseNum = 0;
|
||||||
|
int outputByteNum = output.arrayOffset();
|
||||||
|
int inputCharNum = input.arrayOffset();
|
||||||
|
short inputChar;
|
||||||
|
CodingCase codingCase;
|
||||||
|
for ( ; inputCharNum < numInputChars - 1 ; ++inputCharNum) {
|
||||||
|
codingCase = CODING_CASES[caseNum];
|
||||||
|
inputChar = (short)inputArray[inputCharNum];
|
||||||
|
if (2 == codingCase.numBytes) {
|
||||||
|
if (0 == caseNum) {
|
||||||
|
outputArray[outputByteNum] = (byte)(inputChar >>> codingCase.initialShift);
|
||||||
|
} else {
|
||||||
|
outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
|
||||||
|
}
|
||||||
|
outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.finalMask)
|
||||||
|
<< codingCase.finalShift);
|
||||||
|
} else { // numBytes is 3
|
||||||
|
outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
|
||||||
|
outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.middleMask)
|
||||||
|
>>> codingCase.middleShift);
|
||||||
|
outputArray[outputByteNum + 2] = (byte)((inputChar & codingCase.finalMask)
|
||||||
|
<< codingCase.finalShift);
|
||||||
|
}
|
||||||
|
outputByteNum += codingCase.advanceBytes;
|
||||||
|
if (++caseNum == CODING_CASES.length) {
|
||||||
|
caseNum = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Handle final char
|
||||||
|
inputChar = (short)inputArray[inputCharNum];
|
||||||
|
codingCase = CODING_CASES[caseNum];
|
||||||
|
if (0 == caseNum) {
|
||||||
|
outputArray[outputByteNum] = 0;
|
||||||
|
}
|
||||||
|
outputArray[outputByteNum] += (byte)(inputChar >>> codingCase.initialShift);
|
||||||
|
int bytesLeft = numOutputBytes - outputByteNum;
|
||||||
|
if (bytesLeft > 1) {
|
||||||
|
if (2 == codingCase.numBytes) {
|
||||||
|
outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.finalMask)
|
||||||
|
>>> codingCase.finalShift);
|
||||||
|
} else { // numBytes is 3
|
||||||
|
outputArray[outputByteNum + 1] = (byte)((inputChar & codingCase.middleMask)
|
||||||
|
>>> codingCase.middleShift);
|
||||||
|
if (bytesLeft > 2) {
|
||||||
|
outputArray[outputByteNum + 2] = (byte)((inputChar & codingCase.finalMask)
|
||||||
|
<< codingCase.finalShift);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Arguments must have backing arrays");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decodes the given char sequence, which must have been encoded by
|
||||||
|
* {@link #encode(java.nio.ByteBuffer)} or
|
||||||
|
* {@link #encode(java.nio.ByteBuffer, java.nio.CharBuffer)}.
|
||||||
|
*
|
||||||
|
* @param input The char sequence to decode
|
||||||
|
* @return A byte sequence containing the decoding result. The limit
|
||||||
|
* is set to one past the position of the final char.
|
||||||
|
* @throws IllegalArgumentException If the input buffer is not backed by an
|
||||||
|
* array
|
||||||
|
*/
|
||||||
|
public static ByteBuffer decode(CharBuffer input) {
|
||||||
|
byte[] outputArray = new byte[getDecodedLength(input)];
|
||||||
|
ByteBuffer output = ByteBuffer.wrap(outputArray);
|
||||||
|
decode(input, output);
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encodes the input byte sequence.
|
||||||
|
*
|
||||||
|
* @param input The byte sequence to encode
|
||||||
|
* @return A char sequence containing the encoding result. The limit is set
|
||||||
|
* to one past the position of the final char.
|
||||||
|
* @throws IllegalArgumentException If the input buffer is not backed by an
|
||||||
|
* array
|
||||||
|
*/
|
||||||
|
public static CharBuffer encode(ByteBuffer input) {
|
||||||
|
char[] outputArray = new char[getEncodedLength(input)];
|
||||||
|
CharBuffer output = CharBuffer.wrap(outputArray);
|
||||||
|
encode(input, output);
|
||||||
|
return output;
|
||||||
|
}
|
||||||
|
|
||||||
|
static class CodingCase {
|
||||||
|
int numBytes, initialShift, middleShift, finalShift, advanceBytes = 2;
|
||||||
|
short middleMask, finalMask;
|
||||||
|
|
||||||
|
CodingCase(int initialShift, int middleShift, int finalShift) {
|
||||||
|
this.numBytes = 3;
|
||||||
|
this.initialShift = initialShift;
|
||||||
|
this.middleShift = middleShift;
|
||||||
|
this.finalShift = finalShift;
|
||||||
|
this.finalMask = (short)((short)0xFF >>> finalShift);
|
||||||
|
this.middleMask = (short)((short)0xFF << middleShift);
|
||||||
|
}
|
||||||
|
|
||||||
|
CodingCase(int initialShift, int finalShift) {
|
||||||
|
this.numBytes = 2;
|
||||||
|
this.initialShift = initialShift;
|
||||||
|
this.finalShift = finalShift;
|
||||||
|
this.finalMask = (short)((short)0xFF >>> finalShift);
|
||||||
|
if (finalShift != 0) {
|
||||||
|
advanceBytes = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,187 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
|
||||||
|
public class TestIndexableBinaryStringTools extends LuceneTestCase {
|
||||||
|
private static final int NUM_RANDOM_TESTS = 20000;
|
||||||
|
private static final int MAX_RANDOM_BINARY_LENGTH = 300;
|
||||||
|
|
||||||
|
public void testSingleBinaryRoundTrip() {
|
||||||
|
byte[] binary = new byte[]
|
||||||
|
{ (byte)0x23, (byte)0x98, (byte)0x13, (byte)0xE4, (byte)0x76, (byte)0x41,
|
||||||
|
(byte)0xB2, (byte)0xC9, (byte)0x7F, (byte)0x0A, (byte)0xA6, (byte)0xD8 };
|
||||||
|
|
||||||
|
ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
|
||||||
|
CharBuffer encoded = IndexableBinaryStringTools.encode(binaryBuf);
|
||||||
|
ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded);
|
||||||
|
assertEquals("Round trip decode/decode returned different results:"
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ "original: " + binaryDump(binaryBuf)
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ " encoded: " + charArrayDump(encoded)
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ " decoded: " + binaryDump(decoded),
|
||||||
|
binaryBuf, decoded);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEncodedSortability() {
|
||||||
|
Random random = newRandom();
|
||||||
|
byte[] originalArray1 = new byte[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
ByteBuffer originalBuf1 = ByteBuffer.wrap(originalArray1);
|
||||||
|
char[] originalString1 = new char[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
CharBuffer originalStringBuf1 = CharBuffer.wrap(originalString1);
|
||||||
|
char[] encoded1 = new char[IndexableBinaryStringTools.getEncodedLength(originalBuf1)];
|
||||||
|
CharBuffer encodedBuf1 = CharBuffer.wrap(encoded1);
|
||||||
|
byte[] original2 = new byte[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
ByteBuffer originalBuf2 = ByteBuffer.wrap(original2);
|
||||||
|
char[] originalString2 = new char[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
CharBuffer originalStringBuf2 = CharBuffer.wrap(originalString2);
|
||||||
|
char[] encoded2 = new char[IndexableBinaryStringTools.getEncodedLength(originalBuf2)];
|
||||||
|
CharBuffer encodedBuf2 = CharBuffer.wrap(encoded2);
|
||||||
|
for (int testNum = 0 ; testNum < NUM_RANDOM_TESTS ; ++testNum) {
|
||||||
|
int numBytes1 = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
|
||||||
|
originalBuf1.limit(numBytes1);
|
||||||
|
originalStringBuf1.limit(numBytes1);
|
||||||
|
|
||||||
|
for (int byteNum = 0 ; byteNum < numBytes1 ; ++byteNum) {
|
||||||
|
int randomInt = random.nextInt(0x100);
|
||||||
|
originalArray1[byteNum] = (byte) randomInt;
|
||||||
|
originalString1[byteNum] = (char)randomInt;
|
||||||
|
}
|
||||||
|
|
||||||
|
int numBytes2 = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1; // Min == 1
|
||||||
|
originalBuf2.limit(numBytes2);
|
||||||
|
originalStringBuf2.limit(numBytes2);
|
||||||
|
for (int byteNum = 0 ; byteNum < numBytes2 ; ++byteNum) {
|
||||||
|
int randomInt = random.nextInt(0x100);
|
||||||
|
original2[byteNum] = (byte)randomInt;
|
||||||
|
originalString2[byteNum] = (char)randomInt;
|
||||||
|
}
|
||||||
|
int originalComparison = originalStringBuf1.compareTo(originalStringBuf2);
|
||||||
|
originalComparison = originalComparison < 0 ? -1 : originalComparison > 0 ? 1 : 0;
|
||||||
|
|
||||||
|
IndexableBinaryStringTools.encode(originalBuf1, encodedBuf1);
|
||||||
|
IndexableBinaryStringTools.encode(originalBuf2, encodedBuf2);
|
||||||
|
|
||||||
|
int encodedComparison = encodedBuf1.compareTo(encodedBuf2);
|
||||||
|
encodedComparison = encodedComparison < 0 ? -1 : encodedComparison > 0 ? 1 : 0;
|
||||||
|
|
||||||
|
assertEquals("Test #" + (testNum + 1)
|
||||||
|
+ ": Original bytes and encoded chars compare differently:"
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ " binary 1: " + binaryDump(originalBuf1)
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ " binary 2: " + binaryDump(originalBuf2)
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ "encoded 1: " + charArrayDump(encodedBuf1)
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ "encoded 2: " + charArrayDump(encodedBuf2)
|
||||||
|
+ System.getProperty("line.separator"),
|
||||||
|
originalComparison, encodedComparison);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testEmptyInput() {
|
||||||
|
byte[] binary = new byte[0];
|
||||||
|
CharBuffer encoded = IndexableBinaryStringTools.encode(ByteBuffer.wrap(binary));
|
||||||
|
ByteBuffer decoded = IndexableBinaryStringTools.decode(encoded);
|
||||||
|
assertNotNull("decode() returned null", decoded);
|
||||||
|
assertEquals("decoded empty input was not empty", decoded.limit(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testAllNullInput() {
|
||||||
|
byte[] binary = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||||
|
ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
|
||||||
|
CharBuffer encoded = IndexableBinaryStringTools.encode(binaryBuf);
|
||||||
|
assertNotNull("encode() returned null", encoded);
|
||||||
|
ByteBuffer decodedBuf = IndexableBinaryStringTools.decode(encoded);
|
||||||
|
assertNotNull("decode() returned null", decodedBuf);
|
||||||
|
assertEquals("Round trip decode/decode returned different results:"
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ " original: " + binaryDump(binaryBuf)
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ "decodedBuf: " + binaryDump(decodedBuf),
|
||||||
|
binaryBuf, decodedBuf);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRandomBinaryRoundTrip() {
|
||||||
|
Random random = newRandom();
|
||||||
|
byte[] binary = new byte[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
ByteBuffer binaryBuf = ByteBuffer.wrap(binary);
|
||||||
|
char[] encoded = new char[IndexableBinaryStringTools.getEncodedLength(binaryBuf)];
|
||||||
|
CharBuffer encodedBuf = CharBuffer.wrap(encoded);
|
||||||
|
byte[] decoded = new byte[MAX_RANDOM_BINARY_LENGTH];
|
||||||
|
ByteBuffer decodedBuf = ByteBuffer.wrap(decoded);
|
||||||
|
for (int testNum = 0 ; testNum < NUM_RANDOM_TESTS ; ++testNum) {
|
||||||
|
int numBytes = random.nextInt(MAX_RANDOM_BINARY_LENGTH - 1) + 1 ; // Min == 1
|
||||||
|
binaryBuf.limit(numBytes);
|
||||||
|
for (int byteNum = 0 ; byteNum < numBytes ; ++byteNum) {
|
||||||
|
binary[byteNum] = (byte)random.nextInt(0x100);
|
||||||
|
}
|
||||||
|
IndexableBinaryStringTools.encode(binaryBuf, encodedBuf);
|
||||||
|
IndexableBinaryStringTools.decode(encodedBuf, decodedBuf);
|
||||||
|
assertEquals("Test #" + (testNum + 1)
|
||||||
|
+ ": Round trip decode/decode returned different results:"
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ " original: " + binaryDump(binaryBuf)
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ "encodedBuf: " + charArrayDump(encodedBuf)
|
||||||
|
+ System.getProperty("line.separator")
|
||||||
|
+ "decodedBuf: " + binaryDump(decodedBuf),
|
||||||
|
binaryBuf, decodedBuf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String binaryDump(ByteBuffer binaryBuf) {
|
||||||
|
StringBuffer buf = new StringBuffer();
|
||||||
|
int numBytes = binaryBuf.limit() - binaryBuf.arrayOffset();
|
||||||
|
byte[] binary = binaryBuf.array();
|
||||||
|
for (int byteNum = 0 ; byteNum < numBytes ; ++byteNum) {
|
||||||
|
String hex = Integer.toHexString((int)binary[byteNum] & 0xFF);
|
||||||
|
if (hex.length() == 1) {
|
||||||
|
buf.append('0');
|
||||||
|
}
|
||||||
|
buf.append(hex.toUpperCase());
|
||||||
|
if (byteNum < numBytes - 1) {
|
||||||
|
buf.append(' ');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String charArrayDump(CharBuffer charBuf) {
|
||||||
|
StringBuffer buf = new StringBuffer();
|
||||||
|
int numBytes = charBuf.limit() - charBuf.arrayOffset();
|
||||||
|
char[] charArray = charBuf.array();
|
||||||
|
for (int charNum = 0 ; charNum < numBytes ; ++charNum) {
|
||||||
|
String hex = Integer.toHexString((int)charArray[charNum]);
|
||||||
|
for (int digit = 0 ; digit < 4 - hex.length() ; ++digit) {
|
||||||
|
buf.append('0');
|
||||||
|
}
|
||||||
|
buf.append(hex.toUpperCase());
|
||||||
|
if (charNum < numBytes - 1) {
|
||||||
|
buf.append(' ');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue