mirror of https://github.com/apache/lucene.git
LUCENE-3590: fix copyBytes to respect offset, remove dup'ed compareTo code, add javadocs and TODOs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206789 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
60d9125050
commit
1ee685d837
|
@ -25,6 +25,7 @@ import java.util.Comparator;
|
|||
*
|
||||
* @lucene.experimental */
|
||||
public final class BytesRef implements Comparable<BytesRef>,Cloneable {
|
||||
/** An empty byte array for convenience */
|
||||
public static final byte[] EMPTY_BYTES = new byte[0];
|
||||
|
||||
/** The contents of the BytesRef. Should never be {@code null}. */
|
||||
|
@ -36,8 +37,9 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
|
|||
/** Length of used bytes. */
|
||||
public int length;
|
||||
|
||||
/** Create a BytesRef with {@link #EMPTY_BYTES} */
|
||||
public BytesRef() {
|
||||
bytes = EMPTY_BYTES;
|
||||
this(EMPTY_BYTES);
|
||||
}
|
||||
|
||||
/** This instance will directly reference bytes without making a copy.
|
||||
|
@ -53,20 +55,23 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
|
|||
/** This instance will directly reference bytes without making a copy.
|
||||
* bytes should not be null */
|
||||
public BytesRef(byte[] bytes) {
|
||||
assert bytes != null;
|
||||
this.bytes = bytes;
|
||||
this.offset = 0;
|
||||
this.length = bytes.length;
|
||||
this(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a BytesRef pointing to a new array of size <code>capacity</code>.
|
||||
* Offset and length will both be zero.
|
||||
*/
|
||||
public BytesRef(int capacity) {
|
||||
this.bytes = new byte[capacity];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param text Initialize the byte[] from the UTF8 bytes
|
||||
* for the provided String. This must be well-formed
|
||||
* unicode text, with no unpaired surrogates or U+FFFF.
|
||||
* Initialize the byte[] from the UTF8 bytes
|
||||
* for the provided String.
|
||||
*
|
||||
* @param text This must be well-formed
|
||||
* unicode text, with no unpaired surrogates.
|
||||
*/
|
||||
public BytesRef(CharSequence text) {
|
||||
this();
|
||||
|
@ -79,11 +84,20 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
|
|||
* @param text Must be well-formed unicode text, with no
|
||||
* unpaired surrogates or invalid UTF16 code units.
|
||||
*/
|
||||
// TODO broken if offset != 0
|
||||
public void copyChars(CharSequence text) {
|
||||
UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: compares the bytes against another BytesRef,
|
||||
* returning true if the bytes are equal.
|
||||
*
|
||||
* @param other Another BytesRef, should not be null.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public boolean bytesEquals(BytesRef other) {
|
||||
assert other != null;
|
||||
if (length == other.length) {
|
||||
int otherUpto = other.offset;
|
||||
final byte[] otherBytes = other.bytes;
|
||||
|
@ -186,20 +200,24 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
|
|||
/**
|
||||
* Copies the bytes from the given {@link BytesRef}
|
||||
* <p>
|
||||
* NOTE: this method resets the offset to 0 and resizes the reference array
|
||||
* if needed.
|
||||
* NOTE: if this would exceed the array size, this method creates a
|
||||
* new reference array.
|
||||
*/
|
||||
public void copyBytes(BytesRef other) {
|
||||
if (bytes.length < other.length) {
|
||||
bytes = new byte[other.length];
|
||||
offset = 0;
|
||||
}
|
||||
System.arraycopy(other.bytes, other.offset, bytes, 0, other.length);
|
||||
System.arraycopy(other.bytes, other.offset, bytes, offset, other.length);
|
||||
length = other.length;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Appends the bytes from the given {@link BytesRef}
|
||||
* <p>
|
||||
* NOTE: if this would exceed the array size, this method creates a
|
||||
* new reference array.
|
||||
*/
|
||||
public void append(BytesRef other) {
|
||||
int newLen = length + other.length;
|
||||
if (bytes.length < newLen) {
|
||||
|
@ -212,30 +230,15 @@ public final class BytesRef implements Comparable<BytesRef>,Cloneable {
|
|||
length = newLen;
|
||||
}
|
||||
|
||||
// TODO: handles a non-zero offset poorly: grows the whole backing array without taking offset into account.
|
||||
/** @lucene.internal */
|
||||
public void grow(int newLength) {
|
||||
bytes = ArrayUtil.grow(bytes, newLength);
|
||||
}
|
||||
|
||||
/** Unsigned byte order comparison */
|
||||
public int compareTo(BytesRef other) {
|
||||
if (this == other) return 0;
|
||||
|
||||
final byte[] aBytes = this.bytes;
|
||||
int aUpto = this.offset;
|
||||
final byte[] bBytes = other.bytes;
|
||||
int bUpto = other.offset;
|
||||
|
||||
final int aStop = aUpto + Math.min(this.length, other.length);
|
||||
|
||||
while(aUpto < aStop) {
|
||||
int aByte = aBytes[aUpto++] & 0xff;
|
||||
int bByte = bBytes[bUpto++] & 0xff;
|
||||
int diff = aByte - bByte;
|
||||
if (diff != 0) return diff;
|
||||
}
|
||||
|
||||
// One is a prefix of the other, or, they are equal:
|
||||
return this.length - other.length;
|
||||
return utf8SortedAsUnicodeSortOrder.compare(this, other);
|
||||
}
|
||||
|
||||
private final static Comparator<BytesRef> utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator();
|
||||
|
|
|
@ -123,6 +123,7 @@ public final class UnicodeUtil {
|
|||
|
||||
/** Encode characters from a char[] source, starting at
|
||||
* offset for length chars. Returns a hash of the resulting bytes. After encoding, result.offset will always be 0. */
|
||||
// TODO: broken if incoming result.offset != 0
|
||||
public static int UTF16toUTF8WithHash(final char[] source, final int offset, final int length, BytesRef result) {
|
||||
int hash = 0;
|
||||
int upto = 0;
|
||||
|
@ -179,6 +180,7 @@ public final class UnicodeUtil {
|
|||
/** Encode characters from a char[] source, starting at
|
||||
* offset for length chars. After encoding, result.offset will always be 0.
|
||||
*/
|
||||
// TODO: broken if incoming result.offset != 0
|
||||
public static void UTF16toUTF8(final char[] source, final int offset, final int length, BytesRef result) {
|
||||
|
||||
int upto = 0;
|
||||
|
@ -234,6 +236,7 @@ public final class UnicodeUtil {
|
|||
/** Encode characters from this String, starting at offset
|
||||
* for length characters. After encoding, result.offset will always be 0.
|
||||
*/
|
||||
// TODO: broken if incoming result.offset != 0
|
||||
public static void UTF16toUTF8(final CharSequence s, final int offset, final int length, BytesRef result) {
|
||||
final int end = offset + length;
|
||||
|
||||
|
@ -427,8 +430,10 @@ public final class UnicodeUtil {
|
|||
return codePointCount;
|
||||
}
|
||||
|
||||
// TODO: broken if incoming utf32.offset != 0
|
||||
public static void UTF8toUTF32(final BytesRef utf8, final IntsRef utf32) {
|
||||
// pre-alloc for worst case
|
||||
// TODO: ints cannot be null, should be an assert
|
||||
if (utf32.ints == null || utf32.ints.length < utf8.length) {
|
||||
utf32.ints = new int[utf8.length];
|
||||
}
|
||||
|
@ -567,6 +572,7 @@ public final class UnicodeUtil {
|
|||
* can result in an ArrayIndexOutOfBoundsException if invalid UTF-8 is passed).
|
||||
* Explicit checks for valid UTF-8 are not performed.
|
||||
*/
|
||||
// TODO: broken if chars.offset != 0
|
||||
public static void UTF8toUTF16(byte[] utf8, int offset, int length, CharsRef chars) {
|
||||
int out_offset = chars.offset = 0;
|
||||
final char[] out = chars.chars = ArrayUtil.grow(chars.chars, length);
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class TestBytesRef extends LuceneTestCase {
|
||||
public void testEmpty() {
|
||||
BytesRef b = new BytesRef();
|
||||
assertEquals(BytesRef.EMPTY_BYTES, b.bytes);
|
||||
assertEquals(0, b.offset);
|
||||
assertEquals(0, b.length);
|
||||
}
|
||||
|
||||
public void testFromBytes() {
|
||||
byte bytes[] = new byte[] { (byte)'a', (byte)'b', (byte)'c', (byte)'d' };
|
||||
BytesRef b = new BytesRef(bytes);
|
||||
assertEquals(bytes, b.bytes);
|
||||
assertEquals(0, b.offset);
|
||||
assertEquals(4, b.length);
|
||||
|
||||
BytesRef b2 = new BytesRef(bytes, 1, 3);
|
||||
assertEquals("bcd", b2.utf8ToString());
|
||||
|
||||
assertFalse(b.equals(b2));
|
||||
}
|
||||
|
||||
public void testFromChars() {
|
||||
for (int i = 0; i < 100; i++) {
|
||||
String s = _TestUtil.randomUnicodeString(random);
|
||||
String s2 = new BytesRef(s).utf8ToString();
|
||||
assertEquals(s, s2);
|
||||
}
|
||||
|
||||
// only for 4.x: U+FFFF is expected to round-trip unchanged
|
||||
assertEquals("\uFFFF", new BytesRef("\uFFFF").utf8ToString());
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue