HBASE-9283 Struct and StructIterator should properly handle trailing nulls

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1518320 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2013-08-28 18:32:04 +00:00
parent b821c20689
commit 102f84d272
4 changed files with 191 additions and 13 deletions

View File

@ -36,6 +36,32 @@ import org.apache.hadoop.hbase.util.PositionedByteRange;
* another {@code Struct}. {@code Struct}s are not {@code nullable} but their
* component fields may be.
* </p>
* <h3>Trailing Nulls</h3>
* <p>
* {@code Struct} treats the right-most nullable field members as special.
* Rather than writing null values to the output buffer, {@code Struct} omits
* those records all together. When reading back a value, it will look for the
* scenario where the end of the buffer has been reached but there are still
* nullable fields remaining in the {@code Struct} definition. When this
* happens, it will produce null entries for the remaining values. For example:
* <pre>
* StructBuilder builder = new StructBuilder()
* .add(OrderedNumeric.ASCENDING) // nullable
* .add(OrderedString.ASCENDING) // nullable
* Struct shorter = builder.toStruct();
* Struct longer = builder.add(OrderedNumeric.ASCENDING) // nullable
* .toStruct();
*
* PositionedByteRange buf1 = new SimplePositionedByteRange(7);
* PositionedByteRange buf2 = new SimplePositionedByteRange(7);
* Object[] val = new Object[] { BigDecimal.ONE, "foo" };
* shorter.encode(buf1, val); // write short value with short Struct
* buf1.setPosition(0); // reset position marker, prepare for read
* longer.decode(buf1); // => { BigDecimal.ONE, "foo", null } ; long Struct reads implied null
* longer.encode(buf2, val); // write short value with long struct
* Bytes.equals(buf1.getBytes(), buf2.getBytes()); // => true; long Struct skips writing null
* </pre>
* </p>
* <h3>Sort Order</h3>
* <p>
* {@code Struct} instances sort according to the composite order of their
@ -104,9 +130,9 @@ public class Struct implements DataType<Object[]> {
@SuppressWarnings("unchecked")
@Override
public int encodedLength(Object[] val) {
assert fields.length == val.length;
assert fields.length >= val.length;
int sum = 0;
for (int i = 0; i < fields.length; i++)
for (int i = 0; i < val.length; i++)
sum += fields[i].encodedLength(val[i]);
return sum;
}
@ -155,9 +181,14 @@ public class Struct implements DataType<Object[]> {
@SuppressWarnings("unchecked")
@Override
public int encode(PositionedByteRange dst, Object[] val) {
assert fields.length == val.length;
int written = 0;
for (int i = 0; i < fields.length; i++) {
if (val.length == 0) return 0;
assert fields.length >= val.length;
int end, written = 0;
// find the last occurrence of a non-null or null and non-nullable value
for (end = val.length - 1; end > -1; end--) {
if (null != val[end] || (null == val[end] && !fields[end].isNullable())) break;
}
for (int i = 0; i <= end; i++) {
written += fields[i].encode(dst, val[i]);
}
return written;

View File

@ -69,7 +69,9 @@ public class StructIterator implements Iterator<Object> {
@Override
public boolean hasNext() {
return idx < types.length;
// hasNext can return true when position == length in the case of a
// nullable field trailing a struct.
return idx < types.length && src.getPosition() <= src.getLength();
}
@Override
@ -78,7 +80,9 @@ public class StructIterator implements Iterator<Object> {
@Override
public Object next() {
if (!hasNext()) throw new NoSuchElementException();
return types[idx++].decode(src);
DataType<?> t = types[idx++];
if (src.getPosition() == src.getLength() && t.isNullable()) return null;
return t.decode(src);
}
/**
@ -87,6 +91,8 @@ public class StructIterator implements Iterator<Object> {
*/
public int skip() {
if (!hasNext()) throw new NoSuchElementException();
return types[idx++].skip(src);
DataType<?> t = types[idx++];
if (src.getPosition() == src.getLength() && t.isNullable()) return 0;
return t.skip(src);
}
}

View File

@ -154,11 +154,10 @@ public class TestStruct {
final transient String str;
public Pojo2(Object... vals) {
byte[] empty = new byte[0];
byteField1Asc = vals.length > 0 ? (byte[]) vals[0] : empty;
byteField2Dsc = vals.length > 1 ? (byte[]) vals[1] : empty;
stringFieldDsc = vals.length > 2 ? (String) vals[2] : "";
byteField3Dsc = vals.length > 3 ? (byte[]) vals[3] : empty;
byteField1Asc = vals.length > 0 ? (byte[]) vals[0] : null;
byteField2Dsc = vals.length > 1 ? (byte[]) vals[1] : null;
stringFieldDsc = vals.length > 2 ? (String) vals[2] : null;
byteField3Dsc = vals.length > 3 ? (byte[]) vals[3] : null;
str = new StringBuilder()
.append("{ ")
.append(Bytes.toStringBinary(byteField1Asc)).append(", ")

View File

@ -0,0 +1,142 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.types;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import java.math.BigDecimal;
import java.util.Arrays;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.util.PositionedByteRange;
import org.apache.hadoop.hbase.util.SimplePositionedByteRange;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category(SmallTests.class)
public class TestStructNullExtension {
/**
* Verify null extension respects the type's isNullable field.
*/
@Test(expected = NullPointerException.class)
public void testNonNullableNullExtension() {
Struct s = new StructBuilder()
.add(new RawStringTerminated("|")) // not nullable
.toStruct();
PositionedByteRange buf = new SimplePositionedByteRange(4);
s.encode(buf, new Object[1]);
}
/**
* Positive cases for null extension.
*/
@Test
public void testNullableNullExtension() {
// the following field members are used because they're all nullable
StructBuilder builder = new StructBuilder()
.add(OrderedNumeric.ASCENDING)
.add(OrderedString.ASCENDING);
Struct shorter = builder.toStruct();
Struct longer = builder
// intentionally include a wrapped instance to test wrapper behavior.
.add(new TerminatedWrapper<String>(OrderedString.ASCENDING, "/"))
.add(OrderedNumeric.ASCENDING)
.toStruct();
PositionedByteRange buf1 = new SimplePositionedByteRange(7);
Object[] val1 = new Object[] { BigDecimal.ONE, "foo" }; // => 2 bytes + 5 bytes
assertEquals("Encoding shorter value wrote a surprising number of bytes.",
buf1.getLength(), shorter.encode(buf1, val1));
int shortLen = buf1.getLength();
// test iterator
buf1.setPosition(0);
StructIterator it = longer.iterator(buf1);
it.skip();
it.skip();
assertEquals("Position should be at end. Broken test.", buf1.getLength(), buf1.getPosition());
assertEquals("Failed to skip null element with extended struct.", 0, it.skip());
assertEquals("Failed to skip null element with extended struct.", 0, it.skip());
buf1.setPosition(0);
it = longer.iterator(buf1);
assertEquals(BigDecimal.ONE, it.next());
assertEquals("foo", it.next());
assertEquals("Position should be at end. Broken test.", buf1.getLength(), buf1.getPosition());
assertNull("Failed to skip null element with extended struct.", it.next());
assertNull("Failed to skip null element with extended struct.", it.next());
// test Struct
buf1.setPosition(0);
assertArrayEquals("Simple struct decoding is broken.", val1, shorter.decode(buf1));
buf1.setPosition(0);
assertArrayEquals("Decoding short value with extended struct should append null elements.",
Arrays.copyOf(val1, 4), longer.decode(buf1));
// test omission of trailing members
PositionedByteRange buf2 = new SimplePositionedByteRange(7);
buf1.setPosition(0);
assertEquals(
"Encoding a short value with extended struct should have same result as using short struct.",
shortLen, longer.encode(buf2, val1));
assertArrayEquals(
"Encoding a short value with extended struct should have same result as using short struct",
buf1.getBytes(), buf2.getBytes());
// test null trailing members
// all fields are nullable, so nothing should hit the buffer.
val1 = new Object[] { null, null, null, null }; // => 0 bytes
buf1.set(0);
buf2.set(0);
assertEquals("Encoding null-truncated value wrote a surprising number of bytes.",
buf1.getLength(), longer.encode(buf1, new Object[0]));
assertEquals("Encoding null-extended value wrote a surprising number of bytes.",
buf1.getLength(), longer.encode(buf1, val1));
assertArrayEquals("Encoded unexpected result.", buf1.getBytes(), buf2.getBytes());
assertArrayEquals("Decoded unexpected result.", val1, longer.decode(buf2));
// all fields are nullable, so only 1 should hit the buffer.
Object[] val2 = new Object[] { BigDecimal.ONE, null, null, null }; // => 2 bytes
buf1.set(2);
buf2.set(2);
assertEquals("Encoding null-truncated value wrote a surprising number of bytes.",
buf1.getLength(), longer.encode(buf1, Arrays.copyOf(val2, 1)));
assertEquals("Encoding null-extended value wrote a surprising number of bytes.",
buf2.getLength(), longer.encode(buf2, val2));
assertArrayEquals("Encoded unexpected result.", buf1.getBytes(), buf2.getBytes());
buf2.setPosition(0);
assertArrayEquals("Decoded unexpected result.", val2, longer.decode(buf2));
// all fields are nullable, so only 1, null, "foo" should hit the buffer.
// => 2 bytes + 1 byte + 6 bytes
Object[] val3 = new Object[] { BigDecimal.ONE, null, "foo", null };
buf1.set(9);
buf2.set(9);
assertEquals("Encoding null-truncated value wrote a surprising number of bytes.",
buf1.getLength(), longer.encode(buf1, Arrays.copyOf(val3, 3)));
assertEquals("Encoding null-extended value wrote a surprising number of bytes.",
buf2.getLength(), longer.encode(buf2, val3));
assertArrayEquals("Encoded unexpected result.", buf1.getBytes(), buf2.getBytes());
buf2.setPosition(0);
assertArrayEquals("Decoded unexpected result.", val3, longer.decode(buf2));
}
}