From 87357a0534f510773f6e06087fc4efbe8d74bf32 Mon Sep 17 00:00:00 2001 From: "navis.ryu" Date: Tue, 24 Nov 2015 10:29:25 +0900 Subject: [PATCH] fixed #2001 GenericIndexed.fromIterable compares all values even when it's not sorted --- .../io/druid/segment/data/GenericIndexed.java | 50 ++++++-------- .../segment/data/GenericIndexedTest.java | 67 ++++++++++++------- 2 files changed, 63 insertions(+), 54 deletions(-) diff --git a/processing/src/main/java/io/druid/segment/data/GenericIndexed.java b/processing/src/main/java/io/druid/segment/data/GenericIndexed.java index 2e3bf445394..0ef522bc948 100644 --- a/processing/src/main/java/io/druid/segment/data/GenericIndexed.java +++ b/processing/src/main/java/io/druid/segment/data/GenericIndexed.java @@ -33,9 +33,9 @@ import java.util.Iterator; /** * A generic, flat storage mechanism. Use static methods fromArray() or fromIterable() to construct. If input * is sorted, supports binary search index lookups. If input is not sorted, only supports array-like index lookups. - * + * * V1 Storage Format: - * + * * byte 1: version (0x1) * byte 2 == 0x1 => allowReverseLookup * bytes 3-6 => numBytesUsed @@ -64,48 +64,42 @@ public class GenericIndexed implements Indexed } boolean allowReverseLookup = true; - int count = 1; - T prevVal = objects.next(); - while (objects.hasNext()) { - T next = objects.next(); - if (!(strategy.compare(prevVal, next) < 0)) { - allowReverseLookup = false; - } - if (prevVal instanceof Closeable) { - CloseQuietly.close((Closeable) prevVal); - } + int count = 0; - prevVal = next; - ++count; - } - if (prevVal instanceof Closeable) { - CloseQuietly.close((Closeable) prevVal); - } - - ByteArrayOutputStream headerBytes = new ByteArrayOutputStream(4 + (count * 4)); + ByteArrayOutputStream headerBytes = new ByteArrayOutputStream(); ByteArrayOutputStream valueBytes = new ByteArrayOutputStream(); - int offset = 0; - try { - headerBytes.write(Ints.toByteArray(count)); + int offset = 0; + T prevVal = null; + do { + count++; + T next = objects.next(); + if (allowReverseLookup && prevVal != null && !(strategy.compare(prevVal, next) < 0)) { + allowReverseLookup = false; + } - for (T object : objectsIterable) { - final byte[] bytes = strategy.toBytes(object); + final byte[] bytes = strategy.toBytes(next); offset += 4 + bytes.length; headerBytes.write(Ints.toByteArray(offset)); valueBytes.write(Ints.toByteArray(bytes.length)); valueBytes.write(bytes); - if (object instanceof Closeable) { - CloseQuietly.close((Closeable) object); + if (prevVal instanceof Closeable) { + CloseQuietly.close((Closeable) prevVal); } + prevVal = next; + } while (objects.hasNext()); + + if (prevVal instanceof Closeable) { + CloseQuietly.close((Closeable) prevVal); } } catch (IOException e) { throw new RuntimeException(e); } - ByteBuffer theBuffer = ByteBuffer.allocate(headerBytes.size() + valueBytes.size()); + ByteBuffer theBuffer = ByteBuffer.allocate(Ints.BYTES + headerBytes.size() + valueBytes.size()); + theBuffer.put(Ints.toByteArray(count)); theBuffer.put(headerBytes.toByteArray()); theBuffer.put(valueBytes.toByteArray()); theBuffer.flip(); diff --git a/processing/src/test/java/io/druid/segment/data/GenericIndexedTest.java b/processing/src/test/java/io/druid/segment/data/GenericIndexedTest.java index 16dd8aa3d86..3c120d3ae2c 100644 --- a/processing/src/test/java/io/druid/segment/data/GenericIndexedTest.java +++ b/processing/src/test/java/io/druid/segment/data/GenericIndexedTest.java @@ -55,19 +55,7 @@ public class GenericIndexedTest final String[] strings = {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"}; Indexed indexed = GenericIndexed.fromArray(strings, GenericIndexed.STRING_STRATEGY); - Assert.assertEquals(strings.length, indexed.size()); - for (int i = 0; i < strings.length; i++) { - Assert.assertEquals(strings[i], indexed.get(i)); - } - - HashMap mixedUp = Maps.newHashMap(); - for (int i = 0; i < strings.length; i++) { - mixedUp.put(strings[i], i); - } - - for (Map.Entry entry : mixedUp.entrySet()) { - Assert.assertEquals(entry.getValue().intValue(), indexed.indexOf(entry.getKey())); - } + checkBasicAPIs(strings, indexed, true); Assert.assertEquals(-13, indexed.indexOf("q")); Assert.assertEquals(-9, indexed.indexOf("howdydo")); @@ -85,25 +73,52 @@ public class GenericIndexedTest ) ); - Assert.assertEquals(strings.length, deserialized.size()); - for (int i = 0; i < strings.length; i++) { - Assert.assertEquals(strings[i], deserialized.get(i)); - } - - HashMap mixedUp = Maps.newHashMap(); - for (int i = 0; i < strings.length; i++) { - mixedUp.put(strings[i], i); - } - - for (Map.Entry entry : mixedUp.entrySet()) { - Assert.assertEquals(entry.getValue().intValue(), deserialized.indexOf(entry.getKey())); - } + checkBasicAPIs(strings, deserialized, true); Assert.assertEquals(-13, deserialized.indexOf("q")); Assert.assertEquals(-9, deserialized.indexOf("howdydo")); Assert.assertEquals(-1, deserialized.indexOf("1111")); } + @Test + public void testNotSortedSerialization() throws Exception + { + final String[] strings = {"a", "b", "c", "d", "e", "f", "g", "h", "i", "k", "j", "l"}; + + GenericIndexed deserialized = serializeAndDeserialize( + GenericIndexed.fromArray( + strings, GenericIndexed.STRING_STRATEGY + ) + ); + checkBasicAPIs(strings, deserialized, false); + } + + private void checkBasicAPIs(String[] strings, Indexed index, boolean allowReverseLookup) + { + Assert.assertEquals(strings.length, index.size()); + for (int i = 0; i < strings.length; i++) { + Assert.assertEquals(strings[i], index.get(i)); + } + + if (allowReverseLookup) { + HashMap mixedUp = Maps.newHashMap(); + for (int i = 0; i < strings.length; i++) { + mixedUp.put(strings[i], i); + } + for (Map.Entry entry : mixedUp.entrySet()) { + Assert.assertEquals(entry.getValue().intValue(), index.indexOf(entry.getKey())); + } + } else { + try { + index.indexOf("xxx"); + Assert.fail("should throw exception"); + } + catch (UnsupportedOperationException e) { + // not supported + } + } + } + private GenericIndexed serializeAndDeserialize(GenericIndexed indexed) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream();