From 548c901c0b21a21fe5f93c115a8750e00b39cebf Mon Sep 17 00:00:00 2001
From: Gian Merlino
Date: Thu, 14 Feb 2013 23:34:49 -0800
Subject: [PATCH] Additional fix for columns with cardinality 0

---
 .../com/metamx/druid/index/v1/IndexIO.java    | 28 +++++++++----
 .../druid/index/v1/IndexMergerTest.java       | 39 ++++++++++++++-----
 2 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java b/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java
index 5a2c7307ddd..d26e73f5b3c 100644
--- a/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java
+++ b/index-common/src/main/java/com/metamx/druid/index/v1/IndexIO.java
@@ -21,6 +21,7 @@ package com.metamx.druid.index.v1;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
@@ -62,7 +63,6 @@ import com.metamx.druid.kv.VSizeIndexedInts;
 import com.metamx.druid.utils.SerializerUtils;
 import it.uniroma3.mat.extendedset.intset.ConciseSet;
 import it.uniroma3.mat.extendedset.intset.ImmutableConciseSet;
-
 import org.joda.time.Interval;
 
 import java.io.ByteArrayOutputStream;
@@ -369,8 +369,8 @@ public class IndexIO
       );
     }
 
-    LinkedHashSet<String> skippedFiles = Sets.newLinkedHashSet();
-    Set<String> skippedDimensions = Sets.newLinkedHashSet();
+    final LinkedHashSet<String> skippedFiles = Sets.newLinkedHashSet();
+    final Set<String> skippedDimensions = Sets.newLinkedHashSet();
     for (String filename : v8SmooshedFiles.getInternalFilenames()) {
       log.info("Processing file[%s]", filename);
       if (filename.startsWith("dim_")) {
@@ -570,25 +570,37 @@ public class IndexIO
     final ByteBuffer indexBuffer = v8SmooshedFiles.mapFile("index.drd");
 
     indexBuffer.get(); // Skip the version byte
-    final GenericIndexed<String> dims = GenericIndexed.read(
+    final GenericIndexed<String> dims8 = GenericIndexed.read(
         indexBuffer, GenericIndexed.stringStrategy
     );
+    final GenericIndexed<String> dims9 = GenericIndexed.fromIterable(
+        Iterables.filter(
+            dims8, new Predicate<String>()
+            {
+              @Override
+              public boolean apply(String s)
+              {
+                return !skippedDimensions.contains(s);
+              }
+            }
+        ),
+        GenericIndexed.stringStrategy
+    );
     final GenericIndexed<String> availableMetrics = GenericIndexed.read(
         indexBuffer, GenericIndexed.stringStrategy
     );
     final Interval dataInterval = new Interval(serializerUtils.readString(indexBuffer));
 
     Set<String> columns = Sets.newTreeSet();
-    columns.addAll(Lists.newArrayList(dims));
+    columns.addAll(Lists.newArrayList(dims9));
     columns.addAll(Lists.newArrayList(availableMetrics));
-    columns.removeAll(skippedDimensions);
 
     GenericIndexed<String> cols = GenericIndexed.fromIterable(columns, GenericIndexed.stringStrategy);
 
-    final int numBytes = cols.getSerializedSize() + dims.getSerializedSize() + 16;
+    final int numBytes = cols.getSerializedSize() + dims9.getSerializedSize() + 16;
     final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes);
     cols.writeToChannel(writer);
-    dims.writeToChannel(writer);
+    dims9.writeToChannel(writer);
     serializerUtils.writeLong(writer, dataInterval.getStartMillis());
     serializerUtils.writeLong(writer, dataInterval.getEndMillis());
     writer.close();
diff --git a/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java b/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java
index 86d03f6dd76..407cd36253f 100644
--- a/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java
+++ b/server/src/test/java/com/metamx/druid/index/v1/IndexMergerTest.java
@@ -33,6 +33,7 @@ import org.junit.Test;
 
 import java.io.File;
 import java.util.Arrays;
+import java.util.List;
 
 /**
  */
@@ -116,11 +117,14 @@ public class IndexMergerTest
   @Test
   public void testPersistEmptyColumn() throws Exception
   {
-    final IncrementalIndex toPersist = new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[]{});
-    final File tmpDir = Files.createTempDir();
+    final IncrementalIndex toPersist1 = new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[]{});
+    final IncrementalIndex toPersist2 = new IncrementalIndex(0L, QueryGranularity.NONE, new AggregatorFactory[]{});
+    final File tmpDir1 = Files.createTempDir();
+    final File tmpDir2 = Files.createTempDir();
+    final File tmpDir3 = Files.createTempDir();
 
     try {
-      toPersist.add(
+      toPersist1.add(
           new MapBasedInputRow(
               1L,
               ImmutableList.of("dim1", "dim2"),
@@ -128,17 +132,32 @@ public class IndexMergerTest
           )
       );
 
-      final QueryableIndex merged = IndexIO.loadIndex(
-          IndexMerger.persist(toPersist, tmpDir)
+      toPersist2.add(
+          new MapBasedInputRow(
+              1L,
+              ImmutableList.of("dim1", "dim2"),
+              ImmutableMap.of("dim1", ImmutableList.of(), "dim2", "bar")
+          )
       );
+      final QueryableIndex index1 = IndexIO.loadIndex(IndexMerger.persist(toPersist1, tmpDir1));
+      final QueryableIndex index2 = IndexIO.loadIndex(IndexMerger.persist(toPersist2, tmpDir2));
+      final QueryableIndex merged = IndexIO.loadIndex(
+          IndexMerger.mergeQueryableIndex(Arrays.asList(index1, index2), new AggregatorFactory[]{}, tmpDir3)
+      );
+
+      Assert.assertEquals(1, index1.getTimeColumn().getLength());
+      Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(index1.getAvailableDimensions()));
+
+      Assert.assertEquals(1, index2.getTimeColumn().getLength());
+      Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(index2.getAvailableDimensions()));
+
       Assert.assertEquals(1, merged.getTimeColumn().getLength());
-      Assert.assertEquals(ImmutableList.of("dim1", "dim2"), ImmutableList.copyOf(merged.getAvailableDimensions()));
-      Assert.assertEquals(null, merged.getColumn("dim1"));
+      Assert.assertEquals(ImmutableList.of("dim2"), ImmutableList.copyOf(merged.getAvailableDimensions()));
     }
     finally {
-      FileUtils.deleteQuietly(tmpDir);
+      FileUtils.deleteQuietly(tmpDir1);
+      FileUtils.deleteQuietly(tmpDir2);
+      FileUtils.deleteQuietly(tmpDir3);
    }
-
-
   }
 }
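For reference, a minimal self-contained sketch (not part of the patch) of the filtering idea the IndexIO change relies on: Guava's Iterables.filter with a Predicate drops every dimension recorded in skippedDimensions before the v9 dimension listing is rewritten. The class name SkippedDimensionFilterSketch and the sample values are illustrative assumptions only; the real change filters a GenericIndexed<String> rather than a plain List.

import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;

import java.util.List;
import java.util.Set;

public class SkippedDimensionFilterSketch
{
  public static void main(String[] args)
  {
    // "dim1" stands in for a dimension that was skipped because its cardinality was 0.
    final List<String> dims8 = ImmutableList.of("dim1", "dim2");
    final Set<String> skippedDimensions = ImmutableSet.of("dim1");

    // Keep only the dimensions that were actually converted, preserving their order.
    final Iterable<String> dims9 = Iterables.filter(
        dims8,
        new Predicate<String>()
        {
          @Override
          public boolean apply(String input)
          {
            return !skippedDimensions.contains(input);
          }
        }
    );

    System.out.println(ImmutableList.copyOf(dims9)); // prints [dim2]
  }
}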