IndexMaker speedups

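* About 15% speedup, from resolving the per-dimension lookups (converters and dictionary-encoded columns) into arrays once, instead of performing a map lookup per dimension for every row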

Conflicts:
	processing/src/main/java/io/druid/segment/IndexMaker.java
This commit is contained in:
Charles Allen 2015-04-22 18:26:17 -07:00
parent edb82607fe
commit 303727e6a9
2 changed files with 38 additions and 15 deletions

View File

@ -22,6 +22,7 @@ import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.base.Predicate;
import com.google.common.base.Splitter;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
@ -1566,6 +1567,7 @@ public class IndexMaker
private final Map<String, IntBuffer> converters;
private final int indexNumber;
MMappedIndexRowIterable(
Iterable<Rowboat> index,
List<String> convertedDims,
@ -1602,6 +1604,18 @@ public class IndexMaker
@Override
public Iterator<Rowboat> iterator()
{
final IntBuffer[] converterArray = FluentIterable
.from(convertedDims)
.transform(
new Function<String, IntBuffer>()
{
// Resolves a converted dimension name to its entry in `converters`.
// Map.get yields null when there is no entry for the name, so the resulting
// array may contain nulls; the row transform below skips null converters.
@Override
public IntBuffer apply(String input)
{
return converters.get(input);
}
}
).toArray(IntBuffer.class);
return Iterators.transform(
index.iterator(),
new Function<Rowboat, Rowboat>()
@ -1609,10 +1623,10 @@ public class IndexMaker
@Override
public Rowboat apply(Rowboat input)
{
int[][] dims = input.getDims();
int[][] newDims = new int[convertedDims.size()][];
final int[][] dims = input.getDims();
final int[][] newDims = new int[convertedDims.size()][];
for (int i = 0; i < newDims.length; ++i) {
IntBuffer converter = converters.get(convertedDims.get(i));
final IntBuffer converter = converterArray[i];
if (converter == null) {
continue;

View File

@ -17,8 +17,9 @@
package io.druid.segment;
import com.google.common.base.Function;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.metamx.common.ISE;
import com.metamx.common.guava.CloseQuietly;
@ -41,11 +42,11 @@ import io.druid.segment.data.IndexedIterable;
import io.druid.segment.data.ListIndexed;
import org.joda.time.Interval;
import javax.annotation.Nullable;
import java.io.Closeable;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
@ -172,7 +173,7 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter
final GenericColumn timestamps = input.getColumn(Column.TIME_COLUMN_NAME).getGenericColumn();
final Object[] metrics;
final Map<String, DictionaryEncodedColumn> dimensions;
final DictionaryEncodedColumn[] dictionaryEncodedColumns;
final int numMetrics = getMetricNames().size();
@ -180,10 +181,19 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter
boolean done = false;
{
dimensions = Maps.newLinkedHashMap();
for (String dim : getDimensionNames()) {
dimensions.put(dim, input.getColumn(dim).getDictionaryEncoding());
this.dictionaryEncodedColumns = FluentIterable
.from(getDimensionNames())
.transform(
new Function<String, DictionaryEncodedColumn>()
{
// Looks up the column named `dimName` on `input` and returns its
// dictionary encoding, so the columns can be held in an array that is
// iterated positionally instead of being fetched from a map by name.
@Override
public DictionaryEncodedColumn apply(String dimName)
{
return input.getColumn(dimName)
.getDictionaryEncoding();
}
}
).toArray(DictionaryEncodedColumn.class);
final Indexed<String> availableMetrics = getMetricNames();
metrics = new Object[availableMetrics.size()];
@ -215,7 +225,7 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter
CloseQuietly.close((Closeable) metric);
}
}
for (Object dimension : dimensions.values()) {
for (Object dimension : dictionaryEncodedColumns) {
if (dimension instanceof Closeable) {
CloseQuietly.close((Closeable) dimension);
}
@ -232,10 +242,9 @@ public class QueryableIndexIndexableAdapter implements IndexableAdapter
throw new NoSuchElementException();
}
int[][] dims = new int[dimensions.size()][];
final int[][] dims = new int[dictionaryEncodedColumns.length][];
int dimIndex = 0;
for (String dim : dimensions.keySet()) {
final DictionaryEncodedColumn dict = dimensions.get(dim);
for (final DictionaryEncodedColumn dict : dictionaryEncodedColumns) {
final IndexedInts dimVals;
if (dict.hasMultipleValues()) {
dimVals = dict.getMultiValueRow(currRow);