add missing vector object selector for multi-value string columns, refactor some stuff (#13379)

* add vector object selector for multi-value string columns, refactor some stuff

* use for nested columns too

* add test

* inspections
This commit is contained in:
Clint Wylie 2022-11-17 21:08:54 -08:00 committed by GitHub
parent 8c9ffcfe37
commit 7f4e386509
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 728 additions and 375 deletions

View File

@ -47,7 +47,9 @@ import org.apache.druid.utils.CloseableUtils;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.BitSet; import java.util.BitSet;
import java.util.List;
/** /**
* *
@ -373,26 +375,11 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn<St
@Override @Override
public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset) public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset)
{ {
class QueryableSingleValueDimensionVectorSelector implements SingleValueDimensionVectorSelector, IdLookup final class StringVectorSelector extends StringSingleValueDimensionVectorSelector
{ {
private final int[] vector = new int[offset.getMaxVectorSize()]; public StringVectorSelector()
private int id = ReadableVectorInspector.NULL_ID;
@Override
public int[] getRowVector()
{ {
if (id == offset.getId()) { super(column, offset);
return vector;
}
if (offset.isContiguous()) {
column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize());
} else {
column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize());
}
id = offset.getId();
return vector;
} }
@Override @Override
@ -415,82 +402,25 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn<St
return StringDictionaryEncodedColumn.this.lookupNameUtf8(id); return StringDictionaryEncodedColumn.this.lookupNameUtf8(id);
} }
@Override
public boolean supportsLookupNameUtf8()
{
return true;
}
@Override
public boolean nameLookupPossibleInAdvance()
{
return true;
}
@Nullable
@Override
public IdLookup idLookup()
{
return this;
}
@Override @Override
public int lookupId(@Nullable final String name) public int lookupId(@Nullable final String name)
{ {
return StringDictionaryEncodedColumn.this.lookupId(name); return StringDictionaryEncodedColumn.this.lookupId(name);
} }
@Override
public int getCurrentVectorSize()
{
return offset.getCurrentVectorSize();
}
@Override
public int getMaxVectorSize()
{
return offset.getMaxVectorSize();
}
} }
return new QueryableSingleValueDimensionVectorSelector(); return new StringVectorSelector();
} }
@Override @Override
public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(final ReadableVectorOffset offset) public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(final ReadableVectorOffset offset)
{ {
class QueryableMultiValueDimensionVectorSelector implements MultiValueDimensionVectorSelector, IdLookup final class MultiStringVectorSelector extends StringMultiValueDimensionVectorSelector
{ {
private final IndexedInts[] vector = new IndexedInts[offset.getMaxVectorSize()];
private int id = ReadableVectorInspector.NULL_ID;
@Override public MultiStringVectorSelector()
public IndexedInts[] getRowVector()
{ {
if (id == offset.getId()) { super(multiValueColumn, offset);
return vector;
}
if (offset.isContiguous()) {
final int currentOffset = offset.getStartOffset();
final int numRows = offset.getCurrentVectorSize();
for (int i = 0; i < numRows; i++) {
// Must use getUnshared, otherwise all elements in the vector could be the same shared object.
vector[i] = multiValueColumn.getUnshared(i + currentOffset);
}
} else {
final int[] offsets = offset.getOffsets();
final int numRows = offset.getCurrentVectorSize();
for (int i = 0; i < numRows; i++) {
// Must use getUnshared, otherwise all elements in the vector could be the same shared object.
vector[i] = multiValueColumn.getUnshared(offsets[i]);
}
}
id = offset.getId();
return vector;
} }
@Override @Override
@ -513,94 +443,52 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn<St
return StringDictionaryEncodedColumn.this.lookupNameUtf8(id); return StringDictionaryEncodedColumn.this.lookupNameUtf8(id);
} }
@Override
public boolean supportsLookupNameUtf8()
{
return true;
}
@Override
public boolean nameLookupPossibleInAdvance()
{
return true;
}
@Nullable
@Override
public IdLookup idLookup()
{
return this;
}
@Override @Override
public int lookupId(@Nullable final String name) public int lookupId(@Nullable final String name)
{ {
return StringDictionaryEncodedColumn.this.lookupId(name); return StringDictionaryEncodedColumn.this.lookupId(name);
} }
@Override
public int getCurrentVectorSize()
{
return offset.getCurrentVectorSize();
}
@Override
public int getMaxVectorSize()
{
return offset.getMaxVectorSize();
}
} }
return new MultiStringVectorSelector();
return new QueryableMultiValueDimensionVectorSelector();
} }
@Override @Override
public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset)
{ {
if (!hasMultipleValues()) { if (!hasMultipleValues()) {
class DictionaryEncodedStringSingleValueVectorObjectSelector implements VectorObjectSelector final class StringVectorSelector extends StringVectorObjectSelector
{ {
private final int[] vector = new int[offset.getMaxVectorSize()]; public StringVectorSelector()
private final Object[] strings = new Object[offset.getMaxVectorSize()];
private int id = ReadableVectorInspector.NULL_ID;
@Override
public Object[] getObjectVector()
{ {
if (id == offset.getId()) { super(column, offset);
return strings;
}
if (offset.isContiguous()) {
column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize());
} else {
column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize());
}
for (int i = 0; i < offset.getCurrentVectorSize(); i++) {
strings[i] = lookupName(vector[i]);
}
id = offset.getId();
return strings;
} }
@Nullable
@Override @Override
public int getMaxVectorSize() public String lookupName(int id)
{ {
return offset.getMaxVectorSize(); return StringDictionaryEncodedColumn.this.lookupName(id);
}
@Override
public int getCurrentVectorSize()
{
return offset.getCurrentVectorSize();
} }
} }
return new DictionaryEncodedStringSingleValueVectorObjectSelector(); return new StringVectorSelector();
} else { } else {
throw new UnsupportedOperationException("Multivalue string object selector not implemented yet"); final class MultiStringVectorSelector extends MultiValueStringVectorObjectSelector
{
public MultiStringVectorSelector()
{
super(multiValueColumn, offset);
}
@Nullable
@Override
public String lookupName(int id)
{
return StringDictionaryEncodedColumn.this.lookupName(id);
}
}
return new MultiStringVectorSelector();
} }
} }
@ -609,4 +497,308 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn<St
{ {
CloseableUtils.closeAll(cachedDictionary, column, multiValueColumn); CloseableUtils.closeAll(cachedDictionary, column, multiValueColumn);
} }
/**
 * Skeleton {@link SingleValueDimensionVectorSelector} for a dictionary encoded {@link ColumnType#STRING} column whose
 * dictionary ids are read from a {@link ColumnarInts}. Only the id vector handling lives here; the dictionary itself
 * is not included, so subclasses bring their own dictionary lookup methods.
 *
 * This base type answers true for {@link #supportsLookupNameUtf8()} and assumes that implementations do the same.
 */
public abstract static class StringSingleValueDimensionVectorSelector
    implements SingleValueDimensionVectorSelector, IdLookup
{
  private final ColumnarInts dictionaryIds;
  private final ReadableVectorOffset vectorOffset;
  // Reused on every read; sized once to the largest vector the offset can produce.
  private final int[] rowVector;
  // Id of the offset state that 'rowVector' was last filled for.
  private int cachedOffsetId = ReadableVectorInspector.NULL_ID;

  public StringSingleValueDimensionVectorSelector(
      ColumnarInts column,
      ReadableVectorOffset offset
  )
  {
    this.dictionaryIds = column;
    this.vectorOffset = offset;
    this.rowVector = new int[offset.getMaxVectorSize()];
  }

  /**
   * Returns the dictionary ids for the rows of the current vector. The same array instance is handed back on every
   * call; it is only refilled when the offset id differs from the one recorded by the previous read.
   */
  @Override
  public int[] getRowVector()
  {
    if (cachedOffsetId != vectorOffset.getId()) {
      if (vectorOffset.isContiguous()) {
        // contiguous rows: bulk read starting at the first row of the vector
        dictionaryIds.get(rowVector, vectorOffset.getStartOffset(), vectorOffset.getCurrentVectorSize());
      } else {
        // scattered rows: bulk read of the explicit row offsets
        dictionaryIds.get(rowVector, vectorOffset.getOffsets(), vectorOffset.getCurrentVectorSize());
      }
      cachedOffsetId = vectorOffset.getId();
    }
    return rowVector;
  }

  @Override
  public int getMaxVectorSize()
  {
    return vectorOffset.getMaxVectorSize();
  }

  @Override
  public int getCurrentVectorSize()
  {
    return vectorOffset.getCurrentVectorSize();
  }

  @Override
  public boolean nameLookupPossibleInAdvance()
  {
    return true;
  }

  @Override
  public boolean supportsLookupNameUtf8()
  {
    return true;
  }

  /**
   * This selector acts as its own {@link IdLookup}.
   */
  @Nullable
  @Override
  public IdLookup idLookup()
  {
    return this;
  }
}
/**
 * Skeleton {@link MultiValueDimensionVectorSelector} for a dictionary encoded {@link ColumnType#STRING} column whose
 * rows of dictionary ids are read from a {@link ColumnarMultiInts}. Only the row vector handling lives here; the
 * dictionary itself is not included, so subclasses bring their own dictionary lookup methods.
 *
 * This base type answers true for {@link #supportsLookupNameUtf8()} and assumes that implementations do the same.
 */
public abstract static class StringMultiValueDimensionVectorSelector
    implements MultiValueDimensionVectorSelector, IdLookup
{
  private final ColumnarMultiInts rowsOfIds;
  private final ReadableVectorOffset vectorOffset;
  // Reused on every read; sized once to the largest vector the offset can produce.
  private final IndexedInts[] rowVector;
  // Id of the offset state that 'rowVector' was last filled for.
  private int cachedOffsetId = ReadableVectorInspector.NULL_ID;

  public StringMultiValueDimensionVectorSelector(
      ColumnarMultiInts multiValueColumn,
      ReadableVectorOffset offset
  )
  {
    this.rowsOfIds = multiValueColumn;
    this.vectorOffset = offset;
    this.rowVector = new IndexedInts[offset.getMaxVectorSize()];
  }

  /**
   * Returns one {@link IndexedInts} of dictionary ids per row of the current vector. The same array instance is
   * handed back on every call; it is only refilled when the offset id differs from the one recorded by the previous
   * read.
   */
  @Override
  public IndexedInts[] getRowVector()
  {
    if (cachedOffsetId != vectorOffset.getId()) {
      // Both branches must use getUnshared, otherwise all elements in the vector could be the same shared object.
      if (vectorOffset.isContiguous()) {
        final int firstRow = vectorOffset.getStartOffset();
        final int rowCount = vectorOffset.getCurrentVectorSize();
        for (int slot = 0; slot < rowCount; slot++) {
          rowVector[slot] = rowsOfIds.getUnshared(slot + firstRow);
        }
      } else {
        final int[] rowNumbers = vectorOffset.getOffsets();
        final int rowCount = vectorOffset.getCurrentVectorSize();
        for (int slot = 0; slot < rowCount; slot++) {
          rowVector[slot] = rowsOfIds.getUnshared(rowNumbers[slot]);
        }
      }
      cachedOffsetId = vectorOffset.getId();
    }
    return rowVector;
  }

  @Override
  public int getMaxVectorSize()
  {
    return vectorOffset.getMaxVectorSize();
  }

  @Override
  public int getCurrentVectorSize()
  {
    return vectorOffset.getCurrentVectorSize();
  }

  @Override
  public boolean nameLookupPossibleInAdvance()
  {
    return true;
  }

  @Override
  public boolean supportsLookupNameUtf8()
  {
    return true;
  }

  /**
   * This selector acts as its own {@link IdLookup}.
   */
  @Nullable
  @Override
  public IdLookup idLookup()
  {
    return this;
  }
}
/**
 * Skeleton {@link VectorObjectSelector} for a dictionary encoded {@link ColumnType#STRING} column whose dictionary
 * ids are read from a {@link ColumnarInts}. Each id is turned into an object through {@link #lookupName(int)}; the
 * dictionary itself is not included, so subclasses bring their own lookup.
 */
public abstract static class StringVectorObjectSelector implements VectorObjectSelector
{
  private final ColumnarInts dictionaryIds;
  private final ReadableVectorOffset vectorOffset;
  // Scratch space for the ids of the current vector, and the looked up values that are handed to callers.
  private final int[] idVector;
  private final Object[] valueVector;
  // Id of the offset state that 'valueVector' was last filled for.
  private int cachedOffsetId = ReadableVectorInspector.NULL_ID;

  public StringVectorObjectSelector(
      ColumnarInts column,
      ReadableVectorOffset offset
  )
  {
    this.dictionaryIds = column;
    this.vectorOffset = offset;
    this.idVector = new int[offset.getMaxVectorSize()];
    this.valueVector = new Object[offset.getMaxVectorSize()];
  }

  /**
   * Resolves a dictionary id to its string value.
   */
  @Nullable
  public abstract String lookupName(int id);

  /**
   * Returns the looked up value for each row of the current vector. The same array instance is handed back on every
   * call; it is only refilled when the offset id differs from the one recorded by the previous read.
   */
  @Override
  public Object[] getObjectVector()
  {
    if (cachedOffsetId != vectorOffset.getId()) {
      // step 1: bulk read the dictionary ids for the rows of this vector
      if (vectorOffset.isContiguous()) {
        dictionaryIds.get(idVector, vectorOffset.getStartOffset(), vectorOffset.getCurrentVectorSize());
      } else {
        dictionaryIds.get(idVector, vectorOffset.getOffsets(), vectorOffset.getCurrentVectorSize());
      }
      // step 2: replace every id with its dictionary value
      for (int row = 0; row < vectorOffset.getCurrentVectorSize(); row++) {
        valueVector[row] = lookupName(idVector[row]);
      }
      cachedOffsetId = vectorOffset.getId();
    }
    return valueVector;
  }

  @Override
  public int getCurrentVectorSize()
  {
    return vectorOffset.getCurrentVectorSize();
  }

  @Override
  public int getMaxVectorSize()
  {
    return vectorOffset.getMaxVectorSize();
  }
}
/**
 * Skeleton {@link VectorObjectSelector} for a dictionary encoded {@link ColumnType#STRING} column whose rows of
 * dictionary ids are read from a {@link ColumnarMultiInts}. Ids are turned into strings through
 * {@link #lookupName(int)}; the dictionary itself is not included, so subclasses bring their own lookup.
 *
 * The object produced for a row depends on how many values the row holds: null for an empty row, the single looked
 * up value for a row with one value, and a {@link List} of looked up values otherwise.
 */
public abstract static class MultiValueStringVectorObjectSelector implements VectorObjectSelector
{
  private final ColumnarMultiInts rowsOfIds;
  private final ReadableVectorOffset vectorOffset;
  // Scratch space for the id rows of the current vector, and the converted objects that are handed to callers.
  private final IndexedInts[] idRows;
  private final Object[] valueVector;
  // Id of the offset state that 'valueVector' was last filled for.
  private int cachedOffsetId = ReadableVectorInspector.NULL_ID;

  public MultiValueStringVectorObjectSelector(
      ColumnarMultiInts multiValueColumn,
      ReadableVectorOffset offset
  )
  {
    this.rowsOfIds = multiValueColumn;
    this.vectorOffset = offset;
    this.idRows = new IndexedInts[offset.getMaxVectorSize()];
    this.valueVector = new Object[offset.getMaxVectorSize()];
  }

  /**
   * Resolves a dictionary id to its string value.
   */
  @Nullable
  public abstract String lookupName(int id);

  /**
   * Returns the converted object for each row of the current vector. The same array instance is handed back on every
   * call; it is only refilled when the offset id differs from the one recorded by the previous read.
   */
  @Override
  public Object[] getObjectVector()
  {
    if (cachedOffsetId != vectorOffset.getId()) {
      // step 1: collect the id rows. Both branches must use getUnshared, otherwise all elements in the vector could
      // be the same shared object.
      if (vectorOffset.isContiguous()) {
        final int firstRow = vectorOffset.getStartOffset();
        final int rowCount = vectorOffset.getCurrentVectorSize();
        for (int slot = 0; slot < rowCount; slot++) {
          idRows[slot] = rowsOfIds.getUnshared(slot + firstRow);
        }
      } else {
        final int[] rowNumbers = vectorOffset.getOffsets();
        final int rowCount = vectorOffset.getCurrentVectorSize();
        for (int slot = 0; slot < rowCount; slot++) {
          idRows[slot] = rowsOfIds.getUnshared(rowNumbers[slot]);
        }
      }
      // step 2: convert every id row into the object exposed to callers
      for (int slot = 0; slot < vectorOffset.getCurrentVectorSize(); slot++) {
        valueVector[slot] = toRowObject(idRows[slot]);
      }
      cachedOffsetId = vectorOffset.getId();
    }
    return valueVector;
  }

  @Override
  public int getCurrentVectorSize()
  {
    return vectorOffset.getCurrentVectorSize();
  }

  @Override
  public int getMaxVectorSize()
  {
    return vectorOffset.getMaxVectorSize();
  }

  /**
   * Converts one row of dictionary ids: null when the row is empty, the looked up value when it holds exactly one id,
   * and a list of looked up values, in row order, when it holds more.
   */
  @Nullable
  private Object toRowObject(IndexedInts idRow)
  {
    switch (idRow.size()) {
      case 0:
        return null;
      case 1:
        return lookupName(idRow.get(0));
      default: {
        final List<String> values = new ArrayList<>(idRow.size());
        // noinspection SSBasedInspection
        for (int position = 0; position < idRow.size(); position++) {
          values.add(lookupName(idRow.get(position)));
        }
        return values;
      }
    }
  }
}
} }

View File

@ -38,7 +38,6 @@ import org.apache.druid.segment.filter.BooleanValueMatcher;
import org.apache.druid.segment.historical.HistoricalDimensionSelector; import org.apache.druid.segment.historical.HistoricalDimensionSelector;
import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector; import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector;
import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
import org.apache.druid.segment.vector.ReadableVectorInspector;
import org.apache.druid.segment.vector.ReadableVectorOffset; import org.apache.druid.segment.vector.ReadableVectorOffset;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorObjectSelector; import org.apache.druid.segment.vector.VectorObjectSelector;
@ -359,26 +358,11 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
@Override @Override
public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset) public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset)
{ {
class QueryableSingleValueDimensionVectorSelector implements SingleValueDimensionVectorSelector, IdLookup final class StringVectorSelector extends StringDictionaryEncodedColumn.StringSingleValueDimensionVectorSelector
{ {
private final int[] vector = new int[offset.getMaxVectorSize()]; public StringVectorSelector()
private int id = ReadableVectorInspector.NULL_ID;
@Override
public int[] getRowVector()
{ {
if (id == offset.getId()) { super(column, offset);
return vector;
}
if (offset.isContiguous()) {
column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize());
} else {
column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize());
}
id = offset.getId();
return vector;
} }
@Override @Override
@ -402,81 +386,23 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
} }
@Override @Override
public boolean supportsLookupNameUtf8() public int lookupId(@Nullable String name)
{
return true;
}
@Override
public boolean nameLookupPossibleInAdvance()
{
return true;
}
@Nullable
@Override
public IdLookup idLookup()
{
return this;
}
@Override
public int lookupId(@Nullable final String name)
{ {
return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name); return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name);
} }
@Override
public int getCurrentVectorSize()
{
return offset.getCurrentVectorSize();
}
@Override
public int getMaxVectorSize()
{
return offset.getMaxVectorSize();
}
} }
return new QueryableSingleValueDimensionVectorSelector(); return new StringVectorSelector();
} }
@Override @Override
public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(final ReadableVectorOffset offset) public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(final ReadableVectorOffset offset)
{ {
class QueryableMultiValueDimensionVectorSelector implements MultiValueDimensionVectorSelector, IdLookup final class MultiStringVectorSelector extends StringDictionaryEncodedColumn.StringMultiValueDimensionVectorSelector
{ {
private final IndexedInts[] vector = new IndexedInts[offset.getMaxVectorSize()]; public MultiStringVectorSelector()
private int id = ReadableVectorInspector.NULL_ID;
@Override
public IndexedInts[] getRowVector()
{ {
if (id == offset.getId()) { super(multiValueColumn, offset);
return vector;
}
if (offset.isContiguous()) {
final int currentOffset = offset.getStartOffset();
final int numRows = offset.getCurrentVectorSize();
for (int i = 0; i < numRows; i++) {
// Must use getUnshared, otherwise all elements in the vector could be the same shared object.
vector[i] = multiValueColumn.getUnshared(i + currentOffset);
}
} else {
final int[] offsets = offset.getOffsets();
final int numRows = offset.getCurrentVectorSize();
for (int i = 0; i < numRows; i++) {
// Must use getUnshared, otherwise all elements in the vector could be the same shared object.
vector[i] = multiValueColumn.getUnshared(offsets[i]);
}
}
id = offset.getId();
return vector;
} }
@Override @Override
@ -499,94 +425,52 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode
return utf8Dictionary.get(id); return utf8Dictionary.get(id);
} }
@Override
public boolean supportsLookupNameUtf8()
{
return true;
}
@Override @Override
public boolean nameLookupPossibleInAdvance() public int lookupId(@Nullable String name)
{
return true;
}
@Nullable
@Override
public IdLookup idLookup()
{
return this;
}
@Override
public int lookupId(@Nullable final String name)
{ {
return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name); return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name);
} }
@Override
public int getCurrentVectorSize()
{
return offset.getCurrentVectorSize();
}
@Override
public int getMaxVectorSize()
{
return offset.getMaxVectorSize();
}
} }
return new QueryableMultiValueDimensionVectorSelector(); return new MultiStringVectorSelector();
} }
@Override @Override
public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset)
{ {
if (!hasMultipleValues()) { if (!hasMultipleValues()) {
class DictionaryEncodedStringSingleValueVectorObjectSelector implements VectorObjectSelector final class StringVectorSelector extends StringDictionaryEncodedColumn.StringVectorObjectSelector
{ {
private final int[] vector = new int[offset.getMaxVectorSize()]; public StringVectorSelector()
private final String[] strings = new String[offset.getMaxVectorSize()];
private int id = ReadableVectorInspector.NULL_ID;
@Override
public Object[] getObjectVector()
{ {
if (id == offset.getId()) { super(column, offset);
return strings;
}
if (offset.isContiguous()) {
column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize());
} else {
column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize());
}
for (int i = 0; i < offset.getCurrentVectorSize(); i++) {
strings[i] = lookupName(vector[i]);
}
id = offset.getId();
return strings;
} }
@Nullable
@Override @Override
public int getMaxVectorSize() public String lookupName(int id)
{ {
return offset.getMaxVectorSize(); return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id);
}
@Override
public int getCurrentVectorSize()
{
return offset.getCurrentVectorSize();
} }
} }
return new StringVectorSelector();
return new DictionaryEncodedStringSingleValueVectorObjectSelector();
} else { } else {
throw new UnsupportedOperationException("Multivalue string object selector not implemented yet"); final class MultiStringVectorSelector extends StringDictionaryEncodedColumn.MultiValueStringVectorObjectSelector
{
public MultiStringVectorSelector()
{
super(multiValueColumn, offset);
}
@Nullable
@Override
public String lookupName(int id)
{
return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id);
}
}
return new MultiStringVectorSelector();
} }
} }

View File

@ -40,6 +40,7 @@ import org.apache.druid.segment.IdLookup;
import org.apache.druid.segment.LongColumnSelector; import org.apache.druid.segment.LongColumnSelector;
import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.DictionaryEncodedColumn; import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.StringDictionaryEncodedColumn;
import org.apache.druid.segment.column.Types; import org.apache.druid.segment.column.Types;
import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.ColumnarDoubles; import org.apache.druid.segment.data.ColumnarDoubles;
@ -514,27 +515,11 @@ public class NestedFieldLiteralDictionaryEncodedColumn<TStringDictionary extends
@Override @Override
public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(ReadableVectorOffset offset) public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(ReadableVectorOffset offset)
{ {
// also copied from StringDictionaryEncodedColumn final class StringVectorSelector extends StringDictionaryEncodedColumn.StringSingleValueDimensionVectorSelector
class QueryableSingleValueDimensionVectorSelector implements SingleValueDimensionVectorSelector, IdLookup
{ {
private final int[] vector = new int[offset.getMaxVectorSize()]; public StringVectorSelector()
private int id = ReadableVectorInspector.NULL_ID;
@Override
public int[] getRowVector()
{ {
if (id == offset.getId()) { super(column, offset);
return vector;
}
if (offset.isContiguous()) {
column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize());
} else {
column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize());
}
id = offset.getId();
return vector;
} }
@Override @Override
@ -550,39 +535,28 @@ public class NestedFieldLiteralDictionaryEncodedColumn<TStringDictionary extends
return NestedFieldLiteralDictionaryEncodedColumn.this.lookupName(id); return NestedFieldLiteralDictionaryEncodedColumn.this.lookupName(id);
} }
@Override
public boolean nameLookupPossibleInAdvance()
{
return true;
}
@Nullable @Nullable
@Override @Override
public IdLookup idLookup() public ByteBuffer lookupNameUtf8(int id)
{ {
return this; // only supported for single type string columns
return globalDictionary.get(dictionary.indexOf(id));
} }
@Override @Override
public int lookupId(@Nullable final String name) public boolean supportsLookupNameUtf8()
{
return singleType != null && singleType.is(ValueType.STRING);
}
@Override
public int lookupId(@Nullable String name)
{ {
return NestedFieldLiteralDictionaryEncodedColumn.this.lookupId(name); return NestedFieldLiteralDictionaryEncodedColumn.this.lookupId(name);
} }
@Override
public int getCurrentVectorSize()
{
return offset.getCurrentVectorSize();
}
@Override
public int getMaxVectorSize()
{
return offset.getMaxVectorSize();
}
} }
return new QueryableSingleValueDimensionVectorSelector(); return new StringVectorSelector();
} }
@Override @Override
@ -594,48 +568,22 @@ public class NestedFieldLiteralDictionaryEncodedColumn<TStringDictionary extends
@Override @Override
public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset)
{ {
// also copied from StringDictionaryEncodedColumn final class StringVectorSelector extends StringDictionaryEncodedColumn.StringVectorObjectSelector
class DictionaryEncodedStringSingleValueVectorObjectSelector implements VectorObjectSelector
{ {
private final int[] vector = new int[offset.getMaxVectorSize()]; public StringVectorSelector()
private final String[] strings = new String[offset.getMaxVectorSize()];
private int id = ReadableVectorInspector.NULL_ID;
@Override
public Object[] getObjectVector()
{ {
if (id == offset.getId()) { super(column, offset);
return strings;
}
if (offset.isContiguous()) {
column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize());
} else {
column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize());
}
for (int i = 0; i < offset.getCurrentVectorSize(); i++) {
strings[i] = lookupName(vector[i]);
}
id = offset.getId();
return strings;
} }
@Nullable
@Override @Override
public int getMaxVectorSize() public String lookupName(int id)
{ {
return offset.getMaxVectorSize(); return NestedFieldLiteralDictionaryEncodedColumn.this.lookupName(id);
}
@Override
public int getCurrentVectorSize()
{
return offset.getCurrentVectorSize();
} }
} }
return new DictionaryEncodedStringSingleValueVectorObjectSelector(); return new StringVectorSelector();
} }
@Override @Override

View File

@ -45,6 +45,7 @@ import org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter;
import org.apache.druid.segment.incremental.IndexSizeExceededException; import org.apache.druid.segment.incremental.IndexSizeExceededException;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex; import org.apache.druid.segment.incremental.OnheapIncrementalIndex;
import org.apache.druid.segment.serde.ComplexMetrics; import org.apache.druid.segment.serde.ComplexMetrics;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.joda.time.DateTime; import org.joda.time.DateTime;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -52,12 +53,14 @@ import org.junit.Test;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
public class StringLastTimeseriesQueryTest public class StringLastTimeseriesQueryTest extends InitializedNullHandlingTest
{ {
private static final String VISITOR_ID = "visitor_id"; private static final String VISITOR_ID = "visitor_id";
private static final String CLIENT_TYPE = "client_type"; private static final String CLIENT_TYPE = "client_type";
private static final String LAST_CLIENT_TYPE = "last_client_type"; private static final String LAST_CLIENT_TYPE = "last_client_type";
private static final String MULTI_VALUE = "mv";
private static final DateTime TIME1 = DateTimes.of("2016-03-04T00:00:00.000Z"); private static final DateTime TIME1 = DateTimes.of("2016-03-04T00:00:00.000Z");
private static final DateTime TIME2 = DateTimes.of("2016-03-04T01:00:00.000Z"); private static final DateTime TIME2 = DateTimes.of("2016-03-04T01:00:00.000Z");
@ -84,22 +87,22 @@ public class StringLastTimeseriesQueryTest
incrementalIndex.add( incrementalIndex.add(
new MapBasedInputRow( new MapBasedInputRow(
TIME1, TIME1,
Lists.newArrayList(VISITOR_ID, CLIENT_TYPE), Lists.newArrayList(VISITOR_ID, CLIENT_TYPE, MULTI_VALUE),
ImmutableMap.of(VISITOR_ID, "0", CLIENT_TYPE, "iphone") ImmutableMap.of(VISITOR_ID, "0", CLIENT_TYPE, "iphone", MULTI_VALUE, ImmutableList.of("a", "b"))
) )
); );
incrementalIndex.add( incrementalIndex.add(
new MapBasedInputRow( new MapBasedInputRow(
TIME1, TIME1,
Lists.newArrayList(VISITOR_ID, CLIENT_TYPE), Lists.newArrayList(VISITOR_ID, CLIENT_TYPE, MULTI_VALUE),
ImmutableMap.of(VISITOR_ID, "1", CLIENT_TYPE, "iphone") ImmutableMap.of(VISITOR_ID, "1", CLIENT_TYPE, "iphone", MULTI_VALUE, ImmutableList.of("c", "d"))
) )
); );
incrementalIndex.add( incrementalIndex.add(
new MapBasedInputRow( new MapBasedInputRow(
TIME2, TIME2,
Lists.newArrayList(VISITOR_ID, CLIENT_TYPE), Lists.newArrayList(VISITOR_ID, CLIENT_TYPE, MULTI_VALUE),
ImmutableMap.of(VISITOR_ID, "0", CLIENT_TYPE, "android") ImmutableMap.of(VISITOR_ID, "0", CLIENT_TYPE, "android", MULTI_VALUE, ImmutableList.of("a", "e"))
) )
); );
@ -121,7 +124,8 @@ public class StringLastTimeseriesQueryTest
new StringLastAggregatorFactory("nonfolding", CLIENT_TYPE, null, 1024), new StringLastAggregatorFactory("nonfolding", CLIENT_TYPE, null, 1024),
new StringLastAggregatorFactory("folding", LAST_CLIENT_TYPE, null, 1024), new StringLastAggregatorFactory("folding", LAST_CLIENT_TYPE, null, 1024),
new StringLastAggregatorFactory("nonexistent", "nonexistent", null, 1024), new StringLastAggregatorFactory("nonexistent", "nonexistent", null, 1024),
new StringLastAggregatorFactory("numeric", "cnt", null, 1024) new StringLastAggregatorFactory("numeric", "cnt", null, 1024),
new StringLastAggregatorFactory("multiValue", MULTI_VALUE, null, 1024)
) )
) )
.build(); .build();
@ -135,6 +139,7 @@ public class StringLastTimeseriesQueryTest
.put("folding", new SerializablePairLongString(TIME2.getMillis(), "android")) .put("folding", new SerializablePairLongString(TIME2.getMillis(), "android"))
.put("nonexistent", new SerializablePairLongString(DateTimes.MIN.getMillis(), null)) .put("nonexistent", new SerializablePairLongString(DateTimes.MIN.getMillis(), null))
.put("numeric", new SerializablePairLongString(DateTimes.MIN.getMillis(), null)) .put("numeric", new SerializablePairLongString(DateTimes.MIN.getMillis(), null))
.put("multiValue", new SerializablePairLongString(TIME2.getMillis(), "[a, e]"))
.build() .build()
) )
) )

View File

@ -0,0 +1,324 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.vector;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import junitparams.converters.Nullable;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.DoubleDimensionSchema;
import org.apache.druid.data.input.impl.FloatDimensionSchema;
import org.apache.druid.data.input.impl.LongDimensionSchema;
import org.apache.druid.data.input.impl.MapInputRowParser;
import org.apache.druid.data.input.impl.StringDimensionSchema;
import org.apache.druid.data.input.impl.TimeAndDimsParseSpec;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.UOE;
import org.apache.druid.java.util.common.io.Closer;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.segment.ColumnCache;
import org.apache.druid.segment.IndexBuilder;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMediumFactory;
import org.apache.druid.testing.InitializedNullHandlingTest;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Exercises the vector selectors created by {@link QueryableIndexVectorColumnSelectorFactory} against a small
 * memory-mapped segment built from {@link #RAW_ROWS}. The segment has one single-value string column, one
 * multi-value string column, and one column each of double, float and long type; every column has at least one
 * row where the value is missing.
 *
 * Each test first checks which selector kinds the factory accepts or rejects for each column type, then walks
 * the segment vector by vector and compares what the selectors return with the raw input rows.
 */
public class QueryableIndexVectorColumnSelectorFactoryTest extends InitializedNullHandlingTest
{
  private static final String TS = "t";
  private static final String STRING = "string_column";
  private static final String MULTI_STRING = "multi_string_column";
  private static final String DOUBLE = "double_column";
  private static final String FLOAT = "float_column";
  private static final String LONG = "long_column";

  // Raw input rows, in the order the tests expect to read them back from the segment.
  private static final List<Map<String, Object>> RAW_ROWS = ImmutableList.of(
      makeRow("2022-01-01T00:00Z", "a", "aa", 1.0, 1.0f, 1L),
      makeRow("2022-01-01T00:01Z", "b", ImmutableList.of("bb", "cc"), null, 3.3f, 1999L),
      makeRow("2022-01-01T00:02Z", null, ImmutableList.of("aa", "dd"), 9.9, null, -500L),
      makeRow("2022-01-01T00:03Z", "c", ImmutableList.of("dd", "ee"), -1.1, -999.999f, null),
      makeRow("2022-01-01T00:04Z", "d", ImmutableList.of("aa", "ff"), -90998.132, 1234.5678f, 1234L),
      makeRow("2022-01-01T00:05Z", "e", null, 3.3, 11f, -9000L)
  );

  private static final DimensionsSpec DIMS = new DimensionsSpec(
      ImmutableList.of(
          new StringDimensionSchema(STRING),
          new StringDimensionSchema(MULTI_STRING),
          new DoubleDimensionSchema(DOUBLE),
          new FloatDimensionSchema(FLOAT),
          new LongDimensionSchema(LONG)
      )
  );

  private static final MapInputRowParser OLD_SCHOOL = new MapInputRowParser(
      new TimeAndDimsParseSpec(
          new TimestampSpec(TS, "iso", null),
          DIMS
      )
  );

  /**
   * Builds one raw input row. The timestamp is always stored; every other argument is stored under its column
   * name only when it is non-null, so a null argument produces a row in which that column is absent.
   *
   * @param t    timestamp value, stored under {@link #TS}
   * @param str  value for {@link #STRING}, or null to omit the column
   * @param mStr value for {@link #MULTI_STRING} (a single string or a list of strings in {@link #RAW_ROWS}),
   *             or null to omit the column
   * @param d    value for {@link #DOUBLE}, or null to omit the column
   * @param f    value for {@link #FLOAT}, or null to omit the column
   * @param l    value for {@link #LONG}, or null to omit the column
   * @return a mutable map holding the non-null values keyed by column name
   */
  private static Map<String, Object> makeRow(
      Object t,
      @Nullable Object str,
      @Nullable Object mStr,
      @Nullable Object d,
      @Nullable Object f,
      @Nullable Object l
  )
  {
    Map<String, Object> row = Maps.newHashMapWithExpectedSize(6);
    row.put(TS, t);
    if (str != null) {
      row.put(STRING, str);
    }
    if (mStr != null) {
      row.put(MULTI_STRING, mStr);
    }
    if (d != null) {
      row.put(DOUBLE, d);
    }
    if (f != null) {
      row.put(FLOAT, f);
    }
    if (l != null) {
      row.put(LONG, l);
    }
    return row;
  }

  @Rule
  public TemporaryFolder temporaryFolder = new TemporaryFolder();

  private Closer closer;
  private ColumnCache theCache;
  private QueryableIndex index;

  /**
   * Builds a memory-mapped index from {@link #RAW_ROWS} (rollup disabled, one count metric) and a column cache
   * over it. Both are tied to {@link #closer} so that {@link #teardown()} releases them.
   */
  @Before
  public void setup() throws IOException
  {
    closer = Closer.create();
    index = IndexBuilder.create(TestHelper.makeJsonMapper())
                        .tmpDir(temporaryFolder.newFolder())
                        .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance())
                        .schema(
                            new IncrementalIndexSchema.Builder()
                                .withDimensionsSpec(DIMS)
                                .withMetrics(new CountAggregatorFactory("chocula"))
                                .withRollup(false)
                                .build()
                        )
                        .rows(
                            RAW_ROWS.stream()
                                    .sequential()
                                    .map(r -> OLD_SCHOOL.parseBatch(r).get(0))
                                    .collect(Collectors.toList())
                        )
                        .buildMMappedIndex();
    closer.register(index);
    theCache = new ColumnCache(index, closer);
  }

  /**
   * Closes everything registered with {@link #closer} during {@link #setup()}.
   */
  @After
  public void teardown() throws IOException
  {
    closer.close();
  }

  /**
   * Creates the factory under test over the shared index and column cache, with no virtual columns.
   *
   * @param offset the vector offset the created selectors will read through
   */
  private QueryableIndexVectorColumnSelectorFactory makeFactory(NoFilterVectorOffset offset)
  {
    return new QueryableIndexVectorColumnSelectorFactory(
        index,
        offset,
        theCache,
        VirtualColumns.EMPTY
    );
  }

  /**
   * Single-value dimension selectors: rejected for the multi-value string column, nil selectors for numeric
   * columns, and for the single-value string column both the dictionary-id vector and the object vector must
   * reproduce the raw values row by row.
   */
  @Test
  public void testSingleValueSelector()
  {
    NoFilterVectorOffset offset = new NoFilterVectorOffset(4, 0, RAW_ROWS.size());
    QueryableIndexVectorColumnSelectorFactory factory = makeFactory(offset);

    // cannot make single value selector on multi-value string
    Assert.assertThrows(
        ISE.class,
        () -> factory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(MULTI_STRING))
    );
    // we make nil selectors for number columns though
    Assert.assertTrue(
        factory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(DOUBLE)) instanceof NilVectorSelector
    );
    Assert.assertTrue(
        factory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(FLOAT)) instanceof NilVectorSelector
    );
    Assert.assertTrue(
        factory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(LONG)) instanceof NilVectorSelector
    );

    // but we can for real single-value strings
    SingleValueDimensionVectorSelector vectorSelector = factory.makeSingleValueDimensionSelector(
        DefaultDimensionSpec.of(STRING)
    );
    VectorObjectSelector objectSelector = factory.makeObjectSelector(STRING);

    // index into RAW_ROWS of the first row of the current vector
    int rowCounter = 0;
    while (!offset.isDone()) {
      int[] ints = vectorSelector.getRowVector();
      Assert.assertNotNull(ints);
      for (int i = 0; i < vectorSelector.getCurrentVectorSize(); i++) {
        Assert.assertEquals(RAW_ROWS.get(rowCounter + i).get(STRING), vectorSelector.lookupName(ints[i]));
      }

      Object[] objects = objectSelector.getObjectVector();
      for (int i = 0; i < vectorSelector.getCurrentVectorSize(); i++) {
        Assert.assertEquals("row " + i, RAW_ROWS.get(rowCounter + i).get(STRING), objects[i]);
      }

      rowCounter += objectSelector.getCurrentVectorSize();
      offset.advance();
    }
    // guard against the loop silently covering only part of the segment (or nothing at all)
    Assert.assertEquals("all rows should have been visited", RAW_ROWS.size(), rowCounter);
  }

  /**
   * Multi-value dimension selectors: rejected for the single-value string column and for numeric columns, and
   * for the multi-value string column both the per-row id vectors and the object vector must reproduce the raw
   * values row by row.
   */
  @Test
  public void testMultiValueSelector()
  {
    NoFilterVectorOffset offset = new NoFilterVectorOffset(4, 0, RAW_ROWS.size());
    QueryableIndexVectorColumnSelectorFactory factory = makeFactory(offset);

    // cannot make these for anything except for multi-value strings
    Assert.assertThrows(ISE.class, () -> factory.makeMultiValueDimensionSelector(DefaultDimensionSpec.of(STRING)));
    Assert.assertThrows(ISE.class, () -> factory.makeMultiValueDimensionSelector(DefaultDimensionSpec.of(DOUBLE)));
    Assert.assertThrows(ISE.class, () -> factory.makeMultiValueDimensionSelector(DefaultDimensionSpec.of(FLOAT)));
    Assert.assertThrows(ISE.class, () -> factory.makeMultiValueDimensionSelector(DefaultDimensionSpec.of(LONG)));

    // but we can for real multi-value strings
    MultiValueDimensionVectorSelector vectorSelector = factory.makeMultiValueDimensionSelector(
        DefaultDimensionSpec.of(MULTI_STRING)
    );
    VectorObjectSelector objectSelector = factory.makeObjectSelector(MULTI_STRING);

    // index into RAW_ROWS of the first row of the current vector
    int rowCounter = 0;
    while (!offset.isDone()) {
      IndexedInts[] indexedInts = vectorSelector.getRowVector();
      Assert.assertNotNull(indexedInts);
      for (int i = 0; i < vectorSelector.getCurrentVectorSize(); i++) {
        final IndexedInts currentRow = indexedInts[i];
        final Object expected = RAW_ROWS.get(rowCounter + i).get(MULTI_STRING);
        if (currentRow.size() == 0) {
          // an empty row is only acceptable where the raw row had no value for the column
          Assert.assertNull(expected);
        } else if (currentRow.size() == 1) {
          // the raw value of a one-element row is a bare string (or absent), not a list
          Assert.assertEquals(expected, vectorSelector.lookupName(currentRow.get(0)));
        } else {
          final List<?> expectedValues = (List<?>) expected;
          // without this a row holding fewer ids than the raw list would pass unnoticed
          Assert.assertEquals("row " + i + " size", expectedValues.size(), currentRow.size());
          // noinspection SSBasedInspection
          for (int j = 0; j < currentRow.size(); j++) {
            Assert.assertEquals(expectedValues.get(j), vectorSelector.lookupName(currentRow.get(j)));
          }
        }
      }

      Object[] objects = objectSelector.getObjectVector();
      for (int i = 0; i < vectorSelector.getCurrentVectorSize(); i++) {
        Assert.assertEquals("row " + i, RAW_ROWS.get(rowCounter + i).get(MULTI_STRING), objects[i]);
      }

      rowCounter += objectSelector.getCurrentVectorSize();
      offset.advance();
    }
    // guard against the loop silently covering only part of the segment (or nothing at all)
    Assert.assertEquals("all rows should have been visited", RAW_ROWS.size(), rowCounter);
  }

  /**
   * Numeric value selectors: rejected for both string columns, and for the double, float and long columns the
   * primitive vectors must reproduce the raw values. For a row with no raw value, the selector must either flag
   * the row in its null vector or, if it does not flag it, return zero.
   */
  @Test
  public void testNumericSelectors()
  {
    NoFilterVectorOffset offset = new NoFilterVectorOffset(4, 0, RAW_ROWS.size());
    QueryableIndexVectorColumnSelectorFactory factory = makeFactory(offset);

    // cannot make value selectors for string columns, single-value or multi-value
    Assert.assertThrows(UOE.class, () -> factory.makeValueSelector(STRING));
    Assert.assertThrows(UOE.class, () -> factory.makeValueSelector(MULTI_STRING));

    VectorValueSelector doubleSelector = factory.makeValueSelector(DOUBLE);
    VectorValueSelector floatSelector = factory.makeValueSelector(FLOAT);
    VectorValueSelector longSelector = factory.makeValueSelector(LONG);

    // index into RAW_ROWS of the first row of the current vector
    int rowCounter = 0;
    while (!offset.isDone()) {
      double[] doubles = doubleSelector.getDoubleVector();
      boolean[] doubleNulls = doubleSelector.getNullVector();
      for (int i = 0; i < doubleSelector.getCurrentVectorSize(); i++) {
        final Object raw = RAW_ROWS.get(rowCounter + i).get(DOUBLE);
        if (doubleNulls != null && doubleNulls[i]) {
          Assert.assertNull(raw);
        } else if (raw == null) {
          Assert.assertEquals(0.0, doubles[i], 0.0);
        } else {
          Assert.assertEquals((double) raw, doubles[i], 0.0);
        }
      }

      float[] floats = floatSelector.getFloatVector();
      boolean[] floatNulls = floatSelector.getNullVector();
      for (int i = 0; i < floatSelector.getCurrentVectorSize(); i++) {
        final Object raw = RAW_ROWS.get(rowCounter + i).get(FLOAT);
        if (floatNulls != null && floatNulls[i]) {
          Assert.assertNull(raw);
        } else if (raw == null) {
          // float delta keeps the comparison on the float overload rather than widening to double
          Assert.assertEquals(0.0f, floats[i], 0.0f);
        } else {
          Assert.assertEquals((float) raw, floats[i], 0.0f);
        }
      }

      long[] longs = longSelector.getLongVector();
      boolean[] longNulls = longSelector.getNullVector();
      for (int i = 0; i < longSelector.getCurrentVectorSize(); i++) {
        final Object raw = RAW_ROWS.get(rowCounter + i).get(LONG);
        if (longNulls != null && longNulls[i]) {
          Assert.assertNull(raw);
        } else if (raw == null) {
          // exact long comparison; a delta would route this through the double overload
          Assert.assertEquals(0L, longs[i]);
        } else {
          Assert.assertEquals((long) raw, longs[i]);
        }
      }

      rowCounter += doubleSelector.getCurrentVectorSize();
      offset.advance();
    }
    // guard against the loop silently covering only part of the segment (or nothing at all)
    Assert.assertEquals("all rows should have been visited", RAW_ROWS.size(), rowCounter);
  }
}