diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java index ba50991215b..7af2273f509 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringDictionaryEncodedColumn.java @@ -47,7 +47,9 @@ import org.apache.druid.utils.CloseableUtils; import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.BitSet; +import java.util.List; /** * @@ -373,26 +375,11 @@ public class StringDictionaryEncodedColumn implements DictionaryEncodedColumn row = new ArrayList<>(ithRow.size()); + // noinspection SSBasedInspection + for (int j = 0; j < ithRow.size(); j++) { + row.add(lookupName(ithRow.get(j))); + } + strings[i] = row; + } + } + + id = offset.getId(); + return strings; + } + + @Override + public int getMaxVectorSize() + { + return offset.getMaxVectorSize(); + } + + @Override + public int getCurrentVectorSize() + { + return offset.getCurrentVectorSize(); + } + } } diff --git a/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java index ba06f9d481d..0df381914bb 100644 --- a/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/column/StringFrontCodedDictionaryEncodedColumn.java @@ -38,7 +38,6 @@ import org.apache.druid.segment.filter.BooleanValueMatcher; import org.apache.druid.segment.historical.HistoricalDimensionSelector; import org.apache.druid.segment.historical.SingleValueHistoricalDimensionSelector; import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector; -import org.apache.druid.segment.vector.ReadableVectorInspector; import org.apache.druid.segment.vector.ReadableVectorOffset; import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector; import org.apache.druid.segment.vector.VectorObjectSelector; @@ -359,26 +358,11 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode @Override public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(final ReadableVectorOffset offset) { - class QueryableSingleValueDimensionVectorSelector implements SingleValueDimensionVectorSelector, IdLookup + final class StringVectorSelector extends StringDictionaryEncodedColumn.StringSingleValueDimensionVectorSelector { - private final int[] vector = new int[offset.getMaxVectorSize()]; - private int id = ReadableVectorInspector.NULL_ID; - - @Override - public int[] getRowVector() + public StringVectorSelector() { - if (id == offset.getId()) { - return vector; - } - - if (offset.isContiguous()) { - column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize()); - } else { - column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize()); - } - - id = offset.getId(); - return vector; + super(column, offset); } @Override @@ -402,81 +386,23 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode } @Override - public boolean supportsLookupNameUtf8() - { - return true; - } - - @Override - public boolean nameLookupPossibleInAdvance() - { - return true; - } - - @Nullable - @Override - public IdLookup idLookup() - { - return this; - } - - @Override - public int lookupId(@Nullable final String name) + public int lookupId(@Nullable String name) { return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name); } - - @Override - public int getCurrentVectorSize() - { - return offset.getCurrentVectorSize(); - } - - @Override - public int getMaxVectorSize() - { - return offset.getMaxVectorSize(); - } } - return new QueryableSingleValueDimensionVectorSelector(); + return new StringVectorSelector(); } @Override public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(final ReadableVectorOffset offset) { - class QueryableMultiValueDimensionVectorSelector implements MultiValueDimensionVectorSelector, IdLookup + final class MultiStringVectorSelector extends StringDictionaryEncodedColumn.StringMultiValueDimensionVectorSelector { - private final IndexedInts[] vector = new IndexedInts[offset.getMaxVectorSize()]; - private int id = ReadableVectorInspector.NULL_ID; - - @Override - public IndexedInts[] getRowVector() + public MultiStringVectorSelector() { - if (id == offset.getId()) { - return vector; - } - - if (offset.isContiguous()) { - final int currentOffset = offset.getStartOffset(); - final int numRows = offset.getCurrentVectorSize(); - - for (int i = 0; i < numRows; i++) { - // Must use getUnshared, otherwise all elements in the vector could be the same shared object. - vector[i] = multiValueColumn.getUnshared(i + currentOffset); - } - } else { - final int[] offsets = offset.getOffsets(); - final int numRows = offset.getCurrentVectorSize(); - - for (int i = 0; i < numRows; i++) { - // Must use getUnshared, otherwise all elements in the vector could be the same shared object. - vector[i] = multiValueColumn.getUnshared(offsets[i]); - } - } - - id = offset.getId(); - return vector; + super(multiValueColumn, offset); } @Override @@ -499,94 +425,52 @@ public class StringFrontCodedDictionaryEncodedColumn implements DictionaryEncode return utf8Dictionary.get(id); } - @Override - public boolean supportsLookupNameUtf8() - { - return true; - } @Override - public boolean nameLookupPossibleInAdvance() - { - return true; - } - - @Nullable - @Override - public IdLookup idLookup() - { - return this; - } - - @Override - public int lookupId(@Nullable final String name) + public int lookupId(@Nullable String name) { return StringFrontCodedDictionaryEncodedColumn.this.lookupId(name); } - - @Override - public int getCurrentVectorSize() - { - return offset.getCurrentVectorSize(); - } - - @Override - public int getMaxVectorSize() - { - return offset.getMaxVectorSize(); - } } - return new QueryableMultiValueDimensionVectorSelector(); + return new MultiStringVectorSelector(); } @Override public VectorObjectSelector makeVectorObjectSelector(ReadableVectorOffset offset) { if (!hasMultipleValues()) { - class DictionaryEncodedStringSingleValueVectorObjectSelector implements VectorObjectSelector + final class StringVectorSelector extends StringDictionaryEncodedColumn.StringVectorObjectSelector { - private final int[] vector = new int[offset.getMaxVectorSize()]; - private final String[] strings = new String[offset.getMaxVectorSize()]; - private int id = ReadableVectorInspector.NULL_ID; - - @Override - - public Object[] getObjectVector() + public StringVectorSelector() { - if (id == offset.getId()) { - return strings; - } - - if (offset.isContiguous()) { - column.get(vector, offset.getStartOffset(), offset.getCurrentVectorSize()); - } else { - column.get(vector, offset.getOffsets(), offset.getCurrentVectorSize()); - } - for (int i = 0; i < offset.getCurrentVectorSize(); i++) { - strings[i] = lookupName(vector[i]); - } - id = offset.getId(); - - return strings; + super(column, offset); } + @Nullable @Override - public int getMaxVectorSize() + public String lookupName(int id) { - return offset.getMaxVectorSize(); - } - - @Override - public int getCurrentVectorSize() - { - return offset.getCurrentVectorSize(); + return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); } } - - return new DictionaryEncodedStringSingleValueVectorObjectSelector(); + return new StringVectorSelector(); } else { - throw new UnsupportedOperationException("Multivalue string object selector not implemented yet"); + final class MultiStringVectorSelector extends StringDictionaryEncodedColumn.MultiValueStringVectorObjectSelector + { + public MultiStringVectorSelector() + { + super(multiValueColumn, offset); + } + + @Nullable + @Override + public String lookupName(int id) + { + return StringFrontCodedDictionaryEncodedColumn.this.lookupName(id); + } + } + return new MultiStringVectorSelector(); } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralDictionaryEncodedColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralDictionaryEncodedColumn.java index 2eac0d4d92b..855112e6b4d 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralDictionaryEncodedColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedFieldLiteralDictionaryEncodedColumn.java @@ -40,6 +40,7 @@ import org.apache.druid.segment.IdLookup; import org.apache.druid.segment.LongColumnSelector; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DictionaryEncodedColumn; +import org.apache.druid.segment.column.StringDictionaryEncodedColumn; import org.apache.druid.segment.column.Types; import org.apache.druid.segment.column.ValueType; import org.apache.druid.segment.data.ColumnarDoubles; @@ -514,27 +515,11 @@ public class NestedFieldLiteralDictionaryEncodedColumn> RAW_ROWS = ImmutableList.of( + makeRow("2022-01-01T00:00Z", "a", "aa", 1.0, 1.0f, 1L), + makeRow("2022-01-01T00:01Z", "b", ImmutableList.of("bb", "cc"), null, 3.3f, 1999L), + makeRow("2022-01-01T00:02Z", null, ImmutableList.of("aa", "dd"), 9.9, null, -500L), + makeRow("2022-01-01T00:03Z", "c", ImmutableList.of("dd", "ee"), -1.1, -999.999f, null), + makeRow("2022-01-01T00:04Z", "d", ImmutableList.of("aa", "ff"), -90998.132, 1234.5678f, 1234L), + makeRow("2022-01-01T00:05Z", "e", null, 3.3, 11f, -9000L) + ); + + private static final DimensionsSpec DIMS = new DimensionsSpec( + ImmutableList.of( + new StringDimensionSchema(STRING), + new StringDimensionSchema(MULTI_STRING), + new DoubleDimensionSchema(DOUBLE), + new FloatDimensionSchema(FLOAT), + new LongDimensionSchema(LONG) + ) + ); + + private static final MapInputRowParser OLD_SCHOOL = new MapInputRowParser( + new TimeAndDimsParseSpec( + new TimestampSpec(TS, "iso", null), + DIMS + ) + ); + + private static Map makeRow( + Object t, + @Nullable Object str, + @Nullable Object mStr, + @Nullable Object d, + @Nullable Object f, + @Nullable Object l + ) + { + Map row = Maps.newHashMapWithExpectedSize(6); + row.put(TS, t); + if (str != null) { + row.put(STRING, str); + } + if (mStr != null) { + row.put(MULTI_STRING, mStr); + } + if (d != null) { + row.put(DOUBLE, d); + } + if (f != null) { + row.put(FLOAT, f); + } + if (l != null) { + row.put(LONG, l); + } + return row; + } + + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + Closer closer; + ColumnCache theCache; + + QueryableIndex index; + + @Before + public void setup() throws IOException + { + closer = Closer.create(); + index = IndexBuilder.create(TestHelper.makeJsonMapper()) + .tmpDir(temporaryFolder.newFolder()) + .segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()) + .schema( + new IncrementalIndexSchema.Builder() + .withDimensionsSpec(DIMS) + .withMetrics(new CountAggregatorFactory("chocula")) + .withRollup(false) + .build() + ) + .rows( + RAW_ROWS.stream().sequential().map(r -> OLD_SCHOOL.parseBatch(r).get(0)).collect(Collectors.toList()) + ) + .buildMMappedIndex(); + + closer.register(index); + theCache = new ColumnCache(index, closer); + } + + @After + public void teardown() throws IOException + { + closer.close(); + } + + @Test + public void testSingleValueSelector() + { + NoFilterVectorOffset offset = new NoFilterVectorOffset(4, 0, RAW_ROWS.size()); + QueryableIndexVectorColumnSelectorFactory factory = new QueryableIndexVectorColumnSelectorFactory( + index, + offset, + theCache, + VirtualColumns.EMPTY + ); + + // cannot make single value selector on multi-value string + Assert.assertThrows(ISE.class, () -> factory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(MULTI_STRING))); + // we make nil selectors for number columns though + Assert.assertTrue(factory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(DOUBLE)) instanceof NilVectorSelector); + Assert.assertTrue(factory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(FLOAT)) instanceof NilVectorSelector); + Assert.assertTrue(factory.makeSingleValueDimensionSelector(DefaultDimensionSpec.of(LONG)) instanceof NilVectorSelector); + + // but we can for real multi-value strings + SingleValueDimensionVectorSelector vectorSelector = factory.makeSingleValueDimensionSelector( + DefaultDimensionSpec.of(STRING) + ); + + VectorObjectSelector objectSelector = factory.makeObjectSelector(STRING); + + int rowCounter = 0; + while (!offset.isDone()) { + int[] ints = vectorSelector.getRowVector(); + Assert.assertNotNull(ints); + for (int i = 0; i < vectorSelector.getCurrentVectorSize(); i++) { + Assert.assertEquals(RAW_ROWS.get(rowCounter + i).get(STRING), vectorSelector.lookupName(ints[i])); + } + + Object[] objects = objectSelector.getObjectVector(); + for (int i = 0; i < vectorSelector.getCurrentVectorSize(); i++) { + Assert.assertEquals("row " + i, RAW_ROWS.get(rowCounter + i).get(STRING), objects[i]); + } + rowCounter += objectSelector.getCurrentVectorSize(); + offset.advance(); + } + } + + @Test + public void testMultiValueSelector() + { + NoFilterVectorOffset offset = new NoFilterVectorOffset(4, 0, RAW_ROWS.size()); + QueryableIndexVectorColumnSelectorFactory factory = new QueryableIndexVectorColumnSelectorFactory( + index, + offset, + theCache, + VirtualColumns.EMPTY + ); + + // cannot make these for anything except for multi-value strings + Assert.assertThrows(ISE.class, () -> factory.makeMultiValueDimensionSelector(DefaultDimensionSpec.of(STRING))); + Assert.assertThrows(ISE.class, () -> factory.makeMultiValueDimensionSelector(DefaultDimensionSpec.of(DOUBLE))); + Assert.assertThrows(ISE.class, () -> factory.makeMultiValueDimensionSelector(DefaultDimensionSpec.of(FLOAT))); + Assert.assertThrows(ISE.class, () -> factory.makeMultiValueDimensionSelector(DefaultDimensionSpec.of(LONG))); + + // but we can for real multi-value strings + MultiValueDimensionVectorSelector vectorSelector = factory.makeMultiValueDimensionSelector( + DefaultDimensionSpec.of(MULTI_STRING) + ); + + VectorObjectSelector objectSelector = factory.makeObjectSelector(MULTI_STRING); + + int rowCounter = 0; + while (!offset.isDone()) { + IndexedInts[] indexedInts = vectorSelector.getRowVector(); + Assert.assertNotNull(indexedInts); + for (int i = 0; i < vectorSelector.getCurrentVectorSize(); i++) { + IndexedInts currentRow = indexedInts[i]; + if (currentRow.size() == 0) { + Assert.assertNull(RAW_ROWS.get(rowCounter + i).get(MULTI_STRING)); + } else if (currentRow.size() == 1) { + Assert.assertEquals(RAW_ROWS.get(rowCounter + i).get(MULTI_STRING), vectorSelector.lookupName(currentRow.get(0))); + } else { + // noinspection SSBasedInspection + for (int j = 0; j < currentRow.size(); j++) { + List expected = (List) RAW_ROWS.get(rowCounter + i).get(MULTI_STRING); + Assert.assertEquals(expected.get(j), vectorSelector.lookupName(currentRow.get(j))); + } + } + } + + Object[] objects = objectSelector.getObjectVector(); + for (int i = 0; i < vectorSelector.getCurrentVectorSize(); i++) { + Assert.assertEquals("row " + i, RAW_ROWS.get(rowCounter + i).get(MULTI_STRING), objects[i]); + } + rowCounter += objectSelector.getCurrentVectorSize(); + offset.advance(); + } + } + + @Test + public void testNumericSelectors() + { + NoFilterVectorOffset offset = new NoFilterVectorOffset(4, 0, RAW_ROWS.size()); + QueryableIndexVectorColumnSelectorFactory factory = new QueryableIndexVectorColumnSelectorFactory( + index, + offset, + theCache, + VirtualColumns.EMPTY + ); + + // cannot make these for anything except for multi-value strings + Assert.assertThrows(UOE.class, () -> factory.makeValueSelector(STRING)); + Assert.assertThrows(UOE.class, () -> factory.makeValueSelector(MULTI_STRING)); + + VectorValueSelector doubleSelector = factory.makeValueSelector(DOUBLE); + VectorValueSelector floatSelector = factory.makeValueSelector(FLOAT); + VectorValueSelector longSelector = factory.makeValueSelector(LONG); + + int rowCounter = 0; + while (!offset.isDone()) { + double[] doubles = doubleSelector.getDoubleVector(); + boolean[] doubleNulls = doubleSelector.getNullVector(); + for (int i = 0; i < doubleSelector.getCurrentVectorSize(); i++) { + final Object raw = RAW_ROWS.get(rowCounter + i).get(DOUBLE); + if (doubleNulls != null && doubleNulls[i]) { + Assert.assertNull(raw); + } else { + if (raw == null) { + Assert.assertEquals(0.0, doubles[i], 0.0); + } else { + Assert.assertEquals((double) raw, doubles[i], 0.0); + } + } + } + + float[] floats = floatSelector.getFloatVector(); + boolean[] floatNulls = floatSelector.getNullVector(); + for (int i = 0; i < floatSelector.getCurrentVectorSize(); i++) { + final Object raw = RAW_ROWS.get(rowCounter + i).get(FLOAT); + if (floatNulls != null && floatNulls[i]) { + Assert.assertNull(raw); + } else { + if (raw == null) { + Assert.assertEquals(0.0f, floats[i], 0.0); + } else { + Assert.assertEquals((float) raw, floats[i], 0.0); + } + } + } + + long[] longs = longSelector.getLongVector(); + boolean[] longNulls = longSelector.getNullVector(); + for (int i = 0; i < longSelector.getCurrentVectorSize(); i++) { + final Object raw = RAW_ROWS.get(rowCounter + i).get(LONG); + if (longNulls != null && longNulls[i]) { + Assert.assertNull(raw); + } else { + if (raw == null) { + Assert.assertEquals(0L, longs[i], 0.0); + } else { + Assert.assertEquals((long) raw, longs[i]); + } + } + } + + rowCounter += doubleSelector.getCurrentVectorSize(); + offset.advance(); + } + } +}