diff --git a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java index 475ded6922b..e8cc3ceac02 100644 --- a/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java +++ b/processing/src/main/java/org/apache/druid/query/expression/NestedDataExpressions.java @@ -38,6 +38,7 @@ import org.apache.druid.segment.nested.StructuredDataProcessor; import javax.annotation.Nullable; import javax.inject.Inject; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -479,7 +480,7 @@ public class NestedDataExpressions final StructuredDataProcessor processor = new StructuredDataProcessor() { @Override - public int processLiteralField(String fieldName, Object fieldValue) + public int processLiteralField(ArrayList fieldPath, Object fieldValue) { // do nothing, we only want the list of fields returned by this processor return 0; @@ -500,9 +501,9 @@ public class NestedDataExpressions // maybe in the future ProcessResults should deal in PathFinder.PathPart instead of strings for fields StructuredDataProcessor.ProcessResults info = processor.processFields(unwrap(input)); List transformed = info.getLiteralFields() - .stream() - .map(p -> NestedPathFinder.toNormalizedJsonPath(NestedPathFinder.parseJqPath(p))) - .collect(Collectors.toList()); + .stream() + .map(NestedPathFinder::toNormalizedJsonPath) + .collect(Collectors.toList()); return ExprEval.ofType( ExpressionType.STRING_ARRAY, transformed diff --git a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexer.java b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexer.java index 5cd0cf79e05..6e4775e7037 100644 --- a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexer.java @@ -35,10 +35,13 @@ import org.apache.druid.segment.nested.GlobalDictionarySortedCollector; import org.apache.druid.segment.nested.GlobalDimensionDictionary; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.segment.nested.NestedLiteralTypeInfo; +import org.apache.druid.segment.nested.NestedPathFinder; +import org.apache.druid.segment.nested.NestedPathPart; import org.apache.druid.segment.nested.StructuredData; import org.apache.druid.segment.nested.StructuredDataProcessor; import javax.annotation.Nullable; +import java.util.ArrayList; import java.util.Map; import java.util.Objects; import java.util.SortedMap; @@ -56,8 +59,9 @@ public class NestedDataColumnIndexer implements DimensionIndexer fieldPath, Object fieldValue) { + final String fieldName = NestedPathFinder.toNormalizedJsonPath(fieldPath); LiteralFieldIndexer fieldIndexer = fieldIndexers.get(fieldName); if (fieldIndexer == null) { estimatedFieldKeySize += StructuredDataProcessor.estimateStringSize(fieldName); diff --git a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java index c4682d20711..c5aed95c00e 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/CompressedNestedDataComplexColumn.java @@ -77,7 +77,7 @@ import java.util.concurrent.ConcurrentHashMap; * Implementation of {@link NestedDataComplexColumn} which uses a {@link CompressedVariableSizedBlobColumn} for the * 'raw' {@link StructuredData} values and provides selectors for nested 'literal' field columns. */ -public final class CompressedNestedDataComplexColumn> +public abstract class CompressedNestedDataComplexColumn> extends NestedDataComplexColumn { private final NestedDataColumnMetadata metadata; @@ -123,6 +123,12 @@ public final class CompressedNestedDataComplexColumn parsePath(String path); + + public abstract String getField(List path); + + public abstract String getFieldFileName(String fileNameBase, String field, int fieldIndex); + public GenericIndexed getFields() { return fields; @@ -133,7 +139,7 @@ public final class CompressedNestedDataComplexColumn> fieldParts = new ArrayList<>(fields.size()); for (int i = 0; i < fields.size(); i++) { - fieldParts.add(NestedPathFinder.parseJqPath(fields.get(i))); + fieldParts.add(parsePath(fields.get(i))); } return fieldParts; } @@ -405,11 +411,6 @@ public final class CompressedNestedDataComplexColumn path) - { - return NestedPathFinder.toNormalizedJqPath(path); - } - private ColumnHolder getColumnHolder(String field) { return columns.computeIfAbsent(field, this::readNestedFieldColumn); @@ -421,12 +422,17 @@ public final class CompressedNestedDataComplexColumn bitmapIndexWriter.writeTo(channel, smoosher); } }; - final String fieldFileName = NestedDataColumnSerializer.getFieldFileName(columnName, fieldName); + final String fieldFileName = NestedDataColumnSerializer.getInternalFileName(columnName, fieldName); final long size = fieldSerializer.getSerializedSize(); log.debug("Column [%s] serializing [%s] field of size [%d].", columnName, fieldName, size); try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(fieldFileName, size)) { diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java index 41b84a638e7..6da8472d2ac 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializer.java @@ -56,6 +56,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.WritableByteChannel; +import java.util.ArrayList; import java.util.Map; import java.util.SortedMap; @@ -69,6 +70,8 @@ public class NestedDataColumnSerializer implements GenericColumnSerializer fieldPath, Object fieldValue) { - final GlobalDictionaryEncodedFieldColumnWriter writer = fieldWriters.get(fieldName); + final GlobalDictionaryEncodedFieldColumnWriter writer = fieldWriters.get( + NestedPathFinder.toNormalizedJsonPath(fieldPath) + ); if (writer != null) { try { ExprEval eval = ExprEval.bestEffortOf(fieldValue); @@ -180,8 +185,10 @@ public class NestedDataColumnSerializer implements GenericColumnSerializer field : fields.entrySet()) { final String fieldName = field.getKey(); + final String fieldFileName = NESTED_FIELD_PREFIX + ctr++; fieldsWriter.write(fieldName); fieldsInfoWriter.write(field.getValue()); final GlobalDictionaryEncodedFieldColumnWriter writer; @@ -190,7 +197,7 @@ public class NestedDataColumnSerializer implements GenericColumnSerializer field : fields.entrySet()) { // remove writer so that it can be collected when we are done with it GlobalDictionaryEncodedFieldColumnWriter writer = fieldWriters.remove(field.getKey()); @@ -357,11 +365,6 @@ public class NestedDataColumnSerializer implements GenericColumnSerializer { + private final byte version; private final NestedDataColumnMetadata metadata; private final CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier; private final ImmutableBitmap nullValues; @@ -78,9 +79,9 @@ public class NestedDataColumnSupplier implements Supplier TypeStrategy doubleTypeStrategy ) { - byte version = bb.get(); + this.version = bb.get(); - if (version == 0x03) { + if (version == 0x03 || version == 0x04) { try { final SmooshedFileMapper mapper = columnBuilder.getFileMapper(); metadata = jsonMapper.readValue( @@ -157,10 +158,10 @@ public class NestedDataColumnSupplier implements Supplier } } catch (IOException ex) { - throw new RE(ex, "Failed to deserialize V3 column."); + throw new RE(ex, "Failed to deserialize V%s column.", version); } } else { - throw new RE("Unknown version" + version); + throw new RE("Unknown version " + version); } fileMapper = Preconditions.checkNotNull(columnBuilder.getFileMapper(), "Null fileMapper"); @@ -170,9 +171,17 @@ public class NestedDataColumnSupplier implements Supplier @Override public ComplexColumn get() + { + if (version == 0x03) { + return makeV3(); + } + return makeV4(); + } + + private NestedDataColumnV3 makeV3() { if (frontCodedDictionarySupplier != null) { - return new CompressedNestedDataComplexColumn<>( + return new NestedDataColumnV3<>( metadata, columnConfig, compressedRawColumnSupplier, @@ -185,7 +194,37 @@ public class NestedDataColumnSupplier implements Supplier fileMapper ); } - return new CompressedNestedDataComplexColumn<>( + return new NestedDataColumnV3<>( + metadata, + columnConfig, + compressedRawColumnSupplier, + nullValues, + fields, + fieldInfo, + dictionary::singleThreaded, + longDictionarySupplier, + doubleDictionarySupplier, + fileMapper + ); + } + + private NestedDataColumnV4 makeV4() + { + if (frontCodedDictionarySupplier != null) { + return new NestedDataColumnV4<>( + metadata, + columnConfig, + compressedRawColumnSupplier, + nullValues, + fields, + fieldInfo, + frontCodedDictionarySupplier, + longDictionarySupplier, + doubleDictionarySupplier, + fileMapper + ); + } + return new NestedDataColumnV4<>( metadata, columnConfig, compressedRawColumnSupplier, @@ -202,9 +241,7 @@ public class NestedDataColumnSupplier implements Supplier private ByteBuffer loadInternalFile(SmooshedFileMapper fileMapper, String internalFileName) throws IOException { return fileMapper.mapFile( - NestedDataColumnSerializer.getInternalFileName( - metadata.getFileNameBase(), internalFileName - ) + NestedDataColumnSerializer.getInternalFileName(metadata.getFileNameBase(), internalFileName) ); } } diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV3.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV3.java new file mode 100644 index 00000000000..0bac9fed3dd --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV3.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.nested; + +import com.google.common.base.Supplier; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.data.CompressedVariableSizedBlobColumnSupplier; +import org.apache.druid.segment.data.FixedIndexed; +import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.Indexed; + +import java.nio.ByteBuffer; +import java.util.List; + +public final class NestedDataColumnV3> + extends CompressedNestedDataComplexColumn +{ + public NestedDataColumnV3( + NestedDataColumnMetadata metadata, + ColumnConfig columnConfig, + CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, + ImmutableBitmap nullValues, + GenericIndexed fields, + NestedLiteralTypeInfo fieldInfo, + Supplier stringDictionary, + Supplier> longDictionarySupplier, + Supplier> doubleDictionarySupplier, + SmooshedFileMapper fileMapper + ) + { + super( + metadata, + columnConfig, + compressedRawColumnSupplier, + nullValues, + fields, + fieldInfo, + stringDictionary, + longDictionarySupplier, + doubleDictionarySupplier, + fileMapper + ); + } + + @Override + public List parsePath(String path) + { + return NestedPathFinder.parseJqPath(path); + } + + @Override + public String getFieldFileName(String fileNameBase, String field, int fieldIndex) + { + return StringUtils.format("%s_%s", fileNameBase, field); + } + + @Override + public String getField(List path) + { + return NestedPathFinder.toNormalizedJqPath(path); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV4.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV4.java new file mode 100644 index 00000000000..cf9c2799b10 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnV4.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.nested; + +import com.google.common.base.Supplier; +import org.apache.druid.collections.bitmap.ImmutableBitmap; +import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; +import org.apache.druid.segment.column.ColumnConfig; +import org.apache.druid.segment.data.CompressedVariableSizedBlobColumnSupplier; +import org.apache.druid.segment.data.FixedIndexed; +import org.apache.druid.segment.data.GenericIndexed; +import org.apache.druid.segment.data.Indexed; + +import java.nio.ByteBuffer; +import java.util.List; + +public final class NestedDataColumnV4> + extends CompressedNestedDataComplexColumn +{ + public NestedDataColumnV4( + NestedDataColumnMetadata metadata, + ColumnConfig columnConfig, + CompressedVariableSizedBlobColumnSupplier compressedRawColumnSupplier, + ImmutableBitmap nullValues, + GenericIndexed fields, + NestedLiteralTypeInfo fieldInfo, + Supplier stringDictionary, + Supplier> longDictionarySupplier, + Supplier> doubleDictionarySupplier, + SmooshedFileMapper fileMapper + ) + { + super( + metadata, + columnConfig, + compressedRawColumnSupplier, + nullValues, + fields, + fieldInfo, + stringDictionary, + longDictionarySupplier, + doubleDictionarySupplier, + fileMapper + ); + } + + @Override + public List parsePath(String path) + { + return NestedPathFinder.parseJsonPath(path); + } + + @Override + public String getFieldFileName(String fileNameBase, String field, int fieldIndex) + { + return NestedDataColumnSerializer.getInternalFileName( + fileNameBase, + NestedDataColumnSerializer.NESTED_FIELD_PREFIX + fieldIndex + ); + } + + @Override + public String getField(List path) + { + return NestedPathFinder.toNormalizedJsonPath(path); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedPathFinder.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedPathFinder.java index e77a629bc23..93872a2a475 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedPathFinder.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedPathFinder.java @@ -162,7 +162,7 @@ public class NestedPathFinder public static String toNormalizedJqPath(List paths) { if (paths.isEmpty()) { - return StructuredDataProcessor.ROOT_LITERAL; + return "."; } StringBuilder bob = new StringBuilder(); boolean first = true; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/StructuredDataProcessor.java b/processing/src/main/java/org/apache/druid/segment/nested/StructuredDataProcessor.java index 243f56a46dd..2bca541ade4 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/StructuredDataProcessor.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/StructuredDataProcessor.java @@ -19,10 +19,9 @@ package org.apache.druid.segment.nested; -import com.google.common.collect.ImmutableSet; - import javax.annotation.Nullable; import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -31,29 +30,23 @@ import java.util.Set; public abstract class StructuredDataProcessor { - public static final String ROOT_LITERAL = "."; - private static final Set ROOT_LITERAL_FIELDS = ImmutableSet.of(ROOT_LITERAL); - - public abstract int processLiteralField(String fieldName, Object fieldValue); + public abstract int processLiteralField(ArrayList fieldPath, Object fieldValue); /** - * Process fields, returning a list of all "normalized" 'jq' paths to literal fields, consistent with the output of - * {@link NestedPathFinder#toNormalizedJqPath(List)}. - * - * Note: in the future, {@link ProcessResults#getLiteralFields()} should instead probably be modified to deal in - * lists of {@link NestedPathPart} instead so that callers can decide how to represent the path instead of assuing - * 'jq' syntax. + * Process fields, returning a list of all paths to literal fields, represented as an ordered sequence of + * {@link NestedPathPart}. */ public ProcessResults processFields(Object raw) { Queue toProcess = new ArrayDeque<>(); raw = StructuredData.unwrap(raw); + ArrayList newPath = new ArrayList<>(); if (raw instanceof Map) { - toProcess.add(new MapField("", (Map) raw)); + toProcess.add(new MapField(newPath, (Map) raw)); } else if (raw instanceof List) { - toProcess.add(new ListField(ROOT_LITERAL, (List) raw)); + toProcess.add(new ListField(newPath, (List) raw)); } else { - return new ProcessResults().withFields(ROOT_LITERAL_FIELDS).withSize(processLiteralField(ROOT_LITERAL, raw)); + return new ProcessResults().addLiteralField(newPath, processLiteralField(newPath, raw)); } ProcessResults accumulator = new ProcessResults(); @@ -76,17 +69,18 @@ public abstract class StructuredDataProcessor for (Map.Entry entry : map.getMap().entrySet()) { // add estimated size of string key processResults.addSize(estimateStringSize(entry.getKey())); - final String fieldName = map.getName() + ".\"" + entry.getKey() + "\""; Object value = StructuredData.unwrap(entry.getValue()); // lists and maps go back in the queue + final ArrayList newPath = new ArrayList<>(map.getPath()); + newPath.add(new NestedPathField(entry.getKey())); if (value instanceof List) { List theList = (List) value; - toProcess.add(new ListField(fieldName, theList)); + toProcess.add(new ListField(newPath, theList)); } else if (value instanceof Map) { - toProcess.add(new MapField(fieldName, (Map) value)); + toProcess.add(new MapField(newPath, (Map) value)); } else { // literals get processed - processResults.addLiteralField(fieldName, processLiteralField(fieldName, value)); + processResults.addLiteralField(newPath, processLiteralField(newPath, value)); } } return processResults; @@ -98,16 +92,17 @@ public abstract class StructuredDataProcessor ProcessResults results = new ProcessResults().withSize(8); final List theList = list.getList(); for (int i = 0; i < theList.size(); i++) { - final String listFieldName = list.getName() + "[" + i + "]"; + final ArrayList newPath = new ArrayList<>(list.getPath()); + newPath.add(new NestedPathArrayElement(i)); final Object element = StructuredData.unwrap(theList.get(i)); // maps and lists go back into the queue if (element instanceof Map) { - toProcess.add(new MapField(listFieldName, (Map) element)); + toProcess.add(new MapField(newPath, (Map) element)); } else if (element instanceof List) { - toProcess.add(new ListField(listFieldName, (List) element)); + toProcess.add(new ListField(newPath, (List) element)); } else { // literals get processed - results.addLiteralField(listFieldName, processLiteralField(listFieldName, element)); + results.addLiteralField(newPath, processLiteralField(newPath, element)); } } return results; @@ -115,16 +110,16 @@ public abstract class StructuredDataProcessor abstract static class Field { - private final String name; + private final ArrayList path; - protected Field(String name) + protected Field(ArrayList path) { - this.name = name; + this.path = path; } - public String getName() + public ArrayList getPath() { - return name; + return path; } } @@ -132,9 +127,9 @@ public abstract class StructuredDataProcessor { private final List list; - ListField(String name, List list) + ListField(ArrayList path, List list) { - super(name); + super(path); this.list = list; } @@ -148,9 +143,9 @@ public abstract class StructuredDataProcessor { private final Map map; - MapField(String name, Map map) + MapField(ArrayList path, Map map) { - super(name); + super(path); this.map = map; } @@ -165,7 +160,7 @@ public abstract class StructuredDataProcessor */ public static class ProcessResults { - private Set literalFields; + private Set> literalFields; private int estimatedSize; public ProcessResults() @@ -174,7 +169,7 @@ public abstract class StructuredDataProcessor estimatedSize = 0; } - public Set getLiteralFields() + public Set> getLiteralFields() { return literalFields; } @@ -190,19 +185,13 @@ public abstract class StructuredDataProcessor return this; } - public ProcessResults addLiteralField(String fieldName, int sizeOfValue) + public ProcessResults addLiteralField(ArrayList fieldPath, int sizeOfValue) { - literalFields.add(fieldName); + literalFields.add(fieldPath); this.estimatedSize += sizeOfValue; return this; } - public ProcessResults withFields(Set fields) - { - this.literalFields = fields; - return this; - } - public ProcessResults withSize(int size) { this.estimatedSize = size; diff --git a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java index 66c7257be9f..8f618325102 100644 --- a/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java +++ b/processing/src/test/java/org/apache/druid/query/expression/NestedDataExpressionsTest.java @@ -129,12 +129,12 @@ public class NestedDataExpressionsTest extends InitializedNullHandlingTest Expr expr = Parser.parse("json_paths(nest)", MACRO_TABLE); ExprEval eval = expr.eval(inputBindings); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); - Assert.assertArrayEquals(new Object[]{"$.y", "$.z", "$.x"}, (Object[]) eval.value()); + Assert.assertArrayEquals(new Object[]{"$.x", "$.y", "$.z"}, (Object[]) eval.value()); expr = Parser.parse("json_paths(nester)", MACRO_TABLE); eval = expr.eval(inputBindings); Assert.assertEquals(ExpressionType.STRING_ARRAY, eval.type()); - Assert.assertArrayEquals(new Object[]{"$.x[0]", "$.x[1]", "$.x[2]", "$.y.b", "$.y.a"}, (Object[]) eval.value()); + Assert.assertArrayEquals(new Object[]{"$.x[0]", "$.y.a", "$.x[1]", "$.y.b", "$.x[2]"}, (Object[]) eval.value()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java index 28b6bab9d77..c55f07c0307 100644 --- a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java +++ b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerTest.java @@ -38,7 +38,7 @@ public class NestedDataColumnIndexerTest extends InitializedNullHandlingTest EncodedKeyComponent key; // new raw value, new field, new dictionary entry key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); - Assert.assertEquals(230, key.getEffectiveSizeBytes()); + Assert.assertEquals(228, key.getEffectiveSizeBytes()); Assert.assertEquals(1, indexer.getCardinality()); // adding same value only adds estimated size of value itself key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); @@ -67,7 +67,7 @@ public class NestedDataColumnIndexerTest extends InitializedNullHandlingTest Assert.assertEquals(5, indexer.getCardinality()); // new raw value, new fields key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), false); - Assert.assertEquals(292, key.getEffectiveSizeBytes()); + Assert.assertEquals(286, key.getEffectiveSizeBytes()); Assert.assertEquals(5, indexer.getCardinality()); // new raw value key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), false); diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java index 900e07af7bd..251496ee395 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierTest.java @@ -28,6 +28,7 @@ import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; import com.google.common.util.concurrent.MoreExecutors; import org.apache.druid.collections.bitmap.RoaringBitmapFactory; +import org.apache.druid.guice.NestedDataModule; import org.apache.druid.java.util.common.io.Closer; import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; @@ -41,9 +42,12 @@ import org.apache.druid.segment.DimensionSelector; import org.apache.druid.segment.IndexSpec; import org.apache.druid.segment.NestedDataColumnIndexer; import org.apache.druid.segment.ObjectColumnSelector; +import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.TestHelper; +import org.apache.druid.segment.column.BitmapColumnIndex; import org.apache.druid.segment.column.ColumnBuilder; +import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; import org.apache.druid.segment.column.DruidPredicateIndex; @@ -52,9 +56,11 @@ import org.apache.druid.segment.column.StringValueSetIndex; import org.apache.druid.segment.column.TypeStrategy; import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; +import org.apache.druid.utils.CompressionUtils; import org.junit.After; import org.junit.Assert; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; @@ -102,6 +108,12 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest ByteBuffer baseBuffer; + @BeforeClass + public static void staticSetup() + { + NestedDataModule.registerHandlersAndSerde(); + } + @Before public void setup() throws IOException { @@ -214,6 +226,49 @@ public class NestedDataColumnSupplierTest extends InitializedNullHandlingTest Assert.assertEquals(expectedReason, failureReason.get()); } + @Test + public void testLegacyV3ReaderFormat() throws IOException + { + String columnName = "shipTo"; + String firstValue = "Cole"; + File tmpLocation = tempFolder.newFolder(); + File v3Segment = new File( + NestedDataColumnSupplierTest.class.getClassLoader().getResource("nested_segment_v3/index.zip").getFile() + ); + CompressionUtils.unzip(v3Segment, tmpLocation); + try (Closer closer = Closer.create()) { + QueryableIndex theIndex = closer.register(TestHelper.getTestIndexIO().loadIndex(tmpLocation)); + ColumnHolder holder = theIndex.getColumnHolder(columnName); + Assert.assertNotNull(holder); + Assert.assertEquals(NestedDataComplexTypeSerde.TYPE, holder.getCapabilities().toColumnType()); + + NestedDataColumnV3 v3 = closer.register((NestedDataColumnV3) holder.getColumn()); + Assert.assertNotNull(v3); + + List path = ImmutableList.of(new NestedPathField("lastName")); + ColumnHolder nestedColumnHolder = v3.getColumnHolder(path); + Assert.assertNotNull(nestedColumnHolder); + Assert.assertEquals(ColumnType.STRING, nestedColumnHolder.getCapabilities().toColumnType()); + NestedFieldLiteralDictionaryEncodedColumn nestedColumn = + (NestedFieldLiteralDictionaryEncodedColumn) nestedColumnHolder.getColumn(); + + Assert.assertNotNull(nestedColumn); + + ColumnValueSelector selector = nestedColumn.makeColumnValueSelector( + new SimpleAscendingOffset(theIndex.getNumRows()) + ); + + ColumnIndexSupplier indexSupplier = v3.getColumnIndexSupplier(path); + Assert.assertNotNull(indexSupplier); + StringValueSetIndex valueSetIndex = indexSupplier.as(StringValueSetIndex.class); + Assert.assertNotNull(valueSetIndex); + + BitmapColumnIndex indexForValue = valueSetIndex.forValue(firstValue); + Assert.assertEquals(firstValue, selector.getObject()); + Assert.assertTrue(indexForValue.computeBitmapResult(resultFactory).get(0)); + } + } + private void smokeTest(NestedDataComplexColumn column) throws IOException { SimpleAscendingOffset offset = new SimpleAscendingOffset(data.size()); diff --git a/processing/src/test/resources/nested_segment_v3/index.zip b/processing/src/test/resources/nested_segment_v3/index.zip new file mode 100644 index 00000000000..a86282d75dd Binary files /dev/null and b/processing/src/test/resources/nested_segment_v3/index.zip differ diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java index af32ac68575..1e9239c70d0 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteNestedDataQueryTest.java @@ -2233,7 +2233,7 @@ public class CalciteNestedDataQueryTest extends BaseCalciteQueryTest ), ImmutableList.of( new Object[]{"[\"$\"]", 5L}, - new Object[]{"[\"$.n.x\",\"$.array[0]\",\"$.array[1]\"]", 2L} + new Object[]{"[\"$.array[1]\",\"$.array[0]\",\"$.n.x\"]", 2L} ), RowSignature.builder() .add("EXPR$0", ColumnType.STRING_ARRAY)