mirror of https://github.com/apache/druid.git
IcebergInputSource : Add option to toggle case sensitivity while reading columns from iceberg catalog (#16496)
* Toggle case sensitivity while reading columns from iceberg * Fix tests * Drop case check and set unconditionally
This commit is contained in:
parent
0936798122
commit
b53d75758f
|
@ -1061,20 +1061,22 @@ The catalog object supports `local` and `hive` catalog types.
|
|||
|
||||
The following table lists the properties of a `local` catalog:
|
||||
|
||||
|Property|Description|Required|
|
||||
|--------|-----------|---------|
|
||||
|type|Set this value to `local`.|yes|
|
||||
|warehousePath|The location of the warehouse associated with the catalog|yes|
|
||||
|catalogProperties|Map of any additional properties that needs to be attached to the catalog|no|
|
||||
|Property|Description|Default|Required|
|
||||
|--------|-----------|-------|---------|
|
||||
|type|Set this value to `local`.|None|yes|
|
||||
|warehousePath|The location of the warehouse associated with the catalog.|None|yes|
|
||||
|catalogProperties|Map of any additional properties that need to be attached to the catalog.|None|no|
|
||||
|caseSensitive|Whether column names are matched case-sensitively when reading from the Iceberg table. Set to `false` to match column names case-insensitively.|true|no|
|
||||
|
||||
The following table lists the properties of a `hive` catalog:
|
||||
|
||||
|Property|Description|Required|
|
||||
|--------|-----------|---------|
|
||||
|type|Set this value to `hive`.|yes|
|
||||
|warehousePath|The location of the warehouse associated with the catalog|yes|
|
||||
|catalogUri|The URI associated with the hive catalog|yes|
|
||||
|catalogProperties|Map of any additional properties that needs to be attached to the catalog|no|
|
||||
|Property|Description|Default|Required|
|
||||
|--------|-----------|-------|---------|
|
||||
|type|Set this value to `hive`.|None|yes|
|
||||
|warehousePath|The location of the warehouse associated with the catalog.|None|yes|
|
||||
|catalogUri|The URI associated with the hive catalog.|None|yes|
|
||||
|catalogProperties|Map of any additional properties that need to be attached to the catalog.|None|no|
|
||||
|caseSensitive|Whether column names are matched case-sensitively when reading from the Iceberg table. Set to `false` to match column names case-insensitively.|true|no|
|
||||
|
||||
### Iceberg filter object
|
||||
|
||||
|
|
|
@ -57,6 +57,9 @@ public class HiveIcebergCatalog extends IcebergCatalog
|
|||
@JsonProperty
|
||||
private Map<String, String> catalogProperties;
|
||||
|
||||
@JsonProperty
|
||||
private final Boolean caseSensitive;
|
||||
|
||||
private final Configuration configuration;
|
||||
|
||||
private BaseMetastoreCatalog hiveCatalog;
|
||||
|
@ -69,6 +72,7 @@ public class HiveIcebergCatalog extends IcebergCatalog
|
|||
@JsonProperty("catalogUri") String catalogUri,
|
||||
@JsonProperty("catalogProperties") @Nullable
|
||||
Map<String, Object> catalogProperties,
|
||||
@JsonProperty("caseSensitive") Boolean caseSensitive,
|
||||
@JacksonInject @Json ObjectMapper mapper,
|
||||
@JacksonInject @HiveConf Configuration configuration
|
||||
)
|
||||
|
@ -76,6 +80,7 @@ public class HiveIcebergCatalog extends IcebergCatalog
|
|||
this.warehousePath = Preconditions.checkNotNull(warehousePath, "warehousePath cannot be null");
|
||||
this.catalogUri = Preconditions.checkNotNull(catalogUri, "catalogUri cannot be null");
|
||||
this.catalogProperties = DynamicConfigProviderUtils.extraConfigAndSetStringMap(catalogProperties, DRUID_DYNAMIC_CONFIG_PROVIDER_KEY, mapper);
|
||||
this.caseSensitive = caseSensitive == null ? true : caseSensitive;
|
||||
this.configuration = configuration;
|
||||
this.catalogProperties
|
||||
.forEach(this.configuration::set);
|
||||
|
@ -137,4 +142,10 @@ public class HiveIcebergCatalog extends IcebergCatalog
|
|||
{
|
||||
return catalogProperties;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCaseSensitive()
|
||||
{
|
||||
return caseSensitive;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,6 +50,11 @@ public abstract class IcebergCatalog
|
|||
|
||||
public abstract BaseMetastoreCatalog retrieveCatalog();
|
||||
|
||||
public boolean isCaseSensitive()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the Iceberg data files up to the latest snapshot associated with the table
|
||||
*
|
||||
|
@ -92,6 +97,8 @@ public abstract class IcebergCatalog
|
|||
if (snapshotTime != null) {
|
||||
tableScan = tableScan.asOfTime(snapshotTime.getMillis());
|
||||
}
|
||||
|
||||
tableScan = tableScan.caseSensitive(isCaseSensitive());
|
||||
CloseableIterable<FileScanTask> tasks = tableScan.planFiles();
|
||||
CloseableIterable.transform(tasks, FileScanTask::file)
|
||||
.forEach(dataFile -> dataFilePaths.add(dataFile.path().toString()));
|
||||
|
|
|
@ -43,18 +43,23 @@ public class LocalCatalog extends IcebergCatalog
|
|||
@JsonProperty
|
||||
private final Map<String, String> catalogProperties;
|
||||
|
||||
@JsonProperty
|
||||
private final Boolean caseSensitive;
|
||||
|
||||
private BaseMetastoreCatalog catalog;
|
||||
|
||||
@JsonCreator
|
||||
public LocalCatalog(
|
||||
@JsonProperty("warehousePath") String warehousePath,
|
||||
@JsonProperty("catalogProperties") @Nullable
|
||||
Map<String, String> catalogProperties
|
||||
Map<String, String> catalogProperties,
|
||||
@JsonProperty("caseSensitive") Boolean caseSensitive
|
||||
)
|
||||
{
|
||||
Preconditions.checkNotNull(warehousePath, "warehousePath is null");
|
||||
this.warehousePath = warehousePath;
|
||||
this.catalogProperties = catalogProperties;
|
||||
this.caseSensitive = caseSensitive == null ? true : caseSensitive;
|
||||
this.catalog = retrieveCatalog();
|
||||
|
||||
}
|
||||
|
@ -71,6 +76,12 @@ public class LocalCatalog extends IcebergCatalog
|
|||
return catalogProperties;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isCaseSensitive()
|
||||
{
|
||||
return caseSensitive;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BaseMetastoreCatalog retrieveCatalog()
|
||||
{
|
||||
|
@ -100,12 +111,13 @@ public class LocalCatalog extends IcebergCatalog
|
|||
}
|
||||
LocalCatalog that = (LocalCatalog) o;
|
||||
return warehousePath.equals(that.warehousePath)
|
||||
&& Objects.equals(catalogProperties, that.catalogProperties);
|
||||
&& Objects.equals(catalogProperties, that.catalogProperties)
|
||||
&& Objects.equals(caseSensitive, that.caseSensitive);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode()
|
||||
{
|
||||
return Objects.hash(warehousePath, catalogProperties);
|
||||
return Objects.hash(warehousePath, catalogProperties, caseSensitive);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ public class HiveIcebergCatalogTest
|
|||
warehouseDir.getPath(),
|
||||
"hdfs://testuri",
|
||||
new HashMap<>(),
|
||||
true,
|
||||
mapper,
|
||||
new Configuration()
|
||||
);
|
||||
|
@ -49,6 +50,7 @@ public class HiveIcebergCatalogTest
|
|||
warehouseDir.getPath(),
|
||||
"hdfs://testuri",
|
||||
null,
|
||||
null,
|
||||
mapper,
|
||||
new Configuration()
|
||||
);
|
||||
|
@ -68,6 +70,7 @@ public class HiveIcebergCatalogTest
|
|||
warehouseDir.getPath(),
|
||||
"hdfs://testuri",
|
||||
catalogMap,
|
||||
null,
|
||||
mapper,
|
||||
new Configuration()
|
||||
);
|
||||
|
|
|
@ -67,6 +67,7 @@ public class IcebergInputSourceTest
|
|||
|
||||
private IcebergCatalog testCatalog;
|
||||
private TableIdentifier tableIdentifier;
|
||||
private File warehouseDir;
|
||||
|
||||
private Schema tableSchema = new Schema(
|
||||
Types.NestedField.required(1, "id", Types.StringType.get()),
|
||||
|
@ -80,8 +81,8 @@ public class IcebergInputSourceTest
|
|||
@Before
|
||||
public void setup() throws IOException
|
||||
{
|
||||
final File warehouseDir = FileUtils.createTempDir();
|
||||
testCatalog = new LocalCatalog(warehouseDir.getPath(), new HashMap<>());
|
||||
warehouseDir = FileUtils.createTempDir();
|
||||
testCatalog = new LocalCatalog(warehouseDir.getPath(), new HashMap<>(), true);
|
||||
tableIdentifier = TableIdentifier.of(Namespace.of(NAMESPACE), TABLENAME);
|
||||
|
||||
createAndLoadTable(tableIdentifier);
|
||||
|
@ -187,6 +188,33 @@ public class IcebergInputSourceTest
|
|||
Assert.assertEquals(1, splits.count());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCaseInsensitiveFiltering() throws IOException
|
||||
{
|
||||
LocalCatalog caseInsensitiveCatalog = new LocalCatalog(warehouseDir.getPath(), new HashMap<>(), false);
|
||||
Table icebergTableFromSchema = testCatalog.retrieveCatalog().loadTable(tableIdentifier);
|
||||
|
||||
icebergTableFromSchema.updateSchema().renameColumn("name", "Name").commit();
|
||||
IcebergInputSource inputSource = new IcebergInputSource(
|
||||
TABLENAME,
|
||||
NAMESPACE,
|
||||
new IcebergEqualsFilter("name", "Foo"),
|
||||
caseInsensitiveCatalog,
|
||||
new LocalInputSourceFactory(),
|
||||
null
|
||||
);
|
||||
|
||||
Stream<InputSplit<List<String>>> splits = inputSource.createSplits(null, new MaxSizeSplitHintSpec(null, null));
|
||||
List<File> localInputSourceList = splits.map(inputSource::withSplit)
|
||||
.map(inpSource -> (LocalInputSource) inpSource)
|
||||
.map(LocalInputSource::getFiles)
|
||||
.flatMap(List::stream)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
Assert.assertEquals(1, inputSource.estimateNumSplits(null, new MaxSizeSplitHintSpec(1L, null)));
|
||||
Assert.assertEquals(1, localInputSourceList.size());
|
||||
}
|
||||
|
||||
@After
|
||||
public void tearDown()
|
||||
{
|
||||
|
@ -197,7 +225,6 @@ public class IcebergInputSourceTest
|
|||
{
|
||||
//Setup iceberg table and schema
|
||||
Table icebergTableFromSchema = testCatalog.retrieveCatalog().createTable(tableIdentifier, tableSchema);
|
||||
|
||||
//Generate an iceberg record and write it to a file
|
||||
GenericRecord record = GenericRecord.create(tableSchema);
|
||||
ImmutableList.Builder<GenericRecord> builder = ImmutableList.builder();
|
||||
|
|
|
@ -36,7 +36,7 @@ public class LocalCatalogTest
|
|||
{
|
||||
final File warehouseDir = FileUtils.createTempDir();
|
||||
DefaultObjectMapper mapper = new DefaultObjectMapper();
|
||||
LocalCatalog before = new LocalCatalog(warehouseDir.getPath(), new HashMap<>());
|
||||
LocalCatalog before = new LocalCatalog(warehouseDir.getPath(), new HashMap<>(), true);
|
||||
LocalCatalog after = mapper.readValue(
|
||||
mapper.writeValueAsString(before), LocalCatalog.class);
|
||||
Assert.assertEquals(before, after);
|
||||
|
|
Loading…
Reference in New Issue