Mirror of https://github.com/apache/druid.git, synced 2025-02-09 03:24:55 +00:00

Merge pull request #1753 from jon-wei/segmentmetadataquery_flags

Allow SegmentMetadataQuery to skip cardinality and size calculations

Commit: 2cb0fb4669
Documentation (segment metadata query):

```diff
@@ -29,6 +29,7 @@ There are several main parts to a segment metadata query:
 |toInclude|A JSON Object representing what columns should be included in the result. Defaults to "all".|no|
 |merge|Merge all individual segment metadata results into a single result|no|
 |context|See [Context](../querying/query-context.html)|no|
+|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "size"]. See section [analysisTypes](#analysistypes) for more details.|no|
 
 The format of the result is:
```
````diff
@@ -86,3 +87,21 @@ The grammar is as follows:
 ``` json
 "toInclude": { "type": "list", "columns": [<string list of column names>]}
 ```
+
+### analysisTypes
+
+This is a list of properties that determines the amount of information returned about the columns, i.e. analyses to be performed on the columns.
+
+By default, all analysis types will be used. If a property is not needed, omitting it from this list will result in a more efficient query.
+
+There are 2 types of column analyses:
+
+#### cardinality
+
+* Estimated floor of cardinality for each column. Only relevant for dimension columns.
+
+#### size
+
+* Estimated byte size for the segment columns if they were stored in a flat format
+
+* Estimated total segment byte size if it was stored in a flat format
````
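As a usage illustration (not part of the patch; the dataSource and interval below are placeholders), a query that asks for cardinality only, skipping the more expensive size scan:

``` json
{
  "queryType": "segmentMetadata",
  "dataSource": "sample_datasource",
  "intervals": ["2013-01-01/2014-01-01"],
  "analysisTypes": ["cardinality"]
}
```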
Druids.java (query builder):

```diff
@@ -905,6 +905,7 @@ public class Druids
           toInclude,
           merge,
           context,
+          null,
           false
       );
     }
```
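The builder passes null for the new analysisTypes argument; as the constructor change further down shows, a null value falls back to DEFAULT_ANALYSIS_TYPES (cardinality and size), so existing builder callers keep their previous behavior.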
SegmentAnalyzer.java:

```diff
@@ -26,6 +26,7 @@ import com.google.common.primitives.Longs;
 import com.metamx.common.logger.Logger;
 import com.metamx.common.StringUtils;
 import io.druid.query.metadata.metadata.ColumnAnalysis;
+import io.druid.query.metadata.metadata.SegmentMetadataQuery;
 import io.druid.segment.QueryableIndex;
 import io.druid.segment.StorageAdapter;
 import io.druid.segment.column.BitmapIndex;
@@ -38,6 +39,7 @@ import io.druid.segment.serde.ComplexMetricSerde;
 import io.druid.segment.serde.ComplexMetrics;
 
 import java.util.Collections;
+import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
@@ -55,7 +57,7 @@ public class SegmentAnalyzer
    */
   private static final int NUM_BYTES_IN_TEXT_FLOAT = 8;
 
-  public Map<String, ColumnAnalysis> analyze(QueryableIndex index)
+  public Map<String, ColumnAnalysis> analyze(QueryableIndex index, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
     Preconditions.checkNotNull(index, "Index cannot be null");
 
@@ -69,16 +71,16 @@ public class SegmentAnalyzer
       final ValueType type = capabilities.getType();
       switch (type) {
         case LONG:
-          analysis = analyzeLongColumn(column);
+          analysis = analyzeLongColumn(column, analysisTypes);
           break;
         case FLOAT:
-          analysis = analyzeFloatColumn(column);
+          analysis = analyzeFloatColumn(column, analysisTypes);
           break;
         case STRING:
-          analysis = analyzeStringColumn(column);
+          analysis = analyzeStringColumn(column, analysisTypes);
           break;
         case COMPLEX:
-          analysis = analyzeComplexColumn(column);
+          analysis = analyzeComplexColumn(column, analysisTypes);
           break;
         default:
           log.warn("Unknown column type[%s].", type);
@@ -90,13 +92,13 @@ public class SegmentAnalyzer
 
     columns.put(
         Column.TIME_COLUMN_NAME,
-        lengthBasedAnalysis(index.getColumn(Column.TIME_COLUMN_NAME), NUM_BYTES_IN_TIMESTAMP)
+        lengthBasedAnalysis(index.getColumn(Column.TIME_COLUMN_NAME), NUM_BYTES_IN_TIMESTAMP, analysisTypes)
     );
 
     return columns;
   }
 
-  public Map<String, ColumnAnalysis> analyze(StorageAdapter adapter)
+  public Map<String, ColumnAnalysis> analyze(StorageAdapter adapter, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
     Preconditions.checkNotNull(adapter, "Adapter cannot be null");
     Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
@@ -114,16 +116,34 @@ public class SegmentAnalyzer
       ValueType capType = capabilities.getType();
       switch (capType) {
         case LONG:
-          analysis = lengthBasedAnalysisForAdapter(capType.name(), capabilities, numRows, Longs.BYTES);
+          analysis = lengthBasedAnalysisForAdapter(
+              analysisTypes,
+              capType.name(), capabilities,
+              numRows, Longs.BYTES
+          );
           break;
         case FLOAT:
-          analysis = lengthBasedAnalysisForAdapter(capType.name(), capabilities, numRows, NUM_BYTES_IN_TEXT_FLOAT);
+          analysis = lengthBasedAnalysisForAdapter(
+              analysisTypes,
+              capType.name(), capabilities,
+              numRows, NUM_BYTES_IN_TEXT_FLOAT
+          );
           break;
         case STRING:
-          analysis = new ColumnAnalysis(capType.name(), 0, adapter.getDimensionCardinality(columnName), null);
+          analysis = new ColumnAnalysis(
+              capType.name(),
+              0,
+              analysisHasCardinality(analysisTypes) ? adapter.getDimensionCardinality(columnName) : 0,
+              null
+          );
           break;
         case COMPLEX:
-          analysis = new ColumnAnalysis(capType.name(), 0, null, null);
+          analysis = new ColumnAnalysis(
+              capType.name(),
+              0,
+              null,
+              null
+          );
           break;
         default:
           log.warn("Unknown column type[%s].", capType);
@@ -135,33 +155,39 @@ public class SegmentAnalyzer
 
     columns.put(
         Column.TIME_COLUMN_NAME,
-        lengthBasedAnalysisForAdapter(ValueType.LONG.name(), null, numRows, NUM_BYTES_IN_TIMESTAMP)
+        lengthBasedAnalysisForAdapter(analysisTypes, ValueType.LONG.name(), null, numRows, NUM_BYTES_IN_TIMESTAMP)
     );
 
     return columns;
   }
 
-  public ColumnAnalysis analyzeLongColumn(Column column)
+  public ColumnAnalysis analyzeLongColumn(Column column, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
-    return lengthBasedAnalysis(column, Longs.BYTES);
+    return lengthBasedAnalysis(column, Longs.BYTES, analysisTypes);
   }
 
-  public ColumnAnalysis analyzeFloatColumn(Column column)
+  public ColumnAnalysis analyzeFloatColumn(Column column, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
  {
-    return lengthBasedAnalysis(column, NUM_BYTES_IN_TEXT_FLOAT);
+    return lengthBasedAnalysis(column, NUM_BYTES_IN_TEXT_FLOAT, analysisTypes);
   }
 
-  private ColumnAnalysis lengthBasedAnalysis(Column column, final int numBytes)
+  private ColumnAnalysis lengthBasedAnalysis(Column column, final int numBytes, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
     final ColumnCapabilities capabilities = column.getCapabilities();
     if (capabilities.hasMultipleValues()) {
       return ColumnAnalysis.error("multi_value");
     }
 
-    return new ColumnAnalysis(capabilities.getType().name(), column.getLength() * numBytes, null, null);
+    int size = 0;
+    if (analysisHasSize(analysisTypes)) {
+      size = column.getLength() * numBytes;
+    }
+
+    return new ColumnAnalysis(capabilities.getType().name(), size, null, null);
   }
 
-  public ColumnAnalysis analyzeStringColumn(Column column)
+  public ColumnAnalysis analyzeStringColumn(Column column, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
     final ColumnCapabilities capabilities = column.getCapabilities();
 
@@ -170,21 +196,28 @@ public class SegmentAnalyzer
 
       int cardinality = bitmapIndex.getCardinality();
       long size = 0;
-      for (int i = 0; i < cardinality; ++i) {
-        String value = bitmapIndex.getValue(i);
-
-        if (value != null) {
-          size += StringUtils.toUtf8(value).length * bitmapIndex.getBitmap(value).size();
+      if (analysisHasSize(analysisTypes)) {
+        for (int i = 0; i < cardinality; ++i) {
+          String value = bitmapIndex.getValue(i);
+          if (value != null) {
+            size += StringUtils.toUtf8(value).length * bitmapIndex.getBitmap(value).size();
+          }
         }
       }
 
-      return new ColumnAnalysis(capabilities.getType().name(), size, cardinality, null);
+      return new ColumnAnalysis(
+          capabilities.getType().name(),
+          size,
+          analysisHasCardinality(analysisTypes) ? cardinality : 0,
+          null
+      );
     }
 
     return ColumnAnalysis.error("string_no_bitmap");
   }
 
-  public ColumnAnalysis analyzeComplexColumn(Column column)
+  public ColumnAnalysis analyzeComplexColumn(Column column, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
     final ColumnCapabilities capabilities = column.getCapabilities();
     final ComplexColumn complexColumn = column.getComplexColumn();
@@ -202,8 +235,10 @@ public class SegmentAnalyzer
 
     final int length = column.getLength();
     long size = 0;
-    for (int i = 0; i < length; ++i) {
-      size += inputSizeFn.apply(complexColumn.getRowValue(i));
+    if (analysisHasSize(analysisTypes)) {
+      for (int i = 0; i < length; ++i) {
+        size += inputSizeFn.apply(complexColumn.getRowValue(i));
+      }
     }
 
     return new ColumnAnalysis(typeName, size, null, null);
@@ -220,6 +255,7 @@ public class SegmentAnalyzer
   }
 
   private ColumnAnalysis lengthBasedAnalysisForAdapter(
+      EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes,
       String type, ColumnCapabilities capabilities,
       int numRows, final int numBytes
   )
@@ -227,7 +263,20 @@ public class SegmentAnalyzer
     if (capabilities != null && capabilities.hasMultipleValues()) {
       return ColumnAnalysis.error("multi_value");
     }
-    return new ColumnAnalysis(type, numRows * numBytes, null, null);
+    return new ColumnAnalysis(
+        type,
+        analysisHasSize(analysisTypes) ? numRows * numBytes : 0,
+        null,
+        null
+    );
+  }
+
+  private boolean analysisHasSize(EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
+    return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.SIZE);
+  }
+
+  private boolean analysisHasCardinality(EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
+    return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.CARDINALITY);
   }
 
 }
```
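The two helpers at the bottom are the crux of the change: every potentially expensive computation is now guarded by an EnumSet membership test, so unrequested analyses cost nothing. A minimal, self-contained sketch of the same gating pattern (toy names, not the Druid API):

```java
import java.util.EnumSet;

// Toy illustration of the gating pattern used by SegmentAnalyzer above.
public class AnalysisGatingSketch
{
  enum AnalysisType { CARDINALITY, SIZE }

  // Mirrors lengthBasedAnalysisForAdapter: pay for the size estimate only on request.
  static long estimatedSize(int numRows, int numBytes, EnumSet<AnalysisType> requested)
  {
    return requested.contains(AnalysisType.SIZE) ? (long) numRows * numBytes : 0L;
  }

  public static void main(String[] args)
  {
    EnumSet<AnalysisType> all = EnumSet.allOf(AnalysisType.class);
    EnumSet<AnalysisType> none = EnumSet.noneOf(AnalysisType.class);
    System.out.println(estimatedSize(1_000_000, 8, all));  // 8000000
    System.out.println(estimatedSize(1_000_000, 8, none)); // 0 -- analysis skipped
  }
}
```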
SegmentMetadataQueryRunnerFactory.java:

```diff
@@ -42,6 +42,7 @@ import io.druid.query.metadata.metadata.SegmentAnalysis;
 import io.druid.query.metadata.metadata.SegmentMetadataQuery;
 import io.druid.segment.QueryableIndex;
 import io.druid.segment.Segment;
+import io.druid.segment.StorageAdapter;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -82,15 +83,23 @@ public class SegmentMetadataQueryRunnerFactory implements QueryRunnerFactory<Seg
         SegmentMetadataQuery query = (SegmentMetadataQuery) inQ;
 
         final QueryableIndex index = segment.asQueryableIndex();
 
         final Map<String, ColumnAnalysis> analyzedColumns;
+        final int numRows;
         long totalSize = 0;
         if (index == null) {
           // IncrementalIndexSegments (used by in-memory hydrants in the realtime service) do not have a QueryableIndex
-          analyzedColumns = analyzer.analyze(segment.asStorageAdapter());
+          StorageAdapter segmentAdapter = segment.asStorageAdapter();
+          analyzedColumns = analyzer.analyze(segmentAdapter, query.getAnalysisTypes());
+          numRows = segmentAdapter.getNumRows();
         } else {
-          analyzedColumns = analyzer.analyze(index);
+          analyzedColumns = analyzer.analyze(index, query.getAnalysisTypes());
+          numRows = index.getNumRows();
+        }
+
+        if (query.hasSize()) {
           // Initialize with the size of the whitespace, 1 byte per
-          totalSize = analyzedColumns.size() * index.getNumRows();
+          totalSize = analyzedColumns.size() * numRows;
         }
 
         Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
```
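Two things happen in this hunk: numRows is captured from whichever segment form exists (StorageAdapter for in-memory incremental segments, QueryableIndex otherwise), and the whitespace-derived totalSize seed, one byte per analyzed column per row (e.g. 12 columns over 10,000 rows would seed 120,000 bytes), is now computed only when the query requests size analysis.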
SegmentMetadataQuery.java:

```diff
@@ -19,6 +19,7 @@ package io.druid.query.metadata.metadata;
 
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonValue;
 import com.google.common.base.Preconditions;
 import io.druid.common.utils.JodaUtils;
 import io.druid.query.BaseQuery;
@@ -30,17 +31,43 @@ import io.druid.query.spec.QuerySegmentSpec;
 import org.joda.time.Interval;
 
 import java.util.Arrays;
+import java.util.EnumSet;
 import java.util.Map;
 
 public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
 {
+  public enum AnalysisType
+  {
+    CARDINALITY,
+    SIZE;
+
+    @JsonValue
+    @Override
+    public String toString() {
+      return this.name().toLowerCase();
+    }
+
+    @JsonCreator
+    public static AnalysisType fromString(String name) {
+      return valueOf(name.toUpperCase());
+    }
+  }
+
   public static final Interval DEFAULT_INTERVAL = new Interval(
       JodaUtils.MIN_INSTANT, JodaUtils.MAX_INSTANT
   );
 
+  public static final EnumSet<AnalysisType> DEFAULT_ANALYSIS_TYPES = EnumSet.of(
+      AnalysisType.CARDINALITY,
+      AnalysisType.SIZE
+  );
+
   private final ColumnIncluderator toInclude;
   private final boolean merge;
   private final boolean usingDefaultInterval;
+  private final EnumSet analysisTypes;
 
   @JsonCreator
   public SegmentMetadataQuery(
```
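The @JsonValue/@JsonCreator pair on the enum is what makes the lowercase strings in query JSON round-trip. A self-contained sketch of that serialization behavior, using a standalone copy of the enum and a plain Jackson ObjectMapper:

```java
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonValue;
import com.fasterxml.jackson.databind.ObjectMapper;

public class AnalysisTypeJsonDemo
{
  // Standalone copy of the enum added above, for illustration only.
  public enum AnalysisType
  {
    CARDINALITY,
    SIZE;

    @JsonValue
    @Override
    public String toString()
    {
      return this.name().toLowerCase();
    }

    @JsonCreator
    public static AnalysisType fromString(String name)
    {
      return valueOf(name.toUpperCase());
    }
  }

  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();
    String json = mapper.writeValueAsString(AnalysisType.CARDINALITY);
    System.out.println(json); // "cardinality" -- lowercase on the wire via @JsonValue
    AnalysisType back = mapper.readValue(json, AnalysisType.class);
    System.out.println(back); // cardinality -- parsed back via the @JsonCreator factory
  }
}
```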
```diff
@@ -49,6 +76,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
       @JsonProperty("toInclude") ColumnIncluderator toInclude,
       @JsonProperty("merge") Boolean merge,
       @JsonProperty("context") Map<String, Object> context,
+      @JsonProperty("analysisTypes") EnumSet<AnalysisType> analysisTypes,
       @JsonProperty("usingDefaultInterval") Boolean useDefaultInterval
   )
   {
@@ -64,9 +92,9 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
     } else {
       this.usingDefaultInterval = useDefaultInterval == null ? false : useDefaultInterval;
     }
 
     this.toInclude = toInclude == null ? new AllColumnIncluderator() : toInclude;
     this.merge = merge == null ? false : merge;
+    this.analysisTypes = (analysisTypes == null) ? DEFAULT_ANALYSIS_TYPES : analysisTypes;
     Preconditions.checkArgument(
         dataSource instanceof TableDataSource,
         "SegmentMetadataQuery only supports table datasource"
@@ -103,6 +131,22 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
     return Query.SEGMENT_METADATA;
   }
 
+  @JsonProperty
+  public EnumSet getAnalysisTypes()
+  {
+    return analysisTypes;
+  }
+
+  public boolean hasCardinality()
+  {
+    return analysisTypes.contains(AnalysisType.CARDINALITY);
+  }
+
+  public boolean hasSize()
+  {
+    return analysisTypes.contains(AnalysisType.SIZE);
+  }
+
   @Override
   public Query<SegmentAnalysis> withOverriddenContext(Map<String, Object> contextOverride)
   {
@@ -112,6 +156,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
         toInclude,
         merge,
         computeOverridenContext(contextOverride),
+        analysisTypes,
         usingDefaultInterval
     );
   }
@@ -125,6 +170,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
         toInclude,
         merge,
         getContext(),
+        analysisTypes,
         usingDefaultInterval
     );
   }
@@ -138,6 +184,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
         toInclude,
         merge,
         getContext(),
+        analysisTypes,
         usingDefaultInterval
     );
   }
```
SegmentAnalyzerTest.java:

```diff
@@ -17,9 +17,11 @@
 
 package io.druid.query.metadata;
 
+import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.metamx.common.guava.Sequences;
 import io.druid.query.LegacyDataSource;
+import io.druid.query.Query;
 import io.druid.query.QueryRunner;
 import io.druid.query.QueryRunnerFactory;
 import io.druid.query.QueryRunnerTestHelper;
@@ -32,9 +34,11 @@ import io.druid.segment.QueryableIndexSegment;
 import io.druid.segment.Segment;
 import io.druid.segment.TestIndex;
 import io.druid.segment.column.ValueType;
+import org.joda.time.Interval;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -43,11 +47,21 @@ import java.util.Map;
  */
 public class SegmentAnalyzerTest
 {
+  private static final EnumSet<SegmentMetadataQuery.AnalysisType> emptyAnalyses =
+      EnumSet.noneOf(SegmentMetadataQuery.AnalysisType.class);
+
   @Test
   public void testIncrementalWorks() throws Exception
+  {
+    testIncrementalWorksHelper(null);
+    testIncrementalWorksHelper(emptyAnalyses);
+  }
+
+  private void testIncrementalWorksHelper(EnumSet<SegmentMetadataQuery.AnalysisType> analyses) throws Exception
   {
     final List<SegmentAnalysis> results = getSegmentAnalysises(
-        new IncrementalIndexSegment(TestIndex.getIncrementalTestIndex(false), null)
+        new IncrementalIndexSegment(TestIndex.getIncrementalTestIndex(false), null),
+        analyses
     );
 
     Assert.assertEquals(1, results.size());
@@ -61,28 +75,44 @@ public class SegmentAnalyzerTest
         TestIndex.COLUMNS.length,
         columns.size()
     ); // All columns including time and empty/null column
 
     for (String dimension : TestIndex.DIMENSIONS) {
       final ColumnAnalysis columnAnalysis = columns.get(dimension);
 
       Assert.assertEquals(dimension, ValueType.STRING.name(), columnAnalysis.getType());
-      Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
+      if (analyses == null) {
+        Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
+      } else {
+        Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue());
+        Assert.assertEquals(dimension, 0, columnAnalysis.getSize());
+      }
     }
 
     for (String metric : TestIndex.METRICS) {
       final ColumnAnalysis columnAnalysis = columns.get(metric);
 
       Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType());
-      Assert.assertTrue(metric, columnAnalysis.getSize() > 0);
+      if (analyses == null) {
+        Assert.assertTrue(metric, columnAnalysis.getSize() > 0);
+      } else {
+        Assert.assertEquals(metric, 0, columnAnalysis.getSize());
+      }
       Assert.assertNull(metric, columnAnalysis.getCardinality());
     }
   }
 
   @Test
   public void testMappedWorks() throws Exception
+  {
+    testMappedWorksHelper(null);
+    testMappedWorksHelper(emptyAnalyses);
+  }
+
+  private void testMappedWorksHelper(EnumSet<SegmentMetadataQuery.AnalysisType> analyses) throws Exception
   {
     final List<SegmentAnalysis> results = getSegmentAnalysises(
-        new QueryableIndexSegment("test_1", TestIndex.getMMappedTestIndex())
+        new QueryableIndexSegment("test_1", TestIndex.getMMappedTestIndex()),
+        analyses
     );
 
     Assert.assertEquals(1, results.size());
@@ -102,8 +132,13 @@ public class SegmentAnalyzerTest
         Assert.assertNull(columnAnalysis);
       } else {
         Assert.assertEquals(dimension, ValueType.STRING.name(), columnAnalysis.getType());
-        Assert.assertTrue(dimension, columnAnalysis.getSize() > 0);
-        Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
+        if (analyses == null) {
+          Assert.assertTrue(dimension, columnAnalysis.getSize() > 0);
+          Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
+        } else {
+          Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue());
+          Assert.assertEquals(dimension, 0, columnAnalysis.getSize());
+        }
       }
     }
 
@@ -111,7 +146,11 @@ public class SegmentAnalyzerTest
       final ColumnAnalysis columnAnalysis = columns.get(metric);
 
       Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType());
-      Assert.assertTrue(metric, columnAnalysis.getSize() > 0);
+      if (analyses == null) {
+        Assert.assertTrue(metric, columnAnalysis.getSize() > 0);
+      } else {
+        Assert.assertEquals(metric, 0, columnAnalysis.getSize());
+      }
       Assert.assertNull(metric, columnAnalysis.getCardinality());
     }
   }
@@ -123,7 +162,7 @@ public class SegmentAnalyzerTest
    *
    * @return
    */
-  private List<SegmentAnalysis> getSegmentAnalysises(Segment index)
+  private List<SegmentAnalysis> getSegmentAnalysises(Segment index, EnumSet<SegmentMetadataQuery.AnalysisType> analyses)
   {
     final QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner(
         (QueryRunnerFactory) new SegmentMetadataQueryRunnerFactory(
@@ -133,7 +172,7 @@ public class SegmentAnalyzerTest
     );
 
     final SegmentMetadataQuery query = new SegmentMetadataQuery(
-        new LegacyDataSource("test"), QuerySegmentSpecs.create("2011/2012"), null, null, null, false
+        new LegacyDataSource("test"), QuerySegmentSpecs.create("2011/2012"), null, null, null, analyses, false
    );
    HashMap<String, Object> context = new HashMap<String, Object>();
    return Sequences.toList(query.run(runner, context), Lists.<SegmentAnalysis>newArrayList());
```
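Pulling the API together, a hypothetical caller that requests only cardinality analysis; this is a sketch mirroring the constructor call in the test above, not documented Druid usage:

```java
import io.druid.query.LegacyDataSource;
import io.druid.query.metadata.metadata.SegmentMetadataQuery;
import io.druid.query.spec.QuerySegmentSpecs;

import java.util.EnumSet;

public class CardinalityOnlyQueryExample
{
  public static SegmentMetadataQuery cardinalityOnly()
  {
    // Ask for cardinality alone; the per-value size scan is then skipped entirely.
    return new SegmentMetadataQuery(
        new LegacyDataSource("test"),
        QuerySegmentSpecs.create("2011/2012"),
        null,   // toInclude -> defaults to AllColumnIncluderator
        null,   // merge -> defaults to false
        null,   // context
        EnumSet.of(SegmentMetadataQuery.AnalysisType.CARDINALITY),
        false   // usingDefaultInterval
    );
  }
}
```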
SegmentMetadataQueryTest.java:

```diff
@@ -50,6 +50,7 @@ import org.junit.Assert;
 import org.junit.Test;
 
 import java.util.Arrays;
+import java.util.EnumSet;
 import java.util.List;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -164,12 +165,20 @@ public class SegmentMetadataQueryTest
     String queryStr = "{\n"
                       + "  \"queryType\":\"segmentMetadata\",\n"
                       + "  \"dataSource\":\"test_ds\",\n"
-                      + "  \"intervals\":[\"2013-12-04T00:00:00.000Z/2013-12-05T00:00:00.000Z\"]\n"
+                      + "  \"intervals\":[\"2013-12-04T00:00:00.000Z/2013-12-05T00:00:00.000Z\"],\n"
+                      + "  \"analysisTypes\":[\"cardinality\",\"size\"]\n"
                       + "}";
 
+    EnumSet<SegmentMetadataQuery.AnalysisType> expectedAnalysisTypes = EnumSet.of(
+        SegmentMetadataQuery.AnalysisType.CARDINALITY,
+        SegmentMetadataQuery.AnalysisType.SIZE
+    );
+
     Query query = mapper.readValue(queryStr, Query.class);
     Assert.assertTrue(query instanceof SegmentMetadataQuery);
     Assert.assertEquals("test_ds", Iterables.getOnlyElement(query.getDataSource().getNames()));
     Assert.assertEquals(new Interval("2013-12-04T00:00:00.000Z/2013-12-05T00:00:00.000Z"), query.getIntervals().get(0));
+    Assert.assertEquals(expectedAnalysisTypes, ((SegmentMetadataQuery) query).getAnalysisTypes());
 
     // test serialize and deserialize
     Assert.assertEquals(query, mapper.readValue(mapper.writeValueAsString(query), Query.class));
```