Merge pull request #1753 from jon-wei/segmentmetadataquery_flags

Allow SegmentMetadataQuery to skip cardinality and size calculations
Xavier Léauté 2015-09-22 14:52:28 -07:00
commit 2cb0fb4669
7 changed files with 216 additions and 43 deletions

View File

@@ -29,6 +29,7 @@ There are several main parts to a segment metadata query:
 |toInclude|A JSON Object representing what columns should be included in the result. Defaults to "all".|no|
 |merge|Merge all individual segment metadata results into a single result|no|
 |context|See [Context](../querying/query-context.html)|no|
+|analysisTypes|A list of Strings specifying what column properties (e.g. cardinality, size) should be calculated and returned in the result. Defaults to ["cardinality", "size"]. See section [analysisTypes](#analysistypes) for more details.|no|
 
 The format of the result is:
@@ -86,3 +87,21 @@ The grammar is as follows:
 ``` json
 "toInclude": { "type": "list", "columns": [<string list of column names>]}
 ```
+
+### analysisTypes
+
+This is a list of properties that determines which analyses are performed on the segment's columns, and therefore how much information is returned about them.
+
+By default, all analysis types are used. Omitting a property that is not needed makes the query cheaper to compute.
+
+There are two types of column analysis:
+
+#### cardinality
+
+* Estimated floor of cardinality for each column. Only relevant for dimension columns.
+
+#### size
+
+* Estimated byte size of each segment column if it were stored in a flat format.
+* Estimated total byte size of the segment if it were stored in a flat format.
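
For example, a query of the following shape (the `dataSource` and `intervals` values here are placeholders) computes only cardinality and skips the more expensive size estimation:

``` json
{
  "queryType": "segmentMetadata",
  "dataSource": "sample_datasource",
  "intervals": ["2013-01-01/2014-01-01"],
  "analysisTypes": ["cardinality"]
}
```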

View File

@@ -905,6 +905,7 @@ public class Druids
           toInclude,
           merge,
           context,
+          null,
           false
       );
     }

View File

@@ -26,6 +26,7 @@ import com.google.common.primitives.Longs;
 import com.metamx.common.logger.Logger;
 import com.metamx.common.StringUtils;
 import io.druid.query.metadata.metadata.ColumnAnalysis;
+import io.druid.query.metadata.metadata.SegmentMetadataQuery;
 import io.druid.segment.QueryableIndex;
 import io.druid.segment.StorageAdapter;
 import io.druid.segment.column.BitmapIndex;
@@ -38,6 +39,7 @@ import io.druid.segment.serde.ComplexMetricSerde;
 import io.druid.segment.serde.ComplexMetrics;
 
 import java.util.Collections;
+import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
@@ -55,7 +57,7 @@ public class SegmentAnalyzer
    */
   private static final int NUM_BYTES_IN_TEXT_FLOAT = 8;
 
-  public Map<String, ColumnAnalysis> analyze(QueryableIndex index)
+  public Map<String, ColumnAnalysis> analyze(QueryableIndex index, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
     Preconditions.checkNotNull(index, "Index cannot be null");
@@ -69,16 +71,16 @@ public class SegmentAnalyzer
       final ValueType type = capabilities.getType();
       switch (type) {
         case LONG:
-          analysis = analyzeLongColumn(column);
+          analysis = analyzeLongColumn(column, analysisTypes);
           break;
         case FLOAT:
-          analysis = analyzeFloatColumn(column);
+          analysis = analyzeFloatColumn(column, analysisTypes);
           break;
         case STRING:
-          analysis = analyzeStringColumn(column);
+          analysis = analyzeStringColumn(column, analysisTypes);
           break;
         case COMPLEX:
-          analysis = analyzeComplexColumn(column);
+          analysis = analyzeComplexColumn(column, analysisTypes);
           break;
         default:
           log.warn("Unknown column type[%s].", type);
@@ -90,13 +92,13 @@ public class SegmentAnalyzer
     columns.put(
         Column.TIME_COLUMN_NAME,
-        lengthBasedAnalysis(index.getColumn(Column.TIME_COLUMN_NAME), NUM_BYTES_IN_TIMESTAMP)
+        lengthBasedAnalysis(index.getColumn(Column.TIME_COLUMN_NAME), NUM_BYTES_IN_TIMESTAMP, analysisTypes)
     );
 
     return columns;
   }
 
-  public Map<String, ColumnAnalysis> analyze(StorageAdapter adapter)
+  public Map<String, ColumnAnalysis> analyze(StorageAdapter adapter, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
     Preconditions.checkNotNull(adapter, "Adapter cannot be null");
     Map<String, ColumnAnalysis> columns = Maps.newTreeMap();
@@ -114,16 +116,34 @@ public class SegmentAnalyzer
       ValueType capType = capabilities.getType();
       switch (capType) {
         case LONG:
-          analysis = lengthBasedAnalysisForAdapter(capType.name(), capabilities, numRows, Longs.BYTES);
+          analysis = lengthBasedAnalysisForAdapter(
+              analysisTypes,
+              capType.name(), capabilities,
+              numRows, Longs.BYTES
+          );
           break;
         case FLOAT:
-          analysis = lengthBasedAnalysisForAdapter(capType.name(), capabilities, numRows, NUM_BYTES_IN_TEXT_FLOAT);
+          analysis = lengthBasedAnalysisForAdapter(
+              analysisTypes,
+              capType.name(), capabilities,
+              numRows, NUM_BYTES_IN_TEXT_FLOAT
+          );
           break;
         case STRING:
-          analysis = new ColumnAnalysis(capType.name(), 0, adapter.getDimensionCardinality(columnName), null);
+          analysis = new ColumnAnalysis(
+              capType.name(),
+              0,
+              analysisHasCardinality(analysisTypes) ? adapter.getDimensionCardinality(columnName) : 0,
+              null
+          );
           break;
         case COMPLEX:
-          analysis = new ColumnAnalysis(capType.name(), 0, null, null);
+          analysis = new ColumnAnalysis(
+              capType.name(),
+              0,
+              null,
+              null
+          );
           break;
         default:
          log.warn("Unknown column type[%s].", capType);
@@ -135,33 +155,39 @@ public class SegmentAnalyzer
     columns.put(
         Column.TIME_COLUMN_NAME,
-        lengthBasedAnalysisForAdapter(ValueType.LONG.name(), null, numRows, NUM_BYTES_IN_TIMESTAMP)
+        lengthBasedAnalysisForAdapter(analysisTypes, ValueType.LONG.name(), null, numRows, NUM_BYTES_IN_TIMESTAMP)
     );
 
     return columns;
   }
 
-  public ColumnAnalysis analyzeLongColumn(Column column)
+  public ColumnAnalysis analyzeLongColumn(Column column, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
-    return lengthBasedAnalysis(column, Longs.BYTES);
+    return lengthBasedAnalysis(column, Longs.BYTES, analysisTypes);
   }
 
-  public ColumnAnalysis analyzeFloatColumn(Column column)
+  public ColumnAnalysis analyzeFloatColumn(Column column, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
-    return lengthBasedAnalysis(column, NUM_BYTES_IN_TEXT_FLOAT);
+    return lengthBasedAnalysis(column, NUM_BYTES_IN_TEXT_FLOAT, analysisTypes);
   }
 
-  private ColumnAnalysis lengthBasedAnalysis(Column column, final int numBytes)
+  private ColumnAnalysis lengthBasedAnalysis(Column column, final int numBytes, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
     final ColumnCapabilities capabilities = column.getCapabilities();
     if (capabilities.hasMultipleValues()) {
       return ColumnAnalysis.error("multi_value");
     }
 
-    return new ColumnAnalysis(capabilities.getType().name(), column.getLength() * numBytes, null, null);
+    int size = 0;
+    if (analysisHasSize(analysisTypes)) {
+      size = column.getLength() * numBytes;
+    }
+
+    return new ColumnAnalysis(capabilities.getType().name(), size, null, null);
   }
 
-  public ColumnAnalysis analyzeStringColumn(Column column)
+  public ColumnAnalysis analyzeStringColumn(Column column, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
     final ColumnCapabilities capabilities = column.getCapabilities();
@@ -170,21 +196,28 @@ public class SegmentAnalyzer
       int cardinality = bitmapIndex.getCardinality();
       long size = 0;
-      for (int i = 0; i < cardinality; ++i) {
-        String value = bitmapIndex.getValue(i);
-        if (value != null) {
-          size += StringUtils.toUtf8(value).length * bitmapIndex.getBitmap(value).size();
+      if (analysisHasSize(analysisTypes)) {
+        for (int i = 0; i < cardinality; ++i) {
+          String value = bitmapIndex.getValue(i);
+          if (value != null) {
+            size += StringUtils.toUtf8(value).length * bitmapIndex.getBitmap(value).size();
+          }
         }
       }
 
-      return new ColumnAnalysis(capabilities.getType().name(), size, cardinality, null);
+      return new ColumnAnalysis(
+          capabilities.getType().name(),
+          size,
+          analysisHasCardinality(analysisTypes) ? cardinality : 0,
+          null
+      );
     }
 
     return ColumnAnalysis.error("string_no_bitmap");
   }
 
-  public ColumnAnalysis analyzeComplexColumn(Column column)
+  public ColumnAnalysis analyzeComplexColumn(Column column, EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes)
   {
     final ColumnCapabilities capabilities = column.getCapabilities();
     final ComplexColumn complexColumn = column.getComplexColumn();
@@ -202,8 +235,10 @@ public class SegmentAnalyzer
     final int length = column.getLength();
 
     long size = 0;
-    for (int i = 0; i < length; ++i) {
-      size += inputSizeFn.apply(complexColumn.getRowValue(i));
+    if (analysisHasSize(analysisTypes)) {
+      for (int i = 0; i < length; ++i) {
+        size += inputSizeFn.apply(complexColumn.getRowValue(i));
+      }
     }
 
     return new ColumnAnalysis(typeName, size, null, null);
@@ -220,6 +255,7 @@ public class SegmentAnalyzer
   }
 
   private ColumnAnalysis lengthBasedAnalysisForAdapter(
+      EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes,
      String type, ColumnCapabilities capabilities,
      int numRows, final int numBytes
   )
@@ -227,7 +263,20 @@ public class SegmentAnalyzer
     if (capabilities != null && capabilities.hasMultipleValues()) {
       return ColumnAnalysis.error("multi_value");
     }
-    return new ColumnAnalysis(type, numRows * numBytes, null, null);
+    return new ColumnAnalysis(
+        type,
+        analysisHasSize(analysisTypes) ? numRows * numBytes : 0,
+        null,
+        null
+    );
+  }
+
+  private boolean analysisHasSize(EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
+    return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.SIZE);
+  }
+
+  private boolean analysisHasCardinality(EnumSet<SegmentMetadataQuery.AnalysisType> analysisTypes) {
+    return analysisTypes.contains(SegmentMetadataQuery.AnalysisType.CARDINALITY);
   }
 }

View File

@@ -42,6 +42,7 @@ import io.druid.query.metadata.metadata.SegmentAnalysis;
 import io.druid.query.metadata.metadata.SegmentMetadataQuery;
 import io.druid.segment.QueryableIndex;
 import io.druid.segment.Segment;
+import io.druid.segment.StorageAdapter;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -82,15 +83,23 @@ public class SegmentMetadataQueryRunnerFactory implements QueryRunnerFactory<Seg
         SegmentMetadataQuery query = (SegmentMetadataQuery) inQ;
         final QueryableIndex index = segment.asQueryableIndex();
         final Map<String, ColumnAnalysis> analyzedColumns;
+        final int numRows;
         long totalSize = 0;
+
         if (index == null) {
           // IncrementalIndexSegments (used by in-memory hydrants in the realtime service) do not have a QueryableIndex
-          analyzedColumns = analyzer.analyze(segment.asStorageAdapter());
+          StorageAdapter segmentAdapter = segment.asStorageAdapter();
+          analyzedColumns = analyzer.analyze(segmentAdapter, query.getAnalysisTypes());
+          numRows = segmentAdapter.getNumRows();
         } else {
-          analyzedColumns = analyzer.analyze(index);
+          analyzedColumns = analyzer.analyze(index, query.getAnalysisTypes());
+          numRows = index.getNumRows();
+        }
+
+        if (query.hasSize()) {
           // Initialize with the size of the whitespace, 1 byte per
-          totalSize = analyzedColumns.size() * index.getNumRows();
+          totalSize = analyzedColumns.size() * numRows;
         }
 
         Map<String, ColumnAnalysis> columns = Maps.newTreeMap();

View File

@@ -19,6 +19,7 @@ package io.druid.query.metadata.metadata;
 
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.annotation.JsonValue;
 import com.google.common.base.Preconditions;
 import io.druid.common.utils.JodaUtils;
 import io.druid.query.BaseQuery;
@@ -30,17 +31,43 @@ import io.druid.query.spec.QuerySegmentSpec;
 import org.joda.time.Interval;
 
 import java.util.Arrays;
+import java.util.EnumSet;
 import java.util.Map;
 
 public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
 {
+  public enum AnalysisType
+  {
+    CARDINALITY,
+    SIZE;
+
+    @JsonValue
+    @Override
+    public String toString() {
+      return this.name().toLowerCase();
+    }
+
+    @JsonCreator
+    public static AnalysisType fromString(String name) {
+      return valueOf(name.toUpperCase());
+    }
+  }
+
   public static final Interval DEFAULT_INTERVAL = new Interval(
       JodaUtils.MIN_INSTANT, JodaUtils.MAX_INSTANT
   );
 
+  public static final EnumSet<AnalysisType> DEFAULT_ANALYSIS_TYPES = EnumSet.of(
+      AnalysisType.CARDINALITY,
+      AnalysisType.SIZE
+  );
+
   private final ColumnIncluderator toInclude;
   private final boolean merge;
   private final boolean usingDefaultInterval;
+  private final EnumSet analysisTypes;
 
   @JsonCreator
   public SegmentMetadataQuery(
@@ -49,6 +76,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
       @JsonProperty("toInclude") ColumnIncluderator toInclude,
       @JsonProperty("merge") Boolean merge,
       @JsonProperty("context") Map<String, Object> context,
+      @JsonProperty("analysisTypes") EnumSet<AnalysisType> analysisTypes,
       @JsonProperty("usingDefaultInterval") Boolean useDefaultInterval
   )
   {
@@ -64,9 +92,9 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
     } else {
       this.usingDefaultInterval = useDefaultInterval == null ? false : useDefaultInterval;
     }
     this.toInclude = toInclude == null ? new AllColumnIncluderator() : toInclude;
     this.merge = merge == null ? false : merge;
+    this.analysisTypes = (analysisTypes == null) ? DEFAULT_ANALYSIS_TYPES : analysisTypes;
     Preconditions.checkArgument(
         dataSource instanceof TableDataSource,
         "SegmentMetadataQuery only supports table datasource"
@@ -103,6 +131,22 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
     return Query.SEGMENT_METADATA;
   }
 
+  @JsonProperty
+  public EnumSet getAnalysisTypes()
+  {
+    return analysisTypes;
+  }
+
+  public boolean hasCardinality()
+  {
+    return analysisTypes.contains(AnalysisType.CARDINALITY);
+  }
+
+  public boolean hasSize()
+  {
+    return analysisTypes.contains(AnalysisType.SIZE);
+  }
+
   @Override
   public Query<SegmentAnalysis> withOverriddenContext(Map<String, Object> contextOverride)
   {
@@ -112,6 +156,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
         toInclude,
         merge,
         computeOverridenContext(contextOverride),
+        analysisTypes,
         usingDefaultInterval
     );
   }
@@ -125,6 +170,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
         toInclude,
         merge,
         getContext(),
+        analysisTypes,
         usingDefaultInterval
     );
   }
@@ -138,6 +184,7 @@ public class SegmentMetadataQuery extends BaseQuery<SegmentAnalysis>
         toInclude,
         merge,
         getContext(),
+        analysisTypes,
         usingDefaultInterval
     );
   }
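
Because `toString()` is annotated with `@JsonValue` and `fromString()` with `@JsonCreator`, the `analysisTypes` values are written as lowercase strings and parsed case-insensitively. As a small illustration (the `dataSource` and `intervals` values are placeholders), the request body below deserializes to `EnumSet.of(CARDINALITY, SIZE)` and re-serializes with `"analysisTypes":["cardinality","size"]`:

``` json
{
  "queryType": "segmentMetadata",
  "dataSource": "test_ds",
  "intervals": ["2013-12-04/2013-12-05"],
  "analysisTypes": ["CARDINALITY", "Size"]
}
```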

View File

@@ -17,9 +17,11 @@
 
 package io.druid.query.metadata;
 
+import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.metamx.common.guava.Sequences;
 import io.druid.query.LegacyDataSource;
+import io.druid.query.Query;
 import io.druid.query.QueryRunner;
 import io.druid.query.QueryRunnerFactory;
 import io.druid.query.QueryRunnerTestHelper;
@@ -32,9 +34,11 @@ import io.druid.segment.QueryableIndexSegment;
 import io.druid.segment.Segment;
 import io.druid.segment.TestIndex;
 import io.druid.segment.column.ValueType;
+import org.joda.time.Interval;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -43,11 +47,21 @@ import java.util.Map;
  */
 public class SegmentAnalyzerTest
 {
+  private static final EnumSet<SegmentMetadataQuery.AnalysisType> emptyAnalyses =
+      EnumSet.noneOf(SegmentMetadataQuery.AnalysisType.class);
+
   @Test
   public void testIncrementalWorks() throws Exception
+  {
+    testIncrementalWorksHelper(null);
+    testIncrementalWorksHelper(emptyAnalyses);
+  }
+
+  private void testIncrementalWorksHelper(EnumSet<SegmentMetadataQuery.AnalysisType> analyses) throws Exception
   {
     final List<SegmentAnalysis> results = getSegmentAnalysises(
-        new IncrementalIndexSegment(TestIndex.getIncrementalTestIndex(false), null)
+        new IncrementalIndexSegment(TestIndex.getIncrementalTestIndex(false), null),
+        analyses
     );
 
     Assert.assertEquals(1, results.size());
@@ -61,28 +75,44 @@ public class SegmentAnalyzerTest
         TestIndex.COLUMNS.length,
         columns.size()
     ); // All columns including time and empty/null column
 
     for (String dimension : TestIndex.DIMENSIONS) {
       final ColumnAnalysis columnAnalysis = columns.get(dimension);
 
       Assert.assertEquals(dimension, ValueType.STRING.name(), columnAnalysis.getType());
-      Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
+      if (analyses == null) {
+        Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
+      } else {
+        Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue());
+        Assert.assertEquals(dimension, 0, columnAnalysis.getSize());
+      }
     }
 
     for (String metric : TestIndex.METRICS) {
       final ColumnAnalysis columnAnalysis = columns.get(metric);
 
       Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType());
-      Assert.assertTrue(metric, columnAnalysis.getSize() > 0);
+      if (analyses == null) {
+        Assert.assertTrue(metric, columnAnalysis.getSize() > 0);
+      } else {
+        Assert.assertEquals(metric, 0, columnAnalysis.getSize());
+      }
       Assert.assertNull(metric, columnAnalysis.getCardinality());
     }
   }
 
   @Test
   public void testMappedWorks() throws Exception
+  {
+    testMappedWorksHelper(null);
+    testMappedWorksHelper(emptyAnalyses);
+  }
+
+  private void testMappedWorksHelper(EnumSet<SegmentMetadataQuery.AnalysisType> analyses) throws Exception
   {
     final List<SegmentAnalysis> results = getSegmentAnalysises(
-        new QueryableIndexSegment("test_1", TestIndex.getMMappedTestIndex())
+        new QueryableIndexSegment("test_1", TestIndex.getMMappedTestIndex()),
+        analyses
     );
 
     Assert.assertEquals(1, results.size());
@@ -102,8 +132,13 @@ public class SegmentAnalyzerTest
         Assert.assertNull(columnAnalysis);
       } else {
         Assert.assertEquals(dimension, ValueType.STRING.name(), columnAnalysis.getType());
-        Assert.assertTrue(dimension, columnAnalysis.getSize() > 0);
-        Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
+        if (analyses == null) {
+          Assert.assertTrue(dimension, columnAnalysis.getSize() > 0);
+          Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
+        } else {
+          Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue());
+          Assert.assertEquals(dimension, 0, columnAnalysis.getSize());
+        }
       }
     }
@@ -111,7 +146,11 @@ public class SegmentAnalyzerTest
       final ColumnAnalysis columnAnalysis = columns.get(metric);
 
       Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType());
-      Assert.assertTrue(metric, columnAnalysis.getSize() > 0);
+      if (analyses == null) {
+        Assert.assertTrue(metric, columnAnalysis.getSize() > 0);
+      } else {
+        Assert.assertEquals(metric, 0, columnAnalysis.getSize());
+      }
       Assert.assertNull(metric, columnAnalysis.getCardinality());
     }
   }
@@ -123,7 +162,7 @@ public class SegmentAnalyzerTest
    *
    * @return
    */
-  private List<SegmentAnalysis> getSegmentAnalysises(Segment index)
+  private List<SegmentAnalysis> getSegmentAnalysises(Segment index, EnumSet<SegmentMetadataQuery.AnalysisType> analyses)
   {
     final QueryRunner runner = QueryRunnerTestHelper.makeQueryRunner(
         (QueryRunnerFactory) new SegmentMetadataQueryRunnerFactory(
@@ -133,7 +172,7 @@ public class SegmentAnalyzerTest
     );
 
     final SegmentMetadataQuery query = new SegmentMetadataQuery(
-        new LegacyDataSource("test"), QuerySegmentSpecs.create("2011/2012"), null, null, null, false
+        new LegacyDataSource("test"), QuerySegmentSpecs.create("2011/2012"), null, null, null, analyses, false
     );
     HashMap<String, Object> context = new HashMap<String, Object>();
     return Sequences.toList(query.run(runner, context), Lists.<SegmentAnalysis>newArrayList());

View File

@@ -50,6 +50,7 @@ import org.junit.Assert;
 import org.junit.Test;
 
 import java.util.Arrays;
+import java.util.EnumSet;
 import java.util.List;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -164,12 +165,20 @@ public class SegmentMetadataQueryTest
     String queryStr = "{\n"
                       + " \"queryType\":\"segmentMetadata\",\n"
                       + " \"dataSource\":\"test_ds\",\n"
-                      + " \"intervals\":[\"2013-12-04T00:00:00.000Z/2013-12-05T00:00:00.000Z\"]\n"
+                      + " \"intervals\":[\"2013-12-04T00:00:00.000Z/2013-12-05T00:00:00.000Z\"],\n"
+                      + " \"analysisTypes\":[\"cardinality\",\"size\"]\n"
                       + "}";
+
+    EnumSet<SegmentMetadataQuery.AnalysisType> expectedAnalysisTypes = EnumSet.of(
+        SegmentMetadataQuery.AnalysisType.CARDINALITY,
+        SegmentMetadataQuery.AnalysisType.SIZE
+    );
+
     Query query = mapper.readValue(queryStr, Query.class);
     Assert.assertTrue(query instanceof SegmentMetadataQuery);
     Assert.assertEquals("test_ds", Iterables.getOnlyElement(query.getDataSource().getNames()));
     Assert.assertEquals(new Interval("2013-12-04T00:00:00.000Z/2013-12-05T00:00:00.000Z"), query.getIntervals().get(0));
+    Assert.assertEquals(expectedAnalysisTypes, ((SegmentMetadataQuery) query).getAnalysisTypes());
 
     // test serialize and deserialize
     Assert.assertEquals(query, mapper.readValue(mapper.writeValueAsString(query), Query.class));