Merge pull request #2753 from gianm/null-filtering-multi-value-columns

More consistent empty-set filtering behavior on multi-value columns.
This commit is contained in:
Fangjin Yang 2016-03-29 18:52:25 -07:00
commit 95733a362f
8 changed files with 370 additions and 370 deletions

View File

@ -19,11 +19,72 @@ called `tags`.
{"timestamp": "2011-01-12T00:00:00.000Z", "tags": ["t1","t2","t3"]} #row1
{"timestamp": "2011-01-13T00:00:00.000Z", "tags": ["t3","t4","t5"]} #row2
{"timestamp": "2011-01-14T00:00:00.000Z", "tags": ["t5","t6","t7"]} #row3
{"timestamp": "2011-01-14T00:00:00.000Z", "tags": []} #row4
```
All query types can filter on multi-value dimensions. Filters operate independently on each value of a multi-value
dimension. For example, a `"t1" OR "t3"` filter would match row1 and row2 but not row3. A `"t1" AND "t3"` filter
would only match row1.
### Filtering
All query types, as well as [filtered aggregators](aggregations.html#filtered-aggregator), can filter on multi-value
dimensions. Filters follow these rules on multi-value dimensions:
- Value filters (like "selector", "bound", and "in") match a row if any of the values of a multi-value dimension match
the filter.
- Value filters that match `null` or `""` (empty string) will match empty cells in a multi-value dimension, that is,
rows that have no values for that dimension (such as row4 in the dataset above).
- Logical expression filters behave the same way they do on single-value dimensions: "and" matches a row if all
underlying filters match that row; "or" matches a row if any underlying filters match that row; "not" matches a row
if the underlying filter does not match the row.
For example, this "or" filter would match row1 and row2 of the dataset above, but not row3:
```
{
"type": "or",
"fields": [
{
"type": "selector",
"dimension": "tags",
"value": "t1"
},
{
"type": "selector",
"dimension": "tags",
"value": "t3"
}
]
}
```
This "and" filter would match only row1 of the dataset above:
```
{
"type": "and",
"fields": [
{
"type": "selector",
"dimension": "tags",
"value": "t1"
},
{
"type": "selector",
"dimension": "tags",
"value": "t3"
}
]
}
```
This "selector" filter would match row4 of the dataset above:
```
{
"type": "selector",
"dimension": "tags",
"value": null
}
```
### Grouping
topN and groupBy queries can group on multi-value dimensions. When grouping on a multi-value dimension, _all_ values
from matching rows will be used to generate one group per value. It's possible for a query to return more groups than

View File

@ -22,6 +22,7 @@ package io.druid.query.aggregation;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Strings;
import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.ValueMatcher;
@ -225,12 +226,11 @@ public class FilteredAggregatorFactory extends AggregatorFactory
);
// Compare "value" as a String.
final String valueString = value == null ? null : value.toString();
final boolean isNullOrEmpty = valueString == null || valueString.isEmpty();
final String valueString = value == null ? null : Strings.emptyToNull(value.toString());
// Missing columns match a null or empty string value, and don't match anything else.
if (selector == null) {
return new BooleanValueMatcher(isNullOrEmpty);
return new BooleanValueMatcher(valueString == null);
}
final int valueId = selector.lookupId(valueString);
@ -241,12 +241,17 @@ public class FilteredAggregatorFactory extends AggregatorFactory
{
final IndexedInts row = selector.getRow();
final int size = row.size();
for (int i = 0; i < size; ++i) {
if (row.get(i) == valueId) {
return true;
if (size == 0) {
// null should match empty rows in multi-value columns
return valueString == null;
} else {
for (int i = 0; i < size; ++i) {
if (row.get(i) == valueId) {
return true;
}
}
return false;
}
return false;
}
};
}
@ -258,8 +263,10 @@ public class FilteredAggregatorFactory extends AggregatorFactory
new DefaultDimensionSpec(dimension, dimension)
);
final boolean doesMatchNull = predicate.apply(null);
if (selector == null) {
return new BooleanValueMatcher(predicate.apply(null));
return new BooleanValueMatcher(doesMatchNull);
}
// Check every value in the dimension, as a String.
@ -278,12 +285,17 @@ public class FilteredAggregatorFactory extends AggregatorFactory
{
final IndexedInts row = selector.getRow();
final int size = row.size();
for (int i = 0; i < size; ++i) {
if (valueIds.get(row.get(i))) {
return true;
if (size == 0) {
// null should match empty rows in multi-value columns
return doesMatchNull;
} else {
for (int i = 0; i < size; ++i) {
if (valueIds.get(row.get(i))) {
return true;
}
}
return false;
}
return false;
}
};
}

View File

@ -659,16 +659,20 @@ public class IndexMerger
progress.progress();
startTime = System.currentTimeMillis();
ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
Map<String, Integer> dimensionCardinalities = Maps.newHashMap();
ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(indexes.size());
final ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
final Map<String, Integer> dimensionCardinalities = Maps.newHashMap();
final ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(indexes.size());
final ArrayList<Boolean> convertMissingDimsFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
final ArrayList<MutableBitmap> nullRowsList = Lists.newArrayListWithCapacity(mergedDimensions.size());
final ArrayList<Boolean> dimHasNullFlags = Lists.newArrayListWithCapacity(mergedDimensions.size());
for (int i = 0; i < indexes.size(); ++i) {
dimConversions.add(Maps.<String, IntBuffer>newHashMap());
}
for (String dimension : mergedDimensions) {
nullRowsList.add(indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap());
final GenericIndexedWriter<String> writer = new GenericIndexedWriter<String>(
ioPeon, dimension, GenericIndexed.STRING_STRATEGY
);
@ -704,6 +708,7 @@ public class IndexMerger
* rows from indexes with null/empty str values for that dimension.
*/
if (convertMissingDims && !dimHasNull) {
dimHasNull = true;
dimValueLookups[indexes.size()] = dimValueLookup = EMPTY_STR_DIM_VAL;
numMergeIndex++;
}
@ -731,6 +736,9 @@ public class IndexMerger
dimensionCardinalities.put(dimension, cardinality);
// Mark if this dim has the null/empty str value in its dictionary, used for determining nullRowsList later.
dimHasNullFlags.add(dimHasNull);
FileOutputSupplier dimOut = new FileOutputSupplier(IndexIO.makeDimFile(v8OutDir, dimension), true);
dimOuts.add(dimOut);
@ -821,6 +829,14 @@ public class IndexMerger
? null
: Ints.asList(dims[i]);
forwardDimWriters.get(i).write(listToWrite);
if (listToWrite == null || listToWrite.isEmpty()) {
// empty row; add to the nullRows bitmap
nullRowsList.get(i).add(rowCount);
} else if (dimHasNullFlags.get(i) && listToWrite.size() == 1 && listToWrite.get(0) == 0) {
// If this dimension has the null/empty str in its dictionary, a row with a single-valued dimension
// that matches the null/empty str's dictionary ID should also be added to nullRowsList.
nullRowsList.get(i).add(rowCount);
}
}
for (Map.Entry<Integer, TreeSet<Integer>> comprisedRow : theRow.getComprisedRows().entrySet()) {
@ -930,6 +946,9 @@ public class IndexMerger
}
MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
if ((dictId == 0) && (Iterables.getFirst(dimVals, "") == null)) {
bitset.or(nullRowsList.get(i));
}
for (Integer row : CombiningIterable.createSplatted(
convertedInverteds,
Ordering.<Integer>natural().nullsFirst()

View File

@ -46,7 +46,7 @@ import java.util.UUID;
*/
public class IndexBuilder
{
private static final int ROWS_PER_INDEX_FOR_MERGING = 2;
private static final int ROWS_PER_INDEX_FOR_MERGING = 1;
private static final int MAX_ROWS = 50_000;
private IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder().withMetrics(new AggregatorFactory[]{

View File

@ -21,7 +21,6 @@ package io.druid.segment.filter;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
@ -29,11 +28,14 @@ import com.metamx.common.Pair;
import com.metamx.common.guava.Sequence;
import com.metamx.common.guava.Sequences;
import io.druid.common.utils.JodaUtils;
import io.druid.data.input.InputRow;
import io.druid.granularity.QueryGranularity;
import io.druid.query.aggregation.Aggregator;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.aggregation.FilteredAggregatorFactory;
import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.Filter;
import io.druid.query.filter.ValueMatcher;
import io.druid.query.filter.ValueMatcherFactory;
import io.druid.segment.Cursor;
import io.druid.segment.DimensionSelector;
import io.druid.segment.IndexBuilder;
@ -54,28 +56,59 @@ import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
import org.junit.runners.Parameterized;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
public abstract class BaseFilterTest
{
@Rule
public TemporaryFolder temporaryFolder = new TemporaryFolder();
private final List<InputRow> rows;
protected final IndexBuilder indexBuilder;
protected final Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher;
protected StorageAdapter adapter;
protected Closeable closeable;
public BaseFilterTest(
List<InputRow> rows,
IndexBuilder indexBuilder,
Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher
)
{
this.rows = rows;
this.indexBuilder = indexBuilder;
this.finisher = finisher;
}
@Before
public void setUp() throws Exception
{
final Pair<StorageAdapter, Closeable> pair = finisher.apply(
indexBuilder.tmpDir(temporaryFolder.newFolder()).add(rows)
);
this.adapter = pair.lhs;
this.closeable = pair.rhs;
}
@After
public void tearDown() throws Exception
{
closeable.close();
}
@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> constructorFeeder() throws IOException
{
return makeConstructors();
}
public static Collection<Object[]> makeConstructors()
{
final List<Object[]> constructors = Lists.newArrayList();
@ -86,8 +119,7 @@ public abstract class BaseFilterTest
);
final Map<String, IndexMerger> indexMergers = ImmutableMap.<String, IndexMerger>of(
// TODO: deal with inconsistent null handling in IndexMerger
// "IndexMerger", TestHelper.getTestIndexMerger(),
"IndexMerger", TestHelper.getTestIndexMerger(),
"IndexMergerV9", TestHelper.getTestIndexMergerV9()
);
@ -179,15 +211,9 @@ public abstract class BaseFilterTest
/**
* Selects the values of "selectColumn" from rows matching a filter. selectColumn must be a single-valued dimension.
*/
protected List<String> selectUsingColumn(final Filter filter, final String selectColumn)
protected List<String> selectColumnValuesMatchingFilter(final DimFilter filter, final String selectColumn)
{
final Sequence<Cursor> cursors = adapter.makeCursors(
filter,
new Interval(JodaUtils.MIN_INSTANT, JodaUtils.MAX_INSTANT),
QueryGranularity.ALL,
false
);
final Cursor cursor = Iterables.getOnlyElement(Sequences.toList(cursors, Lists.<Cursor>newArrayList()));
final Cursor cursor = makeCursor(Filters.toFilter(filter));
final List<String> values = Lists.newArrayList();
final DimensionSelector selector = cursor.makeDimensionSelector(
new DefaultDimensionSpec(selectColumn, selectColumn)
@ -202,37 +228,29 @@ public abstract class BaseFilterTest
return values;
}
protected boolean applyFilterToValue(final Filter filter, final Comparable theValue)
protected long selectCountUsingFilteredAggregator(final DimFilter filter)
{
return filter.makeMatcher(
new ValueMatcherFactory()
{
@Override
public ValueMatcher makeValueMatcher(final String dimension, final Comparable value)
{
return new ValueMatcher()
{
@Override
public boolean matches()
{
return Objects.equals(value, theValue);
}
};
}
final Cursor cursor = makeCursor(null);
final Aggregator agg = new FilteredAggregatorFactory(
new CountAggregatorFactory("count"),
filter
).factorize(cursor);
@Override
public ValueMatcher makeValueMatcher(String dimension, final Predicate predicate)
{
return new ValueMatcher()
{
@Override
public boolean matches()
{
return predicate.apply(theValue);
}
};
}
}
).matches();
for (; !cursor.isDone(); cursor.advance()) {
agg.aggregate();
}
return agg.getLong();
}
private Cursor makeCursor(final Filter filter)
{
final Sequence<Cursor> cursors = adapter.makeCursors(
filter,
new Interval(JodaUtils.MIN_INSTANT, JodaUtils.MAX_INSTANT),
QueryGranularity.ALL,
false
);
return Iterables.getOnlyElement(Sequences.toList(cursors, Lists.<Cursor>newArrayList()));
}
}

View File

@ -20,11 +20,8 @@
package io.druid.segment.filter;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.metamx.common.Pair;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.DimensionsSpec;
@ -33,19 +30,16 @@ import io.druid.data.input.impl.MapInputRowParser;
import io.druid.data.input.impl.TimeAndDimsParseSpec;
import io.druid.data.input.impl.TimestampSpec;
import io.druid.query.filter.BoundDimFilter;
import io.druid.query.filter.Filter;
import io.druid.query.filter.DimFilter;
import io.druid.segment.IndexBuilder;
import io.druid.segment.StorageAdapter;
import org.joda.time.DateTime;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
@ -62,259 +56,230 @@ public class BoundFilterTest extends BaseFilterTest
);
private static final List<InputRow> ROWS = ImmutableList.of(
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "a", "dim1", "", "dim2", ImmutableList.of("a", "b"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "b", "dim1", "10", "dim2", ImmutableList.of())),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "c", "dim1", "2", "dim2", ImmutableList.of(""))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "d", "dim1", "1", "dim2", ImmutableList.of("a"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "e", "dim1", "def", "dim2", ImmutableList.of("c"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "f", "dim1", "abc"))
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "0", "dim1", "", "dim2", ImmutableList.of("a", "b"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "1", "dim1", "10", "dim2", ImmutableList.<String>of())),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "2", "dim1", "2", "dim2", ImmutableList.of(""))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "3", "dim1", "1", "dim2", ImmutableList.of("a"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "4", "dim1", "def", "dim2", ImmutableList.of("c"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "5", "dim1", "abc"))
);
private final IndexBuilder indexBuilder;
private final Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher;
public BoundFilterTest(
String testName,
IndexBuilder indexBuilder,
Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher
)
{
this.indexBuilder = indexBuilder;
this.finisher = finisher;
}
@Before
public void setUp() throws IOException
{
final Pair<StorageAdapter, Closeable> pair = finisher.apply(
indexBuilder.tmpDir(temporaryFolder.newFolder()).add(ROWS)
);
this.adapter = pair.lhs;
this.closeable = pair.rhs;
}
@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> constructorFeeder() throws IOException
{
return makeConstructors();
super(ROWS, indexBuilder, finisher);
}
@Test
public void testLexicographicMatchEverything()
{
final List<BoundFilter> filters = ImmutableList.of(
new BoundFilter(new BoundDimFilter("dim0", "", "z", false, false, false)),
new BoundFilter(new BoundDimFilter("dim1", "", "z", false, false, false)),
new BoundFilter(new BoundDimFilter("dim2", "", "z", false, false, false)),
new BoundFilter(new BoundDimFilter("dim3", "", "z", false, false, false))
final List<BoundDimFilter> filters = ImmutableList.of(
new BoundDimFilter("dim0", "", "z", false, false, false),
new BoundDimFilter("dim1", "", "z", false, false, false),
new BoundDimFilter("dim2", "", "z", false, false, false),
new BoundDimFilter("dim3", "", "z", false, false, false)
);
for (BoundFilter filter : filters) {
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(filter));
for (BoundDimFilter filter : filters) {
assertFilterMatches(filter, ImmutableList.of("0", "1", "2", "3", "4", "5"));
}
}
@Test
public void testLexicographicMatchNull()
{
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim0", "", "", false, false, false)))
assertFilterMatches(
new BoundDimFilter("dim0", "", "", false, false, false),
ImmutableList.<String>of()
);
Assert.assertEquals(
ImmutableList.of(0),
select(new BoundFilter(new BoundDimFilter("dim1", "", "", false, false, false)))
assertFilterMatches(
new BoundDimFilter("dim1", "", "", false, false, false),
ImmutableList.of("0")
);
Assert.assertEquals(
ImmutableList.of(1, 2, 5),
select(new BoundFilter(new BoundDimFilter("dim2", "", "", false, false, false)))
assertFilterMatches(
new BoundDimFilter("dim2", "", "", false, false, false),
ImmutableList.of("1", "2", "5")
);
}
@Test
public void testLexicographicMatchMissingColumn()
{
Assert.assertEquals(
ImmutableList.of(0, 1, 2, 3, 4, 5),
select(new BoundFilter(new BoundDimFilter("dim3", "", "", false, false, false)))
assertFilterMatches(
new BoundDimFilter("dim3", "", "", false, false, false),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim3", "", "", true, false, false)))
assertFilterMatches(
new BoundDimFilter("dim3", "", "", true, false, false),
ImmutableList.<String>of()
);
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim3", "", "", false, true, false)))
assertFilterMatches(
new BoundDimFilter("dim3", "", "", false, true, false),
ImmutableList.<String>of()
);
Assert.assertEquals(
ImmutableList.of(0, 1, 2, 3, 4, 5),
select(new BoundFilter(new BoundDimFilter("dim3", "", null, false, true, false)))
assertFilterMatches(
new BoundDimFilter("dim3", "", null, false, true, false),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
Assert.assertEquals(
ImmutableList.of(0, 1, 2, 3, 4, 5),
select(new BoundFilter(new BoundDimFilter("dim3", null, "", false, false, false)))
assertFilterMatches(
new BoundDimFilter("dim3", null, "", false, false, false),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim3", null, "", false, true, false)))
assertFilterMatches(
new BoundDimFilter("dim3", null, "", false, true, false),
ImmutableList.<String>of()
);
}
@Test
public void testLexicographicMatchTooStrict()
{
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim1", "abc", "abc", true, false, false)))
assertFilterMatches(
new BoundDimFilter("dim1", "abc", "abc", true, false, false),
ImmutableList.<String>of()
);
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim1", "abc", "abc", true, true, false)))
assertFilterMatches(
new BoundDimFilter("dim1", "abc", "abc", true, true, false),
ImmutableList.<String>of()
);
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim1", "abc", "abc", false, true, false)))
assertFilterMatches(
new BoundDimFilter("dim1", "abc", "abc", false, true, false),
ImmutableList.<String>of()
);
}
@Test
public void testLexicographicMatchExactlySingleValue()
{
Assert.assertEquals(
ImmutableList.of(5),
select(new BoundFilter(new BoundDimFilter("dim1", "abc", "abc", false, false, false)))
assertFilterMatches(
new BoundDimFilter("dim1", "abc", "abc", false, false, false),
ImmutableList.of("5")
);
}
@Test
public void testLexicographicMatchSurroundingSingleValue()
{
Assert.assertEquals(
ImmutableList.of(5),
select(new BoundFilter(new BoundDimFilter("dim1", "ab", "abd", true, true, false)))
assertFilterMatches(
new BoundDimFilter("dim1", "ab", "abd", true, true, false),
ImmutableList.of("5")
);
}
@Test
public void testLexicographicMatchNoUpperLimit()
{
Assert.assertEquals(
ImmutableList.of(4, 5),
select(new BoundFilter(new BoundDimFilter("dim1", "ab", null, true, true, false)))
assertFilterMatches(
new BoundDimFilter("dim1", "ab", null, true, true, false),
ImmutableList.of("4", "5")
);
}
@Test
public void testLexicographicMatchNoLowerLimit()
{
Assert.assertEquals(
ImmutableList.of(0, 1, 2, 3, 5),
select(new BoundFilter(new BoundDimFilter("dim1", null, "abd", true, true, false)))
assertFilterMatches(
new BoundDimFilter("dim1", null, "abd", true, true, false),
ImmutableList.of("0", "1", "2", "3", "5")
);
}
@Test
public void testLexicographicMatchNumbers()
{
Assert.assertEquals(
ImmutableList.of(1, 2, 3),
select(new BoundFilter(new BoundDimFilter("dim1", "1", "3", false, false, false)))
assertFilterMatches(
new BoundDimFilter("dim1", "1", "3", false, false, false),
ImmutableList.of("1", "2", "3")
);
Assert.assertEquals(
ImmutableList.of(1, 2),
select(new BoundFilter(new BoundDimFilter("dim1", "1", "3", true, true, false)))
assertFilterMatches(
new BoundDimFilter("dim1", "1", "3", true, true, false),
ImmutableList.of("1", "2")
);
}
@Test
public void testAlphaNumericMatchNull()
{
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim0", "", "", false, false, true)))
assertFilterMatches(
new BoundDimFilter("dim0", "", "", false, false, true),
ImmutableList.<String>of()
);
Assert.assertEquals(
ImmutableList.of(0),
select(new BoundFilter(new BoundDimFilter("dim1", "", "", false, false, true)))
assertFilterMatches(
new BoundDimFilter("dim1", "", "", false, false, true),
ImmutableList.of("0")
);
Assert.assertEquals(
ImmutableList.of(1, 2, 5),
select(new BoundFilter(new BoundDimFilter("dim2", "", "", false, false, true)))
assertFilterMatches(
new BoundDimFilter("dim2", "", "", false, false, true),
ImmutableList.of("1", "2", "5")
);
Assert.assertEquals(
ImmutableList.of(0, 1, 2, 3, 4, 5),
select(new BoundFilter(new BoundDimFilter("dim3", "", "", false, false, true)))
assertFilterMatches(
new BoundDimFilter("dim3", "", "", false, false, true),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
}
@Test
public void testAlphaNumericMatchTooStrict()
{
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim1", "2", "2", true, false, true)))
assertFilterMatches(
new BoundDimFilter("dim1", "2", "2", true, false, true),
ImmutableList.<String>of()
);
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim1", "2", "2", true, true, true)))
assertFilterMatches(
new BoundDimFilter("dim1", "2", "2", true, true, true),
ImmutableList.<String>of()
);
Assert.assertEquals(
ImmutableList.of(),
select(new BoundFilter(new BoundDimFilter("dim1", "2", "2", false, true, true)))
assertFilterMatches(
new BoundDimFilter("dim1", "2", "2", false, true, true),
ImmutableList.<String>of()
);
}
@Test
public void testAlphaNumericMatchExactlySingleValue()
{
Assert.assertEquals(
ImmutableList.of(2),
select(new BoundFilter(new BoundDimFilter("dim1", "2", "2", false, false, true)))
assertFilterMatches(
new BoundDimFilter("dim1", "2", "2", false, false, true),
ImmutableList.of("2")
);
}
@Test
public void testAlphaNumericMatchSurroundingSingleValue()
{
Assert.assertEquals(
ImmutableList.of(2),
select(new BoundFilter(new BoundDimFilter("dim1", "1", "3", true, true, true)))
assertFilterMatches(
new BoundDimFilter("dim1", "1", "3", true, true, true),
ImmutableList.of("2")
);
}
@Test
public void testAlphaNumericMatchNoUpperLimit()
{
Assert.assertEquals(
ImmutableList.of(1, 2, 4, 5),
select(new BoundFilter(new BoundDimFilter("dim1", "1", null, true, true, true)))
assertFilterMatches(
new BoundDimFilter("dim1", "1", null, true, true, true),
ImmutableList.of("1", "2", "4", "5")
);
}
@Test
public void testAlphaNumericMatchNoLowerLimit()
{
Assert.assertEquals(
ImmutableList.of(0, 3),
select(new BoundFilter(new BoundDimFilter("dim1", null, "2", true, true, true)))
assertFilterMatches(
new BoundDimFilter("dim1", null, "2", true, true, true),
ImmutableList.of("0", "3")
);
}
private List<Integer> select(final Filter filter)
private void assertFilterMatches(
final DimFilter filter,
final List<String> expectedRows
)
{
return Lists.newArrayList(
Iterables.transform(
selectUsingColumn(filter, "dim0"),
new Function<String, Integer>()
{
@Override
public Integer apply(String input)
{
Preconditions.checkArgument(input.length() == 1);
return ((int) input.charAt(0)) - ((int) 'a');
}
}
)
);
Assert.assertEquals(filter.toString(), expectedRows, selectColumnValuesMatchingFilter(filter, "dim0"));
Assert.assertEquals(filter.toString(), expectedRows.size(), selectCountUsingFilteredAggregator(filter));
}
}

View File

@ -20,11 +20,8 @@
package io.druid.segment.filter;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.metamx.common.Pair;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.DimensionsSpec;
@ -32,19 +29,18 @@ import io.druid.data.input.impl.InputRowParser;
import io.druid.data.input.impl.MapInputRowParser;
import io.druid.data.input.impl.TimeAndDimsParseSpec;
import io.druid.data.input.impl.TimestampSpec;
import io.druid.query.filter.Filter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.NotDimFilter;
import io.druid.query.filter.SelectorDimFilter;
import io.druid.segment.IndexBuilder;
import io.druid.segment.StorageAdapter;
import org.joda.time.DateTime;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
@ -61,101 +57,50 @@ public class NotFilterTest extends BaseFilterTest
);
private static final List<InputRow> ROWS = ImmutableList.of(
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "a", "dim1", "", "dim2", ImmutableList.of("a", "b"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "b", "dim1", "10", "dim2", ImmutableList.of())),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "c", "dim1", "2", "dim2", ImmutableList.of(""))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "d", "dim1", "1", "dim2", ImmutableList.of("a"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "e", "dim1", "def", "dim2", ImmutableList.of("c"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "f", "dim1", "abc"))
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "0")),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "1")),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "2")),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "3")),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "4")),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "5"))
);
private final IndexBuilder indexBuilder;
private final Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher;
public NotFilterTest(
String testName,
IndexBuilder indexBuilder,
Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher
)
{
this.indexBuilder = indexBuilder;
this.finisher = finisher;
super(ROWS, indexBuilder, finisher);
}
@Before
public void setUp() throws IOException
@Test
public void testNotSelector()
{
final Pair<StorageAdapter, Closeable> pair = finisher.apply(
indexBuilder.tmpDir(temporaryFolder.newFolder()).add(ROWS)
assertFilterMatches(
new NotDimFilter(new SelectorDimFilter("dim0", null)),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
this.adapter = pair.lhs;
this.closeable = pair.rhs;
}
@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> constructorFeeder() throws IOException
{
return makeConstructors();
}
@Test
public void testSingleValueStringColumnWithoutNulls()
{
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim0", null))));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim0", ""))));
Assert.assertEquals(ImmutableList.of(1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim0", "a"))));
Assert.assertEquals(ImmutableList.of(0, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim0", "b"))));
}
@Test
public void testSingleValueStringColumnWithNulls()
{
Assert.assertEquals(ImmutableList.of(1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim1", null))));
Assert.assertEquals(ImmutableList.of(1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim1", ""))));
Assert.assertEquals(ImmutableList.of(0, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim1", "10"))));
Assert.assertEquals(ImmutableList.of(0, 1, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim1", "2"))));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 4, 5), select(new NotFilter(new SelectorFilter("dim1", "1"))));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 5), select(new NotFilter(new SelectorFilter("dim1", "def"))));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4), select(new NotFilter(new SelectorFilter("dim1", "abc"))));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim1", "ab"))));
}
@Test
public void testMultiValueStringColumn()
{
Assert.assertEquals(ImmutableList.of(0, 3, 4), select(new NotFilter(new SelectorFilter("dim2", null))));
Assert.assertEquals(ImmutableList.of(0, 3, 4), select(new NotFilter(new SelectorFilter("dim2", ""))));
Assert.assertEquals(ImmutableList.of(1, 2, 4, 5), select(new NotFilter(new SelectorFilter("dim2", "a"))));
Assert.assertEquals(ImmutableList.of(1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim2", "b"))));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 5), select(new NotFilter(new SelectorFilter("dim2", "c"))));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim2", "d"))));
}
@Test
public void testMissingColumn()
{
Assert.assertEquals(ImmutableList.of(), select(new NotFilter(new SelectorFilter("dim3", null))));
Assert.assertEquals(ImmutableList.of(), select(new NotFilter(new SelectorFilter("dim3", ""))));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim3", "a"))));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim3", "b"))));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(new NotFilter(new SelectorFilter("dim3", "c"))));
}
private List<Integer> select(final Filter filter)
{
return Lists.newArrayList(
Iterables.transform(
selectUsingColumn(filter, "dim0"),
new Function<String, Integer>()
{
@Override
public Integer apply(String input)
{
Preconditions.checkArgument(input.length() == 1);
return ((int) input.charAt(0)) - ((int) 'a');
}
}
)
assertFilterMatches(
new NotDimFilter(new SelectorDimFilter("dim0", "")),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
assertFilterMatches(
new NotDimFilter(new SelectorDimFilter("dim0", "0")),
ImmutableList.of("1", "2", "3", "4", "5")
);
assertFilterMatches(
new NotDimFilter(new SelectorDimFilter("dim0", "1")),
ImmutableList.of("0", "2", "3", "4", "5")
);
}
/**
 * Checks a filter two ways: the "dim0" values selected through the filter must equal
 * expectedRows, and the count obtained through the filtered aggregator must equal the number of
 * expected rows. The filter's string form is used as the message of both assertions.
 *
 * @param filter       filter under test
 * @param expectedRows "dim0" values expected to be selected, in order
 */
private void assertFilterMatches(
    final DimFilter filter,
    final List<String> expectedRows
)
{
  final String description = filter.toString();

  Assert.assertEquals(
      description,
      expectedRows,
      selectColumnValuesMatchingFilter(filter, "dim0")
  );
  Assert.assertEquals(
      description,
      expectedRows.size(),
      selectCountUsingFilteredAggregator(filter)
  );
}
}

View File

@ -20,11 +20,8 @@
package io.druid.segment.filter;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.metamx.common.Pair;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.DimensionsSpec;
@ -32,19 +29,17 @@ import io.druid.data.input.impl.InputRowParser;
import io.druid.data.input.impl.MapInputRowParser;
import io.druid.data.input.impl.TimeAndDimsParseSpec;
import io.druid.data.input.impl.TimestampSpec;
import io.druid.query.filter.Filter;
import io.druid.query.filter.DimFilter;
import io.druid.query.filter.SelectorDimFilter;
import io.druid.segment.IndexBuilder;
import io.druid.segment.StorageAdapter;
import org.joda.time.DateTime;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
@ -56,106 +51,91 @@ public class SelectorFilterTest extends BaseFilterTest
// Parser used to build the fixture rows: a MapInputRowParser over a TimeAndDimsParseSpec whose
// TimestampSpec is created from TIMESTAMP_COLUMN, "iso" and DateTime("2000").
// NOTE(review): two DimensionsSpec expressions follow one another below with no separator (the
// all-null form and the form built from the default schemas for dim0..dim3). Presumably only one
// of them is meant to remain; confirm against the complete file.
private static final InputRowParser<Map<String, Object>> PARSER = new MapInputRowParser(
new TimeAndDimsParseSpec(
new TimestampSpec(TIMESTAMP_COLUMN, "iso", new DateTime("2000")),
new DimensionsSpec(null, null, null)
new DimensionsSpec(
DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim0", "dim1", "dim2", "dim3")),
null,
null
)
)
);
// Fixture rows, each parsed by PARSER from a map of dimension name to value. "dim1" is a single
// string (empty in the first row of each group). "dim2" is supplied as a list (two values, no
// values, a single "", or a single value) and is absent from the last row of each group.
// NOTE(review): two six-row groups are listed, one with "dim0" values "a".."f" and one with
// "0".."5", and there is no comma between the groups. Presumably only one group is intended;
// confirm against the complete file.
private static final List<InputRow> ROWS = ImmutableList.of(
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "a", "dim1", "", "dim2", ImmutableList.of("a", "b"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "b", "dim1", "10", "dim2", ImmutableList.of())),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "c", "dim1", "2", "dim2", ImmutableList.of(""))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "d", "dim1", "1", "dim2", ImmutableList.of("a"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "e", "dim1", "def", "dim2", ImmutableList.of("c"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "f", "dim1", "abc"))
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "0", "dim1", "", "dim2", ImmutableList.of("a", "b"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "1", "dim1", "10", "dim2", ImmutableList.of())),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "2", "dim1", "2", "dim2", ImmutableList.of(""))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "3", "dim1", "1", "dim2", ImmutableList.of("a"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "4", "dim1", "def", "dim2", ImmutableList.of("c"))),
PARSER.parse(ImmutableMap.<String, Object>of("dim0", "5", "dim1", "abc"))
);
private final IndexBuilder indexBuilder;
private final Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher;
/**
 * Stores the index builder and the finisher function on this instance for later use.
 *
 * @param testName     not read by this constructor body
 * @param indexBuilder builder kept in the indexBuilder field
 * @param finisher     function kept in the finisher field
 */
public SelectorFilterTest(
    String testName,
    IndexBuilder indexBuilder,
    Function<IndexBuilder, Pair<StorageAdapter, Closeable>> finisher
)
{
  // The two assignments are independent of each other.
  this.finisher = finisher;
  this.indexBuilder = indexBuilder;
}
/**
 * Prepares the per-test state: the index builder is given the result of
 * temporaryFolder.newFolder() as its tmpDir, ROWS are added to it, and the finisher is applied to
 * the builder. The resulting pair's lhs is stored as the adapter and its rhs as the closeable.
 *
 * @throws IOException declared by this method; the visible code does not show which call raises it
 */
@Before
public void setUp() throws IOException
{
  final IndexBuilder populatedBuilder = indexBuilder.tmpDir(temporaryFolder.newFolder()).add(ROWS);
  final Pair<StorageAdapter, Closeable> adapterAndCloser = finisher.apply(populatedBuilder);

  this.adapter = adapterAndCloser.lhs;
  this.closeable = adapterAndCloser.rhs;
}
/**
 * Parameter source for the Parameterized runner, declared with the name pattern "{0}". Returns
 * whatever makeConstructors() produces.
 *
 * NOTE(review): the super(ROWS, indexBuilder, finisher) statement after the return looks like it
 * belongs to a constructor rather than to this static method. Confirm against the complete file.
 */
@Parameterized.Parameters(name = "{0}")
public static Collection<Object[]> constructorFeeder() throws IOException
{
return makeConstructors();
super(ROWS, indexBuilder, finisher);
}
/**
 * Selector filters on "dim0": null and "" are expected to match no rows, while a concrete value
 * is expected to match exactly one row.
 *
 * NOTE(review): two assertion styles are present. One uses select(SelectorFilter) with integer
 * row indexes and values "a"/"b"; the other uses assertFilterMatches(SelectorDimFilter) with
 * string rows and values "0"/"1". They presuppose different "dim0" values, so presumably only one
 * set is intended; confirm against the complete file.
 */
@Test
public void testSingleValueStringColumnWithoutNulls()
{
Assert.assertEquals(ImmutableList.of(), select(new SelectorFilter("dim0", null)));
Assert.assertEquals(ImmutableList.of(), select(new SelectorFilter("dim0", "")));
Assert.assertEquals(ImmutableList.of(0), select(new SelectorFilter("dim0", "a")));
Assert.assertEquals(ImmutableList.of(1), select(new SelectorFilter("dim0", "b")));
assertFilterMatches(new SelectorDimFilter("dim0", null), ImmutableList.<String>of());
assertFilterMatches(new SelectorDimFilter("dim0", ""), ImmutableList.<String>of());
assertFilterMatches(new SelectorDimFilter("dim0", "0"), ImmutableList.of("0"));
assertFilterMatches(new SelectorDimFilter("dim0", "1"), ImmutableList.of("1"));
}
/**
 * Selector filters on "dim1": null and "" are both expected to match the first row, each listed
 * concrete value is expected to match exactly one row, and "ab" is expected to match nothing.
 *
 * NOTE(review): the same expectations appear twice, once through select(SelectorFilter) with
 * integer row indexes and once through assertFilterMatches(SelectorDimFilter) with string rows.
 * Presumably only one set is intended; confirm against the complete file.
 */
@Test
public void testSingleValueStringColumnWithNulls()
{
Assert.assertEquals(ImmutableList.of(0), select(new SelectorFilter("dim1", null)));
Assert.assertEquals(ImmutableList.of(0), select(new SelectorFilter("dim1", "")));
Assert.assertEquals(ImmutableList.of(1), select(new SelectorFilter("dim1", "10")));
Assert.assertEquals(ImmutableList.of(2), select(new SelectorFilter("dim1", "2")));
Assert.assertEquals(ImmutableList.of(3), select(new SelectorFilter("dim1", "1")));
Assert.assertEquals(ImmutableList.of(4), select(new SelectorFilter("dim1", "def")));
Assert.assertEquals(ImmutableList.of(5), select(new SelectorFilter("dim1", "abc")));
Assert.assertEquals(ImmutableList.of(), select(new SelectorFilter("dim1", "ab")));
assertFilterMatches(new SelectorDimFilter("dim1", null), ImmutableList.of("0"));
assertFilterMatches(new SelectorDimFilter("dim1", ""), ImmutableList.of("0"));
assertFilterMatches(new SelectorDimFilter("dim1", "10"), ImmutableList.of("1"));
assertFilterMatches(new SelectorDimFilter("dim1", "2"), ImmutableList.of("2"));
assertFilterMatches(new SelectorDimFilter("dim1", "1"), ImmutableList.of("3"));
assertFilterMatches(new SelectorDimFilter("dim1", "def"), ImmutableList.of("4"));
assertFilterMatches(new SelectorDimFilter("dim1", "abc"), ImmutableList.of("5"));
assertFilterMatches(new SelectorDimFilter("dim1", "ab"), ImmutableList.<String>of());
}
/**
 * Selector filters on "dim2": null and "" are expected to match the second, third and sixth
 * rows; "a" the first and fourth; "b" the first; "c" the fifth; "d" none.
 *
 * NOTE(review): the same expectations appear twice, once through select(SelectorFilter) with
 * integer row indexes and once through assertFilterMatches(SelectorDimFilter) with string rows.
 * Presumably only one set is intended; confirm against the complete file.
 */
@Test
public void testMultiValueStringColumn()
{
Assert.assertEquals(ImmutableList.of(1, 2, 5), select(new SelectorFilter("dim2", null)));
Assert.assertEquals(ImmutableList.of(1, 2, 5), select(new SelectorFilter("dim2", "")));
Assert.assertEquals(ImmutableList.of(0, 3), select(new SelectorFilter("dim2", "a")));
Assert.assertEquals(ImmutableList.of(0), select(new SelectorFilter("dim2", "b")));
Assert.assertEquals(ImmutableList.of(4), select(new SelectorFilter("dim2", "c")));
Assert.assertEquals(ImmutableList.of(), select(new SelectorFilter("dim2", "d")));
assertFilterMatches(new SelectorDimFilter("dim2", null), ImmutableList.of("1", "2", "5"));
assertFilterMatches(new SelectorDimFilter("dim2", ""), ImmutableList.of("1", "2", "5"));
assertFilterMatches(new SelectorDimFilter("dim2", "a"), ImmutableList.of("0", "3"));
assertFilterMatches(new SelectorDimFilter("dim2", "b"), ImmutableList.of("0"));
assertFilterMatches(new SelectorDimFilter("dim2", "c"), ImmutableList.of("4"));
assertFilterMatches(new SelectorDimFilter("dim2", "d"), ImmutableList.<String>of());
}
// Selector filters on "dim3": null and "" are expected to match all six rows, while "a", "b" and
// "c" are expected to match none.
// NOTE(review): two method headers follow one another below (testMissingColumn and
// testMissingColumnSpecifiedInDimensionList) over a single body, and the body holds both
// select(SelectorFilter) and assertFilterMatches(SelectorDimFilter) assertions. Presumably only
// one header and one set of assertions is intended; confirm against the complete file.
@Test
public void testMissingColumn()
public void testMissingColumnSpecifiedInDimensionList()
{
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(new SelectorFilter("dim3", null)));
Assert.assertEquals(ImmutableList.of(0, 1, 2, 3, 4, 5), select(new SelectorFilter("dim3", "")));
Assert.assertEquals(ImmutableList.of(), select(new SelectorFilter("dim3", "a")));
Assert.assertEquals(ImmutableList.of(), select(new SelectorFilter("dim3", "b")));
Assert.assertEquals(ImmutableList.of(), select(new SelectorFilter("dim3", "c")));
assertFilterMatches(new SelectorDimFilter("dim3", null), ImmutableList.of("0", "1", "2", "3", "4", "5"));
assertFilterMatches(new SelectorDimFilter("dim3", ""), ImmutableList.of("0", "1", "2", "3", "4", "5"));
assertFilterMatches(new SelectorDimFilter("dim3", "a"), ImmutableList.<String>of());
assertFilterMatches(new SelectorDimFilter("dim3", "b"), ImmutableList.<String>of());
assertFilterMatches(new SelectorDimFilter("dim3", "c"), ImmutableList.<String>of());
}
// NOTE(review): two method headers follow one another below (select and
// testMissingColumnNotSpecifiedInDimensionList) over a single body. The body first returns a list
// and then runs assertions, so it presumably combines two separate methods; confirm against the
// complete file.
private List<Integer> select(final Filter filter)
@Test
public void testMissingColumnNotSpecifiedInDimensionList()
{
// Converts each "dim0" value returned by selectUsingColumn into an integer: the value must be
// exactly one character long, and the result is that character's offset from 'a'.
return Lists.newArrayList(
Iterables.transform(
selectUsingColumn(filter, "dim0"),
new Function<String, Integer>()
{
@Override
public Integer apply(String input)
{
Preconditions.checkArgument(input.length() == 1);
return ((int) input.charAt(0)) - ((int) 'a');
}
}
)
);
// Selector filters on "dim4": null and "" are expected to match all six rows, while "a", "b" and
// "c" are expected to match none.
assertFilterMatches(new SelectorDimFilter("dim4", null), ImmutableList.of("0", "1", "2", "3", "4", "5"));
assertFilterMatches(new SelectorDimFilter("dim4", ""), ImmutableList.of("0", "1", "2", "3", "4", "5"));
assertFilterMatches(new SelectorDimFilter("dim4", "a"), ImmutableList.<String>of());
assertFilterMatches(new SelectorDimFilter("dim4", "b"), ImmutableList.<String>of());
assertFilterMatches(new SelectorDimFilter("dim4", "c"), ImmutableList.<String>of());
}
/**
 * Verifies a filter through two paths, using the filter's string form as the assertion message:
 * first the "dim0" values selected through the filter are compared with expectedRows, then the
 * count obtained through the filtered aggregator is compared with the size of expectedRows.
 *
 * @param filter       filter under test
 * @param expectedRows "dim0" values expected to be selected, in order
 */
private void assertFilterMatches(
    final DimFilter filter,
    final List<String> expectedRows
)
{
  final String failureMessage = filter.toString();

  // Row-level check: the selected "dim0" values must equal the expected list.
  Assert.assertEquals(
      failureMessage,
      expectedRows,
      selectColumnValuesMatchingFilter(filter, "dim0")
  );

  // Count-level check: the filtered-aggregator count must equal the expected row count.
  Assert.assertEquals(
      failureMessage,
      expectedRows.size(),
      selectCountUsingFilteredAggregator(filter)
  );
}
}