Merge pull request #1892 from navis/DRUID-1878

Make 'search' filter have a case sensitive option (for #1878)
This commit is contained in:
Fangjin Yang 2015-11-03 07:11:00 -08:00
commit f681c84014
11 changed files with 476 additions and 76 deletions

View File

@ -147,4 +147,13 @@ Search filters can be used to filter on partial string matches.
|property|description|required?|
|--------|-----------|---------|
|type|This String should always be "fragment".|yes|
|values|A JSON array of String values to run the search over. Case insensitive.|yes|
|values|A JSON array of String values to run the search over.|yes|
|caseSensitive|Whether strings should be compared as case sensitive or not. Default: false (insensitive)|no|
##### Contains
|property|description|required?|
|--------|-----------|---------|
|type|This String should always be "contains".|yes|
|value|A String value to run the search over.|yes|
|caseSensitive|Whether two strings should be compared as case sensitive or not. Default: false (insensitive)|no|

View File

@ -19,11 +19,25 @@ If any part of a dimension value contains the value specified in this search que
FragmentSearchQuerySpec
-----------------------
If any part of a dimension value contains any of the values specified in this search query spec, regardless of case, a "match" occurs. The grammar is:
If any part of a dimension value contains all of the values specified in this search query spec, regardless of case by default, a "match" occurs. The grammar is:
```json
{
"type" : "fragment",
"caseSensitive" : false,
"values" : ["fragment1", "fragment2"]
}
```
ContainsSearchQuerySpec
----------------------------------
If any part of a dimension value contains the value specified in this search query spec, a "match" occurs. The grammar is:
```json
{
"type" : "contains",
"caseSensitive" : true,
"value" : "some_value"
}
```

View File

@ -75,6 +75,10 @@
<groupId>org.mapdb</groupId>
<artifactId>mapdb</artifactId>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
</dependency>
<!-- Tests -->
<dependency>

View File

@ -18,6 +18,7 @@
package io.druid.query;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
@ -36,6 +37,8 @@ import io.druid.query.filter.SelectorDimFilter;
import io.druid.query.metadata.metadata.ColumnIncluderator;
import io.druid.query.metadata.metadata.SegmentMetadataQuery;
import io.druid.query.search.SearchResultValue;
import io.druid.query.search.search.ContainsSearchQuerySpec;
import io.druid.query.search.search.FragmentSearchQuerySpec;
import io.druid.query.search.search.InsensitiveContainsSearchQuerySpec;
import io.druid.query.search.search.SearchQuery;
import io.druid.query.search.search.SearchQuerySpec;
@ -681,13 +684,41 @@ public class Druids
public SearchQueryBuilder query(String q)
{
Preconditions.checkNotNull(q, "no value");
querySpec = new InsensitiveContainsSearchQuerySpec(q);
return this;
}
public SearchQueryBuilder query(Map<String, Object> q)
{
querySpec = new InsensitiveContainsSearchQuerySpec((String) q.get("value"));
String value = Preconditions.checkNotNull(q.get("value"), "no value").toString();
querySpec = new InsensitiveContainsSearchQuerySpec(value);
return this;
}
/**
 * Sets a "contains" query spec over the given string with explicit case sensitivity.
 *
 * @param q             the search string; must not be null
 * @param caseSensitive true to match case sensitively, false for case-insensitive matching
 * @return this builder, for chaining
 */
public SearchQueryBuilder query(String q, boolean caseSensitive)
{
  querySpec = new ContainsSearchQuerySpec(Preconditions.checkNotNull(q, "no value"), caseSensitive);
  return this;
}
/**
 * Sets a "contains" query spec from a map form of the spec, reading the "value" key,
 * with explicit case sensitivity.
 *
 * @param q             map containing a non-null "value" entry
 * @param caseSensitive true to match case sensitively, false for case-insensitive matching
 * @return this builder, for chaining
 */
public SearchQueryBuilder query(Map<String, Object> q, boolean caseSensitive)
{
  final Object raw = q.get("value");
  Preconditions.checkNotNull(raw, "no value");
  querySpec = new ContainsSearchQuerySpec(raw.toString(), caseSensitive);
  return this;
}
/**
 * Sets a case-insensitive "fragment" query spec over the given fragments.
 * Delegates to {@link #fragments(List, boolean)} with caseSensitive = false.
 */
public SearchQueryBuilder fragments(List<String> q)
{
return fragments(q, false);
}
/**
 * Sets a "fragment" query spec over the given fragments with explicit case sensitivity.
 *
 * @param q             the fragments to search for; must not be null
 * @param caseSensitive true to match case sensitively, false for case-insensitive matching
 * @return this builder, for chaining
 */
public SearchQueryBuilder fragments(List<String> q, boolean caseSensitive)
{
  querySpec = new FragmentSearchQuerySpec(Preconditions.checkNotNull(q, "no value"), caseSensitive);
  return this;
}

View File

@ -0,0 +1,125 @@
/*
* Druid - a distributed column store.
* Copyright 2012 - 2015 Metamarkets Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.query.search.search;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Objects;
import com.metamx.common.StringUtils;
import java.nio.ByteBuffer;
/**
 * A {@link SearchQuerySpec} that matches when a dimension value contains the configured
 * search string. Matching is case insensitive by default and case sensitive when
 * {@code caseSensitive} is true. Serialized by Jackson as type "contains".
 */
public class ContainsSearchQuerySpec implements SearchQuerySpec
{
  private static final byte CACHE_TYPE_ID = 0x1;

  private final String value;
  private final boolean caseSensitive;

  @JsonCreator
  public ContainsSearchQuerySpec(
      @JsonProperty("value") String value,
      @JsonProperty("caseSensitive") boolean caseSensitive
  )
  {
    this.value = value;
    this.caseSensitive = caseSensitive;
  }

  @JsonProperty
  public String getValue()
  {
    return value;
  }

  @JsonProperty
  public boolean isCaseSensitive()
  {
    return caseSensitive;
  }

  /**
   * Returns true when {@code dimVal} contains the search value. A null dimension value or a
   * null search value never matches; an empty search value matches any non-null dimVal.
   */
  @Override
  public boolean accept(String dimVal)
  {
    if (dimVal == null || value == null) {
      return false;
    }
    if (caseSensitive) {
      return dimVal.contains(value);
    }
    return org.apache.commons.lang.StringUtils.containsIgnoreCase(dimVal, value);
  }

  @Override
  public byte[] getCacheKey()
  {
    final byte caseByte = caseSensitive ? (byte) 1 : 0;
    if (value == null) {
      // Null value: [typeId, caseByte]. The non-null branch below always emits at least
      // three bytes, so a null value cannot collide with any non-null value.
      return ByteBuffer.allocate(2)
                       .put(CACHE_TYPE_ID)
                       .put(caseByte)
                       .array();
    }
    final byte[] valueBytes = StringUtils.toUtf8(value);
    // Non-null value: [typeId, caseByte, presence marker, utf8 bytes...]. The marker byte
    // distinguishes an empty-string value (which accept()s every non-null dimVal) from a
    // null value (which accept()s nothing); without it both would produce the same
    // two-byte key and could cross-pollute cached results.
    return ByteBuffer.allocate(3 + valueBytes.length)
                     .put(CACHE_TYPE_ID)
                     .put(caseByte)
                     .put((byte) 1)
                     .put(valueBytes)
                     .array();
  }

  @Override
  public String toString()
  {
    return "ContainsSearchQuerySpec{" +
           "value=" + value + ", caseSensitive=" + caseSensitive +
           "}";
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    // getClass() (not instanceof) so a subclass such as InsensitiveContainsSearchQuerySpec
    // never compares equal to a plain ContainsSearchQuerySpec.
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    ContainsSearchQuerySpec that = (ContainsSearchQuerySpec) o;
    if (caseSensitive != that.caseSensitive) {
      return false;
    }
    return value == null ? that.value == null : value.equals(that.value);
  }

  @Override
  public int hashCode()
  {
    return Objects.hashCode(value) + (caseSensitive ? (byte) 1 : 0);
  }
}

View File

@ -22,7 +22,10 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.metamx.common.StringUtils;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
/**
*/
@ -31,13 +34,33 @@ public class FragmentSearchQuerySpec implements SearchQuerySpec
private static final byte CACHE_TYPE_ID = 0x2;
private final List<String> values;
private final boolean caseSensitive;
private final String[] target;
/**
 * Convenience constructor for a case-insensitive fragment spec. Deliberately NOT annotated
 * with {@code @JsonCreator}: only the two-argument constructor below is the Jackson
 * creator, because declaring two property-based creators on one class makes Jackson fail
 * when resolving the creator for deserialization.
 */
public FragmentSearchQuerySpec(
    List<String> values
)
{
  this(values, false);
}

@JsonCreator
public FragmentSearchQuerySpec(
    @JsonProperty("values") List<String> values,
    @JsonProperty("caseSensitive") boolean caseSensitive
)
{
  this.values = values;
  this.caseSensitive = caseSensitive;
  // De-duplicate (and sort) the fragments once up front; accept() scans this array.
  // A null values list yields an empty target array.
  Set<String> set = new TreeSet<>();
  if (values != null) {
    set.addAll(values);
  }
  target = set.toArray(new String[set.size()]);
}
@JsonProperty
@ -46,11 +69,33 @@ public class FragmentSearchQuerySpec implements SearchQuerySpec
return values;
}
@JsonProperty
public boolean isCaseSensitive()
{
return caseSensitive;
}
@Override
public boolean accept(String dimVal)
{
for (String value : values) {
if (dimVal == null || !dimVal.toLowerCase().contains(value.toLowerCase())) {
if (dimVal == null || values == null) {
return false;
}
if (caseSensitive) {
return containsAny(target, dimVal);
}
for (String search : target) {
if (!org.apache.commons.lang.StringUtils.containsIgnoreCase(dimVal, search)) {
return false;
}
}
return true;
}
private boolean containsAny(String[] target, String input)
{
for (String value : target) {
if (!input.contains(value)) {
return false;
}
}
@ -60,6 +105,12 @@ public class FragmentSearchQuerySpec implements SearchQuerySpec
@Override
public byte[] getCacheKey()
{
if (values == null) {
return ByteBuffer.allocate(2)
.put(CACHE_TYPE_ID)
.put(caseSensitive ? (byte) 1 : 0).array();
}
final byte[][] valuesBytes = new byte[values.size()][];
int valuesBytesSize = 0;
int index = 0;
@ -69,8 +120,9 @@ public class FragmentSearchQuerySpec implements SearchQuerySpec
++index;
}
final ByteBuffer queryCacheKey = ByteBuffer.allocate(1 + valuesBytesSize)
.put(CACHE_TYPE_ID);
final ByteBuffer queryCacheKey = ByteBuffer.allocate(2 + valuesBytesSize)
.put(CACHE_TYPE_ID)
.put(caseSensitive ? (byte) 1 : 0);
for (byte[] bytes : valuesBytes) {
queryCacheKey.put(bytes);
@ -83,7 +135,7 @@ public class FragmentSearchQuerySpec implements SearchQuerySpec
public String toString()
{
return "FragmentSearchQuerySpec{" +
"values=" + values +
"values=" + values + ", caseSensitive=" + caseSensitive +
"}";
}
@ -99,16 +151,20 @@ public class FragmentSearchQuerySpec implements SearchQuerySpec
FragmentSearchQuerySpec that = (FragmentSearchQuerySpec) o;
if (values != null ? !values.equals(that.values) : that.values != null) {
if (caseSensitive ^ that.caseSensitive) {
return false;
}
return true;
if (values == null && that.values == null) {
return true;
}
return values != null && Arrays.equals(target, that.target);
}
@Override
public int hashCode()
{
return values != null ? values.hashCode() : 0;
return Arrays.hashCode(target) + (caseSensitive ? (byte) 1 : 0);
}
}

View File

@ -19,57 +19,24 @@ package io.druid.query.search.search;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.metamx.common.StringUtils;
import java.nio.ByteBuffer;
/**
*/
public class InsensitiveContainsSearchQuerySpec implements SearchQuerySpec
public class InsensitiveContainsSearchQuerySpec extends ContainsSearchQuerySpec
{
private static final byte CACHE_TYPE_ID = 0x1;
private final String value;
@JsonCreator
public InsensitiveContainsSearchQuerySpec(
@JsonProperty("value") String value
)
{
this.value = value;
}
@JsonProperty
public String getValue()
{
return value;
}
@Override
public boolean accept(String dimVal)
{
if (dimVal == null) {
return false;
}
return dimVal.toLowerCase().contains(value.toLowerCase());
}
@Override
public byte[] getCacheKey()
{
byte[] valueBytes = StringUtils.toUtf8(value);
return ByteBuffer.allocate(1 + valueBytes.length)
.put(CACHE_TYPE_ID)
.put(valueBytes)
.array();
super(value, false);
}
@Override
public String toString()
{
return "InsensitiveContainsSearchQuerySpec{" +
"value=" + value +
"value=" + getValue() +
"}";
}
@ -82,19 +49,6 @@ public class InsensitiveContainsSearchQuerySpec implements SearchQuerySpec
if (o == null || getClass() != o.getClass()) {
return false;
}
InsensitiveContainsSearchQuerySpec that = (InsensitiveContainsSearchQuerySpec) o;
if (value != null ? !value.equals(that.value) : that.value != null) {
return false;
}
return true;
}
@Override
public int hashCode()
{
return value != null ? value.hashCode() : 0;
return super.equals(o);
}
}

View File

@ -24,6 +24,7 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo;
*/
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
@JsonSubTypes(value = {
@JsonSubTypes.Type(name = "contains", value = ContainsSearchQuerySpec.class),
@JsonSubTypes.Type(name = "insensitive_contains", value = InsensitiveContainsSearchQuerySpec.class),
@JsonSubTypes.Type(name = "fragment", value = FragmentSearchQuerySpec.class)
})

View File

@ -329,6 +329,15 @@ public class QueryRunnerTestHelper
QueryRunnerFactory<T, QueryType> factory,
Segment adapter
)
{
return makeQueryRunner(factory, segmentId, adapter);
}
public static <T, QueryType extends Query<T>> QueryRunner<T> makeQueryRunner(
QueryRunnerFactory<T, QueryType> factory,
String segmentId,
Segment adapter
)
{
return new FinalizeResultsQueryRunner<T>(
new BySegmentQueryRunner<T>(

View File

@ -0,0 +1,200 @@
/*
* Druid - a distributed column store.
* Copyright 2012 - 2015 Metamarkets Group Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.druid.query.search;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.CharSource;
import com.metamx.common.guava.Sequences;
import io.druid.query.Druids;
import io.druid.query.QueryRunner;
import io.druid.query.Result;
import io.druid.query.search.search.SearchHit;
import io.druid.query.search.search.SearchQuery;
import io.druid.query.search.search.SearchQueryConfig;
import io.druid.segment.IncrementalIndexSegment;
import io.druid.segment.QueryableIndex;
import io.druid.segment.QueryableIndexSegment;
import io.druid.segment.TestIndex;
import io.druid.segment.incremental.IncrementalIndex;
import org.joda.time.DateTime;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static io.druid.query.QueryRunnerTestHelper.*;
/**
 * Tests the case sensitive / case insensitive behavior of the search query specs
 * ({@code ContainsSearchQuerySpec} via {@code query(..., caseSensitive)} and
 * {@code FragmentSearchQuerySpec} via {@code fragments(..., caseSensitive)}), run against
 * four segment flavors: on-heap and off-heap incremental indexes plus the mmapped
 * QueryableIndex persisted from each of them.
 */
@RunWith(Parameterized.class)
public class SearchQueryRunnerWithCaseTest
{
// Builds one QueryRunner per segment flavor over a small fixed data set whose dimension
// values differ only by letter case (spot/SPot, AutoMotive/automotive, PREFERRED/...).
@Parameterized.Parameters
public static Iterable<Object[]> constructorFeeder() throws IOException
{
SearchQueryRunnerFactory factory = new SearchQueryRunnerFactory(
new SearchQueryQueryToolChest(
new SearchQueryConfig(),
NoopIntervalChunkingQueryRunnerDecorator()
),
NOOP_QUERYWATCHER
);
// Tab-delimited rows; \u0001 separates entries of a multi-value dimension
// (e.g. "a\u0001preferred" is the two values ["a", "preferred"]).
CharSource input = CharSource.wrap(
"2011-01-12T00:00:00.000Z\tspot\tAutoMotive\tPREFERRED\ta\u0001preferred\t100.000000\n" +
"2011-01-12T00:00:00.000Z\tSPot\tbusiness\tpreferred\tb\u0001Preferred\t100.000000\n" +
"2011-01-12T00:00:00.000Z\tspot\tentertainment\tPREFERRed\te\u0001preferred\t100.000000\n" +
"2011-01-13T00:00:00.000Z\tspot\tautomotive\tpreferred\ta\u0001preferred\t94.874713"
);
IncrementalIndex index1 = TestIndex.makeRealtimeIndex(input, true);
IncrementalIndex index2 = TestIndex.makeRealtimeIndex(input, false);
QueryableIndex index3 = TestIndex.persistRealtimeAndLoadMMapped(index1);
QueryableIndex index4 = TestIndex.persistRealtimeAndLoadMMapped(index2);
return transformToConstructionFeeder(
Arrays.asList(
makeQueryRunner(factory, "index1", new IncrementalIndexSegment(index1, "index1")),
makeQueryRunner(factory, "index2", new IncrementalIndexSegment(index2, "index2")),
makeQueryRunner(factory, "index3", new QueryableIndexSegment("index3", index3)),
makeQueryRunner(factory, "index4", new QueryableIndexSegment("index4", index4))
)
);
}
// Runner for the segment flavor under test, injected by the Parameterized runner.
private final QueryRunner runner;
public SearchQueryRunnerWithCaseTest(
QueryRunner runner
)
{
this.runner = runner;
}
// Common query skeleton: full interval, single "all"-granularity result bucket.
private Druids.SearchQueryBuilder testBuilder()
{
return Druids.newSearchQueryBuilder()
.dataSource(dataSource)
.granularity(allGran)
.intervals(fullOnInterval);
}
@Test
public void testSearch()
{
Druids.SearchQueryBuilder builder = testBuilder();
// Case-insensitive keys so expected values can be matched regardless of the casing of
// the dimension name reported in hits. checkSearchQuery() drains and clears the map,
// so each put() below starts a fresh expectation.
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
SearchQuery searchQuery;
// Default (case-insensitive) search matches both casings of "spot".
searchQuery = builder.query("SPOT").build();
expectedResults.put(marketDimension, Sets.newHashSet("spot", "SPot"));
checkSearchQuery(searchQuery, expectedResults);
// Case-sensitive search matches only the exact casing.
searchQuery = builder.query("spot", true).build();
expectedResults.put(marketDimension, Sets.newHashSet("spot"));
checkSearchQuery(searchQuery, expectedResults);
searchQuery = builder.query("SPot", true).build();
expectedResults.put(marketDimension, Sets.newHashSet("SPot"));
checkSearchQuery(searchQuery, expectedResults);
}
// The same value appears (with varying case) in a single-value and a multi-value
// dimension; hits must be reported per dimension.
@Test
public void testSearchSameValueInMultiDims()
{
SearchQuery searchQuery;
Druids.SearchQueryBuilder builder = testBuilder()
.dimensions(Arrays.asList(placementDimension, placementishDimension));
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
searchQuery = builder.query("PREFERRED").build();
expectedResults.put(placementDimension, Sets.newHashSet("PREFERRED", "preferred", "PREFERRed"));
expectedResults.put(placementishDimension, Sets.newHashSet("preferred", "Preferred"));
checkSearchQuery(searchQuery, expectedResults);
searchQuery = builder.query("preferred", true).build();
expectedResults.put(placementDimension, Sets.newHashSet("preferred"));
expectedResults.put(placementishDimension, Sets.newHashSet("preferred"));
checkSearchQuery(searchQuery, expectedResults);
}
// Fragment search requires ALL fragments to be contained in the dimension value.
@Test
public void testFragmentSearch()
{
Druids.SearchQueryBuilder builder = testBuilder();
Map<String, Set<String>> expectedResults = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
SearchQuery searchQuery;
searchQuery = builder.fragments(Arrays.asList("auto", "ve")).build();
expectedResults.put(qualityDimension, Sets.newHashSet("automotive", "AutoMotive"));
checkSearchQuery(searchQuery, expectedResults);
searchQuery = builder.fragments(Arrays.asList("auto", "ve"), true).build();
expectedResults.put(qualityDimension, Sets.newHashSet("automotive"));
checkSearchQuery(searchQuery, expectedResults);
}
// Runs the query and asserts the hits are exactly the expected (dimension, value) pairs:
// each hit must be removable from its dimension's expected set, and every expected set
// must end up empty. Mutates and finally clears expectedResults so callers can reuse it.
private void checkSearchQuery(SearchQuery searchQuery, Map<String, Set<String>> expectedResults)
{
HashMap<String, List> context = new HashMap<>();
Iterable<Result<SearchResultValue>> results = Sequences.toList(
runner.run(searchQuery, context),
Lists.<Result<SearchResultValue>>newArrayList()
);
for (Result<SearchResultValue> result : results) {
// allGran buckets everything into a single result stamped with the interval start.
Assert.assertEquals(new DateTime("2011-01-12T00:00:00.000Z"), result.getTimestamp());
Assert.assertNotNull(result.getValue());
Iterable<SearchHit> resultValues = result.getValue();
for (SearchHit resultValue : resultValues) {
String dimension = resultValue.getDimension();
String theValue = resultValue.getValue();
Assert.assertTrue(
String.format("Result had unknown dimension[%s]", dimension),
expectedResults.containsKey(dimension)
);
Set<String> expectedSet = expectedResults.get(dimension);
Assert.assertTrue(
String.format("Couldn't remove dim[%s], value[%s]", dimension, theValue), expectedSet.remove(theValue)
);
}
}
for (Map.Entry<String, Set<String>> entry : expectedResults.entrySet()) {
Assert.assertTrue(
String.format(
"Dimension[%s] should have had everything removed, still has[%s]", entry.getKey(), entry.getValue()
),
entry.getValue().isEmpty()
);
}
expectedResults.clear();
}
}

View File

@ -22,9 +22,9 @@ package io.druid.segment;
import com.google.common.base.Charsets;
import com.google.common.base.Throwables;
import com.google.common.hash.Hashing;
import com.google.common.io.CharStreams;
import com.google.common.io.InputSupplier;
import com.google.common.io.CharSource;
import com.google.common.io.LineProcessor;
import com.google.common.io.Resources;
import com.metamx.common.logger.Logger;
import io.druid.data.input.impl.DelimitedParseSpec;
import io.druid.data.input.impl.DimensionsSpec;
@ -46,7 +46,6 @@ import org.joda.time.Interval;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLong;
@ -166,7 +165,16 @@ public class TestIndex
private static IncrementalIndex makeRealtimeIndex(final String resourceFilename, final boolean useOffheap)
{
final URL resource = TestIndex.class.getClassLoader().getResource(resourceFilename);
if (resource == null) {
throw new IllegalArgumentException("cannot find resource " + resourceFilename);
}
log.info("Realtime loading index file[%s]", resource);
CharSource stream = Resources.asByteSource(resource).asCharSource(Charsets.UTF_8);
return makeRealtimeIndex(stream, useOffheap);
}
public static IncrementalIndex makeRealtimeIndex(final CharSource source, final boolean useOffheap)
{
final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
.withMinTimestamp(new DateTime("2011-01-12T00:00:00.000Z").getMillis())
.withQueryGranularity(QueryGranularity.NONE)
@ -190,18 +198,7 @@ public class TestIndex
final AtomicLong startTime = new AtomicLong();
int lineCount;
try {
lineCount = CharStreams.readLines(
CharStreams.newReaderSupplier(
new InputSupplier<InputStream>()
{
@Override
public InputStream getInput() throws IOException
{
return resource.openStream();
}
},
Charsets.UTF_8
),
lineCount = source.readLines(
new LineProcessor<Integer>()
{
StringInputRowParser parser = new StringInputRowParser(