From 75ee9521972a335cd595da451164b7c8b3760bc0 Mon Sep 17 00:00:00 2001 From: Mangesh Pardeshi Date: Fri, 18 Dec 2015 09:58:44 -0600 Subject: [PATCH] Add extraction function support for dimension Selector --- .../extraction/IdentityExtractionFn.java | 12 ++ .../having/DimensionSelectorHavingSpec.java | 59 ++++++--- .../DimensionSelectorHavingSpecTest.java | 114 +++++++++++++----- 3 files changed, 133 insertions(+), 52 deletions(-) diff --git a/processing/src/main/java/io/druid/query/extraction/IdentityExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/IdentityExtractionFn.java index b74292e2fcc..63afd3d9e48 100644 --- a/processing/src/main/java/io/druid/query/extraction/IdentityExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/IdentityExtractionFn.java @@ -59,4 +59,16 @@ public class IdentityExtractionFn implements ExtractionFn { return ExtractionType.ONE_TO_ONE; } + + @Override + public String toString() + { + return "Identity"; + } + + @Override + public boolean equals(Object o) + { + return o != null && o instanceof IdentityExtractionFn; + } } diff --git a/processing/src/main/java/io/druid/query/groupby/having/DimensionSelectorHavingSpec.java b/processing/src/main/java/io/druid/query/groupby/having/DimensionSelectorHavingSpec.java index 57cb5753ab7..822bdfba130 100644 --- a/processing/src/main/java/io/druid/query/groupby/having/DimensionSelectorHavingSpec.java +++ b/processing/src/main/java/io/druid/query/groupby/having/DimensionSelectorHavingSpec.java @@ -25,6 +25,8 @@ import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.metamx.common.StringUtils; import io.druid.data.input.Row; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.IdentityExtractionFn; import java.nio.ByteBuffer; import java.util.List; @@ -35,41 +37,55 @@ public class DimensionSelectorHavingSpec implements HavingSpec private static final byte STRING_SEPARATOR = (byte) 0xFF; private final String dimension; private final String value; + private final ExtractionFn extractionFn; @JsonCreator public DimensionSelectorHavingSpec( @JsonProperty("dimension") String dimName, - @JsonProperty("value") String value + @JsonProperty("value") String value, + @JsonProperty("extractionFn") ExtractionFn extractionFn ) { - this.dimension = Preconditions.checkNotNull(dimName, "Must have attribute 'dimension'"); + dimension = Preconditions.checkNotNull(dimName, "Must have attribute 'dimension'"); this.value = value; + if (extractionFn != null) { + this.extractionFn = extractionFn; + } else { + this.extractionFn = new IdentityExtractionFn(); + } } @JsonProperty("value") public String getValue() { - return this.value; + return value; } @JsonProperty("dimension") public String getDimension() { - return this.dimension; + return dimension; + } + + @JsonProperty + public ExtractionFn getExtractionFn() + { + return extractionFn; } public boolean eval(Row row) { - List dimRowValList = row.getDimension(this.dimension); + List dimRowValList = row.getDimension(dimension); if (dimRowValList == null || dimRowValList.isEmpty()) { return Strings.isNullOrEmpty(value); } for (String rowVal : dimRowValList) { - if (this.value != null && this.value.equals(rowVal)) { + String extracted = getExtractionFn().apply(rowVal); + if (value != null && value.equals(extracted)) { return true; } - if (rowVal == null || rowVal.isEmpty()) { + if (extracted == null || extracted.isEmpty()) { return Strings.isNullOrEmpty(value); } } @@ -79,14 +95,18 @@ public class DimensionSelectorHavingSpec implements HavingSpec public byte[] getCacheKey() { - byte[] dimBytes = StringUtils.toUtf8(this.dimension); - byte[] valBytes = StringUtils.toUtf8(this.value); - return ByteBuffer.allocate(2 + dimBytes.length + valBytes.length) - .put(CACHE_KEY) - .put(dimBytes) - .put(STRING_SEPARATOR) - .put(valBytes) - .array(); + byte[] dimBytes = StringUtils.toUtf8(dimension); + byte[] valBytes = StringUtils.toUtf8(value); + byte [] extractionFnBytes = this.getExtractionFn().getCacheKey(); + + return ByteBuffer.allocate(3 + dimBytes.length + valBytes.length + extractionFnBytes.length) + .put(CACHE_KEY) + .put(dimBytes) + .put(STRING_SEPARATOR) + .put(valBytes) + .put(STRING_SEPARATOR) + .put(extractionFnBytes) + .array(); } @Override @@ -115,7 +135,7 @@ public class DimensionSelectorHavingSpec implements HavingSpec dimEquals = true; } - return (valEquals && dimEquals); + return (valEquals && dimEquals && extractionFn.equals(that.extractionFn)); } @Override @@ -131,9 +151,10 @@ public class DimensionSelectorHavingSpec implements HavingSpec { StringBuilder sb = new StringBuilder(); sb.append("DimensionSelectorHavingSpec"); - sb.append("{dimension='").append(this.dimension).append('\''); - sb.append(", value='").append(this.value).append('\''); - sb.append('}'); + sb.append("{dimension='").append(dimension).append('\''); + sb.append(", value='").append(value); + sb.append("', extractionFunction='").append(getExtractionFn()); + sb.append("'}"); return sb.toString(); } diff --git a/processing/src/test/java/io/druid/query/groupby/having/DimensionSelectorHavingSpecTest.java b/processing/src/test/java/io/druid/query/groupby/having/DimensionSelectorHavingSpecTest.java index 57c280a05c7..01c7c01c1f0 100644 --- a/processing/src/test/java/io/druid/query/groupby/having/DimensionSelectorHavingSpecTest.java +++ b/processing/src/test/java/io/druid/query/groupby/having/DimensionSelectorHavingSpecTest.java @@ -26,6 +26,9 @@ import com.google.common.collect.ImmutableMap; import io.druid.data.input.MapBasedRow; import io.druid.data.input.Row; import io.druid.jackson.DefaultObjectMapper; +import io.druid.query.extraction.ExtractionFn; +import io.druid.query.extraction.IdentityExtractionFn; +import io.druid.query.extraction.RegexDimExtractionFn; import org.junit.Assert; import org.junit.Test; @@ -53,7 +56,7 @@ public class DimensionSelectorHavingSpecTest @Test public void testDimSelectorHavingClauseSerde() throws Exception { - HavingSpec dimHavingSpec = new DimensionSelectorHavingSpec("dim", "v"); + HavingSpec dimHavingSpec = new DimensionSelectorHavingSpec("dim", "v", null); Map dimSelectMap = ImmutableMap.of( "type", "dimSelector", @@ -63,24 +66,29 @@ public class DimensionSelectorHavingSpecTest ObjectMapper mapper = new DefaultObjectMapper(); assertEquals(dimHavingSpec, mapper.convertValue(dimSelectMap, DimensionSelectorHavingSpec.class)); - } @Test public void testEquals() throws Exception { - HavingSpec dimHavingSpec1 = new DimensionSelectorHavingSpec("dim", "v"); - HavingSpec dimHavingSpec2 = new DimensionSelectorHavingSpec("dim", "v"); - HavingSpec dimHavingSpec3 = new DimensionSelectorHavingSpec("dim1", "v"); - HavingSpec dimHavingSpec4 = new DimensionSelectorHavingSpec("dim2", "v"); - HavingSpec dimHavingSpec5 = new DimensionSelectorHavingSpec("dim", "v1"); - HavingSpec dimHavingSpec6 = new DimensionSelectorHavingSpec("dim", "v2"); - HavingSpec dimHavingSpec7 = new DimensionSelectorHavingSpec("dim", null); - HavingSpec dimHavingSpec8 = new DimensionSelectorHavingSpec("dim", null); - HavingSpec dimHavingSpec9 = new DimensionSelectorHavingSpec("dim1", null); - HavingSpec dimHavingSpec10 = new DimensionSelectorHavingSpec("dim2", null); - HavingSpec dimHavingSpec11 = new DimensionSelectorHavingSpec("dim1", "v"); - HavingSpec dimHavingSpec12 = new DimensionSelectorHavingSpec("dim2", null); + ExtractionFn extractionFn1 = new RegexDimExtractionFn("^([^,]*),", false, ""); + ExtractionFn extractionFn2 = new RegexDimExtractionFn(",(.*)", false, ""); + ExtractionFn extractionFn3 = new RegexDimExtractionFn("^([^,]*),", false, ""); + + HavingSpec dimHavingSpec1 = new DimensionSelectorHavingSpec("dim", "v", extractionFn1); + HavingSpec dimHavingSpec2 = new DimensionSelectorHavingSpec("dim", "v", extractionFn3); + HavingSpec dimHavingSpec3 = new DimensionSelectorHavingSpec("dim1", "v", null); + HavingSpec dimHavingSpec4 = new DimensionSelectorHavingSpec("dim2", "v", null); + HavingSpec dimHavingSpec5 = new DimensionSelectorHavingSpec("dim", "v1", null); + HavingSpec dimHavingSpec6 = new DimensionSelectorHavingSpec("dim", "v2", null); + HavingSpec dimHavingSpec7 = new DimensionSelectorHavingSpec("dim", null, null); + HavingSpec dimHavingSpec8 = new DimensionSelectorHavingSpec("dim", null, null); + HavingSpec dimHavingSpec9 = new DimensionSelectorHavingSpec("dim1", null, null); + HavingSpec dimHavingSpec10 = new DimensionSelectorHavingSpec("dim2", null, null); + HavingSpec dimHavingSpec11 = new DimensionSelectorHavingSpec("dim1", "v", null); + HavingSpec dimHavingSpec12 = new DimensionSelectorHavingSpec("dim2", null, null); + HavingSpec dimHavingSpec13 = new DimensionSelectorHavingSpec("dim", "value", extractionFn1); + HavingSpec dimHavingSpec14 = new DimensionSelectorHavingSpec("dim", "value", extractionFn2); assertEquals(dimHavingSpec1, dimHavingSpec2); assertNotEquals(dimHavingSpec3, dimHavingSpec4); @@ -88,69 +96,109 @@ public class DimensionSelectorHavingSpecTest assertEquals(dimHavingSpec7, dimHavingSpec8); assertNotEquals(dimHavingSpec9, dimHavingSpec10); assertNotEquals(dimHavingSpec11, dimHavingSpec12); + assertNotEquals(dimHavingSpec13, dimHavingSpec14); } @Test public void testToString() { - String expected = "DimensionSelectorHavingSpec{dimension='dimension', value='v'}"; - - Assert.assertEquals(new DimensionSelectorHavingSpec("dimension", "v").toString(), expected); + ExtractionFn extractionFn = new RegexDimExtractionFn("^([^,]*),", false, ""); + String expected = "DimensionSelectorHavingSpec{" + + "dimension='gender'," + + " value='m'," + + " extractionFunction='regex(^([^,]*),)'}"; + Assert.assertEquals(new DimensionSelectorHavingSpec("gender", "m", extractionFn).toString(), expected); + + expected = "DimensionSelectorHavingSpec{" + + "dimension='gender'," + + " value='m'," + + " extractionFunction='Identity'}"; + + Assert.assertEquals(new DimensionSelectorHavingSpec("gender", "m", null).toString(), expected); } @Test(expected = NullPointerException.class) public void testNullDimension() { - new DimensionSelectorHavingSpec(null, "value"); + new DimensionSelectorHavingSpec(null, "value", null); } @Test public void testDimensionFilterSpec() { - DimensionSelectorHavingSpec spec = new DimensionSelectorHavingSpec("dimension", "v"); + DimensionSelectorHavingSpec spec = new DimensionSelectorHavingSpec("dimension", "v", null); assertTrue(spec.eval(getTestRow("v"))); assertTrue(spec.eval(getTestRow(ImmutableList.of("v", "v1")))); assertFalse(spec.eval(getTestRow(ImmutableList.of()))); assertFalse(spec.eval(getTestRow("v1"))); - spec = new DimensionSelectorHavingSpec("dimension", null); + spec = new DimensionSelectorHavingSpec("dimension", null, null); assertTrue(spec.eval(getTestRow(ImmutableList.of()))); assertTrue(spec.eval(getTestRow(ImmutableList.of("")))); assertFalse(spec.eval(getTestRow(ImmutableList.of("v")))); assertFalse(spec.eval(getTestRow(ImmutableList.of("v", "v1")))); - spec = new DimensionSelectorHavingSpec("dimension", ""); + spec = new DimensionSelectorHavingSpec("dimension", "", null); assertTrue(spec.eval(getTestRow(ImmutableList.of()))); assertTrue(spec.eval(getTestRow(ImmutableList.of("")))); assertTrue(spec.eval(getTestRow(ImmutableList.of("v", "v1", "")))); assertFalse(spec.eval(getTestRow(ImmutableList.of("v")))); assertFalse(spec.eval(getTestRow(ImmutableList.of("v", "v1")))); + + ExtractionFn extractionFn = new RegexDimExtractionFn("^([^,]*),", true, "default"); + spec = new DimensionSelectorHavingSpec("dimension", "v", extractionFn); + assertTrue(spec.eval(getTestRow(ImmutableList.of("v,v1", "v2,v3")))); + assertFalse(spec.eval(getTestRow(ImmutableList.of("v1,v4")))); + assertFalse(spec.eval(getTestRow(ImmutableList.of("v")))); + assertFalse(spec.eval(getTestRow(ImmutableList.of("v1", "default")))); + assertTrue(spec.eval(getTestRow(ImmutableList.of("v,default", "none")))); + + spec = new DimensionSelectorHavingSpec("dimension", "default", extractionFn); + assertTrue(spec.eval(getTestRow(ImmutableList.of("v1,v2", "none")))); + } @Test public void testGetCacheKey() { + ExtractionFn extractionFn = new IdentityExtractionFn(); byte[] dimBytes = "dimension".getBytes(Charsets.UTF_8); byte[] valBytes = "v".getBytes(Charsets.UTF_8); + byte[] extFunKey = extractionFn.getCacheKey(); - byte[] expected = ByteBuffer.allocate(12) - .put(CACHE_KEY) - .put(dimBytes) - .put(STRING_SEPARATOR) - .put(valBytes) - .array(); - - DimensionSelectorHavingSpec dfhs = new DimensionSelectorHavingSpec("dimension", "v"); - DimensionSelectorHavingSpec dfhs1 = new DimensionSelectorHavingSpec("dimension", "v"); - DimensionSelectorHavingSpec dfhs2 = new DimensionSelectorHavingSpec("dimensi", "onv"); + byte[] expected = ByteBuffer.allocate(3 + dimBytes.length + valBytes.length + extFunKey.length) + .put(CACHE_KEY) + .put(dimBytes) + .put(STRING_SEPARATOR) + .put(valBytes) + .put(STRING_SEPARATOR) + .put(extFunKey) + .array(); + + DimensionSelectorHavingSpec dfhs = new DimensionSelectorHavingSpec("dimension", "v", null); + DimensionSelectorHavingSpec dfhs1 = new DimensionSelectorHavingSpec("dimension", "v", null); + DimensionSelectorHavingSpec dfhs2 = new DimensionSelectorHavingSpec("dimensi", "onv", null); byte[] actual = dfhs.getCacheKey(); Assert.assertArrayEquals(expected, actual); Assert.assertTrue(Arrays.equals(dfhs.getCacheKey(), dfhs1.getCacheKey())); Assert.assertFalse(Arrays.equals(dfhs.getCacheKey(), dfhs2.getCacheKey())); - - + + extractionFn = new RegexDimExtractionFn("^([^,]*),", false, ""); + extFunKey = extractionFn.getCacheKey(); + dfhs = new DimensionSelectorHavingSpec("dimension", "v", extractionFn); + actual = dfhs.getCacheKey(); + expected = ByteBuffer.allocate(3 + dimBytes.length + valBytes.length + extFunKey.length) + .put(CACHE_KEY) + .put(dimBytes) + .put(STRING_SEPARATOR) + .put(valBytes) + .put(STRING_SEPARATOR) + .put(extFunKey) + .array(); + + Assert.assertArrayEquals(expected, actual); } }