From 41df3e1f11a88a7237475c3024a64e44711e91f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Thu, 21 Nov 2013 17:55:58 -0800 Subject: [PATCH 01/10] implement javascript dimextractionfn --- .../query/extraction/DimExtractionFn.java | 3 +- .../extraction/JavascriptDimExtractionFn.java | 69 +++++++++++++++++++ .../JavascriptDimExtractionFnTest.java | 32 +++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java create mode 100644 processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java diff --git a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java index 65b5c2a5d7b..0509c92714a 100644 --- a/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/DimExtractionFn.java @@ -29,7 +29,8 @@ import com.fasterxml.jackson.annotation.JsonTypeInfo; @JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class), @JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class), @JsonSubTypes.Type(name = "partial", value = PartialDimExtractionFn.class), - @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class) + @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class), + @JsonSubTypes.Type(name = "javascript", value = JavascriptDimExtractionFn.class) }) public interface DimExtractionFn { diff --git a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java new file mode 100644 index 00000000000..e648d5b9d91 --- /dev/null +++ b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java @@ -0,0 +1,69 @@ +package io.druid.query.extraction; + +import 
com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Function; +import org.mozilla.javascript.Context; +import org.mozilla.javascript.ContextFactory; +import org.mozilla.javascript.ScriptableObject; + +import java.nio.ByteBuffer; + +public class JavascriptDimExtractionFn implements DimExtractionFn +{ + private static Function compile(String function) { + final ContextFactory contextFactory = ContextFactory.getGlobal(); + final Context context = contextFactory.enterContext(); + context.setOptimizationLevel(9); + + final ScriptableObject scope = context.initStandardObjects(); + + final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "fn", 1, null); + Context.exit(); + + + return new Function() + { + public String apply(String input) + { + // ideally we need a close() function to discard the context once it is not used anymore + Context cx = Context.getCurrentContext(); + if (cx == null) { + cx = contextFactory.enterContext(); + } + + return Context.toString(fn.call(cx, scope, scope, new String[]{input})); + } + }; + } + + private static final byte CACHE_TYPE_ID = 0x4; + + private final String function; + private final Function fn; + + @JsonCreator + public JavascriptDimExtractionFn( + @JsonProperty("function") String function + ) + { + this.function = function; + this.fn = compile(function); + } + + @Override + public byte[] getCacheKey() + { + byte[] bytes = function.getBytes(); + return ByteBuffer.allocate(1 + bytes.length) + .put(CACHE_TYPE_ID) + .put(bytes) + .array(); + } + + @Override + public String apply(String dimValue) + { + return fn.apply(dimValue); + } +} diff --git a/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java new file mode 100644 index 00000000000..7a48da0e745 --- /dev/null +++ 
b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java @@ -0,0 +1,32 @@ +package io.druid.query.extraction.extraction; + +import io.druid.query.extraction.DimExtractionFn; +import io.druid.query.extraction.JavascriptDimExtractionFn; +import org.junit.Assert; +import org.junit.Test; + +public class JavascriptDimExtractionFnTest +{ + private static final String[] testStrings = { + "Quito", + "Calgary", + "Tokyo", + "Stockholm", + "Vancouver", + "Pretoria", + "Wellington", + "Ontario" + }; + + @Test + public void testExtraction() + { + String function = "function(str) { return str.substring(0,3); }"; + DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function); + + for (String str : testStrings) { + String res = dimExtractionFn.apply(str); + Assert.assertEquals(str.substring(0, 3), res); + } + } +} From 51c039345561550dd975e43d9203d01560b29022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Fri, 22 Nov 2013 15:35:03 -0800 Subject: [PATCH 02/10] typo --- .../druid/query/aggregation/post/JavaScriptPostAggregator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processing/src/main/java/io/druid/query/aggregation/post/JavaScriptPostAggregator.java b/processing/src/main/java/io/druid/query/aggregation/post/JavaScriptPostAggregator.java index d2973783b9c..fef4b26f0f2 100644 --- a/processing/src/main/java/io/druid/query/aggregation/post/JavaScriptPostAggregator.java +++ b/processing/src/main/java/io/druid/query/aggregation/post/JavaScriptPostAggregator.java @@ -57,7 +57,7 @@ public class JavaScriptPostAggregator implements PostAggregator final ScriptableObject scope = context.initStandardObjects(); - final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "aggregate", 1, null); + final org.mozilla.javascript.Function fn = context.compileFunction(scope, function, "fn", 1, null); Context.exit(); From 40af9df0d460d43d0f9c19e507e428a80bd9cdfe 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Fri, 22 Nov 2013 15:53:39 -0800 Subject: [PATCH 03/10] fix search doc links and code --- docs/content/SearchQuery.md | 90 +++++++++++++++++++------------------ docs/content/toc.textile | 2 +- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/docs/content/SearchQuery.md b/docs/content/SearchQuery.md index 6cee0918e91..2cfc726d60b 100644 --- a/docs/content/SearchQuery.md +++ b/docs/content/SearchQuery.md @@ -3,26 +3,27 @@ layout: doc_page --- A search query returns dimension values that match the search specification. - { - "queryType": "search", - "dataSource": "sample_datasource", - "granularity": "day", - "searchDimensions": [ - "dim1", - "dim2" - ], - "query": { - "type": "insensitive_contains", - "value": "Ke" - }, - "sort" : { - "type": "lexicographic" - }, - "intervals": [ - "2013-01-01T00:00:00.000/2013-01-03T00:00:00.000" - ] - } - +```json +{ + "queryType": "search", + "dataSource": "sample_datasource", + "granularity": "day", + "searchDimensions": [ + "dim1", + "dim2" + ], + "query": { + "type": "insensitive_contains", + "value": "Ke" + }, + "sort" : { + "type": "lexicographic" + }, + "intervals": [ + "2013-01-01T00:00:00.000/2013-01-03T00:00:00.000" + ] +} +``` There are several main parts to a search query: @@ -40,32 +41,33 @@ There are several main parts to a search query: The format of the result is: - [ +```json +[ + { + "timestamp": "2012-01-01T00:00:00.000Z", + "result": [ { - "timestamp": "2012-01-01T00:00:00.000Z", - "result": [ - { - "dimension": "dim1", - "value": "Ke$ha" - }, - { - "dimension": "dim2", - "value": "Ke$haForPresident" - } - ] + "dimension": "dim1", + "value": "Ke$ha" }, { - "timestamp": "2012-01-02T00:00:00.000Z", - "result": [ - { - "dimension": "dim1", - "value": "SomethingThatContainsKe" - }, - { - "dimension": "dim2", - "value": "SomethingElseThatContainsKe" - } - ] + "dimension": "dim2", + "value": "Ke$haForPresident" } ] - + }, + { + 
"timestamp": "2012-01-02T00:00:00.000Z", + "result": [ + { + "dimension": "dim1", + "value": "SomethingThatContainsKe" + }, + { + "dimension": "dim2", + "value": "SomethingElseThatContainsKe" + } + ] + } +] +``` diff --git a/docs/content/toc.textile b/docs/content/toc.textile index a74bc8cc540..c5be284b206 100644 --- a/docs/content/toc.textile +++ b/docs/content/toc.textile @@ -40,7 +40,7 @@ h2. Querying ** "GroupByQuery":./GroupByQuery.html *** "OrderBy":./OrderBy.html *** "Having":./Having.html -** "SearchQuery":./Having.html +** "SearchQuery":./SearchQuery.html *** "SearchQuerySpec":./SearchQuerySpec.html ** "SegmentMetadataQuery":./SegmentMetadataQuery.html ** "TimeBoundaryQuery":./TimeBoundaryQuery.html From 60fddfb67e5a59d196c5fc43c66dbb49f5f7866b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Fri, 22 Nov 2013 16:32:53 -0800 Subject: [PATCH 04/10] add DimensionSpecs docs --- docs/content/DimensionSpecs.md | 76 ++++++++++++++++++++++++++++++++++ docs/content/toc.textile | 1 + 2 files changed, 77 insertions(+) create mode 100644 docs/content/DimensionSpecs.md diff --git a/docs/content/DimensionSpecs.md b/docs/content/DimensionSpecs.md new file mode 100644 index 00000000000..bb1dda63221 --- /dev/null +++ b/docs/content/DimensionSpecs.md @@ -0,0 +1,76 @@ +--- +layout: doc_page +--- + +## DimensionSpec + +`DimensionSpec`s define how dimension values get transformed prior to aggregation. + +### DefaultDimensionSpec + +Returns dimension values as is and optionally renames renames the dimension. 
+ +```json +{ "type" : "default", "dimension" : <dimension>, "outputName": <output_name> } +``` + +### ExtractionDimensionSpec + +Returns dimension values transformed using the given [DimExtractionFn](#toc_3) + +```json +{ + "type" : "extraction", + "dimension" : <dimension>, + "outputName" : <output_name>, + "dimExtractionFn" : <dim_extraction_fn> +} +``` + +## DimExtractionFn + +`DimExtractionFn`s define the transformation applied to each dimension value. + +### RegexDimExtractionFn + +Returns the first group matched by the given regular expression. If there is no match it returns the dimension value as is. + +```json +{ "type" : "regex", "expr" : <regular_expression> } +``` + +### PartialDimExtractionFn + +Returns the dimension value as is if there is a match, otherwise returns null. + +```json +{ "type" : "partial", "expr" : <regular_expression> } +``` + +### SearchQuerySpecDimExtractionFn + +Returns the dimension value as is if the given [SearchQuerySpec](SearchQuerySpec.html) matches, otherwise returns null. + +```json +{ "type" : "searchQuery", "query" : <search_query_spec> } +``` + +### TimeDimExtractionFn + +Parses dimension values as timestamps using the given input format, and returns them formatted using the given output format. Time formats follow the [com.ibm.icu.text.SimpleDateFormat](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html) format. + +```json +{ "type" : "time", "timeFormat" : <input_format>, "resultFormat" : <output_format> } +``` + +### JavascriptDimExtractionFn + +Returns the dimension value as transformed by the given JavaScript function. + +Example + +```json +{ + "type" : "javascript", + "function" : "function(str) { return str.substr(0, 3); }" +} diff --git a/docs/content/toc.textile b/docs/content/toc.textile index c5be284b206..00edcfcebeb 100644 --- a/docs/content/toc.textile +++ b/docs/content/toc.textile @@ -36,6 +36,7 @@ h2. 
Querying ** "Aggregations":./Aggregations.html ** "Post Aggregations":./Post-aggregations.html ** "Granularities":./Granularities.html +** "DimensionSpecs":./DimensionSpecs.html * Query Types ** "GroupByQuery":./GroupByQuery.html *** "OrderBy":./OrderBy.html From 5c778846848689aec2744e3720fe6f7bc5fe260e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Mon, 25 Nov 2013 17:14:10 -0800 Subject: [PATCH 05/10] Address code review - more tests - fix license - add missing getter --- .../extraction/JavascriptDimExtractionFn.java | 25 ++ .../JavascriptDimExtractionFnTest.java | 243 +++++++++++++++++- 2 files changed, 267 insertions(+), 1 deletion(-) diff --git a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java index e648d5b9d91..1878df5479e 100644 --- a/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/JavascriptDimExtractionFn.java @@ -1,3 +1,22 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + package io.druid.query.extraction; import com.fasterxml.jackson.annotation.JsonCreator; @@ -51,6 +70,12 @@ public class JavascriptDimExtractionFn implements DimExtractionFn this.fn = compile(function); } + @JsonProperty + public String getFunction() + { + return function; + } + @Override public byte[] getCacheKey() { diff --git a/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java index 7a48da0e745..cc5a1b26b4e 100644 --- a/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java +++ b/processing/src/test/java/io/druid/query/extraction/extraction/JavascriptDimExtractionFnTest.java @@ -1,10 +1,32 @@ +/* + * Druid - a distributed column store. + * Copyright (C) 2012, 2013 Metamarkets Group Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ + package io.druid.query.extraction.extraction; +import com.google.common.collect.Iterators; import io.druid.query.extraction.DimExtractionFn; import io.druid.query.extraction.JavascriptDimExtractionFn; import org.junit.Assert; import org.junit.Test; +import java.util.Iterator; + public class JavascriptDimExtractionFnTest { private static final String[] testStrings = { @@ -19,7 +41,7 @@ public class JavascriptDimExtractionFnTest }; @Test - public void testExtraction() + public void testJavascriptSubstring() { String function = "function(str) { return str.substring(0,3); }"; DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function); @@ -29,4 +51,223 @@ public class JavascriptDimExtractionFnTest Assert.assertEquals(str.substring(0, 3), res); } } + + @Test + public void testJavascriptRegex() + { + String function = "function(str) { return str.replace(/[aeiou]/g, ''); }"; + DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function); + + Iterator it = Iterators.forArray("Qt", "Clgry", "Tky", "Stckhlm", "Vncvr", "Prtr", "Wllngtn", "Ontr"); + for (String str : testStrings) { + String res = dimExtractionFn.apply(str); + Assert.assertEquals(it.next(), res); + } + } + + @Test + public void testJavaScriptPorterStemmer() + { + // JavaScript porter stemmer adapted from + // https://github.com/kristopolous/Porter-Stemmer/blob/e990a8d456510571d1ef9ef923d2a30a94679e13/PorterStemmer1980.js + String function = "function(w) {" + + "var step2list = {\n" + + " \"ational\" : \"ate\",\n" + + " \"tional\" : \"tion\",\n" + + " \"enci\" : \"ence\",\n" + + " \"anci\" : \"ance\",\n" + + " \"izer\" : \"ize\",\n" + + " \"bli\" : \"ble\",\n" + + " \"alli\" : \"al\",\n" + + " \"entli\" : \"ent\",\n" + + " \"eli\" : \"e\",\n" + + " \"ousli\" : \"ous\",\n" + + " \"ization\" : \"ize\",\n" + + " \"ation\" : \"ate\",\n" + + " \"ator\" : \"ate\",\n" + + " \"alism\" : \"al\",\n" + + " \"iveness\" : \"ive\",\n" + + " \"fulness\" : \"ful\",\n" + + " \"ousness\" : 
\"ous\",\n" + + " \"aliti\" : \"al\",\n" + + " \"iviti\" : \"ive\",\n" + + " \"biliti\" : \"ble\",\n" + + " \"logi\" : \"log\"\n" + + " },\n" + + "\n" + + " step3list = {\n" + + " \"icate\" : \"ic\",\n" + + " \"ative\" : \"\",\n" + + " \"alize\" : \"al\",\n" + + " \"iciti\" : \"ic\",\n" + + " \"ical\" : \"ic\",\n" + + " \"ful\" : \"\",\n" + + " \"ness\" : \"\"\n" + + " },\n" + + "\n" + + " c = \"[^aeiou]\", // consonant\n" + + " v = \"[aeiouy]\", // vowel\n" + + " C = c + \"[^aeiouy]*\", // consonant sequence\n" + + " V = v + \"[aeiou]*\", // vowel sequence\n" + + "\n" + + " mgr0 = \"^(\" + C + \")?\" + V + C, // [C]VC... is m>0\n" + + " meq1 = \"^(\" + C + \")?\" + V + C + \"(\" + V + \")?$\", // [C]VC[V] is m=1\n" + + " mgr1 = \"^(\" + C + \")?\" + V + C + V + C, // [C]VCVC... is m>1\n" + + " s_v = \"^(\" + C + \")?\" + v; " + + "" + + "var\n" + + " stem,\n" + + " suffix,\n" + + " firstch,\n" + + " re,\n" + + " re2,\n" + + " re3,\n" + + " re4,\n" + + " debugFunction,\n" + + " origword = w;\n" + + "\n" + + "\n" + + " if (w.length < 3) { return w; }\n" + + "\n" + + " firstch = w.substr(0,1);\n" + + " if (firstch == \"y\") {\n" + + " w = firstch.toUpperCase() + w.substr(1);\n" + + " }\n" + + "\n" + + " // Step 1a\n" + + " re = /^(.+?)(ss|i)es$/;\n" + + " re2 = /^(.+?)([^s])s$/;\n" + + "\n" + + " if (re.test(w)) { \n" + + " w = w.replace(re,\"$1$2\"); \n" + + "\n" + + " } else if (re2.test(w)) {\n" + + " w = w.replace(re2,\"$1$2\"); \n" + + " }\n" + + "\n" + + " // Step 1b\n" + + " re = /^(.+?)eed$/;\n" + + " re2 = /^(.+?)(ed|ing)$/;\n" + + " if (re.test(w)) {\n" + + " var fp = re.exec(w);\n" + + " re = new RegExp(mgr0);\n" + + " if (re.test(fp[1])) {\n" + + " re = /.$/;\n" + + " w = w.replace(re,\"\");\n" + + " }\n" + + " } else if (re2.test(w)) {\n" + + " var fp = re2.exec(w);\n" + + " stem = fp[1];\n" + + " re2 = new RegExp(s_v);\n" + + " if (re2.test(stem)) {\n" + + " w = stem;\n" + + "\n" + + " re2 = /(at|bl|iz)$/;\n" + + " re3 = new 
RegExp(\"([^aeiouylsz])\\\\1$\");\n" + + " re4 = new RegExp(\"^\" + C + v + \"[^aeiouwxy]$\");\n" + + "\n" + + " if (re2.test(w)) { \n" + + " w = w + \"e\"; \n" + + "\n" + + " } else if (re3.test(w)) { \n" + + " re = /.$/; \n" + + " w = w.replace(re,\"\"); \n" + + "\n" + + " } else if (re4.test(w)) { \n" + + " w = w + \"e\"; \n" + + " }\n" + + " }\n" + + " }\n" + + "\n" + + " // Step 1c\n" + + " re = new RegExp(\"^(.*\" + v + \".*)y$\");\n" + + " if (re.test(w)) {\n" + + " var fp = re.exec(w);\n" + + " stem = fp[1];\n" + + " w = stem + \"i\";\n" + + " }\n" + + "\n" + + " // Step 2\n" + + " re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;\n" + + " if (re.test(w)) {\n" + + " var fp = re.exec(w);\n" + + " stem = fp[1];\n" + + " suffix = fp[2];\n" + + " re = new RegExp(mgr0);\n" + + " if (re.test(stem)) {\n" + + " w = stem + step2list[suffix];\n" + + " }\n" + + " }\n" + + "\n" + + " // Step 3\n" + + " re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;\n" + + " if (re.test(w)) {\n" + + " var fp = re.exec(w);\n" + + " stem = fp[1];\n" + + " suffix = fp[2];\n" + + " re = new RegExp(mgr0);\n" + + " if (re.test(stem)) {\n" + + " w = stem + step3list[suffix];\n" + + " }\n" + + " }\n" + + "\n" + + " // Step 4\n" + + " re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;\n" + + " re2 = /^(.+?)(s|t)(ion)$/;\n" + + " if (re.test(w)) {\n" + + " var fp = re.exec(w);\n" + + " stem = fp[1];\n" + + " re = new RegExp(mgr1);\n" + + " if (re.test(stem)) {\n" + + " w = stem;\n" + + " }\n" + + " } else if (re2.test(w)) {\n" + + " var fp = re2.exec(w);\n" + + " stem = fp[1] + fp[2];\n" + + " re2 = new RegExp(mgr1);\n" + + " if (re2.test(stem)) {\n" + + " w = stem;\n" + + " }\n" + + " }\n" + + "\n" + + " // Step 5\n" + + " re = /^(.+?)e$/;\n" + + " if (re.test(w)) {\n" + + " var fp = re.exec(w);\n" + + " stem = fp[1];\n" + + " re = new RegExp(mgr1);\n" + 
+ " re2 = new RegExp(meq1);\n" + + " re3 = new RegExp(\"^\" + C + v + \"[^aeiouwxy]$\");\n" + + " if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {\n" + + " w = stem;\n" + + " }\n" + + " }\n" + + "\n" + + " re = /ll$/;\n" + + " re2 = new RegExp(mgr1);\n" + + " if (re.test(w) && re2.test(w)) {\n" + + " re = /.$/;\n" + + " w = w.replace(re,\"\");\n" + + " }\n" + + "\n" + + " // and turn initial Y back to y\n" + + " if (firstch == \"y\") {\n" + + " w = firstch.toLowerCase() + w.substr(1);\n" + + " }\n" + + "\n" + + "\n" + + " return w;" + + "" + + "}"; + + DimExtractionFn dimExtractionFn = new JavascriptDimExtractionFn(function); + + Iterator inputs = Iterators.forArray("introducing", "exploratory", "analytics", "on", "large", "datasets"); + Iterator it = Iterators.forArray("introduc", "exploratori", "analyt", "on", "larg", "dataset"); + + while(inputs.hasNext()) { + String res = dimExtractionFn.apply(inputs.next()); + Assert.assertEquals(it.next(), res); + } + } } From ddb6e83f170ced7f2746d5ad81e0086d7087b9f1 Mon Sep 17 00:00:00 2001 From: fjy Date: Mon, 25 Nov 2013 17:53:17 -0800 Subject: [PATCH 06/10] fix realtime default and prepare for next deploy --- build.sh | 2 +- docs/content/Booting-a-production-cluster.md | 2 +- docs/content/Examples.md | 4 ++-- docs/content/Modules.md | 2 +- docs/content/Realtime.md | 4 ++-- docs/content/Tasks.md | 2 +- docs/content/Tutorial:-A-First-Look-at-Druid.md | 4 ++-- .../Tutorial:-Loading-Your-Data-Part-2.md | 2 +- docs/content/Tutorial:-The-Druid-Cluster.md | 16 ++++++++++------ docs/content/Tutorial:-Webstream.md | 4 ++-- docs/content/Twitter-Tutorial.textile | 2 +- examples/config/historical/runtime.properties | 2 +- examples/config/realtime/runtime.properties | 12 ++++++++---- .../src/main/java/io/druid/cli/CliBroker.java | 2 +- .../main/java/io/druid/cli/CliCoordinator.java | 2 +- .../main/java/io/druid/cli/CliHadoopIndexer.java | 2 +- .../main/java/io/druid/cli/CliHistorical.java | 2 +- 
.../src/main/java/io/druid/cli/CliOverlord.java | 2 +- .../src/main/java/io/druid/cli/CliRealtime.java | 2 +- .../java/io/druid/cli/CliRealtimeExample.java | 2 +- .../main/java/io/druid/guice/RealtimeModule.java | 4 ++-- 21 files changed, 42 insertions(+), 34 deletions(-) diff --git a/build.sh b/build.sh index 856376deca1..261bfc0835a 100755 --- a/build.sh +++ b/build.sh @@ -30,4 +30,4 @@ echo "For examples, see: " echo " " ls -1 examples/*/*sh echo " " -echo "See also http://druid.io/docs/0.6.23" +echo "See also http://druid.io/docs/0.6.24" diff --git a/docs/content/Booting-a-production-cluster.md b/docs/content/Booting-a-production-cluster.md index 7d670b9ee2c..6877c901134 100644 --- a/docs/content/Booting-a-production-cluster.md +++ b/docs/content/Booting-a-production-cluster.md @@ -3,7 +3,7 @@ layout: doc_page --- # Booting a Single Node Cluster # -[Loading Your Data](Tutorial%3A-Loading-Your-Data-Part-2.html) and [All About Queries](Tutorial%3A-All-About-Queries.html) contain recipes to boot a small druid cluster on localhost. Here we will boot a small cluster on EC2. You can checkout the code, or download a tarball from [here](http://static.druid.io/artifacts/druid-services-0.6.23-bin.tar.gz). +[Loading Your Data](Tutorial%3A-Loading-Your-Data-Part-2.html) and [All About Queries](Tutorial%3A-All-About-Queries.html) contain recipes to boot a small druid cluster on localhost. Here we will boot a small cluster on EC2. You can checkout the code, or download a tarball from [here](http://static.druid.io/artifacts/druid-services-0.6.24-bin.tar.gz). The [ec2 run script](https://github.com/metamx/druid/blob/master/examples/bin/run_ec2.sh), run_ec2.sh, is located at 'examples/bin' if you have checked out the code, or at the root of the project if you've downloaded a tarball. 
The scripts rely on the [Amazon EC2 API Tools](http://aws.amazon.com/developertools/351), and you will need to set three environment variables: diff --git a/docs/content/Examples.md b/docs/content/Examples.md index 65fa7444aaf..dd377e82cdb 100644 --- a/docs/content/Examples.md +++ b/docs/content/Examples.md @@ -19,13 +19,13 @@ Clone Druid and build it: git clone https://github.com/metamx/druid.git druid cd druid git fetch --tags -git checkout druid-0.6.23 +git checkout druid-0.6.24 ./build.sh ``` ### Downloading the DSK (Druid Standalone Kit) -[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.23-bin.tar.gz) a stand-alone tarball and run it: +[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz) a stand-alone tarball and run it: ``` bash tar -xzf druid-services-0.X.X-bin.tar.gz diff --git a/docs/content/Modules.md b/docs/content/Modules.md index 4345219db0b..17b8e538785 100644 --- a/docs/content/Modules.md +++ b/docs/content/Modules.md @@ -158,7 +158,7 @@ DruidBinders.queryRunnerFactoryBinder(binder) The first one binds the SegmentMetadataQueryQueryToolChest for usage when a SegmentMetadataQuery is used. The second one does the same thing but for the QueryRunnerFactory instead. 
-#### Adding new Jersey resources +### Adding new Jersey resources Adding new Jersey resources to a module requires calling the following code to bind the resource in the module: diff --git a/docs/content/Realtime.md b/docs/content/Realtime.md index 676438f75d3..c89727de438 100644 --- a/docs/content/Realtime.md +++ b/docs/content/Realtime.md @@ -27,7 +27,7 @@ druid.host=localhost druid.service=realtime druid.port=8083 -druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.23"] +druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.24"] druid.zk.service.host=localhost @@ -49,7 +49,7 @@ The realtime module uses several of the default modules in [Configuration](Confi |Property|Description|Default| |--------|-----------|-------| |`druid.realtime.specFile`|The file with realtime specifications in it.|none| -|`druid.publish.type`|Choices:noop, db. After a real-time node completes building a segment after the window period, what does it do with it? For true handoff to occur, this should be set to "db".|noop| +|`druid.publish.type`|Choices:noop, db. After a real-time node completes building a segment after the window period, what does it do with it? For true handoff to occur, this should be set to "db".|db| ### Realtime "specFile" diff --git a/docs/content/Tasks.md b/docs/content/Tasks.md index cc6b5553023..83326d842b6 100644 --- a/docs/content/Tasks.md +++ b/docs/content/Tasks.md @@ -73,7 +73,7 @@ The Hadoop Index Task is used to index larger data sets that require the paralle |--------|-----------|---------| |type|The task type, this should always be "index_hadoop".|yes| |config|A Hadoop Index Config. See [Batch Ingestion](Batch-ingestion.html)|yes| -|hadoopCoordinates|The Maven :: of Hadoop to use. The default is "org.apache.hadoop:hadoop-core:1.0.3".|no| +|hadoopCoordinates|The Maven \:\:\ of Hadoop to use. 
The default is "org.apache.hadoop:hadoop-core:1.0.3".|no| The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopBatchIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, `updaterJobSpec`. The Indexing Service takes care of setting these fields internally. diff --git a/docs/content/Tutorial:-A-First-Look-at-Druid.md b/docs/content/Tutorial:-A-First-Look-at-Druid.md index 389863d501b..18d44bc0cdb 100644 --- a/docs/content/Tutorial:-A-First-Look-at-Druid.md +++ b/docs/content/Tutorial:-A-First-Look-at-Druid.md @@ -47,7 +47,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu ### Download a Tarball -We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.23-bin.tar.gz). Download this file to a directory of your choosing. +We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz). Download this file to a directory of your choosing. You can extract the awesomeness within by issuing: @@ -58,7 +58,7 @@ tar -zxvf druid-services-*-bin.tar.gz Not too lost so far right? That's great! 
If you cd into the directory: ``` -cd druid-services-0.6.23 +cd druid-services-0.6.24 ``` You should see a bunch of files: diff --git a/docs/content/Tutorial:-Loading-Your-Data-Part-2.md b/docs/content/Tutorial:-Loading-Your-Data-Part-2.md index 6f3e3144df3..d6fdaa140fa 100644 --- a/docs/content/Tutorial:-Loading-Your-Data-Part-2.md +++ b/docs/content/Tutorial:-Loading-Your-Data-Part-2.md @@ -42,7 +42,7 @@ With real-world data, we recommend having a message bus such as [Apache Kafka](h #### Setting up Kafka -[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.6.23/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in real-time without writing any code. To load data to a real-time node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node. +[KafkaFirehoseFactory](https://github.com/metamx/druid/blob/druid-0.6.24/realtime/src/main/java/com/metamx/druid/realtime/firehose/KafkaFirehoseFactory.java) is how druid communicates with Kafka. Using this [Firehose](Firehose.html) with the right configuration, we can import data into Druid in real-time without writing any code. To load data to a real-time node via Kafka, we'll first need to initialize Zookeeper and Kafka, and then configure and initialize a [Realtime](Realtime.html) node. Instructions for booting a Zookeeper and then Kafka cluster are available [here](http://kafka.apache.org/07/quickstart.html). 
diff --git a/docs/content/Tutorial:-The-Druid-Cluster.md b/docs/content/Tutorial:-The-Druid-Cluster.md index 107a0bbece6..3f5b72c9803 100644 --- a/docs/content/Tutorial:-The-Druid-Cluster.md +++ b/docs/content/Tutorial:-The-Druid-Cluster.md @@ -11,7 +11,7 @@ In this tutorial, we will set up other types of Druid nodes as well as and exter If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first. -You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.23-bin.tar.gz) +You can download the latest version of druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz) and untar the contents within by issuing: @@ -147,7 +147,7 @@ druid.port=8081 druid.zk.service.host=localhost -druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.23"] +druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.24"] # Dummy read only AWS account (used to download example data) druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b @@ -237,11 +237,15 @@ druid.port=8083 druid.zk.service.host=localhost -druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.23-SNAPSHOT"] +druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.24","io.druid.extensions:druid-kafka-seven:0.6.24"] -druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid -druid.db.connector.user=druid -druid.db.connector.password=diurd +# Change this config to db to hand off to the rest of the Druid cluster +druid.publish.type=noop + +# These configs are only required for real hand off +# druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid +# druid.db.connector.user=druid +# druid.db.connector.password=diurd druid.processing.buffer.sizeBytes=10000000 ``` diff --git a/docs/content/Tutorial:-Webstream.md b/docs/content/Tutorial:-Webstream.md index 55a6005edf5..300928684f1 100644 --- 
a/docs/content/Tutorial:-Webstream.md +++ b/docs/content/Tutorial:-Webstream.md @@ -37,7 +37,7 @@ There are two ways to setup Druid: download a tarball, or [Build From Source](Bu h3. Download a Tarball -We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.23-bin.tar.gz) +We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz) Download this file to a directory of your choosing. You can extract the awesomeness within by issuing: @@ -48,7 +48,7 @@ tar zxvf druid-services-*-bin.tar.gz Not too lost so far right? That's great! If you cd into the directory: ``` -cd druid-services-0.6.23 +cd druid-services-0.6.24 ``` You should see a bunch of files: diff --git a/docs/content/Twitter-Tutorial.textile b/docs/content/Twitter-Tutorial.textile index 9abbeb702ec..edbc5c38b77 100644 --- a/docs/content/Twitter-Tutorial.textile +++ b/docs/content/Twitter-Tutorial.textile @@ -9,7 +9,7 @@ There are two ways to setup Druid: download a tarball, or build it from source. h3. Download a Tarball -We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.23-bin.tar.gz. +We've built a tarball that contains everything you'll need. You'll find it "here":http://static.druid.io/artifacts/releases/druid-services-0.6.24-bin.tar.gz. Download this bad boy to a directory of your choosing. 
You can extract the awesomeness within by issuing: diff --git a/examples/config/historical/runtime.properties b/examples/config/historical/runtime.properties index 9b942e0e392..15ca0750ca1 100644 --- a/examples/config/historical/runtime.properties +++ b/examples/config/historical/runtime.properties @@ -4,7 +4,7 @@ druid.port=8081 druid.zk.service.host=localhost -druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.23"] +druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.24"] # Dummy read only AWS account (used to download example data) druid.s3.secretKey=QyyfVZ7llSiRg6Qcrql1eEUG7buFpAK6T6engr1b diff --git a/examples/config/realtime/runtime.properties b/examples/config/realtime/runtime.properties index 6f3ce236bf5..0eb7ef4dd3d 100644 --- a/examples/config/realtime/runtime.properties +++ b/examples/config/realtime/runtime.properties @@ -4,10 +4,14 @@ druid.port=8083 druid.zk.service.host=localhost -druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.23","io.druid.extensions:druid-kafka-seven:0.6.23"] +druid.extensions.coordinates=["io.druid.extensions:druid-examples:0.6.24","io.druid.extensions:druid-kafka-seven:0.6.24"] -druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid -druid.db.connector.user=druid -druid.db.connector.password=diurd +# Change this config to db to hand off to the rest of the Druid cluster +druid.publish.type=noop + +# These configs are only required for real hand off +# druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid +# druid.db.connector.user=druid +# druid.db.connector.password=diurd druid.processing.buffer.sizeBytes=10000000 diff --git a/services/src/main/java/io/druid/cli/CliBroker.java b/services/src/main/java/io/druid/cli/CliBroker.java index aa215af876e..f94635100c4 100644 --- a/services/src/main/java/io/druid/cli/CliBroker.java +++ b/services/src/main/java/io/druid/cli/CliBroker.java @@ -53,7 +53,7 @@ import java.util.List; */ @Command( name 
= "broker", - description = "Runs a broker node, see http://druid.io/docs/0.6.23/Broker.html for a description" + description = "Runs a broker node, see http://druid.io/docs/0.6.24/Broker.html for a description" ) public class CliBroker extends ServerRunnable { diff --git a/services/src/main/java/io/druid/cli/CliCoordinator.java b/services/src/main/java/io/druid/cli/CliCoordinator.java index 95eab497277..74686c57806 100644 --- a/services/src/main/java/io/druid/cli/CliCoordinator.java +++ b/services/src/main/java/io/druid/cli/CliCoordinator.java @@ -63,7 +63,7 @@ import java.util.List; */ @Command( name = "coordinator", - description = "Runs the Coordinator, see http://druid.io/docs/0.6.23/Coordinator.html for a description." + description = "Runs the Coordinator, see http://druid.io/docs/0.6.24/Coordinator.html for a description." ) public class CliCoordinator extends ServerRunnable { diff --git a/services/src/main/java/io/druid/cli/CliHadoopIndexer.java b/services/src/main/java/io/druid/cli/CliHadoopIndexer.java index 135f0e9edfd..9a8f1fc9bd3 100644 --- a/services/src/main/java/io/druid/cli/CliHadoopIndexer.java +++ b/services/src/main/java/io/druid/cli/CliHadoopIndexer.java @@ -41,7 +41,7 @@ import java.util.List; */ @Command( name = "hadoop", - description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.23/Batch-ingestion.html for a description." + description = "Runs the batch Hadoop Druid Indexer, see http://druid.io/docs/0.6.24/Batch-ingestion.html for a description." 
) public class CliHadoopIndexer implements Runnable { diff --git a/services/src/main/java/io/druid/cli/CliHistorical.java b/services/src/main/java/io/druid/cli/CliHistorical.java index 314f34a87fb..24ad591118f 100644 --- a/services/src/main/java/io/druid/cli/CliHistorical.java +++ b/services/src/main/java/io/druid/cli/CliHistorical.java @@ -42,7 +42,7 @@ import java.util.List; */ @Command( name = "historical", - description = "Runs a Historical node, see http://druid.io/docs/0.6.23/Historical.html for a description" + description = "Runs a Historical node, see http://druid.io/docs/0.6.24/Historical.html for a description" ) public class CliHistorical extends ServerRunnable { diff --git a/services/src/main/java/io/druid/cli/CliOverlord.java b/services/src/main/java/io/druid/cli/CliOverlord.java index e58b1043010..6c0c71a2893 100644 --- a/services/src/main/java/io/druid/cli/CliOverlord.java +++ b/services/src/main/java/io/druid/cli/CliOverlord.java @@ -93,7 +93,7 @@ import java.util.List; */ @Command( name = "overlord", - description = "Runs an Overlord node, see http://druid.io/docs/0.6.23/Indexing-Service.html for a description" + description = "Runs an Overlord node, see http://druid.io/docs/0.6.24/Indexing-Service.html for a description" ) public class CliOverlord extends ServerRunnable { diff --git a/services/src/main/java/io/druid/cli/CliRealtime.java b/services/src/main/java/io/druid/cli/CliRealtime.java index b965098f5be..6363b2dfd4e 100644 --- a/services/src/main/java/io/druid/cli/CliRealtime.java +++ b/services/src/main/java/io/druid/cli/CliRealtime.java @@ -30,7 +30,7 @@ import java.util.List; */ @Command( name = "realtime", - description = "Runs a realtime node, see http://druid.io/docs/0.6.23/Realtime.html for a description" + description = "Runs a realtime node, see http://druid.io/docs/0.6.24/Realtime.html for a description" ) public class CliRealtime extends ServerRunnable { diff --git a/services/src/main/java/io/druid/cli/CliRealtimeExample.java 
b/services/src/main/java/io/druid/cli/CliRealtimeExample.java index a2f140ae08e..61142fe69de 100644 --- a/services/src/main/java/io/druid/cli/CliRealtimeExample.java +++ b/services/src/main/java/io/druid/cli/CliRealtimeExample.java @@ -42,7 +42,7 @@ import java.util.concurrent.Executor; */ @Command( name = "realtime", - description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.23/Realtime.html for a description" + description = "Runs a standalone realtime node for examples, see http://druid.io/docs/0.6.24/Realtime.html for a description" ) public class CliRealtimeExample extends ServerRunnable { diff --git a/services/src/main/java/io/druid/guice/RealtimeModule.java b/services/src/main/java/io/druid/guice/RealtimeModule.java index 04f897ad010..276f850049a 100644 --- a/services/src/main/java/io/druid/guice/RealtimeModule.java +++ b/services/src/main/java/io/druid/guice/RealtimeModule.java @@ -48,13 +48,13 @@ public class RealtimeModule implements Module binder, "druid.publish.type", Key.get(SegmentPublisher.class), - Key.get(NoopSegmentPublisher.class) + Key.get(DbSegmentPublisher.class) ); final MapBinder publisherBinder = PolyBind.optionBinder( binder, Key.get(SegmentPublisher.class) ); - publisherBinder.addBinding("db").to(DbSegmentPublisher.class); + publisherBinder.addBinding("noop").to(NoopSegmentPublisher.class); binder.bind(DbSegmentPublisher.class).in(LazySingleton.class); JsonConfigProvider.bind(binder, "druid.realtime", RealtimeManagerConfig.class); From c7171e33114c9355471935bc248f6f1137425868 Mon Sep 17 00:00:00 2001 From: fjy Date: Mon, 25 Nov 2013 17:54:57 -0800 Subject: [PATCH 07/10] [maven-release-plugin] prepare release druid-0.6.24 --- cassandra-storage/pom.xml | 2 +- common/pom.xml | 2 +- examples/pom.xml | 2 +- hdfs-storage/pom.xml | 2 +- indexing-hadoop/pom.xml | 2 +- indexing-service/pom.xml | 2 +- kafka-eight/pom.xml | 2 +- kafka-seven/pom.xml | 2 +- pom.xml | 4 ++-- processing/pom.xml | 2 +- 
s3-extensions/pom.xml | 2 +- server/pom.xml | 2 +- services/pom.xml | 2 +- 13 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cassandra-storage/pom.xml b/cassandra-storage/pom.xml index 8a4a9f6278e..157dca77425 100644 --- a/cassandra-storage/pom.xml +++ b/cassandra-storage/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/common/pom.xml b/common/pom.xml index 7b15f631fc8..523ec3b25f3 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/examples/pom.xml b/examples/pom.xml index 3ef0caaa856..f3c810adc68 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/hdfs-storage/pom.xml b/hdfs-storage/pom.xml index af3eb2a4140..7d7f3e397b9 100644 --- a/hdfs-storage/pom.xml +++ b/hdfs-storage/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml index 1278df33166..15d4022a811 100644 --- a/indexing-hadoop/pom.xml +++ b/indexing-hadoop/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index 33361b66f2c..29e6b29d3de 100644 --- a/indexing-service/pom.xml +++ b/indexing-service/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/kafka-eight/pom.xml b/kafka-eight/pom.xml index 53601c90cee..8e95c578a51 100644 --- a/kafka-eight/pom.xml +++ b/kafka-eight/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/kafka-seven/pom.xml b/kafka-seven/pom.xml index f00bdea925a..73f7d390362 100644 --- a/kafka-seven/pom.xml +++ b/kafka-seven/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/pom.xml b/pom.xml index dee4133212e..3cbf0f277d5 100644 --- a/pom.xml +++ b/pom.xml @@ -23,14 +23,14 @@ io.druid druid pom - 0.6.24-SNAPSHOT + 0.6.24 druid druid 
scm:git:ssh://git@github.com/metamx/druid.git scm:git:ssh://git@github.com/metamx/druid.git http://www.github.com/metamx/druid - ${project.artifactId}-${project.version} + druid-0.6.24 diff --git a/processing/pom.xml b/processing/pom.xml index a58ffad72fc..1fe8fc34167 100644 --- a/processing/pom.xml +++ b/processing/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/s3-extensions/pom.xml b/s3-extensions/pom.xml index 93e0844925b..21fd0b8e596 100644 --- a/s3-extensions/pom.xml +++ b/s3-extensions/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/server/pom.xml b/server/pom.xml index 420e53cd7fc..aa916771b01 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 diff --git a/services/pom.xml b/services/pom.xml index cf0f4d41d2c..50b9b2de4bc 100644 --- a/services/pom.xml +++ b/services/pom.xml @@ -27,7 +27,7 @@ io.druid druid - 0.6.24-SNAPSHOT + 0.6.24 From 730e3a2303bca66a8d3d9425336e0de1a6f1bd43 Mon Sep 17 00:00:00 2001 From: fjy Date: Mon, 25 Nov 2013 17:55:01 -0800 Subject: [PATCH 08/10] [maven-release-plugin] prepare for next development iteration --- cassandra-storage/pom.xml | 2 +- common/pom.xml | 2 +- examples/pom.xml | 2 +- hdfs-storage/pom.xml | 2 +- indexing-hadoop/pom.xml | 2 +- indexing-service/pom.xml | 2 +- kafka-eight/pom.xml | 2 +- kafka-seven/pom.xml | 2 +- pom.xml | 4 ++-- processing/pom.xml | 2 +- s3-extensions/pom.xml | 2 +- server/pom.xml | 2 +- services/pom.xml | 2 +- 13 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cassandra-storage/pom.xml b/cassandra-storage/pom.xml index 157dca77425..b9190260274 100644 --- a/cassandra-storage/pom.xml +++ b/cassandra-storage/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT diff --git a/common/pom.xml b/common/pom.xml index 523ec3b25f3..74172822ce2 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT diff 
--git a/examples/pom.xml b/examples/pom.xml index f3c810adc68..b6094213411 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT diff --git a/hdfs-storage/pom.xml b/hdfs-storage/pom.xml index 7d7f3e397b9..8dbb858663c 100644 --- a/hdfs-storage/pom.xml +++ b/hdfs-storage/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml index 15d4022a811..5a7ff2cfe57 100644 --- a/indexing-hadoop/pom.xml +++ b/indexing-hadoop/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT diff --git a/indexing-service/pom.xml b/indexing-service/pom.xml index 29e6b29d3de..26983285c9e 100644 --- a/indexing-service/pom.xml +++ b/indexing-service/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT diff --git a/kafka-eight/pom.xml b/kafka-eight/pom.xml index 8e95c578a51..ac3f89eda65 100644 --- a/kafka-eight/pom.xml +++ b/kafka-eight/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT diff --git a/kafka-seven/pom.xml b/kafka-seven/pom.xml index 73f7d390362..9fd061e853f 100644 --- a/kafka-seven/pom.xml +++ b/kafka-seven/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT diff --git a/pom.xml b/pom.xml index 3cbf0f277d5..7dac175bc12 100644 --- a/pom.xml +++ b/pom.xml @@ -23,14 +23,14 @@ io.druid druid pom - 0.6.24 + 0.6.25-SNAPSHOT druid druid scm:git:ssh://git@github.com/metamx/druid.git scm:git:ssh://git@github.com/metamx/druid.git http://www.github.com/metamx/druid - druid-0.6.24 + ${project.artifactId}-${project.version} diff --git a/processing/pom.xml b/processing/pom.xml index 1fe8fc34167..64b64dbb898 100644 --- a/processing/pom.xml +++ b/processing/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT diff --git a/s3-extensions/pom.xml b/s3-extensions/pom.xml index 21fd0b8e596..fb558506dbb 100644 --- a/s3-extensions/pom.xml +++ b/s3-extensions/pom.xml @@ -28,7 +28,7 @@ io.druid druid 
- 0.6.24 + 0.6.25-SNAPSHOT diff --git a/server/pom.xml b/server/pom.xml index aa916771b01..434da7a5ea2 100644 --- a/server/pom.xml +++ b/server/pom.xml @@ -28,7 +28,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT diff --git a/services/pom.xml b/services/pom.xml index 50b9b2de4bc..e1ca5244092 100644 --- a/services/pom.xml +++ b/services/pom.xml @@ -27,7 +27,7 @@ io.druid druid - 0.6.24 + 0.6.25-SNAPSHOT From 5524cb7608a857308cae2580c2372c5e63d27b2e Mon Sep 17 00:00:00 2001 From: fjy Date: Mon, 25 Nov 2013 18:34:53 -0800 Subject: [PATCH 09/10] remove extra contents in toc --- docs/content/toc.textile | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/content/toc.textile b/docs/content/toc.textile index 98f88149ac9..a74bc8cc540 100644 --- a/docs/content/toc.textile +++ b/docs/content/toc.textile @@ -8,7 +8,6 @@ h1. Contents * "Concepts and Terminology":./Concepts-and-Terminology.html h2. Getting Started -* "Concepts and Terminology":./Concepts-and-Terminology.html * "Tutorial: A First Look at Druid":./Tutorial:-A-First-Look-at-Druid.html * "Tutorial: The Druid Cluster":./Tutorial:-The-Druid-Cluster.html * "Tutorial: Loading Your Data Part 1":./Tutorial:-Loading-Your-Data-Part-1.html From 78a8206ee6fec85b1aeb6fa7b5f64273ebdba3f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Xavier=20L=C3=A9aut=C3=A9?= Date: Tue, 26 Nov 2013 17:07:32 -0800 Subject: [PATCH 10/10] add docs on hadoop flavors and hadoop index task classpath resolution --- docs/content/Tasks.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/content/Tasks.md b/docs/content/Tasks.md index 83326d842b6..5ad56b55d06 100644 --- a/docs/content/Tasks.md +++ b/docs/content/Tasks.md @@ -78,6 +78,18 @@ The Hadoop Index Task is used to index larger data sets that require the paralle The Hadoop Index Config submitted as part of an Hadoop Index Task is identical to the Hadoop Index Config used by the `HadoopBatchIndexer` except that three fields must be omitted: `segmentOutputPath`, `workingPath`, 
`updaterJobSpec`. The Indexing Service takes care of setting these fields internally. +##### Using your own Hadoop distribution + +Druid is compiled against Apache hadoop-core 1.0.3. However, if you happen to use a different flavor of Hadoop that is API compatible with hadoop-core 1.0.3, you should only have to change the hadoopCoordinates property to point to the Maven artifact used by your distribution. + +##### Resolving dependency conflicts when running HadoopIndexTask + +Currently, the HadoopIndexTask creates a single classpath to run the HadoopDruidIndexerJob, which can lead to version conflicts between various dependencies of Druid, extension modules, and Hadoop's own dependencies. + +The Hadoop index task will put Druid's dependencies first on the classpath, followed by any extension dependencies, and any Hadoop dependencies last. + +If you are having trouble with any extensions in HadoopIndexTask, it may be the case that Druid, or one of its dependencies, depends on a different version of a library than what you are using as part of your extensions, but Druid's version overrides the one in your extension. In that case you probably want to build your own Druid version and override the offending library by adding an explicit dependency to the pom.xml of each Druid sub-module that depends on it. + #### Realtime Index Task The indexing service can also run real-time tasks. These tasks effectively transform a middle manager into a real-time node. We introduced real-time tasks as a way to programmatically add new real-time data sources without needing to manually add nodes. The grammar for the real-time task is as follows: