Merge pull request #98 from metamx/druid-sql

Basic SQL grammar for Druid
cheddar 2013-03-07 11:14:54 -08:00
commit bb1b3cd2f9
5 changed files with 556 additions and 3 deletions
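The grammar covers aggregations (sum, min, max, count), arithmetic over post-aggregators, selector (=, !=), regex (~) and 'in' filters, timestamp ranges, and group-by with period granularities. For reference, the sample statement bundled with the new SQLRunner (below) exercises most of it:

    select count(*), (1 - count(*) / sum(count)) * 100 as ratio from wikipedia
      where timestamp between '2013-02-01' and '2013-02-14'
        and (namespace = 'article' or page ~ 'Talk:.*')
        and language in ( 'en', 'fr' )
        and user ~ '(?i)^david.*'
      group by granularity(timestamp, 'day'), language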

View File

@@ -191,6 +191,14 @@
<artifactId>caliper</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</dependency>
</dependencies>
<build>
@@ -205,6 +213,18 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr4-maven-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>antlr4</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
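With the antlr4 goal bound and no further configuration, the plugin should follow its defaults: grammars are picked up from src/main/antlr4 (for the package used by this change, src/main/antlr4/com/metamx/druid/sql/antlr4/DruidSQL.g4), and the generated lexer and parser land under target/generated-sources/antlr4, which the plugin adds to the compile source roots automatically.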

View File

@@ -0,0 +1,305 @@
grammar DruidSQL;
@header {
import com.metamx.druid.aggregation.post.*;
import com.metamx.druid.aggregation.*;
import com.metamx.druid.query.filter.*;
import com.metamx.druid.query.dimension.*;
import com.metamx.druid.*;
import com.google.common.base.*;
import com.google.common.collect.Lists;
import org.joda.time.*;
import java.text.*;
import java.util.*;
}
@parser::members {
public Map<String, AggregatorFactory> aggregators = new LinkedHashMap<String, AggregatorFactory>();
public List<PostAggregator> postAggregators = new LinkedList<PostAggregator>();
public DimFilter filter;
public List<org.joda.time.Interval> intervals;
public List<String> fields = new LinkedList<String>();
public QueryGranularity granularity = QueryGranularity.ALL;
public Map<String, DimensionSpec> groupByDimensions = new LinkedHashMap<String, DimensionSpec>();
String dataSourceName = null;
public String getDataSource() {
return dataSourceName;
}
public String unescape(String quoted) {
String unquote = quoted.trim().replaceFirst("^'(.*)'\$", "\$1");
return unquote.replace("''", "'");
}
AggregatorFactory evalAgg(String name, int fn) {
switch (fn) {
case SUM: return new DoubleSumAggregatorFactory("sum("+name+")", name);
case MIN: return new MinAggregatorFactory("min("+name+")", name);
case MAX: return new MaxAggregatorFactory("max("+name+")", name);
case COUNT: return new CountAggregatorFactory(name);
}
throw new IllegalArgumentException("Unknown function [" + fn + "]");
}
PostAggregator evalArithmeticPostAggregator(PostAggregator a, List<Token> ops, List<PostAggregator> b) {
if(b.isEmpty()) return a;
else {
int i = 0;
PostAggregator root = a;
while(i < ops.size()) {
List<PostAggregator> list = new LinkedList<PostAggregator>();
List<String> names = new LinkedList<String>();
names.add(root.getName());
list.add(root);
Token op = ops.get(i);
while(i < ops.size() && ops.get(i).getType() == op.getType()) {
PostAggregator e = b.get(i);
list.add(e);
names.add(e.getName());
i++;
}
root = new ArithmeticPostAggregator("("+Joiner.on(op.getText()).join(names)+")", op.getText(), list);
}
return root;
}
}
}
AND: 'and';
OR: 'or';
SUM: 'sum';
MIN: 'min';
MAX: 'max';
COUNT: 'count';
AS: 'as';
OPEN: '(';
CLOSE: ')';
STAR: '*';
NOT: '!' ;
PLUS: '+';
MINUS: '-';
DIV: '/';
COMMA: ',';
EQ: '=';
NEQ: '!=';
MATCH: '~';
GROUP: 'group';
IDENT : (LETTER)(LETTER | DIGIT | '_')* ;
QUOTED_STRING : '\'' ( ESC | ~'\'' )*? '\'' ;
ESC : '\'' '\'';
NUMBER: ('+'|'-')?DIGIT*'.'?DIGIT+(EXPONENT)?;
EXPONENT: ('e') ('+'|'-')? ('0'..'9')+;
fragment DIGIT : '0'..'9';
fragment LETTER : 'a'..'z' | 'A'..'Z';
LINE_COMMENT : '--' .*? '\r'? '\n' -> skip ;
COMMENT : '/*' .*? '*/' -> skip ;
WS : (' '| '\t' | '\r' '\n' | '\n' | '\r')+ -> skip;
query
: select_stmt where_stmt (groupby_stmt)?
;
select_stmt
: 'select' e+=aliasedExpression (',' e+=aliasedExpression)* 'from' datasource {
for(AliasedExpressionContext a : $e) {
postAggregators.add(a.p);
fields.add(a.p.getName());
}
this.dataSourceName = $datasource.text;
}
;
where_stmt
: 'where' f=timeAndDimFilter {
if($f.filter != null) this.filter = $f.filter;
this.intervals = Lists.newArrayList($f.interval);
}
;
groupby_stmt
: GROUP 'by' groupByExpression ( COMMA! groupByExpression )*
;
groupByExpression
: gran=granularityFn {this.granularity = $gran.granularity;}
| dim=IDENT { this.groupByDimensions.put($dim.text, new DefaultDimensionSpec($dim.text, $dim.text)); }
;
datasource
: IDENT
;
aliasedExpression returns [PostAggregator p]
: expression ( AS^ name=IDENT )? {
if($name != null) {
postAggregators.add($expression.p);
$p = new FieldAccessPostAggregator($name.text, $expression.p.getName());
}
else $p = $expression.p;
}
;
expression returns [PostAggregator p]
: additiveExpression { $p = $additiveExpression.p; }
;
additiveExpression returns [PostAggregator p]
: a=multiplyExpression (( ops+=PLUS^ | ops+=MINUS^ ) b+=multiplyExpression)* {
List<PostAggregator> rhs = new LinkedList<PostAggregator>();
for(MultiplyExpressionContext e : $b) rhs.add(e.p);
$p = evalArithmeticPostAggregator($a.p, $ops, rhs);
}
;
multiplyExpression returns [PostAggregator p]
: a=unaryExpression ((ops+= STAR | ops+=DIV ) b+=unaryExpression)* {
List<PostAggregator> rhs = new LinkedList<PostAggregator>();
for(UnaryExpressionContext e : $b) rhs.add(e.p);
$p = evalArithmeticPostAggregator($a.p, $ops, rhs);
}
;
unaryExpression returns [PostAggregator p]
: MINUS e=unaryExpression {
$p = new ArithmeticPostAggregator(
"-"+$e.p.getName(),
"*",
Lists.newArrayList($e.p, new ConstantPostAggregator("-1", -1.0))
);
}
| PLUS e=unaryExpression { $p = $e.p; }
| primaryExpression { $p = $primaryExpression.p; }
;
primaryExpression returns [PostAggregator p]
: constant { $p = $constant.c; }
| aggregate {
aggregators.put($aggregate.agg.getName(), $aggregate.agg);
$p = new FieldAccessPostAggregator($aggregate.agg.getName(), $aggregate.agg.getName());
}
| OPEN! e=expression CLOSE! { $p = $e.p; }
;
aggregate returns [AggregatorFactory agg]
: fn=( SUM^ | MIN^ | MAX^ ) OPEN! name=(IDENT|COUNT) CLOSE! { $agg = evalAgg($name.text, $fn.type); }
| fn=COUNT OPEN! STAR CLOSE! { $agg = evalAgg("count(*)", $fn.type); }
;
constant returns [ConstantPostAggregator c]
: value=NUMBER { double v = Double.parseDouble($value.text); $c = new ConstantPostAggregator(Double.toString(v), v); }
;
/* time filters must be top level filters */
timeAndDimFilter returns [DimFilter filter, org.joda.time.Interval interval]
: (f1=dimFilter AND)? t=timeFilter (AND f2=dimFilter)? {
if($f1.ctx != null || $f2.ctx != null) {
if($f1.ctx != null && $f2.ctx != null) {
$filter = new AndDimFilter(Lists.newArrayList($f1.filter, $f2.filter));
} else if($f1.ctx != null) {
$filter = $f1.filter;
} else {
$filter = $f2.filter;
}
}
$interval = $t.interval;
}
;
dimFilter returns [DimFilter filter]
: e=orDimFilter { $filter = $e.filter; }
;
orDimFilter returns [DimFilter filter]
: a=andDimFilter (OR^ b+=andDimFilter)* {
if($b.isEmpty()) $filter = $a.filter;
else {
List<DimFilter> rest = new ArrayList<DimFilter>();
for(AndDimFilterContext e : $b) rest.add(e.filter);
$filter = new OrDimFilter(Lists.asList($a.filter, rest.toArray(new DimFilter[]{})));
}
}
;
andDimFilter returns [DimFilter filter]
: a=primaryDimFilter (AND^ b+=primaryDimFilter)* {
if($b.isEmpty()) $filter = $a.filter;
else {
List<DimFilter> rest = new ArrayList<DimFilter>();
for(PrimaryDimFilterContext e : $b) rest.add(e.filter);
$filter = new AndDimFilter(Lists.asList($a.filter, rest.toArray(new DimFilter[]{})));
}
}
;
primaryDimFilter returns [DimFilter filter]
: e=selectorDimFilter { $filter = $e.filter; }
| l=inListDimFilter { $filter = $l.filter; }
| NOT f=dimFilter { $filter = new NotDimFilter($f.filter); }
| OPEN! f=dimFilter CLOSE! { $filter = $f.filter; }
;
selectorDimFilter returns [DimFilter filter]
: dimension=IDENT op=(EQ|NEQ|MATCH) value=QUOTED_STRING {
String dim = $dimension.text;
String val = unescape($value.text);
switch($op.type) {
case(EQ): $filter = new SelectorDimFilter(dim, val); break;
case(NEQ): $filter = new NotDimFilter(new SelectorDimFilter(dim, val)); break;
case(MATCH): $filter = new RegexDimFilter(dim, val); break;
}
}
;
inListDimFilter returns [DimFilter filter]
: dimension=IDENT 'in' (OPEN! ( (list+=QUOTED_STRING (COMMA! list+=QUOTED_STRING)*) ) CLOSE!) {
List<DimFilter> filterList = new LinkedList<DimFilter>();
for(Token e : $list) filterList.add(new SelectorDimFilter($dimension.text, unescape(e.getText())));
$filter = new OrDimFilter(filterList);
}
;
timeFilter returns [org.joda.time.Interval interval, QueryGranularity granularity]
: 'timestamp' 'between' s=timestamp AND e=timestamp {
$interval = new org.joda.time.Interval($s.t, $e.t);
}
;
granularityFn returns [QueryGranularity granularity]
: 'granularity' OPEN! 'timestamp' ',' str=QUOTED_STRING CLOSE! {
String granStr = unescape($str.text);
try {
$granularity = QueryGranularity.fromString(granStr);
} catch(IllegalArgumentException e) {
$granularity = new PeriodGranularity(new Period(granStr), null, null);
}
}
;
timestamp returns [DateTime t]
: NUMBER {
String str = $NUMBER.text.trim();
try {
$t = new DateTime(NumberFormat.getInstance().parse(str));
}
catch(ParseException e) {
throw new IllegalArgumentException("Unable to parse number [" + str + "]");
}
}
| QUOTED_STRING { $t = new DateTime(unescape($QUOTED_STRING.text)); }
;
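Since the grammar accumulates Druid query components in public parser fields rather than building an AST, exercising it takes only a few lines. A minimal sketch, assuming the generated classes are on the classpath (the class name and query string here are illustrative; SQLRunner below does the same thing for real):

    package com.metamx.druid.sql;

    import org.antlr.v4.runtime.ANTLRInputStream;
    import org.antlr.v4.runtime.CommonTokenStream;

    import com.metamx.druid.sql.antlr4.DruidSQLLexer;
    import com.metamx.druid.sql.antlr4.DruidSQLParser;

    // Hypothetical smoke test, not part of this commit.
    public class GrammarSmokeTest
    {
      public static void main(String[] args) throws Exception
      {
        String sql = "select sum(added) / count(*) as avg from wikipedia"
                   + " where timestamp between '2013-02-01' and '2013-02-14'";

        DruidSQLLexer lexer = new DruidSQLLexer(new ANTLRInputStream(sql));
        DruidSQLParser parser = new DruidSQLParser(new CommonTokenStream(lexer));
        parser.query(); // side effect: fills the fields declared in @parser::members

        System.out.println(parser.getDataSource());      // wikipedia
        System.out.println(parser.aggregators.keySet()); // [sum(added), count(*)]
        System.out.println(parser.postAggregators);      // the sum/count arithmetic plus the "avg" alias
        System.out.println(parser.intervals);            // [2013-02-01 .. 2013-02-14]
      }
    }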

View File

@@ -0,0 +1,209 @@
package com.metamx.druid.sql;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import com.metamx.druid.Druids;
import com.metamx.druid.Query;
import com.metamx.druid.aggregation.AggregatorFactory;
import com.metamx.druid.input.Row;
import com.metamx.druid.jackson.DefaultObjectMapper;
import com.metamx.druid.query.dimension.DimensionSpec;
import com.metamx.druid.query.group.GroupByQuery;
import com.metamx.druid.result.Result;
import com.metamx.druid.result.TimeseriesResultValue;
import com.metamx.druid.sql.antlr4.DruidSQLLexer;
import com.metamx.druid.sql.antlr4.DruidSQLParser;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.ConsoleErrorListener;
import org.antlr.v4.runtime.TokenStream;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.methods.StringRequestEntity;
import javax.annotation.Nullable;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
public class SQLRunner
{
private static final String STATEMENT = "select count(*), (1 - count(*) / sum(count)) * 100 as ratio from wikipedia where"
+ " timestamp between '2013-02-01' and '2013-02-14'"
+ " and (namespace = 'article' or page ~ 'Talk:.*')"
+ " and language in ( 'en', 'fr' ) "
+ " and user ~ '(?i)^david.*'"
+ " group by granularity(timestamp, 'day'), language";
public static void main(String[] args) throws Exception
{
Options options = new Options();
options.addOption("h", "help", false, "help");
options.addOption("v", false, "verbose");
options.addOption("e", "host", true, "endpoint [hostname:port]");
CommandLine cmd = new GnuParser().parse(options, args);
if(cmd.hasOption("h")) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("SQLRunner", options);
System.exit(2);
}
String hostname = cmd.getOptionValue("e", "localhost:8080");
String sql = cmd.getArgs().length > 0 ? cmd.getArgs()[0] : STATEMENT;
ObjectMapper objectMapper = new DefaultObjectMapper();
ObjectWriter jsonWriter = objectMapper.writerWithDefaultPrettyPrinter();
CharStream stream = new ANTLRInputStream(sql);
DruidSQLLexer lexer = new DruidSQLLexer(stream);
TokenStream tokenStream = new CommonTokenStream(lexer);
DruidSQLParser parser = new DruidSQLParser(tokenStream);
lexer.removeErrorListeners();
parser.removeErrorListeners();
lexer.addErrorListener(ConsoleErrorListener.INSTANCE);
parser.addErrorListener(ConsoleErrorListener.INSTANCE);
try {
DruidSQLParser.QueryContext queryContext = parser.query();
if(parser.getNumberOfSyntaxErrors() > 0) throw new IllegalStateException();
// parser.setBuildParseTree(true);
// System.err.println(q.toStringTree(parser));
} catch(Exception e) {
String msg = e.getMessage();
if(msg != null) System.err.println(e);
System.exit(1);
}
final Query query;
final TypeReference typeRef;
boolean groupBy = false;
if(parser.groupByDimensions.isEmpty()) {
query = Druids.newTimeseriesQueryBuilder()
.dataSource(parser.getDataSource())
.aggregators(new ArrayList<AggregatorFactory>(parser.aggregators.values()))
.postAggregators(parser.postAggregators)
.intervals(parser.intervals)
.granularity(parser.granularity)
.filters(parser.filter)
.build();
typeRef = new TypeReference<List<Result<TimeseriesResultValue>>>(){};
} else {
query = GroupByQuery.builder()
.setDataSource(parser.getDataSource())
.setAggregatorSpecs(new ArrayList<AggregatorFactory>(parser.aggregators.values()))
.setPostAggregatorSpecs(parser.postAggregators)
.setInterval(parser.intervals)
.setGranularity(parser.granularity)
.setDimFilter(parser.filter)
.setDimensions(new ArrayList<DimensionSpec>(parser.groupByDimensions.values()))
.build();
typeRef = new TypeReference<List<Row>>(){};
groupBy = true;
}
String queryStr = jsonWriter.writeValueAsString(query);
if(cmd.hasOption("v")) System.err.println(queryStr);
PostMethod req = new PostMethod("http://" + hostname + "/druid/v2/?pretty");
req.setRequestEntity(new StringRequestEntity(queryStr, "application/json", "utf-8"));
new HttpClient().executeMethod(req);
BufferedReader stdInput = new BufferedReader(new InputStreamReader(req.getResponseBodyAsStream()));
Object res = objectMapper.readValue(stdInput, typeRef);
Joiner tabJoiner = Joiner.on("\t");
if(groupBy) {
List<Row> rows = (List<Row>)res;
Iterable<String> dimensions = Iterables.transform(parser.groupByDimensions.values(), new Function<DimensionSpec, String>()
{
@Override
public String apply(@Nullable DimensionSpec input)
{
return input.getOutputName();
}
});
System.out.println(tabJoiner.join(Iterables.concat(
Lists.newArrayList("timestamp"),
dimensions,
parser.fields
)));
for(final Row r : rows) {
System.out.println(
tabJoiner.join(
Iterables.concat(
Lists.newArrayList(parser.granularity.toDateTime(r.getTimestampFromEpoch())),
Iterables.transform(
parser.groupByDimensions.values(), new Function<DimensionSpec, String>()
{
@Override
public String apply(@Nullable DimensionSpec input)
{
return Joiner.on(",").join(r.getDimension(input.getOutputName()));
}
}),
Iterables.transform(parser.fields, new Function<String, Object>()
{
@Override
public Object apply(@Nullable String input)
{
return r.getFloatMetric(input);
}
})
)
)
);
}
}
else {
List<Result<TimeseriesResultValue>> rows = (List<Result<TimeseriesResultValue>>)res;
System.out.println(tabJoiner.join(Iterables.concat(
Lists.newArrayList("timestamp"),
parser.fields
)));
for(final Result<TimeseriesResultValue> r : rows) {
System.out.println(
tabJoiner.join(
Iterables.concat(
Lists.newArrayList(r.getTimestamp()),
Lists.transform(
parser.fields,
new Function<String, Object>()
{
@Override
public Object apply(@Nullable String input)
{
return r.getValue().getMetric(input);
}
}
)
)
)
);
}
}
Closeables.closeQuietly(stdInput);
}
}
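Assuming a jar with the compiled classes and their dependencies (the jar name and hostname below are illustrative), the runner takes an endpoint via -e, an optional SQL statement as the first argument (falling back to the built-in STATEMENT above), and -v to echo the generated Druid JSON query before printing the tab-separated results:

    java -cp druid-client-selfcontained.jar com.metamx.druid.sql.SQLRunner \
      -v -e broker.example.com:8080 \
      "select count(*) from wikipedia where timestamp between '2013-02-01' and '2013-02-03' group by granularity(timestamp, 'hour')"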

View File

@@ -144,7 +144,7 @@
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

pom.xml
View File

@@ -288,7 +288,21 @@
<artifactId>spymemcached</artifactId>
<version>2.8.4</version>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
<version>4.0</version>
</dependency>
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-master</artifactId>
<version>4.0</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
<!-- Test Scope -->
<dependency>
<groupId>com.metamx</groupId>
@@ -380,6 +394,11 @@
<artifactId>maven-scala-plugin</artifactId>
<version>2.15.2</version>
</plugin>
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr4-maven-plugin</artifactId>
<version>4.0</version>
</plugin>
</plugins>
</pluginManagement>
</build>
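(The module-level pom changes at the top of this diff omit <version> on antlr4-runtime and commons-cli; those presumably resolve to the 4.0 and 1.2 versions pinned here in the parent pom, and the antlr4-maven-plugin version is likewise inherited from pluginManagement.)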