Add docs and benchmark for JSON flattening parser

This commit is contained in:
jon-wei 2015-12-09 15:35:26 -08:00
parent f29c25b826
commit c53bf85d83
9 changed files with 946 additions and 3 deletions

View File

@ -51,6 +51,16 @@
<artifactId>druid-processing</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>com.github.wnameless</groupId>
<artifactId>json-flattener</artifactId>
<version>0.1.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<properties>

View File

@ -0,0 +1,124 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.benchmark;
import com.metamx.common.parsers.Parser;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
@State(Scope.Benchmark)
public class FlattenJSONBenchmark
{
  private static final int numEvents = 1000000;

  List<String> flatInputs;
  List<String> nestedInputs;
  Parser flatParser;
  Parser nestedParser;
  Parser fieldDiscoveryParser;
  Parser forcedPathParser;

  // Cursors into the input lists; each benchmark cycles through its inputs so
  // successive invocations parse different events.
  int flatCounter = 0;
  int nestedCounter = 0;

  /**
   * Generates the flat and nested input events and builds the parsers under test.
   * FlattenJSONBenchmarkUtil uses a fixed RNG seed, so the inputs are reproducible.
   */
  @Setup
  public void prepare() throws Exception
  {
    FlattenJSONBenchmarkUtil gen = new FlattenJSONBenchmarkUtil();
    flatInputs = new ArrayList<String>();
    for (int i = 0; i < numEvents; i++) {
      flatInputs.add(gen.generateFlatEvent());
    }
    nestedInputs = new ArrayList<String>();
    for (int i = 0; i < numEvents; i++) {
      nestedInputs.add(gen.generateNestedEvent());
    }
    flatParser = gen.getFlatParser();
    nestedParser = gen.getNestedParser();
    fieldDiscoveryParser = gen.getFieldDiscoveryParser();
    forcedPathParser = gen.getForcedPathParser();
  }

  /** Baseline: parse pre-flattened events with a plain JSON parser (no flatten spec). */
  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public Map<String, Object> baseline()
  {
    Map<String, Object> parsed = flatParser.parse(flatInputs.get(flatCounter));
    flatCounter = (flatCounter + 1) % numEvents;
    return parsed;
  }

  /** Parse nested events, flattening them via explicit JSONPath field specs. */
  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public Map<String, Object> flatten()
  {
    Map<String, Object> parsed = nestedParser.parse(nestedInputs.get(nestedCounter));
    nestedCounter = (nestedCounter + 1) % numEvents;
    return parsed;
  }

  /** Parse pre-flattened events with a parser that auto-discovers fields. */
  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public Map<String, Object> preflattenNestedParser()
  {
    // Fixed: this benchmark consumes flatInputs, so it advances flatCounter
    // (previously it advanced nestedCounter, which was misleading).
    Map<String, Object> parsed = fieldDiscoveryParser.parse(flatInputs.get(flatCounter));
    flatCounter = (flatCounter + 1) % numEvents;
    return parsed;
  }

  /** Parse pre-flattened events with explicit bracket-notation root paths. */
  @Benchmark
  @BenchmarkMode(Mode.AverageTime)
  @OutputTimeUnit(TimeUnit.MICROSECONDS)
  public Map<String, Object> forcedRootPaths()
  {
    // Fixed: consumes flatInputs, so advance flatCounter (see preflattenNestedParser).
    Map<String, Object> parsed = forcedPathParser.parse(flatInputs.get(flatCounter));
    flatCounter = (flatCounter + 1) % numEvents;
    return parsed;
  }

  /** Standalone entry point for running this benchmark outside the full JMH suite. */
  public static void main(String[] args) throws RunnerException
  {
    Options opt = new OptionsBuilder()
        .include(FlattenJSONBenchmark.class.getSimpleName())
        .warmupIterations(1)
        .measurementIterations(25)
        .forks(1)
        .build();
    new Runner(opt).run();
  }
}

View File

@ -0,0 +1,441 @@
package io.druid.benchmark;
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.PropertyAccessor;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.github.wnameless.json.flattener.JsonFlattener;
import com.metamx.common.parsers.Parser;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.JSONParseSpec;
import io.druid.data.input.impl.JSONPathFieldSpec;
import io.druid.data.input.impl.JSONPathSpec;
import io.druid.data.input.impl.TimestampSpec;
import io.druid.jackson.DefaultObjectMapper;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
 * Utility for the flatten-JSON benchmarks: generates randomized flat and nested JSON
 * events, and builds the {@code Parser} variants measured by FlattenJSONBenchmark.
 *
 * The random generator is seeded with a fixed value in the constructor, so the
 * generated event stream is deterministic (test code asserts on exact values).
 */
public class FlattenJSONBenchmarkUtil
{
  // Seeded with a constant below; do not reseed, or FlattenJSONBenchmarkUtilTest breaks.
  private Random rng;
  private final ObjectMapper mapper = new DefaultObjectMapper();
  private static final String DEFAULT_TIMESTAMP = "2015-09-12T12:10:53.155Z";

  public FlattenJSONBenchmarkUtil()
  {
    this.rng = new Random(9999);
    // Serialize public fields only and omit nulls, so sparsely populated
    // BenchmarkEvent objects render as compact JSON.
    mapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.PUBLIC_ONLY);
    mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
  }

  /** Parser with no flatten spec, for events that are already flat (the benchmark baseline). */
  public Parser getFlatParser()
  {
    JSONParseSpec spec = new JSONParseSpec(
        new TimestampSpec("ts", "iso", null),
        new DimensionsSpec(null, null, null),
        null,
        null
    );
    return spec.makeParser();
  }

  /** Parser with an empty field list that relies entirely on automatic field discovery. */
  public Parser getFieldDiscoveryParser()
  {
    List<JSONPathFieldSpec> fields = new ArrayList<>();
    JSONPathSpec flattenSpec = new JSONPathSpec(true, fields);
    JSONParseSpec spec = new JSONParseSpec(
        new TimestampSpec("ts", "iso", null),
        new DimensionsSpec(null, null, null),
        flattenSpec,
        null
    );
    return spec.makeParser();
  }

  /**
   * Parser for nested events: explicit root and JSONPath field specs matching the
   * structure produced by generateNestedEvent().
   */
  public Parser getNestedParser()
  {
    List<JSONPathFieldSpec> fields = new ArrayList<>();
    fields.add(JSONPathFieldSpec.createRootField("ts"));
    fields.add(JSONPathFieldSpec.createRootField("d1"));
    // d2 (and m4 below) are deliberately commented out; the spec is created with
    // useFieldDiscovery == true, so root-level singular fields are discovered anyway.
    //fields.add(JSONPathFieldSpec.createRootField("d2"));
    fields.add(JSONPathFieldSpec.createNestedField("e1.d1", "$.e1.d1"));
    fields.add(JSONPathFieldSpec.createNestedField("e1.d2", "$.e1.d2"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.d3", "$.e2.d3"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.d4", "$.e2.d4"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.d5", "$.e2.d5"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.d6", "$.e2.d6"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.ad1[0]", "$.e2.ad1[0]"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.ad1[1]", "$.e2.ad1[1]"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.ad1[2]", "$.e2.ad1[2]"));
    fields.add(JSONPathFieldSpec.createNestedField("ae1[0].d1", "$.ae1[0].d1"));
    fields.add(JSONPathFieldSpec.createNestedField("ae1[1].d1", "$.ae1[1].d1"));
    fields.add(JSONPathFieldSpec.createNestedField("ae1[2].e1.d2", "$.ae1[2].e1.d2"));
    fields.add(JSONPathFieldSpec.createRootField("m3"));
    //fields.add(JSONPathFieldSpec.createRootField("m4"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.m1", "$.e3.m1"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.m2", "$.e3.m2"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.m3", "$.e3.m3"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.m4", "$.e3.m4"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.am1[0]", "$.e3.am1[0]"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.am1[1]", "$.e3.am1[1]"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.am1[2]", "$.e3.am1[2]"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.am1[3]", "$.e3.am1[3]"));
    fields.add(JSONPathFieldSpec.createNestedField("e4.e4.m4", "$.e4.e4.m4"));
    JSONPathSpec flattenSpec = new JSONPathSpec(true, fields);
    JSONParseSpec spec = new JSONParseSpec(
        new TimestampSpec("ts", "iso", null),
        new DimensionsSpec(null, null, null),
        flattenSpec,
        null
    );
    return spec.makeParser();
  }

  /**
   * Parser for pre-flattened events: every field is addressed with a bracket-notation
   * path ($['e1.d1']) whose key is the already-flattened column name, and automatic
   * field discovery is disabled (useFieldDiscovery == false).
   */
  public Parser getForcedPathParser()
  {
    List<JSONPathFieldSpec> fields = new ArrayList<>();
    fields.add(JSONPathFieldSpec.createNestedField("ts", "$['ts']"));
    fields.add(JSONPathFieldSpec.createNestedField("d1", "$['d1']"));
    fields.add(JSONPathFieldSpec.createNestedField("d2", "$['d2']"));
    fields.add(JSONPathFieldSpec.createNestedField("e1.d1", "$['e1.d1']"));
    fields.add(JSONPathFieldSpec.createNestedField("e1.d2", "$['e1.d2']"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.d3", "$['e2.d3']"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.d4", "$['e2.d4']"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.d5", "$['e2.d5']"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.d6", "$['e2.d6']"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.ad1[0]", "$['e2.ad1[0]']"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.ad1[1]", "$['e2.ad1[1]']"));
    fields.add(JSONPathFieldSpec.createNestedField("e2.ad1[2]", "$['e2.ad1[2]']"));
    fields.add(JSONPathFieldSpec.createNestedField("ae1[0].d1", "$['ae1[0].d1']"));
    fields.add(JSONPathFieldSpec.createNestedField("ae1[1].d1", "$['ae1[1].d1']"));
    fields.add(JSONPathFieldSpec.createNestedField("ae1[2].e1.d2", "$['ae1[2].e1.d2']"));
    fields.add(JSONPathFieldSpec.createNestedField("m3", "$['m3']"));
    fields.add(JSONPathFieldSpec.createNestedField("m4", "$['m4']"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.m1", "$['e3.m1']"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.m2", "$['e3.m2']"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.m3", "$['e3.m3']"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.m4", "$['e3.m4']"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.am1[0]", "$['e3.am1[0]']"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.am1[1]", "$['e3.am1[1]']"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.am1[2]", "$['e3.am1[2]']"));
    fields.add(JSONPathFieldSpec.createNestedField("e3.am1[3]", "$['e3.am1[3]']"));
    fields.add(JSONPathFieldSpec.createNestedField("e4.e4.m4", "$['e4.e4.m4']"));
    JSONPathSpec flattenSpec = new JSONPathSpec(false, fields);
    JSONParseSpec spec = new JSONParseSpec(
        new TimestampSpec("ts", "iso", null),
        new DimensionsSpec(null, null, null),
        flattenSpec,
        null
    );
    return spec.makeParser();
  }

  /**
   * Generates a nested event and pre-flattens it with JsonFlattener, producing
   * dotted/bracketed keys such as "e1.d1" and "e2.ad1[0]" at the root level.
   */
  public String generateFlatEvent() throws Exception
  {
    String nestedEvent = generateNestedEvent();
    String flatEvent = JsonFlattener.flatten(nestedEvent);
    return flatEvent;
  }

  /*
  e.g.,

  {
  "d1":"-889954295",
  "d2":"-1724267856",
  "m3":0.1429096312550323,
  "m4":-7491190942271782800,
  "e1":{"d1":"2044704643",
        "d2":"743384585"},
  "e2":{"d3":"1879234327",
        "d4":"1248394579",
        "d5":"-639742676",
        "d6":"1334864967",
        "ad1":["-684042233","-1368392605","1826364033"]},
  "e3":{"m1":1026394465228315487,
        "m2":0.27737174619459004,
        "m3":0.011921350960908628,
        "m4":-7507319256575520484,
        "am1":[-2383262648875933574,-3980663171371801209,-8225906222712163481,6074309311406287835]},
  "e4":{"e4":{"m4":32836881083689842}},
  "ae1":[{"d1":"-1797792200"},{"d1":"142582995"},{"e1":{"d2":"-1341994709"}}],
  "ts":"2015-09-12T12:10:53.155Z"
  }
  */
  /**
   * Generates one nested event (see the example JSON above) by composing sparsely
   * populated BenchmarkEvent objects and serializing the top-level wrapper.
   * Null constructor arguments are omitted from the JSON (NON_NULL inclusion).
   */
  public String generateNestedEvent() throws Exception
  {
    // Becomes "e1": an object with only d1/d2 set.
    BenchmarkEvent nestedDims1 = new BenchmarkEvent(
        null,
        String.valueOf(rng.nextInt()), String.valueOf(rng.nextInt()), null, null, null, null,
        null, null, null, null,
        null, null, null, null,
        null, null, null
    );

    // Becomes "e2": d3-d6 plus the string array "ad1".
    String[] dimsArray1 = {String.valueOf(rng.nextInt()), String.valueOf(rng.nextInt()), String.valueOf(rng.nextInt())};
    BenchmarkEvent nestedDims2 = new BenchmarkEvent(
        null,
        null, null, String.valueOf(rng.nextInt()), String.valueOf(rng.nextInt()), String.valueOf(rng.nextInt()), String.valueOf(rng.nextInt()),
        null, null, null, null,
        null, null, null, null,
        dimsArray1, null, null
    );

    // Becomes "e3": metrics m1-m4 plus the long array "am1".
    Long[] metricsArray1 = {rng.nextLong(), rng.nextLong(), rng.nextLong(), rng.nextLong()};
    BenchmarkEvent nestedMetrics1 = new BenchmarkEvent(
        null,
        null, null, null, null, null, null,
        rng.nextLong(), rng.nextDouble(), rng.nextDouble(), rng.nextLong(),
        null, null, null, null,
        null, metricsArray1, null
    );

    // Inner object of "e4" (only m4 set), wrapped below to form "e4.e4.m4".
    BenchmarkEvent nestedMetrics2 = new BenchmarkEvent(
        null,
        null, null, null, null, null, null,
        null, null, null, rng.nextLong(),
        null, null, null, null,
        null, null, null
    );

    // Becomes "e4": doubly-nested wrapper around nestedMetrics2.
    BenchmarkEvent metricsWrapper = new BenchmarkEvent(
        null,
        null, null, null, null, null, null,
        null, null, null, null,
        null, null, null, nestedMetrics2,
        null, null, null
    );

    //nest some dimensions in an array!
    BenchmarkEvent arrayNestedDim1 = new BenchmarkEvent(
        null,
        String.valueOf(rng.nextInt()), null, null, null, null, null,
        null, null, null, null,
        null, null, null, null,
        null, null, null
    );

    BenchmarkEvent arrayNestedDim2 = new BenchmarkEvent(
        null,
        String.valueOf(rng.nextInt()), null, null, null, null, null,
        null, null, null, null,
        null, null, null, null,
        null, null, null
    );

    BenchmarkEvent arrayNestedDim3 = new BenchmarkEvent(
        null,
        null, String.valueOf(rng.nextInt()), null, null, null, null,
        null, null, null, null,
        null, null, null, null,
        null, null, null
    );

    // Third element of "ae1": wraps arrayNestedDim3 under "e1" to form ae1[2].e1.d2.
    BenchmarkEvent arrayNestedWrapper = new BenchmarkEvent(
        null,
        null, null, null, null, null, null,
        null, null, null, null,
        arrayNestedDim3, null, null, null,
        null, null, null
    );

    // Becomes "ae1": an array of objects; "am1" at the root holds constants that the
    // benchmark parsers do not extract.
    BenchmarkEvent[] eventArray = {arrayNestedDim1, arrayNestedDim2, arrayNestedWrapper};
    Long[] ignoredMetrics = {Long.valueOf(10), Long.valueOf(20), Long.valueOf(30)};

    // Top-level event: ts, root dims d1/d2, root metrics m3/m4, plus the nested objects above.
    BenchmarkEvent wrapper = new BenchmarkEvent(
        DEFAULT_TIMESTAMP,
        String.valueOf(rng.nextInt()), String.valueOf(rng.nextInt()), null, null, null, null,
        null, null, rng.nextDouble(), rng.nextLong(),
        nestedDims1, nestedDims2, nestedMetrics1, metricsWrapper,
        null, ignoredMetrics, eventArray
    );

    return mapper.writeValueAsString(wrapper);
  }

  /**
   * Jackson-serializable event with string dimensions (d*), numeric metrics (m*),
   * nested events (e*), and array-valued fields (ad1/am1/ae1). All fields are
   * nullable; nulls are omitted on serialization.
   *
   * NOTE(review): this is a non-static inner class, so each instance carries a hidden
   * reference to the enclosing util; consider making it static — confirm nothing
   * relies on the current form.
   */
  public class BenchmarkEvent
  {
    public String ts;

    @JsonProperty
    public String getTs()
    {
      return ts;
    }

    @JsonProperty
    public String getD1()
    {
      return d1;
    }

    @JsonProperty
    public String getD2()
    {
      return d2;
    }

    @JsonProperty
    public String getD3()
    {
      return d3;
    }

    @JsonProperty
    public String getD4()
    {
      return d4;
    }

    @JsonProperty
    public String getD5()
    {
      return d5;
    }

    @JsonProperty
    public String getD6()
    {
      return d6;
    }

    @JsonProperty
    public Long getM1()
    {
      return m1;
    }

    @JsonProperty
    public Double getM2()
    {
      return m2;
    }

    @JsonProperty
    public Double getM3()
    {
      return m3;
    }

    @JsonProperty
    public Long getM4()
    {
      return m4;
    }

    @JsonProperty
    public BenchmarkEvent getE1()
    {
      return e1;
    }

    @JsonProperty
    public BenchmarkEvent getE2()
    {
      return e2;
    }

    @JsonProperty
    public BenchmarkEvent getE3()
    {
      return e3;
    }

    @JsonProperty
    public BenchmarkEvent getE4()
    {
      return e4;
    }

    @JsonProperty
    public String[] getAd1()
    {
      return ad1;
    }

    @JsonProperty
    public Long[] getAm1()
    {
      return am1;
    }

    @JsonProperty
    public BenchmarkEvent[] getAe1()
    {
      return ae1;
    }

    public String d1;
    public String d2;
    public String d3;
    public String d4;
    public String d5;
    public String d6;
    public Long m1;
    public Double m2;
    public Double m3;
    public Long m4;
    public BenchmarkEvent e1;
    public BenchmarkEvent e2;
    public BenchmarkEvent e3;
    public BenchmarkEvent e4;
    public String[] ad1;
    public Long[] am1;
    public BenchmarkEvent[] ae1;

    @JsonCreator
    public BenchmarkEvent(
        @JsonProperty("ts") String ts,
        @JsonProperty("d1") String d1,
        @JsonProperty("d2") String d2,
        @JsonProperty("d3") String d3,
        @JsonProperty("d4") String d4,
        @JsonProperty("d5") String d5,
        @JsonProperty("d6") String d6,
        @JsonProperty("m1") Long m1,
        @JsonProperty("m2") Double m2,
        @JsonProperty("m3") Double m3,
        @JsonProperty("m4") Long m4,
        @JsonProperty("e1") BenchmarkEvent e1,
        @JsonProperty("e2") BenchmarkEvent e2,
        @JsonProperty("e3") BenchmarkEvent e3,
        @JsonProperty("e4") BenchmarkEvent e4,
        @JsonProperty("ad1") String[] ad1,
        @JsonProperty("am1") Long[] am1,
        @JsonProperty("ae1") BenchmarkEvent[] ae1
    )
    {
      this.ts = ts;
      this.d1 = d1;
      this.d2 = d2;
      this.d3 = d3;
      this.d4 = d4;
      this.d5 = d5;
      this.d6 = d6;
      this.m1 = m1;
      this.m2 = m2;
      this.m3 = m3;
      this.m4 = m4;
      this.e1 = e1;
      this.e2 = e2;
      this.e3 = e3;
      this.e4 = e4;
      this.ad1 = ad1;
      this.am1 = am1;
      this.ae1 = ae1;
    }
  }
}

View File

@ -0,0 +1,129 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.benchmark;
import com.metamx.common.parsers.JSONPathParser;
import com.metamx.common.parsers.Parser;
//import com.yourkit.api.Controller;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.JSONParseSpec;
import io.druid.data.input.impl.JSONPathFieldSpec;
import io.druid.data.input.impl.JSONPathSpec;
import io.druid.data.input.impl.StringInputRowParser;
import io.druid.data.input.impl.TimestampSpec;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Test app for profiling JSON parsing behavior. Uses the proprietary YourKit API, so this file
 * is commented out to prevent dependency resolution issues. Kept here for future usage/reference.
 *
 * To use: add the YourKit controller jar to the classpath, uncomment the Controller import
 * above and the body below, then run main() with the YourKit agent attached. The loop at the
 * bottom can be switched between the field-discovery, nested, and forced-path parsers by
 * uncommenting the desired parseFlat/parseNested call.
 */
public class FlattenJSONProfile
{
/*
  private static final int numEvents = 400000;

  List<String> flatInputs;
  List<String> nestedInputs;
  Parser flatParser;
  Parser nestedParser;
  Parser fieldDiscoveryParser;
  Parser forcedPathParser;
  int flatCounter = 0;
  int nestedCounter = 0;

  public void prepare() throws Exception
  {
    FlattenJSONBenchmarkUtil gen = new FlattenJSONBenchmarkUtil();
    flatInputs = new ArrayList<String>();
    for (int i = 0; i < numEvents; i++) {
      flatInputs.add(gen.generateFlatEvent());
    }
    nestedInputs = new ArrayList<String>();
    for (int i = 0; i < numEvents; i++) {
      nestedInputs.add(gen.generateNestedEvent());
    }
    flatParser = gen.getFlatParser();
    nestedParser = gen.getNestedParser();
    fieldDiscoveryParser = gen.getFieldDiscoveryParser();
    forcedPathParser = gen.getForcedPathParser();
  }

  public Map<String, Object> parseNested(Parser parser)
  {
    Map<String, Object> parsed = parser.parse(nestedInputs.get(nestedCounter));
    nestedCounter = (nestedCounter + 1) % numEvents;
    return parsed;
  }

  public Map<String, Object> parseFlat(Parser parser)
  {
    Map<String, Object> parsed = parser.parse(flatInputs.get(flatCounter));
    flatCounter = (flatCounter + 1) % numEvents;
    return parsed;
  }

  public static void main(String[] args) throws Exception
  {
    FlattenJSONProfile fjp = new FlattenJSONProfile();
    fjp.prepare();
    Map<String, Object> parsedMap;
    List<JSONPathFieldSpec> fields = new ArrayList<>();
    JSONPathSpec flattenSpec = new JSONPathSpec(true, fields);
    JSONParseSpec parseSpec = new JSONParseSpec(
        new TimestampSpec("ts", "iso", null),
        new DimensionsSpec(null, null, null),
        flattenSpec
    );
    Parser parser = fjp.fieldDiscoveryParser;
    Parser nestedPar = fjp.nestedParser;
    Parser forcedParser = fjp.forcedPathParser;
    int j = 0;
    Controller control = new Controller();
    control.stopCPUProfiling();
    control.clearCPUData();
    Thread.sleep(5000);
    control.startCPUSampling(null);
    for(int i = 0; i < numEvents; i++) {
      //parsedMap = parser.parse(fjp.nestedInputs.get(i));
      parsedMap = fjp.parseFlat(forcedParser);
      //parsedMap = fjp.parseFlat(parser);
      //parsedMap = fjp.parseNested(nestedPar);
      if(parsedMap != null) {
        j++;
      }
    }
    control.stopCPUProfiling();
    System.out.println(j);
  }
*/
}

View File

@ -0,0 +1,85 @@
package io.druid.benchmark;
import com.metamx.common.parsers.Parser;
import org.junit.Assert;
import org.junit.Test;
import java.util.Map;
public class FlattenJSONBenchmarkUtilTest
{
  /**
   * Generates one flat and one nested event from a freshly constructed generator and
   * verifies the parsed values. FlattenJSONBenchmarkUtil seeds its RNG with a fixed
   * value, so every expected literal below is deterministic; if the seed or the
   * generation order ever changes, these assertions must be regenerated.
   */
  @Test
  public void testOne() throws Exception {
    FlattenJSONBenchmarkUtil eventGen = new FlattenJSONBenchmarkUtil();

    // Order matters: the flat event consumes the first run of random values,
    // the nested event the second.
    String newEvent = eventGen.generateFlatEvent();
    String newEvent2 = eventGen.generateNestedEvent();

    Parser flatParser = eventGen.getFlatParser();
    Parser nestedParser = eventGen.getNestedParser();

    Map<String, Object> event = flatParser.parse(newEvent);
    Map<String, Object> event2 = nestedParser.parse(newEvent2);

    checkEvent1(event);
    checkEvent2(event2);
  }

  // Expected values for the first (pre-flattened) event; keys are the flattened
  // column names produced by JsonFlattener.
  public void checkEvent1(Map<String, Object> event) {
    Assert.assertEquals("2015-09-12T12:10:53.155Z", event.get("ts").toString());
    Assert.assertEquals("-1170723877", event.get("d1").toString());
    Assert.assertEquals("238976084", event.get("d2").toString());
    Assert.assertEquals("0.9818780016507468", event.get("m3").toString());
    Assert.assertEquals("-3.8218837693501747E18", event.get("m4").toString());
    Assert.assertEquals("-509091100", event.get("e1.d1").toString());
    Assert.assertEquals("274706327", event.get("e1.d2").toString());
    Assert.assertEquals("870378185", event.get("e2.d3").toString());
    Assert.assertEquals("-377775321", event.get("e2.d4").toString());
    Assert.assertEquals("-1797988763", event.get("e2.d5").toString());
    Assert.assertEquals("1309474524", event.get("e2.d6").toString());
    Assert.assertEquals("129047958", event.get("e2.ad1[0]").toString());
    Assert.assertEquals("1658972185", event.get("e2.ad1[1]").toString());
    Assert.assertEquals("-997010830", event.get("e2.ad1[2]").toString());
    Assert.assertEquals("-5.8772014847368817E18", event.get("e3.m1").toString());
    Assert.assertEquals("0.4375433369079904", event.get("e3.m2").toString());
    Assert.assertEquals("0.8510482953607659", event.get("e3.m3").toString());
    Assert.assertEquals("-2.3832626488759337E18", event.get("e3.m4").toString());
    Assert.assertEquals("7.9789762132607068E18", event.get("e3.am1[0]").toString());
    Assert.assertEquals("-7.8634787235005573E18", event.get("e3.am1[1]").toString());
    Assert.assertEquals("8.7372945568982446E18", event.get("e3.am1[2]").toString());
    Assert.assertEquals("3.1928124802414899E18", event.get("e3.am1[3]").toString());
    Assert.assertEquals("-3.9806631713718011E18", event.get("e4.e4.m4").toString());
    Assert.assertEquals("-1915243040", event.get("ae1[0].d1").toString());
    Assert.assertEquals("-2020543641", event.get("ae1[1].d1").toString());
    Assert.assertEquals("1414285347", event.get("ae1[2].e1.d2").toString());
  }

  // Expected values for the second (nested) event, flattened at parse time by the
  // JSONPath field specs from getNestedParser().
  public void checkEvent2(Map<String, Object> event2) {
    Assert.assertEquals("728062074", event2.get("ae1[0].d1").toString());
    Assert.assertEquals("1701675101", event2.get("ae1[1].d1").toString());
    Assert.assertEquals("1887775139", event2.get("ae1[2].e1.d2").toString());
    Assert.assertEquals("1375814994", event2.get("e1.d1").toString());
    Assert.assertEquals("-1747933975", event2.get("e1.d2").toString());
    Assert.assertEquals("1616761116", event2.get("e2.ad1[0]").toString());
    Assert.assertEquals("7645432", event2.get("e2.ad1[1]").toString());
    Assert.assertEquals("679897970", event2.get("e2.ad1[2]").toString());
    Assert.assertEquals("-1797792200", event2.get("e2.d3").toString());
    Assert.assertEquals("142582995", event2.get("e2.d4").toString());
    Assert.assertEquals("-1341994709", event2.get("e2.d5").toString());
    Assert.assertEquals("-889954295", event2.get("e2.d6").toString());
    Assert.assertEquals("678995794", event2.get("d1").toString());
    Assert.assertEquals("-1744549866", event2.get("d2").toString());
    Assert.assertEquals("2015-09-12T12:10:53.155Z", event2.get("ts").toString());
    Assert.assertEquals("0.7279915615037622", event2.get("m3").toString());
    Assert.assertEquals("977083178034247050", event2.get("m4").toString());
    Assert.assertEquals("1940993614184952155", event2.get("e3.m1").toString());
    Assert.assertEquals("0.55936084127688", event2.get("e3.m2").toString());
    Assert.assertEquals("0.22821798320943232", event2.get("e3.m3").toString());
    Assert.assertEquals("8176144126231114468", event2.get("e3.m4").toString());
    Assert.assertEquals("-7405674050450245158", event2.get("e3.am1[0]").toString());
    Assert.assertEquals("150970357863018887", event2.get("e3.am1[1]").toString());
    Assert.assertEquals("3261802881806411610", event2.get("e3.am1[2]").toString());
    Assert.assertEquals("8492292414932401114", event2.get("e3.am1[3]").toString());
    Assert.assertEquals("-1192952196729165097", event2.get("e4.e4.m4").toString());
  }
}

View File

@ -0,0 +1,150 @@
---
layout: doc_page
---
# JSON Flatten Spec
| Field | Type | Description | Required |
|-------|------|-------------|----------|
| useFieldDiscovery | Boolean | If true, interpret all fields with singular values (not a map or list) and flat lists (lists of singular values) at the root level as columns. | no (default == true) |
| fields | JSON Object array | Specifies the fields of interest and how they are accessed | no (default == []) |
Defining the JSON Flatten Spec allows nested JSON fields to be flattened during ingestion time. Only the JSON ParseSpec supports flattening.
'fields' is a list of JSON Objects, describing the field names and how the fields are accessed:
## JSON Field Spec
| Field | Type | Description | Required |
|-------|------|-------------|----------|
| type | String | Type of the field, "root" or "nested". | yes |
| name | String | This string will be used as the column name when the data has been ingested. | yes |
| expr | String | Defines an expression for accessing the field within the JSON object, using [JsonPath](https://github.com/jayway/JsonPath) notation. | yes |
Suppose the event JSON has the following form:
```json
{
"timestamp": "2015-09-12T12:10:53.155Z",
"dim1": "qwerty",
"dim2": "asdf",
"dim3": "zxcv",
"ignore_me": "ignore this",
"metrica": 9999,
"foo": {"bar": "abc"},
"foo.bar": "def",
"nestmet": {"val": 42},
"hello": [1.0, 2.0, 3.0, 4.0, 5.0],
"mixarray": [1.0, 2.0, 3.0, 4.0, {"last": 5}],
"world": [{"hey": "there"}, {"tree": "apple"}],
"thing": {"food": ["sandwich", "pizza"]}
}
```
The column "metrica" is a Long metric column, "hello" is an array of Double metrics, and "nestmet.val" is a nested Long metric. All other columns are dimensions.
To flatten this JSON, the parseSpec could be defined as follows:
```json
"parseSpec": {
"format": "json",
"flattenSpec": {
"useFieldDiscovery": true,
"fields": [
{
"type": "root",
"name": "dim1",
"expr": "dim1"
},
"dim2",
{
"type": "nested",
"name": "foo.bar",
"expr": "$.foo.bar"
},
{
"type": "root",
"name": "root-foo.bar",
"expr": "foo.bar"
},
{
"type": "nested",
"name": "nested-metric",
"expr": "$.nestmet.val"
},
{
"type": "nested",
"name": "hello-0",
"expr": "$.hello[0]"
},
{
"type": "nested",
"name": "hello-4",
"expr": "$.hello[4]"
},
{
"type": "nested",
"name": "world-hey",
"expr": "$.world.hey"
},
{
"type": "nested",
"name": "worldtree",
"expr": "$.world.tree"
},
{
"type": "nested",
"name": "first-food",
"expr": "$.thing.food[0]"
},
{
"type": "nested",
"name": "second-food",
"expr": "$.thing.food[1]"
}
]
},
"dimensionsSpec" : {
"dimensions" : [],
"dimensionExclusions": ["ignore_me"]
},
"timestampSpec" : {
"format" : "auto",
"column" : "timestamp"
}
}
```
Fields "dim3", "ignore_me", and "metrica" will be automatically discovered because 'useFieldDiscovery' is true, so they have been omitted from the field spec list.
"ignore_me" will be automatically discovered but excluded as specified by dimensionExclusions.
Aggregators should use the metric column names as defined in the flattenSpec. Using the example above:
```json
"metricsSpec" : [
{
"type" : "longSum",
"name" : "Nested Metric Value",
"fieldName" : "nested-metric"
},
{
"type" : "doubleSum",
"name" : "Hello Index #0",
"fieldName" : "hello-0"
},
{
"type" : "longSum",
"name" : "metrica",
"fieldName" : "metrica"
}
]
```
Note that:
* For convenience, when defining a root-level field, it is possible to define only the field name, as a string, shown with "dim2" above.
* Enabling 'useFieldDiscovery' will only autodetect fields at the root level with a single value (not a map or list), as well as fields referring to a list of single values. In the example above, "dim1", "dim2", "dim3", "ignore_me", "metrica", and "foo.bar" (at the root) would be automatically detected as columns. The "hello" field is a list of Doubles and will be autodiscovered, but note that the example ingests the individual list members as separate fields. The "world" field must be explicitly defined because its value is a map. The "mixarray" field, while similar to "hello", must also be explicitly defined because its last value is a map.
* Duplicate field definitions are not allowed; if a duplicate field is defined, an exception will be thrown.
* If auto field discovery is enabled, any discovered field with the same name as one already defined in the field specs will be skipped and not added twice.
* The JSON input must be a JSON object at the root, not an array. e.g., {"valid": "true"} and {"valid":[1,2,3]} are supported but [{"invalid": "true"}] and [1,2,3] are not.

View File

@ -102,7 +102,9 @@ If `format` is not included, the parseSpec defaults to `tsv`.
| format | String | This should say `json`. | no |
| timestampSpec | JSON Object | Specifies the column and format of the timestamp. | yes |
| dimensionsSpec | JSON Object | Specifies the dimensions of the data. | yes |
| flattenSpec | JSON Object | Specifies flattening configuration for nested JSON data. See [Flattening JSON](./flatten-json.html) for more info. | no |
#### JSON Lowercase ParseSpec
This is a special variation of the JSON ParseSpec that lower cases all the column names in the incoming JSON data. This parseSpec is required if you are updating to Druid 0.7.x from Druid 0.6.x, are directly ingesting JSON with mixed case column names, do not have any ETL in place to lower case those column names, and would like to make queries that include the data you created using 0.6.x and 0.7.x.

View File

@ -69,7 +69,7 @@
<apache.curator.version>2.9.1</apache.curator.version>
<jetty.version>9.2.5.v20141112</jetty.version>
<jersey.version>1.19</jersey.version>
<druid.api.version>0.3.13</druid.api.version>
<druid.api.version>0.3.14</druid.api.version>
<!-- Watch out for Hadoop compatibility when updating to >= 2.5; see https://github.com/druid-io/druid/pull/1669 -->
<jackson.version>2.4.6</jackson.version>
<log4j.version>2.4.1</log4j.version>

View File

@ -173,7 +173,9 @@ public class DataSchemaTest
+ "\"parseSpec\":{"
+ "\"format\":\"json\","
+ "\"timestampSpec\":{\"column\":\"xXx\", \"format\": \"auto\", \"missingValue\": null},"
+ "\"dimensionsSpec\":{\"dimensions\":[], \"dimensionExclusions\":[], \"spatialDimensions\":[]}},"
+ "\"dimensionsSpec\":{\"dimensions\":[], \"dimensionExclusions\":[], \"spatialDimensions\":[]},"
+ "\"flattenSpec\":{\"useFieldDiscovery\":true, \"fields\":[]},"
+ "\"featureSpec\":{}},"
+ "\"encoding\":\"UTF-8\""
+ "},"
+ "\"metricsSpec\":[{\"type\":\"doubleSum\",\"name\":\"metric1\",\"fieldName\":\"col1\"}],"