mirror of https://github.com/apache/druid.git
introduce a "tree" type to the flattenSpec (#12177)
* introduce a "tree" type to the flattenSpec
* feedback - rename exprs to nodes, use CollectionsUtils.isNullOrEmpty for guard
* feedback - expand docs to more clearly capture limitations of "tree" flattenSpec
* feedback - fix for typo on docs
* introduce a comment to explain defensive copy, tweak null handling
* fix: part of rebase
* mark ObjectFlatteners.FlattenerMaker as an ExtensionPoint and provide default for new tree type
* fix: objectflattener restore previous behavior to call getRootField for root type
* docs: ingestion/data-formats add note that ORC only supports path expressions
* chore: linter remove unused import
* fix: use correct newer form for empty DimensionsSpec in FlattenJSONBenchmark
This commit is contained in:
parent 675fd982fb
commit 0d03ce435f
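Before the diff itself, a minimal orientation sketch (not part of this commit) of how the new `tree` field type is meant to be declared, using the `createTreeField` factory that this commit adds to `JSONPathFieldSpec`. The wrapper class name and the sample keys (`o`, `mg`, `tree_omg`) are illustrative assumptions only; the equivalent `path` and `jq` fields are shown for comparison.

```java
import java.util.Arrays;

import org.apache.druid.java.util.common.parsers.JSONPathFieldSpec;
import org.apache.druid.java.util.common.parsers.JSONPathFieldType;
import org.apache.druid.java.util.common.parsers.JSONPathSpec;

// Hypothetical helper, only to show how the three field types line up.
public class TreeFlattenSpecSketch
{
  public static JSONPathSpec example()
  {
    return new JSONPathSpec(
        true, // keep automatic discovery of root-level fields
        Arrays.asList(
            // new in this commit: walk the hierarchy o -> mg directly, no expression parsing
            JSONPathFieldSpec.createTreeField("tree_omg", Arrays.asList("o", "mg")),
            // the same lookup expressed as JsonPath and as jackson-jq
            new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg")
        )
    );
  }
}
```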
@@ -56,6 +56,8 @@ public class FlattenJSONBenchmark
   Parser flatParser;
   Parser nestedParser;
   Parser jqParser;
+  Parser treeJqParser;
+  Parser treeTreeParser;
   Parser fieldDiscoveryParser;
   Parser forcedPathParser;
   int flatCounter = 0;

@@ -82,6 +84,8 @@ public class FlattenJSONBenchmark
     flatParser = gen.getFlatParser();
     nestedParser = gen.getNestedParser();
     jqParser = gen.getJqParser();
+    treeJqParser = gen.getTreeJqParser();
+    treeTreeParser = gen.getTreeTreeParser();
     fieldDiscoveryParser = gen.getFieldDiscoveryParser();
     forcedPathParser = gen.getForcedPathParser();
   }

@@ -112,6 +116,32 @@ public class FlattenJSONBenchmark
     return parsed;
   }

+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public Map<String, Object> treejqflatten(final Blackhole blackhole)
+  {
+    Map<String, Object> parsed = treeJqParser.parseToMap(nestedInputs.get(jqCounter));
+    for (String s : parsed.keySet()) {
+      blackhole.consume(parsed.get(s));
+    }
+    jqCounter = (jqCounter + 1) % NUM_EVENTS;
+    return parsed;
+  }
+
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public Map<String, Object> treetreeflatten(final Blackhole blackhole)
+  {
+    Map<String, Object> parsed = treeTreeParser.parseToMap(nestedInputs.get(jqCounter));
+    for (String s : parsed.keySet()) {
+      blackhole.consume(parsed.get(s));
+    }
+    jqCounter = (jqCounter + 1) % NUM_EVENTS;
+    return parsed;
+  }
+
   @Benchmark
   @BenchmarkMode(Mode.AverageTime)
   @OutputTimeUnit(TimeUnit.MICROSECONDS)
@@ -35,6 +35,7 @@ import org.apache.druid.java.util.common.parsers.JSONPathSpec;
 import org.apache.druid.java.util.common.parsers.Parser;

 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Random;

@@ -209,6 +210,69 @@ public class FlattenJSONBenchmarkUtil
     return spec.makeParser();
   }

+  public Parser getTreeJqParser()
+  {
+    List<JSONPathFieldSpec> fields = new ArrayList<>();
+    fields.add(JSONPathFieldSpec.createRootField("ts"));
+
+    fields.add(JSONPathFieldSpec.createRootField("d1"));
+    fields.add(JSONPathFieldSpec.createJqField("e1.d1", ".e1.d1"));
+    fields.add(JSONPathFieldSpec.createJqField("e1.d2", ".e1.d2"));
+    fields.add(JSONPathFieldSpec.createJqField("e2.d3", ".e2.d3"));
+    fields.add(JSONPathFieldSpec.createJqField("e2.d4", ".e2.d4"));
+    fields.add(JSONPathFieldSpec.createJqField("e2.d5", ".e2.d5"));
+    fields.add(JSONPathFieldSpec.createJqField("e2.d6", ".e2.d6"));
+
+    fields.add(JSONPathFieldSpec.createRootField("m3"));
+    fields.add(JSONPathFieldSpec.createJqField("e3.m1", ".e3.m1"));
+    fields.add(JSONPathFieldSpec.createJqField("e3.m2", ".e3.m2"));
+    fields.add(JSONPathFieldSpec.createJqField("e3.m3", ".e3.m3"));
+    fields.add(JSONPathFieldSpec.createJqField("e3.m4", ".e3.m4"));
+
+    JSONPathSpec flattenSpec = new JSONPathSpec(false, fields);
+    JSONParseSpec spec = new JSONParseSpec(
+        new TimestampSpec("ts", "iso", null),
+        DimensionsSpec.EMPTY,
+        flattenSpec,
+        null,
+        null
+    );
+
+    return spec.makeParser();
+  }
+
+  public Parser getTreeTreeParser()
+  {
+    List<JSONPathFieldSpec> fields = new ArrayList<>();
+    fields.add(JSONPathFieldSpec.createRootField("ts"));
+
+    fields.add(JSONPathFieldSpec.createRootField("d1"));
+    fields.add(JSONPathFieldSpec.createTreeField("e1.d1", Arrays.asList("e1", "d1")));
+    fields.add(JSONPathFieldSpec.createTreeField("e1.d2", Arrays.asList("e1", "d2")));
+    fields.add(JSONPathFieldSpec.createTreeField("e2.d3", Arrays.asList("e2", "d3")));
+    fields.add(JSONPathFieldSpec.createTreeField("e2.d4", Arrays.asList("e2", "d4")));
+    fields.add(JSONPathFieldSpec.createTreeField("e2.d5", Arrays.asList("e2", "d5")));
+    fields.add(JSONPathFieldSpec.createTreeField("e2.d6", Arrays.asList("e2", "d6")));
+
+    fields.add(JSONPathFieldSpec.createRootField("m3"));
+    fields.add(JSONPathFieldSpec.createTreeField("e3.m1", Arrays.asList("e3", "m1")));
+    fields.add(JSONPathFieldSpec.createTreeField("e3.m2", Arrays.asList("e3", "m2")));
+    fields.add(JSONPathFieldSpec.createTreeField("e3.m3", Arrays.asList("e3", "m3")));
+    fields.add(JSONPathFieldSpec.createTreeField("e3.m4", Arrays.asList("e3", "m4")));
+
+    JSONPathSpec flattenSpec = new JSONPathSpec(false, fields);
+    JSONParseSpec spec = new JSONParseSpec(
+        new TimestampSpec("ts", "iso", null),
+        DimensionsSpec.EMPTY,
+        flattenSpec,
+        null,
+        null
+    );
+
+    return spec.makeParser();
+  }
+
   public String generateFlatEvent() throws Exception
   {
     String nestedEvent = generateNestedEvent();
@@ -109,6 +109,24 @@ public class JSONFlattenerMaker implements ObjectFlatteners.FlattenerMaker<JsonNode>
     }
   }

+  @Override
+  public Function<JsonNode, Object> makeJsonTreeExtractor(final List<String> nodes)
+  {
+    // create a defensive copy
+    final String[] keyNames = nodes.toArray(new String[0]);
+
+    return jsonNode -> {
+      JsonNode targetNode = jsonNode;
+      for (String keyName : keyNames) {
+        if (targetNode == null) {
+          return null;
+        }
+        targetNode = targetNode.get(keyName);
+      }
+      return finalizeConversionForMap(targetNode);
+    };
+  }
+
   @Override
   public JsonProvider getJsonProvider()
   {
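Not part of the commit — a standalone sketch of what the extractor above does, assuming only Jackson on the classpath: it walks the node list key by key and yields null as soon as any level is missing, instead of throwing. The class name and sample JSON are assumptions for illustration; Druid's real implementation additionally runs the result through `finalizeConversionForMap`.

```java
import java.util.Arrays;
import java.util.List;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class TreeExtractorSketch
{
  // Same descent as makeJsonTreeExtractor, minus Druid's value finalization.
  static JsonNode walk(JsonNode node, List<String> keys)
  {
    JsonNode target = node;
    for (String key : keys) {
      if (target == null) {
        return null; // a missing intermediate level yields null, not an exception
      }
      target = target.get(key);
    }
    return target;
  }

  public static void main(String[] args) throws Exception
  {
    JsonNode root = new ObjectMapper().readTree("{\"o\":{\"mg\":1}}");
    System.out.println(walk(root, Arrays.asList("o", "mg")));      // prints 1
    System.out.println(walk(root, Arrays.asList("o", "missing"))); // prints null
  }
}
```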
@@ -22,7 +22,9 @@ package org.apache.druid.java.util.common.parsers;
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.base.Preconditions;
+import org.apache.druid.utils.CollectionUtils;

+import java.util.List;
 import java.util.Objects;

 public class JSONPathFieldSpec

@@ -30,25 +32,49 @@ public class JSONPathFieldSpec
   private final JSONPathFieldType type;
   private final String name;
   private final String expr;
+  private final List<String> nodes;

   @JsonCreator
   public JSONPathFieldSpec(
       @JsonProperty("type") JSONPathFieldType type,
       @JsonProperty("name") String name,
-      @JsonProperty("expr") String expr
+      @JsonProperty("expr") String expr,
+      @JsonProperty("nodes") List<String> nodes
   )
   {
     this.type = type;
     this.name = Preconditions.checkNotNull(name, "Missing 'name' in field spec");

-    // If expr is null and type is root, use the name as the expr too.
-    if (expr == null && type == JSONPathFieldType.ROOT) {
-      this.expr = name;
-    } else {
-      this.expr = Preconditions.checkNotNull(expr, "Missing 'expr' for field[%s]", name);
+    // Validate required fields are present
+    switch (type) {
+      case ROOT:
+        this.expr = (expr == null) ? name : expr;
+        this.nodes = null;
+        break;
+
+      case TREE:
+        this.expr = null;
+        Preconditions.checkArgument(
+            !CollectionUtils.isNullOrEmpty(nodes),
+            "Missing 'nodes' for field[%s], was [%s]", name, nodes);
+        this.nodes = nodes;
+        break;
+
+      default:
+        this.expr = Preconditions.checkNotNull(expr, "Missing 'expr' for field[%s]", name);
+        this.nodes = null;
     }
   }

+  public JSONPathFieldSpec(
+      JSONPathFieldType type,
+      String name,
+      String expr
+  )
+  {
+    this(type, name, expr, null);
+  }
+
   @JsonProperty
   public JSONPathFieldType getType()
   {

@@ -67,6 +93,12 @@ public class JSONPathFieldSpec
     return expr;
   }

+  @JsonProperty
+  public List<String> getNodes()
+  {
+    return nodes;
+  }
+
   @JsonCreator
   public static JSONPathFieldSpec fromString(String name)
   {

@@ -88,6 +120,11 @@ public class JSONPathFieldSpec
     return new JSONPathFieldSpec(JSONPathFieldType.ROOT, name, null);
   }

+  public static JSONPathFieldSpec createTreeField(String name, List<String> nodes)
+  {
+    return new JSONPathFieldSpec(JSONPathFieldType.TREE, name, null, nodes);
+  }
+
   @Override
   public boolean equals(final Object o)
   {

@@ -100,13 +137,14 @@ public class JSONPathFieldSpec
     final JSONPathFieldSpec that = (JSONPathFieldSpec) o;
     return type == that.type &&
            Objects.equals(name, that.name) &&
-           Objects.equals(expr, that.expr);
+           Objects.equals(expr, that.expr) &&
+           Objects.equals(nodes, that.nodes);
   }

   @Override
   public int hashCode()
   {
-    return Objects.hash(type, name, expr);
+    return Objects.hash(type, name, expr, nodes);
   }

   @Override

@@ -116,6 +154,7 @@ public class JSONPathFieldSpec
            "type=" + type +
            ", name='" + name + '\'' +
            ", expr='" + expr + '\'' +
+           ", nodes='" + nodes + '\'' +
            '}';
   }
 }
@@ -27,7 +27,8 @@ public enum JSONPathFieldType
 {
   ROOT,
   PATH,
-  JQ;
+  JQ,
+  TREE;

   @JsonValue
   @Override
@@ -21,6 +21,7 @@ package org.apache.druid.java.util.common.parsers;

 import com.google.common.collect.Iterables;
 import com.jayway.jsonpath.spi.json.JsonProvider;
+import org.apache.druid.guice.annotations.ExtensionPoint;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.UOE;

@@ -64,6 +65,9 @@ public class ObjectFlatteners
       case JQ:
         extractor = flattenerMaker.makeJsonQueryExtractor(fieldSpec.getExpr());
         break;
+      case TREE:
+        extractor = flattenerMaker.makeJsonTreeExtractor(fieldSpec.getNodes());
+        break;
       default:
         throw new UOE("Unsupported field type[%s]", fieldSpec.getType());
     }

@@ -208,6 +212,7 @@ public class ObjectFlatteners
     };
   }

+  @ExtensionPoint
   public interface FlattenerMaker<T>
   {
     JsonProvider getJsonProvider();

@@ -231,6 +236,14 @@ public class ObjectFlatteners
      */
     Function<T, Object> makeJsonQueryExtractor(String expr);

+    /**
+     * Create a "field" extractor for nested json expressions
+     */
+    default Function<T, Object> makeJsonTreeExtractor(List<String> nodes)
+    {
+      throw new UOE("makeJsonTreeExtractor has not been implemented.");
+    }
+
     /**
      * Convert object to Java {@link Map} using {@link #getJsonProvider()} and {@link #finalizeConversionForMap} to
      * extract and convert data
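Again not part of the commit — a sketch of what the newly marked extension point asks of an implementer, using a hypothetical flattener over plain `Map<String, Object>` records (the class name and record type are assumptions; real implementations for JSON, Avro, ORC, and Parquet appear in the other hunks of this commit). Extensions that do not override the new default simply keep throwing `UOE` when a `tree` field is configured.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

// Hypothetical: only the tree-extractor portion of a FlattenerMaker for Map-based records.
public class MapTreeExtractorSketch
{
  public static Function<Map<String, Object>, Object> makeJsonTreeExtractor(List<String> nodes)
  {
    // defensive copy, mirroring the JSON implementation above
    final List<String> keys = new ArrayList<>(nodes);
    return record -> {
      Object current = record;
      for (String key : keys) {
        if (!(current instanceof Map)) {
          return null; // the path runs past a leaf or a missing level
        }
        current = ((Map<?, ?>) current).get(key);
      }
      return current;
    };
  }
}
```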
@@ -32,6 +32,7 @@ import org.junit.Assert;
 import org.junit.Test;

 import java.io.IOException;
+import java.util.Arrays;

 public class JsonInputFormatTest
 {

@@ -48,7 +49,9 @@ public class JsonInputFormatTest
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
             new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
-            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
+            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg2", null, Arrays.asList("o", "mg2"))
         )
     ),
     ImmutableMap.of(Feature.ALLOW_COMMENTS.name(), true, Feature.ALLOW_UNQUOTED_FIELD_NAMES.name(), false),
@@ -52,7 +52,11 @@ public class JsonLineReaderTest
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
             new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
-            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
+            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz", null, Collections.singletonList("baz")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz2", null, Collections.singletonList("baz2")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg2", null, Arrays.asList("o", "mg2"))
         )
     ),
     null,

@@ -83,12 +87,16 @@ public class JsonLineReaderTest
       Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
       Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
       Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
+      Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("tree_baz")));
       Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
       Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
+      Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("tree_omg")));

       Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
+      Assert.assertTrue(row.getDimension("tree_baz2").isEmpty());
       Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
       Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
+      Assert.assertTrue(row.getDimension("tree_omg2").isEmpty());
       numActualIterations++;
     }
     Assert.assertEquals(numExpectedIterations, numActualIterations);

@@ -148,7 +156,8 @@ public class JsonLineReaderTest
         new JSONPathSpec(
             true,
             ImmutableList.of(
-                new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg")
+                new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
+                new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg"))
             )
         ),
         null,

@@ -175,10 +184,11 @@ public class JsonLineReaderTest
     int numActualIterations = 0;
     while (iterator.hasNext()) {
       final InputRow row = iterator.next();
-      Assert.assertEquals(Arrays.asList("path_omg", "timestamp", "bar", "foo"), row.getDimensions());
+      Assert.assertEquals(Arrays.asList("path_omg", "tree_omg", "timestamp", "bar", "foo"), row.getDimensions());
       Assert.assertTrue(row.getDimension("bar").isEmpty());
       Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
       Assert.assertTrue(row.getDimension("path_omg").isEmpty());
+      Assert.assertTrue(row.getDimension("tree_omg").isEmpty());
       numActualIterations++;
     }
     Assert.assertEquals(numExpectedIterations, numActualIterations);

@@ -192,7 +202,8 @@ public class JsonLineReaderTest
         new JSONPathSpec(
             true,
             ImmutableList.of(
-                new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg")
+                new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
+                new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg"))
             )
         ),
         null,

@@ -219,10 +230,11 @@ public class JsonLineReaderTest
     int numActualIterations = 0;
     while (iterator.hasNext()) {
       final InputRow row = iterator.next();
-      Assert.assertEquals(Arrays.asList("path_omg", "timestamp", "bar", "foo"), row.getDimensions());
+      Assert.assertEquals(Arrays.asList("path_omg", "tree_omg", "timestamp", "bar", "foo"), row.getDimensions());
       Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("bar")));
       Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
       Assert.assertEquals("a", Iterables.getOnlyElement(row.getDimension("path_omg")));
+      Assert.assertEquals("a", Iterables.getOnlyElement(row.getDimension("tree_omg")));
       numActualIterations++;
     }
     Assert.assertEquals(numExpectedIterations, numActualIterations);

@@ -236,7 +248,8 @@ public class JsonLineReaderTest
         new JSONPathSpec(
             true,
             ImmutableList.of(
-                new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg")
+                new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
+                new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg"))
             )
         ),
         null,

@@ -263,10 +276,11 @@ public class JsonLineReaderTest
     int numActualIterations = 0;
     while (iterator.hasNext()) {
       final InputRow row = iterator.next();
-      Assert.assertEquals(Arrays.asList("path_omg", "timestamp", "foo"), row.getDimensions());
+      Assert.assertEquals(Arrays.asList("path_omg", "tree_omg", "timestamp", "foo"), row.getDimensions());
       Assert.assertTrue(row.getDimension("bar").isEmpty());
       Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
       Assert.assertEquals("a", Iterables.getOnlyElement(row.getDimension("path_omg")));
+      Assert.assertEquals("a", Iterables.getOnlyElement(row.getDimension("tree_omg")));
       numActualIterations++;
     }
     Assert.assertEquals(numExpectedIterations, numActualIterations);
@@ -39,6 +39,8 @@ import org.junit.Test;
 import org.junit.rules.ExpectedException;

 import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;

 public class JsonReaderTest
 {

@@ -57,7 +59,11 @@ public class JsonReaderTest
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
             new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
-            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
+            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz", null, Collections.singletonList("baz")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz2", null, Collections.singletonList("baz2")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg2", null, Arrays.asList("o", "mg2"))
         )
     ),
     null,

@@ -95,12 +101,16 @@ public class JsonReaderTest
       Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
       Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
       Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
+      Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("tree_baz")));
       Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("path_omg")));
       Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("jq_omg")));
+      Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("tree_omg")));

       Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
+      Assert.assertTrue(row.getDimension("tree_baz2").isEmpty());
       Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
       Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
+      Assert.assertTrue(row.getDimension("tree_omg2").isEmpty());
     }

     Assert.assertEquals(numExpectedIterations, numActualIterations);

@@ -119,7 +129,11 @@ public class JsonReaderTest
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
             new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
-            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
+            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz", null, Collections.singletonList("baz")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz2", null, Collections.singletonList("baz2")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg2", null, Arrays.asList("o", "mg2"))
         )
     ),
     null,

@@ -162,12 +176,16 @@ public class JsonReaderTest
       Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
       Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
       Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
+      Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("tree_baz")));
       Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("path_omg")));
       Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("jq_omg")));
+      Assert.assertEquals("1", Iterables.getOnlyElement(row.getDimension("tree_omg")));

       Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
+      Assert.assertTrue(row.getDimension("tree_baz2").isEmpty());
       Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
       Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
+      Assert.assertTrue(row.getDimension("tree_omg2").isEmpty());

       numActualIterations++;
     }

@@ -188,7 +206,11 @@ public class JsonReaderTest
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
             new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
-            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
+            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz", null, Collections.singletonList("baz")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz2", null, Collections.singletonList("baz2")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg2", null, Arrays.asList("o", "mg2"))
         )
     ),
     null,

@@ -244,7 +266,11 @@ public class JsonReaderTest
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
             new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
-            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
+            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz", null, Collections.singletonList("baz")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz2", null, Collections.singletonList("baz2")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg2", null, Arrays.asList("o", "mg2"))
         )
     ),
     null,

@@ -287,12 +313,16 @@ public class JsonReaderTest
         Assert.assertEquals("x", Iterables.getOnlyElement(row.getDimension("foo")));
         Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("baz")));
         Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("root_baz")));
+        Assert.assertEquals("4", Iterables.getOnlyElement(row.getDimension("tree_baz")));
         Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("path_omg")));
         Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("jq_omg")));
+        Assert.assertEquals(msgId, Iterables.getOnlyElement(row.getDimension("tree_omg")));

         Assert.assertTrue(row.getDimension("root_baz2").isEmpty());
+        Assert.assertTrue(row.getDimension("tree_baz2").isEmpty());
         Assert.assertTrue(row.getDimension("path_omg2").isEmpty());
         Assert.assertTrue(row.getDimension("jq_omg2").isEmpty());
+        Assert.assertTrue(row.getDimension("tree_omg2").isEmpty());
       }
     }
   }

@@ -312,7 +342,11 @@ public class JsonReaderTest
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
             new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
-            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
+            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz", null, Collections.singletonList("baz")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz2", null, Collections.singletonList("baz2")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg2", null, Arrays.asList("o", "mg2"))
         )
     ),
     null,

@@ -370,7 +404,11 @@ public class JsonReaderTest
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
             new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
-            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
+            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz", null, Collections.singletonList("baz")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz2", null, Collections.singletonList("baz2")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg2", null, Arrays.asList("o", "mg2"))
         )
     ),
     null,

@@ -428,7 +466,11 @@ public class JsonReaderTest
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg", "$.o.mg"),
             new JSONPathFieldSpec(JSONPathFieldType.PATH, "path_omg2", "$.o.mg2"),
             new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg", ".o.mg"),
-            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2")
+            new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq_omg2", ".o.mg2"),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz", null, Collections.singletonList("baz")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_baz2", null, Collections.singletonList("baz2")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg", null, Arrays.asList("o", "mg")),
+            new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree_omg2", null, Arrays.asList("o", "mg2"))
         )
     ),
     null,
@@ -28,6 +28,7 @@ import org.junit.rules.ExpectedException;

 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;

@@ -173,6 +174,10 @@ public class JSONPathParserTest
     fields.add(new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq-nested-foo.bar2", ".foo.bar2"));
     fields.add(new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq-heybarx0", ".hey[0].barx"));
     fields.add(new JSONPathFieldSpec(JSONPathFieldType.JQ, "jq-met-array", ".met.a"));
+    fields.add(new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree-simpleVal", null, Collections.singletonList("simpleVal")));
+    fields.add(new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree-timestamp", null, Collections.singletonList("timestamp")));
+    fields.add(new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree-nested-foo.bar2", null, Arrays.asList("foo", "bar2")));
+    fields.add(new JSONPathFieldSpec(JSONPathFieldType.TREE, "tree-met-array", null, Arrays.asList("met", "a")));

     final Parser<String, Object> jsonParser = new JSONPathParser(new JSONPathSpec(false, fields), null, false);
     final Map<String, Object> jsonMap = jsonParser.parseToMap(NESTED_JSON);

@@ -180,6 +185,8 @@ public class JSONPathParserTest
     // Root fields
     Assert.assertEquals("text", jsonMap.get("simpleVal"));
     Assert.assertEquals("2999", jsonMap.get("timestamp"));
+    Assert.assertEquals("text", jsonMap.get("tree-simpleVal"));
+    Assert.assertEquals("2999", jsonMap.get("tree-timestamp"));

     // Nested fields
     Assert.assertEquals("bbb", jsonMap.get("nested-foo.bar2"));

@@ -189,6 +196,9 @@ public class JSONPathParserTest
     Assert.assertEquals("asdf", jsonMap.get("jq-heybarx0"));
     Assert.assertEquals(ImmutableList.of(7L, 8L, 9L), jsonMap.get("jq-met-array"));

+    Assert.assertEquals(ImmutableList.of(7L, 8L, 9L), jsonMap.get("tree-met-array"));
+    Assert.assertEquals("bbb", jsonMap.get("tree-nested-foo.bar2"));
+
     // Fields that should not be discovered
     Assert.assertFalse(jsonMap.containsKey("newmet"));
     Assert.assertFalse(jsonMap.containsKey("foo.bar1"));
@@ -231,7 +231,7 @@ Configure the ORC `inputFormat` to load ORC data as follows:
 | Field | Type | Description | Required |
 |-------|------|-------------|----------|
 | type | String | Set value to `orc`. | yes |
-| flattenSpec | JSON Object | Specifies flattening configuration for nested ORC data. See [`flattenSpec`](#flattenspec) for more info. | no |
+| flattenSpec | JSON Object | Specifies flattening configuration for nested ORC data. Only 'path' expressions are supported ('jq' and 'tree' are unavailable). See [`flattenSpec`](#flattenspec) for more info. | no |
 | binaryAsString | Boolean | Specifies if the binary orc column which is not logically marked as a string should be treated as a UTF-8 encoded string. | no (default = false) |

 For example:

@@ -261,11 +261,11 @@ To use the Parquet input format load the Druid Parquet extension

 Configure the Parquet `inputFormat` to load Parquet data as follows:

 | Field | Type | Description | Required |
-|-------|------|-------------|----------|
-|type| String| Set value to `parquet`.| yes |
-|flattenSpec| JSON Object | Define a [`flattenSpec`](#flattenspec) to extract nested values from a Parquet file. Only 'path' expressions are supported ('jq' is unavailable).| no (default will auto-discover 'root' level properties) |
+|-------|------|-------------|----------|
+|type| String| Set value to `parquet`. | yes |
+|flattenSpec| JSON Object | Define a [`flattenSpec`](#flattenspec) to extract nested values from a Parquet file. Only 'path' expressions are supported ('jq' and 'tree' are unavailable). | no (default will auto-discover 'root' level properties) |
 | binaryAsString | Boolean | Specifies if the bytes parquet column which is not logically marked as a string or enum type should be treated as a UTF-8 encoded string. | no (default = false) |

 For example:
 ```json

@@ -509,12 +509,12 @@ See the [Avro Types](../development/extensions-core/avro.md#avro-types) section

 Configure the Avro OCF `inputFormat` to load Avro OCF data as follows:

 | Field | Type | Description | Required |
-|-------|------|-------------|----------|
-|flattenSpec| JSON Object |Define a [`flattenSpec`](#flattenspec) to extract nested values from Avro records. Only 'path' expressions are supported ('jq' is unavailable).| no (default will auto-discover 'root' level properties) |
-|schema| JSON Object |Define a reader schema to be used when parsing Avro records. This is useful when parsing multiple versions of Avro OCF file data. | no (default will decode using the writer schema contained in the OCF file) |
+|-------|------|-------------|----------|
 |type| String| Set value to `avro_ocf`. | yes |
+|flattenSpec| JSON Object | Define a [`flattenSpec`](#flattenspec) to extract nested values from Avro records. Only 'path' expressions are supported ('jq' and 'tree' are unavailable). | no (default will auto-discover 'root' level properties) |
+|schema| JSON Object | Define a reader schema to be used when parsing Avro records. This is useful when parsing multiple versions of Avro OCF file data. | no (default will decode using the writer schema contained in the OCF file) |
 | binaryAsString | Boolean | Specifies if the bytes parquet column which is not logically marked as a string or enum type should be treated as a UTF-8 encoded string. | no (default = false) |

 For example:
 ```json

@@ -558,11 +558,11 @@ For example:

 Configure the Protobuf `inputFormat` to load Protobuf data as follows:

 | Field | Type | Description | Required |
-|-------|------|-------------|----------|
-|flattenSpec| JSON Object |Define a [`flattenSpec`](#flattenspec) to extract nested values from a Protobuf record. Note that only 'path' expression are supported ('jq' is unavailable).| no (default will auto-discover 'root' level properties) |
-|`protoBytesDecoder`| JSON Object |Specifies how to decode bytes to Protobuf record. | yes |
+|-------|------|-------------|----------|
 |type| String| Set value to `protobuf`. | yes |
+|flattenSpec| JSON Object | Define a [`flattenSpec`](#flattenspec) to extract nested values from a Protobuf record. Note that only 'path' expressions are supported ('jq' and 'tree' are unavailable). | no (default will auto-discover 'root' level properties) |
+|`protoBytesDecoder`| JSON Object | Specifies how to decode bytes to Protobuf record. | yes |

 For example:
 ```json

@@ -609,6 +609,7 @@ For example:
       "fields": [
         { "name": "baz", "type": "root" },
         { "name": "foo_bar", "type": "path", "expr": "$.foo.bar" },
+        { "name": "foo_other_bar", "type": "tree", "nodes": ["foo", "other", "bar"] },
         { "name": "first_food", "type": "jq", "expr": ".thing.food[1]" }
       ]
     }

@@ -623,9 +624,10 @@ Each entry in the `fields` list can have the following components:

 | Field | Description | Default |
 |-------|-------------|---------|
-| type | Options are as follows:<br /><br /><ul><li>`root`, referring to a field at the root level of the record. Only really useful if `useFieldDiscovery` is false.</li><li>`path`, referring to a field using [JsonPath](https://github.com/jayway/JsonPath) notation. Supported by most data formats that offer nesting, including `avro`, `json`, `orc`, and `parquet`.</li><li>`jq`, referring to a field using [jackson-jq](https://github.com/eiiches/jackson-jq) notation. Only supported for the `json` format.</li></ul> | none (required) |
+| type | Options are as follows:<br /><br /><ul><li>`root`, referring to a field at the root level of the record. Only really useful if `useFieldDiscovery` is false.</li><li>`path`, referring to a field using [JsonPath](https://github.com/jayway/JsonPath) notation. Supported by most data formats that offer nesting, including `avro`, `json`, `orc`, and `parquet`.</li><li>`jq`, referring to a field using [jackson-jq](https://github.com/eiiches/jackson-jq) notation. Only supported for the `json` format.</li><li>`tree`, referring to a nested field from the root level of the record. Useful and more efficient than `path` or `jq` if a simple hierarchical fetch is required. Only supported for the `json` format.</li></ul> | none (required) |
 | name | Name of the field after flattening. This name can be referred to by the [`timestampSpec`](./ingestion-spec.md#timestampspec), [`transformSpec`](./ingestion-spec.md#transformspec), [`dimensionsSpec`](./ingestion-spec.md#dimensionsspec), and [`metricsSpec`](./ingestion-spec.md#metricsspec).| none (required) |
 | expr | Expression for accessing the field while flattening. For type `path`, this should be [JsonPath](https://github.com/jayway/JsonPath). For type `jq`, this should be [jackson-jq](https://github.com/eiiches/jackson-jq) notation. For other types, this parameter is ignored. | none (required for types `path` and `jq`) |
+| nodes | For `tree` only. Multiple-expression field for accessing the field while flattening, representing the hierarchy of field names to read. For other types, this parameter must not be provided. | none (required for type `tree`) |

 #### Notes on flattening

@@ -690,7 +692,8 @@ See [Avro specification](http://avro.apache.org/docs/1.7.7/spec.html#Schema+Reso
 | fromPigAvroStorage | Boolean | Specifies whether the data file is stored using AvroStorage. | no(default == false) |

 An Avro parseSpec can contain a [`flattenSpec`](#flattenspec) using either the "root" or "path"
-field types, which can be used to read nested Avro records. The "jq" field type is not currently supported for Avro.
+field types, which can be used to read nested Avro records. The "jq" and "tree" field types are not currently supported
+for Avro.

 For example, using Avro Hadoop parser with custom reader's schema file:

@@ -1208,7 +1211,7 @@ This parser is for [stream ingestion](./index.md#streaming) and reads Avro data
 | parseSpec | JSON Object | Specifies the timestamp and dimensions of the data. Should be an "avro" parseSpec. | yes |

 An Avro parseSpec can contain a [`flattenSpec`](#flattenspec) using either the "root" or "path"
-field types, which can be used to read nested Avro records. The "jq" field type is not currently supported for Avro.
+field types, which can be used to read nested Avro records. The "jq" and "tree" field types are not currently supported for Avro.

 For example, using Avro stream parser with schema repo Avro bytes decoder:
@@ -140,6 +140,16 @@ public class AvroFlattenerMaker implements ObjectFlatteners.FlattenerMaker<GenericRecord>
     throw new UnsupportedOperationException("Avro + JQ not supported");
   }

+  @Override
+  public Function<GenericRecord, Object> makeJsonTreeExtractor(List<String> nodes)
+  {
+    if (nodes.size() == 1) {
+      return (GenericRecord record) -> getRootField(record, nodes.get(0));
+    }
+
+    throw new UnsupportedOperationException("Avro + nested tree extraction not supported");
+  }
+
   @Override
   public JsonProvider getJsonProvider()
   {

@@ -91,6 +91,16 @@ public class OrcStructFlattenerMaker implements ObjectFlatteners.FlattenerMaker<OrcStruct>
     throw new UnsupportedOperationException("ORC flattener does not support JQ");
   }

+  @Override
+  public Function<OrcStruct, Object> makeJsonTreeExtractor(List<String> nodes)
+  {
+    if (nodes.size() == 1) {
+      return (OrcStruct record) -> getRootField(record, nodes.get(0));
+    }
+
+    throw new UnsupportedOperationException("ORC flattener does not support nested root queries");
+  }
+
   @Override
   public JsonProvider getJsonProvider()
   {
@@ -88,6 +88,16 @@ public class ParquetGroupFlattenerMaker implements ObjectFlatteners.FlattenerMaker<Group>
     throw new UnsupportedOperationException("Parquet does not support JQ");
   }

+  @Override
+  public Function<Group, Object> makeJsonTreeExtractor(List<String> nodes)
+  {
+    if (nodes.size() == 1) {
+      return (Group group) -> getRootField(group, nodes.get(0));
+    }
+
+    throw new UnsupportedOperationException("Parquet does not support nested tree extraction");
+  }
+
   @Override
   public JsonProvider getJsonProvider()
   {