NIFI-6088: Widen type inference for BIGINT and DOUBLE

This closes #3342

Signed-off-by: Mike Thomsen <mikerthomsen@gmail.com>
This commit is contained in:
Matthew Burgess 2019-02-27 20:25:25 -05:00 committed by Mike Thomsen
parent 2a6c3c1686
commit e5fa18d63c
4 changed files with 112 additions and 3 deletions

View File

@ -152,6 +152,7 @@
<exclude>src/test/resources/json/bank-account-multiarray.json</exclude>
<exclude>src/test/resources/json/bank-account-multiline.json</exclude>
<exclude>src/test/resources/json/bank-account-oneline.json</exclude>
<exclude>src/test/resources/json/data-types.json</exclude>
<exclude>src/test/resources/json/json-with-unicode.json</exclude>
<exclude>src/test/resources/json/primitive-type-array.json</exclude>
<exclude>src/test/resources/json/single-bank-account.json</exclude>

View File

@ -56,13 +56,13 @@ public class JsonSchemaInference extends HierarchicalSchemaInference<JsonNode> {
}
// Integral JSON numbers: nodes Jackson reports as big integers map to BIGINT,
// every other integral node maps to LONG.
if (jsonNode.isIntegralNumber()) {
    if (jsonNode.isBigInteger()) {
        return RecordFieldType.BIGINT.getDataType();
    }
    return RecordFieldType.LONG.getDataType();
}
// Non-integral JSON numbers are always widened to DOUBLE. The previous code returned
// FLOAT here, which also made the subsequent isDouble() check unreachable because a
// double node is itself a floating-point node; inferring FLOAT could silently lose
// precision for values that need a double.
if (jsonNode.isFloatingPointNumber()) {
    return RecordFieldType.DOUBLE.getDataType();
}
if (jsonNode.isBinary()) {

View File

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.json;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.schema.inference.InferSchemaAccessStrategy;
import org.apache.nifi.schema.inference.TimeValueInference;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.junit.Test;
import org.mockito.Mockito;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;
/**
 * Exercises {@link JsonSchemaInference} against the sample document
 * {@code src/test/resources/json/data-types.json} and checks the type that is
 * inferred for each of its fields.
 */
public class TestJsonSchemaInference {

    private final TimeValueInference timestampInference = new TimeValueInference("yyyy-MM-dd", "HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");

    /**
     * Infers a schema from the sample document, then verifies the inferred type of
     * every field and the order in which the field names are reported.
     *
     * @throws IOException if the sample document cannot be read
     */
    @Test
    public void testInferenceIncludesAllRecords() throws IOException {
        final RecordSchema schema = inferSchema(new File("src/test/resources/json/data-types.json"));

        assertSame(RecordFieldType.STRING, fieldTypeOf(schema, "varcharc"));
        assertSame(RecordFieldType.LONG, fieldTypeOf(schema, "uuid"));
        assertSame(RecordFieldType.LONG, fieldTypeOf(schema, "tinyintc"));
        assertSame(RecordFieldType.STRING, fieldTypeOf(schema, "textc"));
        assertEquals(RecordFieldType.DATE.getDataType("yyyy-MM-dd"), schema.getDataType("datec").get());
        assertSame(RecordFieldType.LONG, fieldTypeOf(schema, "smallintc"));
        assertSame(RecordFieldType.LONG, fieldTypeOf(schema, "mediumintc"));
        assertSame(RecordFieldType.LONG, fieldTypeOf(schema, "intc"));
        assertSame(RecordFieldType.BIGINT, fieldTypeOf(schema, "bigintc"));
        assertSame(RecordFieldType.DOUBLE, fieldTypeOf(schema, "floatc"));
        assertSame(RecordFieldType.DOUBLE, fieldTypeOf(schema, "doublec"));
        assertSame(RecordFieldType.DOUBLE, fieldTypeOf(schema, "decimalc"));
        assertEquals(RecordFieldType.TIMESTAMP.getDataType("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), schema.getDataType("timestampc").get());
        assertEquals(RecordFieldType.TIME.getDataType("HH:mm:ss"), schema.getDataType("timec").get());
        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("charc").get());
        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("tinytextc").get());
        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("blobc").get());
        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("mediumtextc").get());
        assertSame(RecordFieldType.LONG, fieldTypeOf(schema, "enumc"));
        assertSame(RecordFieldType.LONG, fieldTypeOf(schema, "setc"));
        assertSame(RecordFieldType.LONG, fieldTypeOf(schema, "boolc"));
        assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("binaryc").get());

        // The schema is expected to list the fields in this exact order.
        final List<String> actualNames = schema.getFieldNames();
        final List<String> expectedNames = Arrays.asList(
                "varcharc", "uuid", "tinyintc", "textc", "datec", "smallintc", "mediumintc", "intc", "bigintc",
                "floatc", "doublec", "decimalc", "timestampc", "timec", "charc", "tinytextc", "blobc",
                "mediumtextc", "enumc", "setc", "boolc", "binaryc");
        assertEquals(expectedNames, actualNames);
    }

    /**
     * Reads the given JSON file through a buffered stream and returns the schema that
     * an {@link InferSchemaAccessStrategy} backed by {@link JsonSchemaInference} produces for it.
     */
    private RecordSchema inferSchema(final File jsonFile) throws IOException {
        try (final InputStream rawStream = new FileInputStream(jsonFile);
             final InputStream bufferedStream = new BufferedInputStream(rawStream)) {

            final InferSchemaAccessStrategy<?> strategy = new InferSchemaAccessStrategy<>(
                    (variables, content) -> new JsonRecordSource(content),
                    new JsonSchemaInference(timestampInference),
                    Mockito.mock(ComponentLog.class));

            return strategy.getSchema(null, bufferedStream, null);
        }
    }

    /** Returns the field type the schema holds for the named field. */
    private static RecordFieldType fieldTypeOf(final RecordSchema schema, final String fieldName) {
        return schema.getDataType(fieldName).get().getFieldType();
    }
}

View File

@ -0,0 +1,24 @@
[{
"varcharc": "Nam penatibus in neque.",
"uuid": 1,
"tinyintc": -81,
"textc": "A faucibus volutpat placerat euismod mollis, quis semper quis ultrices aliquam massa vestibulum a lacus hendrerit turpis nullam, tincidunt ullamcorper ad ridiculus habitasse tristique vivamus elit. Ac id montes erat accumsan rhoncus consectetur leo condimentum.\n\nConubia lectus et viverra taciti, mollis molestie phasellus, fermentum accumsan sem nisi sit dapibus interdum ridiculus blandit blandit. Volutpat nullam orci cras. Justo nullam penatibus non fusce vivamus integer fames dis tellus dictumst.\n\nQuam sem condimentum sociis fermentum montes, id natoque aenean at quisque lacus quam orci hac enim metus, mollis potenti litora blandit leo aenean nibh varius ultrices. Sollicitudin egestas eu ultrices lacinia dictumst ligula magnis. Molestie id eget diam est.",
"datec": "2019-02-27",
"smallintc": -8423,
"mediumintc": 6008538,
"intc": -1130599020,
"bigintc": 171234567890123456789,
"floatc": 182.33,
"doublec": 149.67382865705562,
"decimalc": 109.88,
"timestampc": "2019-02-27T20:40:53.000Z",
"timec": "20:40:53",
"charc": "DBDDGpPz",
"tinytextc": "hgFuypClmWWMNsDXEFJJOhdsljdBP",
"blobc": "Wc5YvvF8fUsOgejKPsOa",
"mediumtextc": "Torquent aliquet malesuada adipiscing, eget himenaeos facilisi ridiculus eros netus, nisi semper eleifend dolor nisi sapien phasellus luctus libero aenean suscipit pulvinar, lacus posuere id hendrerit feugiat vitae purus ac blandit euismod pharetra. Adipiscing lectus primis eros pellentesque porta blandit dictum fermentum lectus tortor nam, fusce est dis class ornare neque est enim quisque a.\n\nScelerisque aptent etiam non imperdiet volutpat. Quisque est fusce purus fringilla suspendisse mauris vivamus faucibus potenti.\n\nLacus nullam molestie mollis luctus ultricies dapibus taciti. Hac porttitor vehicula aliquam turpis hac ultricies dolor interdum morbi, litora etiam faucibus, commodo tristique vestibulum. Gravida id conubia lorem etiam per cum quisque scelerisque, dignissim porttitor sollicitudin urna lacinia suspendisse non magnis vivamus.\n\nPurus integer nam rhoncus leo tempor vivamus tortor cubilia arcu nunc ultrices felis, euismod feugiat placerat eleifend feugiat, laoreet viverra nisl potenti suspendisse amet parturient ullamcorper risus penatibus, eget erat leo suscipit ligula facilisi facilisi class accumsan. Quam habitasse mollis consequat risus felis nostra phasellus massa, condimentum ultrices eget himenaeos cursus, orci eros aliquet fringilla pulvinar, montes facilisi mi. Eu id tincidunt sagittis. Lobortis magna per eget vehicula congue imperdiet tristique dolor sociis penatibus, cubilia lorem scelerisque convallis iaculis condimentum pharetra, bibendum cubilia conubia cum eleifend vitae sociosqu porta fusce etiam natoque.\n\nMagnis natoque magnis conubia, penatibus mattis id cum lacus praesent inceptos, mattis praesent feugiat leo malesuada. Convallis cras inceptos nam bibendum laoreet imperdiet eu blandit nostra vivamus inceptos, morbi ultrices ullamcorper posuere sapien sagittis sagittis, ridiculus inceptos porttitor vivamus nostra. \nPharetra porta ultricies porttitor, potenti lacus sollicitudin viverra class quis, amet cum lacus leo penatibus sapien pharetra ipsum etiam integer phasellus magna, justo ridiculus velit. Consequat cursus condimentum porta amet, ornare iaculis varius consectetur cursus sodales, proin penatibus fermentum libero vel, porta elementum aliquam nibh ac platea ante elit aliquet.",
"enumc": 1,
"setc": 4,
"boolc": 0,
"binaryc": "ehynfnybBfxmxgkMVrVt"
}]