NIFI-6304 added trim, toLowerCase and toUpperCase to record path operations.

NIFI-6304 Updated code based on code review.

NIFI-6304 Updated documentation.

NIFI-6304 Refactored to make it simpler

NIFI-6304 Reverted Concat to its last state.

This closes #3478.

Signed-off-by: Koji Kawamura <ijokarumawak@apache.org>
This commit is contained in:
Mike Thomsen 2019-05-16 21:11:05 -04:00 committed by Koji Kawamura
parent 99b20ac2d1
commit 6a06cd3094
8 changed files with 338 additions and 15 deletions

View File

@ -52,4 +52,4 @@ public class Concat extends RecordPathSegment {
return Stream.of(responseValue);
}
}
}

View File

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.record.path.functions;
import org.apache.nifi.record.path.FieldValue;
import org.apache.nifi.record.path.RecordPathEvaluationContext;
import org.apache.nifi.record.path.StandardFieldValue;
import org.apache.nifi.record.path.paths.RecordPathSegment;
import org.apache.nifi.serialization.record.util.DataTypeUtils;
import java.util.stream.Stream;
/**
 * Base class for RecordPath functions that take a single value path and no
 * further arguments, and that turn each selected value into a String result.
 */
public abstract class NoArgStringFunction extends RecordPathSegment {
    private final RecordPathSegment valuePath;

    /**
     * @param path the function name, passed to the parent segment as its path
     * @param valuePath the segment whose selected values are handed to {@link #apply(String)}
     * @param absolute passed through unchanged to the parent segment
     */
    public NoArgStringFunction(final String path, final RecordPathSegment valuePath, final boolean absolute) {
        super(path, null, absolute);
        this.valuePath = valuePath;
    }

    /**
     * Evaluates the wrapped value path and replaces every selected value with
     * the result of {@link #apply(String)}.
     */
    @Override
    public Stream<FieldValue> evaluate(RecordPathEvaluationContext context) {
        return valuePath.evaluate(context).map(this::toProcessedFieldValue);
    }

    /**
     * Converts one selected field value to a String, applies the function and
     * wraps the result with the same field and parent as the input.
     */
    private FieldValue toProcessedFieldValue(final FieldValue fieldValue) {
        final Object rawValue = fieldValue.getValue();
        final String input;
        if (rawValue == null) {
            // A missing value is handed to the function as the empty string.
            input = "";
        } else {
            input = DataTypeUtils.toString(rawValue, (String) null);
        }
        final String output = apply(input);
        return new StandardFieldValue(output, fieldValue.getField(), fieldValue.getParent().orElse(null));
    }

    /**
     * Sub-classes apply their function to the given value and return the result.
     * @param value the String form of the selected value; implementations should tolerate null
     * @return the function result
     */
    abstract String apply(String value);
}

View File

@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.record.path.functions;
import org.apache.nifi.record.path.paths.RecordPathSegment;
/**
 * RecordPath function {@code toLowerCase}: converts the String form of each
 * selected value to lower case.
 */
public class ToLowerCase extends NoArgStringFunction {
    /**
     * @param valuePath the segment whose selected values are lower-cased
     * @param absolute passed through unchanged to the parent class
     */
    public ToLowerCase(final RecordPathSegment valuePath, final boolean absolute) {
        super("toLowerCase", valuePath, absolute);
    }

    /**
     * @param value the String to convert, may be null
     * @return the lower-cased value, or null when {@code value} is null
     */
    @Override
    String apply(String value) {
        // Locale.ROOT keeps the result independent of the JVM default locale;
        // the no-arg overload would, for example, map "I" to a dotless i under a Turkish locale.
        return value == null ? null : value.toLowerCase(java.util.Locale.ROOT);
    }
}

View File

@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.record.path.functions;
import org.apache.nifi.record.path.paths.RecordPathSegment;
/**
 * RecordPath function {@code toUpperCase}: converts the String form of each
 * selected value to upper case.
 */
public class ToUpperCase extends NoArgStringFunction {
    /**
     * @param valuePath the segment whose selected values are upper-cased
     * @param absolute passed through unchanged to the parent class
     */
    public ToUpperCase(final RecordPathSegment valuePath, final boolean absolute) {
        super("toUpperCase", valuePath, absolute);
    }

    /**
     * @param value the String to convert, may be null
     * @return the upper-cased value, or null when {@code value} is null
     */
    @Override
    String apply(String value) {
        // Locale.ROOT keeps the result independent of the JVM default locale;
        // the no-arg overload would, for example, map "i" to a dotted capital I under a Turkish locale.
        return value == null ? null : value.toUpperCase(java.util.Locale.ROOT);
    }
}

View File

@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.record.path.functions;
import org.apache.nifi.record.path.paths.RecordPathSegment;
/**
 * RecordPath function {@code trim}: removes leading and trailing whitespace,
 * as defined by {@link String#trim()}, from the String form of each selected value.
 */
public class TrimString extends NoArgStringFunction {
    /**
     * @param valuePath the segment whose selected values are trimmed
     * @param absolute passed through unchanged to the parent class
     */
    public TrimString(RecordPathSegment valuePath, boolean absolute) {
        super("trim", valuePath, absolute);
    }

    /**
     * @param value the String to trim, may be null
     * @return the trimmed value, or null when {@code value} is null
     */
    @Override
    String apply(String value) {
        if (value == null) {
            return null;
        }
        return value.trim();
    }
}

View File

@ -79,7 +79,10 @@ import org.apache.nifi.record.path.functions.SubstringBefore;
import org.apache.nifi.record.path.functions.SubstringBeforeLast;
import org.apache.nifi.record.path.functions.ToBytes;
import org.apache.nifi.record.path.functions.ToDate;
import org.apache.nifi.record.path.functions.ToLowerCase;
import org.apache.nifi.record.path.functions.ToString;
import org.apache.nifi.record.path.functions.ToUpperCase;
import org.apache.nifi.record.path.functions.TrimString;
public class RecordPathCompiler {
@ -246,6 +249,18 @@ public class RecordPathCompiler {
return new Concat(argPaths, absolute);
}
case "toLowerCase": {
final RecordPathSegment[] args = getArgPaths(argumentListTree, 1, functionName, absolute);
return new ToLowerCase(args[0], absolute);
}
case "toUpperCase": {
final RecordPathSegment[] args = getArgPaths(argumentListTree, 1, functionName, absolute);
return new ToUpperCase(args[0], absolute);
}
case "trim": {
final RecordPathSegment[] args = getArgPaths(argumentListTree, 1, functionName, absolute);
return new TrimString(args[0], absolute);
}
case "fieldName": {
final RecordPathSegment[] args = getArgPaths(argumentListTree, 1, functionName, absolute);
return new FieldName(args[0], absolute);
@ -299,6 +314,17 @@ public class RecordPathCompiler {
throw new RecordPathException("Encountered unexpected token " + tree);
}
/**
 * Builds one path segment for every child of the given argument list tree,
 * in child order.
 *
 * @param absolute forwarded to {@code buildPath} for each argument
 * @param argumentListTree the tree node whose children are the function arguments
 * @return an array with one segment per child of {@code argumentListTree}
 */
private static RecordPathSegment[] getArgumentsForStringFunction(boolean absolute, Tree argumentListTree) {
    final RecordPathSegment[] segments = new RecordPathSegment[argumentListTree.getChildCount()];
    int index = 0;
    while (index < segments.length) {
        segments[index] = buildPath(argumentListTree.getChild(index), null, absolute);
        index++;
    }
    return segments;
}
private static RecordPathFilter createFilter(final Tree operatorTree, final RecordPathSegment parent, final boolean absolute) {
switch (operatorTree.getType()) {
case EQUAL:

View File

@ -17,9 +17,17 @@
package org.apache.nifi.record.path;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.apache.nifi.record.path.exception.RecordPathException;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.DataType;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.type.ArrayDataType;
import org.apache.nifi.serialization.record.util.DataTypeUtils;
import org.junit.Test;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
@ -35,16 +43,10 @@ import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.nifi.record.path.exception.RecordPathException;
import org.apache.nifi.serialization.SimpleRecordSchema;
import org.apache.nifi.serialization.record.DataType;
import org.apache.nifi.serialization.record.MapRecord;
import org.apache.nifi.serialization.record.Record;
import org.apache.nifi.serialization.record.RecordField;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.util.DataTypeUtils;
import org.junit.Test;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class TestRecordPath {
@ -1210,6 +1212,68 @@ public class TestRecordPath {
assertEquals("John Doe: 48", RecordPath.compile("concat(/firstName, ' ', /lastName, ': ', 48)").evaluate(record).getSelectedFields().findFirst().get().getValue());
}
/**
 * Creates the record shared by the case-conversion tests: three String
 * fields holding "John", "Smith" and "Doe".
 */
private Record getCaseTestRecord() {
    final Map<String, Object> nameValues = new HashMap<>();
    nameValues.put("lastName", "Doe");
    nameValues.put("firstName", "John");
    nameValues.put("middleName", "Smith");

    final DataType stringType = RecordFieldType.STRING.getDataType();
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("middleName", stringType));
    schemaFields.add(new RecordField("lastName", stringType));
    schemaFields.add(new RecordField("firstName", stringType));

    final RecordSchema nameSchema = new SimpleRecordSchema(schemaFields);
    return new MapRecord(nameSchema, nameValues);
}
/**
 * Verifies that toUpperCase upper-cases a concatenated name and yields an
 * empty String for a field that is not defined in the record.
 */
@Test
public void testToUpperCase() {
    final Record record = getCaseTestRecord();
    assertEquals("JOHN SMITH DOE", RecordPath.compile("toUpperCase(concat(/firstName, ' ', /middleName, ' ', /lastName))").evaluate(record).getSelectedFields().findFirst().get().getValue());
    // Exercise toUpperCase (not toLowerCase) for the missing-field case so this test covers its own function.
    assertEquals("", RecordPath.compile("toUpperCase(/notDefined)").evaluate(record).getSelectedFields().findFirst().get().getValue());
}
/**
 * Verifies that toLowerCase lower-cases a concatenated name and yields an
 * empty String for a field that is not defined in the record.
 */
@Test
public void testToLowerCase() {
    final Record record = getCaseTestRecord();

    final RecordPath fullNamePath = RecordPath.compile("toLowerCase(concat(/firstName, ' ', /middleName, ' ', /lastName))");
    final Object loweredFullName = fullNamePath.evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("john smith doe", loweredFullName);

    final RecordPath missingFieldPath = RecordPath.compile("toLowerCase(/notDefined)");
    final Object loweredMissing = missingFieldPath.evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("", loweredMissing);
}
/**
 * Verifies that trim removes surrounding whitespace from a String field and
 * yields an empty String for a field that is not present in the record.
 */
@Test
public void testTrimString() {
    final Map<String, Object> recordValues = new HashMap<>();
    recordValues.put("fullName", " John Smith ");

    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("fullName", RecordFieldType.STRING.getDataType()));
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);
    final Record record = new MapRecord(schema, recordValues);

    final Object trimmedName = RecordPath.compile("trim(/fullName)").evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("John Smith", trimmedName);

    final Object trimmedMissing = RecordPath.compile("trim(/missing)").evaluate(record).getSelectedFields().findFirst().get().getValue();
    assertEquals("", trimmedMissing);
}
/**
 * Verifies that trim is applied to every element selected from a String
 * array field.
 */
@Test
public void testTrimArray() {
    final DataType namesType = new ArrayDataType(RecordFieldType.STRING.getDataType());
    final List<RecordField> schemaFields = new ArrayList<>();
    schemaFields.add(new RecordField("names", namesType));
    final RecordSchema schema = new SimpleRecordSchema(schemaFields);

    final String[] paddedNames = new String[]{" John Smith ", " Jane Smith "};
    final Map<String, Object> recordValues = new HashMap<>();
    recordValues.put("names", paddedNames);
    final Record record = new MapRecord(schema, recordValues);

    final RecordPath trimPath = RecordPath.compile("trim(/names[*])");
    final List<FieldValue> trimmed = trimPath.evaluate(record).getSelectedFields().collect(Collectors.toList());
    assertEquals("John Smith", trimmed.get(0).getValue());
    assertEquals("Jane Smith", trimmed.get(1).getValue());
}
@Test
public void testFieldName() {
final List<RecordField> fields = new ArrayList<>();

View File

@ -3,7 +3,7 @@
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
@ -632,6 +632,94 @@ The following record path expression would re-format the date String:
| `format( toDate(/eventDate, "yyyy-MM-dd'T'HH:mm:ss'Z'"), 'yyyy-MM-dd')` | 2017-10-20
|==========================================================
=== trim
Removes whitespace from the start and end of a string. For example, given a schema such as:
----
{
"type": "record",
"name": "events",
"fields": [
{ "name": "name", "type": "string" }
]
}
----
and a record such as:
----
{
"name" : " John Smith "
}
----
The following record path expression would remove extraneous whitespace:
|==========================================================
| RecordPath | Return value
| `trim(/name)` | John Smith
|==========================================================
=== toUpperCase
Changes the entire string to upper case. For example, given a schema such as:
----
{
"type": "record",
"name": "events",
"fields": [
{ "name": "fullName", "type": "string" }
]
}
----
and a record such as:
----
{
"fullName" : "john smith"
}
----
The following record path expression would convert the string to upper case:
|==========================================================
| RecordPath | Return value
| `toUpperCase(/fullName)` | JOHN SMITH
|==========================================================
=== toLowerCase
Changes the entire string to lower case. For example, given a schema such as:
----
{
"type": "record",
"name": "events",
"fields": [
{ "name": "message", "type": "string" }
]
}
----
and a record such as:
----
{
"message" : "hEllO wORLd"
}
----
The following record path expression would convert the string to lower case:
|==========================================================
| RecordPath | Return value
| `toLowerCase(/message)` | hello world
|==========================================================
=== base64Encode
Converts a String or byte[] using Base64 encoding, using the UTF-8 character set. For example, given a schema such as: