NIFI-6304 added trim, toLowerCase and toUpperCase to record path operations.

NIFI-6304 Updated code based on code review. NIFI-6304 Updated documentation. NIFI-6304 Refactored to make it simpler NIFI-6304 Reverted Concat to its last state. This closes #3478. Signed-off-by: Koji Kawamura <ijokarumawak@apache.org>
2019-05-16 21:11:05 -04:00 · 2019-05-16 21:11:05 -04:00 · 6a06cd3094
parent 99b20ac2d1
commit 6a06cd3094
8 changed files with 338 additions and 15 deletions
--- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/Concat.java
+++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/Concat.java
@ -52,4 +52,4 @@ public class Concat extends RecordPathSegment {
        return Stream.of(responseValue);
    }

-}
+}
--- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/NoArgStringFunction.java
+++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/NoArgStringFunction.java
@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nifi.record.path.functions;
+
+import org.apache.nifi.record.path.FieldValue;
+import org.apache.nifi.record.path.RecordPathEvaluationContext;
+import org.apache.nifi.record.path.StandardFieldValue;
+import org.apache.nifi.record.path.paths.RecordPathSegment;
+import org.apache.nifi.serialization.record.util.DataTypeUtils;
+
+import java.util.stream.Stream;
+
+/**
+ * Base class for record path functions that wrap a single value path, take no further
+ * arguments, and transform each selected value as a String.
+ *
+ * <p>Each field value produced by the wrapped path is converted to a String, passed to
+ * {@link #apply(String)}, and re-wrapped together with its original field and parent.</p>
+ */
+public abstract class NoArgStringFunction extends RecordPathSegment {
+    private final RecordPathSegment valuePath;
+
+    /**
+     * @param path the path string handed to the {@link RecordPathSegment} constructor; subclasses in this package pass their function name
+     * @param valuePath the segment whose selected values are transformed
+     * @param absolute forwarded unchanged to the {@link RecordPathSegment} constructor
+     */
+    public NoArgStringFunction(final String path, final RecordPathSegment valuePath, final boolean absolute) {
+        super(path, null, absolute);
+        this.valuePath = valuePath;
+    }
+
+    /**
+     * Evaluates the wrapped path and maps every resulting field value through {@link #apply(String)}.
+     * A null field value is presented to {@link #apply(String)} as an empty String.
+     *
+     * @param context the evaluation context passed on to the wrapped path
+     * @return one transformed field value per field value selected by the wrapped path
+     */
+    @Override
+    public Stream<FieldValue> evaluate(RecordPathEvaluationContext context) {
+        return valuePath.evaluate(context).map(fieldValue -> {
+            final Object rawValue = fieldValue.getValue();
+            final String text;
+            if (rawValue == null) {
+                text = "";
+            } else {
+                text = DataTypeUtils.toString(rawValue, (String) null);
+            }
+            return new StandardFieldValue(apply(text), fieldValue.getField(), fieldValue.getParent().orElse(null));
+        });
+    }
+
+    /**
+     * Transforms the String form of a selected value.
+     *
+     * @param value the text to transform; implementations should tolerate null, although
+     *              {@link #evaluate(RecordPathEvaluationContext)} substitutes an empty String for null field values
+     * @return the function result
+     */
+    abstract String apply(String value);
+
+}
--- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/ToLowerCase.java
+++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/ToLowerCase.java
@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.record.path.functions;
+
+import org.apache.nifi.record.path.paths.RecordPathSegment;
+
+/**
+ * Record path function {@code toLowerCase(<path>)}: lower-cases the String form of each selected value.
+ */
+public class ToLowerCase extends NoArgStringFunction {
+    /**
+     * @param valuePath the segment whose selected values are lower-cased
+     * @param absolute forwarded unchanged to the {@link NoArgStringFunction} constructor
+     */
+    public ToLowerCase(final RecordPathSegment valuePath, final boolean absolute) {
+        super("toLowerCase", valuePath, absolute);
+    }
+
+    /**
+     * Lower-cases {@code value} with {@link java.util.Locale#ROOT} so the result does not depend on the
+     * JVM's default locale (under a Turkish default locale, for example, {@code "I"} would otherwise
+     * become a dotless i).
+     *
+     * @param value the text to convert; may be null
+     * @return the lower-cased text, or null when {@code value} is null
+     */
+    @Override
+    String apply(String value) {
+        return value == null ? null : value.toLowerCase(java.util.Locale.ROOT);
+    }
+}
--- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/ToUpperCase.java
+++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/ToUpperCase.java
@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.record.path.functions;
+
+import org.apache.nifi.record.path.paths.RecordPathSegment;
+
+/**
+ * Record path function {@code toUpperCase(<path>)}: upper-cases the String form of each selected value.
+ */
+public class ToUpperCase extends NoArgStringFunction {
+    /**
+     * @param valuePath the segment whose selected values are upper-cased
+     * @param absolute forwarded unchanged to the {@link NoArgStringFunction} constructor
+     */
+    public ToUpperCase(final RecordPathSegment valuePath, final boolean absolute) {
+        super("toUpperCase", valuePath, absolute);
+    }
+
+    /**
+     * Upper-cases {@code value} with {@link java.util.Locale#ROOT} so the result does not depend on the
+     * JVM's default locale (under a Turkish default locale, for example, {@code "i"} would otherwise
+     * become a dotted capital I).
+     *
+     * @param value the text to convert; may be null
+     * @return the upper-cased text, or null when {@code value} is null
+     */
+    @Override
+    String apply(String value) {
+        return value == null ? null : value.toUpperCase(java.util.Locale.ROOT);
+    }
+}
--- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/TrimString.java
+++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/functions/TrimString.java
@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.record.path.functions;
+
+import org.apache.nifi.record.path.paths.RecordPathSegment;
+
+/**
+ * Record path function {@code trim(<path>)}: trims the String form of each selected value.
+ */
+public class TrimString extends NoArgStringFunction {
+    /**
+     * @param valuePath the segment whose selected values are trimmed
+     * @param absolute forwarded unchanged to the {@link NoArgStringFunction} constructor
+     */
+    public TrimString(final RecordPathSegment valuePath, final boolean absolute) {
+        super("trim", valuePath, absolute);
+    }
+
+    /**
+     * Removes leading and trailing whitespace as defined by {@link String#trim()}.
+     *
+     * @param value the text to trim; may be null
+     * @return the trimmed text, or null when {@code value} is null
+     */
+    @Override
+    String apply(String value) {
+        if (value == null) {
+            return null;
+        }
+        return value.trim();
+    }
+}
--- a/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/paths/RecordPathCompiler.java
+++ b/nifi-commons/nifi-record-path/src/main/java/org/apache/nifi/record/path/paths/RecordPathCompiler.java
@ -79,7 +79,10 @@ import org.apache.nifi.record.path.functions.SubstringBefore;
 import org.apache.nifi.record.path.functions.SubstringBeforeLast;
 import org.apache.nifi.record.path.functions.ToBytes;
 import org.apache.nifi.record.path.functions.ToDate;
+import org.apache.nifi.record.path.functions.ToLowerCase;
 import org.apache.nifi.record.path.functions.ToString;
+import org.apache.nifi.record.path.functions.ToUpperCase;
+import org.apache.nifi.record.path.functions.TrimString;

 public class RecordPathCompiler {

@ -246,6 +249,18 @@ public class RecordPathCompiler {

                        return new Concat(argPaths, absolute);
                    }
+                    case "toLowerCase": {
+                        final RecordPathSegment[] args = getArgPaths(argumentListTree, 1, functionName, absolute);
+                        return new ToLowerCase(args[0], absolute);
+                    }
+                    case "toUpperCase": {
+                        final RecordPathSegment[] args = getArgPaths(argumentListTree, 1, functionName, absolute);
+                        return new ToUpperCase(args[0], absolute);
+                    }
+                    case "trim": {
+                        final RecordPathSegment[] args = getArgPaths(argumentListTree, 1, functionName, absolute);
+                        return new TrimString(args[0], absolute);
+                    }
                    case "fieldName": {
                        final RecordPathSegment[] args = getArgPaths(argumentListTree, 1, functionName, absolute);
                        return new FieldName(args[0], absolute);
@ -299,6 +314,17 @@ public class RecordPathCompiler {
        throw new RecordPathException("Encountered unexpected token " + tree);
    }

+    /**
+     * Builds one path segment per child of the given argument list, in child order.
+     *
+     * NOTE(review): no call site for this helper is visible in this change (the toLowerCase,
+     * toUpperCase and trim cases use getArgPaths) - confirm whether it is still needed.
+     *
+     * @param absolute forwarded to {@code buildPath} for every argument
+     * @param argumentListTree tree whose children are the function arguments
+     * @return the compiled argument paths; empty when the tree has no children
+     */
+    private static RecordPathSegment[] getArgumentsForStringFunction(boolean absolute, Tree argumentListTree) {
+        final int argCount = argumentListTree.getChildCount();
+        final RecordPathSegment[] segments = new RecordPathSegment[argCount];
+
+        int index = 0;
+        while (index < argCount) {
+            segments[index] = buildPath(argumentListTree.getChild(index), null, absolute);
+            index++;
+        }
+
+        return segments;
+    }
+
    private static RecordPathFilter createFilter(final Tree operatorTree, final RecordPathSegment parent, final boolean absolute) {
        switch (operatorTree.getType()) {
            case EQUAL:
--- a/nifi-commons/nifi-record-path/src/test/java/org/apache/nifi/record/path/TestRecordPath.java
+++ b/nifi-commons/nifi-record-path/src/test/java/org/apache/nifi/record/path/TestRecordPath.java
@ -17,9 +17,17 @@

 package org.apache.nifi.record.path;

-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import org.apache.nifi.record.path.exception.RecordPathException;
+import org.apache.nifi.serialization.SimpleRecordSchema;
+import org.apache.nifi.serialization.record.DataType;
+import org.apache.nifi.serialization.record.MapRecord;
+import org.apache.nifi.serialization.record.Record;
+import org.apache.nifi.serialization.record.RecordField;
+import org.apache.nifi.serialization.record.RecordFieldType;
+import org.apache.nifi.serialization.record.RecordSchema;
+import org.apache.nifi.serialization.record.type.ArrayDataType;
+import org.apache.nifi.serialization.record.util.DataTypeUtils;
+import org.junit.Test;

 import java.nio.charset.IllegalCharsetNameException;
 import java.nio.charset.StandardCharsets;
@ -35,16 +43,10 @@ import java.util.Map;
 import java.util.Optional;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
-import org.apache.nifi.record.path.exception.RecordPathException;
-import org.apache.nifi.serialization.SimpleRecordSchema;
-import org.apache.nifi.serialization.record.DataType;
-import org.apache.nifi.serialization.record.MapRecord;
-import org.apache.nifi.serialization.record.Record;
-import org.apache.nifi.serialization.record.RecordField;
-import org.apache.nifi.serialization.record.RecordFieldType;
-import org.apache.nifi.serialization.record.RecordSchema;
-import org.apache.nifi.serialization.record.util.DataTypeUtils;
-import org.junit.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;

 public class TestRecordPath {

@ -1210,6 +1212,68 @@ public class TestRecordPath {
        assertEquals("John Doe: 48", RecordPath.compile("concat(/firstName, ' ', /lastName, ': ', 48)").evaluate(record).getSelectedFields().findFirst().get().getValue());
    }

+    /**
+     * Creates the record shared by the case-conversion tests: STRING fields middleName, lastName
+     * and firstName holding "Smith", "Doe" and "John" respectively.
+     */
+    private Record getCaseTestRecord() {
+        final List<RecordField> schemaFields = new ArrayList<>();
+        for (final String fieldName : new String[] {"middleName", "lastName", "firstName"}) {
+            schemaFields.add(new RecordField(fieldName, RecordFieldType.STRING.getDataType()));
+        }
+        final RecordSchema schema = new SimpleRecordSchema(schemaFields);
+
+        final Map<String, Object> fieldValues = new HashMap<>();
+        fieldValues.put("lastName", "Doe");
+        fieldValues.put("firstName", "John");
+        fieldValues.put("middleName", "Smith");
+
+        return new MapRecord(schema, fieldValues);
+    }
+
+    /**
+     * Asserts that toUpperCase() upper-cases a concatenated name and yields an empty String
+     * for a field that is not defined in the record.
+     */
+    @Test
+    public void testToUpperCase() {
+        final Record record = getCaseTestRecord();
+
+        assertEquals("JOHN SMITH DOE", RecordPath.compile("toUpperCase(concat(/firstName, ' ', /middleName, ' ', /lastName))").evaluate(record).getSelectedFields().findFirst().get().getValue());
+        // Exercise toUpperCase here as well; the missing-field case for toLowerCase is covered by testToLowerCase.
+        assertEquals("", RecordPath.compile("toUpperCase(/notDefined)").evaluate(record).getSelectedFields().findFirst().get().getValue());
+    }
+
+    @Test
+    public void testToLowerCase() {
+        final Record record = getCaseTestRecord();
+
+        assertEquals("john smith doe", RecordPath.compile("toLowerCase(concat(/firstName, ' ', /middleName, ' ', /lastName))").evaluate(record).getSelectedFields().findFirst().get().getValue());
+        assertEquals("", RecordPath.compile("toLowerCase(/notDefined)").evaluate(record).getSelectedFields().findFirst().get().getValue());
+    }
+
+    /**
+     * Asserts that trim() strips the padding around a populated String field and yields an
+     * empty String for a field that is absent from the record.
+     */
+    @Test
+    public void testTrimString() {
+        final List<RecordField> schemaFields = new ArrayList<>();
+        schemaFields.add(new RecordField("fullName", RecordFieldType.STRING.getDataType()));
+        final RecordSchema schema = new SimpleRecordSchema(schemaFields);
+
+        final Map<String, Object> fieldValues = new HashMap<>();
+        fieldValues.put("fullName", "   John Smith     ");
+        final Record record = new MapRecord(schema, fieldValues);
+
+        final Object trimmed = RecordPath.compile("trim(/fullName)")
+                .evaluate(record).getSelectedFields().findFirst().get().getValue();
+        assertEquals("John Smith", trimmed);
+
+        final Object missing = RecordPath.compile("trim(/missing)")
+                .evaluate(record).getSelectedFields().findFirst().get().getValue();
+        assertEquals("", missing);
+    }
+
+    @Test
+    public void testTrimArray() {
+        final List<RecordField> fields = new ArrayList<>();
+        final DataType dataType = new ArrayDataType(RecordFieldType.STRING.getDataType());
+        fields.add(new RecordField("names", dataType));
+
+        final RecordSchema schema = new SimpleRecordSchema(fields);
+
+        final Map<String, Object> values = new HashMap<>();
+        values.put("names", new String[]{"   John Smith     ", "   Jane Smith     "});
+        final Record record = new MapRecord(schema, values);
+
+        final List<FieldValue> results = RecordPath.compile("trim(/names[*])").evaluate(record).getSelectedFields().collect(Collectors.toList());
+        assertEquals("John Smith", results.get(0).getValue());
+        assertEquals("Jane Smith", results.get(1).getValue());
+    }
    @Test
    public void testFieldName() {
        final List<RecordField> fields = new ArrayList<>();
--- a/nifi-docs/src/main/asciidoc/record-path-guide.adoc
+++ b/nifi-docs/src/main/asciidoc/record-path-guide.adoc
@ -3,7 +3,7 @@
 // contributor license agreements.  See the NOTICE file distributed with
 // this work for additional information regarding copyright ownership.
 // The ASF licenses this file to You under the Apache License, Version 2.0
-// (the "License"); you may not use this file except in compliance with
+// (the "License"); you may not use this file except in compliance with
 // the License.  You may obtain a copy of the License at
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
@ -632,6 +632,94 @@ The following record path expression would re-format the date String:
 | `format( toDate(/eventDate, "yyyy-MM-dd'T'HH:mm:ss'Z'"), 'yyyy-MM-dd')` | 2017-10-20
 |==========================================================

+=== trim
+
+Removes whitespace from the start and end of a string. For example, given a schema such as:
+
+----
+{
+  "type": "record",
+  "name": "events",
+  "fields": [
+    { "name": "name", "type": "string" }
+  ]
+}
+----
+
+and a record such as:
+
+----
+{
+  "name" : "    John Smith    "
+}
+----
+
+The following record path expression would remove extraneous whitespace:
+
+|==========================================================
+| RecordPath | Return value
+| `trim(/name)` | John Smith
+|==========================================================
+
+
+=== toUpperCase
+
+Changes the entire string to upper case. For example, given a schema such as:
+
+----
+{
+  "type": "record",
+  "name": "events",
+  "fields": [
+    { "name": "fullName", "type": "string" }
+  ]
+}
+----
+
+and a record such as:
+
+----
+{
+  "fullName" : "john smith"
+}
+----
+
+The following record path expression would convert the value to upper case:
+
+|==========================================================
+| RecordPath | Return value
+| `toUpperCase(/fullName)` | JOHN SMITH
+|==========================================================
+
+=== toLowerCase
+
+Changes the entire string to lower case. For example, given a schema such as:
+
+----
+{
+  "type": "record",
+  "name": "events",
+  "fields": [
+    { "name": "message", "type": "string" }
+  ]
+}
+----
+
+and a record such as:
+
+----
+{
+  "message" : "hEllO wORLd"
+}
+----
+
+The following record path expression would convert the value to lower case:
+
+|==========================================================
+| RecordPath | Return value
+| `toLowerCase(/message)` | hello world
+|==========================================================
+
 === base64Encode

 Converts a String or byte[] using Base64 encoding, using the UTF-8 character set.  For example, given a schema such as: