From f0f75e74803c16eb571d5e7dffaac7c7ccc532d1 Mon Sep 17 00:00:00 2001 From: jpercivall Date: Sun, 4 Dec 2016 12:44:07 -0500 Subject: [PATCH] NIFI-3145 Rewriting double validation in NumberParsing Adding more tests to TestQuery NIFI-3145 Adding logic to handle lowercase hex values This closes #1296 --- .../evaluation/util/NumberParsing.java | 75 +++++------ .../expression/language/TestQuery.java | 122 +++++++++++++++++- 2 files changed, 159 insertions(+), 38 deletions(-) diff --git a/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/util/NumberParsing.java b/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/util/NumberParsing.java index bbfd4e2312..0e1ac6e5d8 100644 --- a/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/util/NumberParsing.java +++ b/nifi-commons/nifi-expression-language/src/main/java/org/apache/nifi/attribute/expression/language/evaluation/util/NumberParsing.java @@ -21,54 +21,57 @@ import java.util.regex.Pattern; public class NumberParsing { - public static enum ParseResultType { NOT_NUMBER, WHOLE_NUMBER, DECIMAL; } - private static final String Digits = "(\\p{Digit}+)"; + private static final String OptionalSign = "[\\-\\+]?"; - // Double regex according to Oracle documentation: http://docs.oracle.com/javase/6/docs/api/java/lang/Double.html#valueOf%28java.lang.String%29 - private static final String HexDigits = "(\\p{XDigit}+)"; - // an exponent is 'e' or 'E' followed by an optionally - // signed decimal integer. 
- private static final String Exp = "[eE][+-]?"+Digits; - private static final String fpRegex = - ("[\\x00-\\x20]*"+ // Optional leading "whitespace" - "[+-]?(" + // Optional sign character - "NaN|" + // "NaN" string - "Infinity|" + // "Infinity" string + private static final String Infinity = "(Infinity)"; + private static final String NotANumber = "(NaN)"; - // A decimal floating-point string representing a finite positive - // number without a leading sign has at most five basic pieces: - // Digits . Digits ExponentPart FloatTypeSuffix - // - // Since this method allows integer-only strings as input - // in addition to strings of floating-point literals, the - // two sub-patterns below are simplifications of the grammar - // productions from the Java Language Specification, 2nd - // edition, section 3.10.2. + // Base 10 + private static final String Base10Digits = "\\d+"; + private static final String Base10Decimal = "\\." + Base10Digits; + private static final String OptionalBase10Decimal = "(" + Base10Decimal + ")?"; // grouped so "?" makes the whole decimal portion optional instead of making "\\d+" reluctant - // Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt - "((("+Digits+"(\\.)?("+Digits+"?)("+Exp+")?)|"+ + private static final String Base10Exponent = "[eE]" + OptionalSign + Base10Digits; + private static final String OptionalBase10Exponent = "(" + Base10Exponent + ")?"; - // . Digits ExponentPart_opt FloatTypeSuffix_opt - "(\\.("+Digits+")("+Exp+")?)|"+ + // Hex + private static final String HexIdentifier = "0[xX]"; - // Hexadecimal strings - "((" + - // 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt - "(0[xX]" + HexDigits + "(\\.)?)|" + + private static final String HexDigits = "[0-9a-fA-F]+"; + private static final String HexDecimal = "\\." + HexDigits; + private static final String OptionalHexDecimal = "(" + HexDecimal + ")?"; // grouped so "?" makes the whole decimal portion optional - // 0[xX] HexDigits_opt . 
HexDigits BinaryExponent FloatTypeSuffix_opt - "(0[xX]" + HexDigits + "?(\\.)" + HexDigits + ")" + + private static final String HexExponent = "[pP]" + OptionalSign + Base10Digits; + private static final String OptionalHexExponent = "(" + HexExponent + ")?"; - ")[pP][+-]?" + Digits + "))" + - "[fFdD]?))" + - "[\\x00-\\x20]*");// Optional trailing "whitespace" + // Written according to the "Floating Point Literal" specification as outlined here: http://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.10.2 - private static final Pattern DOUBLE_PATTERN = Pattern.compile(fpRegex); + private static final String doubleRegex = + OptionalSign + + "(" + + Infinity + "|" + + NotANumber + "|"+ + "(" + Base10Digits + Base10Decimal + ")" + "|" + + "(" + Base10Digits + OptionalBase10Decimal + Base10Exponent + ")" + "|" + + "(" + Base10Decimal + OptionalBase10Exponent + ")" + "|" + + // The case of a hex number with a decimal portion but no exponent is not supported by "parseDouble" and throws a NumberFormatException, so every hex alternative below requires an exponent + "(" + HexIdentifier + HexDigits + "\\.?" + HexExponent + ")" + "|" + // The case of a hex numeral with a "." but no decimal values is valid. 
+ "(" + HexIdentifier + HexDigits + OptionalHexDecimal + HexExponent + ")" + "|" + + "(" + HexIdentifier + HexDecimal + HexExponent + ")" + + ")"; - private static final Pattern NUMBER_PATTERN = Pattern.compile("-?((\\d+)|(0[xX]" + HexDigits + "))"); + private static final String numberRegex = + OptionalSign + + "(" + + Base10Digits + "|" + + HexIdentifier + HexDigits + + ")"; + + private static final Pattern DOUBLE_PATTERN = Pattern.compile(doubleRegex); + private static final Pattern NUMBER_PATTERN = Pattern.compile(numberRegex); private NumberParsing(){ } diff --git a/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java b/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java index 3b6896c282..b8eacc8a6e 100644 --- a/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java +++ b/nifi-commons/nifi-expression-language/src/test/java/org/apache/nifi/attribute/expression/language/TestQuery.java @@ -16,6 +16,9 @@ */ package org.apache.nifi.attribute.expression.language; +import static java.lang.Double.NEGATIVE_INFINITY; +import static java.lang.Double.NaN; +import static java.lang.Double.POSITIVE_INFINITY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; @@ -1063,8 +1066,118 @@ public class TestQuery { verifyEquals("${literal(5.5):toDecimal()}", attributes, 5.5D); verifyEquals("${literal('0xF.Fp10'):toDecimal()}", attributes, 0xF.Fp10D); - verifyEquals("${literal('0xABC'):toNumber()}", attributes, 0xABCL); - verifyEquals("${literal('-0xABC'):toNumber()}", attributes, -0xABCL); + verifyEquals("${literal('0x1234567890ABCDEF'):toNumber()}", attributes, 0x1234567890ABCDEFL); + verifyEquals("${literal('-0x1234567890ABCDEF'):toNumber()}", attributes, -0x1234567890ABCDEFL); + 
verifyEquals("${literal('-0x1234567890abcdef'):toNumber()}", attributes, -0x1234567890abcdefL); + verifyEquals("${literal('0x1234567890abcdef'):toNumber()}", attributes, 0x1234567890abcdefL); + } + + @Test + public void testDecimalParsing() { + final Map<String, String> attributes = new HashMap<>(); + + // Test decimal format X.X + verifyEquals("${literal(5.5):toDecimal()}", attributes, 5.5D); + verifyEquals("${literal('-12.5'):toDecimal()}", attributes, -12.5D); + verifyEquals("${literal('+12.5'):toDecimal()}", attributes, 12.5D); + + // Test decimal format X.XEX with positive exponent + verifyEquals("${literal('-12.5E2'):toDecimal()}", attributes, -12.5E2D); + verifyEquals("${literal('-12.5e2'):toDecimal()}", attributes, -12.5e2D); + verifyEquals("${literal('-12.5e+2'):toDecimal()}", attributes, -12.5e+2D); + verifyEquals("${literal('12.5E+2'):toDecimal()}", attributes, 12.5E+2D); + verifyEquals("${literal('+12.5e+2'):toDecimal()}", attributes, +12.5e+2D); + verifyEquals("${literal('+12.5E2'):toDecimal()}", attributes, +12.5E2D); + verifyEquals("${literal('-12.5e2'):toDecimal()}", attributes, -12.5e2D); + verifyEquals("${literal('12.5E2'):toDecimal()}", attributes, 12.5E2D); + verifyEquals("${literal('+12.5e2'):toDecimal()}", attributes, +12.5e2D); + + // Test decimal format X.XEX with negative exponent + verifyEquals("${literal('-12.5E-2'):toDecimal()}", attributes, -12.5E-2D); + verifyEquals("${literal('12.5E-2'):toDecimal()}", attributes, 12.5E-2D); + verifyEquals("${literal('+12.5e-2'):toDecimal()}", attributes, +12.5e-2D); + + // Test decimal format .X + verifyEquals("${literal('.5'):toDecimal()}", attributes, .5D); + verifyEquals("${literal('.5'):toDecimal()}", attributes, .5D); + verifyEquals("${literal('-.5'):toDecimal()}", attributes, -0.5D); + verifyEquals("${literal('+.5'):toDecimal()}", attributes, .5D); + + // Test decimal format .XEX with positive exponent + verifyEquals("${literal('-.5E2'):toDecimal()}", attributes, -.5E2D); + 
verifyEquals("${literal('-.5E2'):toDecimal()}", attributes, -.5E2D); + verifyEquals("${literal('-.5e+2'):toDecimal()}", attributes, -.5e+2D); + verifyEquals("${literal('.5E+2'):toDecimal()}", attributes, .5E+2D); + verifyEquals("${literal('+.5e+2'):toDecimal()}", attributes, +.5e+2D); + verifyEquals("${literal('+.5E2'):toDecimal()}", attributes, +.5E2D); + verifyEquals("${literal('-.5e2'):toDecimal()}", attributes, -.5e2D); + verifyEquals("${literal('.5E2'):toDecimal()}", attributes, .5E2D); + verifyEquals("${literal('+.5e2'):toDecimal()}", attributes, +.5e2D); + + // Test decimal format .XEX with negative exponent + verifyEquals("${literal('-.5E-2'):toDecimal()}", attributes, -.5E-2D); + verifyEquals("${literal('.5e-2'):toDecimal()}", attributes, .5e-2D); + verifyEquals("${literal('+.5E-2'):toDecimal()}", attributes, +.5E-2D); + + // Verify allowed values + verifyEquals("${literal('9876543210.0123456789e123'):toDecimal()}", attributes, 9876543210.0123456789e123D); + + verifyEmpty("${literal('A.1e123'):toDecimal()}", attributes); + verifyEmpty("${literal('0.Ae123'):toDecimal()}", attributes); + verifyEmpty("${literal('0.1eA'):toDecimal()}", attributes); + + // --------- Hex format ------// + + // Test Hex format X. 
+ verifyEquals("${literal('0xF1.p2'):toDecimal()}", attributes, 0xF1.p2D); + verifyEquals("${literal('+0xF1.P2'):toDecimal()}", attributes, +0xF1.p2D); + verifyEquals("${literal('-0xF1.p2'):toDecimal()}", attributes, -0xF1.p2D); + + // Test Hex format X.XPX with positive exponent + verifyEquals("${literal('-0xF1.5Bp2'):toDecimal()}", attributes, -0xF1.5Bp2D); + verifyEquals("${literal('-0xF1.5BP2'):toDecimal()}", attributes, -0xF1.5BP2D); + verifyEquals("${literal('-0xF1.5BP+2'):toDecimal()}", attributes, -0xF1.5Bp+2D); + verifyEquals("${literal('0xF1.5BP+2'):toDecimal()}", attributes, 0xF1.5BP+2D); + verifyEquals("${literal('+0xF1.5Bp+2'):toDecimal()}", attributes, +0xF1.5Bp+2D); + verifyEquals("${literal('+0xF1.5BP2'):toDecimal()}", attributes, +0xF1.5BP2D); + verifyEquals("${literal('-0xF1.5Bp2'):toDecimal()}", attributes, -0xF1.5Bp2D); + verifyEquals("${literal('0xF1.5BP2'):toDecimal()}", attributes, 0xF1.5BP2D); + verifyEquals("${literal('+0xF1.5Bp2'):toDecimal()}", attributes, +0xF1.5Bp2D); + + // Test Hex format X.XPX with negative exponent + verifyEquals("${literal('-0xF1.5BP-2'):toDecimal()}", attributes, -0xF1.5BP-2D); + verifyEquals("${literal('0xF1.5BP-2'):toDecimal()}", attributes, 0xF1.5BP-2D); + verifyEquals("${literal('+0xF1.5Bp-2'):toDecimal()}", attributes, +0xF1.5Bp-2D); + + // Test Hex format .XPX with positive exponent + verifyEquals("${literal('0x.5BP0'):toDecimal()}", attributes, 0x.5BP0D); + verifyEquals("${literal('-0x.5BP0'):toDecimal()}", attributes, -0x.5BP0D); + verifyEquals("${literal('-0x.5BP+2'):toDecimal()}", attributes, -0x.5BP+2D); + verifyEquals("${literal('0x.5BP+2'):toDecimal()}", attributes, 0x.5BP+2D); + verifyEquals("${literal('+0x.5Bp+2'):toDecimal()}", attributes, +0x.5Bp+2D); + verifyEquals("${literal('+0x.5BP2'):toDecimal()}", attributes, +0x.5BP2D); + verifyEquals("${literal('-0x.5Bp2'):toDecimal()}", attributes, -0x.5Bp2D); + verifyEquals("${literal('0x.5BP2'):toDecimal()}", attributes, 0x.5BP2D); + 
verifyEquals("${literal('+0x.5Bp2'):toDecimal()}", attributes, +0x.5Bp2D); + + // Test Hex format .XPX with negative exponent + verifyEquals("${literal('-0x.5BP-2'):toDecimal()}", attributes, -0x.5BP-2D); + verifyEquals("${literal('0x.5Bp-2'):toDecimal()}", attributes, 0x.5Bp-2D); + verifyEquals("${literal('+0x.5BP-2'):toDecimal()}", attributes, +0x.5BP-2D); + + // Verify allowed values + verifyEquals("${literal('0xFEDCBA9876543210.0123456789ABCDEFp123'):toDecimal()}", attributes, 0xFEDCBA9876543210.0123456789ABCDEFp123D); + verifyEquals("${literal('0xfedcba9876543210.0123456789abcdefp123'):toDecimal()}", attributes, 0xfedcba9876543210.0123456789abcdefp123D); + verifyEmpty("${literal('0xG.1p123'):toDecimal()}", attributes); + verifyEmpty("${literal('0x1.Gp123'):toDecimal()}", attributes); + verifyEmpty("${literal('0x1.1pA'):toDecimal()}", attributes); + verifyEmpty("${literal('0x1.1'):toDecimal()}", attributes); + + // Special cases: Infinity, -Infinity and NaN are parsed and stay unchanged after plus(1):plus(2) + verifyEquals("${literal('" + Double.toString(POSITIVE_INFINITY) + "'):toDecimal():plus(1):plus(2)}", attributes, POSITIVE_INFINITY); + verifyEquals("${literal('" + Double.toString(NEGATIVE_INFINITY) + "'):toDecimal():plus(1):plus(2)}", attributes, NEGATIVE_INFINITY); + verifyEquals("${literal('" + Double.toString(NaN) + "'):toDecimal():plus(1):plus(2)}", attributes, NaN); } @Test @@ -1568,6 +1681,11 @@ public class TestQuery { assertEquals(expectedResult, result.getValue()); } + private void verifyEmpty(final String expression, final Map<String, String> attributes) { // validates the expression, then asserts that it evaluates to the empty string + Query.validateExpression(expression, false); + assertEquals("", Query.evaluateExpressions(expression, attributes, null)); + } + private String getResourceAsString(String resourceName) throws IOException { try (final Reader reader = new InputStreamReader(new BufferedInputStream(getClass().getResourceAsStream(resourceName)))) { int n = 0;