NIFI-3145 Rewriting double validation in NumberParsing

Adding more tests to TestQuery

NIFI-3145 Adding logic to handle lowercase hex values

This closes #1296
This commit is contained in:
jpercivall 2016-12-04 12:44:07 -05:00 committed by Matt Burgess
parent 8f8b8cdf46
commit f0f75e7480
2 changed files with 159 additions and 38 deletions

View File

@ -21,54 +21,57 @@ import java.util.regex.Pattern;
public class NumberParsing {
public static enum ParseResultType {
NOT_NUMBER, WHOLE_NUMBER, DECIMAL;
}
private static final String Digits = "(\\p{Digit}+)";
private static final String OptionalSign = "[\\-\\+]?";
// Double regex according to Oracle documentation: http://docs.oracle.com/javase/6/docs/api/java/lang/Double.html#valueOf%28java.lang.String%29
private static final String HexDigits = "(\\p{XDigit}+)";
// an exponent is 'e' or 'E' followed by an optionally
// signed decimal integer.
private static final String Exp = "[eE][+-]?"+Digits;
private static final String fpRegex =
("[\\x00-\\x20]*"+ // Optional leading "whitespace"
"[+-]?(" + // Optional sign character
"NaN|" + // "NaN" string
"Infinity|" + // "Infinity" string
private static final String Infinity = "(Infinity)";
private static final String NotANumber = "(NaN)";
// A decimal floating-point string representing a finite positive
// number without a leading sign has at most five basic pieces:
// Digits . Digits ExponentPart FloatTypeSuffix
//
// Since this method allows integer-only strings as input
// in addition to strings of floating-point literals, the
// two sub-patterns below are simplifications of the grammar
// productions from the Java Language Specification, 2nd
// edition, section 3.10.2.
// Base 10
private static final String Base10Digits = "\\d+";
private static final String Base10Decimal = "\\." + Base10Digits;
// Optional fractional part of a base-10 literal. The fraction must be grouped before "?" is applied:
// appending "?" directly yields "\\.\\d+?", which only makes the digit quantifier reluctant and leaves
// the "." mandatory. The group is non-capturing so no capture-group numbering changes.
private static final String OptionalBase10Decimal = "(?:" + Base10Decimal + ")?";
// Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt
"((("+Digits+"(\\.)?("+Digits+"?)("+Exp+")?)|"+
private static final String Base10Exponent = "[eE]" + OptionalSign + Base10Digits;
private static final String OptionalBase10Exponent = "(" + Base10Exponent + ")?";
// . Digits ExponentPart_opt FloatTypeSuffix_opt
"(\\.("+Digits+")("+Exp+")?)|"+
// Hex
private static final String HexIdentifier = "0[xX]";
// Hexadecimal strings
"((" +
// 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt
"(0[xX]" + HexDigits + "(\\.)?)|" +
private static final String HexDigits = "[0-9a-fA-F]+";
private static final String HexDecimal = "\\." + HexDigits;
// Optional fractional part of a hex literal. The fraction must be grouped before "?" is applied:
// appending "?" directly yields "\\.[0-9a-fA-F]+?", which only makes the digit quantifier reluctant and
// leaves the "." mandatory. The group is non-capturing so no capture-group numbering changes.
private static final String OptionalHexDecimal = "(?:" + HexDecimal + ")?";
// 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt
"(0[xX]" + HexDigits + "?(\\.)" + HexDigits + ")" +
private static final String HexExponent = "[pP]" + OptionalSign + Base10Digits;
private static final String OptionalHexExponent = "(" + HexExponent + ")?";
")[pP][+-]?" + Digits + "))" +
"[fFdD]?))" +
"[\\x00-\\x20]*");// Optional trailing "whitespace"
// Written according to the "Floating Point Literal" specification as outlined here: http://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.10.2
private static final Pattern DOUBLE_PATTERN = Pattern.compile(fpRegex);
// Regex for strings treated as decimal (floating point) values: an optional sign followed by
// "Infinity", "NaN", a base-10 literal with a fraction and/or exponent, or a hex floating-point literal.
private static final String doubleRegex =
OptionalSign +
"(" +
Infinity + "|" +
NotANumber + "|"+
"(" + Base10Digits + Base10Decimal + ")" + "|" +
"(" + Base10Digits + OptionalBase10Decimal + Base10Exponent + ")" + "|" +
"(" + Base10Decimal + OptionalBase10Exponent + ")" + "|" +
// The case of a hex number with a decimal portion but no exponent is not supported by "parseDouble" and throws a NumberFormatException,
// so every hex alternative below requires the binary exponent.
"(" + HexIdentifier + HexDigits + "\\.?" + HexExponent + ")" + "|" + // The case of a hex numeral with a "." but no decimal values is valid.
"(" + HexIdentifier + HexDigits + OptionalHexDecimal + HexExponent + ")" + "|" +
// Previously the exponent was optional here, which accepted inputs such as "0x.5" that "parseDouble" rejects.
"(" + HexIdentifier + HexDecimal + HexExponent + ")" +
")";
private static final Pattern NUMBER_PATTERN = Pattern.compile("-?((\\d+)|(0[xX]" + HexDigits + "))");
private static final String numberRegex =
OptionalSign +
"(" +
Base10Digits + "|" +
HexIdentifier + HexDigits +
")";
private static final Pattern DOUBLE_PATTERN = Pattern.compile(doubleRegex);
private static final Pattern NUMBER_PATTERN = Pattern.compile(numberRegex);
private NumberParsing(){
}

View File

@ -16,6 +16,9 @@
*/
package org.apache.nifi.attribute.expression.language;
import static java.lang.Double.NEGATIVE_INFINITY;
import static java.lang.Double.NaN;
import static java.lang.Double.POSITIVE_INFINITY;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@ -1063,8 +1066,118 @@ public class TestQuery {
verifyEquals("${literal(5.5):toDecimal()}", attributes, 5.5D);
verifyEquals("${literal('0xF.Fp10'):toDecimal()}", attributes, 0xF.Fp10D);
verifyEquals("${literal('0xABC'):toNumber()}", attributes, 0xABCL);
verifyEquals("${literal('-0xABC'):toNumber()}", attributes, -0xABCL);
verifyEquals("${literal('0x1234567890ABCDEF'):toNumber()}", attributes, 0x1234567890ABCDEFL);
verifyEquals("${literal('-0x1234567890ABCDEF'):toNumber()}", attributes, -0x1234567890ABCDEFL);
verifyEquals("${literal('-0x1234567890abcdef'):toNumber()}", attributes, -0x1234567890abcdefL);
verifyEquals("${literal('0x1234567890abcdef'):toNumber()}", attributes, 0x1234567890abcdefL);
}
/**
 * Exercises toDecimal() against base-10 and hex floating-point literals in their signed/unsigned and
 * upper/lower-case exponent forms, against malformed literals that must evaluate to empty, and against
 * the Infinity / NaN special values.
 */
@Test
public void testDecimalParsing() {
    final Map<String, String> attributes = new HashMap<>();

    // Test decimal format X.X
    verifyEquals("${literal(5.5):toDecimal()}", attributes, 5.5D);
    verifyEquals("${literal('-12.5'):toDecimal()}", attributes, -12.5D);
    verifyEquals("${literal('+12.5'):toDecimal()}", attributes, 12.5D);

    // Test decimal format X.XEX with positive exponent
    verifyEquals("${literal('-12.5E2'):toDecimal()}", attributes, -12.5E2D);
    verifyEquals("${literal('-12.5e2'):toDecimal()}", attributes, -12.5e2D);
    verifyEquals("${literal('-12.5e+2'):toDecimal()}", attributes, -12.5e+2D);
    verifyEquals("${literal('12.5E+2'):toDecimal()}", attributes, 12.5E+2D);
    verifyEquals("${literal('+12.5e+2'):toDecimal()}", attributes, +12.5e+2D);
    verifyEquals("${literal('+12.5E2'):toDecimal()}", attributes, +12.5E2D);
    verifyEquals("${literal('12.5E2'):toDecimal()}", attributes, 12.5E2D);
    verifyEquals("${literal('+12.5e2'):toDecimal()}", attributes, +12.5e2D);

    // Test decimal format X.XEX with negative exponent
    verifyEquals("${literal('-12.5E-2'):toDecimal()}", attributes, -12.5E-2D);
    verifyEquals("${literal('12.5E-2'):toDecimal()}", attributes, 12.5E-2D);
    verifyEquals("${literal('+12.5e-2'):toDecimal()}", attributes, +12.5e-2D);

    // Test decimal format .X
    verifyEquals("${literal('.5'):toDecimal()}", attributes, .5D);
    verifyEquals("${literal('-.5'):toDecimal()}", attributes, -0.5D);
    verifyEquals("${literal('+.5'):toDecimal()}", attributes, .5D);

    // Test decimal format .XEX with positive exponent
    verifyEquals("${literal('-.5E2'):toDecimal()}", attributes, -.5E2D);
    verifyEquals("${literal('-.5e+2'):toDecimal()}", attributes, -.5e+2D);
    verifyEquals("${literal('.5E+2'):toDecimal()}", attributes, .5E+2D);
    verifyEquals("${literal('+.5e+2'):toDecimal()}", attributes, +.5e+2D);
    verifyEquals("${literal('+.5E2'):toDecimal()}", attributes, +.5E2D);
    verifyEquals("${literal('-.5e2'):toDecimal()}", attributes, -.5e2D);
    verifyEquals("${literal('.5E2'):toDecimal()}", attributes, .5E2D);
    verifyEquals("${literal('+.5e2'):toDecimal()}", attributes, +.5e2D);

    // Test decimal format .XEX with negative exponent
    verifyEquals("${literal('-.5E-2'):toDecimal()}", attributes, -.5E-2D);
    verifyEquals("${literal('.5e-2'):toDecimal()}", attributes, .5e-2D);
    verifyEquals("${literal('+.5E-2'):toDecimal()}", attributes, +.5E-2D);

    // Verify allowed values
    verifyEquals("${literal('9876543210.0123456789e123'):toDecimal()}", attributes, 9876543210.0123456789e123D);

    // Non-digit characters in the integer part, fraction or exponent must not parse
    verifyEmpty("${literal('A.1e123'):toDecimal()}", attributes);
    verifyEmpty("${literal('0.Ae123'):toDecimal()}", attributes);
    verifyEmpty("${literal('0.1eA'):toDecimal()}", attributes);

    // --------- Hex format ------//

    // Test hex format X. (trailing "." with no fraction digits)
    verifyEquals("${literal('0xF1.p2'):toDecimal()}", attributes, 0xF1.p2D);
    verifyEquals("${literal('+0xF1.P2'):toDecimal()}", attributes, +0xF1.p2D);
    verifyEquals("${literal('-0xF1.p2'):toDecimal()}", attributes, -0xF1.p2D);

    // Test hex format X.XPX with positive exponent
    verifyEquals("${literal('-0xF1.5Bp2'):toDecimal()}", attributes, -0xF1.5Bp2D);
    verifyEquals("${literal('-0xF1.5BP2'):toDecimal()}", attributes, -0xF1.5BP2D);
    verifyEquals("${literal('-0xF1.5BP+2'):toDecimal()}", attributes, -0xF1.5Bp+2D);
    verifyEquals("${literal('0xF1.5BP+2'):toDecimal()}", attributes, 0xF1.5BP+2D);
    verifyEquals("${literal('+0xF1.5Bp+2'):toDecimal()}", attributes, +0xF1.5Bp+2D);
    verifyEquals("${literal('+0xF1.5BP2'):toDecimal()}", attributes, +0xF1.5BP2D);
    verifyEquals("${literal('0xF1.5BP2'):toDecimal()}", attributes, 0xF1.5BP2D);
    verifyEquals("${literal('+0xF1.5Bp2'):toDecimal()}", attributes, +0xF1.5Bp2D);

    // Test hex format X.XPX with negative exponent
    verifyEquals("${literal('-0xF1.5BP-2'):toDecimal()}", attributes, -0xF1.5BP-2D);
    verifyEquals("${literal('0xF1.5BP-2'):toDecimal()}", attributes, 0xF1.5BP-2D);
    verifyEquals("${literal('+0xF1.5Bp-2'):toDecimal()}", attributes, +0xF1.5Bp-2D);

    // Test hex format .XPX with positive exponent
    verifyEquals("${literal('0x.5BP0'):toDecimal()}", attributes, 0x.5BP0D);
    verifyEquals("${literal('-0x.5BP0'):toDecimal()}", attributes, -0x.5BP0D);
    verifyEquals("${literal('-0x.5BP+2'):toDecimal()}", attributes, -0x.5BP+2D);
    verifyEquals("${literal('0x.5BP+2'):toDecimal()}", attributes, 0x.5BP+2D);
    verifyEquals("${literal('+0x.5Bp+2'):toDecimal()}", attributes, +0x.5Bp+2D);
    verifyEquals("${literal('+0x.5BP2'):toDecimal()}", attributes, +0x.5BP2D);
    verifyEquals("${literal('-0x.5Bp2'):toDecimal()}", attributes, -0x.5Bp2D);
    verifyEquals("${literal('0x.5BP2'):toDecimal()}", attributes, 0x.5BP2D);
    // Signed, lower-case "p", unsigned exponent (the input previously repeated the '+0x.5Bp+2' case above)
    verifyEquals("${literal('+0x.5Bp2'):toDecimal()}", attributes, +0x.5Bp2D);

    // Test hex format .XPX with negative exponent
    verifyEquals("${literal('-0x.5BP-2'):toDecimal()}", attributes, -0x.5BP-2D);
    verifyEquals("${literal('0x.5Bp-2'):toDecimal()}", attributes, 0x.5Bp-2D);
    verifyEquals("${literal('+0x.5BP-2'):toDecimal()}", attributes, +0x.5BP-2D);

    // Verify allowed values
    verifyEquals("${literal('0xFEDCBA9876543210.0123456789ABCDEFp123'):toDecimal()}", attributes, 0xFEDCBA9876543210.0123456789ABCDEFp123D);
    verifyEquals("${literal('0xfedcba9876543210.0123456789abcdefp123'):toDecimal()}", attributes, 0xfedcba9876543210.0123456789abcdefp123D);

    // Non-hex digits, a non-numeric exponent, or a missing binary exponent must not parse
    verifyEmpty("${literal('0xG.1p123'):toDecimal()}", attributes);
    verifyEmpty("${literal('0x1.Gp123'):toDecimal()}", attributes);
    verifyEmpty("${literal('0x1.1pA'):toDecimal()}", attributes);
    verifyEmpty("${literal('0x1.1'):toDecimal()}", attributes);

    // Special cases
    verifyEquals("${literal('" + Double.toString(POSITIVE_INFINITY) + "'):toDecimal():plus(1):plus(2)}", attributes, POSITIVE_INFINITY);
    verifyEquals("${literal('" + Double.toString(NEGATIVE_INFINITY) + "'):toDecimal():plus(1):plus(2)}", attributes, NEGATIVE_INFINITY);
    verifyEquals("${literal('" + Double.toString(NaN) + "'):toDecimal():plus(1):plus(2)}", attributes, NaN);
}
@Test
@ -1568,6 +1681,11 @@ public class TestQuery {
assertEquals(expectedResult, result.getValue());
}
/**
 * Runs {@code Query.validateExpression} on the expression, then asserts that evaluating it against the
 * given attributes produces the empty string.
 *
 * @param expression the Expression Language expression to check
 * @param attributes the attribute map the expression is evaluated against
 */
private void verifyEmpty(final String expression, final Map<String, String> attributes) {
    Query.validateExpression(expression, false);
    // A plain "" literal replaces the redundant String.valueOf("") wrapper; the expected value is unchanged.
    assertEquals("", Query.evaluateExpressions(expression, attributes, null));
}
private String getResourceAsString(String resourceName) throws IOException {
try (final Reader reader = new InputStreamReader(new BufferedInputStream(getClass().getResourceAsStream(resourceName)))) {
int n = 0;