NIFI-13988 Adjusted Record number conversion to treat empty String as null (#9511)

Signed-off-by: David Handermann <exceptionfactory@apache.org>
This commit is contained in:
dan-s1 2024-11-12 14:23:08 -05:00 committed by GitHub
parent 2f3dd015e0
commit 2f048ca70a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 69 additions and 34 deletions

View File

@ -1203,7 +1203,7 @@ public class DataTypeUtils {
}
case String string -> {
try {
return new BigInteger(string);
return string.isBlank() ? null : new BigInteger(string);
} catch (NumberFormatException nfe) {
throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to BigInteger for field " + fieldName
+ ", value is not a valid representation of BigInteger", nfe);
@ -1286,9 +1286,9 @@ public class DataTypeUtils {
}
}
if (value instanceof String) {
if (value instanceof String string) {
try {
return new BigDecimal((String) value);
return string.isBlank() ? null : new BigDecimal(string);
} catch (NumberFormatException nfe) {
throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to BigDecimal for field " + fieldName
+ ", value is not a valid representation of BigDecimal", nfe);
@ -1302,7 +1302,7 @@ public class DataTypeUtils {
return switch (value) {
case null -> null;
case Number number -> number.doubleValue();
case String s -> Double.parseDouble(s);
case String string -> string.isBlank() ? null : Double.parseDouble(string);
default ->
throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Double for field " + fieldName);
};
@ -1327,7 +1327,7 @@ public class DataTypeUtils {
return switch (value) {
case null -> null;
case Number number -> number.floatValue();
case String s -> Float.parseFloat(s);
case String string -> string.isBlank() ? null : Float.parseFloat(string);
default ->
throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Float for field " + fieldName);
};
@ -1388,7 +1388,7 @@ public class DataTypeUtils {
return switch (value) {
case null -> null;
case Number number -> number.longValue();
case String s -> Long.parseLong(s);
case String string -> string.isBlank() ? null : Long.parseLong(string);
case java.util.Date date -> date.getTime();
default ->
throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Long for field " + fieldName);
@ -1464,8 +1464,8 @@ public class DataTypeUtils {
+ " as it causes an arithmetic overflow (the value is too large, e.g.)", ae);
}
}
case String s -> {
return Integer.parseInt(s);
case String string -> {
return string.isBlank() ? null : Integer.parseInt(string);
}
default -> {
}
@ -1491,7 +1491,7 @@ public class DataTypeUtils {
return switch (value) {
case null -> null;
case Number number -> number.shortValue();
case String s -> Short.parseShort(s);
case String string -> string.isBlank() ? null : Short.parseShort(string);
default ->
throw new IllegalTypeConversionException("Cannot convert value [" + value + "] of type " + value.getClass() + " to Short for field " + fieldName);
};

View File

@ -1201,4 +1201,16 @@ public class TestDataTypeUtils {
assertTrue(converted instanceof java.sql.Date, "Converted value is not java.sql.Date");
assertEquals(ISO_8601_YEAR_MONTH_DAY, converted.toString());
}
/**
 * Verifies that every numeric conversion treats a blank String as a missing value and returns null.
 * The empty String and several whitespace-only Strings are checked so that the test covers
 * everything its name promises rather than the empty String alone.
 */
@Test
void testNumberParsingWhereStringBlank() {
    final String fieldName = "someField";
    // Empty plus whitespace-only inputs: all of these satisfy String.isBlank()
    final String[] blankValues = {"", " ", "\t", " \t\n"};

    for (final String blank : blankValues) {
        // The length identifies which input failed without printing invisible characters
        final String message = "Expected null for blank String of length " + blank.length();
        assertNull(DataTypeUtils.toBigDecimal(blank, fieldName), message);
        assertNull(DataTypeUtils.toBigInt(blank, fieldName), message);
        assertNull(DataTypeUtils.toDouble(blank, fieldName), message);
        assertNull(DataTypeUtils.toFloat(blank, fieldName), message);
        assertNull(DataTypeUtils.toInteger(blank, fieldName), message);
        assertNull(DataTypeUtils.toLong(blank, fieldName), message);
        assertNull(DataTypeUtils.toShort(blank, fieldName), message);
    }
}
}

View File

@ -53,6 +53,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertThrows;
@ -78,12 +79,7 @@ public class TestExcelRecordReader {
@BeforeAll
static void setUpBeforeAll() throws Exception {
//Generate an Excel file and populate it with data
final ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
try (XSSFWorkbook workbook = new XSSFWorkbook()) {
final XSSFSheet sheet = workbook.createSheet("User Info");
populateSheet(sheet);
workbook.write(outputStream);
}
final InputStream workbook = createWorkbook(DATA);
//Protect the Excel file with a password
try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem()) {
@ -91,7 +87,7 @@ public class TestExcelRecordReader {
Encryptor encryptor = encryptionInfo.getEncryptor();
encryptor.confirmPassword(PASSWORD);
try (OPCPackage opc = OPCPackage.open(new ByteArrayInputStream(outputStream.toByteArray()));
try (OPCPackage opc = OPCPackage.open(workbook);
OutputStream os = encryptor.getDataStream(poifsFileSystem)) {
opc.save(os);
}
@ -99,24 +95,6 @@ public class TestExcelRecordReader {
}
}
/**
 * Copies the shared DATA fixture into the given sheet, one sheet row per fixture row.
 * String and Integer values are written to their cells; any other value leaves the created cell empty.
 *
 * @param sheet worksheet that receives the fixture rows
 */
private static void populateSheet(XSSFSheet sheet) {
    for (int rowIndex = 0; rowIndex < DATA.length; rowIndex++) {
        final Object[] values = DATA[rowIndex];
        final Row row = sheet.createRow(rowIndex);
        for (int columnIndex = 0; columnIndex < values.length; columnIndex++) {
            // The cell is always created, even when the value type is not written below
            final Cell cell = row.createCell(columnIndex);
            final Object value = values[columnIndex];
            if (value instanceof String text) {
                cell.setCellValue(text);
            } else if (value instanceof Integer number) {
                cell.setCellValue(number);
            }
        }
    }
}
@Test
public void testNonExcelFile() {
ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
@ -335,4 +313,49 @@ public class TestExcelRecordReader {
return new SimpleRecordSchema(Arrays.asList(new RecordField("id", RecordFieldType.INT.getDataType()),
new RecordField("name", RecordFieldType.STRING.getDataType())));
}
/**
 * Builds a single-row workbook whose LONG column contains an empty String and
 * expects reading the records to complete without throwing.
 */
@Test
void testWithNumberColumnWhoseValueIsEmptyString() throws Exception {
    final RecordField textField = new RecordField("first", RecordFieldType.STRING.getDataType());
    final RecordField numberField = new RecordField("second", RecordFieldType.LONG.getDataType());
    final RecordSchema schema = new SimpleRecordSchema(Arrays.asList(textField, numberField));

    final ExcelRecordReaderConfiguration configuration = new ExcelRecordReaderConfiguration.Builder()
            .withSchema(schema)
            .build();

    // The second column is declared LONG but the cell holds an empty String
    final Object[][] rows = {{"Manny", ""}};
    final InputStream workbookStream = createWorkbook(rows);
    final ExcelRecordReader reader = new ExcelRecordReader(configuration, workbookStream, logger);

    assertDoesNotThrow(() -> getRecords(reader, true, true));
}
/**
 * Serializes the given rows into an in-memory XLSX workbook with a single sheet.
 *
 * @param data rows to write, each inner array being one row of cell values
 * @return stream over the bytes of the written workbook
 * @throws Exception if the workbook cannot be written or closed
 */
private static InputStream createWorkbook(Object[][] data) throws Exception {
    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    try (XSSFWorkbook book = new XSSFWorkbook()) {
        populateSheet(book.createSheet("SomeSheetName"), data);
        book.write(buffer);
    }
    final byte[] serialized = buffer.toByteArray();
    return new ByteArrayInputStream(serialized);
}
/**
 * Writes the given rows into the sheet, one sheet row per array row and one cell per element.
 * String values are written as text; Integer and Long values are written as numeric cells.
 * A null element or an element of any other type leaves the created cell empty.
 *
 * @param sheet worksheet that receives the rows
 * @param data rows to write, each inner array being one row of cell values
 */
private static void populateSheet(XSSFSheet sheet, Object[][] data) {
    //Adding the data to the Excel worksheet
    int rowCount = 0;
    for (Object[] dataRow : data) {
        Row row = sheet.createRow(rowCount++);
        int columnCount = 0;
        for (Object field : dataRow) {
            Cell cell = row.createCell(columnCount++);
            switch (field) {
                case String string -> cell.setCellValue(string);
                case Integer integer -> cell.setCellValue(integer.doubleValue());
                case Long l -> cell.setCellValue(l.doubleValue());
                // A pattern switch without a null case throws NullPointerException on a null selector,
                // so null is matched explicitly and treated like any other unsupported value
                case null, default -> { }
            }
        }
    }
}
}