[ML] Use map and filter instead of flatMap in find_file_structure (#42534)

Using map and filter avoids the garbage from all the
Stream.of calls that flatMap necessitated. Performance
is better when there are masses of fields.
This commit is contained in:
David Roberts 2019-05-24 20:05:15 +01:00
parent 34de68b007
commit 48dc0dca57
1 changed file with 2 additions and 5 deletions

View File

@@ -187,11 +187,8 @@ public final class FileStructureUtils {
for (String fieldName : uniqueFieldNames) {
List<Object> fieldValues = sampleRecords.stream().flatMap(record -> {
Object fieldValue = record.get(fieldName);
return (fieldValue == null) ? Stream.empty() : Stream.of(fieldValue);
}
).collect(Collectors.toList());
List<Object> fieldValues = sampleRecords.stream().map(record -> record.get(fieldName)).filter(fieldValue -> fieldValue != null)
.collect(Collectors.toList());
Tuple<Map<String, String>, FieldStats> mappingAndFieldStats =
guessMappingAndCalculateFieldStats(explanation, fieldName, fieldValues, timeoutChecker);