[ML] Include the "properties" layer in find_file_structure mappings (#62158)
Previously the "mappings" field of the response from the find_file_structure endpoint was not a drop-in replacement for the mappings format of the create index endpoint - the "properties" layer was missing. It was initially omitted on the assumption that the find_file_structure endpoint would only ever return very simple mappings without any nested objects. However, this will not be true in the future, as we will improve mappings detection for complex JSON objects. As a first step it makes sense to move the returned mappings closer to the standard format. This is a small building block towards fixing #55616.
This commit is contained in:
parent
038f7a83df
commit
969a1c558b
|
@ -299,21 +299,23 @@ If the request does not encounter errors, you receive the following result:
|
||||||
],
|
],
|
||||||
"need_client_timezone" : true, <10>
|
"need_client_timezone" : true, <10>
|
||||||
"mappings" : { <11>
|
"mappings" : { <11>
|
||||||
"@timestamp" : {
|
"properties" : {
|
||||||
"type" : "date"
|
"@timestamp" : {
|
||||||
},
|
"type" : "date"
|
||||||
"author" : {
|
},
|
||||||
"type" : "keyword"
|
"author" : {
|
||||||
},
|
"type" : "keyword"
|
||||||
"name" : {
|
},
|
||||||
"type" : "keyword"
|
"name" : {
|
||||||
},
|
"type" : "keyword"
|
||||||
"page_count" : {
|
},
|
||||||
"type" : "long"
|
"page_count" : {
|
||||||
},
|
"type" : "long"
|
||||||
"release_date" : {
|
},
|
||||||
"type" : "date",
|
"release_date" : {
|
||||||
"format" : "iso8601"
|
"type" : "date",
|
||||||
|
"format" : "iso8601"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ingest_pipeline" : {
|
"ingest_pipeline" : {
|
||||||
|
@ -622,61 +624,63 @@ If the request does not encounter errors, you receive the following result:
|
||||||
],
|
],
|
||||||
"need_client_timezone" : true, <10>
|
"need_client_timezone" : true, <10>
|
||||||
"mappings" : {
|
"mappings" : {
|
||||||
"@timestamp" : {
|
"properties" : {
|
||||||
"type" : "date"
|
"@timestamp" : {
|
||||||
},
|
"type" : "date"
|
||||||
"DOLocationID" : {
|
},
|
||||||
"type" : "long"
|
"DOLocationID" : {
|
||||||
},
|
"type" : "long"
|
||||||
"PULocationID" : {
|
},
|
||||||
"type" : "long"
|
"PULocationID" : {
|
||||||
},
|
"type" : "long"
|
||||||
"RatecodeID" : {
|
},
|
||||||
"type" : "long"
|
"RatecodeID" : {
|
||||||
},
|
"type" : "long"
|
||||||
"VendorID" : {
|
},
|
||||||
"type" : "long"
|
"VendorID" : {
|
||||||
},
|
"type" : "long"
|
||||||
"extra" : {
|
},
|
||||||
"type" : "double"
|
"extra" : {
|
||||||
},
|
"type" : "double"
|
||||||
"fare_amount" : {
|
},
|
||||||
"type" : "double"
|
"fare_amount" : {
|
||||||
},
|
"type" : "double"
|
||||||
"improvement_surcharge" : {
|
},
|
||||||
"type" : "double"
|
"improvement_surcharge" : {
|
||||||
},
|
"type" : "double"
|
||||||
"mta_tax" : {
|
},
|
||||||
"type" : "double"
|
"mta_tax" : {
|
||||||
},
|
"type" : "double"
|
||||||
"passenger_count" : {
|
},
|
||||||
"type" : "long"
|
"passenger_count" : {
|
||||||
},
|
"type" : "long"
|
||||||
"payment_type" : {
|
},
|
||||||
"type" : "long"
|
"payment_type" : {
|
||||||
},
|
"type" : "long"
|
||||||
"store_and_fwd_flag" : {
|
},
|
||||||
"type" : "keyword"
|
"store_and_fwd_flag" : {
|
||||||
},
|
"type" : "keyword"
|
||||||
"tip_amount" : {
|
},
|
||||||
"type" : "double"
|
"tip_amount" : {
|
||||||
},
|
"type" : "double"
|
||||||
"tolls_amount" : {
|
},
|
||||||
"type" : "double"
|
"tolls_amount" : {
|
||||||
},
|
"type" : "double"
|
||||||
"total_amount" : {
|
},
|
||||||
"type" : "double"
|
"total_amount" : {
|
||||||
},
|
"type" : "double"
|
||||||
"tpep_dropoff_datetime" : {
|
},
|
||||||
"type" : "date",
|
"tpep_dropoff_datetime" : {
|
||||||
"format" : "yyyy-MM-dd HH:mm:ss"
|
"type" : "date",
|
||||||
},
|
"format" : "yyyy-MM-dd HH:mm:ss"
|
||||||
"tpep_pickup_datetime" : {
|
},
|
||||||
"type" : "date",
|
"tpep_pickup_datetime" : {
|
||||||
"format" : "yyyy-MM-dd HH:mm:ss"
|
"type" : "date",
|
||||||
},
|
"format" : "yyyy-MM-dd HH:mm:ss"
|
||||||
"trip_distance" : {
|
},
|
||||||
"type" : "double"
|
"trip_distance" : {
|
||||||
|
"type" : "double"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ingest_pipeline" : {
|
"ingest_pipeline" : {
|
||||||
|
@ -1560,14 +1564,16 @@ this:
|
||||||
],
|
],
|
||||||
"need_client_timezone" : true,
|
"need_client_timezone" : true,
|
||||||
"mappings" : {
|
"mappings" : {
|
||||||
"@timestamp" : {
|
"properties" : {
|
||||||
"type" : "date"
|
"@timestamp" : {
|
||||||
},
|
"type" : "date"
|
||||||
"loglevel" : {
|
},
|
||||||
"type" : "keyword"
|
"loglevel" : {
|
||||||
},
|
"type" : "keyword"
|
||||||
"message" : {
|
},
|
||||||
"type" : "text"
|
"message" : {
|
||||||
|
"type" : "text"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ingest_pipeline" : {
|
"ingest_pipeline" : {
|
||||||
|
@ -1720,20 +1726,22 @@ this:
|
||||||
],
|
],
|
||||||
"need_client_timezone" : true,
|
"need_client_timezone" : true,
|
||||||
"mappings" : {
|
"mappings" : {
|
||||||
"@timestamp" : {
|
"properties" : {
|
||||||
"type" : "date"
|
"@timestamp" : {
|
||||||
},
|
"type" : "date"
|
||||||
"class" : {
|
},
|
||||||
"type" : "keyword"
|
"class" : {
|
||||||
},
|
"type" : "keyword"
|
||||||
"loglevel" : {
|
},
|
||||||
"type" : "keyword"
|
"loglevel" : {
|
||||||
},
|
"type" : "keyword"
|
||||||
"message" : {
|
},
|
||||||
"type" : "text"
|
"message" : {
|
||||||
},
|
"type" : "text"
|
||||||
"node" : {
|
},
|
||||||
"type" : "keyword"
|
"node" : {
|
||||||
|
"type" : "keyword"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"ingest_pipeline" : {
|
"ingest_pipeline" : {
|
||||||
|
|
|
@ -102,7 +102,7 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
|
||||||
Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats =
|
Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats =
|
||||||
FileStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker);
|
FileStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker);
|
||||||
|
|
||||||
SortedMap<String, Object> mappings = mappingsAndFieldStats.v1();
|
SortedMap<String, Object> fieldMappings = mappingsAndFieldStats.v1();
|
||||||
|
|
||||||
List<String> columnNamesList = Arrays.asList(columnNames);
|
List<String> columnNamesList = Arrays.asList(columnNames);
|
||||||
char delimiter = (char) csvPreference.getDelimiterChar();
|
char delimiter = (char) csvPreference.getDelimiterChar();
|
||||||
|
@ -149,17 +149,17 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
|
||||||
.setJavaTimestampFormats(timeField.v2().getJavaTimestampFormats())
|
.setJavaTimestampFormats(timeField.v2().getJavaTimestampFormats())
|
||||||
.setNeedClientTimezone(needClientTimeZone)
|
.setNeedClientTimezone(needClientTimeZone)
|
||||||
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), csvProcessorSettings,
|
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(), csvProcessorSettings,
|
||||||
mappings, timeField.v1(), timeField.v2().getJavaTimestampFormats(), needClientTimeZone,
|
fieldMappings, timeField.v1(), timeField.v2().getJavaTimestampFormats(), needClientTimeZone,
|
||||||
timeField.v2().needNanosecondPrecision()))
|
timeField.v2().needNanosecondPrecision()))
|
||||||
.setMultilineStartPattern(makeMultilineStartPattern(explanation, columnNamesList, maxLinesPerMessage, delimiterPattern,
|
.setMultilineStartPattern(makeMultilineStartPattern(explanation, columnNamesList, maxLinesPerMessage, delimiterPattern,
|
||||||
quotePattern, mappings, timeField.v1(), timeField.v2()));
|
quotePattern, fieldMappings, timeField.v1(), timeField.v2()));
|
||||||
|
|
||||||
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
|
fieldMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
|
||||||
} else {
|
} else {
|
||||||
structureBuilder.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(),
|
structureBuilder.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(null, Collections.emptyMap(),
|
||||||
csvProcessorSettings, mappings, null, null, false, false));
|
csvProcessorSettings, fieldMappings, null, null, false, false));
|
||||||
structureBuilder.setMultilineStartPattern(makeMultilineStartPattern(explanation, columnNamesList, maxLinesPerMessage,
|
structureBuilder.setMultilineStartPattern(makeMultilineStartPattern(explanation, columnNamesList, maxLinesPerMessage,
|
||||||
delimiterPattern, quotePattern, mappings, null, null));
|
delimiterPattern, quotePattern, fieldMappings, null, null));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mappingsAndFieldStats.v2() != null) {
|
if (mappingsAndFieldStats.v2() != null) {
|
||||||
|
@ -167,7 +167,7 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
|
||||||
}
|
}
|
||||||
|
|
||||||
FileStructure structure = structureBuilder
|
FileStructure structure = structureBuilder
|
||||||
.setMappings(mappings)
|
.setMappings(Collections.singletonMap(FileStructureUtils.MAPPING_PROPERTIES_SETTING, fieldMappings))
|
||||||
.setExplanation(explanation)
|
.setExplanation(explanation)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -628,7 +628,7 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
|
||||||
* records.
|
* records.
|
||||||
*/
|
*/
|
||||||
static String makeMultilineStartPattern(List<String> explanation, List<String> columnNames, int maxLinesPerMessage,
|
static String makeMultilineStartPattern(List<String> explanation, List<String> columnNames, int maxLinesPerMessage,
|
||||||
String delimiterPattern, String quotePattern, Map<String, Object> mappings,
|
String delimiterPattern, String quotePattern, Map<String, Object> fieldMappings,
|
||||||
String timeFieldName, TimestampFormatFinder timeFieldFormat) {
|
String timeFieldName, TimestampFormatFinder timeFieldFormat) {
|
||||||
|
|
||||||
assert columnNames.isEmpty() == false;
|
assert columnNames.isEmpty() == false;
|
||||||
|
@ -653,7 +653,7 @@ public class DelimitedFileStructureFinder implements FileStructureFinder {
|
||||||
explanation.add("Created a multi-line start pattern based on timestamp column [" + columnName + "]");
|
explanation.add("Created a multi-line start pattern based on timestamp column [" + columnName + "]");
|
||||||
return builder.toString();
|
return builder.toString();
|
||||||
}
|
}
|
||||||
Object columnMapping = mappings.get(columnName);
|
Object columnMapping = fieldMappings.get(columnName);
|
||||||
if (columnMapping instanceof Map) {
|
if (columnMapping instanceof Map) {
|
||||||
String type = (String) ((Map<?, ?>) columnMapping).get(FileStructureUtils.MAPPING_TYPE_SETTING);
|
String type = (String) ((Map<?, ?>) columnMapping).get(FileStructureUtils.MAPPING_TYPE_SETTING);
|
||||||
if (type != null) {
|
if (type != null) {
|
||||||
|
|
|
@ -393,6 +393,7 @@ public final class FileStructureUtils {
|
||||||
* @param csvProcessorSettings The CSV processor settings for delimited formats. <code>null</code> for
|
* @param csvProcessorSettings The CSV processor settings for delimited formats. <code>null</code> for
|
||||||
* non-delimited formats.
|
* non-delimited formats.
|
||||||
* @param mappingsForConversions Mappings (or partial mappings) that will be considered for field type conversions.
|
* @param mappingsForConversions Mappings (or partial mappings) that will be considered for field type conversions.
|
||||||
|
* The keys in the map are the top level field names - there is no properties layer.
|
||||||
* @param timestampField The input field containing the timestamp to be parsed into <code>@timestamp</code>.
|
* @param timestampField The input field containing the timestamp to be parsed into <code>@timestamp</code>.
|
||||||
* <code>null</code> if there is no timestamp.
|
* <code>null</code> if there is no timestamp.
|
||||||
* @param timestampFormats Timestamp formats to be used for parsing {@code timestampField}.
|
* @param timestampFormats Timestamp formats to be used for parsing {@code timestampField}.
|
||||||
|
|
|
@ -71,9 +71,9 @@ public class NdJsonFileStructureFinder implements FileStructureFinder {
|
||||||
Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats =
|
Tuple<SortedMap<String, Object>, SortedMap<String, FieldStats>> mappingsAndFieldStats =
|
||||||
FileStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker);
|
FileStructureUtils.guessMappingsAndCalculateFieldStats(explanation, sampleRecords, timeoutChecker);
|
||||||
|
|
||||||
Map<String, Object> mappings = mappingsAndFieldStats.v1();
|
Map<String, Object> fieldMappings = mappingsAndFieldStats.v1();
|
||||||
if (timeField != null) {
|
if (timeField != null) {
|
||||||
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
|
fieldMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mappingsAndFieldStats.v2() != null) {
|
if (mappingsAndFieldStats.v2() != null) {
|
||||||
|
@ -81,7 +81,7 @@ public class NdJsonFileStructureFinder implements FileStructureFinder {
|
||||||
}
|
}
|
||||||
|
|
||||||
FileStructure structure = structureBuilder
|
FileStructure structure = structureBuilder
|
||||||
.setMappings(mappings)
|
.setMappings(Collections.singletonMap(FileStructureUtils.MAPPING_PROPERTIES_SETTING, fieldMappings))
|
||||||
.setExplanation(explanation)
|
.setExplanation(explanation)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
|
|
@ -109,15 +109,15 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
|
||||||
.setMultilineStartPattern(multiLineRegex);
|
.setMultilineStartPattern(multiLineRegex);
|
||||||
|
|
||||||
Map<String, String> messageMapping = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "text");
|
Map<String, String> messageMapping = Collections.singletonMap(FileStructureUtils.MAPPING_TYPE_SETTING, "text");
|
||||||
SortedMap<String, Object> mappings = new TreeMap<>();
|
SortedMap<String, Object> fieldMappings = new TreeMap<>();
|
||||||
mappings.put("message", messageMapping);
|
fieldMappings.put("message", messageMapping);
|
||||||
mappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timestampFormatFinder.getEsDateMappingTypeWithoutFormat());
|
fieldMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timestampFormatFinder.getEsDateMappingTypeWithoutFormat());
|
||||||
|
|
||||||
SortedMap<String, FieldStats> fieldStats = new TreeMap<>();
|
SortedMap<String, FieldStats> fieldStats = new TreeMap<>();
|
||||||
fieldStats.put("message", FileStructureUtils.calculateFieldStats(messageMapping, sampleMessages, timeoutChecker));
|
fieldStats.put("message", FileStructureUtils.calculateFieldStats(messageMapping, sampleMessages, timeoutChecker));
|
||||||
|
|
||||||
Map<String, String> customGrokPatternDefinitions = timestampFormatFinder.getCustomGrokPatternDefinitions();
|
Map<String, String> customGrokPatternDefinitions = timestampFormatFinder.getCustomGrokPatternDefinitions();
|
||||||
GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings, fieldStats,
|
GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, fieldMappings, fieldStats,
|
||||||
customGrokPatternDefinitions, timeoutChecker);
|
customGrokPatternDefinitions, timeoutChecker);
|
||||||
// We can't parse directly into @timestamp using Grok, so parse to some other time field, which the date filter will then remove
|
// We can't parse directly into @timestamp using Grok, so parse to some other time field, which the date filter will then remove
|
||||||
String interimTimestampField = overrides.getTimestampField();
|
String interimTimestampField = overrides.getTimestampField();
|
||||||
|
@ -150,10 +150,10 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
|
||||||
.setJavaTimestampFormats(timestampFormatFinder.getJavaTimestampFormats())
|
.setJavaTimestampFormats(timestampFormatFinder.getJavaTimestampFormats())
|
||||||
.setNeedClientTimezone(needClientTimeZone)
|
.setNeedClientTimezone(needClientTimeZone)
|
||||||
.setGrokPattern(grokPattern)
|
.setGrokPattern(grokPattern)
|
||||||
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(grokPattern, customGrokPatternDefinitions, null, mappings,
|
.setIngestPipeline(FileStructureUtils.makeIngestPipelineDefinition(grokPattern, customGrokPatternDefinitions, null,
|
||||||
interimTimestampField, timestampFormatFinder.getJavaTimestampFormats(), needClientTimeZone,
|
fieldMappings, interimTimestampField, timestampFormatFinder.getJavaTimestampFormats(), needClientTimeZone,
|
||||||
timestampFormatFinder.needNanosecondPrecision()))
|
timestampFormatFinder.needNanosecondPrecision()))
|
||||||
.setMappings(mappings)
|
.setMappings(Collections.singletonMap(FileStructureUtils.MAPPING_PROPERTIES_SETTING, fieldMappings))
|
||||||
.setFieldStats(fieldStats)
|
.setFieldStats(fieldStats)
|
||||||
.setExplanation(explanation)
|
.setExplanation(explanation)
|
||||||
.build();
|
.build();
|
||||||
|
|
|
@ -114,18 +114,18 @@ public class XmlFileStructureFinder implements FileStructureFinder {
|
||||||
structureBuilder.setFieldStats(mappingsAndFieldStats.v2());
|
structureBuilder.setFieldStats(mappingsAndFieldStats.v2());
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, Object> innerMappings = mappingsAndFieldStats.v1();
|
Map<String, Object> innerFieldMappings = mappingsAndFieldStats.v1();
|
||||||
Map<String, Object> secondLevelProperties = new LinkedHashMap<>();
|
Map<String, Object> secondLevelProperties = new LinkedHashMap<>();
|
||||||
secondLevelProperties.put(FileStructureUtils.MAPPING_TYPE_SETTING, "object");
|
secondLevelProperties.put(FileStructureUtils.MAPPING_TYPE_SETTING, "object");
|
||||||
secondLevelProperties.put(FileStructureUtils.MAPPING_PROPERTIES_SETTING, innerMappings);
|
secondLevelProperties.put(FileStructureUtils.MAPPING_PROPERTIES_SETTING, innerFieldMappings);
|
||||||
SortedMap<String, Object> outerMappings = new TreeMap<>();
|
SortedMap<String, Object> outerFieldMappings = new TreeMap<>();
|
||||||
outerMappings.put(topLevelTag, secondLevelProperties);
|
outerFieldMappings.put(topLevelTag, secondLevelProperties);
|
||||||
if (timeField != null) {
|
if (timeField != null) {
|
||||||
outerMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
|
outerFieldMappings.put(FileStructureUtils.DEFAULT_TIMESTAMP_FIELD, timeField.v2().getEsDateMappingTypeWithoutFormat());
|
||||||
}
|
}
|
||||||
|
|
||||||
FileStructure structure = structureBuilder
|
FileStructure structure = structureBuilder
|
||||||
.setMappings(outerMappings)
|
.setMappings(Collections.singletonMap(FileStructureUtils.MAPPING_PROPERTIES_SETTING, outerFieldMappings))
|
||||||
.setExplanation(explanation)
|
.setExplanation(explanation)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
|
|
@ -30,8 +30,8 @@ import static org.hamcrest.Matchers.not;
|
||||||
|
|
||||||
public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
|
|
||||||
private FileStructureFinderFactory csvFactory = new DelimitedFileStructureFinderFactory(',', '"', 2, false);
|
private final FileStructureFinderFactory csvFactory = new DelimitedFileStructureFinderFactory(',', '"', 2, false);
|
||||||
private FileStructureFinderFactory tsvFactory = new DelimitedFileStructureFinderFactory('\t', '"', 3, false);
|
private final FileStructureFinderFactory tsvFactory = new DelimitedFileStructureFinderFactory('\t', '"', 3, false);
|
||||||
|
|
||||||
public void testCreateConfigsGivenCompleteCsv() throws Exception {
|
public void testCreateConfigsGivenCompleteCsv() throws Exception {
|
||||||
String sample = "time,message\n" +
|
String sample = "time,message\n" +
|
||||||
|
@ -63,6 +63,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("time", structure.getTimestampField());
|
assertEquals("time", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenIncompleteCsv() throws Exception {
|
public void testCreateConfigsGivenIncompleteCsv() throws Exception {
|
||||||
|
@ -109,6 +110,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getMultilineStartPattern());
|
assertNull(structure.getMultilineStartPattern());
|
||||||
assertNull(structure.getShouldTrimFields());
|
assertNull(structure.getShouldTrimFields());
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenIncompleteCsvWithMultiLinedRows() throws Exception {
|
public void testCreateConfigsGivenIncompleteCsvWithMultiLinedRows() throws Exception {
|
||||||
|
@ -156,6 +158,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertEquals("^\"?\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
assertEquals("^\"?\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
||||||
assertNull(structure.getShouldTrimFields());
|
assertNull(structure.getShouldTrimFields());
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenCompleteCsvAndColumnNamesOverride() throws Exception {
|
public void testCreateConfigsGivenCompleteCsvAndColumnNamesOverride() throws Exception {
|
||||||
|
@ -191,6 +194,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("my_time", structure.getTimestampField());
|
assertEquals("my_time", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenCompleteCsvAndHasHeaderRowOverride() throws Exception {
|
public void testCreateConfigsGivenCompleteCsvAndHasHeaderRowOverride() throws Exception {
|
||||||
|
@ -228,6 +232,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertNull(structure.getTimestampField());
|
assertNull(structure.getTimestampField());
|
||||||
assertNull(structure.getJodaTimestampFormats());
|
assertNull(structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenCsvWithIncompleteLastRecord() throws Exception {
|
public void testCreateConfigsGivenCsvWithIncompleteLastRecord() throws Exception {
|
||||||
|
@ -261,6 +266,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("time", structure.getTimestampField());
|
assertEquals("time", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenCsvWithTrailingNulls() throws Exception {
|
public void testCreateConfigsGivenCsvWithTrailingNulls() throws Exception {
|
||||||
|
@ -301,6 +307,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("tpep_pickup_datetime", structure.getTimestampField());
|
assertEquals("tpep_pickup_datetime", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenCsvWithTrailingNullsAndOverriddenTimeField() throws Exception {
|
public void testCreateConfigsGivenCsvWithTrailingNullsAndOverriddenTimeField() throws Exception {
|
||||||
|
@ -346,6 +353,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("tpep_dropoff_datetime", structure.getTimestampField());
|
assertEquals("tpep_dropoff_datetime", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenCsvWithTrailingNullsExceptHeader() throws Exception {
|
public void testCreateConfigsGivenCsvWithTrailingNullsExceptHeader() throws Exception {
|
||||||
|
@ -386,6 +394,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("tpep_pickup_datetime", structure.getTimestampField());
|
assertEquals("tpep_pickup_datetime", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenCsvWithTrailingNullsExceptHeaderAndColumnNamesOverride() throws Exception {
|
public void testCreateConfigsGivenCsvWithTrailingNullsExceptHeaderAndColumnNamesOverride() throws Exception {
|
||||||
|
@ -434,6 +443,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("my_tpep_pickup_datetime", structure.getTimestampField());
|
assertEquals("my_tpep_pickup_datetime", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenCsvWithTimeLastColumn() throws Exception {
|
public void testCreateConfigsGivenCsvWithTimeLastColumn() throws Exception {
|
||||||
|
@ -467,6 +477,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("timestamp", structure.getTimestampField());
|
assertEquals("timestamp", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss.SSSSSS"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("YYYY-MM-dd HH:mm:ss.SSSSSS"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenTsvWithSyslogLikeTimestamp() throws Exception {
|
public void testCreateConfigsGivenTsvWithSyslogLikeTimestamp() throws Exception {
|
||||||
|
@ -508,6 +519,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertEquals("Timestamp", structure.getTimestampField());
|
assertEquals("Timestamp", structure.getTimestampField());
|
||||||
assertEquals(Arrays.asList("MMM dd YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss"),
|
assertEquals(Arrays.asList("MMM dd YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss", "MMM d YYYY HH:mm:ss"),
|
||||||
structure.getJodaTimestampFormats());
|
structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenDotInFieldName() throws Exception {
|
public void testCreateConfigsGivenDotInFieldName() throws Exception {
|
||||||
|
@ -541,6 +553,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("time_iso8601", structure.getTimestampField());
|
assertEquals("time_iso8601", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("ISO8601"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testFindHeaderFromSampleGivenHeaderInSample() throws IOException {
|
public void testFindHeaderFromSampleGivenHeaderInSample() throws IOException {
|
||||||
|
|
|
@ -11,7 +11,7 @@ import java.util.Collections;
|
||||||
|
|
||||||
public class NdJsonFileStructureFinderTests extends FileStructureTestCase {
|
public class NdJsonFileStructureFinderTests extends FileStructureTestCase {
|
||||||
|
|
||||||
private FileStructureFinderFactory factory = new NdJsonFileStructureFinderFactory();
|
private final FileStructureFinderFactory factory = new NdJsonFileStructureFinderFactory();
|
||||||
|
|
||||||
public void testCreateConfigsGivenGoodJson() throws Exception {
|
public void testCreateConfigsGivenGoodJson() throws Exception {
|
||||||
assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE, 0.0));
|
assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE, 0.0));
|
||||||
|
@ -39,5 +39,6 @@ public class NdJsonFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("timestamp", structure.getTimestampField());
|
assertEquals("timestamp", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,7 @@ import static org.hamcrest.Matchers.not;
|
||||||
|
|
||||||
public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
||||||
|
|
||||||
private FileStructureFinderFactory factory = new TextLogFileStructureFinderFactory();
|
private final FileStructureFinderFactory factory = new TextLogFileStructureFinderFactory();
|
||||||
|
|
||||||
public void testCreateConfigsGivenLowLineMergeSizeLimit() {
|
public void testCreateConfigsGivenLowLineMergeSizeLimit() {
|
||||||
|
|
||||||
|
@ -74,6 +74,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
||||||
for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) {
|
for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) {
|
||||||
assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
|
assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
|
||||||
}
|
}
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenElasticsearchLogAndTimestampFormatOverride() throws Exception {
|
public void testCreateConfigsGivenElasticsearchLogAndTimestampFormatOverride() throws Exception {
|
||||||
|
@ -115,6 +116,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
||||||
for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) {
|
for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) {
|
||||||
assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
|
assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
|
||||||
}
|
}
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenElasticsearchLogAndTimestampFieldOverride() throws Exception {
|
public void testCreateConfigsGivenElasticsearchLogAndTimestampFieldOverride() throws Exception {
|
||||||
|
@ -151,6 +153,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
||||||
for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) {
|
for (String statMessage : messageFieldStats.getTopHits().stream().map(m -> (String) m.get("value")).collect(Collectors.toList())) {
|
||||||
assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
|
assertThat(structureFinder.getSampleMessages(), hasItem(statMessage));
|
||||||
}
|
}
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenElasticsearchLogAndGrokPatternOverride() throws Exception {
|
public void testCreateConfigsGivenElasticsearchLogAndGrokPatternOverride() throws Exception {
|
||||||
|
@ -191,6 +194,7 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
||||||
// at the end of the processing will _not_ contain a complete sample message
|
// at the end of the processing will _not_ contain a complete sample message
|
||||||
assertThat(structureFinder.getSampleMessages(), not(hasItem(statMessage)));
|
assertThat(structureFinder.getSampleMessages(), not(hasItem(statMessage)));
|
||||||
}
|
}
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCreateConfigsGivenElasticsearchLogAndImpossibleGrokPatternOverride() {
|
public void testCreateConfigsGivenElasticsearchLogAndImpossibleGrokPatternOverride() {
|
||||||
|
|
|
@ -11,7 +11,7 @@ import java.util.Collections;
|
||||||
|
|
||||||
public class XmlFileStructureFinderTests extends FileStructureTestCase {
|
public class XmlFileStructureFinderTests extends FileStructureTestCase {
|
||||||
|
|
||||||
private FileStructureFinderFactory factory = new XmlFileStructureFinderFactory();
|
private final FileStructureFinderFactory factory = new XmlFileStructureFinderFactory();
|
||||||
|
|
||||||
public void testCreateConfigsGivenGoodXml() throws Exception {
|
public void testCreateConfigsGivenGoodXml() throws Exception {
|
||||||
assertTrue(factory.canCreateFromSample(explanation, XML_SAMPLE, 0.0));
|
assertTrue(factory.canCreateFromSample(explanation, XML_SAMPLE, 0.0));
|
||||||
|
@ -39,5 +39,6 @@ public class XmlFileStructureFinderTests extends FileStructureTestCase {
|
||||||
assertNull(structure.getGrokPattern());
|
assertNull(structure.getGrokPattern());
|
||||||
assertEquals("timestamp", structure.getTimestampField());
|
assertEquals("timestamp", structure.getTimestampField());
|
||||||
assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats());
|
assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats());
|
||||||
|
assertEquals(Collections.singleton("properties"), structure.getMappings().keySet());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,11 +35,11 @@ setup:
|
||||||
- match: { joda_timestamp_formats.0: UNIX }
|
- match: { joda_timestamp_formats.0: UNIX }
|
||||||
- match: { java_timestamp_formats.0: UNIX }
|
- match: { java_timestamp_formats.0: UNIX }
|
||||||
- match: { need_client_timezone: false }
|
- match: { need_client_timezone: false }
|
||||||
- match: { mappings.airline.type: keyword }
|
- match: { mappings.properties.airline.type: keyword }
|
||||||
- match: { mappings.responsetime.type: double }
|
- match: { mappings.properties.responsetime.type: double }
|
||||||
- match: { mappings.sourcetype.type: keyword }
|
- match: { mappings.properties.sourcetype.type: keyword }
|
||||||
- match: { mappings.time.type: date }
|
- match: { mappings.properties.time.type: date }
|
||||||
- match: { mappings.time.format: epoch_second }
|
- match: { mappings.properties.time.format: epoch_second }
|
||||||
- match: { ingest_pipeline.description: "Ingest pipeline created by file structure finder" }
|
- match: { ingest_pipeline.description: "Ingest pipeline created by file structure finder" }
|
||||||
- match: { ingest_pipeline.processors.0.date.field: time }
|
- match: { ingest_pipeline.processors.0.date.field: time }
|
||||||
- match: { ingest_pipeline.processors.0.date.formats.0: UNIX }
|
- match: { ingest_pipeline.processors.0.date.formats.0: UNIX }
|
||||||
|
@ -96,11 +96,11 @@ setup:
|
||||||
- match: { joda_timestamp_formats.0: UNIX }
|
- match: { joda_timestamp_formats.0: UNIX }
|
||||||
- match: { java_timestamp_formats.0: UNIX }
|
- match: { java_timestamp_formats.0: UNIX }
|
||||||
- match: { need_client_timezone: false }
|
- match: { need_client_timezone: false }
|
||||||
- match: { mappings.airline.type: keyword }
|
- match: { mappings.properties.airline.type: keyword }
|
||||||
- match: { mappings.responsetime.type: double }
|
- match: { mappings.properties.responsetime.type: double }
|
||||||
- match: { mappings.sourcetype.type: keyword }
|
- match: { mappings.properties.sourcetype.type: keyword }
|
||||||
- match: { mappings.time.type: date }
|
- match: { mappings.properties.time.type: date }
|
||||||
- match: { mappings.time.format: epoch_second }
|
- match: { mappings.properties.time.format: epoch_second }
|
||||||
- match: { ingest_pipeline.description: "Ingest pipeline created by file structure finder" }
|
- match: { ingest_pipeline.description: "Ingest pipeline created by file structure finder" }
|
||||||
- match: { ingest_pipeline.processors.0.date.field: time }
|
- match: { ingest_pipeline.processors.0.date.field: time }
|
||||||
- match: { ingest_pipeline.processors.0.date.formats.0: UNIX }
|
- match: { ingest_pipeline.processors.0.date.formats.0: UNIX }
|
||||||
|
|
Loading…
Reference in New Issue