[ML] Rename the json file structure to ndjson (#34901)

The file structure finder endpoint can find the NDJSON
(newline-delimited JSON) file format, but called it
`json`.  This change renames the `format` for this file
structure to `ndjson`, which is more precise and will
hopefully avoid confusion.
This commit is contained in:
David Roberts 2018-10-29 10:06:12 +01:00 committed by GitHub
parent f13d529448
commit c455be7bc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 62 additions and 62 deletions

View File

@ -74,7 +74,7 @@ chosen.
structure finder produced its result. The default value is `false`. structure finder produced its result. The default value is `false`.
`format`:: `format`::
(string) The high level structure of the file. Valid values are `json`, `xml`, (string) The high level structure of the file. Valid values are `ndjson`, `xml`,
`delimited`, and `semi_structured_text`. If this parameter is not specified, `delimited`, and `semi_structured_text`. If this parameter is not specified,
the structure finder chooses one. the structure finder chooses one.
@ -259,7 +259,7 @@ If the request does not encounter errors, you receive the following result:
"sample_start" : "{\"name\": \"Leviathan Wakes\", \"author\": \"James S.A. Corey\", \"release_date\": \"2011-06-02\", \"page_count\": 561}\n{\"name\": \"Hyperion\", \"author\": \"Dan Simmons\", \"release_date\": \"1989-05-26\", \"page_count\": 482}\n", <3> "sample_start" : "{\"name\": \"Leviathan Wakes\", \"author\": \"James S.A. Corey\", \"release_date\": \"2011-06-02\", \"page_count\": 561}\n{\"name\": \"Hyperion\", \"author\": \"Dan Simmons\", \"release_date\": \"1989-05-26\", \"page_count\": 482}\n", <3>
"charset" : "UTF-8", <4> "charset" : "UTF-8", <4>
"has_byte_order_marker" : false, <5> "has_byte_order_marker" : false, <5>
"format" : "json", <6> "format" : "ndjson", <6>
"need_client_timezone" : false, <7> "need_client_timezone" : false, <7>
"mappings" : { <8> "mappings" : { <8>
"author" : { "author" : {
@ -473,14 +473,14 @@ If the request does not encounter errors, you receive the following result:
<1> `num_lines_analyzed` indicates how many lines of the file were analyzed. <1> `num_lines_analyzed` indicates how many lines of the file were analyzed.
<2> `num_messages_analyzed` indicates how many distinct messages the lines contained. <2> `num_messages_analyzed` indicates how many distinct messages the lines contained.
For ND-JSON, this value is the same as `num_lines_analyzed`. For other file For NDJSON, this value is the same as `num_lines_analyzed`. For other file
formats, messages can span several lines. formats, messages can span several lines.
<3> `sample_start` reproduces the first two messages in the file verbatim. This <3> `sample_start` reproduces the first two messages in the file verbatim. This
may help to diagnose parse errors or accidental uploads of the wrong file. may help to diagnose parse errors or accidental uploads of the wrong file.
<4> `charset` indicates the character encoding used to parse the file. <4> `charset` indicates the character encoding used to parse the file.
<5> For UTF character encodings, `has_byte_order_marker` indicates whether the <5> For UTF character encodings, `has_byte_order_marker` indicates whether the
file begins with a byte order marker. file begins with a byte order marker.
<6> `format` is one of `json`, `xml`, `delimited` or `semi_structured_text`. <6> `format` is one of `ndjson`, `xml`, `delimited` or `semi_structured_text`.
<7> If a timestamp format is detected that does not include a timezone, <7> If a timestamp format is detected that does not include a timezone,
`need_client_timezone` will be `true`. The server that parses the file must `need_client_timezone` will be `true`. The server that parses the file must
therefore be told the correct timezone by the client. therefore be told the correct timezone by the client.

View File

@ -32,11 +32,11 @@ public class FileStructure implements ToXContentObject, Writeable {
public enum Format { public enum Format {
JSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT; NDJSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT;
public boolean supportsNesting() { public boolean supportsNesting() {
switch (this) { switch (this) {
case JSON: case NDJSON:
case XML: case XML:
return true; return true;
case DELIMITED: case DELIMITED:
@ -49,7 +49,7 @@ public class FileStructure implements ToXContentObject, Writeable {
public boolean isStructured() { public boolean isStructured() {
switch (this) { switch (this) {
case JSON: case NDJSON:
case XML: case XML:
case DELIMITED: case DELIMITED:
return true; return true;
@ -62,7 +62,7 @@ public class FileStructure implements ToXContentObject, Writeable {
public boolean isSemiStructured() { public boolean isSemiStructured() {
switch (this) { switch (this) {
case JSON: case NDJSON:
case XML: case XML:
case DELIMITED: case DELIMITED:
return false; return false;
@ -645,7 +645,7 @@ public class FileStructure implements ToXContentObject, Writeable {
} }
switch (format) { switch (format) {
case JSON: case NDJSON:
if (shouldTrimFields != null) { if (shouldTrimFields != null) {
throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures."); throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures.");
} }

View File

@ -124,7 +124,7 @@ public class FindFileStructureActionRequestTests extends AbstractStreamableTestC
public void testValidateNonSemiStructuredText() { public void testValidateNonSemiStructuredText() {
FindFileStructureAction.Request request = new FindFileStructureAction.Request(); FindFileStructureAction.Request request = new FindFileStructureAction.Request();
request.setFormat(randomFrom(FileStructure.Format.JSON, FileStructure.Format.XML, FileStructure.Format.DELIMITED)); request.setFormat(randomFrom(FileStructure.Format.NDJSON, FileStructure.Format.XML, FileStructure.Format.DELIMITED));
request.setGrokPattern(randomAlphaOfLength(80)); request.setGrokPattern(randomAlphaOfLength(80));
request.setSample(new BytesArray("foo\n")); request.setSample(new BytesArray("foo\n"));

View File

@ -35,7 +35,7 @@ import java.util.stream.Collectors;
* Runs the high-level steps needed to create ingest configs for the specified file. In order: * Runs the high-level steps needed to create ingest configs for the specified file. In order:
* 1. Determine the most likely character set (UTF-8, UTF-16LE, ISO-8859-2, etc.) * 1. Determine the most likely character set (UTF-8, UTF-16LE, ISO-8859-2, etc.)
* 2. Load a sample of the file, consisting of the first 1000 lines of the file * 2. Load a sample of the file, consisting of the first 1000 lines of the file
* 3. Determine the most likely file structure - one of ND-JSON, XML, delimited or semi-structured text * 3. Determine the most likely file structure - one of NDJSON, XML, delimited or semi-structured text
* 4. Create an appropriate structure object and delegate writing configs to it * 4. Create an appropriate structure object and delegate writing configs to it
*/ */
public final class FileStructureFinderManager { public final class FileStructureFinderManager {
@ -73,9 +73,9 @@ public final class FileStructureFinderManager {
* These need to be ordered so that the more generic formats come after the more specific ones * These need to be ordered so that the more generic formats come after the more specific ones
*/ */
private static final List<FileStructureFinderFactory> ORDERED_STRUCTURE_FACTORIES = Collections.unmodifiableList(Arrays.asList( private static final List<FileStructureFinderFactory> ORDERED_STRUCTURE_FACTORIES = Collections.unmodifiableList(Arrays.asList(
new JsonFileStructureFinderFactory(), new NdJsonFileStructureFinderFactory(),
new XmlFileStructureFinderFactory(), new XmlFileStructureFinderFactory(),
// ND-JSON will often also be valid (although utterly weird) CSV, so JSON must come before CSV // NDJSON will often also be valid (although utterly weird) CSV, so NDJSON must come before CSV
new DelimitedFileStructureFinderFactory(',', '"', 2, false), new DelimitedFileStructureFinderFactory(',', '"', 2, false),
new DelimitedFileStructureFinderFactory('\t', '"', 2, false), new DelimitedFileStructureFinderFactory('\t', '"', 2, false),
new DelimitedFileStructureFinderFactory(';', '"', 4, false), new DelimitedFileStructureFinderFactory(';', '"', 4, false),

View File

@ -25,14 +25,14 @@ import java.util.stream.Collectors;
import static org.elasticsearch.common.xcontent.json.JsonXContent.jsonXContent; import static org.elasticsearch.common.xcontent.json.JsonXContent.jsonXContent;
/** /**
* Really ND-JSON. * Newline-delimited JSON.
*/ */
public class JsonFileStructureFinder implements FileStructureFinder { public class NdJsonFileStructureFinder implements FileStructureFinder {
private final List<String> sampleMessages; private final List<String> sampleMessages;
private final FileStructure structure; private final FileStructure structure;
static JsonFileStructureFinder makeJsonFileStructureFinder(List<String> explanation, String sample, String charsetName, static NdJsonFileStructureFinder makeNdJsonFileStructureFinder(List<String> explanation, String sample, String charsetName,
Boolean hasByteOrderMarker, FileStructureOverrides overrides, Boolean hasByteOrderMarker, FileStructureOverrides overrides,
TimeoutChecker timeoutChecker) throws IOException { TimeoutChecker timeoutChecker) throws IOException {
@ -43,10 +43,10 @@ public class JsonFileStructureFinder implements FileStructureFinder {
XContentParser parser = jsonXContent.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, XContentParser parser = jsonXContent.createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
sampleMessage); sampleMessage);
sampleRecords.add(parser.mapOrdered()); sampleRecords.add(parser.mapOrdered());
timeoutChecker.check("JSON parsing"); timeoutChecker.check("NDJSON parsing");
} }
FileStructure.Builder structureBuilder = new FileStructure.Builder(FileStructure.Format.JSON) FileStructure.Builder structureBuilder = new FileStructure.Builder(FileStructure.Format.NDJSON)
.setCharset(charsetName) .setCharset(charsetName)
.setHasByteOrderMarker(hasByteOrderMarker) .setHasByteOrderMarker(hasByteOrderMarker)
.setSampleStart(sampleMessages.stream().limit(2).collect(Collectors.joining("\n", "", "\n"))) .setSampleStart(sampleMessages.stream().limit(2).collect(Collectors.joining("\n", "", "\n")))
@ -84,10 +84,10 @@ public class JsonFileStructureFinder implements FileStructureFinder {
.setExplanation(explanation) .setExplanation(explanation)
.build(); .build();
return new JsonFileStructureFinder(sampleMessages, structure); return new NdJsonFileStructureFinder(sampleMessages, structure);
} }
private JsonFileStructureFinder(List<String> sampleMessages, FileStructure structure) { private NdJsonFileStructureFinder(List<String> sampleMessages, FileStructure structure) {
this.sampleMessages = Collections.unmodifiableList(sampleMessages); this.sampleMessages = Collections.unmodifiableList(sampleMessages);
this.structure = structure; this.structure = structure;
} }

View File

@ -17,15 +17,15 @@ import java.util.Locale;
import static org.elasticsearch.common.xcontent.json.JsonXContent.jsonXContent; import static org.elasticsearch.common.xcontent.json.JsonXContent.jsonXContent;
public class JsonFileStructureFinderFactory implements FileStructureFinderFactory { public class NdJsonFileStructureFinderFactory implements FileStructureFinderFactory {
@Override @Override
public boolean canFindFormat(FileStructure.Format format) { public boolean canFindFormat(FileStructure.Format format) {
return format == null || format == FileStructure.Format.JSON; return format == null || format == FileStructure.Format.NDJSON;
} }
/** /**
* This format matches if the sample consists of one or more JSON documents. * This format matches if the sample consists of one or more JSON documents.
* If there is more than one, they must be newline-delimited. The * If there is more than one, they must be newline-delimited. The
* documents must be non-empty, to prevent lines containing "{}" from matching. * documents must be non-empty, to prevent lines containing "{}" from matching.
*/ */
@ -41,35 +41,35 @@ public class JsonFileStructureFinderFactory implements FileStructureFinderFactor
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, new ContextPrintingStringReader(sampleLine))) { DeprecationHandler.THROW_UNSUPPORTED_OPERATION, new ContextPrintingStringReader(sampleLine))) {
if (parser.map().isEmpty()) { if (parser.map().isEmpty()) {
explanation.add("Not JSON because an empty object was parsed: [" + sampleLine + "]"); explanation.add("Not NDJSON because an empty object was parsed: [" + sampleLine + "]");
return false; return false;
} }
++completeDocCount; ++completeDocCount;
if (parser.nextToken() != null) { if (parser.nextToken() != null) {
explanation.add("Not newline delimited JSON because a line contained more than a single object: [" + explanation.add("Not newline delimited NDJSON because a line contained more than a single object: [" +
sampleLine + "]"); sampleLine + "]");
return false; return false;
} }
} }
} }
} catch (IOException | IllegalStateException e) { } catch (IOException | IllegalStateException e) {
explanation.add("Not JSON because there was a parsing exception: [" + e.getMessage().replaceAll("\\s?\r?\n\\s?", " ") + "]"); explanation.add("Not NDJSON because there was a parsing exception: [" + e.getMessage().replaceAll("\\s?\r?\n\\s?", " ") + "]");
return false; return false;
} }
if (completeDocCount == 0) { if (completeDocCount == 0) {
explanation.add("Not JSON because sample didn't contain a complete document"); explanation.add("Not NDJSON because sample didn't contain a complete document");
return false; return false;
} }
explanation.add("Deciding sample is newline delimited JSON"); explanation.add("Deciding sample is newline delimited NDJSON");
return true; return true;
} }
@Override @Override
public FileStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker, public FileStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker,
FileStructureOverrides overrides, TimeoutChecker timeoutChecker) throws IOException { FileStructureOverrides overrides, TimeoutChecker timeoutChecker) throws IOException {
return JsonFileStructureFinder.makeJsonFileStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, overrides, return NdJsonFileStructureFinder.makeNdJsonFileStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, overrides,
timeoutChecker); timeoutChecker);
} }

View File

@ -12,7 +12,7 @@ public class DelimitedFileStructureFinderFactoryTests extends FileStructureTestC
private FileStructureFinderFactory semiColonDelimitedfactory = new DelimitedFileStructureFinderFactory(';', '"', 4, false); private FileStructureFinderFactory semiColonDelimitedfactory = new DelimitedFileStructureFinderFactory(';', '"', 4, false);
private FileStructureFinderFactory pipeDelimitedFactory = new DelimitedFileStructureFinderFactory('|', '"', 5, true); private FileStructureFinderFactory pipeDelimitedFactory = new DelimitedFileStructureFinderFactory('|', '"', 5, true);
// CSV - no need to check JSON or XML because they come earlier in the order we check formats // CSV - no need to check NDJSON or XML because they come earlier in the order we check formats
public void testCanCreateCsvFromSampleGivenCsv() { public void testCanCreateCsvFromSampleGivenCsv() {
@ -39,7 +39,7 @@ public class DelimitedFileStructureFinderFactoryTests extends FileStructureTestC
assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE)); assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
} }
// TSV - no need to check JSON, XML or CSV because they come earlier in the order we check formats // TSV - no need to check NDJSON, XML or CSV because they come earlier in the order we check formats
public void testCanCreateTsvFromSampleGivenTsv() { public void testCanCreateTsvFromSampleGivenTsv() {
@ -61,7 +61,7 @@ public class DelimitedFileStructureFinderFactoryTests extends FileStructureTestC
assertFalse(tsvFactory.canCreateFromSample(explanation, TEXT_SAMPLE)); assertFalse(tsvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
} }
// Semi-colon delimited - no need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats // Semi-colon delimited - no need to check NDJSON, XML, CSV or TSV because they come earlier in the order we check formats
public void testCanCreateSemiColonDelimitedFromSampleGivenSemiColonDelimited() { public void testCanCreateSemiColonDelimitedFromSampleGivenSemiColonDelimited() {
@ -78,7 +78,7 @@ public class DelimitedFileStructureFinderFactoryTests extends FileStructureTestC
assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, TEXT_SAMPLE)); assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, TEXT_SAMPLE));
} }
// Pipe delimited - no need to check JSON, XML, CSV, TSV or semi-colon delimited // Pipe delimited - no need to check NDJSON, XML, CSV, TSV or semi-colon delimited
// values because they come earlier in the order we check formats // values because they come earlier in the order we check formats
public void testCanCreatePipeDelimitedFromSampleGivenPipeDelimited() { public void testCanCreatePipeDelimitedFromSampleGivenPipeDelimited() {

View File

@ -73,20 +73,20 @@ public class FileStructureFinderManagerTests extends FileStructureTestCase {
} }
} }
public void testMakeBestStructureGivenJson() throws Exception { public void testMakeBestStructureGivenNdJson() throws Exception {
assertThat(structureFinderManager.makeBestStructureFinder(explanation, JSON_SAMPLE, StandardCharsets.UTF_8.name(), randomBoolean(), assertThat(structureFinderManager.makeBestStructureFinder(explanation, NDJSON_SAMPLE, StandardCharsets.UTF_8.name(),
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER), instanceOf(JsonFileStructureFinder.class)); randomBoolean(), EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER), instanceOf(NdJsonFileStructureFinder.class));
} }
public void testMakeBestStructureGivenJsonAndDelimitedOverride() throws Exception { public void testMakeBestStructureGivenNdJsonAndDelimitedOverride() throws Exception {
// Need to change the quote character from the default of double quotes // Need to change the quote character from the default of double quotes
// otherwise the quotes in the JSON will stop it parsing as CSV // otherwise the quotes in the NDJSON will stop it parsing as CSV
FileStructureOverrides overrides = FileStructureOverrides.builder() FileStructureOverrides overrides = FileStructureOverrides.builder()
.setFormat(FileStructure.Format.DELIMITED).setQuote('\'').build(); .setFormat(FileStructure.Format.DELIMITED).setQuote('\'').build();
assertThat(structureFinderManager.makeBestStructureFinder(explanation, JSON_SAMPLE, StandardCharsets.UTF_8.name(), randomBoolean(), assertThat(structureFinderManager.makeBestStructureFinder(explanation, NDJSON_SAMPLE, StandardCharsets.UTF_8.name(),
overrides, NOOP_TIMEOUT_CHECKER), instanceOf(DelimitedFileStructureFinder.class)); randomBoolean(), overrides, NOOP_TIMEOUT_CHECKER), instanceOf(DelimitedFileStructureFinder.class));
} }
public void testMakeBestStructureGivenXml() throws Exception { public void testMakeBestStructureGivenXml() throws Exception {
@ -109,13 +109,13 @@ public class FileStructureFinderManagerTests extends FileStructureTestCase {
public void testMakeBestStructureGivenCsvAndJsonOverride() { public void testMakeBestStructureGivenCsvAndJsonOverride() {
FileStructureOverrides overrides = FileStructureOverrides.builder().setFormat(FileStructure.Format.JSON).build(); FileStructureOverrides overrides = FileStructureOverrides.builder().setFormat(FileStructure.Format.NDJSON).build();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> structureFinderManager.makeBestStructureFinder(explanation, CSV_SAMPLE, StandardCharsets.UTF_8.name(), randomBoolean(), () -> structureFinderManager.makeBestStructureFinder(explanation, CSV_SAMPLE, StandardCharsets.UTF_8.name(), randomBoolean(),
overrides, NOOP_TIMEOUT_CHECKER)); overrides, NOOP_TIMEOUT_CHECKER));
assertEquals("Input did not match the specified format [json]", e.getMessage()); assertEquals("Input did not match the specified format [ndjson]", e.getMessage());
} }
public void testMakeBestStructureGivenText() throws Exception { public void testMakeBestStructureGivenText() throws Exception {

View File

@ -27,7 +27,7 @@ public abstract class FileStructureTestCase extends ESTestCase {
"2018-05-17T16:23:40,key1,42.0\n" + "2018-05-17T16:23:40,key1,42.0\n" +
"2018-05-17T16:24:11,\"key with spaces\",42.0\n"; "2018-05-17T16:24:11,\"key with spaces\",42.0\n";
protected static final String JSON_SAMPLE = "{\"logger\":\"controller\",\"timestamp\":1478261151445,\"level\":\"INFO\"," + protected static final String NDJSON_SAMPLE = "{\"logger\":\"controller\",\"timestamp\":1478261151445,\"level\":\"INFO\"," +
"\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 1\",\"class\":\"ml\"," + "\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 1\",\"class\":\"ml\"," +
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n" + "\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n" +
"{\"logger\":\"controller\",\"timestamp\":1478261151445," + "{\"logger\":\"controller\",\"timestamp\":1478261151445," +

View File

@ -9,21 +9,21 @@ import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure;
import java.util.Collections; import java.util.Collections;
public class JsonFileStructureFinderTests extends FileStructureTestCase { public class NdJsonFileStructureFinderTests extends FileStructureTestCase {
private FileStructureFinderFactory factory = new JsonFileStructureFinderFactory(); private FileStructureFinderFactory factory = new NdJsonFileStructureFinderFactory();
public void testCreateConfigsGivenGoodJson() throws Exception { public void testCreateConfigsGivenGoodJson() throws Exception {
assertTrue(factory.canCreateFromSample(explanation, JSON_SAMPLE)); assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE));
String charset = randomFrom(POSSIBLE_CHARSETS); String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
FileStructureFinder structureFinder = factory.createFromSample(explanation, JSON_SAMPLE, charset, hasByteOrderMarker, FileStructureFinder structureFinder = factory.createFromSample(explanation, NDJSON_SAMPLE, charset, hasByteOrderMarker,
FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER); FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
FileStructure structure = structureFinder.getStructure(); FileStructure structure = structureFinder.getStructure();
assertEquals(FileStructure.Format.JSON, structure.getFormat()); assertEquals(FileStructure.Format.NDJSON, structure.getFormat());
assertEquals(charset, structure.getCharset()); assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) { if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker()); assertNull(structure.getHasByteOrderMarker());

View File

@ -5,13 +5,13 @@
*/ */
package org.elasticsearch.xpack.ml.filestructurefinder; package org.elasticsearch.xpack.ml.filestructurefinder;
public class JsonFileStructureFinderFactoryTests extends FileStructureTestCase { public class NdNdJsonFileStructureFinderFactoryTests extends FileStructureTestCase {
private FileStructureFinderFactory factory = new JsonFileStructureFinderFactory(); private FileStructureFinderFactory factory = new NdJsonFileStructureFinderFactory();
public void testCanCreateFromSampleGivenJson() { public void testCanCreateFromSampleGivenNdJson() {
assertTrue(factory.canCreateFromSample(explanation, JSON_SAMPLE)); assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE));
} }
public void testCanCreateFromSampleGivenXml() { public void testCanCreateFromSampleGivenXml() {

View File

@ -9,7 +9,7 @@ public class TextLogFileStructureFinderFactoryTests extends FileStructureTestCas
private FileStructureFinderFactory factory = new TextLogFileStructureFinderFactory(); private FileStructureFinderFactory factory = new TextLogFileStructureFinderFactory();
// No need to check JSON, XML, CSV, TSV, semi-colon delimited values or pipe // No need to check NDJSON, XML, CSV, TSV, semi-colon delimited values or pipe
// delimited values because they come earlier in the order we check formats // delimited values because they come earlier in the order we check formats
public void testCanCreateFromSampleGivenText() { public void testCanCreateFromSampleGivenText() {

View File

@ -9,7 +9,7 @@ public class XmlFileStructureFinderFactoryTests extends FileStructureTestCase {
private FileStructureFinderFactory factory = new XmlFileStructureFinderFactory(); private FileStructureFinderFactory factory = new XmlFileStructureFinderFactory();
// No need to check JSON because it comes earlier in the order we check formats // No need to check NDJSON because it comes earlier in the order we check formats
public void testCanCreateFromSampleGivenXml() { public void testCanCreateFromSampleGivenXml() {

View File

@ -22,7 +22,7 @@
}, },
"format": { "format": {
"type": "enum", "type": "enum",
"options": [ "json", "xml", "delimited", "semi_structured_text" ], "options": [ "ndjson", "xml", "delimited", "semi_structured_text" ],
"description": "Optional parameter to specify the high level file format" "description": "Optional parameter to specify the high level file format"
}, },
"has_header_row": { "has_header_row": {

View File

@ -1,5 +1,5 @@
--- ---
"Test JSON file structure analysis without overrides": "Test NDJSON file structure analysis without overrides":
- do: - do:
headers: headers:
# This is to stop the usual content type randomization, which # This is to stop the usual content type randomization, which
@ -26,7 +26,7 @@
- match: { num_messages_analyzed: 3 } - match: { num_messages_analyzed: 3 }
- match: { charset: "UTF-8" } - match: { charset: "UTF-8" }
- match: { has_byte_order_marker: false } - match: { has_byte_order_marker: false }
- match: { format: json } - match: { format: ndjson }
- match: { timestamp_field: time } - match: { timestamp_field: time }
- match: { joda_timestamp_formats.0: UNIX } - match: { joda_timestamp_formats.0: UNIX }
- match: { java_timestamp_formats.0: UNIX } - match: { java_timestamp_formats.0: UNIX }
@ -56,7 +56,7 @@
- is_false: explanation - is_false: explanation
--- ---
"Test JSON file structure analysis with overrides": "Test NDJSON file structure analysis with overrides":
- do: - do:
headers: headers:
# This is to stop the usual content type randomization, which # This is to stop the usual content type randomization, which
@ -64,7 +64,7 @@
Content-Type: "application/json" Content-Type: "application/json"
xpack.ml.find_file_structure: xpack.ml.find_file_structure:
charset: UTF-8 charset: UTF-8
format: json format: ndjson
timestamp_field: time timestamp_field: time
timestamp_format: UNIX timestamp_format: UNIX
explain: true explain: true
@ -86,7 +86,7 @@
- match: { num_messages_analyzed: 3 } - match: { num_messages_analyzed: 3 }
- match: { charset: "UTF-8" } - match: { charset: "UTF-8" }
- match: { has_byte_order_marker: false } - match: { has_byte_order_marker: false }
- match: { format: json } - match: { format: ndjson }
- match: { timestamp_field: time } - match: { timestamp_field: time }
- match: { joda_timestamp_formats.0: UNIX } - match: { joda_timestamp_formats.0: UNIX }
- match: { java_timestamp_formats.0: UNIX } - match: { java_timestamp_formats.0: UNIX }