[ML] Add a limit on line merging in find_file_structure (#42501)
When analysing a semi-structured text file, the find_file_structure endpoint merges lines to form multi-line messages, on the assumption that the first line of each message contains the timestamp. However, if the timestamp is misdetected, an excessive number of lines can be merged into massive messages. This commit adds a line_merge_size_limit setting (default 10000 characters) that halts the analysis if merging would create a message bigger than this limit. This prevents significant CPU time from subsequently being spent trying to determine the internal structure of huge bogus messages.
This commit is contained in:
parent
0253927ec4
commit
b61202b0a8
|
@ -37,6 +37,7 @@ import java.util.Optional;
|
|||
public class FindFileStructureRequest implements Validatable, ToXContentFragment {
|
||||
|
||||
public static final ParseField LINES_TO_SAMPLE = new ParseField("lines_to_sample");
|
||||
public static final ParseField LINE_MERGE_SIZE_LIMIT = new ParseField("line_merge_size_limit");
|
||||
public static final ParseField TIMEOUT = new ParseField("timeout");
|
||||
public static final ParseField CHARSET = FileStructure.CHARSET;
|
||||
public static final ParseField FORMAT = FileStructure.FORMAT;
|
||||
|
@ -52,6 +53,7 @@ public class FindFileStructureRequest implements Validatable, ToXContentFragment
|
|||
public static final ParseField EXPLAIN = new ParseField("explain");
|
||||
|
||||
private Integer linesToSample;
|
||||
private Integer lineMergeSizeLimit;
|
||||
private TimeValue timeout;
|
||||
private String charset;
|
||||
private FileStructure.Format format;
|
||||
|
@ -77,6 +79,14 @@ public class FindFileStructureRequest implements Validatable, ToXContentFragment
|
|||
this.linesToSample = linesToSample;
|
||||
}
|
||||
|
||||
/**
 * Returns the limit on the number of characters in a message that is formed by merging lines.
 *
 * @return the value stored on this request, or <code>null</code> if none has been set
 */
public Integer getLineMergeSizeLimit() {
|
||||
return lineMergeSizeLimit;
|
||||
}
|
||||
|
||||
/**
 * Sets the limit on the number of characters in a message that is formed by merging lines.
 *
 * @param lineMergeSizeLimit the limit to store on this request; when <code>null</code> the
 *                           <code>line_merge_size_limit</code> field is left out of the serialized request
 */
public void setLineMergeSizeLimit(Integer lineMergeSizeLimit) {
|
||||
this.lineMergeSizeLimit = lineMergeSizeLimit;
|
||||
}
|
||||
|
||||
/**
 * Returns the timeout stored on this request.
 *
 * @return the timeout, or <code>null</code> if none has been set
 */
public TimeValue getTimeout() {
|
||||
return timeout;
|
||||
}
|
||||
|
@ -228,6 +238,9 @@ public class FindFileStructureRequest implements Validatable, ToXContentFragment
|
|||
if (linesToSample != null) {
|
||||
builder.field(LINES_TO_SAMPLE.getPreferredName(), linesToSample);
|
||||
}
|
||||
if (lineMergeSizeLimit != null) {
|
||||
builder.field(LINE_MERGE_SIZE_LIMIT.getPreferredName(), lineMergeSizeLimit);
|
||||
}
|
||||
if (timeout != null) {
|
||||
builder.field(TIMEOUT.getPreferredName(), timeout);
|
||||
}
|
||||
|
@ -270,8 +283,8 @@ public class FindFileStructureRequest implements Validatable, ToXContentFragment
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(linesToSample, timeout, charset, format, columnNames, hasHeaderRow, delimiter, grokPattern, timestampFormat,
|
||||
timestampField, explain, sample);
|
||||
return Objects.hash(linesToSample, lineMergeSizeLimit, timeout, charset, format, columnNames, hasHeaderRow, delimiter, grokPattern,
|
||||
timestampFormat, timestampField, explain, sample);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -287,6 +300,7 @@ public class FindFileStructureRequest implements Validatable, ToXContentFragment
|
|||
|
||||
FindFileStructureRequest that = (FindFileStructureRequest) other;
|
||||
return Objects.equals(this.linesToSample, that.linesToSample) &&
|
||||
Objects.equals(this.lineMergeSizeLimit, that.lineMergeSizeLimit) &&
|
||||
Objects.equals(this.timeout, that.timeout) &&
|
||||
Objects.equals(this.charset, that.charset) &&
|
||||
Objects.equals(this.format, that.format) &&
|
||||
|
|
|
@ -35,6 +35,7 @@ public class FindFileStructureRequestTests extends AbstractXContentTestCase<Find
|
|||
|
||||
static {
|
||||
PARSER.declareInt(FindFileStructureRequest::setLinesToSample, FindFileStructureRequest.LINES_TO_SAMPLE);
|
||||
PARSER.declareInt(FindFileStructureRequest::setLineMergeSizeLimit, FindFileStructureRequest.LINE_MERGE_SIZE_LIMIT);
|
||||
PARSER.declareString((p, c) -> p.setTimeout(TimeValue.parseTimeValue(c, FindFileStructureRequest.TIMEOUT.getPreferredName())),
|
||||
FindFileStructureRequest.TIMEOUT);
|
||||
PARSER.declareString(FindFileStructureRequest::setCharset, FindFileStructureRequest.CHARSET);
|
||||
|
@ -72,6 +73,9 @@ public class FindFileStructureRequestTests extends AbstractXContentTestCase<Find
|
|||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setLinesToSample(randomIntBetween(1000, 2000));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setLineMergeSizeLimit(randomIntBetween(10000, 20000));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
findFileStructureRequest.setTimeout(TimeValue.timeValueSeconds(randomIntBetween(10, 20)));
|
||||
}
|
||||
|
|
|
@ -92,6 +92,13 @@ chosen.
|
|||
parameter is not specified, the structure finder guesses based on the similarity of
|
||||
the first row of the file to other rows.
|
||||
|
||||
`line_merge_size_limit`::
|
||||
(unsigned integer) The maximum number of characters in a message when lines are
|
||||
merged to form messages while analyzing semi-structured files. The default
|
||||
is 10000. If you have extremely long messages, you may need to increase this value, but
|
||||
be aware that doing so may lead to very long processing times if the way to group
|
||||
lines into messages is misdetected.
|
||||
|
||||
`lines_to_sample`::
|
||||
(unsigned integer) The number of lines to include in the structural analysis,
|
||||
starting from the beginning of the file. The minimum is 2; the default
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
*/
|
||||
package org.elasticsearch.xpack.core.ml.action;
|
||||
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.action.Action;
|
||||
import org.elasticsearch.action.ActionRequest;
|
||||
import org.elasticsearch.action.ActionRequestBuilder;
|
||||
|
@ -113,6 +114,7 @@ public class FindFileStructureAction extends Action<FindFileStructureAction.Resp
|
|||
public static class Request extends ActionRequest {
|
||||
|
||||
public static final ParseField LINES_TO_SAMPLE = new ParseField("lines_to_sample");
|
||||
public static final ParseField LINE_MERGE_SIZE_LIMIT = new ParseField("line_merge_size_limit");
|
||||
public static final ParseField TIMEOUT = new ParseField("timeout");
|
||||
public static final ParseField CHARSET = FileStructure.CHARSET;
|
||||
public static final ParseField FORMAT = FileStructure.FORMAT;
|
||||
|
@ -130,6 +132,7 @@ public class FindFileStructureAction extends Action<FindFileStructureAction.Resp
|
|||
"[%s] may only be specified if [" + FORMAT.getPreferredName() + "] is [%s]";
|
||||
|
||||
private Integer linesToSample;
|
||||
private Integer lineMergeSizeLimit;
|
||||
private TimeValue timeout;
|
||||
private String charset;
|
||||
private FileStructure.Format format;
|
||||
|
@ -154,6 +157,14 @@ public class FindFileStructureAction extends Action<FindFileStructureAction.Resp
|
|||
this.linesToSample = linesToSample;
|
||||
}
|
||||
|
||||
/**
 * Returns the maximum number of characters permitted when lines are merged to create messages.
 *
 * @return the value stored on this request, or <code>null</code> if none has been set
 */
public Integer getLineMergeSizeLimit() {
|
||||
return lineMergeSizeLimit;
|
||||
}
|
||||
|
||||
/**
 * Sets the maximum number of characters permitted when lines are merged to create messages.
 * The value is stored as given; the positivity check is applied later, during request validation.
 *
 * @param lineMergeSizeLimit the limit to store on this request, or <code>null</code> to leave it unset
 */
public void setLineMergeSizeLimit(Integer lineMergeSizeLimit) {
|
||||
this.lineMergeSizeLimit = lineMergeSizeLimit;
|
||||
}
|
||||
|
||||
/**
 * Returns the timeout stored on this request.
 *
 * @return the timeout, or <code>null</code> if none has been set
 */
public TimeValue getTimeout() {
|
||||
return timeout;
|
||||
}
|
||||
|
@ -291,6 +302,10 @@ public class FindFileStructureAction extends Action<FindFileStructureAction.Resp
|
|||
validationException =
|
||||
addValidationError("[" + LINES_TO_SAMPLE.getPreferredName() + "] must be positive if specified", validationException);
|
||||
}
|
||||
if (lineMergeSizeLimit != null && lineMergeSizeLimit <= 0) {
|
||||
validationException = addValidationError("[" + LINE_MERGE_SIZE_LIMIT.getPreferredName() + "] must be positive if specified",
|
||||
validationException);
|
||||
}
|
||||
if (format != FileStructure.Format.DELIMITED) {
|
||||
if (columnNames != null) {
|
||||
validationException = addIncompatibleArgError(COLUMN_NAMES, FileStructure.Format.DELIMITED, validationException);
|
||||
|
@ -324,6 +339,9 @@ public class FindFileStructureAction extends Action<FindFileStructureAction.Resp
|
|||
public void readFrom(StreamInput in) throws IOException {
|
||||
super.readFrom(in);
|
||||
linesToSample = in.readOptionalVInt();
|
||||
if (in.getVersion().onOrAfter(Version.CURRENT)) {
|
||||
lineMergeSizeLimit = in.readOptionalVInt();
|
||||
}
|
||||
timeout = in.readOptionalTimeValue();
|
||||
charset = in.readOptionalString();
|
||||
format = in.readBoolean() ? in.readEnum(FileStructure.Format.class) : null;
|
||||
|
@ -342,6 +360,9 @@ public class FindFileStructureAction extends Action<FindFileStructureAction.Resp
|
|||
public void writeTo(StreamOutput out) throws IOException {
|
||||
super.writeTo(out);
|
||||
out.writeOptionalVInt(linesToSample);
|
||||
if (out.getVersion().onOrAfter(Version.CURRENT)) {
|
||||
out.writeOptionalVInt(lineMergeSizeLimit);
|
||||
}
|
||||
out.writeOptionalTimeValue(timeout);
|
||||
out.writeOptionalString(charset);
|
||||
if (format == null) {
|
||||
|
@ -378,8 +399,8 @@ public class FindFileStructureAction extends Action<FindFileStructureAction.Resp
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(linesToSample, timeout, charset, format, columnNames, hasHeaderRow, delimiter, grokPattern, timestampFormat,
|
||||
timestampField, sample);
|
||||
return Objects.hash(linesToSample, lineMergeSizeLimit, timeout, charset, format, columnNames, hasHeaderRow, delimiter,
|
||||
grokPattern, timestampFormat, timestampField, sample);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -395,6 +416,7 @@ public class FindFileStructureAction extends Action<FindFileStructureAction.Resp
|
|||
|
||||
Request that = (Request) other;
|
||||
return Objects.equals(this.linesToSample, that.linesToSample) &&
|
||||
Objects.equals(this.lineMergeSizeLimit, that.lineMergeSizeLimit) &&
|
||||
Objects.equals(this.timeout, that.timeout) &&
|
||||
Objects.equals(this.charset, that.charset) &&
|
||||
Objects.equals(this.format, that.format) &&
|
||||
|
|
|
@ -26,6 +26,10 @@ public class FindFileStructureActionRequestTests extends AbstractStreamableTestC
|
|||
request.setLinesToSample(randomIntBetween(10, 2000));
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
request.setLineMergeSizeLimit(randomIntBetween(1000, 20000));
|
||||
}
|
||||
|
||||
if (randomBoolean()) {
|
||||
request.setCharset(randomAlphaOfLength(10));
|
||||
}
|
||||
|
@ -85,6 +89,18 @@ public class FindFileStructureActionRequestTests extends AbstractStreamableTestC
|
|||
assertThat(e.getMessage(), containsString(" [lines_to_sample] must be positive if specified"));
|
||||
}
|
||||
|
||||
/**
 * Checks that request validation fails, with a message naming <code>line_merge_size_limit</code>,
 * when the limit is set to a value drawn from <code>randomIntBetween(-1, 0)</code>.
 */
public void testValidateLineMergeSizeLimit() {
|
||||
|
||||
FindFileStructureAction.Request request = new FindFileStructureAction.Request();
|
||||
request.setLineMergeSizeLimit(randomIntBetween(-1, 0));
|
||||
// NOTE(review): presumably a sample is supplied so that the limit is the only invalid setting - confirm
request.setSample(new BytesArray("foo\n"));
|
||||
|
||||
ActionRequestValidationException e = request.validate();
|
||||
assertNotNull(e);
|
||||
assertThat(e.getMessage(), startsWith("Validation Failed: "));
|
||||
assertThat(e.getMessage(), containsString(" [line_merge_size_limit] must be positive if specified"));
|
||||
}
|
||||
|
||||
public void testValidateNonDelimited() {
|
||||
|
||||
FindFileStructureAction.Request request = new FindFileStructureAction.Request();
|
||||
|
|
|
@ -49,7 +49,7 @@ public class TransportFindFileStructureAction
|
|||
FileStructureFinderManager structureFinderManager = new FileStructureFinderManager(threadPool.scheduler());
|
||||
|
||||
FileStructureFinder fileStructureFinder = structureFinderManager.findFileStructure(request.getLinesToSample(),
|
||||
request.getSample().streamInput(), new FileStructureOverrides(request), request.getTimeout());
|
||||
request.getLineMergeSizeLimit(), request.getSample().streamInput(), new FileStructureOverrides(request), request.getTimeout());
|
||||
|
||||
return new FindFileStructureAction.Response(fileStructureFinder.getStructure());
|
||||
}
|
||||
|
|
|
@ -62,7 +62,8 @@ public class DelimitedFileStructureFinderFactory implements FileStructureFinderF
|
|||
|
||||
@Override
|
||||
public FileStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker,
|
||||
FileStructureOverrides overrides, TimeoutChecker timeoutChecker) throws IOException {
|
||||
int lineMergeSizeLimit, FileStructureOverrides overrides, TimeoutChecker timeoutChecker)
|
||||
throws IOException {
|
||||
return DelimitedFileStructureFinder.makeDelimitedFileStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
|
||||
csvPreference, trimFields, overrides, timeoutChecker);
|
||||
}
|
||||
|
|
|
@ -37,6 +37,7 @@ public interface FileStructureFinderFactory {
|
|||
* @param sample A sample from the file to be ingested.
|
||||
* @param charsetName The name of the character set in which the sample was provided.
|
||||
* @param hasByteOrderMarker Did the sample have a byte order marker? <code>null</code> means "not relevant".
|
||||
* @param lineMergeSizeLimit Maximum number of characters permitted when lines are merged to create messages.
|
||||
* @param overrides Stores structure decisions that have been made by the end user, and should
|
||||
* take precedence over anything the {@link FileStructureFinder} may decide.
|
||||
* @param timeoutChecker Will abort the operation if its timeout is exceeded.
|
||||
|
@ -44,5 +45,6 @@ public interface FileStructureFinderFactory {
|
|||
* @throws Exception if something goes wrong during creation.
|
||||
*/
|
||||
FileStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker,
|
||||
FileStructureOverrides overrides, TimeoutChecker timeoutChecker) throws Exception;
|
||||
int lineMergeSizeLimit, FileStructureOverrides overrides,
|
||||
TimeoutChecker timeoutChecker) throws Exception;
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ public final class FileStructureFinderManager {
|
|||
|
||||
public static final int MIN_SAMPLE_LINE_COUNT = 2;
|
||||
public static final int DEFAULT_IDEAL_SAMPLE_LINE_COUNT = 1000;
|
||||
public static final int DEFAULT_LINE_MERGE_SIZE_LIMIT = 10000;
|
||||
|
||||
static final Set<String> FILEBEAT_SUPPORTED_ENCODINGS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(
|
||||
"866", "ansi_x3.4-1968", "arabic", "ascii", "asmo-708", "big5", "big5-hkscs", "chinese", "cn-big5", "cp1250", "cp1251", "cp1252",
|
||||
|
@ -96,8 +97,9 @@ public final class FileStructureFinderManager {
|
|||
this.scheduler = Objects.requireNonNull(scheduler);
|
||||
}
|
||||
|
||||
public FileStructureFinder findFileStructure(Integer idealSampleLineCount, InputStream fromFile) throws Exception {
|
||||
return findFileStructure(idealSampleLineCount, fromFile, FileStructureOverrides.EMPTY_OVERRIDES, null);
|
||||
public FileStructureFinder findFileStructure(Integer idealSampleLineCount, Integer lineMergeSizeLimit,
|
||||
InputStream fromFile) throws Exception {
|
||||
return findFileStructure(idealSampleLineCount, lineMergeSizeLimit, fromFile, FileStructureOverrides.EMPTY_OVERRIDES, null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -106,6 +108,8 @@ public final class FileStructureFinderManager {
|
|||
* If the stream has fewer lines then an attempt will still be made, providing at
|
||||
* least {@link #MIN_SAMPLE_LINE_COUNT} lines can be read. If <code>null</code>
|
||||
* the value of {@link #DEFAULT_IDEAL_SAMPLE_LINE_COUNT} will be used.
|
||||
* @param lineMergeSizeLimit Maximum number of characters permitted when lines are merged to create messages.
|
||||
* If <code>null</code> the value of {@link #DEFAULT_LINE_MERGE_SIZE_LIMIT} will be used.
|
||||
* @param fromFile A stream from which the sample will be read.
|
||||
* @param overrides Aspects of the file structure that are known in advance. These take precedence over
|
||||
* values determined by structure analysis. An exception will be thrown if the file structure
|
||||
|
@ -116,20 +120,21 @@ public final class FileStructureFinderManager {
|
|||
* @return A {@link FileStructureFinder} object from which the structure and messages can be queried.
|
||||
* @throws Exception A variety of problems could occur at various stages of the structure finding process.
|
||||
*/
|
||||
public FileStructureFinder findFileStructure(Integer idealSampleLineCount, InputStream fromFile, FileStructureOverrides overrides,
|
||||
TimeValue timeout)
|
||||
throws Exception {
|
||||
return findFileStructure(new ArrayList<>(), (idealSampleLineCount == null) ? DEFAULT_IDEAL_SAMPLE_LINE_COUNT : idealSampleLineCount,
|
||||
fromFile, overrides, timeout);
|
||||
}
|
||||
|
||||
public FileStructureFinder findFileStructure(List<String> explanation, int idealSampleLineCount, InputStream fromFile)
|
||||
throws Exception {
|
||||
return findFileStructure(explanation, idealSampleLineCount, fromFile, FileStructureOverrides.EMPTY_OVERRIDES, null);
|
||||
}
|
||||
|
||||
public FileStructureFinder findFileStructure(List<String> explanation, int idealSampleLineCount, InputStream fromFile,
|
||||
public FileStructureFinder findFileStructure(Integer idealSampleLineCount, Integer lineMergeSizeLimit, InputStream fromFile,
|
||||
FileStructureOverrides overrides, TimeValue timeout) throws Exception {
|
||||
return findFileStructure(new ArrayList<>(), (idealSampleLineCount == null) ? DEFAULT_IDEAL_SAMPLE_LINE_COUNT : idealSampleLineCount,
|
||||
(lineMergeSizeLimit == null) ? DEFAULT_LINE_MERGE_SIZE_LIMIT : lineMergeSizeLimit, fromFile, overrides, timeout);
|
||||
}
|
||||
|
||||
/**
 * Convenience overload that delegates to the five-argument variant, passing
 * {@link FileStructureOverrides#EMPTY_OVERRIDES} as the overrides and <code>null</code> as the timeout.
 *
 * @param explanation          List passed through unchanged to the delegate.
 * @param idealSampleLineCount Sample line count passed through unchanged to the delegate.
 * @param lineMergeSizeLimit   Maximum number of characters permitted when lines are merged to create messages.
 * @param fromFile             A stream from which the sample will be read.
 * @return The {@link FileStructureFinder} returned by the delegate.
 * @throws Exception Whatever the delegate throws.
 */
public FileStructureFinder findFileStructure(List<String> explanation, int idealSampleLineCount, int lineMergeSizeLimit,
|
||||
InputStream fromFile) throws Exception {
|
||||
return findFileStructure(explanation, idealSampleLineCount, lineMergeSizeLimit, fromFile, FileStructureOverrides.EMPTY_OVERRIDES,
|
||||
null);
|
||||
}
|
||||
|
||||
public FileStructureFinder findFileStructure(List<String> explanation, int idealSampleLineCount, int lineMergeSizeLimit,
|
||||
InputStream fromFile, FileStructureOverrides overrides,
|
||||
TimeValue timeout) throws Exception {
|
||||
|
||||
try (TimeoutChecker timeoutChecker = new TimeoutChecker("structure analysis", timeout, scheduler)) {
|
||||
|
||||
|
@ -148,7 +153,8 @@ public final class FileStructureFinderManager {
|
|||
Tuple<String, Boolean> sampleInfo = sampleFile(sampleReader, charsetName, MIN_SAMPLE_LINE_COUNT,
|
||||
Math.max(MIN_SAMPLE_LINE_COUNT, idealSampleLineCount), timeoutChecker);
|
||||
|
||||
return makeBestStructureFinder(explanation, sampleInfo.v1(), charsetName, sampleInfo.v2(), overrides, timeoutChecker);
|
||||
return makeBestStructureFinder(explanation, sampleInfo.v1(), charsetName, sampleInfo.v2(), lineMergeSizeLimit, overrides,
|
||||
timeoutChecker);
|
||||
} catch (Exception e) {
|
||||
// Add a dummy exception containing the explanation so far - this can be invaluable for troubleshooting as incorrect
|
||||
// decisions made early on in the structure analysis can result in seemingly crazy decisions or timeouts later on
|
||||
|
@ -263,7 +269,8 @@ public final class FileStructureFinderManager {
|
|||
}
|
||||
|
||||
FileStructureFinder makeBestStructureFinder(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker,
|
||||
FileStructureOverrides overrides, TimeoutChecker timeoutChecker) throws Exception {
|
||||
int lineMergeSizeLimit, FileStructureOverrides overrides,
|
||||
TimeoutChecker timeoutChecker) throws Exception {
|
||||
|
||||
Character delimiter = overrides.getDelimiter();
|
||||
Character quote = overrides.getQuote();
|
||||
|
@ -295,7 +302,8 @@ public final class FileStructureFinderManager {
|
|||
for (FileStructureFinderFactory factory : factories) {
|
||||
timeoutChecker.check("high level format detection");
|
||||
if (factory.canCreateFromSample(explanation, sample)) {
|
||||
return factory.createFromSample(explanation, sample, charsetName, hasByteOrderMarker, overrides, timeoutChecker);
|
||||
return factory.createFromSample(explanation, sample, charsetName, hasByteOrderMarker, lineMergeSizeLimit, overrides,
|
||||
timeoutChecker);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -68,7 +68,8 @@ public class NdJsonFileStructureFinderFactory implements FileStructureFinderFact
|
|||
|
||||
@Override
|
||||
public FileStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker,
|
||||
FileStructureOverrides overrides, TimeoutChecker timeoutChecker) throws IOException {
|
||||
int lineMergeSizeLimit, FileStructureOverrides overrides, TimeoutChecker timeoutChecker)
|
||||
throws IOException {
|
||||
return NdJsonFileStructureFinder.makeNdJsonFileStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, overrides,
|
||||
timeoutChecker);
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
package org.elasticsearch.xpack.ml.filestructurefinder;
|
||||
|
||||
import org.elasticsearch.common.collect.Tuple;
|
||||
import org.elasticsearch.xpack.core.ml.action.FindFileStructureAction;
|
||||
import org.elasticsearch.xpack.core.ml.filestructurefinder.FieldStats;
|
||||
import org.elasticsearch.xpack.core.ml.filestructurefinder.FileStructure;
|
||||
|
||||
|
@ -24,8 +25,8 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
|
|||
private final FileStructure structure;
|
||||
|
||||
static TextLogFileStructureFinder makeTextLogFileStructureFinder(List<String> explanation, String sample, String charsetName,
|
||||
Boolean hasByteOrderMarker, FileStructureOverrides overrides,
|
||||
TimeoutChecker timeoutChecker) {
|
||||
Boolean hasByteOrderMarker, int lineMergeSizeLimit,
|
||||
FileStructureOverrides overrides, TimeoutChecker timeoutChecker) {
|
||||
String[] sampleLines = sample.split("\n");
|
||||
TimestampFormatFinder timestampFormatFinder = populateTimestampFormatFinder(explanation, sampleLines, overrides, timeoutChecker);
|
||||
switch (timestampFormatFinder.getNumMatchedFormats()) {
|
||||
|
@ -69,6 +70,16 @@ public class TextLogFileStructureFinder implements FileStructureFinder {
|
|||
// for the CSV header or lines before the first XML document starts)
|
||||
++linesConsumed;
|
||||
} else {
|
||||
// This check avoids subsequent problems when a massive message is unwieldy and slow to process
|
||||
long lengthAfterAppend = message.length() + 1L + sampleLine.length();
|
||||
if (lengthAfterAppend > lineMergeSizeLimit) {
|
||||
assert linesInMessage > 0;
|
||||
throw new IllegalArgumentException("Merging lines into messages resulted in an unacceptably long message. "
|
||||
+ "Merged message would have [" + (linesInMessage + 1) + "] lines and [" + lengthAfterAppend + "] "
|
||||
+ "characters (limit [" + lineMergeSizeLimit + "]). If you have messages this big please increase "
|
||||
+ "the value of [" + FindFileStructureAction.Request.LINE_MERGE_SIZE_LIMIT + "]. Otherwise it "
|
||||
+ "probably means the timestamp has been incorrectly detected, so try overriding that.");
|
||||
}
|
||||
message.append('\n').append(sampleLine);
|
||||
++linesInMessage;
|
||||
}
|
||||
|
|
|
@ -41,8 +41,8 @@ public class TextLogFileStructureFinderFactory implements FileStructureFinderFac
|
|||
|
||||
@Override
|
||||
public FileStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker,
|
||||
FileStructureOverrides overrides, TimeoutChecker timeoutChecker) {
|
||||
int lineMergeSizeLimit, FileStructureOverrides overrides, TimeoutChecker timeoutChecker) {
|
||||
return TextLogFileStructureFinder.makeTextLogFileStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
|
||||
overrides, timeoutChecker);
|
||||
lineMergeSizeLimit, overrides, timeoutChecker);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -125,7 +125,7 @@ public class XmlFileStructureFinderFactory implements FileStructureFinderFactory
|
|||
|
||||
@Override
|
||||
public FileStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker,
|
||||
FileStructureOverrides overrides, TimeoutChecker timeoutChecker)
|
||||
int lineMergeSizeLimit, FileStructureOverrides overrides, TimeoutChecker timeoutChecker)
|
||||
throws IOException, ParserConfigurationException, SAXException {
|
||||
return XmlFileStructureFinder.makeXmlFileStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, overrides,
|
||||
timeoutChecker);
|
||||
|
|
|
@ -53,6 +53,8 @@ public class RestFindFileStructureAction extends BaseRestHandler {
|
|||
FindFileStructureAction.Request request = new FindFileStructureAction.Request();
|
||||
request.setLinesToSample(restRequest.paramAsInt(FindFileStructureAction.Request.LINES_TO_SAMPLE.getPreferredName(),
|
||||
FileStructureFinderManager.DEFAULT_IDEAL_SAMPLE_LINE_COUNT));
|
||||
request.setLineMergeSizeLimit(restRequest.paramAsInt(FindFileStructureAction.Request.LINE_MERGE_SIZE_LIMIT.getPreferredName(),
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT));
|
||||
request.setTimeout(TimeValue.parseTimeValue(restRequest.param(FindFileStructureAction.Request.TIMEOUT.getPreferredName()),
|
||||
DEFAULT_TIMEOUT, FindFileStructureAction.Request.TIMEOUT.getPreferredName()));
|
||||
request.setCharset(restRequest.param(FindFileStructureAction.Request.CHARSET.getPreferredName()));
|
||||
|
|
|
@ -30,7 +30,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
|||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -64,8 +64,8 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
|||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker, overrides,
|
||||
NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -101,8 +101,8 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
|||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker, overrides,
|
||||
NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -135,7 +135,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
|||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -170,7 +170,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
|||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -214,8 +214,8 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
|||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker, overrides,
|
||||
NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -255,7 +255,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
|||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -301,8 +301,8 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
|||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker, overrides,
|
||||
NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -340,7 +340,7 @@ public class DelimitedFileStructureFinderTests extends FileStructureTestCase {
|
|||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
|
|
@ -102,7 +102,8 @@ public class FileStructureFinderManagerTests extends FileStructureTestCase {
|
|||
|
||||
public void testMakeBestStructureGivenNdJson() throws Exception {
|
||||
assertThat(structureFinderManager.makeBestStructureFinder(explanation, NDJSON_SAMPLE, StandardCharsets.UTF_8.name(),
|
||||
randomBoolean(), EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER), instanceOf(NdJsonFileStructureFinder.class));
|
||||
randomBoolean(), FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER),
|
||||
instanceOf(NdJsonFileStructureFinder.class));
|
||||
}
|
||||
|
||||
public void testMakeBestStructureGivenNdJsonAndDelimitedOverride() throws Exception {
|
||||
|
@ -113,12 +114,14 @@ public class FileStructureFinderManagerTests extends FileStructureTestCase {
|
|||
.setFormat(FileStructure.Format.DELIMITED).setQuote('\'').build();
|
||||
|
||||
assertThat(structureFinderManager.makeBestStructureFinder(explanation, NDJSON_SAMPLE, StandardCharsets.UTF_8.name(),
|
||||
randomBoolean(), overrides, NOOP_TIMEOUT_CHECKER), instanceOf(DelimitedFileStructureFinder.class));
|
||||
randomBoolean(), FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER),
|
||||
instanceOf(DelimitedFileStructureFinder.class));
|
||||
}
|
||||
|
||||
public void testMakeBestStructureGivenXml() throws Exception {
|
||||
assertThat(structureFinderManager.makeBestStructureFinder(explanation, XML_SAMPLE, StandardCharsets.UTF_8.name(), randomBoolean(),
|
||||
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER), instanceOf(XmlFileStructureFinder.class));
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER),
|
||||
instanceOf(XmlFileStructureFinder.class));
|
||||
}
|
||||
|
||||
public void testMakeBestStructureGivenXmlAndTextOverride() throws Exception {
|
||||
|
@ -126,12 +129,14 @@ public class FileStructureFinderManagerTests extends FileStructureTestCase {
|
|||
FileStructureOverrides overrides = FileStructureOverrides.builder().setFormat(FileStructure.Format.SEMI_STRUCTURED_TEXT).build();
|
||||
|
||||
assertThat(structureFinderManager.makeBestStructureFinder(explanation, XML_SAMPLE, StandardCharsets.UTF_8.name(), randomBoolean(),
|
||||
overrides, NOOP_TIMEOUT_CHECKER), instanceOf(TextLogFileStructureFinder.class));
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER),
|
||||
instanceOf(TextLogFileStructureFinder.class));
|
||||
}
|
||||
|
||||
public void testMakeBestStructureGivenCsv() throws Exception {
|
||||
assertThat(structureFinderManager.makeBestStructureFinder(explanation, CSV_SAMPLE, StandardCharsets.UTF_8.name(), randomBoolean(),
|
||||
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER), instanceOf(DelimitedFileStructureFinder.class));
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER),
|
||||
instanceOf(DelimitedFileStructureFinder.class));
|
||||
}
|
||||
|
||||
public void testMakeBestStructureGivenCsvAndJsonOverride() {
|
||||
|
@ -140,14 +145,15 @@ public class FileStructureFinderManagerTests extends FileStructureTestCase {
|
|||
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> structureFinderManager.makeBestStructureFinder(explanation, CSV_SAMPLE, StandardCharsets.UTF_8.name(), randomBoolean(),
|
||||
overrides, NOOP_TIMEOUT_CHECKER));
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals("Input did not match the specified format [ndjson]", e.getMessage());
|
||||
}
|
||||
|
||||
public void testMakeBestStructureGivenText() throws Exception {
|
||||
assertThat(structureFinderManager.makeBestStructureFinder(explanation, TEXT_SAMPLE, StandardCharsets.UTF_8.name(), randomBoolean(),
|
||||
EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER), instanceOf(TextLogFileStructureFinder.class));
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER),
|
||||
instanceOf(TextLogFileStructureFinder.class));
|
||||
}
|
||||
|
||||
public void testMakeBestStructureGivenTextAndDelimitedOverride() throws Exception {
|
||||
|
@ -157,7 +163,8 @@ public class FileStructureFinderManagerTests extends FileStructureTestCase {
|
|||
.setFormat(FileStructure.Format.DELIMITED).setDelimiter(':').build();
|
||||
|
||||
assertThat(structureFinderManager.makeBestStructureFinder(explanation, TEXT_SAMPLE, StandardCharsets.UTF_8.name(), randomBoolean(),
|
||||
overrides, NOOP_TIMEOUT_CHECKER), instanceOf(DelimitedFileStructureFinder.class));
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER),
|
||||
instanceOf(DelimitedFileStructureFinder.class));
|
||||
}
|
||||
|
||||
public void testFindFileStructureTimeout() throws IOException, InterruptedException {
|
||||
|
@ -190,7 +197,8 @@ public class FileStructureFinderManagerTests extends FileStructureTestCase {
|
|||
junkProducer.start();
|
||||
|
||||
ElasticsearchTimeoutException e = expectThrows(ElasticsearchTimeoutException.class,
|
||||
() -> structureFinderManager.findFileStructure(explanation, linesOfJunk - 1, bigInput, EMPTY_OVERRIDES, timeout));
|
||||
() -> structureFinderManager.findFileStructure(explanation, FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT,
|
||||
linesOfJunk - 1, bigInput, EMPTY_OVERRIDES, timeout));
|
||||
|
||||
assertThat(e.getMessage(), startsWith("Aborting structure analysis during ["));
|
||||
assertThat(e.getMessage(), endsWith("] as it has taken longer than the timeout of [" + timeout + "]"));
|
||||
|
|
|
@ -19,7 +19,7 @@ public class NdJsonFileStructureFinderTests extends FileStructureTestCase {
|
|||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = factory.createFromSample(explanation, NDJSON_SAMPLE, charset, hasByteOrderMarker,
|
||||
FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
|
|
@ -20,13 +20,36 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
|||
|
||||
private FileStructureFinderFactory factory = new TextLogFileStructureFinderFactory();
|
||||
|
||||
// Verifies that createFromSample throws an IllegalArgumentException, with an explanatory message,
// when merging continuation lines into a message exceeds the supplied line merge size limit.
public void testCreateConfigsGivenLowLineMergeSizeLimit() {
|
||||
|
||||
// Three lines start with a timestamp; the second is followed by four continuation lines that
// carry no timestamp.
String sample = "2019-05-16 16:56:14 line 1 abcdefghijklmnopqrstuvwxyz\n" +
|
||||
"2019-05-16 16:56:14 line 2 abcdefghijklmnopqrstuvwxyz\n" +
|
||||
"continuation line 2.1\n" +
|
||||
"continuation line 2.2\n" +
|
||||
"continuation line 2.3\n" +
|
||||
"continuation line 2.4\n" +
|
||||
"2019-05-16 16:56:14 line 3 abcdefghijklmnopqrstuvwxyz\n";
|
||||
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
// A limit of 100 is passed in place of the default limit, so the merge must be rejected.
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> factory.createFromSample(explanation, sample, charset, hasByteOrderMarker, 100,
|
||||
FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
// The expected message reports the size the merged message would have had (4 lines, 119
// characters) against the limit of 100, and names the line_merge_size_limit setting.
assertEquals("Merging lines into messages resulted in an unacceptably long message. Merged message would have [4] lines and "
|
||||
+ "[119] characters (limit [100]). If you have messages this big please increase the value of [line_merge_size_limit]. "
|
||||
+ "Otherwise it probably means the timestamp has been incorrectly detected, so try overriding that.", e.getMessage());
|
||||
}
|
||||
|
||||
public void testCreateConfigsGivenElasticsearchLog() throws Exception {
|
||||
assertTrue(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = factory.createFromSample(explanation, TEXT_SAMPLE, charset, hasByteOrderMarker,
|
||||
FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -66,8 +89,8 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
|||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker, overrides,
|
||||
NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -102,8 +125,8 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
|||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = factory.createFromSample(explanation, TEXT_SAMPLE, charset, hasByteOrderMarker, overrides,
|
||||
NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinder structureFinder = factory.createFromSample(explanation, TEXT_SAMPLE, charset, hasByteOrderMarker,
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -139,8 +162,8 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
|||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = factory.createFromSample(explanation, TEXT_SAMPLE, charset, hasByteOrderMarker, overrides,
|
||||
NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinder structureFinder = factory.createFromSample(explanation, TEXT_SAMPLE, charset, hasByteOrderMarker,
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
@ -181,7 +204,8 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
|||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> factory.createFromSample(explanation, TEXT_SAMPLE, charset, hasByteOrderMarker, overrides, NOOP_TIMEOUT_CHECKER));
|
||||
() -> factory.createFromSample(explanation, TEXT_SAMPLE, charset, hasByteOrderMarker,
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, overrides, NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals("Supplied Grok pattern [\\[%{LOGLEVEL:loglevel} *\\]\\[%{HOSTNAME:node}\\]\\[%{TIMESTAMP_ISO8601:timestamp}\\] " +
|
||||
"\\[%{JAVACLASS:class} *\\] %{JAVALOGMESSAGE:message}] does not match sample messages", e.getMessage());
|
||||
|
@ -200,8 +224,8 @@ public class TextLogFileStructureFinderTests extends FileStructureTestCase {
|
|||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
|
||||
() -> factory.createFromSample(explanation, sample, charset, hasByteOrderMarker, FileStructureOverrides.EMPTY_OVERRIDES,
|
||||
NOOP_TIMEOUT_CHECKER));
|
||||
() -> factory.createFromSample(explanation, sample, charset, hasByteOrderMarker,
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER));
|
||||
|
||||
assertEquals("Failed to create more than one message from the sample lines provided. (The last is discarded in "
|
||||
+ "case the sample is incomplete.) If your sample does contain multiple messages the problem is probably that "
|
||||
|
|
|
@ -19,7 +19,7 @@ public class XmlFileStructureFinderTests extends FileStructureTestCase {
|
|||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
FileStructureFinder structureFinder = factory.createFromSample(explanation, XML_SAMPLE, charset, hasByteOrderMarker,
|
||||
FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
FileStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, FileStructureOverrides.EMPTY_OVERRIDES, NOOP_TIMEOUT_CHECKER);
|
||||
|
||||
FileStructure structure = structureFinder.getStructure();
|
||||
|
||||
|
|
|
@ -11,6 +11,11 @@
|
|||
"description": "How many lines of the file should be included in the analysis",
|
||||
"default": 1000
|
||||
},
|
||||
"line_merge_size_limit": {
|
||||
"type": "int",
|
||||
"description": "Maximum number of characters permitted in a single message when lines are merged to create messages.",
|
||||
"default": 10000
|
||||
},
|
||||
"timeout": {
|
||||
"type": "time",
|
||||
"description": "Timeout after which the analysis will be aborted",
|
||||
|
|
|
@ -10,6 +10,7 @@ setup:
|
|||
Content-Type: "application/json"
|
||||
ml.find_file_structure:
|
||||
lines_to_sample: 3
|
||||
line_merge_size_limit: 1234
|
||||
timeout: 10s
|
||||
body:
|
||||
- airline: AAL
|
||||
|
|
Loading…
Reference in New Issue