Merge branch 'master' into ccr

* master:
  Mute test watcher usage stats output
  [Rollup] Fix FullClusterRestart test
  Adjust soft-deletes version after backport into 6.5
  completely drop `index.shard.check_on_startup: fix` for 7.0 (#33194)
  Fix AwaitsFix issue number
  Mute SmokeTestWatcherWithSecurityIT tests
  drop `index.shard.check_on_startup: fix` (#32279)
  tracked at
  [DOCS] Moves ml folder from x-pack/docs to docs (#33248)
  [DOCS] Move rollup APIs to docs (#31450)
  [DOCS] Rename X-Pack Commands section (#33005)
  TEST: Disable soft-deletes in ParentChildTestCase
  Fixes SecurityIntegTestCase so it always adds at least one alias (#33296)
  Fix pom for build-tools (#33300)
  Lazy evaluate java9home (#33301)
  SQL: test coverage for JdbcResultSet (#32813)
  Work around to be able to generate eclipse projects (#33295)
  Highlight that index_phrases only works if no slop is used (#33303)
  Different handling for security specific errors in the CLI. Fix for https://github.com/elastic/elasticsearch/issues/33230 (#33255)
  [ML] Refactor delimited file structure detection (#33233)
  SQL: Support multi-index format as table identifier (#33278)
  MINOR: Remove Dead Code from PathTrie (#33280)
  Enable forbiddenapis server java9 (#33245)
Commit b93507608a by Nhat Nguyen, 2018-08-31 19:03:04 -04:00
122 changed files with 2541 additions and 841 deletions

View File

@ -16,7 +16,9 @@
* specific language governing permissions and limitations
* under the License.
*/
import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin
import org.apache.tools.ant.taskdefs.condition.Os
import org.elasticsearch.gradle.BuildPlugin
import org.elasticsearch.gradle.LoggedExec
import org.elasticsearch.gradle.Version
@ -24,14 +26,9 @@ import org.elasticsearch.gradle.VersionCollection
import org.elasticsearch.gradle.VersionProperties
import org.elasticsearch.gradle.plugin.PluginBuildPlugin
import org.gradle.plugins.ide.eclipse.model.SourceFolder
import org.gradle.util.GradleVersion
import org.gradle.util.DistributionLocator
import org.apache.tools.ant.taskdefs.condition.Os
import org.apache.tools.ant.filters.ReplaceTokens
import java.nio.file.Files
import java.nio.file.Path
import java.security.MessageDigest
plugins {
id 'com.gradle.build-scan' version '1.13.2'
@ -512,6 +509,16 @@ allprojects {
tasks.cleanEclipse.dependsOn(wipeEclipseSettings)
// otherwise the eclipse merging is *super confusing*
tasks.eclipse.dependsOn(cleanEclipse, copyEclipseSettings)
// work around https://github.com/gradle/gradle/issues/6582
tasks.eclipseProject.mustRunAfter tasks.cleanEclipseProject
tasks.matching { it.name == 'eclipseClasspath' }.all {
it.mustRunAfter { tasks.cleanEclipseClasspath }
}
tasks.matching { it.name == 'eclipseJdt' }.all {
it.mustRunAfter { tasks.cleanEclipseJdt }
}
tasks.copyEclipseSettings.mustRunAfter tasks.wipeEclipseSettings
}
allprojects {

View File

@ -24,15 +24,6 @@ plugins {
id 'groovy'
}
gradlePlugin {
plugins {
simplePlugin {
id = 'elasticsearch.clusterformation'
implementationClass = 'org.elasticsearch.gradle.clusterformation.ClusterformationPlugin'
}
}
}
group = 'org.elasticsearch.gradle'
String minimumGradleVersion = file('src/main/resources/minimumGradleVersion').text.trim()

View File

@ -38,7 +38,6 @@ import org.gradle.api.artifacts.ModuleDependency
import org.gradle.api.artifacts.ModuleVersionIdentifier
import org.gradle.api.artifacts.ProjectDependency
import org.gradle.api.artifacts.ResolvedArtifact
import org.gradle.api.artifacts.SelfResolvingDependency
import org.gradle.api.artifacts.dsl.RepositoryHandler
import org.gradle.api.execution.TaskExecutionGraph
import org.gradle.api.plugins.JavaPlugin
@ -212,6 +211,7 @@ class BuildPlugin implements Plugin<Project> {
project.rootProject.ext.minimumRuntimeVersion = minimumRuntimeVersion
project.rootProject.ext.inFipsJvm = inFipsJvm
project.rootProject.ext.gradleJavaVersion = JavaVersion.toVersion(gradleJavaVersion)
project.rootProject.ext.java9Home = "${-> findJavaHome("9")}"
}
project.targetCompatibility = project.rootProject.ext.minimumRuntimeVersion
@ -225,6 +225,7 @@ class BuildPlugin implements Plugin<Project> {
project.ext.javaVersions = project.rootProject.ext.javaVersions
project.ext.inFipsJvm = project.rootProject.ext.inFipsJvm
project.ext.gradleJavaVersion = project.rootProject.ext.gradleJavaVersion
project.ext.java9Home = project.rootProject.ext.java9Home
}
private static String getPaddedMajorVersion(JavaVersion compilerJavaVersionEnum) {

View File

@ -100,7 +100,7 @@ class PrecommitTasks {
private static Task configureForbiddenApisCli(Project project) {
Task forbiddenApisCli = project.tasks.create('forbiddenApis')
project.sourceSets.forEach { sourceSet ->
project.sourceSets.all { sourceSet ->
forbiddenApisCli.dependsOn(
project.tasks.create(sourceSet.getTaskName('forbiddenApis', null), ForbiddenApisCliTask) {
ExportElasticsearchBuildResourcesTask buildResources = project.tasks.getByName('buildResources')

View File

@ -51,7 +51,8 @@ public class ForbiddenApisCliTask extends DefaultTask {
private JavaVersion targetCompatibility;
private FileCollection classesDirs;
private SourceSet sourceSet;
private String javaHome;
// This needs to be an object so it can hold Groovy GStrings
private Object javaHome;
@Input
public JavaVersion getTargetCompatibility() {
@ -142,11 +143,11 @@ public class ForbiddenApisCliTask extends DefaultTask {
}
@Input
public String getJavaHome() {
public Object getJavaHome() {
return javaHome;
}
public void setJavaHome(String javaHome) {
public void setJavaHome(Object javaHome) {
this.javaHome = javaHome;
}
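
For context, the switch from `String` to `Object` lets the task hold a lazily evaluated Groovy GString such as `"${-> findJavaHome("9")}"`, so the Java 9 home is only resolved when the task actually needs it. The snippet below is a minimal Java sketch of that lazy-evaluation idea using a `Supplier`; the class name and the `JAVA9_HOME` fallback are illustrative and not part of the build.

[source,java]
--------------------------------------------------
import java.util.function.Supplier;

// Illustrative sketch only: the value comes from a deferred lookup,
// resolved when it is first needed rather than at configuration time.
class LazyJavaHome {
    private final Supplier<String> lookup;

    LazyJavaHome(Supplier<String> lookup) {
        this.lookup = lookup;
    }

    String resolve() {
        return lookup.get(); // evaluated on use, like a GString holding a closure
    }

    public static void main(String[] args) {
        LazyJavaHome lazy = new LazyJavaHome(
            () -> System.getenv().getOrDefault("JAVA9_HOME", "/opt/jdk-9")); // hypothetical lookup
        System.out.println(lazy.resolve()); // the lookup happens only here
    }
}
--------------------------------------------------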

View File

@ -0,0 +1 @@
implementation-class=org.elasticsearch.gradle.clusterformation.ClusterformationPlugin

View File

@ -19,6 +19,12 @@
apply plugin: 'elasticsearch.docs-test'
/* List of files that have snippets that require a gold or platinum licence
and therefore cannot be tested yet... */
buildRestTests.expectedUnconvertedCandidates = [
'reference/ml/transforms.asciidoc',
]
integTestCluster {
/* Enable regexes in painless so our tests don't complain about example
* snippets that use them. */
@ -74,6 +80,17 @@ buildRestTests.docs = fileTree(projectDir) {
exclude 'build'
// Just syntax examples
exclude 'README.asciidoc'
// Broken code snippet tests
exclude 'reference/rollup/rollup-getting-started.asciidoc'
exclude 'reference/rollup/apis/rollup-job-config.asciidoc'
exclude 'reference/rollup/apis/rollup-index-caps.asciidoc'
exclude 'reference/rollup/apis/put-job.asciidoc'
exclude 'reference/rollup/apis/stop-job.asciidoc'
exclude 'reference/rollup/apis/start-job.asciidoc'
exclude 'reference/rollup/apis/rollup-search.asciidoc'
exclude 'reference/rollup/apis/delete-job.asciidoc'
exclude 'reference/rollup/apis/get-job.asciidoc'
exclude 'reference/rollup/apis/rollup-caps.asciidoc'
}
listSnippets.docs = buildRestTests.docs
@ -594,3 +611,259 @@ buildRestTests.setups['library'] = '''
{"name": "The Moon is a Harsh Mistress", "author": "Robert A. Heinlein", "release_date": "1966-04-01", "page_count": 288}
'''
buildRestTests.setups['sensor_rollup_job'] = '''
- do:
indices.create:
index: sensor-1
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_doc:
properties:
timestamp:
type: date
temperature:
type: long
voltage:
type: float
node:
type: keyword
- do:
xpack.rollup.put_job:
id: "sensor"
body: >
{
"index_pattern": "sensor-*",
"rollup_index": "sensor_rollup",
"cron": "*/30 * * * * ?",
"page_size" :1000,
"groups" : {
"date_histogram": {
"field": "timestamp",
"interval": "1h",
"delay": "7d"
},
"terms": {
"fields": ["node"]
}
},
"metrics": [
{
"field": "temperature",
"metrics": ["min", "max", "sum"]
},
{
"field": "voltage",
"metrics": ["avg"]
}
]
}
'''
buildRestTests.setups['sensor_started_rollup_job'] = '''
- do:
indices.create:
index: sensor-1
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_doc:
properties:
timestamp:
type: date
temperature:
type: long
voltage:
type: float
node:
type: keyword
- do:
bulk:
index: sensor-1
type: _doc
refresh: true
body: |
{"index":{}}
{"timestamp": 1516729294000, "temperature": 200, "voltage": 5.2, "node": "a"}
{"index":{}}
{"timestamp": 1516642894000, "temperature": 201, "voltage": 5.8, "node": "b"}
{"index":{}}
{"timestamp": 1516556494000, "temperature": 202, "voltage": 5.1, "node": "a"}
{"index":{}}
{"timestamp": 1516470094000, "temperature": 198, "voltage": 5.6, "node": "b"}
{"index":{}}
{"timestamp": 1516383694000, "temperature": 200, "voltage": 4.2, "node": "c"}
{"index":{}}
{"timestamp": 1516297294000, "temperature": 202, "voltage": 4.0, "node": "c"}
- do:
xpack.rollup.put_job:
id: "sensor"
body: >
{
"index_pattern": "sensor-*",
"rollup_index": "sensor_rollup",
"cron": "* * * * * ?",
"page_size" :1000,
"groups" : {
"date_histogram": {
"field": "timestamp",
"interval": "1h",
"delay": "7d"
},
"terms": {
"fields": ["node"]
}
},
"metrics": [
{
"field": "temperature",
"metrics": ["min", "max", "sum"]
},
{
"field": "voltage",
"metrics": ["avg"]
}
]
}
- do:
xpack.rollup.start_job:
id: "sensor"
'''
buildRestTests.setups['sensor_index'] = '''
- do:
indices.create:
index: sensor-1
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_doc:
properties:
timestamp:
type: date
temperature:
type: long
voltage:
type: float
node:
type: keyword
load:
type: double
net_in:
type: long
net_out:
type: long
hostname:
type: keyword
datacenter:
type: keyword
'''
buildRestTests.setups['sensor_prefab_data'] = '''
- do:
indices.create:
index: sensor-1
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_doc:
properties:
timestamp:
type: date
temperature:
type: long
voltage:
type: float
node:
type: keyword
- do:
indices.create:
index: sensor_rollup
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_doc:
properties:
node.terms.value:
type: keyword
temperature.sum.value:
type: double
temperature.max.value:
type: double
temperature.min.value:
type: double
timestamp.date_histogram.time_zone:
type: keyword
timestamp.date_histogram.interval:
type: keyword
timestamp.date_histogram.timestamp:
type: date
timestamp.date_histogram._count:
type: long
voltage.avg.value:
type: double
voltage.avg._count:
type: long
_rollup.id:
type: keyword
_rollup.version:
type: long
_meta:
_rollup:
sensor:
cron: "* * * * * ?"
rollup_index: "sensor_rollup"
index_pattern: "sensor-*"
timeout: "20s"
page_size: 1000
groups:
date_histogram:
delay: "7d"
field: "timestamp"
interval: "1h"
time_zone: "UTC"
terms:
fields:
- "node"
id: sensor
metrics:
- field: "temperature"
metrics:
- min
- max
- sum
- field: "voltage"
metrics:
- avg
- do:
bulk:
index: sensor_rollup
type: _doc
refresh: true
body: |
{"index":{}}
{"node.terms.value":"b","temperature.sum.value":201.0,"temperature.max.value":201.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":201.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.800000190734863,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516640400000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
{"index":{}}
{"node.terms.value":"c","temperature.sum.value":200.0,"temperature.max.value":200.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":200.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":4.199999809265137,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516381200000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
{"index":{}}
{"node.terms.value":"a","temperature.sum.value":202.0,"temperature.max.value":202.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":202.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.099999904632568,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516554000000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
{"index":{}}
{"node.terms.value":"a","temperature.sum.value":200.0,"temperature.max.value":200.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":200.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.199999809265137,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516726800000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
{"index":{}}
{"node.terms.value":"b","temperature.sum.value":198.0,"temperature.max.value":198.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":198.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.599999904632568,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516467600000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
{"index":{}}
{"node.terms.value":"c","temperature.sum.value":202.0,"temperature.max.value":202.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":202.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":4.0,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516294800000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
'''

View File

@ -1,11 +1,11 @@
[role="xpack"]
[[xpack-commands]]
= {xpack} Commands
[[commands]]
= Command line tools
[partintro]
--
{xpack} includes commands that help you configure security:
{es} provides the following tools for configuring security and performing other
tasks from the command line:
* <<certgen>>
* <<certutil>>

View File

@ -63,12 +63,6 @@ corruption is detected, it will prevent the shard from being opened. Accepts:
Check for both physical and logical corruption. This is much more
expensive in terms of CPU and memory usage.
`fix`::
Check for both physical and logical corruption. Segments that were reported
as corrupted will be automatically removed. This option *may result in data loss*.
Use with extreme caution!
WARNING: Expert only. Checking shards may take a lot of time on large indices.
--

View File

@ -61,7 +61,7 @@ include::sql/index.asciidoc[]
include::monitoring/index.asciidoc[]
include::{xes-repo-dir}/rollup/index.asciidoc[]
include::rollup/index.asciidoc[]
include::rest-api/index.asciidoc[]

View File

@ -99,7 +99,7 @@ The following parameters are accepted by `text` fields:
`index_phrases`::
If enabled, two-term word combinations ('shingles') are indexed into a separate
field. This allows exact phrase queries to run more efficiently, at the expense
field. This allows exact phrase queries (no slop) to run more efficiently, at the expense
of a larger index. Note that this works best when stopwords are not removed,
as phrases containing stopwords will not use the subsidiary field and will fall
back to a standard phrase query. Accepts `true` or `false` (default).
@ -171,4 +171,4 @@ PUT my_index
--------------------------------
// CONSOLE
<1> `min_chars` must be greater than zero, defaults to 2
<2> `max_chars` must be greater than or equal to `min_chars` and less than 20, defaults to 5
<2> `max_chars` must be greater than or equal to `min_chars` and less than 20, defaults to 5

View File

@ -78,3 +78,7 @@ The parent circuit breaker defines a new setting `indices.breaker.total.use_real
heap memory instead of only considering the reserved memory by child circuit breakers. When this
setting is `true`, the default parent breaker limit also changes from 70% to 95% of the JVM heap size.
The previous behavior can be restored by setting `indices.breaker.total.use_real_memory` to `false`.
==== `fix` value for `index.shard.check_on_startup` is removed
The deprecated option value `fix` for the setting `index.shard.check_on_startup` is no longer supported.

View File

@ -41,7 +41,7 @@ PUT _xpack/ml/anomaly_detectors/farequote
}
----------------------------------
// CONSOLE
// TEST[setup:farequote_data]
// TEST[skip:setup:farequote_data]
In this example, the `airline`, `responsetime`, and `time` fields are
aggregations.
@ -90,7 +90,7 @@ PUT _xpack/ml/datafeeds/datafeed-farequote
}
----------------------------------
// CONSOLE
// TEST[setup:farequote_job]
// TEST[skip:setup:farequote_job]
In this example, the aggregations have names that match the fields that they
operate on. That is to say, the `max` aggregation is named `time` and its

View File

@ -44,6 +44,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs
}
----------------------------------
//CONSOLE
// TEST[skip:needs-licence]
<1> The `categorization_field_name` property indicates which field will be
categorized.
<2> The resulting categories are used in a detector by setting `by_field_name`,
@ -127,6 +128,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs2
}
----------------------------------
//CONSOLE
// TEST[skip:needs-licence]
<1> The
{ref}/analysis-pattern-replace-charfilter.html[`pattern_replace` character filter]
here achieves exactly the same as the `categorization_filters` in the first
@ -193,6 +195,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs3
}
----------------------------------
//CONSOLE
// TEST[skip:needs-licence]
<1> Tokens basically consist of hyphens, digits, letters, underscores and dots.
<2> By default, categorization ignores tokens that begin with a digit.
<3> By default, categorization also ignores tokens that are hexadecimal numbers.

View File

@ -36,20 +36,20 @@ The scenarios in this section describe some best practices for generating useful
* <<ml-configuring-transform>>
* <<ml-configuring-detector-custom-rules>>
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/customurl.asciidoc
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/customurl.asciidoc
include::customurl.asciidoc[]
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/aggregations.asciidoc
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/aggregations.asciidoc
include::aggregations.asciidoc[]
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/categories.asciidoc
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/categories.asciidoc
include::categories.asciidoc[]
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/populations.asciidoc
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/populations.asciidoc
include::populations.asciidoc[]
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/transforms.asciidoc
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/transforms.asciidoc
include::transforms.asciidoc[]
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/detector-custom-rules.asciidoc
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/detector-custom-rules.asciidoc
include::detector-custom-rules.asciidoc[]

View File

@ -106,7 +106,7 @@ POST _xpack/ml/anomaly_detectors/sample_job/_update
}
----------------------------------
//CONSOLE
//TEST[setup:sample_job]
//TEST[skip:setup:sample_job]
When you click this custom URL in the anomalies table in {kib}, it opens up the
*Discover* page and displays source data for the period one hour before and

View File

@ -39,6 +39,7 @@ PUT _xpack/ml/filters/safe_domains
}
----------------------------------
// CONSOLE
// TEST[skip:needs-licence]
Now, we can create our job specifying a scope that uses the `safe_domains`
filter for the `highest_registered_domain` field:
@ -70,6 +71,7 @@ PUT _xpack/ml/anomaly_detectors/dns_exfiltration_with_rule
}
----------------------------------
// CONSOLE
// TEST[skip:needs-licence]
As time advances and we see more data and more results, we might encounter new
domains that we want to add in the filter. We can do that by using the
@ -83,7 +85,7 @@ POST _xpack/ml/filters/safe_domains/_update
}
----------------------------------
// CONSOLE
// TEST[setup:ml_filter_safe_domains]
// TEST[skip:setup:ml_filter_safe_domains]
Note that we can use any of the `partition_field_name`, `over_field_name`, or
`by_field_name` fields in the `scope`.
@ -123,6 +125,7 @@ PUT _xpack/ml/anomaly_detectors/scoping_multiple_fields
}
----------------------------------
// CONSOLE
// TEST[skip:needs-licence]
Such a detector will skip results when the values of all 3 scoped fields
are included in the referenced filters.
@ -166,6 +169,7 @@ PUT _xpack/ml/anomaly_detectors/cpu_with_rule
}
----------------------------------
// CONSOLE
// TEST[skip:needs-licence]
When there are multiple conditions they are combined with a logical `and`.
This is useful when we want the rule to apply to a range. We simply create
@ -205,6 +209,7 @@ PUT _xpack/ml/anomaly_detectors/rule_with_range
}
----------------------------------
// CONSOLE
// TEST[skip:needs-licence]
==== Custom rules in the life-cycle of a job

View File

@ -59,6 +59,7 @@ PUT _xpack/ml/anomaly_detectors/example1
}
--------------------------------------------------
// CONSOLE
// TEST[skip:needs-licence]
This example is probably the simplest possible analysis. It identifies
time buckets during which the overall count of events is higher or lower than
@ -86,6 +87,7 @@ PUT _xpack/ml/anomaly_detectors/example2
}
--------------------------------------------------
// CONSOLE
// TEST[skip:needs-licence]
If you use this `high_count` function in a detector in your job, it
models the event rate for each error code. It detects users that generate an
@ -110,6 +112,7 @@ PUT _xpack/ml/anomaly_detectors/example3
}
--------------------------------------------------
// CONSOLE
// TEST[skip:needs-licence]
In this example, the function detects when the count of events for a
status code is lower than usual.
@ -136,6 +139,7 @@ PUT _xpack/ml/anomaly_detectors/example4
}
--------------------------------------------------
// CONSOLE
// TEST[skip:needs-licence]
If you are analyzing an aggregated `events_per_min` field, do not use a sum
function (for example, `sum(events_per_min)`). Instead, use the count function
@ -200,6 +204,7 @@ PUT _xpack/ml/anomaly_detectors/example5
}
--------------------------------------------------
// CONSOLE
// TEST[skip:needs-licence]
If you use this `high_non_zero_count` function in a detector in your job, it
models the count of events for the `signaturename` field. It ignores any buckets
@ -253,6 +258,7 @@ PUT _xpack/ml/anomaly_detectors/example6
}
--------------------------------------------------
// CONSOLE
// TEST[skip:needs-licence]
This `distinct_count` function detects when a system has an unusual number
of logged in users. When you use this function in a detector in your job, it
@ -278,6 +284,7 @@ PUT _xpack/ml/anomaly_detectors/example7
}
--------------------------------------------------
// CONSOLE
// TEST[skip:needs-licence]
This example detects instances of port scanning. When you use this function in a
detector in your job, it models the distinct count of ports. It also detects the

View File

@ -47,6 +47,7 @@ PUT _xpack/ml/anomaly_detectors/example1
}
--------------------------------------------------
// CONSOLE
// TEST[skip:needs-licence]
If you use this `lat_long` function in a detector in your job, it
detects anomalies where the geographic location of a credit card transaction is
@ -98,6 +99,6 @@ PUT _xpack/ml/datafeeds/datafeed-test2
}
--------------------------------------------------
// CONSOLE
// TEST[setup:farequote_job]
// TEST[skip:setup:farequote_job]
For more information, see <<ml-configuring-transform>>.

View File

(22 binary image files changed; each entry reports the same file size before and after, ranging from 1.3 KiB to 384 KiB.)

View File

@ -51,14 +51,11 @@ PUT _xpack/ml/anomaly_detectors/population
}
----------------------------------
//CONSOLE
// TEST[skip:needs-licence]
<1> This `over_field_name` property indicates that the metrics for each user (
as identified by their `username` value) are analyzed relative to other users
in each bucket.
//TO-DO: Per sophiec20 "Perhaps add the datafeed config and add a query filter to
//include only workstations as servers and printers would behave differently
//from the population
If your data is stored in {es}, you can use the population job wizard in {kib}
to create a job with these same properties. For example, the population job
wizard provides the following job settings:

View File

@ -28,7 +28,7 @@ request stops the `feed1` {dfeed}:
POST _xpack/ml/datafeeds/datafeed-total-requests/_stop
--------------------------------------------------
// CONSOLE
// TEST[setup:server_metrics_startdf]
// TEST[skip:setup:server_metrics_startdf]
NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}.
For more information, see <<security-privileges>>.
@ -49,6 +49,7 @@ If you are upgrading your cluster, you can use the following request to stop all
POST _xpack/ml/datafeeds/_all/_stop
----------------------------------
// CONSOLE
// TEST[skip:needs-licence]
[float]
[[closing-ml-jobs]]
@ -67,7 +68,7 @@ example, the following request closes the `job1` job:
POST _xpack/ml/anomaly_detectors/total-requests/_close
--------------------------------------------------
// CONSOLE
// TEST[setup:server_metrics_openjob]
// TEST[skip:setup:server_metrics_openjob]
NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}.
For more information, see <<security-privileges>>.
@ -86,3 +87,4 @@ all open jobs on the cluster:
POST _xpack/ml/anomaly_detectors/_all/_close
----------------------------------
// CONSOLE
// TEST[skip:needs-licence]

View File

@ -95,7 +95,7 @@ PUT /my_index/my_type/1
}
----------------------------------
// CONSOLE
// TESTSETUP
// TEST[skip:SETUP]
<1> In this example, string fields are mapped as `keyword` fields to support
aggregation. If you want both a full text (`text`) and a keyword (`keyword`)
version of the same field, use multi-fields. For more information, see
@ -144,7 +144,7 @@ PUT _xpack/ml/datafeeds/datafeed-test1
}
----------------------------------
// CONSOLE
// TEST[skip:broken]
// TEST[skip:needs-licence]
<1> A script field named `total_error_count` is referenced in the detector
within the job.
<2> The script field is defined in the {dfeed}.
@ -163,7 +163,7 @@ You can preview the contents of the {dfeed} by using the following API:
GET _xpack/ml/datafeeds/datafeed-test1/_preview
----------------------------------
// CONSOLE
// TEST[continued]
// TEST[skip:continued]
In this example, the API returns the following results, which contain a sum of
the `error_count` and `aborted_count` values:
@ -177,8 +177,6 @@ the `error_count` and `aborted_count` values:
}
]
----------------------------------
// TESTRESPONSE
NOTE: This example demonstrates how to use script fields, but it contains
insufficient data to generate meaningful results. For a full demonstration of
@ -254,7 +252,7 @@ PUT _xpack/ml/datafeeds/datafeed-test2
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
// TEST[skip:broken]
// TEST[skip:needs-licence]
<1> The script field has a rather generic name in this case, since it will
be used for various tests in the subsequent examples.
<2> The script field uses the plus (+) operator to concatenate strings.
@ -271,7 +269,6 @@ and "SMITH " have been concatenated and an underscore was added:
}
]
----------------------------------
// TESTRESPONSE
[[ml-configuring-transform3]]
.Example 3: Trimming strings
@ -292,7 +289,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
// TEST[continued]
// TEST[skip:continued]
<1> This script field uses the `trim()` function to trim extra white space from a
string.
@ -308,7 +305,6 @@ has been trimmed to "SMITH":
}
]
----------------------------------
// TESTRESPONSE
[[ml-configuring-transform4]]
.Example 4: Converting strings to lowercase
@ -329,7 +325,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
// TEST[continued]
// TEST[skip:continued]
<1> This script field uses the `toLowerCase` function to convert a string to all
lowercase letters. Likewise, you can use the `toUpperCase()` function to convert
a string to uppercase letters.
@ -346,7 +342,6 @@ has been converted to "joe":
}
]
----------------------------------
// TESTRESPONSE
[[ml-configuring-transform5]]
.Example 5: Converting strings to mixed case formats
@ -367,7 +362,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
// TEST[continued]
// TEST[skip:continued]
<1> This script field is a more complicated example of case manipulation. It uses
the `subString()` function to capitalize the first letter of a string and
converts the remaining characters to lowercase.
@ -384,7 +379,6 @@ has been converted to "Joe":
}
]
----------------------------------
// TESTRESPONSE
[[ml-configuring-transform6]]
.Example 6: Replacing tokens
@ -405,7 +399,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
// TEST[continued]
// TEST[skip:continued]
<1> This script field uses regular expressions to replace white
space with underscores.
@ -421,7 +415,6 @@ The preview {dfeed} API returns the following results, which show that
}
]
----------------------------------
// TESTRESPONSE
[[ml-configuring-transform7]]
.Example 7: Regular expression matching and concatenation
@ -442,7 +435,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
// TEST[continued]
// TEST[skip:continued]
<1> This script field looks for a specific regular expression pattern and emits the
matched groups as a concatenated string. If no match is found, it emits an empty
string.
@ -459,7 +452,6 @@ The preview {dfeed} API returns the following results, which show that
}
]
----------------------------------
// TESTRESPONSE
[[ml-configuring-transform8]]
.Example 8: Splitting strings by domain name
@ -509,7 +501,7 @@ PUT _xpack/ml/datafeeds/datafeed-test3
GET _xpack/ml/datafeeds/datafeed-test3/_preview
--------------------------------------------------
// CONSOLE
// TEST[skip:broken]
// TEST[skip:needs-licence]
If you have a single field that contains a well-formed DNS domain name, you can
use the `domainSplit()` function to split the string into its highest registered
@ -537,7 +529,6 @@ The preview {dfeed} API returns the following results, which show that
}
]
----------------------------------
// TESTRESPONSE
[[ml-configuring-transform9]]
.Example 9: Transforming geo_point data
@ -583,7 +574,7 @@ PUT _xpack/ml/datafeeds/datafeed-test4
GET _xpack/ml/datafeeds/datafeed-test4/_preview
--------------------------------------------------
// CONSOLE
// TEST[skip:broken]
// TEST[skip:needs-licence]
In {es}, location data can be stored in `geo_point` fields but this data type is
not supported natively in {xpackml} analytics. This example of a script field
@ -602,4 +593,4 @@ The preview {dfeed} API returns the following results, which show that
}
]
----------------------------------
// TESTRESPONSE

View File

@ -544,3 +544,8 @@ You can use the following APIs to add, remove, and retrieve role mappings:
=== Privilege APIs
See <<security-api-has-privileges>>.
[role="exclude",id="xpack-commands"]
=== X-Pack commands
See <<commands>>.

View File

@ -23,7 +23,7 @@ include::{xes-repo-dir}/rest-api/graph/explore.asciidoc[]
include::{es-repo-dir}/licensing/index.asciidoc[]
include::{es-repo-dir}/migration/migration.asciidoc[]
include::{xes-repo-dir}/rest-api/ml-api.asciidoc[]
include::{xes-repo-dir}/rest-api/rollup-api.asciidoc[]
include::{es-repo-dir}/rollup/rollup-api.asciidoc[]
include::{xes-repo-dir}/rest-api/security.asciidoc[]
include::{xes-repo-dir}/rest-api/watcher.asciidoc[]
include::{xes-repo-dir}/rest-api/defs.asciidoc[]

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-api-quickref]]
== API Quick Reference

View File

@ -1,4 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-delete-job]]
=== Delete Job API
++++

View File

@ -1,4 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-get-job]]
=== Get Rollup Jobs API
++++

View File

@ -1,4 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-put-job]]
=== Create Job API
++++

View File

@ -1,4 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-get-rollup-caps]]
=== Get Rollup Job Capabilities
++++

View File

@ -1,4 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-job-config]]
=== Rollup Job Configuration

View File

@ -1,4 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-search]]
=== Rollup Search
++++

View File

@ -1,4 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-start-job]]
=== Start Job API
++++

View File

@ -1,4 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-stop-job]]
=== Stop Job API
++++

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[xpack-rollup]]
= Rolling up historical data

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-overview]]
== Overview

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-agg-limitations]]
== Rollup Aggregation Limitations

View File

@ -1,4 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-apis]]
== Rollup APIs
@ -26,12 +27,12 @@
include::rollup/delete-job.asciidoc[]
include::rollup/get-job.asciidoc[]
include::rollup/put-job.asciidoc[]
include::rollup/start-job.asciidoc[]
include::rollup/stop-job.asciidoc[]
include::rollup/rollup-caps.asciidoc[]
include::rollup/rollup-index-caps.asciidoc[]
include::rollup/rollup-search.asciidoc[]
include::rollup/rollup-job-config.asciidoc[]
include::apis/delete-job.asciidoc[]
include::apis/get-job.asciidoc[]
include::apis/put-job.asciidoc[]
include::apis/start-job.asciidoc[]
include::apis/stop-job.asciidoc[]
include::apis/rollup-caps.asciidoc[]
include::apis/rollup-index-caps.asciidoc[]
include::apis/rollup-search.asciidoc[]
include::apis/rollup-job-config.asciidoc[]

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-getting-started]]
== Getting Started

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-search-limitations]]
== Rollup Search Limitations

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-understanding-groups]]
== Understanding Groups

View File

@ -22,6 +22,15 @@ the first parameter:
$ ./bin/elasticsearch-sql-cli https://some.server:9200
--------------------------------------------------
If security is enabled on your cluster, you can pass the username
and password in the form `username:password@host_name:port`
to the SQL CLI:
[source,bash]
--------------------------------------------------
$ ./bin/elasticsearch-sql-cli https://sql_user:strongpassword@some.server:9200
--------------------------------------------------
Once the CLI is running you can use any <<sql-spec,query>> that
Elasticsearch supports:

View File

@ -46,12 +46,13 @@ if (!isEclipse && !isIdea) {
targetCompatibility = 9
}
/* Enable this when forbiddenapis was updated to 2.6.
* See: https://github.com/elastic/elasticsearch/issues/29292
forbiddenApisJava9 {
targetCompatibility = 9
if (project.runtimeJavaVersion < JavaVersion.VERSION_1_9) {
targetCompatibility = JavaVersion.VERSION_1_9
javaHome = project.java9Home
}
replaceSignatureFiles 'jdk-signatures'
}
*/
jar {
metaInf {

View File

@ -25,6 +25,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.IndexModule;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.join.ParentJoinPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
@ -58,6 +59,8 @@ public abstract class ParentChildTestCase extends ESIntegTestCase {
@Override
public Settings indexSettings() {
Settings.Builder builder = Settings.builder().put(super.indexSettings())
// AwaitsFix: https://github.com/elastic/elasticsearch/issues/33318
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), false)
// aggressive filter caching so that we can assert on the filter cache size
.put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), true)
.put(IndexModule.INDEX_QUERY_CACHE_EVERYTHING_SETTING.getKey(), true);

View File

@ -58,13 +58,13 @@ if (!isEclipse && !isIdea) {
sourceCompatibility = 9
targetCompatibility = 9
}
/* Enable this when forbiddenapis was updated to 2.6.
* See: https://github.com/elastic/elasticsearch/issues/29292
forbiddenApisJava9 {
targetCompatibility = 9
if (project.runtimeJavaVersion < JavaVersion.VERSION_1_9) {
targetCompatibility = JavaVersion.VERSION_1_9
javaHome = project.java9Home
}
}
*/
jar {
metaInf {

View File

@ -104,24 +104,12 @@ public class PathTrie<T> {
namedWildcard = key.substring(key.indexOf('{') + 1, key.indexOf('}'));
}
public boolean isWildcard() {
return isWildcard;
}
public synchronized void addChild(TrieNode child) {
addInnerChild(child.key, child);
}
private void addInnerChild(String key, TrieNode child) {
Map<String, TrieNode> newChildren = new HashMap<>(children);
newChildren.put(key, child);
children = unmodifiableMap(newChildren);
}
public TrieNode getChild(String key) {
return children.get(key);
}
public synchronized void insert(String[] path, int index, T value) {
if (index >= path.length)
return;
@ -302,7 +290,7 @@ public class PathTrie<T> {
}
int index = 0;
// Supports initial delimiter.
if (strings.length > 0 && strings[0].isEmpty()) {
if (strings[0].isEmpty()) {
index = 1;
}
root.insert(strings, index, value);
@ -327,7 +315,7 @@ public class PathTrie<T> {
}
int index = 0;
// Supports initial delimiter.
if (strings.length > 0 && strings[0].isEmpty()) {
if (strings[0].isEmpty()) {
index = 1;
}
root.insertOrUpdate(strings, index, value, updater);
@ -352,7 +340,7 @@ public class PathTrie<T> {
int index = 0;
// Supports initial delimiter.
if (strings.length > 0 && strings[0].isEmpty()) {
if (strings[0].isEmpty()) {
index = 1;
}
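
For reference, the `index = 1` adjustment exists because a path registered with a leading delimiter splits into an array whose first element is the empty string. The stand-alone Java sketch below only illustrates that split behavior; it assumes, as the removed dead-code check implies, that callers never reach this point with an empty path.

[source,java]
--------------------------------------------------
import java.util.Arrays;

// Illustrative sketch only: a leading '/' yields an empty first element,
// which is why trie insertion and lookup start at index 1 in that case.
class PathSplitDemo {
    public static void main(String[] args) {
        System.out.println(Arrays.toString("/_cluster/health".split("/"))); // [, _cluster, health]
        System.out.println(Arrays.toString("_cluster/health".split("/")));  // [_cluster, health]
    }
}
--------------------------------------------------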

View File

@ -75,11 +75,10 @@ public final class IndexSettings {
switch(s) {
case "false":
case "true":
case "fix":
case "checksum":
return s;
default:
throw new IllegalArgumentException("unknown value for [index.shard.check_on_startup] must be one of [true, false, fix, checksum] but was: " + s);
throw new IllegalArgumentException("unknown value for [index.shard.check_on_startup] must be one of [true, false, checksum] but was: " + s);
}
}, Property.IndexScope);

View File

@ -1332,7 +1332,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
}
recoveryState.setStage(RecoveryState.Stage.VERIFY_INDEX);
// also check here, before we apply the translog
if (Booleans.isTrue(checkIndexOnStartup)) {
if (Booleans.isTrue(checkIndexOnStartup) || "checksum".equals(checkIndexOnStartup)) {
try {
checkIndex();
} catch (IOException ex) {
@ -1955,6 +1955,9 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
if (store.tryIncRef()) {
try {
doCheckIndex();
} catch (IOException e) {
store.markStoreCorrupted(e);
throw e;
} finally {
store.decRef();
}
@ -1998,18 +2001,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
return;
}
logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
if ("fix".equals(checkIndexOnStartup)) {
if (logger.isDebugEnabled()) {
logger.debug("fixing index, writing new segments file ...");
}
store.exorciseIndex(status);
if (logger.isDebugEnabled()) {
logger.debug("index fixed, wrote new segments file \"{}\"", status.segmentsFileName);
}
} else {
// only throw a failure if we are not going to fix the index
throw new IllegalStateException("index check failure but can't fix it");
}
throw new IOException("index check failure");
}
}

View File

@ -134,7 +134,8 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
static final int VERSION_STACK_TRACE = 1; // we write the stack trace too since 1.4.0
static final int VERSION_START = 0;
static final int VERSION = VERSION_WRITE_THROWABLE;
static final String CORRUPTED = "corrupted_";
// public is for test purposes
public static final String CORRUPTED = "corrupted_";
public static final Setting<TimeValue> INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING =
Setting.timeSetting("index.store.stats_refresh_interval", TimeValue.timeValueSeconds(10), Property.IndexScope);
@ -360,18 +361,6 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
}
}
/**
* Repairs the index using the previous returned status from {@link #checkIndex(PrintStream)}.
*/
public void exorciseIndex(CheckIndex.Status status) throws IOException {
metadataLock.writeLock().lock();
try (CheckIndex checkIndex = new CheckIndex(directory)) {
checkIndex.exorciseIndex(status);
} finally {
metadataLock.writeLock().unlock();
}
}
public StoreStats stats() throws IOException {
ensureOpen();
return new StoreStats(directory.estimateSize());
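
The tests added further down count marker files whose names start with the now-public `Store.CORRUPTED` prefix (`corrupted_`). The snippet below is a small, hypothetical stand-alone sketch of that detection over a shard's index directory; it is not the `Store` implementation itself.

[source,java]
--------------------------------------------------
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Stream;

// Illustrative sketch only: counts corruption markers in an index directory.
// The real markers are written by Store.markStoreCorrupted(...) with the "corrupted_" prefix.
class CorruptionMarkerCount {
    static long count(Path indexPath) throws IOException {
        try (Stream<Path> files = Files.list(indexPath)) {
            return files.filter(Files::isRegularFile)
                        .filter(p -> p.getFileName().toString().startsWith("corrupted_"))
                        .count();
        }
    }
}
--------------------------------------------------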

View File

@ -69,7 +69,7 @@ public class MetaDataIndexTemplateServiceTests extends ESSingleNodeTestCase {
containsString("Failed to parse value [0] for setting [index.number_of_shards] must be >= 1"));
assertThat(throwables.get(0).getMessage(),
containsString("unknown value for [index.shard.check_on_startup] " +
"must be one of [true, false, fix, checksum] but was: blargh"));
"must be one of [true, false, checksum] but was: blargh"));
}
public void testIndexTemplateValidationAccumulatesValidationErrors() {

View File

@ -23,6 +23,7 @@ import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
@ -118,6 +119,7 @@ import org.elasticsearch.snapshots.Snapshot;
import org.elasticsearch.snapshots.SnapshotId;
import org.elasticsearch.snapshots.SnapshotInfo;
import org.elasticsearch.snapshots.SnapshotShardFailure;
import org.elasticsearch.test.CorruptionUtils;
import org.elasticsearch.test.DummyShardLock;
import org.elasticsearch.test.FieldMaskingReader;
import org.elasticsearch.test.VersionUtils;
@ -126,7 +128,11 @@ import org.elasticsearch.ElasticsearchException;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -1239,7 +1245,7 @@ public class IndexShardTests extends IndexShardTestCase {
};
try (Store store = createStore(shardId, new IndexSettings(metaData, Settings.EMPTY), directory)) {
IndexShard shard = newShard(shardRouting, shardPath, metaData, store,
IndexShard shard = newShard(shardRouting, shardPath, metaData, i -> store,
null, new InternalEngineFactory(), () -> {
}, EMPTY_EVENT_LISTENER);
AtomicBoolean failureCallbackTriggered = new AtomicBoolean(false);
@ -2590,6 +2596,143 @@ public class IndexShardTests extends IndexShardTestCase {
closeShards(newShard);
}
public void testIndexCheckOnStartup() throws Exception {
final IndexShard indexShard = newStartedShard(true);
final long numDocs = between(10, 100);
for (long i = 0; i < numDocs; i++) {
indexDoc(indexShard, "_doc", Long.toString(i), "{}");
}
indexShard.flush(new FlushRequest());
closeShards(indexShard);
final ShardPath shardPath = indexShard.shardPath();
final Path indexPath = corruptIndexFile(shardPath);
final AtomicInteger corruptedMarkerCount = new AtomicInteger();
final SimpleFileVisitor<Path> corruptedVisitor = new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
if (Files.isRegularFile(file) && file.getFileName().toString().startsWith(Store.CORRUPTED)) {
corruptedMarkerCount.incrementAndGet();
}
return FileVisitResult.CONTINUE;
}
};
Files.walkFileTree(indexPath, corruptedVisitor);
assertThat("corruption marker should not be there", corruptedMarkerCount.get(), equalTo(0));
final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),
RecoverySource.StoreRecoverySource.EXISTING_STORE_INSTANCE
);
// start the shard and run the index check on startup; this forces the shard to fail because of the corrupted index files
final IndexMetaData indexMetaData = IndexMetaData.builder(indexShard.indexSettings().getIndexMetaData())
.settings(Settings.builder()
.put(indexShard.indexSettings.getSettings())
.put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("true", "checksum")))
.build();
IndexShard corruptedShard = newShard(shardRouting, shardPath, indexMetaData,
null, null, indexShard.engineFactory,
indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
final IndexShardRecoveryException indexShardRecoveryException =
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
assertThat(indexShardRecoveryException.getMessage(), equalTo("failed recovery"));
// check that corrupt marker is there
Files.walkFileTree(indexPath, corruptedVisitor);
assertThat("store has to be marked as corrupted", corruptedMarkerCount.get(), equalTo(1));
try {
closeShards(corruptedShard);
} catch (RuntimeException e) {
assertThat(e.getMessage(), equalTo("CheckIndex failed"));
}
}
public void testShardDoesNotStartIfCorruptedMarkerIsPresent() throws Exception {
final IndexShard indexShard = newStartedShard(true);
final long numDocs = between(10, 100);
for (long i = 0; i < numDocs; i++) {
indexDoc(indexShard, "_doc", Long.toString(i), "{}");
}
indexShard.flush(new FlushRequest());
closeShards(indexShard);
final ShardPath shardPath = indexShard.shardPath();
final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),
RecoverySource.StoreRecoverySource.EXISTING_STORE_INSTANCE
);
final IndexMetaData indexMetaData = indexShard.indexSettings().getIndexMetaData();
final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME);
// create corrupted marker
final String corruptionMessage = "fake ioexception";
try(Store store = createStore(indexShard.indexSettings(), shardPath)) {
store.markStoreCorrupted(new IOException(corruptionMessage));
}
// try to start shard on corrupted files
final IndexShard corruptedShard = newShard(shardRouting, shardPath, indexMetaData,
null, null, indexShard.engineFactory,
indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
final IndexShardRecoveryException exception1 = expectThrows(IndexShardRecoveryException.class,
() -> newStartedShard(p -> corruptedShard, true));
assertThat(exception1.getCause().getMessage(), equalTo(corruptionMessage + " (resource=preexisting_corruption)"));
closeShards(corruptedShard);
final AtomicInteger corruptedMarkerCount = new AtomicInteger();
final SimpleFileVisitor<Path> corruptedVisitor = new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
if (Files.isRegularFile(file) && file.getFileName().toString().startsWith(Store.CORRUPTED)) {
corruptedMarkerCount.incrementAndGet();
}
return FileVisitResult.CONTINUE;
}
};
Files.walkFileTree(indexPath, corruptedVisitor);
assertThat("store has to be marked as corrupted", corruptedMarkerCount.get(), equalTo(1));
// try once more to start the shard on the corrupted files
final IndexShard corruptedShard2 = newShard(shardRouting, shardPath, indexMetaData,
null, null, indexShard.engineFactory,
indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
final IndexShardRecoveryException exception2 = expectThrows(IndexShardRecoveryException.class,
() -> newStartedShard(p -> corruptedShard2, true));
assertThat(exception2.getCause().getMessage(), equalTo(corruptionMessage + " (resource=preexisting_corruption)"));
closeShards(corruptedShard2);
// check that corrupt marker is there
corruptedMarkerCount.set(0);
Files.walkFileTree(indexPath, corruptedVisitor);
assertThat("store still has a single corrupt marker", corruptedMarkerCount.get(), equalTo(1));
}
private Path corruptIndexFile(ShardPath shardPath) throws IOException {
final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME);
final Path[] filesToCorrupt =
Files.walk(indexPath)
.filter(p -> {
final String name = p.getFileName().toString();
return Files.isRegularFile(p)
&& name.startsWith("extra") == false // Skip files added by Lucene's ExtrasFS
&& IndexWriter.WRITE_LOCK_NAME.equals(name) == false
&& name.startsWith("segments_") == false && name.endsWith(".si") == false;
})
.toArray(Path[]::new);
CorruptionUtils.corruptFile(random(), filesToCorrupt);
return indexPath;
}
/**
* Simulates a scenario that happens when we are async fetching snapshot metadata from GatewayService
* and checking index concurrently. This should always be possible without any exception.
@ -2613,7 +2756,7 @@ public class IndexShardTests extends IndexShardTestCase {
final IndexMetaData indexMetaData = IndexMetaData.builder(indexShard.indexSettings().getIndexMetaData())
.settings(Settings.builder()
.put(indexShard.indexSettings.getSettings())
.put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("false", "true", "checksum", "fix")))
.put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("false", "true", "checksum")))
.build();
final IndexShard newShard = newShard(shardRouting, indexShard.shardPath(), indexMetaData,
null, null, indexShard.engineFactory, indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);

View File

@ -32,6 +32,7 @@ import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingHelper;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.TestShardRouting;
import org.elasticsearch.common.CheckedFunction;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.lucene.uid.Versions;
@ -156,7 +157,6 @@ public abstract class IndexShardTestCase extends ESTestCase {
return Settings.EMPTY;
}
protected Store createStore(IndexSettings indexSettings, ShardPath shardPath) throws IOException {
return createStore(shardPath.getShardId(), indexSettings, newFSDirectory(shardPath.resolveIndex()));
}
@ -169,7 +169,6 @@ public abstract class IndexShardTestCase extends ESTestCase {
}
};
return new Store(shardId, indexSettings, directoryService, new DummyShardLock(shardId));
}
/**
@ -179,7 +178,17 @@ public abstract class IndexShardTestCase extends ESTestCase {
* another shard)
*/
protected IndexShard newShard(boolean primary) throws IOException {
return newShard(primary, Settings.EMPTY, new InternalEngineFactory());
return newShard(primary, Settings.EMPTY);
}
/**
* Creates a new initializing shard. The shard will have its own unique data path.
*
* @param primary indicates whether to create a primary shard (ready to recover from an empty store) or a replica (ready to recover from
* another shard)
*/
protected IndexShard newShard(final boolean primary, final Settings settings) throws IOException {
return newShard(primary, settings, new InternalEngineFactory());
}
/**
@ -318,23 +327,25 @@ public abstract class IndexShardTestCase extends ESTestCase {
* @param routing shard routing to use
* @param shardPath path to use for shard data
* @param indexMetaData indexMetaData for the shard, including any mapping
* @param store an optional custom store to use. If null a default file based store will be created
* @param storeProvider an optional custom store provider to use. If null a default file based store will be created
* @param indexSearcherWrapper an optional wrapper to be used during searchers
* @param globalCheckpointSyncer callback for syncing global checkpoints
* @param indexEventListener index event listener
* @param listeners an optional set of listeners to add to the shard
*/
protected IndexShard newShard(ShardRouting routing, ShardPath shardPath, IndexMetaData indexMetaData,
@Nullable Store store, @Nullable IndexSearcherWrapper indexSearcherWrapper,
@Nullable CheckedFunction<IndexSettings, Store, IOException> storeProvider,
@Nullable IndexSearcherWrapper indexSearcherWrapper,
@Nullable EngineFactory engineFactory,
Runnable globalCheckpointSyncer,
IndexEventListener indexEventListener, IndexingOperationListener... listeners) throws IOException {
final Settings nodeSettings = Settings.builder().put("node.name", routing.currentNodeId()).build();
final IndexSettings indexSettings = new IndexSettings(indexMetaData, nodeSettings);
final IndexShard indexShard;
if (store == null) {
store = createStore(indexSettings, shardPath);
if (storeProvider == null) {
storeProvider = is -> createStore(is, shardPath);
}
final Store store = storeProvider.apply(indexSettings);
boolean success = false;
try {
IndexCache indexCache = new IndexCache(indexSettings, new DisabledQueryCache(indexSettings), null);
@ -424,7 +435,18 @@ public abstract class IndexShardTestCase extends ESTestCase {
*/
protected IndexShard newStartedShard(
final boolean primary, final Settings settings, final EngineFactory engineFactory) throws IOException {
IndexShard shard = newShard(primary, settings, engineFactory);
return newStartedShard(p -> newShard(p, settings, engineFactory), primary);
}
/**
* creates a new empty shard and starts it.
*
* @param shardFunction shard factory function
* @param primary controls whether the shard will be a primary or a replica.
*/
protected IndexShard newStartedShard(CheckedFunction<Boolean, IndexShard, IOException> shardFunction,
boolean primary) throws IOException {
IndexShard shard = shardFunction.apply(primary);
if (primary) {
recoverShardFromStore(shard);
} else {
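A hedged usage sketch of the new overload, again assuming an IndexShardTestCase subclass; the shard factory mirrors the delegation added in the hunk above:

// Tests can now inject their own shard construction while reusing the
// start/recovery handling that follows in this method.
IndexShard shard = newStartedShard(
        p -> newShard(p, Settings.EMPTY, new InternalEngineFactory()),
        true); // primary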

View File

@ -1,102 +0,0 @@
[role="xpack"]
[[ml-api-quickref]]
== API quick reference
All {ml} endpoints have the following base:
[source,js]
----
/_xpack/ml/
----
// NOTCONSOLE
The main {ml} resources can be accessed with a variety of endpoints:
* <<ml-api-jobs,+/anomaly_detectors/+>>: Create and manage {ml} jobs
* <<ml-api-datafeeds,+/datafeeds/+>>: Select data from {es} to be analyzed
* <<ml-api-results,+/results/+>>: Access the results of a {ml} job
* <<ml-api-snapshots,+/model_snapshots/+>>: Manage model snapshots
//* <<ml-api-validate,+/validate/+>>: Validate subsections of job configurations
[float]
[[ml-api-jobs]]
=== /anomaly_detectors/
* {ref}/ml-put-job.html[PUT /anomaly_detectors/<job_id+++>+++]: Create a job
* {ref}/ml-open-job.html[POST /anomaly_detectors/<job_id>/_open]: Open a job
* {ref}/ml-post-data.html[POST /anomaly_detectors/<job_id>/_data]: Send data to a job
* {ref}/ml-get-job.html[GET /anomaly_detectors]: List jobs
* {ref}/ml-get-job.html[GET /anomaly_detectors/<job_id+++>+++]: Get job details
* {ref}/ml-get-job-stats.html[GET /anomaly_detectors/<job_id>/_stats]: Get job statistics
* {ref}/ml-update-job.html[POST /anomaly_detectors/<job_id>/_update]: Update certain properties of the job configuration
* {ref}/ml-flush-job.html[POST /anomaly_detectors/<job_id>/_flush]: Force a job to analyze buffered data
* {ref}/ml-forecast.html[POST /anomaly_detectors/<job_id>/_forecast]: Forecast future job behavior
* {ref}/ml-close-job.html[POST /anomaly_detectors/<job_id>/_close]: Close a job
* {ref}/ml-delete-job.html[DELETE /anomaly_detectors/<job_id+++>+++]: Delete a job
[float]
[[ml-api-calendars]]
=== /calendars/
* {ref}/ml-put-calendar.html[PUT /calendars/<calendar_id+++>+++]: Create a calendar
* {ref}/ml-post-calendar-event.html[POST /calendars/<calendar_id+++>+++/events]: Add a scheduled event to a calendar
* {ref}/ml-put-calendar-job.html[PUT /calendars/<calendar_id+++>+++/jobs/<job_id+++>+++]: Associate a job with a calendar
* {ref}/ml-get-calendar.html[GET /calendars/<calendar_id+++>+++]: Get calendar details
* {ref}/ml-get-calendar-event.html[GET /calendars/<calendar_id+++>+++/events]: Get scheduled event details
* {ref}/ml-delete-calendar-event.html[DELETE /calendars/<calendar_id+++>+++/events/<event_id+++>+++]: Remove a scheduled event from a calendar
* {ref}/ml-delete-calendar-job.html[DELETE /calendars/<calendar_id+++>+++/jobs/<job_id+++>+++]: Disassociate a job from a calendar
* {ref}/ml-delete-calendar.html[DELETE /calendars/<calendar_id+++>+++]: Delete a calendar
[float]
[[ml-api-filters]]
=== /filters/
* {ref}/ml-put-filter.html[PUT /filters/<filter_id+++>+++]: Create a filter
* {ref}/ml-update-filter.html[POST /filters/<filter_id+++>+++/_update]: Update a filter
* {ref}/ml-get-filter.html[GET /filters/<filter_id+++>+++]: List filters
* {ref}/ml-delete-filter.html[DELETE /filter/<filter_id+++>+++]: Delete a filter
[float]
[[ml-api-datafeeds]]
=== /datafeeds/
* {ref}/ml-put-datafeed.html[PUT /datafeeds/<datafeed_id+++>+++]: Create a {dfeed}
* {ref}/ml-start-datafeed.html[POST /datafeeds/<datafeed_id>/_start]: Start a {dfeed}
* {ref}/ml-get-datafeed.html[GET /datafeeds]: List {dfeeds}
* {ref}/ml-get-datafeed.html[GET /datafeeds/<datafeed_id+++>+++]: Get {dfeed} details
* {ref}/ml-get-datafeed-stats.html[GET /datafeeds/<datafeed_id>/_stats]: Get statistical information for {dfeeds}
* {ref}/ml-preview-datafeed.html[GET /datafeeds/<datafeed_id>/_preview]: Get a preview of a {dfeed}
* {ref}/ml-update-datafeed.html[POST /datafeeds/<datafeed_id>/_update]: Update certain settings for a {dfeed}
* {ref}/ml-stop-datafeed.html[POST /datafeeds/<datafeed_id>/_stop]: Stop a {dfeed}
* {ref}/ml-delete-datafeed.html[DELETE /datafeeds/<datafeed_id+++>+++]: Delete {dfeed}
[float]
[[ml-api-results]]
=== /results/
* {ref}/ml-get-bucket.html[GET /results/buckets]: List the buckets in the results
* {ref}/ml-get-bucket.html[GET /results/buckets/<bucket_id+++>+++]: Get bucket details
* {ref}/ml-get-overall-buckets.html[GET /results/overall_buckets]: Get overall bucket results for multiple jobs
* {ref}/ml-get-category.html[GET /results/categories]: List the categories in the results
* {ref}/ml-get-category.html[GET /results/categories/<category_id+++>+++]: Get category details
* {ref}/ml-get-influencer.html[GET /results/influencers]: Get influencer details
* {ref}/ml-get-record.html[GET /results/records]: Get records from the results
[float]
[[ml-api-snapshots]]
=== /model_snapshots/
* {ref}/ml-get-snapshot.html[GET /model_snapshots]: List model snapshots
* {ref}/ml-get-snapshot.html[GET /model_snapshots/<snapshot_id+++>+++]: Get model snapshot details
* {ref}/ml-revert-snapshot.html[POST /model_snapshots/<snapshot_id>/_revert]: Revert a model snapshot
* {ref}/ml-update-snapshot.html[POST /model_snapshots/<snapshot_id>/_update]: Update certain settings for a model snapshot
* {ref}/ml-delete-snapshot.html[DELETE /model_snapshots/<snapshot_id+++>+++]: Delete a model snapshot
////
[float]
[[ml-api-validate]]
=== /validate/
* {ref}/ml-valid-detector.html[POST /anomaly_detectors/_validate/detector]: Validate a detector
* {ref}/ml-valid-job.html[POST /anomaly_detectors/_validate]: Validate a job
////

View File

@ -1,35 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
import org.supercsv.prefs.CsvPreference;
import java.io.IOException;
import java.util.List;
public class CsvLogStructureFinderFactory implements LogStructureFinderFactory {
/**
* Rules are:
* - The file must be valid CSV
* - It must contain at least two complete records
* - There must be at least two fields per record (otherwise files with no commas could be treated as CSV!)
* - Every CSV record except the last must have the same number of fields
* The reason the last record is allowed to have fewer fields than the others is that
* it could have been truncated when the file was sampled.
*/
@Override
public boolean canCreateFromSample(List<String> explanation, String sample) {
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.EXCEL_PREFERENCE, "CSV");
}
@Override
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
throws IOException {
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
CsvPreference.EXCEL_PREFERENCE, false);
}
}

View File

@ -29,17 +29,16 @@ import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
public class DelimitedLogStructureFinder implements LogStructureFinder {
private static final int MAX_LEVENSHTEIN_COMPARISONS = 100;
private final List<String> sampleMessages;
private final LogStructure structure;
static SeparatedValuesLogStructureFinder makeSeparatedValuesLogStructureFinder(List<String> explanation, String sample,
String charsetName, Boolean hasByteOrderMarker,
CsvPreference csvPreference, boolean trimFields)
throws IOException {
static DelimitedLogStructureFinder makeDelimitedLogStructureFinder(List<String> explanation, String sample, String charsetName,
Boolean hasByteOrderMarker, CsvPreference csvPreference,
boolean trimFields) throws IOException {
Tuple<List<List<String>>, List<Integer>> parsed = readRows(sample, csvPreference);
List<List<String>> rows = parsed.v1();
@ -73,13 +72,14 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
String preamble = Pattern.compile("\n").splitAsStream(sample).limit(lineNumbers.get(1)).collect(Collectors.joining("\n", "", "\n"));
char delimiter = (char) csvPreference.getDelimiterChar();
LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.fromSeparator(delimiter))
LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.DELIMITED)
.setCharset(charsetName)
.setHasByteOrderMarker(hasByteOrderMarker)
.setSampleStart(preamble)
.setNumLinesAnalyzed(lineNumbers.get(lineNumbers.size() - 1))
.setNumMessagesAnalyzed(sampleRecords.size())
.setHasHeaderRow(isHeaderInFile)
.setDelimiter(delimiter)
.setInputFields(Arrays.stream(headerWithNamedBlanks).collect(Collectors.toList()));
if (trimFields) {
@ -131,10 +131,10 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
.setExplanation(explanation)
.build();
return new SeparatedValuesLogStructureFinder(sampleMessages, structure);
return new DelimitedLogStructureFinder(sampleMessages, structure);
}
private SeparatedValuesLogStructureFinder(List<String> sampleMessages, LogStructure structure) {
private DelimitedLogStructureFinder(List<String> sampleMessages, LogStructure structure) {
this.sampleMessages = Collections.unmodifiableList(sampleMessages);
this.structure = structure;
}

View File

@ -0,0 +1,57 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
import org.supercsv.prefs.CsvPreference;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
public class DelimitedLogStructureFinderFactory implements LogStructureFinderFactory {
private final CsvPreference csvPreference;
private final int minFieldsPerRow;
private final boolean trimFields;
DelimitedLogStructureFinderFactory(char delimiter, int minFieldsPerRow, boolean trimFields) {
csvPreference = new CsvPreference.Builder('"', delimiter, "\n").build();
this.minFieldsPerRow = minFieldsPerRow;
this.trimFields = trimFields;
}
/**
* Rules are:
* - It must contain at least two complete records
* - There must be a minimum number of fields per record (otherwise files with no commas could be treated as CSV!)
* - Every record except the last must have the same number of fields
* The reason the last record is allowed to have fewer fields than the others is that
* it could have been truncated when the file was sampled.
*/
@Override
public boolean canCreateFromSample(List<String> explanation, String sample) {
String formatName;
switch ((char) csvPreference.getDelimiterChar()) {
case ',':
formatName = "CSV";
break;
case '\t':
formatName = "TSV";
break;
default:
formatName = Character.getName(csvPreference.getDelimiterChar()).toLowerCase(Locale.ROOT) + " delimited values";
break;
}
return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, minFieldsPerRow, csvPreference, formatName);
}
@Override
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
throws IOException {
return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
csvPreference, trimFields);
}
}
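The new factory is parameterised by delimiter character, minimum fields per row, and whether to trim fields, replacing the per-format classes deleted elsewhere in this commit. A minimal usage sketch based only on the API shown here and exercised in the tests below; the sample text is illustrative:

List<String> explanation = new ArrayList<>();
LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);

String sample = "time,message\n" +
        "2018-05-17T13:41:23,hello\n" +
        "2018-05-17T13:41:32,hello again\n";

if (csvFactory.canCreateFromSample(explanation, sample)) {
    // createFromSample throws IOException; the charset and byte-order-marker values are examples
    LogStructureFinder finder = csvFactory.createFromSample(explanation, sample, "UTF-8", null);
    LogStructure structure = finder.getStructure(); // Format.DELIMITED, delimiter ','
}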

View File

@ -27,37 +27,14 @@ public class LogStructure implements ToXContentObject {
public enum Format {
JSON, XML, CSV, TSV, SEMI_COLON_SEPARATED_VALUES, PIPE_SEPARATED_VALUES, SEMI_STRUCTURED_TEXT;
public Character separator() {
switch (this) {
case JSON:
case XML:
return null;
case CSV:
return ',';
case TSV:
return '\t';
case SEMI_COLON_SEPARATED_VALUES:
return ';';
case PIPE_SEPARATED_VALUES:
return '|';
case SEMI_STRUCTURED_TEXT:
return null;
default:
throw new IllegalStateException("enum value [" + this + "] missing from switch.");
}
}
JSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT;
public boolean supportsNesting() {
switch (this) {
case JSON:
case XML:
return true;
case CSV:
case TSV:
case SEMI_COLON_SEPARATED_VALUES:
case PIPE_SEPARATED_VALUES:
case DELIMITED:
case SEMI_STRUCTURED_TEXT:
return false;
default:
@ -69,10 +46,7 @@ public class LogStructure implements ToXContentObject {
switch (this) {
case JSON:
case XML:
case CSV:
case TSV:
case SEMI_COLON_SEPARATED_VALUES:
case PIPE_SEPARATED_VALUES:
case DELIMITED:
return true;
case SEMI_STRUCTURED_TEXT:
return false;
@ -85,10 +59,7 @@ public class LogStructure implements ToXContentObject {
switch (this) {
case JSON:
case XML:
case CSV:
case TSV:
case SEMI_COLON_SEPARATED_VALUES:
case PIPE_SEPARATED_VALUES:
case DELIMITED:
return false;
case SEMI_STRUCTURED_TEXT:
return true;
@ -97,38 +68,6 @@ public class LogStructure implements ToXContentObject {
}
}
public boolean isSeparatedValues() {
switch (this) {
case JSON:
case XML:
return false;
case CSV:
case TSV:
case SEMI_COLON_SEPARATED_VALUES:
case PIPE_SEPARATED_VALUES:
return true;
case SEMI_STRUCTURED_TEXT:
return false;
default:
throw new IllegalStateException("enum value [" + this + "] missing from switch.");
}
}
public static Format fromSeparator(char separator) {
switch (separator) {
case ',':
return CSV;
case '\t':
return TSV;
case ';':
return SEMI_COLON_SEPARATED_VALUES;
case '|':
return PIPE_SEPARATED_VALUES;
default:
throw new IllegalArgumentException("No known format has separator [" + separator + "]");
}
}
public static Format fromString(String name) {
return valueOf(name.trim().toUpperCase(Locale.ROOT));
}
@ -149,7 +88,7 @@ public class LogStructure implements ToXContentObject {
static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern");
static final ParseField INPUT_FIELDS = new ParseField("input_fields");
static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row");
static final ParseField SEPARATOR = new ParseField("separator");
static final ParseField DELIMITER = new ParseField("delimiter");
static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields");
static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field");
@ -171,7 +110,7 @@ public class LogStructure implements ToXContentObject {
PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN);
PARSER.declareStringArray(Builder::setInputFields, INPUT_FIELDS);
PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW);
PARSER.declareString((p, c) -> p.setSeparator(c.charAt(0)), SEPARATOR);
PARSER.declareString((p, c) -> p.setDelimiter(c.charAt(0)), DELIMITER);
PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS);
PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN);
PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD);
@ -191,7 +130,7 @@ public class LogStructure implements ToXContentObject {
private final String excludeLinesPattern;
private final List<String> inputFields;
private final Boolean hasHeaderRow;
private final Character separator;
private final Character delimiter;
private final Boolean shouldTrimFields;
private final String grokPattern;
private final List<String> timestampFormats;
@ -202,7 +141,7 @@ public class LogStructure implements ToXContentObject {
public LogStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker,
Format format, String multilineStartPattern, String excludeLinesPattern, List<String> inputFields,
Boolean hasHeaderRow, Character separator, Boolean shouldTrimFields, String grokPattern, String timestampField,
Boolean hasHeaderRow, Character delimiter, Boolean shouldTrimFields, String grokPattern, String timestampField,
List<String> timestampFormats, boolean needClientTimezone, Map<String, Object> mappings,
List<String> explanation) {
@ -216,7 +155,7 @@ public class LogStructure implements ToXContentObject {
this.excludeLinesPattern = excludeLinesPattern;
this.inputFields = (inputFields == null) ? null : Collections.unmodifiableList(new ArrayList<>(inputFields));
this.hasHeaderRow = hasHeaderRow;
this.separator = separator;
this.delimiter = delimiter;
this.shouldTrimFields = shouldTrimFields;
this.grokPattern = grokPattern;
this.timestampField = timestampField;
@ -266,8 +205,8 @@ public class LogStructure implements ToXContentObject {
return hasHeaderRow;
}
public Character getSeparator() {
return separator;
public Character getDelimiter() {
return delimiter;
}
public Boolean getShouldTrimFields() {
@ -322,8 +261,8 @@ public class LogStructure implements ToXContentObject {
if (hasHeaderRow != null) {
builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue());
}
if (separator != null) {
builder.field(SEPARATOR.getPreferredName(), String.valueOf(separator));
if (delimiter != null) {
builder.field(DELIMITER.getPreferredName(), String.valueOf(delimiter));
}
if (shouldTrimFields != null) {
builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue());
@ -349,7 +288,7 @@ public class LogStructure implements ToXContentObject {
public int hashCode() {
return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern, timestampField,
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern, timestampField,
timestampFormats, needClientTimezone, mappings, explanation);
}
@ -376,7 +315,7 @@ public class LogStructure implements ToXContentObject {
Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) &&
Objects.equals(this.inputFields, that.inputFields) &&
Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
Objects.equals(this.separator, that.separator) &&
Objects.equals(this.delimiter, that.delimiter) &&
Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
Objects.equals(this.grokPattern, that.grokPattern) &&
Objects.equals(this.timestampField, that.timestampField) &&
@ -397,7 +336,7 @@ public class LogStructure implements ToXContentObject {
private String excludeLinesPattern;
private List<String> inputFields;
private Boolean hasHeaderRow;
private Character separator;
private Character delimiter;
private Boolean shouldTrimFields;
private String grokPattern;
private String timestampField;
@ -441,7 +380,6 @@ public class LogStructure implements ToXContentObject {
public Builder setFormat(Format format) {
this.format = Objects.requireNonNull(format);
this.separator = format.separator();
return this;
}
@ -465,13 +403,13 @@ public class LogStructure implements ToXContentObject {
return this;
}
public Builder setShouldTrimFields(Boolean shouldTrimFields) {
this.shouldTrimFields = shouldTrimFields;
public Builder setDelimiter(Character delimiter) {
this.delimiter = delimiter;
return this;
}
public Builder setSeparator(Character separator) {
this.separator = separator;
public Builder setShouldTrimFields(Boolean shouldTrimFields) {
this.shouldTrimFields = shouldTrimFields;
return this;
}
@ -542,28 +480,22 @@ public class LogStructure implements ToXContentObject {
if (hasHeaderRow != null) {
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
}
if (separator != null) {
throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures.");
if (delimiter != null) {
throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures.");
}
if (grokPattern != null) {
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
}
break;
case CSV:
case TSV:
case SEMI_COLON_SEPARATED_VALUES:
case PIPE_SEPARATED_VALUES:
case DELIMITED:
if (inputFields == null || inputFields.isEmpty()) {
throw new IllegalArgumentException("Input fields must be specified for [" + format + "] structures.");
}
if (hasHeaderRow == null) {
throw new IllegalArgumentException("Has header row must be specified for [" + format + "] structures.");
}
Character expectedSeparator = format.separator();
assert expectedSeparator != null;
if (expectedSeparator.equals(separator) == false) {
throw new IllegalArgumentException("Separator must be [" + expectedSeparator + "] for [" + format +
"] structures.");
if (delimiter == null) {
throw new IllegalArgumentException("Delimiter must be specified for [" + format + "] structures.");
}
if (grokPattern != null) {
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
@ -576,8 +508,8 @@ public class LogStructure implements ToXContentObject {
if (hasHeaderRow != null) {
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
}
if (separator != null) {
throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures.");
if (delimiter != null) {
throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures.");
}
if (shouldTrimFields != null) {
throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures.");
@ -607,7 +539,7 @@ public class LogStructure implements ToXContentObject {
}
return new LogStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern,
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern,
timestampField, timestampFormats, needClientTimezone, mappings, explanation);
}
}
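With the per-format separator logic gone, the delimiter is an explicit builder field. A sketch of the new usage, assuming only the setters visible in this diff; the other fields that build() validates (sample statistics, timestamp details, mappings) are omitted here:

LogStructure.Builder builder = new LogStructure.Builder(LogStructure.Format.DELIMITED)
        .setCharset("UTF-8")
        .setInputFields(Arrays.asList("time", "message"))
        .setHasHeaderRow(true)
        .setDelimiter(','); // previously implied by the CSV/TSV/... format constants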

View File

@ -69,10 +69,10 @@ public final class LogStructureFinderManager {
new JsonLogStructureFinderFactory(),
new XmlLogStructureFinderFactory(),
// ND-JSON will often also be valid (although utterly weird) CSV, so JSON must come before CSV
new CsvLogStructureFinderFactory(),
new TsvLogStructureFinderFactory(),
new SemiColonSeparatedValuesLogStructureFinderFactory(),
new PipeSeparatedValuesLogStructureFinderFactory(),
new DelimitedLogStructureFinderFactory(',', 2, false),
new DelimitedLogStructureFinderFactory('\t', 2, false),
new DelimitedLogStructureFinderFactory(';', 4, false),
new DelimitedLogStructureFinderFactory('|', 5, true),
new TextLogStructureFinderFactory()
));
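The four removed factories collapse into one class configured per delimiter; the minimum-fields thresholds (2, 2, 4, 5) and the pipe factory's field trimming carry over the old per-format rules. A hedged sketch of the first-match probing that the ordering comment implies, not the manager's actual code, and it assumes same-package access to the package-private constructor:

List<LogStructureFinderFactory> delimitedFactories = Arrays.asList(
        new DelimitedLogStructureFinderFactory(',', 2, false),   // CSV
        new DelimitedLogStructureFinderFactory('\t', 2, false),  // TSV
        new DelimitedLogStructureFinderFactory(';', 4, false),   // semi-colon
        new DelimitedLogStructureFinderFactory('|', 5, true));   // pipe, trimmed

LogStructureFinder finder = null;
for (LogStructureFinderFactory factory : delimitedFactories) {
    if (factory.canCreateFromSample(explanation, sample)) {
        finder = factory.createFromSample(explanation, sample, charsetName, hasByteOrderMarker);
        break;
    }
}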

View File

@ -21,12 +21,12 @@ import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
final class LogStructureUtils {
public final class LogStructureUtils {
static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
static final String MAPPING_TYPE_SETTING = "type";
static final String MAPPING_FORMAT_SETTING = "format";
static final String MAPPING_PROPERTIES_SETTING = "properties";
public static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
public static final String MAPPING_TYPE_SETTING = "type";
public static final String MAPPING_FORMAT_SETTING = "format";
public static final String MAPPING_PROPERTIES_SETTING = "properties";
// NUMBER Grok pattern doesn't support scientific notation, so we extend it
private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$");

View File

@ -1,38 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
import org.supercsv.prefs.CsvPreference;
import java.io.IOException;
import java.util.List;
public class PipeSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory {
private static final CsvPreference PIPE_PREFERENCE = new CsvPreference.Builder('"', '|', "\n").build();
/**
* Rules are:
* - The file must be valid pipe (<code>|</code>) separated values
* - It must contain at least two complete records
* - There must be at least five fields per record (otherwise files with coincidental
* or no pipe characters could be treated as pipe separated)
* - Every pipe separated value record except the last must have the same number of fields
* The reason the last record is allowed to have fewer fields than the others is that
* it could have been truncated when the file was sampled.
*/
@Override
public boolean canCreateFromSample(List<String> explanation, String sample) {
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 5, PIPE_PREFERENCE, "pipe separated values");
}
@Override
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
throws IOException {
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
PIPE_PREFERENCE, true);
}
}

View File

@ -1,37 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
import org.supercsv.prefs.CsvPreference;
import java.io.IOException;
import java.util.List;
public class SemiColonSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory {
/**
* Rules are:
* - The file must be valid semi-colon separated values
* - It must contain at least two complete records
* - There must be at least four fields per record (otherwise files with coincidental
* or no semi-colons could be treated as semi-colon separated)
* - Every semi-colon separated value record except the last must have the same number of fields
* The reason the last record is allowed to have fewer fields than the others is that
* it could have been truncated when the file was sampled.
*/
@Override
public boolean canCreateFromSample(List<String> explanation, String sample) {
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 4,
CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, "semi-colon separated values");
}
@Override
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
throws IOException {
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, false);
}
}

View File

@ -23,13 +23,13 @@ public class TsvLogStructureFinderFactory implements LogStructureFinderFactory {
*/
@Override
public boolean canCreateFromSample(List<String> explanation, String sample) {
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV");
return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV");
}
@Override
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
throws IOException {
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
CsvPreference.TAB_PREFERENCE, false);
}
}

View File

@ -1,38 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
public class CsvLogStructureFinderFactoryTests extends LogStructureTestCase {
private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory();
// No need to check JSON or XML because they come earlier in the order we check formats
public void testCanCreateFromSampleGivenCsv() {
assertTrue(factory.canCreateFromSample(explanation, CSV_SAMPLE));
}
public void testCanCreateFromSampleGivenTsv() {
assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
}
public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
}
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
}
public void testCanCreateFromSampleGivenText() {
assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
}

View File

@ -0,0 +1,93 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
public class DelimitedLogStructureFinderFactoryTests extends LogStructureTestCase {
private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);
private LogStructureFinderFactory tsvFactory = new DelimitedLogStructureFinderFactory('\t', 2, false);
private LogStructureFinderFactory semiColonDelimitedFactory = new DelimitedLogStructureFinderFactory(';', 4, false);
private LogStructureFinderFactory pipeDelimitedFactory = new DelimitedLogStructureFinderFactory('|', 5, true);
// CSV - no need to check JSON or XML because they come earlier in the order we check formats
public void testCanCreateCsvFromSampleGivenCsv() {
assertTrue(csvFactory.canCreateFromSample(explanation, CSV_SAMPLE));
}
public void testCanCreateCsvFromSampleGivenTsv() {
assertFalse(csvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
}
public void testCanCreateCsvFromSampleGivenSemiColonDelimited() {
assertFalse(csvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
}
public void testCanCreateCsvFromSampleGivenPipeDelimited() {
assertFalse(csvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}
public void testCanCreateCsvFromSampleGivenText() {
assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
// TSV - no need to check JSON, XML or CSV because they come earlier in the order we check formats
public void testCanCreateTsvFromSampleGivenTsv() {
assertTrue(tsvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
}
public void testCanCreateTsvFromSampleGivenSemiColonDelimited() {
assertFalse(tsvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
}
public void testCanCreateTsvFromSampleGivenPipeDelimited() {
assertFalse(tsvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}
public void testCanCreateTsvFromSampleGivenText() {
assertFalse(tsvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
// Semi-colon delimited - no need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats
public void testCanCreateSemiColonDelimitedFromSampleGivenSemiColonDelimited() {
assertTrue(semiColonDelimitedFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
}
public void testCanCreateSemiColonDelimitedFromSampleGivenPipeDelimited() {
assertFalse(semiColonDelimitedFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}
public void testCanCreateSemiColonDelimitedFromSampleGivenText() {
assertFalse(semiColonDelimitedFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
// Pipe delimited - no need to check JSON, XML, CSV, TSV or semi-colon delimited
// values because they come earlier in the order we check formats
public void testCanCreatePipeDelimitedFromSampleGivenPipeDelimited() {
assertTrue(pipeDelimitedFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}
public void testCanCreatePipeDelimitedFromSampleGivenText() {
assertFalse(pipeDelimitedFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
}

View File

@ -12,27 +12,27 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinFieldwiseCompareRows;
import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinDistance;
import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinFieldwiseCompareRows;
import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinDistance;
import static org.hamcrest.Matchers.arrayContaining;
public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase {
public class DelimitedLogStructureFinderTests extends LogStructureTestCase {
private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory();
private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);
public void testCreateConfigsGivenCompleteCsv() throws Exception {
String sample = "time,message\n" +
"2018-05-17T13:41:23,hello\n" +
"2018-05-17T13:41:32,hello again\n";
assertTrue(factory.canCreateFromSample(explanation, sample));
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructure structure = structureFinder.getStructure();
assertEquals(LogStructure.Format.CSV, structure.getFormat());
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker());
@ -41,7 +41,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
}
assertEquals("^\"?time\"?,\"?message\"?", structure.getExcludeLinesPattern());
assertEquals("^\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
assertEquals(Character.valueOf(','), structure.getSeparator());
assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("time", "message"), structure.getInputFields());
@ -55,15 +55,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"\"hello\n" +
"world\",2018-05-17T13:41:23,1\n" +
"\"hello again\n"; // note that this last record is truncated
assertTrue(factory.canCreateFromSample(explanation, sample));
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructure structure = structureFinder.getStructure();
assertEquals(LogStructure.Format.CSV, structure.getFormat());
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker());
@ -72,7 +72,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
}
assertEquals("^\"?message\"?,\"?time\"?,\"?count\"?", structure.getExcludeLinesPattern());
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
assertEquals(Character.valueOf(','), structure.getSeparator());
assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("message", "time", "count"), structure.getInputFields());
@ -88,15 +88,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
"1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
"1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
assertTrue(factory.canCreateFromSample(explanation, sample));
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructure structure = structureFinder.getStructure();
assertEquals(LogStructure.Format.CSV, structure.getFormat());
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker());
@ -108,7 +108,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?,\"?\"?,\"?\"?",
structure.getExcludeLinesPattern());
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
assertEquals(Character.valueOf(','), structure.getSeparator());
assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance",
@ -126,15 +126,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
"1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
"1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
assertTrue(factory.canCreateFromSample(explanation, sample));
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructure structure = structureFinder.getStructure();
assertEquals(LogStructure.Format.CSV, structure.getFormat());
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker());
@ -146,7 +146,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?",
structure.getExcludeLinesPattern());
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
assertEquals(Character.valueOf(','), structure.getSeparator());
assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance",
@ -161,15 +161,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
String sample = "\"pos_id\",\"trip_id\",\"latitude\",\"longitude\",\"altitude\",\"timestamp\"\n" +
"\"1\",\"3\",\"4703.7815\",\"1527.4713\",\"359.9\",\"2017-01-19 16:19:04.742113\"\n" +
"\"2\",\"3\",\"4703.7815\",\"1527.4714\",\"359.9\",\"2017-01-19 16:19:05.741890\"\n";
assertTrue(factory.canCreateFromSample(explanation, sample));
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructure structure = structureFinder.getStructure();
assertEquals(LogStructure.Format.CSV, structure.getFormat());
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker());
@ -179,7 +179,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
assertEquals("^\"?pos_id\"?,\"?trip_id\"?,\"?latitude\"?,\"?longitude\"?,\"?altitude\"?,\"?timestamp\"?",
structure.getExcludeLinesPattern());
assertNull(structure.getMultilineStartPattern());
assertEquals(Character.valueOf(','), structure.getSeparator());
assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("pos_id", "trip_id", "latitude", "longitude", "altitude", "timestamp"), structure.getInputFields());
@ -195,8 +195,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" +
"2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n";
Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation,
SeparatedValuesLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1());
Tuple<Boolean, String[]> header = DelimitedLogStructureFinder.findHeaderFromSample(explanation,
DelimitedLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1());
assertTrue(header.v1());
assertThat(header.v2(), arrayContaining("time", "airline", "responsetime", "sourcetype"));
@ -208,8 +208,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" +
"2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n";
Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation,
SeparatedValuesLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1());
Tuple<Boolean, String[]> header = DelimitedLogStructureFinder.findHeaderFromSample(explanation,
DelimitedLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1());
assertFalse(header.v1());
assertThat(header.v2(), arrayContaining("column1", "column2", "column3", "column4"));
@ -251,43 +251,43 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
public void testLineHasUnescapedQuote() {
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE));
assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE));
assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE));
assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE));
assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE));
assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE));
assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE));
assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE));
assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE));
assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE));
}
public void testRowContainsDuplicateNonEmptyValues() {
assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a")));
assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("")));
assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c")));
assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a")));
assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b")));
assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));
assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", "")));
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a")));
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("")));
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c")));
assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a")));
assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b")));
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));
assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", "")));
}
}

View File

@ -29,14 +29,14 @@ public class JsonLogStructureFinderFactoryTests extends LogStructureTestCase {
assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
}
public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
public void testCanCreateFromSampleGivenSemiColonDelimited() {
assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
}
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
public void testCanCreateFromSampleGivenPipeDelimited() {
assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}
public void testCanCreateFromSampleGivenText() {

View File

@ -29,7 +29,7 @@ public class JsonLogStructureFinderTests extends LogStructureTestCase {
}
assertNull(structure.getExcludeLinesPattern());
assertNull(structure.getMultilineStartPattern());
assertNull(structure.getSeparator());
assertNull(structure.getDelimiter());
assertNull(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertNull(structure.getGrokPattern());

View File

@ -61,7 +61,7 @@ public class LogStructureFinderManagerTests extends LogStructureTestCase {
public void testMakeBestStructureGivenCsv() throws Exception {
assertThat(structureFinderManager.makeBestStructureFinder(explanation, "time,message\n" +
"2018-05-17T13:41:23,hello\n", StandardCharsets.UTF_8.name(), randomBoolean()),
instanceOf(SeparatedValuesLogStructureFinder.class));
instanceOf(DelimitedLogStructureFinder.class));
}
public void testMakeBestStructureGivenText() throws Exception {

View File

@ -34,14 +34,14 @@ public abstract class LogStructureTestCase extends ESTestCase {
"\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
protected static final String PIPE_SEPARATED_VALUES_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" +
protected static final String PIPE_DELIMITED_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" +
"listening on 0.0.0.0:9987, :::9987\n" +
"2018-01-06 17:19:44.465252|INFO |VirtualServer |1 |client " +
"'User1'(id:2) changed default admin channelgroup to 'Guest'(id:8)\n" +
"2018-01-06 17:21:25.764368|INFO |VirtualServer |1 |client " +
"'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel 'Default Channel'(id:1)";
protected static final String SEMI_COLON_SEPARATED_VALUES_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" +
protected static final String SEMI_COLON_DELIMITED_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" +
"\"timestamp\"\n" +
"\"1\";\"3\";\"4703.7815\";\"1527.4713\";\"359.9\";\"2017-01-19 16:19:04.742113\"\n" +
"\"2\";\"3\";\"4703.7815\";\"1527.4714\";\"359.9\";\"2017-01-19 16:19:05.741890\"\n" +

View File

@ -43,14 +43,12 @@ public class LogStructureTests extends AbstractXContentTestCase<LogStructure> {
builder.setExcludeLinesPattern(randomAlphaOfLength(100));
}
if (format.isSeparatedValues() || (format.supportsNesting() && randomBoolean())) {
if (format == LogStructure.Format.DELIMITED || (format.supportsNesting() && randomBoolean())) {
builder.setInputFields(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
}
if (format.isSeparatedValues()) {
if (format == LogStructure.Format.DELIMITED) {
builder.setHasHeaderRow(randomBoolean());
if (rarely()) {
builder.setSeparator(format.separator());
}
builder.setDelimiter(randomFrom(',', '\t', ';', '|'));
}
if (format.isSemiStructured()) {
builder.setGrokPattern(randomAlphaOfLength(100));

View File

@ -1,23 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
public class PipeSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase {
private LogStructureFinderFactory factory = new PipeSeparatedValuesLogStructureFinderFactory();
// No need to check JSON, XML, CSV, TSV or semi-colon separated values because they come earlier in the order we check formats
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
assertTrue(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
}
public void testCanCreateFromSampleGivenText() {
assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
}

View File

@ -1,28 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
public class SemiColonSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase {
private LogStructureFinderFactory factory = new SemiColonSeparatedValuesLogStructureFinderFactory();
// No need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats
public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
assertTrue(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
}
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
}
public void testCanCreateFromSampleGivenText() {
assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
}

Some files were not shown because too many files have changed in this diff.