Merge branch 'master' into ccr

* master:
  Mute test watcher usage stats output
  [Rollup] Fix FullClusterRestart test
  Adjust soft-deletes version after backport into 6.5
  completely drop `index.shard.check_on_startup: fix` for 7.0 (#33194)
  Fix AwaitsFix issue number
  Mute SmokeTestWatcherWithSecurityIT tests
  drop `index.shard.check_on_startup: fix` (#32279)
  tracked at
  [DOCS] Moves ml folder from x-pack/docs to docs (#33248)
  [DOCS] Move rollup APIs to docs (#31450)
  [DOCS] Rename X-Pack Commands section (#33005)
  TEST: Disable soft-deletes in ParentChildTestCase
  Fixes SecurityIntegTestCase so it always adds at least one alias (#33296)
  Fix pom for build-tools (#33300)
  Lazy evaluate java9home (#33301)
  SQL: test coverage for JdbcResultSet (#32813)
  Work around to be able to generate eclipse projects (#33295)
  Highlight that index_phrases only works if no slop is used (#33303)
  Different handling for security specific errors in the CLI. Fix for https://github.com/elastic/elasticsearch/issues/33230 (#33255)
  [ML] Refactor delimited file structure detection (#33233)
  SQL: Support multi-index format as table identifier (#33278)
  MINOR: Remove Dead Code from PathTrie (#33280)
  Enable forbiddenapis server java9 (#33245)
This commit is contained in:
Nhat Nguyen 2018-08-31 19:03:04 -04:00
commit b93507608a
122 changed files with 2541 additions and 841 deletions

View File

@ -16,7 +16,9 @@
* specific language governing permissions and limitations * specific language governing permissions and limitations
* under the License. * under the License.
*/ */
import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin
import org.apache.tools.ant.taskdefs.condition.Os
import org.elasticsearch.gradle.BuildPlugin import org.elasticsearch.gradle.BuildPlugin
import org.elasticsearch.gradle.LoggedExec import org.elasticsearch.gradle.LoggedExec
import org.elasticsearch.gradle.Version import org.elasticsearch.gradle.Version
@ -24,14 +26,9 @@ import org.elasticsearch.gradle.VersionCollection
import org.elasticsearch.gradle.VersionProperties import org.elasticsearch.gradle.VersionProperties
import org.elasticsearch.gradle.plugin.PluginBuildPlugin import org.elasticsearch.gradle.plugin.PluginBuildPlugin
import org.gradle.plugins.ide.eclipse.model.SourceFolder import org.gradle.plugins.ide.eclipse.model.SourceFolder
import org.gradle.util.GradleVersion
import org.gradle.util.DistributionLocator
import org.apache.tools.ant.taskdefs.condition.Os
import org.apache.tools.ant.filters.ReplaceTokens
import java.nio.file.Files import java.nio.file.Files
import java.nio.file.Path import java.nio.file.Path
import java.security.MessageDigest
plugins { plugins {
id 'com.gradle.build-scan' version '1.13.2' id 'com.gradle.build-scan' version '1.13.2'
@ -512,6 +509,16 @@ allprojects {
tasks.cleanEclipse.dependsOn(wipeEclipseSettings) tasks.cleanEclipse.dependsOn(wipeEclipseSettings)
// otherwise the eclipse merging is *super confusing* // otherwise the eclipse merging is *super confusing*
tasks.eclipse.dependsOn(cleanEclipse, copyEclipseSettings) tasks.eclipse.dependsOn(cleanEclipse, copyEclipseSettings)
// work around https://github.com/gradle/gradle/issues/6582
tasks.eclipseProject.mustRunAfter tasks.cleanEclipseProject
tasks.matching { it.name == 'eclipseClasspath' }.all {
it.mustRunAfter { tasks.cleanEclipseClasspath }
}
tasks.matching { it.name == 'eclipseJdt' }.all {
it.mustRunAfter { tasks.cleanEclipseJdt }
}
tasks.copyEclipseSettings.mustRunAfter tasks.wipeEclipseSettings
} }
allprojects { allprojects {
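
A note on the ordering calls added above: `mustRunAfter` only constrains ordering when both tasks are scheduled in the same invocation, it does not make one task trigger the other, and the `tasks.matching { ... }.all { ... }` form is used for `eclipseClasspath` and `eclipseJdt` because those tasks may not exist yet when this script is evaluated. A minimal, self-contained build.gradle sketch of the `mustRunAfter` behaviour (the task names are made up for illustration):

task cleanGenerated {
    doLast { println 'cleaning generated files' }
}

task generateSettings {
    doLast { println 'generating settings' }
}

// Ordering only: `gradle generateSettings` alone never runs cleanGenerated,
// but `gradle cleanGenerated generateSettings` always cleans first.
generateSettings.mustRunAfter cleanGenerated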

View File

@ -24,15 +24,6 @@ plugins {
id 'groovy' id 'groovy'
} }
gradlePlugin {
plugins {
simplePlugin {
id = 'elasticsearch.clusterformation'
implementationClass = 'org.elasticsearch.gradle.clusterformation.ClusterformationPlugin'
}
}
}
group = 'org.elasticsearch.gradle' group = 'org.elasticsearch.gradle'
String minimumGradleVersion = file('src/main/resources/minimumGradleVersion').text.trim() String minimumGradleVersion = file('src/main/resources/minimumGradleVersion').text.trim()

View File

@ -38,7 +38,6 @@ import org.gradle.api.artifacts.ModuleDependency
import org.gradle.api.artifacts.ModuleVersionIdentifier import org.gradle.api.artifacts.ModuleVersionIdentifier
import org.gradle.api.artifacts.ProjectDependency import org.gradle.api.artifacts.ProjectDependency
import org.gradle.api.artifacts.ResolvedArtifact import org.gradle.api.artifacts.ResolvedArtifact
import org.gradle.api.artifacts.SelfResolvingDependency
import org.gradle.api.artifacts.dsl.RepositoryHandler import org.gradle.api.artifacts.dsl.RepositoryHandler
import org.gradle.api.execution.TaskExecutionGraph import org.gradle.api.execution.TaskExecutionGraph
import org.gradle.api.plugins.JavaPlugin import org.gradle.api.plugins.JavaPlugin
@ -212,6 +211,7 @@ class BuildPlugin implements Plugin<Project> {
project.rootProject.ext.minimumRuntimeVersion = minimumRuntimeVersion project.rootProject.ext.minimumRuntimeVersion = minimumRuntimeVersion
project.rootProject.ext.inFipsJvm = inFipsJvm project.rootProject.ext.inFipsJvm = inFipsJvm
project.rootProject.ext.gradleJavaVersion = JavaVersion.toVersion(gradleJavaVersion) project.rootProject.ext.gradleJavaVersion = JavaVersion.toVersion(gradleJavaVersion)
project.rootProject.ext.java9Home = "${-> findJavaHome("9")}"
} }
project.targetCompatibility = project.rootProject.ext.minimumRuntimeVersion project.targetCompatibility = project.rootProject.ext.minimumRuntimeVersion
@ -225,6 +225,7 @@ class BuildPlugin implements Plugin<Project> {
project.ext.javaVersions = project.rootProject.ext.javaVersions project.ext.javaVersions = project.rootProject.ext.javaVersions
project.ext.inFipsJvm = project.rootProject.ext.inFipsJvm project.ext.inFipsJvm = project.rootProject.ext.inFipsJvm
project.ext.gradleJavaVersion = project.rootProject.ext.gradleJavaVersion project.ext.gradleJavaVersion = project.rootProject.ext.gradleJavaVersion
project.ext.java9Home = project.rootProject.ext.java9Home
} }
private static String getPaddedMajorVersion(JavaVersion compilerJavaVersionEnum) { private static String getPaddedMajorVersion(JavaVersion compilerJavaVersionEnum) {
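
The `"${-> findJavaHome("9")}"` form added above is a lazy GString: the embedded closure only runs when the value is actually converted to text, so the JDK 9 lookup is deferred until something uses the string instead of happening when the line executes. A plain-Groovy sketch of eager versus lazy interpolation, with an illustrative stand-in for the lookup:

def calls = 0
def findHome = { -> calls++; '/illustrative/jdk-9' }   // stand-in for findJavaHome("9")

def eager = "home: ${findHome()}"     // closure invoked immediately
def lazy  = "home: ${-> findHome()}"  // closure stored, invoked on toString()

assert calls == 1                     // only the eager string has run the lookup
println lazy                          // prints: home: /illustrative/jdk-9
assert calls == 2                     // evaluated when the lazy string was printed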

View File

@ -100,7 +100,7 @@ class PrecommitTasks {
private static Task configureForbiddenApisCli(Project project) { private static Task configureForbiddenApisCli(Project project) {
Task forbiddenApisCli = project.tasks.create('forbiddenApis') Task forbiddenApisCli = project.tasks.create('forbiddenApis')
project.sourceSets.forEach { sourceSet -> project.sourceSets.all { sourceSet ->
forbiddenApisCli.dependsOn( forbiddenApisCli.dependsOn(
project.tasks.create(sourceSet.getTaskName('forbiddenApis', null), ForbiddenApisCliTask) { project.tasks.create(sourceSet.getTaskName('forbiddenApis', null), ForbiddenApisCliTask) {
ExportElasticsearchBuildResourcesTask buildResources = project.tasks.getByName('buildResources') ExportElasticsearchBuildResourcesTask buildResources = project.tasks.getByName('buildResources')
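
The change from `forEach` to `all` above is not cosmetic: `sourceSets` is a live Gradle container, so `forEach` only visits the source sets that exist at that point in the build script, while `all` also runs the closure for any source set registered later (for example by a plugin applied afterwards). A small build.gradle sketch of the difference, using an illustrative `integTest` source set:

apply plugin: 'java'

def seenByForEach = []
def seenByAll = []

sourceSets.forEach { seenByForEach << it.name }  // snapshot: only what exists now
sourceSets.all { seenByAll << it.name }          // live: also fires for later additions

sourceSets.create('integTest')                   // added after both calls above

// forEach saw [main, test]; all saw [main, test, integTest].
println "forEach: $seenByForEach"
println "all:     $seenByAll"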

View File

@ -51,7 +51,8 @@ public class ForbiddenApisCliTask extends DefaultTask {
private JavaVersion targetCompatibility; private JavaVersion targetCompatibility;
private FileCollection classesDirs; private FileCollection classesDirs;
private SourceSet sourceSet; private SourceSet sourceSet;
private String javaHome; // This needs to be an object so it can hold Groovy GStrings
private Object javaHome;
@Input @Input
public JavaVersion getTargetCompatibility() { public JavaVersion getTargetCompatibility() {
@ -142,11 +143,11 @@ public class ForbiddenApisCliTask extends DefaultTask {
} }
@Input @Input
public String getJavaHome() { public Object getJavaHome() {
return javaHome; return javaHome;
} }
public void setJavaHome(String javaHome) { public void setJavaHome(Object javaHome) {
this.javaHome = javaHome; this.javaHome = javaHome;
} }
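
The comment added above is the reason for the `String` to `Object` change: the value wired in from the root build is a lazy GString, and a `String`-typed property would coerce it to a plain string (and therefore run the JDK lookup) at the moment of assignment, while an `Object`-typed property stores the GString untouched until the task actually uses it. A plain-Groovy sketch of that coercion (the holder classes and path are illustrative):

class StringHolder { String home }   // like the old `private String javaHome`
class ObjectHolder { Object home }   // like the new `private Object javaHome`

def calls = 0
def lazyHome = "${-> calls++; '/illustrative/jdk-9'}"

new StringHolder(home: lazyHome)               // coerced to String here, so the closure runs now
assert calls == 1

def holder = new ObjectHolder(home: lazyHome)  // GString stored as-is, nothing evaluated yet
assert calls == 1
println holder.home                            // evaluated only when the value is finally used
assert calls == 2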

View File

@ -0,0 +1 @@
implementation-class=org.elasticsearch.gradle.clusterformation.ClusterformationPlugin
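
The single line above is a hand-written Gradle plugin descriptor and replaces the `gradlePlugin { }` block removed from buildSrc earlier in this diff: Gradle resolves a plugin id by loading `META-INF/gradle-plugins/<id>.properties` from the classpath and instantiating the class named by `implementation-class`. A sketch of the two equivalent declarations (the descriptor path is the conventional location, shown here as an assumption rather than taken from the diff):

// Declarative form, generated by the java-gradle-plugin (the block removed above):
gradlePlugin {
    plugins {
        simplePlugin {
            id = 'elasticsearch.clusterformation'
            implementationClass = 'org.elasticsearch.gradle.clusterformation.ClusterformationPlugin'
        }
    }
}

// Manual form, as in this change: ship the descriptor yourself, typically at
// src/main/resources/META-INF/gradle-plugins/elasticsearch.clusterformation.properties
// containing the single line:
//   implementation-class=org.elasticsearch.gradle.clusterformation.ClusterformationPlugin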

View File

@ -19,6 +19,12 @@
apply plugin: 'elasticsearch.docs-test' apply plugin: 'elasticsearch.docs-test'
/* List of files that have snippets that require a gold or platinum licence
and therefore cannot be tested yet... */
buildRestTests.expectedUnconvertedCandidates = [
'reference/ml/transforms.asciidoc',
]
integTestCluster { integTestCluster {
/* Enable regexes in painless so our tests don't complain about example /* Enable regexes in painless so our tests don't complain about example
* snippets that use them. */ * snippets that use them. */
@ -74,6 +80,17 @@ buildRestTests.docs = fileTree(projectDir) {
exclude 'build' exclude 'build'
// Just syntax examples // Just syntax examples
exclude 'README.asciidoc' exclude 'README.asciidoc'
// Broken code snippet tests
exclude 'reference/rollup/rollup-getting-started.asciidoc'
exclude 'reference/rollup/apis/rollup-job-config.asciidoc'
exclude 'reference/rollup/apis/rollup-index-caps.asciidoc'
exclude 'reference/rollup/apis/put-job.asciidoc'
exclude 'reference/rollup/apis/stop-job.asciidoc'
exclude 'reference/rollup/apis/start-job.asciidoc'
exclude 'reference/rollup/apis/rollup-search.asciidoc'
exclude 'reference/rollup/apis/delete-job.asciidoc'
exclude 'reference/rollup/apis/get-job.asciidoc'
exclude 'reference/rollup/apis/rollup-caps.asciidoc'
} }
listSnippets.docs = buildRestTests.docs listSnippets.docs = buildRestTests.docs
@ -594,3 +611,259 @@ buildRestTests.setups['library'] = '''
{"name": "The Moon is a Harsh Mistress", "author": "Robert A. Heinlein", "release_date": "1966-04-01", "page_count": 288} {"name": "The Moon is a Harsh Mistress", "author": "Robert A. Heinlein", "release_date": "1966-04-01", "page_count": 288}
''' '''
buildRestTests.setups['sensor_rollup_job'] = '''
- do:
indices.create:
index: sensor-1
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_doc:
properties:
timestamp:
type: date
temperature:
type: long
voltage:
type: float
node:
type: keyword
- do:
xpack.rollup.put_job:
id: "sensor"
body: >
{
"index_pattern": "sensor-*",
"rollup_index": "sensor_rollup",
"cron": "*/30 * * * * ?",
"page_size" :1000,
"groups" : {
"date_histogram": {
"field": "timestamp",
"interval": "1h",
"delay": "7d"
},
"terms": {
"fields": ["node"]
}
},
"metrics": [
{
"field": "temperature",
"metrics": ["min", "max", "sum"]
},
{
"field": "voltage",
"metrics": ["avg"]
}
]
}
'''
buildRestTests.setups['sensor_started_rollup_job'] = '''
- do:
indices.create:
index: sensor-1
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_doc:
properties:
timestamp:
type: date
temperature:
type: long
voltage:
type: float
node:
type: keyword
- do:
bulk:
index: sensor-1
type: _doc
refresh: true
body: |
{"index":{}}
{"timestamp": 1516729294000, "temperature": 200, "voltage": 5.2, "node": "a"}
{"index":{}}
{"timestamp": 1516642894000, "temperature": 201, "voltage": 5.8, "node": "b"}
{"index":{}}
{"timestamp": 1516556494000, "temperature": 202, "voltage": 5.1, "node": "a"}
{"index":{}}
{"timestamp": 1516470094000, "temperature": 198, "voltage": 5.6, "node": "b"}
{"index":{}}
{"timestamp": 1516383694000, "temperature": 200, "voltage": 4.2, "node": "c"}
{"index":{}}
{"timestamp": 1516297294000, "temperature": 202, "voltage": 4.0, "node": "c"}
- do:
xpack.rollup.put_job:
id: "sensor"
body: >
{
"index_pattern": "sensor-*",
"rollup_index": "sensor_rollup",
"cron": "* * * * * ?",
"page_size" :1000,
"groups" : {
"date_histogram": {
"field": "timestamp",
"interval": "1h",
"delay": "7d"
},
"terms": {
"fields": ["node"]
}
},
"metrics": [
{
"field": "temperature",
"metrics": ["min", "max", "sum"]
},
{
"field": "voltage",
"metrics": ["avg"]
}
]
}
- do:
xpack.rollup.start_job:
id: "sensor"
'''
buildRestTests.setups['sensor_index'] = '''
- do:
indices.create:
index: sensor-1
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_doc:
properties:
timestamp:
type: date
temperature:
type: long
voltage:
type: float
node:
type: keyword
load:
type: double
net_in:
type: long
net_out:
type: long
hostname:
type: keyword
datacenter:
type: keyword
'''
buildRestTests.setups['sensor_prefab_data'] = '''
- do:
indices.create:
index: sensor-1
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_doc:
properties:
timestamp:
type: date
temperature:
type: long
voltage:
type: float
node:
type: keyword
- do:
indices.create:
index: sensor_rollup
body:
settings:
number_of_shards: 1
number_of_replicas: 0
mappings:
_doc:
properties:
node.terms.value:
type: keyword
temperature.sum.value:
type: double
temperature.max.value:
type: double
temperature.min.value:
type: double
timestamp.date_histogram.time_zone:
type: keyword
timestamp.date_histogram.interval:
type: keyword
timestamp.date_histogram.timestamp:
type: date
timestamp.date_histogram._count:
type: long
voltage.avg.value:
type: double
voltage.avg._count:
type: long
_rollup.id:
type: keyword
_rollup.version:
type: long
_meta:
_rollup:
sensor:
cron: "* * * * * ?"
rollup_index: "sensor_rollup"
index_pattern: "sensor-*"
timeout: "20s"
page_size: 1000
groups:
date_histogram:
delay: "7d"
field: "timestamp"
interval: "1h"
time_zone: "UTC"
terms:
fields:
- "node"
id: sensor
metrics:
- field: "temperature"
metrics:
- min
- max
- sum
- field: "voltage"
metrics:
- avg
- do:
bulk:
index: sensor_rollup
type: _doc
refresh: true
body: |
{"index":{}}
{"node.terms.value":"b","temperature.sum.value":201.0,"temperature.max.value":201.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":201.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.800000190734863,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516640400000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
{"index":{}}
{"node.terms.value":"c","temperature.sum.value":200.0,"temperature.max.value":200.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":200.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":4.199999809265137,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516381200000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
{"index":{}}
{"node.terms.value":"a","temperature.sum.value":202.0,"temperature.max.value":202.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":202.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.099999904632568,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516554000000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
{"index":{}}
{"node.terms.value":"a","temperature.sum.value":200.0,"temperature.max.value":200.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":200.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.199999809265137,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516726800000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
{"index":{}}
{"node.terms.value":"b","temperature.sum.value":198.0,"temperature.max.value":198.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":198.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.599999904632568,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516467600000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
{"index":{}}
{"node.terms.value":"c","temperature.sum.value":202.0,"temperature.max.value":202.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":202.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":4.0,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516294800000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
'''

View File

@ -1,11 +1,11 @@
[role="xpack"] [[commands]]
[[xpack-commands]] = Command line tools
= {xpack} Commands
[partintro] [partintro]
-- --
{xpack} includes commands that help you configure security: {es} provides the following tools for configuring security and performing other
tasks from the command line:
* <<certgen>> * <<certgen>>
* <<certutil>> * <<certutil>>

View File

@ -63,12 +63,6 @@ corruption is detected, it will prevent the shard from being opened. Accepts:
Check for both physical and logical corruption. This is much more Check for both physical and logical corruption. This is much more
expensive in terms of CPU and memory usage. expensive in terms of CPU and memory usage.
`fix`::
Check for both physical and logical corruption. Segments that were reported
as corrupted will be automatically removed. This option *may result in data loss*.
Use with extreme caution!
WARNING: Expert only. Checking shards may take a lot of time on large indices. WARNING: Expert only. Checking shards may take a lot of time on large indices.
-- --

View File

@ -61,7 +61,7 @@ include::sql/index.asciidoc[]
include::monitoring/index.asciidoc[] include::monitoring/index.asciidoc[]
include::{xes-repo-dir}/rollup/index.asciidoc[] include::rollup/index.asciidoc[]
include::rest-api/index.asciidoc[] include::rest-api/index.asciidoc[]

View File

@ -99,7 +99,7 @@ The following parameters are accepted by `text` fields:
`index_phrases`:: `index_phrases`::
If enabled, two-term word combinations ('shingles') are indexed into a separate If enabled, two-term word combinations ('shingles') are indexed into a separate
field. This allows exact phrase queries to run more efficiently, at the expense field. This allows exact phrase queries (no slop) to run more efficiently, at the expense
of a larger index. Note that this works best when stopwords are not removed, of a larger index. Note that this works best when stopwords are not removed,
as phrases containing stopwords will not use the subsidiary field and will fall as phrases containing stopwords will not use the subsidiary field and will fall
back to a standard phrase query. Accepts `true` or `false` (default). back to a standard phrase query. Accepts `true` or `false` (default).
@ -171,4 +171,4 @@ PUT my_index
-------------------------------- --------------------------------
// CONSOLE // CONSOLE
<1> `min_chars` must be greater than zero, defaults to 2 <1> `min_chars` must be greater than zero, defaults to 2
<2> `max_chars` must be greater than or equal to `min_chars` and less than 20, defaults to 5 <2> `max_chars` must be greater than or equal to `min_chars` and less than 20, defaults to 5

View File

@ -78,3 +78,7 @@ The parent circuit breaker defines a new setting `indices.breaker.total.use_real
heap memory instead of only considering the reserved memory by child circuit breakers. When this heap memory instead of only considering the reserved memory by child circuit breakers. When this
setting is `true`, the default parent breaker limit also changes from 70% to 95% of the JVM heap size. setting is `true`, the default parent breaker limit also changes from 70% to 95% of the JVM heap size.
The previous behavior can be restored by setting `indices.breaker.total.use_real_memory` to `false`. The previous behavior can be restored by setting `indices.breaker.total.use_real_memory` to `false`.
==== `fix` value for `index.shard.check_on_startup` is removed
Deprecated option value `fix` for setting `index.shard.check_on_startup` is not supported.

View File

@ -41,7 +41,7 @@ PUT _xpack/ml/anomaly_detectors/farequote
} }
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[setup:farequote_data] // TEST[skip:setup:farequote_data]
In this example, the `airline`, `responsetime`, and `time` fields are In this example, the `airline`, `responsetime`, and `time` fields are
aggregations. aggregations.
@ -90,7 +90,7 @@ PUT _xpack/ml/datafeeds/datafeed-farequote
} }
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[setup:farequote_job] // TEST[skip:setup:farequote_job]
In this example, the aggregations have names that match the fields that they In this example, the aggregations have names that match the fields that they
operate on. That is to say, the `max` aggregation is named `time` and its operate on. That is to say, the `max` aggregation is named `time` and its

View File

@ -44,6 +44,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs
} }
---------------------------------- ----------------------------------
//CONSOLE //CONSOLE
// TEST[skip:needs-licence]
<1> The `categorization_field_name` property indicates which field will be <1> The `categorization_field_name` property indicates which field will be
categorized. categorized.
<2> The resulting categories are used in a detector by setting `by_field_name`, <2> The resulting categories are used in a detector by setting `by_field_name`,
@ -127,6 +128,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs2
} }
---------------------------------- ----------------------------------
//CONSOLE //CONSOLE
// TEST[skip:needs-licence]
<1> The <1> The
{ref}/analysis-pattern-replace-charfilter.html[`pattern_replace` character filter] {ref}/analysis-pattern-replace-charfilter.html[`pattern_replace` character filter]
here achieves exactly the same as the `categorization_filters` in the first here achieves exactly the same as the `categorization_filters` in the first
@ -193,6 +195,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs3
} }
---------------------------------- ----------------------------------
//CONSOLE //CONSOLE
// TEST[skip:needs-licence]
<1> Tokens basically consist of hyphens, digits, letters, underscores and dots. <1> Tokens basically consist of hyphens, digits, letters, underscores and dots.
<2> By default, categorization ignores tokens that begin with a digit. <2> By default, categorization ignores tokens that begin with a digit.
<3> By default, categorization also ignores tokens that are hexadecimal numbers. <3> By default, categorization also ignores tokens that are hexadecimal numbers.

View File

@ -36,20 +36,20 @@ The scenarios in this section describe some best practices for generating useful
* <<ml-configuring-transform>> * <<ml-configuring-transform>>
* <<ml-configuring-detector-custom-rules>> * <<ml-configuring-detector-custom-rules>>
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/customurl.asciidoc :edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/customurl.asciidoc
include::customurl.asciidoc[] include::customurl.asciidoc[]
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/aggregations.asciidoc :edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/aggregations.asciidoc
include::aggregations.asciidoc[] include::aggregations.asciidoc[]
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/categories.asciidoc :edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/categories.asciidoc
include::categories.asciidoc[] include::categories.asciidoc[]
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/populations.asciidoc :edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/populations.asciidoc
include::populations.asciidoc[] include::populations.asciidoc[]
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/transforms.asciidoc :edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/transforms.asciidoc
include::transforms.asciidoc[] include::transforms.asciidoc[]
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/detector-custom-rules.asciidoc :edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/detector-custom-rules.asciidoc
include::detector-custom-rules.asciidoc[] include::detector-custom-rules.asciidoc[]

View File

@ -106,7 +106,7 @@ POST _xpack/ml/anomaly_detectors/sample_job/_update
} }
---------------------------------- ----------------------------------
//CONSOLE //CONSOLE
//TEST[setup:sample_job] //TEST[skip:setup:sample_job]
When you click this custom URL in the anomalies table in {kib}, it opens up the When you click this custom URL in the anomalies table in {kib}, it opens up the
*Discover* page and displays source data for the period one hour before and *Discover* page and displays source data for the period one hour before and

View File

@ -39,6 +39,7 @@ PUT _xpack/ml/filters/safe_domains
} }
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
Now, we can create our job specifying a scope that uses the `safe_domains` Now, we can create our job specifying a scope that uses the `safe_domains`
filter for the `highest_registered_domain` field: filter for the `highest_registered_domain` field:
@ -70,6 +71,7 @@ PUT _xpack/ml/anomaly_detectors/dns_exfiltration_with_rule
} }
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
As time advances and we see more data and more results, we might encounter new As time advances and we see more data and more results, we might encounter new
domains that we want to add in the filter. We can do that by using the domains that we want to add in the filter. We can do that by using the
@ -83,7 +85,7 @@ POST _xpack/ml/filters/safe_domains/_update
} }
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[setup:ml_filter_safe_domains] // TEST[skip:setup:ml_filter_safe_domains]
Note that we can use any of the `partition_field_name`, `over_field_name`, or Note that we can use any of the `partition_field_name`, `over_field_name`, or
`by_field_name` fields in the `scope`. `by_field_name` fields in the `scope`.
@ -123,6 +125,7 @@ PUT _xpack/ml/anomaly_detectors/scoping_multiple_fields
} }
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
Such a detector will skip results when the values of all 3 scoped fields Such a detector will skip results when the values of all 3 scoped fields
are included in the referenced filters. are included in the referenced filters.
@ -166,6 +169,7 @@ PUT _xpack/ml/anomaly_detectors/cpu_with_rule
} }
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
When there are multiple conditions they are combined with a logical `and`. When there are multiple conditions they are combined with a logical `and`.
This is useful when we want the rule to apply to a range. We simply create This is useful when we want the rule to apply to a range. We simply create
@ -205,6 +209,7 @@ PUT _xpack/ml/anomaly_detectors/rule_with_range
} }
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
==== Custom rules in the life-cycle of a job ==== Custom rules in the life-cycle of a job

View File

@ -59,6 +59,7 @@ PUT _xpack/ml/anomaly_detectors/example1
} }
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
This example is probably the simplest possible analysis. It identifies This example is probably the simplest possible analysis. It identifies
time buckets during which the overall count of events is higher or lower than time buckets during which the overall count of events is higher or lower than
@ -86,6 +87,7 @@ PUT _xpack/ml/anomaly_detectors/example2
} }
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
If you use this `high_count` function in a detector in your job, it If you use this `high_count` function in a detector in your job, it
models the event rate for each error code. It detects users that generate an models the event rate for each error code. It detects users that generate an
@ -110,6 +112,7 @@ PUT _xpack/ml/anomaly_detectors/example3
} }
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
In this example, the function detects when the count of events for a In this example, the function detects when the count of events for a
status code is lower than usual. status code is lower than usual.
@ -136,6 +139,7 @@ PUT _xpack/ml/anomaly_detectors/example4
} }
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
If you are analyzing an aggregated `events_per_min` field, do not use a sum If you are analyzing an aggregated `events_per_min` field, do not use a sum
function (for example, `sum(events_per_min)`). Instead, use the count function function (for example, `sum(events_per_min)`). Instead, use the count function
@ -200,6 +204,7 @@ PUT _xpack/ml/anomaly_detectors/example5
} }
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
If you use this `high_non_zero_count` function in a detector in your job, it If you use this `high_non_zero_count` function in a detector in your job, it
models the count of events for the `signaturename` field. It ignores any buckets models the count of events for the `signaturename` field. It ignores any buckets
@ -253,6 +258,7 @@ PUT _xpack/ml/anomaly_detectors/example6
} }
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
This `distinct_count` function detects when a system has an unusual number This `distinct_count` function detects when a system has an unusual number
of logged in users. When you use this function in a detector in your job, it of logged in users. When you use this function in a detector in your job, it
@ -278,6 +284,7 @@ PUT _xpack/ml/anomaly_detectors/example7
} }
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
This example detects instances of port scanning. When you use this function in a This example detects instances of port scanning. When you use this function in a
detector in your job, it models the distinct count of ports. It also detects the detector in your job, it models the distinct count of ports. It also detects the

View File

@ -47,6 +47,7 @@ PUT _xpack/ml/anomaly_detectors/example1
} }
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
If you use this `lat_long` function in a detector in your job, it If you use this `lat_long` function in a detector in your job, it
detects anomalies where the geographic location of a credit card transaction is detects anomalies where the geographic location of a credit card transaction is
@ -98,6 +99,6 @@ PUT _xpack/ml/datafeeds/datafeed-test2
} }
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[setup:farequote_job] // TEST[skip:setup:farequote_job]
For more information, see <<ml-configuring-transform>>. For more information, see <<ml-configuring-transform>>.

View File

(22 binary image files, from 1.3 KiB to 384 KiB; widths, heights, and sizes unchanged before and after.)

View File

@ -51,14 +51,11 @@ PUT _xpack/ml/anomaly_detectors/population
} }
---------------------------------- ----------------------------------
//CONSOLE //CONSOLE
// TEST[skip:needs-licence]
<1> This `over_field_name` property indicates that the metrics for each user ( <1> This `over_field_name` property indicates that the metrics for each user (
as identified by their `username` value) are analyzed relative to other users as identified by their `username` value) are analyzed relative to other users
in each bucket. in each bucket.
//TO-DO: Per sophiec20 "Perhaps add the datafeed config and add a query filter to
//include only workstations as servers and printers would behave differently
//from the population
If your data is stored in {es}, you can use the population job wizard in {kib} If your data is stored in {es}, you can use the population job wizard in {kib}
to create a job with these same properties. For example, the population job to create a job with these same properties. For example, the population job
wizard provides the following job settings: wizard provides the following job settings:

View File

@ -28,7 +28,7 @@ request stops the `feed1` {dfeed}:
POST _xpack/ml/datafeeds/datafeed-total-requests/_stop POST _xpack/ml/datafeeds/datafeed-total-requests/_stop
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[setup:server_metrics_startdf] // TEST[skip:setup:server_metrics_startdf]
NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}. NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}.
For more information, see <<security-privileges>>. For more information, see <<security-privileges>>.
@ -49,6 +49,7 @@ If you are upgrading your cluster, you can use the following request to stop all
POST _xpack/ml/datafeeds/_all/_stop POST _xpack/ml/datafeeds/_all/_stop
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]
[float] [float]
[[closing-ml-jobs]] [[closing-ml-jobs]]
@ -67,7 +68,7 @@ example, the following request closes the `job1` job:
POST _xpack/ml/anomaly_detectors/total-requests/_close POST _xpack/ml/anomaly_detectors/total-requests/_close
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[setup:server_metrics_openjob] // TEST[skip:setup:server_metrics_openjob]
NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}. NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}.
For more information, see <<security-privileges>>. For more information, see <<security-privileges>>.
@ -86,3 +87,4 @@ all open jobs on the cluster:
POST _xpack/ml/anomaly_detectors/_all/_close POST _xpack/ml/anomaly_detectors/_all/_close
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[skip:needs-licence]

View File

@ -95,7 +95,7 @@ PUT /my_index/my_type/1
} }
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TESTSETUP // TEST[skip:SETUP]
<1> In this example, string fields are mapped as `keyword` fields to support <1> In this example, string fields are mapped as `keyword` fields to support
aggregation. If you want both a full text (`text`) and a keyword (`keyword`) aggregation. If you want both a full text (`text`) and a keyword (`keyword`)
version of the same field, use multi-fields. For more information, see version of the same field, use multi-fields. For more information, see
@ -144,7 +144,7 @@ PUT _xpack/ml/datafeeds/datafeed-test1
} }
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[skip:broken] // TEST[skip:needs-licence]
<1> A script field named `total_error_count` is referenced in the detector <1> A script field named `total_error_count` is referenced in the detector
within the job. within the job.
<2> The script field is defined in the {dfeed}. <2> The script field is defined in the {dfeed}.
@ -163,7 +163,7 @@ You can preview the contents of the {dfeed} by using the following API:
GET _xpack/ml/datafeeds/datafeed-test1/_preview GET _xpack/ml/datafeeds/datafeed-test1/_preview
---------------------------------- ----------------------------------
// CONSOLE // CONSOLE
// TEST[continued] // TEST[skip:continued]
In this example, the API returns the following results, which contain a sum of In this example, the API returns the following results, which contain a sum of
the `error_count` and `aborted_count` values: the `error_count` and `aborted_count` values:
@ -177,8 +177,6 @@ the `error_count` and `aborted_count` values:
} }
] ]
---------------------------------- ----------------------------------
// TESTRESPONSE
NOTE: This example demonstrates how to use script fields, but it contains NOTE: This example demonstrates how to use script fields, but it contains
insufficient data to generate meaningful results. For a full demonstration of insufficient data to generate meaningful results. For a full demonstration of
@ -254,7 +252,7 @@ PUT _xpack/ml/datafeeds/datafeed-test2
GET _xpack/ml/datafeeds/datafeed-test2/_preview GET _xpack/ml/datafeeds/datafeed-test2/_preview
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:broken] // TEST[skip:needs-licence]
<1> The script field has a rather generic name in this case, since it will <1> The script field has a rather generic name in this case, since it will
be used for various tests in the subsequent examples. be used for various tests in the subsequent examples.
<2> The script field uses the plus (+) operator to concatenate strings. <2> The script field uses the plus (+) operator to concatenate strings.
@ -271,7 +269,6 @@ and "SMITH " have been concatenated and an underscore was added:
} }
] ]
---------------------------------- ----------------------------------
// TESTRESPONSE
[[ml-configuring-transform3]] [[ml-configuring-transform3]]
.Example 3: Trimming strings .Example 3: Trimming strings
@ -292,7 +289,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview GET _xpack/ml/datafeeds/datafeed-test2/_preview
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[continued] // TEST[skip:continued]
<1> This script field uses the `trim()` function to trim extra white space from a <1> This script field uses the `trim()` function to trim extra white space from a
string. string.
@ -308,7 +305,6 @@ has been trimmed to "SMITH":
} }
] ]
---------------------------------- ----------------------------------
// TESTRESPONSE
[[ml-configuring-transform4]] [[ml-configuring-transform4]]
.Example 4: Converting strings to lowercase .Example 4: Converting strings to lowercase
@ -329,7 +325,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview GET _xpack/ml/datafeeds/datafeed-test2/_preview
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[continued] // TEST[skip:continued]
<1> This script field uses the `toLowerCase` function to convert a string to all <1> This script field uses the `toLowerCase` function to convert a string to all
lowercase letters. Likewise, you can use the `toUpperCase{}` function to convert lowercase letters. Likewise, you can use the `toUpperCase{}` function to convert
a string to uppercase letters. a string to uppercase letters.
@ -346,7 +342,6 @@ has been converted to "joe":
} }
] ]
---------------------------------- ----------------------------------
// TESTRESPONSE
[[ml-configuring-transform5]] [[ml-configuring-transform5]]
.Example 5: Converting strings to mixed case formats .Example 5: Converting strings to mixed case formats
@ -367,7 +362,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview GET _xpack/ml/datafeeds/datafeed-test2/_preview
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[continued] // TEST[skip:continued]
<1> This script field is a more complicated example of case manipulation. It uses <1> This script field is a more complicated example of case manipulation. It uses
the `subString()` function to capitalize the first letter of a string and the `subString()` function to capitalize the first letter of a string and
converts the remaining characters to lowercase. converts the remaining characters to lowercase.
@ -384,7 +379,6 @@ has been converted to "Joe":
} }
] ]
---------------------------------- ----------------------------------
// TESTRESPONSE
[[ml-configuring-transform6]] [[ml-configuring-transform6]]
.Example 6: Replacing tokens .Example 6: Replacing tokens
@ -405,7 +399,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview GET _xpack/ml/datafeeds/datafeed-test2/_preview
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[continued] // TEST[skip:continued]
<1> This script field uses regular expressions to replace white <1> This script field uses regular expressions to replace white
space with underscores. space with underscores.
@ -421,7 +415,6 @@ The preview {dfeed} API returns the following results, which show that
} }
] ]
---------------------------------- ----------------------------------
// TESTRESPONSE
[[ml-configuring-transform7]] [[ml-configuring-transform7]]
.Example 7: Regular expression matching and concatenation .Example 7: Regular expression matching and concatenation
@ -442,7 +435,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview GET _xpack/ml/datafeeds/datafeed-test2/_preview
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[continued] // TEST[skip:continued]
<1> This script field looks for a specific regular expression pattern and emits the <1> This script field looks for a specific regular expression pattern and emits the
matched groups as a concatenated string. If no match is found, it emits an empty matched groups as a concatenated string. If no match is found, it emits an empty
string. string.
@ -459,7 +452,6 @@ The preview {dfeed} API returns the following results, which show that
} }
] ]
---------------------------------- ----------------------------------
// TESTRESPONSE
[[ml-configuring-transform8]] [[ml-configuring-transform8]]
.Example 8: Splitting strings by domain name .Example 8: Splitting strings by domain name
@ -509,7 +501,7 @@ PUT _xpack/ml/datafeeds/datafeed-test3
GET _xpack/ml/datafeeds/datafeed-test3/_preview GET _xpack/ml/datafeeds/datafeed-test3/_preview
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:broken] // TEST[skip:needs-licence]
If you have a single field that contains a well-formed DNS domain name, you can If you have a single field that contains a well-formed DNS domain name, you can
use the `domainSplit()` function to split the string into its highest registered use the `domainSplit()` function to split the string into its highest registered
@ -537,7 +529,6 @@ The preview {dfeed} API returns the following results, which show that
} }
] ]
---------------------------------- ----------------------------------
// TESTRESPONSE
[[ml-configuring-transform9]] [[ml-configuring-transform9]]
.Example 9: Transforming geo_point data .Example 9: Transforming geo_point data
@ -583,7 +574,7 @@ PUT _xpack/ml/datafeeds/datafeed-test4
GET _xpack/ml/datafeeds/datafeed-test4/_preview GET _xpack/ml/datafeeds/datafeed-test4/_preview
-------------------------------------------------- --------------------------------------------------
// CONSOLE // CONSOLE
// TEST[skip:broken] // TEST[skip:needs-licence]
In {es}, location data can be stored in `geo_point` fields but this data type is In {es}, location data can be stored in `geo_point` fields but this data type is
not supported natively in {xpackml} analytics. This example of a script field not supported natively in {xpackml} analytics. This example of a script field
@ -602,4 +593,4 @@ The preview {dfeed} API returns the following results, which show that
} }
] ]
---------------------------------- ----------------------------------
// TESTRESPONSE

View File

@ -544,3 +544,8 @@ You can use the following APIs to add, remove, and retrieve role mappings:
=== Privilege APIs === Privilege APIs
See <<security-api-has-privileges>>. See <<security-api-has-privileges>>.
[role="exclude",id="xpack-commands"]
=== X-Pack commands
See <<commands>>.

View File

@ -23,7 +23,7 @@ include::{xes-repo-dir}/rest-api/graph/explore.asciidoc[]
include::{es-repo-dir}/licensing/index.asciidoc[] include::{es-repo-dir}/licensing/index.asciidoc[]
include::{es-repo-dir}/migration/migration.asciidoc[] include::{es-repo-dir}/migration/migration.asciidoc[]
include::{xes-repo-dir}/rest-api/ml-api.asciidoc[] include::{xes-repo-dir}/rest-api/ml-api.asciidoc[]
include::{xes-repo-dir}/rest-api/rollup-api.asciidoc[] include::{es-repo-dir}/rollup/rollup-api.asciidoc[]
include::{xes-repo-dir}/rest-api/security.asciidoc[] include::{xes-repo-dir}/rest-api/security.asciidoc[]
include::{xes-repo-dir}/rest-api/watcher.asciidoc[] include::{xes-repo-dir}/rest-api/watcher.asciidoc[]
include::{xes-repo-dir}/rest-api/defs.asciidoc[] include::{xes-repo-dir}/rest-api/defs.asciidoc[]

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-api-quickref]] [[rollup-api-quickref]]
== API Quick Reference == API Quick Reference

View File

@ -1,4 +1,5 @@
[role="xpack"] [role="xpack"]
[testenv="basic"]
[[rollup-delete-job]] [[rollup-delete-job]]
=== Delete Job API === Delete Job API
++++ ++++

View File

@ -1,4 +1,5 @@
[role="xpack"] [role="xpack"]
[testenv="basic"]
[[rollup-get-job]] [[rollup-get-job]]
=== Get Rollup Jobs API === Get Rollup Jobs API
++++ ++++

View File

@ -1,4 +1,5 @@
[role="xpack"] [role="xpack"]
[testenv="basic"]
[[rollup-put-job]] [[rollup-put-job]]
=== Create Job API === Create Job API
++++ ++++

View File

@ -1,4 +1,5 @@
[role="xpack"] [role="xpack"]
[testenv="basic"]
[[rollup-get-rollup-caps]] [[rollup-get-rollup-caps]]
=== Get Rollup Job Capabilities === Get Rollup Job Capabilities
++++ ++++

View File

@ -1,4 +1,5 @@
[role="xpack"] [role="xpack"]
[testenv="basic"]
[[rollup-job-config]] [[rollup-job-config]]
=== Rollup Job Configuration === Rollup Job Configuration

View File

@ -1,4 +1,5 @@
[role="xpack"] [role="xpack"]
[testenv="basic"]
[[rollup-search]] [[rollup-search]]
=== Rollup Search === Rollup Search
++++ ++++

View File

@ -1,4 +1,5 @@
[role="xpack"] [role="xpack"]
[testenv="basic"]
[[rollup-start-job]] [[rollup-start-job]]
=== Start Job API === Start Job API
++++ ++++

View File

@ -1,4 +1,5 @@
[role="xpack"] [role="xpack"]
[testenv="basic"]
[[rollup-stop-job]] [[rollup-stop-job]]
=== Stop Job API === Stop Job API
++++ ++++

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[xpack-rollup]] [[xpack-rollup]]
= Rolling up historical data = Rolling up historical data

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-overview]] [[rollup-overview]]
== Overview == Overview

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-agg-limitations]] [[rollup-agg-limitations]]
== Rollup Aggregation Limitations == Rollup Aggregation Limitations

View File

@ -1,4 +1,5 @@
[role="xpack"] [role="xpack"]
[testenv="basic"]
[[rollup-apis]] [[rollup-apis]]
== Rollup APIs == Rollup APIs
@ -26,12 +27,12 @@
include::rollup/delete-job.asciidoc[] include::apis/delete-job.asciidoc[]
include::rollup/get-job.asciidoc[] include::apis/get-job.asciidoc[]
include::rollup/put-job.asciidoc[] include::apis/put-job.asciidoc[]
include::rollup/start-job.asciidoc[] include::apis/start-job.asciidoc[]
include::rollup/stop-job.asciidoc[] include::apis/stop-job.asciidoc[]
include::rollup/rollup-caps.asciidoc[] include::apis/rollup-caps.asciidoc[]
include::rollup/rollup-index-caps.asciidoc[] include::apis/rollup-index-caps.asciidoc[]
include::rollup/rollup-search.asciidoc[] include::apis/rollup-search.asciidoc[]
include::rollup/rollup-job-config.asciidoc[] include::apis/rollup-job-config.asciidoc[]

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-getting-started]] [[rollup-getting-started]]
== Getting Started == Getting Started

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-search-limitations]] [[rollup-search-limitations]]
== Rollup Search Limitations == Rollup Search Limitations

View File

@ -1,3 +1,5 @@
[role="xpack"]
[testenv="basic"]
[[rollup-understanding-groups]] [[rollup-understanding-groups]]
== Understanding Groups == Understanding Groups

View File

@ -22,6 +22,15 @@ the first parameter:
$ ./bin/elasticsearch-sql-cli https://some.server:9200 $ ./bin/elasticsearch-sql-cli https://some.server:9200
-------------------------------------------------- --------------------------------------------------
If security is enabled on your cluster, you can pass the username
and password in the form `username:password@host_name:port`
to the SQL CLI:
[source,bash]
--------------------------------------------------
$ ./bin/elasticsearch-sql-cli https://sql_user:strongpassword@some.server:9200
--------------------------------------------------
Once the CLI is running you can use any <<sql-spec,query>> that Once the CLI is running you can use any <<sql-spec,query>> that
Elasticsearch supports: Elasticsearch supports:

View File

@ -46,12 +46,13 @@ if (!isEclipse && !isIdea) {
targetCompatibility = 9 targetCompatibility = 9
} }
/* Enable this when forbiddenapis was updated to 2.6.
* See: https://github.com/elastic/elasticsearch/issues/29292
forbiddenApisJava9 { forbiddenApisJava9 {
targetCompatibility = 9 if (project.runtimeJavaVersion < JavaVersion.VERSION_1_9) {
targetCompatibility = JavaVersion.VERSION_1_9
javaHome = project.java9Home
}
replaceSignatureFiles 'jdk-signatures'
} }
*/
jar { jar {
metaInf { metaInf {

View File

@ -25,6 +25,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.index.IndexModule; import org.elasticsearch.index.IndexModule;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.join.ParentJoinPlugin; import org.elasticsearch.join.ParentJoinPlugin;
import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase;
@ -58,6 +59,8 @@ public abstract class ParentChildTestCase extends ESIntegTestCase {
@Override @Override
public Settings indexSettings() { public Settings indexSettings() {
Settings.Builder builder = Settings.builder().put(super.indexSettings()) Settings.Builder builder = Settings.builder().put(super.indexSettings())
// AwaitsFix: https://github.com/elastic/elasticsearch/issues/33318
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), false)
// aggressive filter caching so that we can assert on the filter cache size // aggressive filter caching so that we can assert on the filter cache size
.put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), true) .put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), true)
.put(IndexModule.INDEX_QUERY_CACHE_EVERYTHING_SETTING.getKey(), true); .put(IndexModule.INDEX_QUERY_CACHE_EVERYTHING_SETTING.getKey(), true);

View File

@ -58,13 +58,13 @@ if (!isEclipse && !isIdea) {
sourceCompatibility = 9 sourceCompatibility = 9
targetCompatibility = 9 targetCompatibility = 9
} }
/* Enable this when forbiddenapis was updated to 2.6.
* See: https://github.com/elastic/elasticsearch/issues/29292
forbiddenApisJava9 { forbiddenApisJava9 {
targetCompatibility = 9 if (project.runtimeJavaVersion < JavaVersion.VERSION_1_9) {
targetCompatibility = JavaVersion.VERSION_1_9
javaHome = project.java9Home
}
} }
*/
jar { jar {
metaInf { metaInf {

View File

@ -104,24 +104,12 @@ public class PathTrie<T> {
namedWildcard = key.substring(key.indexOf('{') + 1, key.indexOf('}')); namedWildcard = key.substring(key.indexOf('{') + 1, key.indexOf('}'));
} }
public boolean isWildcard() {
return isWildcard;
}
public synchronized void addChild(TrieNode child) {
addInnerChild(child.key, child);
}
private void addInnerChild(String key, TrieNode child) { private void addInnerChild(String key, TrieNode child) {
Map<String, TrieNode> newChildren = new HashMap<>(children); Map<String, TrieNode> newChildren = new HashMap<>(children);
newChildren.put(key, child); newChildren.put(key, child);
children = unmodifiableMap(newChildren); children = unmodifiableMap(newChildren);
} }
public TrieNode getChild(String key) {
return children.get(key);
}
public synchronized void insert(String[] path, int index, T value) { public synchronized void insert(String[] path, int index, T value) {
if (index >= path.length) if (index >= path.length)
return; return;
@ -302,7 +290,7 @@ public class PathTrie<T> {
} }
int index = 0; int index = 0;
// Supports initial delimiter. // Supports initial delimiter.
if (strings.length > 0 && strings[0].isEmpty()) { if (strings[0].isEmpty()) {
index = 1; index = 1;
} }
root.insert(strings, index, value); root.insert(strings, index, value);
@ -327,7 +315,7 @@ public class PathTrie<T> {
} }
int index = 0; int index = 0;
// Supports initial delimiter. // Supports initial delimiter.
if (strings.length > 0 && strings[0].isEmpty()) { if (strings[0].isEmpty()) {
index = 1; index = 1;
} }
root.insertOrUpdate(strings, index, value, updater); root.insertOrUpdate(strings, index, value, updater);
@ -352,7 +340,7 @@ public class PathTrie<T> {
int index = 0; int index = 0;
// Supports initial delimiter. // Supports initial delimiter.
if (strings.length > 0 && strings[0].isEmpty()) { if (strings[0].isEmpty()) {
index = 1; index = 1;
} }
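The simplified check is safe because a leading separator always shows up as an empty first element after String.split, and the only way to get a zero-length array (a path made up solely of separators) is evidently handled by the early return whose closing brace is visible at the top of each of these hunks. A standalone illustration of the split semantics the new check relies on (not part of the commit):

    // Illustration only: String.split behaviour behind the "Supports initial delimiter" check.
    public class PathSplitDemo {
        public static void main(String[] args) {
            String[] strings = "/_cluster/health".split("/");
            // A leading '/' yields an empty first element: ["", "_cluster", "health"],
            // so strings[0].isEmpty() is enough to detect the initial delimiter.
            System.out.println(strings.length + " " + strings[0].isEmpty()); // 3 true

            String[] onlySeparators = "/".split("/");
            // A path consisting only of separators splits to a zero-length array;
            // that case has to be (and is) handled before the isEmpty() check.
            System.out.println(onlySeparators.length); // 0
        }
    }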
@ -75,11 +75,10 @@ public final class IndexSettings {
switch(s) { switch(s) {
case "false": case "false":
case "true": case "true":
case "fix":
case "checksum": case "checksum":
return s; return s;
default: default:
throw new IllegalArgumentException("unknown value for [index.shard.check_on_startup] must be one of [true, false, fix, checksum] but was: " + s); throw new IllegalArgumentException("unknown value for [index.shard.check_on_startup] must be one of [true, false, checksum] but was: " + s);
} }
}, Property.IndexScope); }, Property.IndexScope);
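After this change the only accepted values are true, false and checksum; the fix option is rejected outright. A minimal sketch mirroring the validator switch above (not the actual Setting registration), showing what callers can expect:

    // Minimal sketch mirroring the switch above; "fix" now fails validation.
    public class CheckOnStartupValidatorSketch {
        static String validate(String s) {
            switch (s) {
                case "false":
                case "true":
                case "checksum":
                    return s;
                default:
                    throw new IllegalArgumentException("unknown value for [index.shard.check_on_startup] "
                        + "must be one of [true, false, checksum] but was: " + s);
            }
        }
        public static void main(String[] args) {
            System.out.println(validate("checksum")); // accepted
            System.out.println(validate("fix"));      // throws IllegalArgumentException
        }
    }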
@ -1332,7 +1332,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
} }
recoveryState.setStage(RecoveryState.Stage.VERIFY_INDEX); recoveryState.setStage(RecoveryState.Stage.VERIFY_INDEX);
// also check here, before we apply the translog // also check here, before we apply the translog
if (Booleans.isTrue(checkIndexOnStartup)) { if (Booleans.isTrue(checkIndexOnStartup) || "checksum".equals(checkIndexOnStartup)) {
try { try {
checkIndex(); checkIndex();
} catch (IOException ex) { } catch (IOException ex) {
@ -1955,6 +1955,9 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
if (store.tryIncRef()) { if (store.tryIncRef()) {
try { try {
doCheckIndex(); doCheckIndex();
} catch (IOException e) {
store.markStoreCorrupted(e);
throw e;
} finally { } finally {
store.decRef(); store.decRef();
} }
@ -1998,18 +2001,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
return; return;
} }
logger.warn("check index [failure]\n{}", os.bytes().utf8ToString()); logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
if ("fix".equals(checkIndexOnStartup)) { throw new IOException("index check failure");
if (logger.isDebugEnabled()) {
logger.debug("fixing index, writing new segments file ...");
}
store.exorciseIndex(status);
if (logger.isDebugEnabled()) {
logger.debug("index fixed, wrote new segments file \"{}\"", status.segmentsFileName);
}
} else {
// only throw a failure if we are not going to fix the index
throw new IllegalStateException("index check failure but can't fix it");
}
} }
} }
@ -134,7 +134,8 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
static final int VERSION_STACK_TRACE = 1; // we write the stack trace too since 1.4.0 static final int VERSION_STACK_TRACE = 1; // we write the stack trace too since 1.4.0
static final int VERSION_START = 0; static final int VERSION_START = 0;
static final int VERSION = VERSION_WRITE_THROWABLE; static final int VERSION = VERSION_WRITE_THROWABLE;
static final String CORRUPTED = "corrupted_"; // public is for test purposes
public static final String CORRUPTED = "corrupted_";
public static final Setting<TimeValue> INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING = public static final Setting<TimeValue> INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING =
Setting.timeSetting("index.store.stats_refresh_interval", TimeValue.timeValueSeconds(10), Property.IndexScope); Setting.timeSetting("index.store.stats_refresh_interval", TimeValue.timeValueSeconds(10), Property.IndexScope);
@ -360,18 +361,6 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
} }
} }
/**
* Repairs the index using the previous returned status from {@link #checkIndex(PrintStream)}.
*/
public void exorciseIndex(CheckIndex.Status status) throws IOException {
metadataLock.writeLock().lock();
try (CheckIndex checkIndex = new CheckIndex(directory)) {
checkIndex.exorciseIndex(status);
} finally {
metadataLock.writeLock().unlock();
}
}
public StoreStats stats() throws IOException { public StoreStats stats() throws IOException {
ensureOpen(); ensureOpen();
return new StoreStats(directory.estimateSize()); return new StoreStats(directory.estimateSize());
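With exorciseIndex removed there is no in-place repair path any more: a failed check now marks the store corrupted (see the IndexShard hunk above) and surfaces as a recovery failure. The CORRUPTED prefix is made public so tests can look for the marker files; a sketch of that lookup, assuming an indexPath pointing at the shard's index directory and the usual java.nio.file and java.util.stream imports (the IndexShardTests hunk below does the same with a SimpleFileVisitor):

    // Sketch only: count corruption marker files via the now-public Store.CORRUPTED prefix.
    static long countCorruptionMarkers(Path indexPath) throws IOException {
        try (Stream<Path> files = Files.list(indexPath)) {
            return files.filter(Files::isRegularFile)
                .filter(p -> p.getFileName().toString().startsWith(Store.CORRUPTED))
                .count();
        }
    }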
@ -69,7 +69,7 @@ public class MetaDataIndexTemplateServiceTests extends ESSingleNodeTestCase {
containsString("Failed to parse value [0] for setting [index.number_of_shards] must be >= 1")); containsString("Failed to parse value [0] for setting [index.number_of_shards] must be >= 1"));
assertThat(throwables.get(0).getMessage(), assertThat(throwables.get(0).getMessage(),
containsString("unknown value for [index.shard.check_on_startup] " + containsString("unknown value for [index.shard.check_on_startup] " +
"must be one of [true, false, fix, checksum] but was: blargh")); "must be one of [true, false, checksum] but was: blargh"));
} }
public void testIndexTemplateValidationAccumulatesValidationErrors() { public void testIndexTemplateValidationAccumulatesValidationErrors() {
@ -23,6 +23,7 @@ import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
@ -118,6 +119,7 @@ import org.elasticsearch.snapshots.Snapshot;
import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.snapshots.SnapshotId;
import org.elasticsearch.snapshots.SnapshotInfo; import org.elasticsearch.snapshots.SnapshotInfo;
import org.elasticsearch.snapshots.SnapshotShardFailure; import org.elasticsearch.snapshots.SnapshotShardFailure;
import org.elasticsearch.test.CorruptionUtils;
import org.elasticsearch.test.DummyShardLock; import org.elasticsearch.test.DummyShardLock;
import org.elasticsearch.test.FieldMaskingReader; import org.elasticsearch.test.FieldMaskingReader;
import org.elasticsearch.test.VersionUtils; import org.elasticsearch.test.VersionUtils;
@ -126,7 +128,11 @@ import org.elasticsearch.ElasticsearchException;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
@ -1239,7 +1245,7 @@ public class IndexShardTests extends IndexShardTestCase {
}; };
try (Store store = createStore(shardId, new IndexSettings(metaData, Settings.EMPTY), directory)) { try (Store store = createStore(shardId, new IndexSettings(metaData, Settings.EMPTY), directory)) {
IndexShard shard = newShard(shardRouting, shardPath, metaData, store, IndexShard shard = newShard(shardRouting, shardPath, metaData, i -> store,
null, new InternalEngineFactory(), () -> { null, new InternalEngineFactory(), () -> {
}, EMPTY_EVENT_LISTENER); }, EMPTY_EVENT_LISTENER);
AtomicBoolean failureCallbackTriggered = new AtomicBoolean(false); AtomicBoolean failureCallbackTriggered = new AtomicBoolean(false);
@ -2590,6 +2596,143 @@ public class IndexShardTests extends IndexShardTestCase {
closeShards(newShard); closeShards(newShard);
} }
public void testIndexCheckOnStartup() throws Exception {
final IndexShard indexShard = newStartedShard(true);
final long numDocs = between(10, 100);
for (long i = 0; i < numDocs; i++) {
indexDoc(indexShard, "_doc", Long.toString(i), "{}");
}
indexShard.flush(new FlushRequest());
closeShards(indexShard);
final ShardPath shardPath = indexShard.shardPath();
final Path indexPath = corruptIndexFile(shardPath);
final AtomicInteger corruptedMarkerCount = new AtomicInteger();
final SimpleFileVisitor<Path> corruptedVisitor = new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
if (Files.isRegularFile(file) && file.getFileName().toString().startsWith(Store.CORRUPTED)) {
corruptedMarkerCount.incrementAndGet();
}
return FileVisitResult.CONTINUE;
}
};
Files.walkFileTree(indexPath, corruptedVisitor);
assertThat("corruption marker should not be there", corruptedMarkerCount.get(), equalTo(0));
final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),
RecoverySource.StoreRecoverySource.EXISTING_STORE_INSTANCE
);
// start the shard and perform the index check on startup; the corrupted index files should cause recovery to fail
final IndexMetaData indexMetaData = IndexMetaData.builder(indexShard.indexSettings().getIndexMetaData())
.settings(Settings.builder()
.put(indexShard.indexSettings.getSettings())
.put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("true", "checksum")))
.build();
IndexShard corruptedShard = newShard(shardRouting, shardPath, indexMetaData,
null, null, indexShard.engineFactory,
indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
final IndexShardRecoveryException indexShardRecoveryException =
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
assertThat(indexShardRecoveryException.getMessage(), equalTo("failed recovery"));
// check that corrupt marker is there
Files.walkFileTree(indexPath, corruptedVisitor);
assertThat("store has to be marked as corrupted", corruptedMarkerCount.get(), equalTo(1));
try {
closeShards(corruptedShard);
} catch (RuntimeException e) {
assertThat(e.getMessage(), equalTo("CheckIndex failed"));
}
}
public void testShardDoesNotStartIfCorruptedMarkerIsPresent() throws Exception {
final IndexShard indexShard = newStartedShard(true);
final long numDocs = between(10, 100);
for (long i = 0; i < numDocs; i++) {
indexDoc(indexShard, "_doc", Long.toString(i), "{}");
}
indexShard.flush(new FlushRequest());
closeShards(indexShard);
final ShardPath shardPath = indexShard.shardPath();
final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),
RecoverySource.StoreRecoverySource.EXISTING_STORE_INSTANCE
);
final IndexMetaData indexMetaData = indexShard.indexSettings().getIndexMetaData();
final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME);
// create corrupted marker
final String corruptionMessage = "fake ioexception";
try(Store store = createStore(indexShard.indexSettings(), shardPath)) {
store.markStoreCorrupted(new IOException(corruptionMessage));
}
// try to start shard on corrupted files
final IndexShard corruptedShard = newShard(shardRouting, shardPath, indexMetaData,
null, null, indexShard.engineFactory,
indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
final IndexShardRecoveryException exception1 = expectThrows(IndexShardRecoveryException.class,
() -> newStartedShard(p -> corruptedShard, true));
assertThat(exception1.getCause().getMessage(), equalTo(corruptionMessage + " (resource=preexisting_corruption)"));
closeShards(corruptedShard);
final AtomicInteger corruptedMarkerCount = new AtomicInteger();
final SimpleFileVisitor<Path> corruptedVisitor = new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
if (Files.isRegularFile(file) && file.getFileName().toString().startsWith(Store.CORRUPTED)) {
corruptedMarkerCount.incrementAndGet();
}
return FileVisitResult.CONTINUE;
}
};
Files.walkFileTree(indexPath, corruptedVisitor);
assertThat("store has to be marked as corrupted", corruptedMarkerCount.get(), equalTo(1));
// try to start the shard a second time on the corrupted files
final IndexShard corruptedShard2 = newShard(shardRouting, shardPath, indexMetaData,
null, null, indexShard.engineFactory,
indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
final IndexShardRecoveryException exception2 = expectThrows(IndexShardRecoveryException.class,
() -> newStartedShard(p -> corruptedShard2, true));
assertThat(exception2.getCause().getMessage(), equalTo(corruptionMessage + " (resource=preexisting_corruption)"));
closeShards(corruptedShard2);
// check that corrupt marker is there
corruptedMarkerCount.set(0);
Files.walkFileTree(indexPath, corruptedVisitor);
assertThat("store still has a single corrupt marker", corruptedMarkerCount.get(), equalTo(1));
}
private Path corruptIndexFile(ShardPath shardPath) throws IOException {
final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME);
final Path[] filesToCorrupt =
Files.walk(indexPath)
.filter(p -> {
final String name = p.getFileName().toString();
return Files.isRegularFile(p)
&& name.startsWith("extra") == false // Skip files added by Lucene's ExtrasFS
&& IndexWriter.WRITE_LOCK_NAME.equals(name) == false
&& name.startsWith("segments_") == false && name.endsWith(".si") == false;
})
.toArray(Path[]::new);
CorruptionUtils.corruptFile(random(), filesToCorrupt);
return indexPath;
}
/** /**
* Simulates a scenario that happens when we are async fetching snapshot metadata from GatewayService * Simulates a scenario that happens when we are async fetching snapshot metadata from GatewayService
* and checking index concurrently. This should always be possible without any exception. * and checking index concurrently. This should always be possible without any exception.
@ -2613,7 +2756,7 @@ public class IndexShardTests extends IndexShardTestCase {
final IndexMetaData indexMetaData = IndexMetaData.builder(indexShard.indexSettings().getIndexMetaData()) final IndexMetaData indexMetaData = IndexMetaData.builder(indexShard.indexSettings().getIndexMetaData())
.settings(Settings.builder() .settings(Settings.builder()
.put(indexShard.indexSettings.getSettings()) .put(indexShard.indexSettings.getSettings())
.put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("false", "true", "checksum", "fix"))) .put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("false", "true", "checksum")))
.build(); .build();
final IndexShard newShard = newShard(shardRouting, indexShard.shardPath(), indexMetaData, final IndexShard newShard = newShard(shardRouting, indexShard.shardPath(), indexMetaData,
null, null, indexShard.engineFactory, indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER); null, null, indexShard.engineFactory, indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);

View File

@ -32,6 +32,7 @@ import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingHelper; import org.elasticsearch.cluster.routing.ShardRoutingHelper;
import org.elasticsearch.cluster.routing.ShardRoutingState; import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.TestShardRouting; import org.elasticsearch.cluster.routing.TestShardRouting;
import org.elasticsearch.common.CheckedFunction;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.lucene.uid.Versions;
@ -156,7 +157,6 @@ public abstract class IndexShardTestCase extends ESTestCase {
return Settings.EMPTY; return Settings.EMPTY;
} }
protected Store createStore(IndexSettings indexSettings, ShardPath shardPath) throws IOException { protected Store createStore(IndexSettings indexSettings, ShardPath shardPath) throws IOException {
return createStore(shardPath.getShardId(), indexSettings, newFSDirectory(shardPath.resolveIndex())); return createStore(shardPath.getShardId(), indexSettings, newFSDirectory(shardPath.resolveIndex()));
} }
@ -169,7 +169,6 @@ public abstract class IndexShardTestCase extends ESTestCase {
} }
}; };
return new Store(shardId, indexSettings, directoryService, new DummyShardLock(shardId)); return new Store(shardId, indexSettings, directoryService, new DummyShardLock(shardId));
} }
/** /**
@ -179,7 +178,17 @@ public abstract class IndexShardTestCase extends ESTestCase {
* another shard) * another shard)
*/ */
protected IndexShard newShard(boolean primary) throws IOException { protected IndexShard newShard(boolean primary) throws IOException {
return newShard(primary, Settings.EMPTY, new InternalEngineFactory()); return newShard(primary, Settings.EMPTY);
}
/**
* Creates a new initializing shard. The shard will have its own unique data path.
*
* @param primary indicates whether to create a primary shard (ready to recover from an empty store) or a replica (ready to recover from
* another shard)
*/
protected IndexShard newShard(final boolean primary, final Settings settings) throws IOException {
return newShard(primary, settings, new InternalEngineFactory());
} }
/** /**
@ -318,23 +327,25 @@ public abstract class IndexShardTestCase extends ESTestCase {
* @param routing shard routing to use * @param routing shard routing to use
* @param shardPath path to use for shard data * @param shardPath path to use for shard data
* @param indexMetaData indexMetaData for the shard, including any mapping * @param indexMetaData indexMetaData for the shard, including any mapping
* @param store an optional custom store to use. If null a default file based store will be created * @param storeProvider an optional custom store provider to use. If null a default file based store will be created
* @param indexSearcherWrapper an optional wrapper to be used during searchers * @param indexSearcherWrapper an optional wrapper to be used during searchers
* @param globalCheckpointSyncer callback for syncing global checkpoints * @param globalCheckpointSyncer callback for syncing global checkpoints
* @param indexEventListener index event listener * @param indexEventListener index event listener
* @param listeners an optional set of listeners to add to the shard * @param listeners an optional set of listeners to add to the shard
*/ */
protected IndexShard newShard(ShardRouting routing, ShardPath shardPath, IndexMetaData indexMetaData, protected IndexShard newShard(ShardRouting routing, ShardPath shardPath, IndexMetaData indexMetaData,
@Nullable Store store, @Nullable IndexSearcherWrapper indexSearcherWrapper, @Nullable CheckedFunction<IndexSettings, Store, IOException> storeProvider,
@Nullable IndexSearcherWrapper indexSearcherWrapper,
@Nullable EngineFactory engineFactory, @Nullable EngineFactory engineFactory,
Runnable globalCheckpointSyncer, Runnable globalCheckpointSyncer,
IndexEventListener indexEventListener, IndexingOperationListener... listeners) throws IOException { IndexEventListener indexEventListener, IndexingOperationListener... listeners) throws IOException {
final Settings nodeSettings = Settings.builder().put("node.name", routing.currentNodeId()).build(); final Settings nodeSettings = Settings.builder().put("node.name", routing.currentNodeId()).build();
final IndexSettings indexSettings = new IndexSettings(indexMetaData, nodeSettings); final IndexSettings indexSettings = new IndexSettings(indexMetaData, nodeSettings);
final IndexShard indexShard; final IndexShard indexShard;
if (store == null) { if (storeProvider == null) {
store = createStore(indexSettings, shardPath); storeProvider = is -> createStore(is, shardPath);
} }
final Store store = storeProvider.apply(indexSettings);
boolean success = false; boolean success = false;
try { try {
IndexCache indexCache = new IndexCache(indexSettings, new DisabledQueryCache(indexSettings), null); IndexCache indexCache = new IndexCache(indexSettings, new DisabledQueryCache(indexSettings), null);
@ -424,7 +435,18 @@ public abstract class IndexShardTestCase extends ESTestCase {
*/ */
protected IndexShard newStartedShard( protected IndexShard newStartedShard(
final boolean primary, final Settings settings, final EngineFactory engineFactory) throws IOException { final boolean primary, final Settings settings, final EngineFactory engineFactory) throws IOException {
IndexShard shard = newShard(primary, settings, engineFactory); return newStartedShard(p -> newShard(p, settings, engineFactory), primary);
}
/**
* creates a new empty shard and starts it.
*
* @param shardFunction shard factory function
* @param primary controls whether the shard will be a primary or a replica.
*/
protected IndexShard newStartedShard(CheckedFunction<Boolean, IndexShard, IOException> shardFunction,
boolean primary) throws IOException {
IndexShard shard = shardFunction.apply(primary);
if (primary) { if (primary) {
recoverShardFromStore(shard); recoverShardFromStore(shard);
} else { } else {
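The store argument becomes a CheckedFunction so a test can hand in a wrapped or preconfigured Store, and newStartedShard gains an overload that takes a shard factory. A usage sketch, assuming a test that already has the routing, path, metadata and store variables set up (both calls mirror the updated IndexShardTests above):

    // Inject a specific Store instance via the new provider parameter ...
    IndexShard shard = newShard(shardRouting, shardPath, metaData, indexSettings -> store,
        null, new InternalEngineFactory(), () -> {}, EMPTY_EVENT_LISTENER);
    // ... and start a pre-built shard through the new factory-based overload.
    IndexShard startedShard = newStartedShard(p -> shard, true);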
@ -1,102 +0,0 @@
[role="xpack"]
[[ml-api-quickref]]
== API quick reference
All {ml} endpoints have the following base:
[source,js]
----
/_xpack/ml/
----
// NOTCONSOLE
The main {ml} resources can be accessed with a variety of endpoints:
* <<ml-api-jobs,+/anomaly_detectors/+>>: Create and manage {ml} jobs
* <<ml-api-datafeeds,+/datafeeds/+>>: Select data from {es} to be analyzed
* <<ml-api-results,+/results/+>>: Access the results of a {ml} job
* <<ml-api-snapshots,+/model_snapshots/+>>: Manage model snapshots
//* <<ml-api-validate,+/validate/+>>: Validate subsections of job configurations
[float]
[[ml-api-jobs]]
=== /anomaly_detectors/
* {ref}/ml-put-job.html[PUT /anomaly_detectors/<job_id+++>+++]: Create a job
* {ref}/ml-open-job.html[POST /anomaly_detectors/<job_id>/_open]: Open a job
* {ref}/ml-post-data.html[POST /anomaly_detectors/<job_id>/_data]: Send data to a job
* {ref}/ml-get-job.html[GET /anomaly_detectors]: List jobs
* {ref}/ml-get-job.html[GET /anomaly_detectors/<job_id+++>+++]: Get job details
* {ref}/ml-get-job-stats.html[GET /anomaly_detectors/<job_id>/_stats]: Get job statistics
* {ref}/ml-update-job.html[POST /anomaly_detectors/<job_id>/_update]: Update certain properties of the job configuration
* {ref}/ml-flush-job.html[POST anomaly_detectors/<job_id>/_flush]: Force a job to analyze buffered data
* {ref}/ml-forecast.html[POST anomaly_detectors/<job_id>/_forecast]: Forecast future job behavior
* {ref}/ml-close-job.html[POST /anomaly_detectors/<job_id>/_close]: Close a job
* {ref}/ml-delete-job.html[DELETE /anomaly_detectors/<job_id+++>+++]: Delete a job
[float]
[[ml-api-calendars]]
=== /calendars/
* {ref}/ml-put-calendar.html[PUT /calendars/<calendar_id+++>+++]: Create a calendar
* {ref}/ml-post-calendar-event.html[POST /calendars/<calendar_id+++>+++/events]: Add a scheduled event to a calendar
* {ref}/ml-put-calendar-job.html[PUT /calendars/<calendar_id+++>+++/jobs/<job_id+++>+++]: Associate a job with a calendar
* {ref}/ml-get-calendar.html[GET /calendars/<calendar_id+++>+++]: Get calendar details
* {ref}/ml-get-calendar-event.html[GET /calendars/<calendar_id+++>+++/events]: Get scheduled event details
* {ref}/ml-delete-calendar-event.html[DELETE /calendars/<calendar_id+++>+++/events/<event_id+++>+++]: Remove a scheduled event from a calendar
* {ref}/ml-delete-calendar-job.html[DELETE /calendars/<calendar_id+++>+++/jobs/<job_id+++>+++]: Disassociate a job from a calendar
* {ref}/ml-delete-calendar.html[DELETE /calendars/<calendar_id+++>+++]: Delete a calendar
[float]
[[ml-api-filters]]
=== /filters/
* {ref}/ml-put-filter.html[PUT /filters/<filter_id+++>+++]: Create a filter
* {ref}/ml-update-filter.html[POST /filters/<filter_id+++>+++/_update]: Update a filter
* {ref}/ml-get-filter.html[GET /filters/<filter_id+++>+++]: List filters
* {ref}/ml-delete-filter.html[DELETE /filter/<filter_id+++>+++]: Delete a filter
[float]
[[ml-api-datafeeds]]
=== /datafeeds/
* {ref}/ml-put-datafeed.html[PUT /datafeeds/<datafeed_id+++>+++]: Create a {dfeed}
* {ref}/ml-start-datafeed.html[POST /datafeeds/<datafeed_id>/_start]: Start a {dfeed}
* {ref}/ml-get-datafeed.html[GET /datafeeds]: List {dfeeds}
* {ref}/ml-get-datafeed.html[GET /datafeeds/<datafeed_id+++>+++]: Get {dfeed} details
* {ref}/ml-get-datafeed-stats.html[GET /datafeeds/<datafeed_id>/_stats]: Get statistical information for {dfeeds}
* {ref}/ml-preview-datafeed.html[GET /datafeeds/<datafeed_id>/_preview]: Get a preview of a {dfeed}
* {ref}/ml-update-datafeed.html[POST /datafeeds/<datafeedid>/_update]: Update certain settings for a {dfeed}
* {ref}/ml-stop-datafeed.html[POST /datafeeds/<datafeed_id>/_stop]: Stop a {dfeed}
* {ref}/ml-delete-datafeed.html[DELETE /datafeeds/<datafeed_id+++>+++]: Delete {dfeed}
[float]
[[ml-api-results]]
=== /results/
* {ref}/ml-get-bucket.html[GET /results/buckets]: List the buckets in the results
* {ref}/ml-get-bucket.html[GET /results/buckets/<bucket_id+++>+++]: Get bucket details
* {ref}/ml-get-overall-buckets.html[GET /results/overall_buckets]: Get overall bucket results for multiple jobs
* {ref}/ml-get-category.html[GET /results/categories]: List the categories in the results
* {ref}/ml-get-category.html[GET /results/categories/<category_id+++>+++]: Get category details
* {ref}/ml-get-influencer.html[GET /results/influencers]: Get influencer details
* {ref}/ml-get-record.html[GET /results/records]: Get records from the results
[float]
[[ml-api-snapshots]]
=== /model_snapshots/
* {ref}/ml-get-snapshot.html[GET /model_snapshots]: List model snapshots
* {ref}/ml-get-snapshot.html[GET /model_snapshots/<snapshot_id+++>+++]: Get model snapshot details
* {ref}/ml-revert-snapshot.html[POST /model_snapshots/<snapshot_id>/_revert]: Revert a model snapshot
* {ref}/ml-update-snapshot.html[POST /model_snapshots/<snapshot_id>/_update]: Update certain settings for a model snapshot
* {ref}/ml-delete-snapshot.html[DELETE /model_snapshots/<snapshot_id+++>+++]: Delete a model snapshot
////
[float]
[[ml-api-validate]]
=== /validate/
* {ref}/ml-valid-detector.html[POST /anomaly_detectors/_validate/detector]: Validate a detector
* {ref}/ml-valid-job.html[POST /anomaly_detectors/_validate]: Validate a job
////
@ -1,35 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
import org.supercsv.prefs.CsvPreference;
import java.io.IOException;
import java.util.List;
public class CsvLogStructureFinderFactory implements LogStructureFinderFactory {
/**
* Rules are:
* - The file must be valid CSV
* - It must contain at least two complete records
* - There must be at least two fields per record (otherwise files with no commas could be treated as CSV!)
* - Every CSV record except the last must have the same number of fields
* The reason the last record is allowed to have fewer fields than the others is that
* it could have been truncated when the file was sampled.
*/
@Override
public boolean canCreateFromSample(List<String> explanation, String sample) {
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.EXCEL_PREFERENCE, "CSV");
}
@Override
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
throws IOException {
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
CsvPreference.EXCEL_PREFERENCE, false);
}
}
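The CSV-specific factory disappears entirely; its behaviour now comes from the generic delimited factory configured with a comma delimiter, a minimum of two fields per record and no trimming, exactly as registered in the LogStructureFinderManager hunk further down:

    // Equivalent of the deleted CsvLogStructureFinderFactory after this commit.
    LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);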
@ -29,17 +29,16 @@ import java.util.regex.Pattern;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.IntStream; import java.util.stream.IntStream;
public class SeparatedValuesLogStructureFinder implements LogStructureFinder { public class DelimitedLogStructureFinder implements LogStructureFinder {
private static final int MAX_LEVENSHTEIN_COMPARISONS = 100; private static final int MAX_LEVENSHTEIN_COMPARISONS = 100;
private final List<String> sampleMessages; private final List<String> sampleMessages;
private final LogStructure structure; private final LogStructure structure;
static SeparatedValuesLogStructureFinder makeSeparatedValuesLogStructureFinder(List<String> explanation, String sample, static DelimitedLogStructureFinder makeDelimitedLogStructureFinder(List<String> explanation, String sample, String charsetName,
String charsetName, Boolean hasByteOrderMarker, Boolean hasByteOrderMarker, CsvPreference csvPreference,
CsvPreference csvPreference, boolean trimFields) boolean trimFields) throws IOException {
throws IOException {
Tuple<List<List<String>>, List<Integer>> parsed = readRows(sample, csvPreference); Tuple<List<List<String>>, List<Integer>> parsed = readRows(sample, csvPreference);
List<List<String>> rows = parsed.v1(); List<List<String>> rows = parsed.v1();
@ -73,13 +72,14 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
String preamble = Pattern.compile("\n").splitAsStream(sample).limit(lineNumbers.get(1)).collect(Collectors.joining("\n", "", "\n")); String preamble = Pattern.compile("\n").splitAsStream(sample).limit(lineNumbers.get(1)).collect(Collectors.joining("\n", "", "\n"));
char delimiter = (char) csvPreference.getDelimiterChar(); char delimiter = (char) csvPreference.getDelimiterChar();
LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.fromSeparator(delimiter)) LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.DELIMITED)
.setCharset(charsetName) .setCharset(charsetName)
.setHasByteOrderMarker(hasByteOrderMarker) .setHasByteOrderMarker(hasByteOrderMarker)
.setSampleStart(preamble) .setSampleStart(preamble)
.setNumLinesAnalyzed(lineNumbers.get(lineNumbers.size() - 1)) .setNumLinesAnalyzed(lineNumbers.get(lineNumbers.size() - 1))
.setNumMessagesAnalyzed(sampleRecords.size()) .setNumMessagesAnalyzed(sampleRecords.size())
.setHasHeaderRow(isHeaderInFile) .setHasHeaderRow(isHeaderInFile)
.setDelimiter(delimiter)
.setInputFields(Arrays.stream(headerWithNamedBlanks).collect(Collectors.toList())); .setInputFields(Arrays.stream(headerWithNamedBlanks).collect(Collectors.toList()));
if (trimFields) { if (trimFields) {
@ -131,10 +131,10 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
.setExplanation(explanation) .setExplanation(explanation)
.build(); .build();
return new SeparatedValuesLogStructureFinder(sampleMessages, structure); return new DelimitedLogStructureFinder(sampleMessages, structure);
} }
private SeparatedValuesLogStructureFinder(List<String> sampleMessages, LogStructure structure) { private DelimitedLogStructureFinder(List<String> sampleMessages, LogStructure structure) {
this.sampleMessages = Collections.unmodifiableList(sampleMessages); this.sampleMessages = Collections.unmodifiableList(sampleMessages);
this.structure = structure; this.structure = structure;
} }
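The static entry point is renamed along with the class; a sketch of calling it directly, where the explanation and sample variables come from the caller and the charset and byte-order-marker values are illustrative (the factory classes in this commit invoke it the same way):

    // Sketch: EXCEL_PREFERENCE means comma-delimited, trimFields false.
    DelimitedLogStructureFinder finder = DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(
        explanation, sample, "UTF-8", false, CsvPreference.EXCEL_PREFERENCE, false);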
@ -0,0 +1,57 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
import org.supercsv.prefs.CsvPreference;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
public class DelimitedLogStructureFinderFactory implements LogStructureFinderFactory {
private final CsvPreference csvPreference;
private final int minFieldsPerRow;
private final boolean trimFields;
DelimitedLogStructureFinderFactory(char delimiter, int minFieldsPerRow, boolean trimFields) {
csvPreference = new CsvPreference.Builder('"', delimiter, "\n").build();
this.minFieldsPerRow = minFieldsPerRow;
this.trimFields = trimFields;
}
/**
* Rules are:
* - It must contain at least two complete records
* - There must be a minimum number of fields per record (otherwise files with no commas could be treated as CSV!)
* - Every record except the last must have the same number of fields
* The reason the last record is allowed to have fewer fields than the others is that
* it could have been truncated when the file was sampled.
*/
@Override
public boolean canCreateFromSample(List<String> explanation, String sample) {
String formatName;
switch ((char) csvPreference.getDelimiterChar()) {
case ',':
formatName = "CSV";
break;
case '\t':
formatName = "TSV";
break;
default:
formatName = Character.getName(csvPreference.getDelimiterChar()).toLowerCase(Locale.ROOT) + " delimited values";
break;
}
return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, minFieldsPerRow, csvPreference, formatName);
}
@Override
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
throws IOException {
return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
csvPreference, trimFields);
}
}
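For delimiters other than comma and tab the reported format name is derived from the character's Unicode name, so the semicolon and pipe variants registered by the manager describe themselves without extra code. An illustration of that default branch:

    // Illustration of the default branch above for a delimiter without a special case.
    char delimiter = ';';
    String formatName = Character.getName(delimiter).toLowerCase(java.util.Locale.ROOT) + " delimited values";
    // formatName => "semicolon delimited values"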
@ -27,37 +27,14 @@ public class LogStructure implements ToXContentObject {
public enum Format { public enum Format {
JSON, XML, CSV, TSV, SEMI_COLON_SEPARATED_VALUES, PIPE_SEPARATED_VALUES, SEMI_STRUCTURED_TEXT; JSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT;
public Character separator() {
switch (this) {
case JSON:
case XML:
return null;
case CSV:
return ',';
case TSV:
return '\t';
case SEMI_COLON_SEPARATED_VALUES:
return ';';
case PIPE_SEPARATED_VALUES:
return '|';
case SEMI_STRUCTURED_TEXT:
return null;
default:
throw new IllegalStateException("enum value [" + this + "] missing from switch.");
}
}
public boolean supportsNesting() { public boolean supportsNesting() {
switch (this) { switch (this) {
case JSON: case JSON:
case XML: case XML:
return true; return true;
case CSV: case DELIMITED:
case TSV:
case SEMI_COLON_SEPARATED_VALUES:
case PIPE_SEPARATED_VALUES:
case SEMI_STRUCTURED_TEXT: case SEMI_STRUCTURED_TEXT:
return false; return false;
default: default:
@ -69,10 +46,7 @@ public class LogStructure implements ToXContentObject {
switch (this) { switch (this) {
case JSON: case JSON:
case XML: case XML:
case CSV: case DELIMITED:
case TSV:
case SEMI_COLON_SEPARATED_VALUES:
case PIPE_SEPARATED_VALUES:
return true; return true;
case SEMI_STRUCTURED_TEXT: case SEMI_STRUCTURED_TEXT:
return false; return false;
@ -85,10 +59,7 @@ public class LogStructure implements ToXContentObject {
switch (this) { switch (this) {
case JSON: case JSON:
case XML: case XML:
case CSV: case DELIMITED:
case TSV:
case SEMI_COLON_SEPARATED_VALUES:
case PIPE_SEPARATED_VALUES:
return false; return false;
case SEMI_STRUCTURED_TEXT: case SEMI_STRUCTURED_TEXT:
return true; return true;
@ -97,38 +68,6 @@ public class LogStructure implements ToXContentObject {
} }
} }
public boolean isSeparatedValues() {
switch (this) {
case JSON:
case XML:
return false;
case CSV:
case TSV:
case SEMI_COLON_SEPARATED_VALUES:
case PIPE_SEPARATED_VALUES:
return true;
case SEMI_STRUCTURED_TEXT:
return false;
default:
throw new IllegalStateException("enum value [" + this + "] missing from switch.");
}
}
public static Format fromSeparator(char separator) {
switch (separator) {
case ',':
return CSV;
case '\t':
return TSV;
case ';':
return SEMI_COLON_SEPARATED_VALUES;
case '|':
return PIPE_SEPARATED_VALUES;
default:
throw new IllegalArgumentException("No known format has separator [" + separator + "]");
}
}
public static Format fromString(String name) { public static Format fromString(String name) {
return valueOf(name.trim().toUpperCase(Locale.ROOT)); return valueOf(name.trim().toUpperCase(Locale.ROOT));
} }
@ -149,7 +88,7 @@ public class LogStructure implements ToXContentObject {
static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern"); static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern");
static final ParseField INPUT_FIELDS = new ParseField("input_fields"); static final ParseField INPUT_FIELDS = new ParseField("input_fields");
static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row"); static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row");
static final ParseField SEPARATOR = new ParseField("separator"); static final ParseField DELIMITER = new ParseField("delimiter");
static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields"); static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields");
static final ParseField GROK_PATTERN = new ParseField("grok_pattern"); static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field"); static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field");
@ -171,7 +110,7 @@ public class LogStructure implements ToXContentObject {
PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN); PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN);
PARSER.declareStringArray(Builder::setInputFields, INPUT_FIELDS); PARSER.declareStringArray(Builder::setInputFields, INPUT_FIELDS);
PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW); PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW);
PARSER.declareString((p, c) -> p.setSeparator(c.charAt(0)), SEPARATOR); PARSER.declareString((p, c) -> p.setDelimiter(c.charAt(0)), DELIMITER);
PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS); PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS);
PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN); PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN);
PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD); PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD);
@ -191,7 +130,7 @@ public class LogStructure implements ToXContentObject {
private final String excludeLinesPattern; private final String excludeLinesPattern;
private final List<String> inputFields; private final List<String> inputFields;
private final Boolean hasHeaderRow; private final Boolean hasHeaderRow;
private final Character separator; private final Character delimiter;
private final Boolean shouldTrimFields; private final Boolean shouldTrimFields;
private final String grokPattern; private final String grokPattern;
private final List<String> timestampFormats; private final List<String> timestampFormats;
@ -202,7 +141,7 @@ public class LogStructure implements ToXContentObject {
public LogStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker, public LogStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker,
Format format, String multilineStartPattern, String excludeLinesPattern, List<String> inputFields, Format format, String multilineStartPattern, String excludeLinesPattern, List<String> inputFields,
Boolean hasHeaderRow, Character separator, Boolean shouldTrimFields, String grokPattern, String timestampField, Boolean hasHeaderRow, Character delimiter, Boolean shouldTrimFields, String grokPattern, String timestampField,
List<String> timestampFormats, boolean needClientTimezone, Map<String, Object> mappings, List<String> timestampFormats, boolean needClientTimezone, Map<String, Object> mappings,
List<String> explanation) { List<String> explanation) {
@ -216,7 +155,7 @@ public class LogStructure implements ToXContentObject {
this.excludeLinesPattern = excludeLinesPattern; this.excludeLinesPattern = excludeLinesPattern;
this.inputFields = (inputFields == null) ? null : Collections.unmodifiableList(new ArrayList<>(inputFields)); this.inputFields = (inputFields == null) ? null : Collections.unmodifiableList(new ArrayList<>(inputFields));
this.hasHeaderRow = hasHeaderRow; this.hasHeaderRow = hasHeaderRow;
this.separator = separator; this.delimiter = delimiter;
this.shouldTrimFields = shouldTrimFields; this.shouldTrimFields = shouldTrimFields;
this.grokPattern = grokPattern; this.grokPattern = grokPattern;
this.timestampField = timestampField; this.timestampField = timestampField;
@ -266,8 +205,8 @@ public class LogStructure implements ToXContentObject {
return hasHeaderRow; return hasHeaderRow;
} }
public Character getSeparator() { public Character getDelimiter() {
return separator; return delimiter;
} }
public Boolean getShouldTrimFields() { public Boolean getShouldTrimFields() {
@ -322,8 +261,8 @@ public class LogStructure implements ToXContentObject {
if (hasHeaderRow != null) { if (hasHeaderRow != null) {
builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue()); builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue());
} }
if (separator != null) { if (delimiter != null) {
builder.field(SEPARATOR.getPreferredName(), String.valueOf(separator)); builder.field(DELIMITER.getPreferredName(), String.valueOf(delimiter));
} }
if (shouldTrimFields != null) { if (shouldTrimFields != null) {
builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue()); builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue());
@ -349,7 +288,7 @@ public class LogStructure implements ToXContentObject {
public int hashCode() { public int hashCode() {
return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format, return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern, timestampField, multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern, timestampField,
timestampFormats, needClientTimezone, mappings, explanation); timestampFormats, needClientTimezone, mappings, explanation);
} }
@ -376,7 +315,7 @@ public class LogStructure implements ToXContentObject {
Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) && Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) &&
Objects.equals(this.inputFields, that.inputFields) && Objects.equals(this.inputFields, that.inputFields) &&
Objects.equals(this.hasHeaderRow, that.hasHeaderRow) && Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
Objects.equals(this.separator, that.separator) && Objects.equals(this.delimiter, that.delimiter) &&
Objects.equals(this.shouldTrimFields, that.shouldTrimFields) && Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
Objects.equals(this.grokPattern, that.grokPattern) && Objects.equals(this.grokPattern, that.grokPattern) &&
Objects.equals(this.timestampField, that.timestampField) && Objects.equals(this.timestampField, that.timestampField) &&
@ -397,7 +336,7 @@ public class LogStructure implements ToXContentObject {
private String excludeLinesPattern; private String excludeLinesPattern;
private List<String> inputFields; private List<String> inputFields;
private Boolean hasHeaderRow; private Boolean hasHeaderRow;
private Character separator; private Character delimiter;
private Boolean shouldTrimFields; private Boolean shouldTrimFields;
private String grokPattern; private String grokPattern;
private String timestampField; private String timestampField;
@ -441,7 +380,6 @@ public class LogStructure implements ToXContentObject {
public Builder setFormat(Format format) { public Builder setFormat(Format format) {
this.format = Objects.requireNonNull(format); this.format = Objects.requireNonNull(format);
this.separator = format.separator();
return this; return this;
} }
@ -465,13 +403,13 @@ public class LogStructure implements ToXContentObject {
return this; return this;
} }
public Builder setShouldTrimFields(Boolean shouldTrimFields) { public Builder setDelimiter(Character delimiter) {
this.shouldTrimFields = shouldTrimFields; this.delimiter = delimiter;
return this; return this;
} }
public Builder setSeparator(Character separator) { public Builder setShouldTrimFields(Boolean shouldTrimFields) {
this.separator = separator; this.shouldTrimFields = shouldTrimFields;
return this; return this;
} }
@ -542,28 +480,22 @@ public class LogStructure implements ToXContentObject {
if (hasHeaderRow != null) { if (hasHeaderRow != null) {
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures."); throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
} }
if (separator != null) { if (delimiter != null) {
throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures."); throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures.");
} }
if (grokPattern != null) { if (grokPattern != null) {
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures."); throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
} }
break; break;
case CSV: case DELIMITED:
case TSV:
case SEMI_COLON_SEPARATED_VALUES:
case PIPE_SEPARATED_VALUES:
if (inputFields == null || inputFields.isEmpty()) { if (inputFields == null || inputFields.isEmpty()) {
throw new IllegalArgumentException("Input fields must be specified for [" + format + "] structures."); throw new IllegalArgumentException("Input fields must be specified for [" + format + "] structures.");
} }
if (hasHeaderRow == null) { if (hasHeaderRow == null) {
throw new IllegalArgumentException("Has header row must be specified for [" + format + "] structures."); throw new IllegalArgumentException("Has header row must be specified for [" + format + "] structures.");
} }
Character expectedSeparator = format.separator(); if (delimiter == null) {
assert expectedSeparator != null; throw new IllegalArgumentException("Delimiter must be specified for [" + format + "] structures.");
if (expectedSeparator.equals(separator) == false) {
throw new IllegalArgumentException("Separator must be [" + expectedSeparator + "] for [" + format +
"] structures.");
} }
if (grokPattern != null) { if (grokPattern != null) {
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures."); throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
@ -576,8 +508,8 @@ public class LogStructure implements ToXContentObject {
if (hasHeaderRow != null) { if (hasHeaderRow != null) {
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures."); throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
} }
if (separator != null) { if (delimiter != null) {
throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures."); throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures.");
} }
if (shouldTrimFields != null) { if (shouldTrimFields != null) {
throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures."); throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures.");
@ -607,7 +539,7 @@ public class LogStructure implements ToXContentObject {
} }
return new LogStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format, return new LogStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern, multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern,
timestampField, timestampFormats, needClientTimezone, mappings, explanation); timestampField, timestampFormats, needClientTimezone, mappings, explanation);
} }
} }
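The four delimiter-specific formats collapse into a single DELIMITED format, with the delimiter carried as its own field instead of being derived from the format. A sketch of the new builder usage with illustrative field values (the chained setters are the ones shown in this diff):

    // Sketch: DELIMITED plus an explicit delimiter replaces the old CSV/TSV/semi-colon/pipe formats.
    LogStructure.Builder builder = new LogStructure.Builder(LogStructure.Format.DELIMITED)
        .setCharset("UTF-8")
        .setHasHeaderRow(true)
        .setDelimiter(',')
        .setInputFields(Arrays.asList("time", "message"));
    // build() now requires a delimiter for DELIMITED structures and rejects one for
    // JSON, XML and semi-structured text, as the validation above shows.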
@ -69,10 +69,10 @@ public final class LogStructureFinderManager {
new JsonLogStructureFinderFactory(), new JsonLogStructureFinderFactory(),
new XmlLogStructureFinderFactory(), new XmlLogStructureFinderFactory(),
// ND-JSON will often also be valid (although utterly weird) CSV, so JSON must come before CSV // ND-JSON will often also be valid (although utterly weird) CSV, so JSON must come before CSV
new CsvLogStructureFinderFactory(), new DelimitedLogStructureFinderFactory(',', 2, false),
new TsvLogStructureFinderFactory(), new DelimitedLogStructureFinderFactory('\t', 2, false),
new SemiColonSeparatedValuesLogStructureFinderFactory(), new DelimitedLogStructureFinderFactory(';', 4, false),
new PipeSeparatedValuesLogStructureFinderFactory(), new DelimitedLogStructureFinderFactory('|', 5, true),
new TextLogStructureFinderFactory() new TextLogStructureFinderFactory()
)); ));
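The four dedicated factory classes are replaced by parameterized instances of the single delimited factory; the list order still matters because, as the comment notes, ND-JSON would often also parse as CSV. A sketch of the first-match selection this ordering implies (an assumption about how the manager walks the list, not code from this commit):

    // Assumed selection loop: the first factory whose canCreateFromSample() accepts the
    // sample builds the finder, hence JSON/XML sit before the delimited variants.
    static LogStructureFinder selectFinder(List<LogStructureFinderFactory> factories, List<String> explanation,
                                           String sample, String charsetName, Boolean hasByteOrderMarker) throws IOException {
        for (LogStructureFinderFactory factory : factories) {
            if (factory.canCreateFromSample(explanation, sample)) {
                return factory.createFromSample(explanation, sample, charsetName, hasByteOrderMarker);
            }
        }
        throw new IllegalArgumentException("input did not match any known log format");
    }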
@ -21,12 +21,12 @@ import java.util.TreeMap;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.Stream; import java.util.stream.Stream;
final class LogStructureUtils { public final class LogStructureUtils {
static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp"; public static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
static final String MAPPING_TYPE_SETTING = "type"; public static final String MAPPING_TYPE_SETTING = "type";
static final String MAPPING_FORMAT_SETTING = "format"; public static final String MAPPING_FORMAT_SETTING = "format";
static final String MAPPING_PROPERTIES_SETTING = "properties"; public static final String MAPPING_PROPERTIES_SETTING = "properties";
// NUMBER Grok pattern doesn't support scientific notation, so we extend it // NUMBER Grok pattern doesn't support scientific notation, so we extend it
private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$"); private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$");
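The class and its constants are widened from package-private to public, which lets code outside org.elasticsearch.xpack.ml.logstructurefinder reference them. A hypothetical consumer (the consuming context is invented for illustration; the class, field name and "@timestamp" value are from the diff):

    // Hypothetical consumer in another package; compiles only because of the widened visibility.
    String timestampField = LogStructureUtils.DEFAULT_TIMESTAMP_FIELD; // "@timestamp"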
@ -1,38 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
import org.supercsv.prefs.CsvPreference;
import java.io.IOException;
import java.util.List;
public class PipeSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory {
private static final CsvPreference PIPE_PREFERENCE = new CsvPreference.Builder('"', '|', "\n").build();
/**
* Rules are:
* - The file must be valid pipe (<code>|</code>) separated values
* - It must contain at least two complete records
* - There must be at least five fields per record (otherwise files with coincidental
* or no pipe characters could be treated as pipe separated)
* - Every pipe separated value record except the last must have the same number of fields
* The reason the last record is allowed to have fewer fields than the others is that
* it could have been truncated when the file was sampled.
*/
@Override
public boolean canCreateFromSample(List<String> explanation, String sample) {
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 5, PIPE_PREFERENCE, "pipe separated values");
}
@Override
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
throws IOException {
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
PIPE_PREFERENCE, true);
}
}
@ -1,37 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
import org.supercsv.prefs.CsvPreference;
import java.io.IOException;
import java.util.List;
public class SemiColonSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory {
/**
* Rules are:
* - The file must be valid semi-colon separated values
* - It must contain at least two complete records
* - There must be at least four fields per record (otherwise files with coincidental
* or no semi-colons could be treated as semi-colon separated)
* - Every semi-colon separated value record except the last must have the same number of fields
* The reason the last record is allowed to have fewer fields than the others is that
* it could have been truncated when the file was sampled.
*/
@Override
public boolean canCreateFromSample(List<String> explanation, String sample) {
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 4,
CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, "semi-colon separated values");
}
@Override
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
throws IOException {
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, false);
}
}
@ -23,13 +23,13 @@ public class TsvLogStructureFinderFactory implements LogStructureFinderFactory {
*/ */
@Override @Override
public boolean canCreateFromSample(List<String> explanation, String sample) { public boolean canCreateFromSample(List<String> explanation, String sample) {
return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV"); return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV");
} }
@Override @Override
public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker) public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
throws IOException { throws IOException {
return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker, return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
CsvPreference.TAB_PREFERENCE, false); CsvPreference.TAB_PREFERENCE, false);
} }
} }
@ -1,38 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
public class CsvLogStructureFinderFactoryTests extends LogStructureTestCase {
private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory();
// No need to check JSON or XML because they come earlier in the order we check formats
public void testCanCreateFromSampleGivenCsv() {
assertTrue(factory.canCreateFromSample(explanation, CSV_SAMPLE));
}
public void testCanCreateFromSampleGivenTsv() {
assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
}
public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
}
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
}
public void testCanCreateFromSampleGivenText() {
assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
}
@ -0,0 +1,93 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
public class DelimitedLogStructureFinderFactoryTests extends LogStructureTestCase {
private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);
private LogStructureFinderFactory tsvFactory = new DelimitedLogStructureFinderFactory('\t', 2, false);
private LogStructureFinderFactory semiColonDelimitedfactory = new DelimitedLogStructureFinderFactory(';', 4, false);
private LogStructureFinderFactory pipeDelimitedFactory = new DelimitedLogStructureFinderFactory('|', 5, true);
// CSV - no need to check JSON or XML because they come earlier in the order we check formats
public void testCanCreateCsvFromSampleGivenCsv() {
assertTrue(csvFactory.canCreateFromSample(explanation, CSV_SAMPLE));
}
public void testCanCreateCsvFromSampleGivenTsv() {
assertFalse(csvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
}
public void testCanCreateCsvFromSampleGivenSemiColonDelimited() {
assertFalse(csvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
}
public void testCanCreateCsvFromSampleGivenPipeDelimited() {
assertFalse(csvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}
public void testCanCreateCsvFromSampleGivenText() {
assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
// TSV - no need to check JSON, XML or CSV because they come earlier in the order we check formats
public void testCanCreateTsvFromSampleGivenTsv() {
assertTrue(tsvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
}
public void testCanCreateTsvFromSampleGivenSemiColonDelimited() {
assertFalse(tsvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
}
public void testCanCreateTsvFromSampleGivenPipeDelimited() {
assertFalse(tsvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}
public void testCanCreateTsvFromSampleGivenText() {
assertFalse(tsvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
// Semi-colon delimited - no need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats
public void testCanCreateSemiColonDelimitedFromSampleGivenSemiColonDelimited() {
assertTrue(semiColonDelimitedfactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
}
public void testCanCreateSemiColonDelimitedFromSampleGivenPipeDelimited() {
assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}
public void testCanCreateSemiColonDelimitedFromSampleGivenText() {
assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
// Pipe delimited - no need to check JSON, XML, CSV, TSV or semi-colon delimited
// values because they come earlier in the order we check formats
public void testCanCreatePipeDelimitedFromSampleGivenPipeDelimited() {
assertTrue(pipeDelimitedFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}
public void testCanCreatePipeDelimitedFromSampleGivenText() {
assertFalse(pipeDelimitedFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
}


@@ -12,27 +12,27 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
-import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinFieldwiseCompareRows;
-import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinDistance;
+import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinFieldwiseCompareRows;
+import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinDistance;
import static org.hamcrest.Matchers.arrayContaining;
-public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase {
-private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory();
+public class DelimitedLogStructureFinderTests extends LogStructureTestCase {
+private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);
public void testCreateConfigsGivenCompleteCsv() throws Exception {
String sample = "time,message\n" +
"2018-05-17T13:41:23,hello\n" +
"2018-05-17T13:41:32,hello again\n";
-assertTrue(factory.canCreateFromSample(explanation, sample));
+assertTrue(csvFactory.canCreateFromSample(explanation, sample));
String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
-LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
+LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructure structure = structureFinder.getStructure();
-assertEquals(LogStructure.Format.CSV, structure.getFormat());
+assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker());
@@ -41,7 +41,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
}
assertEquals("^\"?time\"?,\"?message\"?", structure.getExcludeLinesPattern());
assertEquals("^\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
-assertEquals(Character.valueOf(','), structure.getSeparator());
+assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("time", "message"), structure.getInputFields());
@@ -55,15 +55,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"\"hello\n" +
"world\",2018-05-17T13:41:23,1\n" +
"\"hello again\n"; // note that this last record is truncated
-assertTrue(factory.canCreateFromSample(explanation, sample));
+assertTrue(csvFactory.canCreateFromSample(explanation, sample));
String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
-LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
+LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructure structure = structureFinder.getStructure();
-assertEquals(LogStructure.Format.CSV, structure.getFormat());
+assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker());
@@ -72,7 +72,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
}
assertEquals("^\"?message\"?,\"?time\"?,\"?count\"?", structure.getExcludeLinesPattern());
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
-assertEquals(Character.valueOf(','), structure.getSeparator());
+assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("message", "time", "count"), structure.getInputFields());
@@ -88,15 +88,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
"1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
"1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
-assertTrue(factory.canCreateFromSample(explanation, sample));
+assertTrue(csvFactory.canCreateFromSample(explanation, sample));
String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
-LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
+LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructure structure = structureFinder.getStructure();
-assertEquals(LogStructure.Format.CSV, structure.getFormat());
+assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker());
@@ -108,7 +108,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?,\"?\"?,\"?\"?",
structure.getExcludeLinesPattern());
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
-assertEquals(Character.valueOf(','), structure.getSeparator());
+assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance",
@@ -126,15 +126,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
"1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
"1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
-assertTrue(factory.canCreateFromSample(explanation, sample));
+assertTrue(csvFactory.canCreateFromSample(explanation, sample));
String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
-LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
+LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructure structure = structureFinder.getStructure();
-assertEquals(LogStructure.Format.CSV, structure.getFormat());
+assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker());
@@ -146,7 +146,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?",
structure.getExcludeLinesPattern());
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
-assertEquals(Character.valueOf(','), structure.getSeparator());
+assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance",
@@ -161,15 +161,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
String sample = "\"pos_id\",\"trip_id\",\"latitude\",\"longitude\",\"altitude\",\"timestamp\"\n" +
"\"1\",\"3\",\"4703.7815\",\"1527.4713\",\"359.9\",\"2017-01-19 16:19:04.742113\"\n" +
"\"2\",\"3\",\"4703.7815\",\"1527.4714\",\"359.9\",\"2017-01-19 16:19:05.741890\"\n";
-assertTrue(factory.canCreateFromSample(explanation, sample));
+assertTrue(csvFactory.canCreateFromSample(explanation, sample));
String charset = randomFrom(POSSIBLE_CHARSETS);
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
-LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
+LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
LogStructure structure = structureFinder.getStructure();
-assertEquals(LogStructure.Format.CSV, structure.getFormat());
+assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
assertEquals(charset, structure.getCharset());
if (hasByteOrderMarker == null) {
assertNull(structure.getHasByteOrderMarker());
@@ -179,7 +179,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
assertEquals("^\"?pos_id\"?,\"?trip_id\"?,\"?latitude\"?,\"?longitude\"?,\"?altitude\"?,\"?timestamp\"?",
structure.getExcludeLinesPattern());
assertNull(structure.getMultilineStartPattern());
-assertEquals(Character.valueOf(','), structure.getSeparator());
+assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("pos_id", "trip_id", "latitude", "longitude", "altitude", "timestamp"), structure.getInputFields());
@@ -195,8 +195,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" +
"2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n";
-Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation,
-SeparatedValuesLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1());
+Tuple<Boolean, String[]> header = DelimitedLogStructureFinder.findHeaderFromSample(explanation,
+DelimitedLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1());
assertTrue(header.v1());
assertThat(header.v2(), arrayContaining("time", "airline", "responsetime", "sourcetype"));
@@ -208,8 +208,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" +
"2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n";
-Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation,
-SeparatedValuesLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1());
+Tuple<Boolean, String[]> header = DelimitedLogStructureFinder.findHeaderFromSample(explanation,
+DelimitedLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1());
assertFalse(header.v1());
assertThat(header.v2(), arrayContaining("column1", "column2", "column3", "column4"));
@@ -251,43 +251,43 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
public void testLineHasUnescapedQuote() {
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE));
-assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE));
-assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE));
-assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE));
-assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE));
-assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE));
+assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE));
+assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE));
+assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE));
+assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE));
}
public void testRowContainsDuplicateNonEmptyValues() {
-assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a")));
-assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("")));
-assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c")));
-assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a")));
-assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b")));
-assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));
-assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", "")));
+assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a")));
+assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("")));
+assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c")));
+assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a")));
+assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b")));
+assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));
+assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", "")));
}
}


@@ -29,14 +29,14 @@ public class JsonLogStructureFinderFactoryTests extends LogStructureTestCase {
assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
}
-public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
-assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
+public void testCanCreateFromSampleGivenSemiColonDelimited() {
+assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
}
-public void testCanCreateFromSampleGivenPipeSeparatedValues() {
-assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
+public void testCanCreateFromSampleGivenPipeDelimited() {
+assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}
public void testCanCreateFromSampleGivenText() {


@@ -29,7 +29,7 @@ public class JsonLogStructureFinderTests extends LogStructureTestCase {
}
assertNull(structure.getExcludeLinesPattern());
assertNull(structure.getMultilineStartPattern());
-assertNull(structure.getSeparator());
+assertNull(structure.getDelimiter());
assertNull(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertNull(structure.getGrokPattern());


@@ -61,7 +61,7 @@ public class LogStructureFinderManagerTests extends LogStructureTestCase {
public void testMakeBestStructureGivenCsv() throws Exception {
assertThat(structureFinderManager.makeBestStructureFinder(explanation, "time,message\n" +
"2018-05-17T13:41:23,hello\n", StandardCharsets.UTF_8.name(), randomBoolean()),
-instanceOf(SeparatedValuesLogStructureFinder.class));
+instanceOf(DelimitedLogStructureFinder.class));
}
public void testMakeBestStructureGivenText() throws Exception {


@@ -34,14 +34,14 @@ public abstract class LogStructureTestCase extends ESTestCase {
"\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
-protected static final String PIPE_SEPARATED_VALUES_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" +
+protected static final String PIPE_DELIMITED_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" +
"listening on 0.0.0.0:9987, :::9987\n" +
"2018-01-06 17:19:44.465252|INFO |VirtualServer |1 |client " +
"'User1'(id:2) changed default admin channelgroup to 'Guest'(id:8)\n" +
"2018-01-06 17:21:25.764368|INFO |VirtualServer |1 |client " +
"'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel 'Default Channel'(id:1)";
-protected static final String SEMI_COLON_SEPARATED_VALUES_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" +
+protected static final String SEMI_COLON_DELIMITED_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" +
"\"timestamp\"\n" +
"\"1\";\"3\";\"4703.7815\";\"1527.4713\";\"359.9\";\"2017-01-19 16:19:04.742113\"\n" +
"\"2\";\"3\";\"4703.7815\";\"1527.4714\";\"359.9\";\"2017-01-19 16:19:05.741890\"\n" +


@@ -43,14 +43,12 @@ public class LogStructureTests extends AbstractXContentTestCase<LogStructure> {
builder.setExcludeLinesPattern(randomAlphaOfLength(100));
}
-if (format.isSeparatedValues() || (format.supportsNesting() && randomBoolean())) {
+if (format == LogStructure.Format.DELIMITED || (format.supportsNesting() && randomBoolean())) {
builder.setInputFields(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
}
-if (format.isSeparatedValues()) {
+if (format == LogStructure.Format.DELIMITED) {
builder.setHasHeaderRow(randomBoolean());
-if (rarely()) {
-builder.setSeparator(format.separator());
-}
+builder.setDelimiter(randomFrom(',', '\t', ';', '|'));
}
if (format.isSemiStructured()) {
builder.setGrokPattern(randomAlphaOfLength(100));


@@ -1,23 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
public class PipeSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase {
private LogStructureFinderFactory factory = new PipeSeparatedValuesLogStructureFinderFactory();
// No need to check JSON, XML, CSV, TSV or semi-colon separated values because they come earlier in the order we check formats
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
assertTrue(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
}
public void testCanCreateFromSampleGivenText() {
assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
}


@@ -1,28 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.logstructurefinder;
public class SemiColonSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase {
private LogStructureFinderFactory factory = new SemiColonSeparatedValuesLogStructureFinderFactory();
// No need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats
public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
assertTrue(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
}
public void testCanCreateFromSampleGivenPipeSeparatedValues() {
assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
}
public void testCanCreateFromSampleGivenText() {
assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
}
}

Some files were not shown because too many files have changed in this diff.