Merge branch 'master' into ccr
* master:
  Mute test watcher usage stats output
  [Rollup] Fix FullClusterRestart test
  Adjust soft-deletes version after backport into 6.5
  completely drop `index.shard.check_on_startup: fix` for 7.0 (#33194)
  Fix AwaitsFix issue number
  Mute SmokeTestWatcherWithSecurityIT testsi
  drop `index.shard.check_on_startup: fix` (#32279) tracked at
  [DOCS] Moves ml folder from x-pack/docs to docs (#33248)
  [DOCS] Move rollup APIs to docs (#31450)
  [DOCS] Rename X-Pack Commands section (#33005)
  TEST: Disable soft-deletes in ParentChildTestCase
  Fixes SecurityIntegTestCase so it always adds at least one alias (#33296)
  Fix pom for build-tools (#33300)
  Lazy evaluate java9home (#33301)
  SQL: test coverage for JdbcResultSet (#32813)
  Work around to be able to generate eclipse projects (#33295)
  Highlight that index_phrases only works if no slop is used (#33303)
  Different handling for security specific errors in the CLI. Fix for https://github.com/elastic/elasticsearch/issues/33230 (#33255)
  [ML] Refactor delimited file structure detection (#33233)
  SQL: Support multi-index format as table identifier (#33278)
  MINOR: Remove Dead Code from PathTrie (#33280)
  Enable forbiddenapis server java9 (#33245)
build.gradle
@@ -16,7 +16,9 @@
 * specific language governing permissions and limitations
 * under the License.
 */

import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin
+import org.apache.tools.ant.taskdefs.condition.Os
import org.elasticsearch.gradle.BuildPlugin
import org.elasticsearch.gradle.LoggedExec
import org.elasticsearch.gradle.Version

@@ -24,14 +26,9 @@ import org.elasticsearch.gradle.VersionCollection
import org.elasticsearch.gradle.VersionProperties
import org.elasticsearch.gradle.plugin.PluginBuildPlugin
import org.gradle.plugins.ide.eclipse.model.SourceFolder
-import org.gradle.util.GradleVersion
-import org.gradle.util.DistributionLocator
-import org.apache.tools.ant.taskdefs.condition.Os
-import org.apache.tools.ant.filters.ReplaceTokens

import java.nio.file.Files
import java.nio.file.Path
-import java.security.MessageDigest

plugins {
  id 'com.gradle.build-scan' version '1.13.2'

@@ -512,6 +509,16 @@ allprojects {
  tasks.cleanEclipse.dependsOn(wipeEclipseSettings)
  // otherwise the eclipse merging is *super confusing*
  tasks.eclipse.dependsOn(cleanEclipse, copyEclipseSettings)
+
+  // work arround https://github.com/gradle/gradle/issues/6582
+  tasks.eclipseProject.mustRunAfter tasks.cleanEclipseProject
+  tasks.matching { it.name == 'eclipseClasspath' }.all {
+    it.mustRunAfter { tasks.cleanEclipseClasspath }
+  }
+  tasks.matching { it.name == 'eclipseJdt' }.all {
+    it.mustRunAfter { tasks.cleanEclipseJdt }
+  }
+  tasks.copyEclipseSettings.mustRunAfter tasks.wipeEclipseSettings
}

allprojects {
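Editor's note: the eclipse block added above only orders tasks. `mustRunAfter`, unlike `dependsOn`, does not pull the clean tasks into every build; it just constrains their order when both end up in the task graph, which is what the gradle/gradle#6582 workaround needs. A minimal Gradle sketch of the difference, with hypothetical task names that are not part of this commit:

    task cleanThing { doLast { println 'cleaning' } }
    task buildThing { doLast { println 'building' } }
    // dependsOn would force cleanThing to run whenever buildThing runs;
    // mustRunAfter only says: if both are scheduled, run buildThing second.
    buildThing.mustRunAfter cleanThing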
@@ -24,15 +24,6 @@ plugins {
  id 'groovy'
}

-gradlePlugin {
-  plugins {
-    simplePlugin {
-      id = 'elasticsearch.clusterformation'
-      implementationClass = 'org.elasticsearch.gradle.clusterformation.ClusterformationPlugin'
-    }
-  }
-}
-
group = 'org.elasticsearch.gradle'

String minimumGradleVersion = file('src/main/resources/minimumGradleVersion').text.trim()
@@ -38,7 +38,6 @@ import org.gradle.api.artifacts.ModuleDependency
import org.gradle.api.artifacts.ModuleVersionIdentifier
import org.gradle.api.artifacts.ProjectDependency
import org.gradle.api.artifacts.ResolvedArtifact
-import org.gradle.api.artifacts.SelfResolvingDependency
import org.gradle.api.artifacts.dsl.RepositoryHandler
import org.gradle.api.execution.TaskExecutionGraph
import org.gradle.api.plugins.JavaPlugin

@@ -212,6 +211,7 @@ class BuildPlugin implements Plugin<Project> {
    project.rootProject.ext.minimumRuntimeVersion = minimumRuntimeVersion
    project.rootProject.ext.inFipsJvm = inFipsJvm
    project.rootProject.ext.gradleJavaVersion = JavaVersion.toVersion(gradleJavaVersion)
+   project.rootProject.ext.java9Home = "${-> findJavaHome("9")}"
}

project.targetCompatibility = project.rootProject.ext.minimumRuntimeVersion

@@ -225,6 +225,7 @@ class BuildPlugin implements Plugin<Project> {
    project.ext.javaVersions = project.rootProject.ext.javaVersions
    project.ext.inFipsJvm = project.rootProject.ext.inFipsJvm
    project.ext.gradleJavaVersion = project.rootProject.ext.gradleJavaVersion
+   project.ext.java9Home = project.rootProject.ext.java9Home
}

private static String getPaddedMajorVersion(JavaVersion compilerJavaVersionEnum) {
@@ -100,7 +100,7 @@ class PrecommitTasks {

    private static Task configureForbiddenApisCli(Project project) {
        Task forbiddenApisCli = project.tasks.create('forbiddenApis')
-       project.sourceSets.forEach { sourceSet ->
+       project.sourceSets.all { sourceSet ->
            forbiddenApisCli.dependsOn(
                project.tasks.create(sourceSet.getTaskName('forbiddenApis', null), ForbiddenApisCliTask) {
                    ExportElasticsearchBuildResourcesTask buildResources = project.tasks.getByName('buildResources')
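Editor's note: the `forEach` → `all` change above is behavioural, not cosmetic. `sourceSets` is a live Gradle container, and `all { }` runs its closure for every element that exists now or is added later, whereas `forEach { }` only walks the elements present at that point in configuration. A small sketch of the pattern, using a hypothetical task name that is not part of this commit:

    project.sourceSets.all { sourceSet ->
        // Also fires for source sets registered after this point (e.g. added by
        // another plugin), so no per-source-set task is silently missed.
        project.tasks.create(sourceSet.getTaskName('exampleCheck', null)) {
            description = "Example check for ${sourceSet.name}"
        }
    }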
@@ -51,7 +51,8 @@ public class ForbiddenApisCliTask extends DefaultTask {
    private JavaVersion targetCompatibility;
    private FileCollection classesDirs;
    private SourceSet sourceSet;
-   private String javaHome;
+   // This needs to be an object so it can hold Groovy GStrings
+   private Object javaHome;

    @Input
    public JavaVersion getTargetCompatibility() {

@@ -142,11 +143,11 @@ public class ForbiddenApisCliTask extends DefaultTask {
    }

    @Input
-   public String getJavaHome() {
+   public Object getJavaHome() {
        return javaHome;
    }

-   public void setJavaHome(String javaHome) {
+   public void setJavaHome(Object javaHome) {
        this.javaHome = javaHome;
    }

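Editor's note: widening `javaHome` from `String` to `Object` pairs with the `project.rootProject.ext.java9Home = "${-> findJavaHome("9")}"` line added in BuildPlugin above. A Groovy GString that wraps a `${-> ...}` closure is re-evaluated each time it is converted to a string, so keeping the field as an `Object` defers the Java home lookup until the task actually needs it. A standalone Groovy sketch of that behaviour (not code from this commit):

    def counter = 0
    def lazy = "value ${-> ++counter}"   // closure inside a GString
    assert lazy.toString() == 'value 1'
    assert lazy.toString() == 'value 2'  // evaluated again; a plain String would be fixed once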
@@ -0,0 +1 @@
+implementation-class=org.elasticsearch.gradle.clusterformation.ClusterformationPlugin
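Editor's note: this one-line descriptor replaces the `gradlePlugin { ... }` block removed from the buildSrc build script above. By Gradle convention such a descriptor lives at `src/main/resources/META-INF/gradle-plugins/<plugin-id>.properties` (the path itself is not shown in this diff), which keeps the plugin id resolvable exactly as before:

    // Consumers keep applying the plugin by id; Gradle finds the
    // implementation class through the descriptor on the classpath.
    apply plugin: 'elasticsearch.clusterformation'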
@@ -19,6 +19,12 @@

apply plugin: 'elasticsearch.docs-test'

+/* List of files that have snippets that require a gold or platinum licence
+and therefore cannot be tested yet... */
+buildRestTests.expectedUnconvertedCandidates = [
+  'reference/ml/transforms.asciidoc',
+]
+
integTestCluster {
  /* Enable regexes in painless so our tests don't complain about example
   * snippets that use them. */

@@ -74,6 +80,17 @@ buildRestTests.docs = fileTree(projectDir) {
  exclude 'build'
  // Just syntax examples
  exclude 'README.asciidoc'
+ // Broken code snippet tests
+ exclude 'reference/rollup/rollup-getting-started.asciidoc'
+ exclude 'reference/rollup/apis/rollup-job-config.asciidoc'
+ exclude 'reference/rollup/apis/rollup-index-caps.asciidoc'
+ exclude 'reference/rollup/apis/put-job.asciidoc'
+ exclude 'reference/rollup/apis/stop-job.asciidoc'
+ exclude 'reference/rollup/apis/start-job.asciidoc'
+ exclude 'reference/rollup/apis/rollup-search.asciidoc'
+ exclude 'reference/rollup/apis/delete-job.asciidoc'
+ exclude 'reference/rollup/apis/get-job.asciidoc'
+ exclude 'reference/rollup/apis/rollup-caps.asciidoc'
}

listSnippets.docs = buildRestTests.docs
@@ -594,3 +611,259 @@ buildRestTests.setups['library'] = '''
{"name": "The Moon is a Harsh Mistress", "author": "Robert A. Heinlein", "release_date": "1966-04-01", "page_count": 288}

'''
+buildRestTests.setups['sensor_rollup_job'] = '''
+  - do:
+      indices.create:
+        index: sensor-1
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+          mappings:
+            _doc:
+              properties:
+                timestamp:
+                  type: date
+                temperature:
+                  type: long
+                voltage:
+                  type: float
+                node:
+                  type: keyword
+  - do:
+      xpack.rollup.put_job:
+        id: "sensor"
+        body: >
+          {
+            "index_pattern": "sensor-*",
+            "rollup_index": "sensor_rollup",
+            "cron": "*/30 * * * * ?",
+            "page_size" :1000,
+            "groups" : {
+              "date_histogram": {
+                "field": "timestamp",
+                "interval": "1h",
+                "delay": "7d"
+              },
+              "terms": {
+                "fields": ["node"]
+              }
+            },
+            "metrics": [
+              {
+                "field": "temperature",
+                "metrics": ["min", "max", "sum"]
+              },
+              {
+                "field": "voltage",
+                "metrics": ["avg"]
+              }
+            ]
+          }
+'''
+buildRestTests.setups['sensor_started_rollup_job'] = '''
+  - do:
+      indices.create:
+        index: sensor-1
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+          mappings:
+            _doc:
+              properties:
+                timestamp:
+                  type: date
+                temperature:
+                  type: long
+                voltage:
+                  type: float
+                node:
+                  type: keyword
+
+  - do:
+      bulk:
+        index: sensor-1
+        type: _doc
+        refresh: true
+        body: |
+          {"index":{}}
+          {"timestamp": 1516729294000, "temperature": 200, "voltage": 5.2, "node": "a"}
+          {"index":{}}
+          {"timestamp": 1516642894000, "temperature": 201, "voltage": 5.8, "node": "b"}
+          {"index":{}}
+          {"timestamp": 1516556494000, "temperature": 202, "voltage": 5.1, "node": "a"}
+          {"index":{}}
+          {"timestamp": 1516470094000, "temperature": 198, "voltage": 5.6, "node": "b"}
+          {"index":{}}
+          {"timestamp": 1516383694000, "temperature": 200, "voltage": 4.2, "node": "c"}
+          {"index":{}}
+          {"timestamp": 1516297294000, "temperature": 202, "voltage": 4.0, "node": "c"}
+
+  - do:
+      xpack.rollup.put_job:
+        id: "sensor"
+        body: >
+          {
+            "index_pattern": "sensor-*",
+            "rollup_index": "sensor_rollup",
+            "cron": "* * * * * ?",
+            "page_size" :1000,
+            "groups" : {
+              "date_histogram": {
+                "field": "timestamp",
+                "interval": "1h",
+                "delay": "7d"
+              },
+              "terms": {
+                "fields": ["node"]
+              }
+            },
+            "metrics": [
+              {
+                "field": "temperature",
+                "metrics": ["min", "max", "sum"]
+              },
+              {
+                "field": "voltage",
+                "metrics": ["avg"]
+              }
+            ]
+          }
+  - do:
+      xpack.rollup.start_job:
+        id: "sensor"
+'''
+
+buildRestTests.setups['sensor_index'] = '''
+  - do:
+      indices.create:
+        index: sensor-1
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+          mappings:
+            _doc:
+              properties:
+                timestamp:
+                  type: date
+                temperature:
+                  type: long
+                voltage:
+                  type: float
+                node:
+                  type: keyword
+                load:
+                  type: double
+                net_in:
+                  type: long
+                net_out:
+                  type: long
+                hostname:
+                  type: keyword
+                datacenter:
+                  type: keyword
+'''
+
+buildRestTests.setups['sensor_prefab_data'] = '''
+  - do:
+      indices.create:
+        index: sensor-1
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+          mappings:
+            _doc:
+              properties:
+                timestamp:
+                  type: date
+                temperature:
+                  type: long
+                voltage:
+                  type: float
+                node:
+                  type: keyword
+  - do:
+      indices.create:
+        index: sensor_rollup
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+          mappings:
+            _doc:
+              properties:
+                node.terms.value:
+                  type: keyword
+                temperature.sum.value:
+                  type: double
+                temperature.max.value:
+                  type: double
+                temperature.min.value:
+                  type: double
+                timestamp.date_histogram.time_zone:
+                  type: keyword
+                timestamp.date_histogram.interval:
+                  type: keyword
+                timestamp.date_histogram.timestamp:
+                  type: date
+                timestamp.date_histogram._count:
+                  type: long
+                voltage.avg.value:
+                  type: double
+                voltage.avg._count:
+                  type: long
+                _rollup.id:
+                  type: keyword
+                _rollup.version:
+                  type: long
+              _meta:
+                _rollup:
+                  sensor:
+                    cron: "* * * * * ?"
+                    rollup_index: "sensor_rollup"
+                    index_pattern: "sensor-*"
+                    timeout: "20s"
+                    page_size: 1000
+                    groups:
+                      date_histogram:
+                        delay: "7d"
+                        field: "timestamp"
+                        interval: "1h"
+                        time_zone: "UTC"
+                      terms:
+                        fields:
+                          - "node"
+                    id: sensor
+                    metrics:
+                      - field: "temperature"
+                        metrics:
+                          - min
+                          - max
+                          - sum
+                      - field: "voltage"
+                        metrics:
+                          - avg
+
+  - do:
+      bulk:
+        index: sensor_rollup
+        type: _doc
+        refresh: true
+        body: |
+          {"index":{}}
+          {"node.terms.value":"b","temperature.sum.value":201.0,"temperature.max.value":201.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":201.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.800000190734863,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516640400000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
+          {"index":{}}
+          {"node.terms.value":"c","temperature.sum.value":200.0,"temperature.max.value":200.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":200.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":4.199999809265137,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516381200000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
+          {"index":{}}
+          {"node.terms.value":"a","temperature.sum.value":202.0,"temperature.max.value":202.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":202.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.099999904632568,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516554000000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
+          {"index":{}}
+          {"node.terms.value":"a","temperature.sum.value":200.0,"temperature.max.value":200.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":200.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.199999809265137,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516726800000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
+          {"index":{}}
+          {"node.terms.value":"b","temperature.sum.value":198.0,"temperature.max.value":198.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":198.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.599999904632568,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516467600000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
+          {"index":{}}
+          {"node.terms.value":"c","temperature.sum.value":202.0,"temperature.max.value":202.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":202.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":4.0,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516294800000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
+
+'''
@@ -1,11 +1,11 @@
-[role="xpack"]
-[[xpack-commands]]
-= {xpack} Commands
+[[commands]]
+= Command line tools

[partintro]
--

-{xpack} includes commands that help you configure security:
+{es} provides the following tools for configuring security and performing other
+tasks from the command line:

* <<certgen>>
* <<certutil>>
@@ -63,12 +63,6 @@ corruption is detected, it will prevent the shard from being opened. Accepts:
Check for both physical and logical corruption. This is much more
expensive in terms of CPU and memory usage.

-`fix`::
-
-Check for both physical and logical corruption. Segments that were reported
-as corrupted will be automatically removed. This option *may result in data loss*.
-Use with extreme caution!
-
WARNING: Expert only. Checking shards may take a lot of time on large indices.
--
@@ -61,7 +61,7 @@ include::sql/index.asciidoc[]

include::monitoring/index.asciidoc[]

-include::{xes-repo-dir}/rollup/index.asciidoc[]
+include::rollup/index.asciidoc[]

include::rest-api/index.asciidoc[]
@@ -99,7 +99,7 @@ The following parameters are accepted by `text` fields:
`index_phrases`::

If enabled, two-term word combinations ('shingles') are indexed into a separate
-field. This allows exact phrase queries to run more efficiently, at the expense
+field. This allows exact phrase queries (no slop) to run more efficiently, at the expense
of a larger index. Note that this works best when stopwords are not removed,
as phrases containing stopwords will not use the subsidiary field and will fall
back to a standard phrase query. Accepts `true` or `false` (default).

@@ -171,4 +171,4 @@ PUT my_index
--------------------------------
// CONSOLE
<1> `min_chars` must be greater than zero, defaults to 2
<2> `max_chars` must be greater than or equal to `min_chars` and less than 20, defaults to 5
@@ -78,3 +78,7 @@ The parent circuit breaker defines a new setting `indices.breaker.total.use_real
heap memory instead of only considering the reserved memory by child circuit breakers. When this
setting is `true`, the default parent breaker limit also changes from 70% to 95% of the JVM heap size.
The previous behavior can be restored by setting `indices.breaker.total.use_real_memory` to `false`.
+
+==== `fix` value for `index.shard.check_on_startup` is removed
+
+Deprecated option value `fix` for setting `index.shard.check_on_startup` is not supported.
@@ -41,7 +41,7 @@ PUT _xpack/ml/anomaly_detectors/farequote
}
----------------------------------
// CONSOLE
-// TEST[setup:farequote_data]
+// TEST[skip:setup:farequote_data]

In this example, the `airline`, `responsetime`, and `time` fields are
aggregations.

@@ -90,7 +90,7 @@ PUT _xpack/ml/datafeeds/datafeed-farequote
}
----------------------------------
// CONSOLE
-// TEST[setup:farequote_job]
+// TEST[skip:setup:farequote_job]

In this example, the aggregations have names that match the fields that they
operate on. That is to say, the `max` aggregation is named `time` and its
@@ -44,6 +44,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs
}
----------------------------------
//CONSOLE
+// TEST[skip:needs-licence]
<1> The `categorization_field_name` property indicates which field will be
categorized.
<2> The resulting categories are used in a detector by setting `by_field_name`,

@@ -127,6 +128,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs2
}
----------------------------------
//CONSOLE
+// TEST[skip:needs-licence]
<1> The
{ref}/analysis-pattern-replace-charfilter.html[`pattern_replace` character filter]
here achieves exactly the same as the `categorization_filters` in the first

@@ -193,6 +195,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs3
}
----------------------------------
//CONSOLE
+// TEST[skip:needs-licence]
<1> Tokens basically consist of hyphens, digits, letters, underscores and dots.
<2> By default, categorization ignores tokens that begin with a digit.
<3> By default, categorization also ignores tokens that are hexadecimal numbers.
@@ -36,20 +36,20 @@ The scenarios in this section describe some best practices for generating useful
* <<ml-configuring-transform>>
* <<ml-configuring-detector-custom-rules>>

-:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/customurl.asciidoc
+:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/customurl.asciidoc
include::customurl.asciidoc[]

-:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/aggregations.asciidoc
+:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/aggregations.asciidoc
include::aggregations.asciidoc[]

-:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/categories.asciidoc
+:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/categories.asciidoc
include::categories.asciidoc[]

-:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/populations.asciidoc
+:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/populations.asciidoc
include::populations.asciidoc[]

-:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/transforms.asciidoc
+:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/transforms.asciidoc
include::transforms.asciidoc[]

-:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/detector-custom-rules.asciidoc
+:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/detector-custom-rules.asciidoc
include::detector-custom-rules.asciidoc[]
@@ -106,7 +106,7 @@ POST _xpack/ml/anomaly_detectors/sample_job/_update
}
----------------------------------
//CONSOLE
-//TEST[setup:sample_job]
+//TEST[skip:setup:sample_job]

When you click this custom URL in the anomalies table in {kib}, it opens up the
*Discover* page and displays source data for the period one hour before and
@@ -39,6 +39,7 @@ PUT _xpack/ml/filters/safe_domains
}
----------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

Now, we can create our job specifying a scope that uses the `safe_domains`
filter for the `highest_registered_domain` field:

@@ -70,6 +71,7 @@ PUT _xpack/ml/anomaly_detectors/dns_exfiltration_with_rule
}
----------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

As time advances and we see more data and more results, we might encounter new
domains that we want to add in the filter. We can do that by using the

@@ -83,7 +85,7 @@ POST _xpack/ml/filters/safe_domains/_update
}
----------------------------------
// CONSOLE
-// TEST[setup:ml_filter_safe_domains]
+// TEST[skip:setup:ml_filter_safe_domains]

Note that we can use any of the `partition_field_name`, `over_field_name`, or
`by_field_name` fields in the `scope`.

@@ -123,6 +125,7 @@ PUT _xpack/ml/anomaly_detectors/scoping_multiple_fields
}
----------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

Such a detector will skip results when the values of all 3 scoped fields
are included in the referenced filters.

@@ -166,6 +169,7 @@ PUT _xpack/ml/anomaly_detectors/cpu_with_rule
}
----------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

When there are multiple conditions they are combined with a logical `and`.
This is useful when we want the rule to apply to a range. We simply create

@@ -205,6 +209,7 @@ PUT _xpack/ml/anomaly_detectors/rule_with_range
}
----------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

==== Custom rules in the life-cycle of a job
@@ -59,6 +59,7 @@ PUT _xpack/ml/anomaly_detectors/example1
}
--------------------------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

This example is probably the simplest possible analysis. It identifies
time buckets during which the overall count of events is higher or lower than

@@ -86,6 +87,7 @@ PUT _xpack/ml/anomaly_detectors/example2
}
--------------------------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

If you use this `high_count` function in a detector in your job, it
models the event rate for each error code. It detects users that generate an

@@ -110,6 +112,7 @@ PUT _xpack/ml/anomaly_detectors/example3
}
--------------------------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

In this example, the function detects when the count of events for a
status code is lower than usual.

@@ -136,6 +139,7 @@ PUT _xpack/ml/anomaly_detectors/example4
}
--------------------------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

If you are analyzing an aggregated `events_per_min` field, do not use a sum
function (for example, `sum(events_per_min)`). Instead, use the count function

@@ -200,6 +204,7 @@ PUT _xpack/ml/anomaly_detectors/example5
}
--------------------------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

If you use this `high_non_zero_count` function in a detector in your job, it
models the count of events for the `signaturename` field. It ignores any buckets

@@ -253,6 +258,7 @@ PUT _xpack/ml/anomaly_detectors/example6
}
--------------------------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

This `distinct_count` function detects when a system has an unusual number
of logged in users. When you use this function in a detector in your job, it

@@ -278,6 +284,7 @@ PUT _xpack/ml/anomaly_detectors/example7
}
--------------------------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

This example detects instances of port scanning. When you use this function in a
detector in your job, it models the distinct count of ports. It also detects the
@@ -47,6 +47,7 @@ PUT _xpack/ml/anomaly_detectors/example1
}
--------------------------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

If you use this `lat_long` function in a detector in your job, it
detects anomalies where the geographic location of a credit card transaction is

@@ -98,6 +99,6 @@ PUT _xpack/ml/datafeeds/datafeed-test2
}
--------------------------------------------------
// CONSOLE
-// TEST[setup:farequote_job]
+// TEST[skip:setup:farequote_job]

For more information, see <<ml-configuring-transform>>.
(Binary files: 22 ML documentation screenshots moved unchanged as part of the ml docs relocation; sizes range from 1.3 KiB to 384 KiB.)
@@ -51,14 +51,11 @@ PUT _xpack/ml/anomaly_detectors/population
}
----------------------------------
//CONSOLE
+// TEST[skip:needs-licence]
<1> This `over_field_name` property indicates that the metrics for each user (
as identified by their `username` value) are analyzed relative to other users
in each bucket.

-//TO-DO: Per sophiec20 "Perhaps add the datafeed config and add a query filter to
-//include only workstations as servers and printers would behave differently
-//from the population
-
If your data is stored in {es}, you can use the population job wizard in {kib}
to create a job with these same properties. For example, the population job
wizard provides the following job settings:
@@ -28,7 +28,7 @@ request stops the `feed1` {dfeed}:
POST _xpack/ml/datafeeds/datafeed-total-requests/_stop
--------------------------------------------------
// CONSOLE
-// TEST[setup:server_metrics_startdf]
+// TEST[skip:setup:server_metrics_startdf]

NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}.
For more information, see <<security-privileges>>.

@@ -49,6 +49,7 @@ If you are upgrading your cluster, you can use the following request to stop all
POST _xpack/ml/datafeeds/_all/_stop
----------------------------------
// CONSOLE
+// TEST[skip:needs-licence]

[float]
[[closing-ml-jobs]]

@@ -67,7 +68,7 @@ example, the following request closes the `job1` job:
POST _xpack/ml/anomaly_detectors/total-requests/_close
--------------------------------------------------
// CONSOLE
-// TEST[setup:server_metrics_openjob]
+// TEST[skip:setup:server_metrics_openjob]

NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}.
For more information, see <<security-privileges>>.

@@ -86,3 +87,4 @@ all open jobs on the cluster:
POST _xpack/ml/anomaly_detectors/_all/_close
----------------------------------
// CONSOLE
+// TEST[skip:needs-licence]
@@ -95,7 +95,7 @@ PUT /my_index/my_type/1
}
----------------------------------
// CONSOLE
-// TESTSETUP
+// TEST[skip:SETUP]
<1> In this example, string fields are mapped as `keyword` fields to support
aggregation. If you want both a full text (`text`) and a keyword (`keyword`)
version of the same field, use multi-fields. For more information, see

@@ -144,7 +144,7 @@ PUT _xpack/ml/datafeeds/datafeed-test1
}
----------------------------------
// CONSOLE
-// TEST[skip:broken]
+// TEST[skip:needs-licence]
<1> A script field named `total_error_count` is referenced in the detector
within the job.
<2> The script field is defined in the {dfeed}.

@@ -163,7 +163,7 @@ You can preview the contents of the {dfeed} by using the following API:
GET _xpack/ml/datafeeds/datafeed-test1/_preview
----------------------------------
// CONSOLE
-// TEST[continued]
+// TEST[skip:continued]

In this example, the API returns the following results, which contain a sum of
the `error_count` and `aborted_count` values:

@@ -177,8 +177,6 @@ the `error_count` and `aborted_count` values:
}
]
----------------------------------
-// TESTRESPONSE

NOTE: This example demonstrates how to use script fields, but it contains
insufficient data to generate meaningful results. For a full demonstration of

@@ -254,7 +252,7 @@ PUT _xpack/ml/datafeeds/datafeed-test2
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
-// TEST[skip:broken]
+// TEST[skip:needs-licence]
<1> The script field has a rather generic name in this case, since it will
be used for various tests in the subsequent examples.
<2> The script field uses the plus (+) operator to concatenate strings.

@@ -271,7 +269,6 @@ and "SMITH " have been concatenated and an underscore was added:
}
]
----------------------------------
-// TESTRESPONSE

[[ml-configuring-transform3]]
.Example 3: Trimming strings

@@ -292,7 +289,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
-// TEST[continued]
+// TEST[skip:continued]
<1> This script field uses the `trim()` function to trim extra white space from a
string.

@@ -308,7 +305,6 @@ has been trimmed to "SMITH":
}
]
----------------------------------
-// TESTRESPONSE

[[ml-configuring-transform4]]
.Example 4: Converting strings to lowercase

@@ -329,7 +325,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
-// TEST[continued]
+// TEST[skip:continued]
<1> This script field uses the `toLowerCase` function to convert a string to all
lowercase letters. Likewise, you can use the `toUpperCase{}` function to convert
a string to uppercase letters.

@@ -346,7 +342,6 @@ has been converted to "joe":
}
]
----------------------------------
-// TESTRESPONSE

[[ml-configuring-transform5]]
.Example 5: Converting strings to mixed case formats

@@ -367,7 +362,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
-// TEST[continued]
+// TEST[skip:continued]
<1> This script field is a more complicated example of case manipulation. It uses
the `subString()` function to capitalize the first letter of a string and
converts the remaining characters to lowercase.

@@ -384,7 +379,6 @@ has been converted to "Joe":
}
]
----------------------------------
-// TESTRESPONSE

[[ml-configuring-transform6]]
.Example 6: Replacing tokens

@@ -405,7 +399,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
-// TEST[continued]
+// TEST[skip:continued]
<1> This script field uses regular expressions to replace white
space with underscores.

@@ -421,7 +415,6 @@ The preview {dfeed} API returns the following results, which show that
}
]
----------------------------------
-// TESTRESPONSE

[[ml-configuring-transform7]]
.Example 7: Regular expression matching and concatenation

@@ -442,7 +435,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
GET _xpack/ml/datafeeds/datafeed-test2/_preview
--------------------------------------------------
// CONSOLE
-// TEST[continued]
+// TEST[skip:continued]
<1> This script field looks for a specific regular expression pattern and emits the
matched groups as a concatenated string. If no match is found, it emits an empty
string.

@@ -459,7 +452,6 @@ The preview {dfeed} API returns the following results, which show that
}
]
----------------------------------
-// TESTRESPONSE

[[ml-configuring-transform8]]
.Example 8: Splitting strings by domain name

@@ -509,7 +501,7 @@ PUT _xpack/ml/datafeeds/datafeed-test3
GET _xpack/ml/datafeeds/datafeed-test3/_preview
--------------------------------------------------
// CONSOLE
-// TEST[skip:broken]
+// TEST[skip:needs-licence]

If you have a single field that contains a well-formed DNS domain name, you can
use the `domainSplit()` function to split the string into its highest registered

@@ -537,7 +529,6 @@ The preview {dfeed} API returns the following results, which show that
}
]
----------------------------------
-// TESTRESPONSE

[[ml-configuring-transform9]]
.Example 9: Transforming geo_point data

@@ -583,7 +574,7 @@ PUT _xpack/ml/datafeeds/datafeed-test4
GET _xpack/ml/datafeeds/datafeed-test4/_preview
--------------------------------------------------
// CONSOLE
-// TEST[skip:broken]
+// TEST[skip:needs-licence]

In {es}, location data can be stored in `geo_point` fields but this data type is
not supported natively in {xpackml} analytics. This example of a script field

@@ -602,4 +593,4 @@ The preview {dfeed} API returns the following results, which show that
}
]
----------------------------------
-// TESTRESPONSE
@@ -544,3 +544,8 @@ You can use the following APIs to add, remove, and retrieve role mappings:
=== Privilege APIs

See <<security-api-has-privileges>>.
+
+[role="exclude",id="xpack-commands"]
+=== X-Pack commands
+
+See <<commands>>.
@@ -23,7 +23,7 @@ include::{xes-repo-dir}/rest-api/graph/explore.asciidoc[]
include::{es-repo-dir}/licensing/index.asciidoc[]
include::{es-repo-dir}/migration/migration.asciidoc[]
include::{xes-repo-dir}/rest-api/ml-api.asciidoc[]
-include::{xes-repo-dir}/rest-api/rollup-api.asciidoc[]
+include::{es-repo-dir}/rollup/rollup-api.asciidoc[]
include::{xes-repo-dir}/rest-api/security.asciidoc[]
include::{xes-repo-dir}/rest-api/watcher.asciidoc[]
include::{xes-repo-dir}/rest-api/defs.asciidoc[]
@ -1,3 +1,5 @@
|
||||||
|
[role="xpack"]
|
||||||
|
[testenv="basic"]
|
||||||
[[rollup-api-quickref]]
|
[[rollup-api-quickref]]
|
||||||
== API Quick Reference
|
== API Quick Reference
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
[role="xpack"]
|
[role="xpack"]
|
||||||
|
[testenv="basic"]
|
||||||
[[rollup-delete-job]]
|
[[rollup-delete-job]]
|
||||||
=== Delete Job API
|
=== Delete Job API
|
||||||
++++
|
++++
|
|
@ -1,4 +1,5 @@
|
||||||
[role="xpack"]
|
[role="xpack"]
|
||||||
|
[testenv="basic"]
|
||||||
[[rollup-get-job]]
|
[[rollup-get-job]]
|
||||||
=== Get Rollup Jobs API
|
=== Get Rollup Jobs API
|
||||||
++++
|
++++
|
|
@ -1,4 +1,5 @@
|
||||||
[role="xpack"]
|
[role="xpack"]
|
||||||
|
[testenv="basic"]
|
||||||
[[rollup-put-job]]
|
[[rollup-put-job]]
|
||||||
=== Create Job API
|
=== Create Job API
|
||||||
++++
|
++++
|
|
@ -1,4 +1,5 @@
|
||||||
[role="xpack"]
|
[role="xpack"]
|
||||||
|
[testenv="basic"]
|
||||||
[[rollup-get-rollup-caps]]
|
[[rollup-get-rollup-caps]]
|
||||||
=== Get Rollup Job Capabilities
|
=== Get Rollup Job Capabilities
|
||||||
++++
|
++++
|
|
@ -1,4 +1,5 @@
|
||||||
[role="xpack"]
|
[role="xpack"]
|
||||||
|
[testenv="basic"]
|
||||||
[[rollup-job-config]]
|
[[rollup-job-config]]
|
||||||
=== Rollup Job Configuration
|
=== Rollup Job Configuration
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
[role="xpack"]
|
[role="xpack"]
|
||||||
|
[testenv="basic"]
|
||||||
[[rollup-search]]
|
[[rollup-search]]
|
||||||
=== Rollup Search
|
=== Rollup Search
|
||||||
++++
|
++++
|
|
@ -1,4 +1,5 @@
|
||||||
[role="xpack"]
|
[role="xpack"]
|
||||||
|
[testenv="basic"]
|
||||||
[[rollup-start-job]]
|
[[rollup-start-job]]
|
||||||
=== Start Job API
|
=== Start Job API
|
||||||
++++
|
++++
|
|
@ -1,4 +1,5 @@
|
||||||
[role="xpack"]
|
[role="xpack"]
|
||||||
|
[testenv="basic"]
|
||||||
[[rollup-stop-job]]
|
[[rollup-stop-job]]
|
||||||
=== Stop Job API
|
=== Stop Job API
|
||||||
++++
|
++++
|
|
@@ -1,3 +1,5 @@
+[role="xpack"]
+[testenv="basic"]
 [[xpack-rollup]]
 = Rolling up historical data

@@ -1,3 +1,5 @@
+[role="xpack"]
+[testenv="basic"]
 [[rollup-overview]]
 == Overview

@@ -1,3 +1,5 @@
+[role="xpack"]
+[testenv="basic"]
 [[rollup-agg-limitations]]
 == Rollup Aggregation Limitations

@@ -1,4 +1,5 @@
 [role="xpack"]
+[testenv="basic"]
 [[rollup-apis]]
 == Rollup APIs

@@ -26,12 +27,12 @@



-include::rollup/delete-job.asciidoc[]
-include::rollup/get-job.asciidoc[]
-include::rollup/put-job.asciidoc[]
-include::rollup/start-job.asciidoc[]
-include::rollup/stop-job.asciidoc[]
-include::rollup/rollup-caps.asciidoc[]
-include::rollup/rollup-index-caps.asciidoc[]
-include::rollup/rollup-search.asciidoc[]
-include::rollup/rollup-job-config.asciidoc[]
+include::apis/delete-job.asciidoc[]
+include::apis/get-job.asciidoc[]
+include::apis/put-job.asciidoc[]
+include::apis/start-job.asciidoc[]
+include::apis/stop-job.asciidoc[]
+include::apis/rollup-caps.asciidoc[]
+include::apis/rollup-index-caps.asciidoc[]
+include::apis/rollup-search.asciidoc[]
+include::apis/rollup-job-config.asciidoc[]

@@ -1,3 +1,5 @@
+[role="xpack"]
+[testenv="basic"]
 [[rollup-getting-started]]
 == Getting Started

@@ -1,3 +1,5 @@
+[role="xpack"]
+[testenv="basic"]
 [[rollup-search-limitations]]
 == Rollup Search Limitations

@@ -1,3 +1,5 @@
+[role="xpack"]
+[testenv="basic"]
 [[rollup-understanding-groups]]
 == Understanding Groups
@@ -22,6 +22,15 @@ the first parameter:
 $ ./bin/elasticsearch-sql-cli https://some.server:9200
 --------------------------------------------------

+If security is enabled on your cluster, you can pass the username
+and password in the form `username:password@host_name:port`
+to the SQL CLI:
+
+[source,bash]
+--------------------------------------------------
+$ ./bin/elasticsearch-sql-cli https://sql_user:strongpassword@some.server:9200
+--------------------------------------------------
+
 Once the CLI is running you can use any <<sql-spec,query>> that
 Elasticsearch supports:
@@ -46,12 +46,13 @@ if (!isEclipse && !isIdea) {
     targetCompatibility = 9
   }

-/* Enable this when forbiddenapis was updated to 2.6.
- * See: https://github.com/elastic/elasticsearch/issues/29292
  forbiddenApisJava9 {
-    targetCompatibility = 9
+    if (project.runtimeJavaVersion < JavaVersion.VERSION_1_9) {
+      targetCompatibility = JavaVersion.VERSION_1_9
+      javaHome = project.java9Home
+    }
+    replaceSignatureFiles 'jdk-signatures'
  }
-*/

  jar {
    metaInf {
@@ -25,6 +25,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.json.JsonXContent;
 import org.elasticsearch.index.IndexModule;
+import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.join.ParentJoinPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.test.ESIntegTestCase;
@@ -58,6 +59,8 @@ public abstract class ParentChildTestCase extends ESIntegTestCase {
     @Override
     public Settings indexSettings() {
         Settings.Builder builder = Settings.builder().put(super.indexSettings())
+            // AwaitsFix: https://github.com/elastic/elasticsearch/issues/33318
+            .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), false)
             // aggressive filter caching so that we can assert on the filter cache size
             .put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), true)
             .put(IndexModule.INDEX_QUERY_CACHE_EVERYTHING_SETTING.getKey(), true);
@@ -58,13 +58,13 @@ if (!isEclipse && !isIdea) {
     sourceCompatibility = 9
     targetCompatibility = 9
   }

-/* Enable this when forbiddenapis was updated to 2.6.
- * See: https://github.com/elastic/elasticsearch/issues/29292
  forbiddenApisJava9 {
-    targetCompatibility = 9
+    if (project.runtimeJavaVersion < JavaVersion.VERSION_1_9) {
+      targetCompatibility = JavaVersion.VERSION_1_9
+      javaHome = project.java9Home
+    }
  }
-*/

  jar {
    metaInf {
@@ -104,24 +104,12 @@ public class PathTrie<T> {
             namedWildcard = key.substring(key.indexOf('{') + 1, key.indexOf('}'));
         }

-        public boolean isWildcard() {
-            return isWildcard;
-        }
-
-        public synchronized void addChild(TrieNode child) {
-            addInnerChild(child.key, child);
-        }
-
         private void addInnerChild(String key, TrieNode child) {
             Map<String, TrieNode> newChildren = new HashMap<>(children);
             newChildren.put(key, child);
             children = unmodifiableMap(newChildren);
         }

-        public TrieNode getChild(String key) {
-            return children.get(key);
-        }
-
         public synchronized void insert(String[] path, int index, T value) {
             if (index >= path.length)
                 return;
@@ -302,7 +290,7 @@ public class PathTrie<T> {
         }
         int index = 0;
         // Supports initial delimiter.
-        if (strings.length > 0 && strings[0].isEmpty()) {
+        if (strings[0].isEmpty()) {
             index = 1;
         }
         root.insert(strings, index, value);
@@ -327,7 +315,7 @@ public class PathTrie<T> {
         }
         int index = 0;
         // Supports initial delimiter.
-        if (strings.length > 0 && strings[0].isEmpty()) {
+        if (strings[0].isEmpty()) {
            index = 1;
         }
         root.insertOrUpdate(strings, index, value, updater);
@@ -352,7 +340,7 @@ public class PathTrie<T> {
         int index = 0;

         // Supports initial delimiter.
-        if (strings.length > 0 && strings[0].isEmpty()) {
+        if (strings[0].isEmpty()) {
            index = 1;
         }
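The `strings.length > 0` guard dropped in the three PathTrie hunks above is dead code rather than a behaviour change: `String.split` never returns an empty array, so `strings[0]` is always safe to read. A minimal, self-contained sketch of that invariant (plain Java, independent of PathTrie):

[source,java]
--------------------------------------------------
// Sketch only: shows the String.split invariant the simplified check relies on.
public class SplitInvariantDemo {
    public static void main(String[] args) {
        String[] empty = "".split("/");              // [""] -> length 1
        String[] rootOnly = "/".split("/");          // [""] -> length 1 (trailing empties dropped)
        String[] path = "/_cat/indices".split("/");  // ["", "_cat", "indices"]

        System.out.println(empty.length);     // 1
        System.out.println(rootOnly.length);  // 1
        System.out.println(path.length);      // 3, path[0] is the empty initial-delimiter token
    }
}
--------------------------------------------------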
@@ -75,11 +75,10 @@ public final class IndexSettings {
         switch(s) {
             case "false":
             case "true":
-            case "fix":
             case "checksum":
                 return s;
             default:
-                throw new IllegalArgumentException("unknown value for [index.shard.check_on_startup] must be one of [true, false, fix, checksum] but was: " + s);
+                throw new IllegalArgumentException("unknown value for [index.shard.check_on_startup] must be one of [true, false, checksum] but was: " + s);
         }
     }, Property.IndexScope);
@@ -1332,7 +1332,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
         }
         recoveryState.setStage(RecoveryState.Stage.VERIFY_INDEX);
         // also check here, before we apply the translog
-        if (Booleans.isTrue(checkIndexOnStartup)) {
+        if (Booleans.isTrue(checkIndexOnStartup) || "checksum".equals(checkIndexOnStartup)) {
             try {
                 checkIndex();
             } catch (IOException ex) {
@@ -1955,6 +1955,9 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
         if (store.tryIncRef()) {
             try {
                 doCheckIndex();
+            } catch (IOException e) {
+                store.markStoreCorrupted(e);
+                throw e;
             } finally {
                 store.decRef();
             }
@@ -1998,18 +2001,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
                 return;
             }
             logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
-            if ("fix".equals(checkIndexOnStartup)) {
-                if (logger.isDebugEnabled()) {
-                    logger.debug("fixing index, writing new segments file ...");
-                }
-                store.exorciseIndex(status);
-                if (logger.isDebugEnabled()) {
-                    logger.debug("index fixed, wrote new segments file \"{}\"", status.segmentsFileName);
-                }
-            } else {
-                // only throw a failure if we are not going to fix the index
-                throw new IllegalStateException("index check failure but can't fix it");
-            }
+            throw new IOException("index check failure");
         }
     }
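Taken together, the IndexShard hunks above mean `index.shard.check_on_startup: fix` is gone: only `true` and `checksum` trigger a startup check, a failed check now surfaces as an `IOException` instead of an attempted repair, and an `IOException` from `doCheckIndex()` additionally marks the store corrupted. A self-contained sketch of that decision flow (illustrative names, not the actual Elasticsearch classes):

[source,java]
--------------------------------------------------
import java.io.IOException;

// Hedged sketch of the post-change startup-check behaviour.
public class CheckOnStartupFlowSketch {

    // Only "true" and "checksum" trigger a check now; "fix" is no longer recognised.
    static boolean shouldCheck(String checkOnStartup) {
        return "true".equals(checkOnStartup) || "checksum".equals(checkOnStartup);
    }

    // Models recovery of a shard whose on-disk index is broken.
    static void recover(String checkOnStartup, boolean indexIsBroken) throws IOException {
        if (shouldCheck(checkOnStartup) && indexIsBroken) {
            // No repair attempt any more: the recovery simply fails.
            throw new IOException("index check failure");
        }
    }

    public static void main(String[] args) {
        for (String value : new String[]{"false", "true", "checksum"}) {
            try {
                recover(value, true);
                System.out.println(value + " -> recovery proceeds");
            } catch (IOException e) {
                System.out.println(value + " -> failed: " + e.getMessage());
            }
        }
    }
}
--------------------------------------------------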
@@ -134,7 +134,8 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
     static final int VERSION_STACK_TRACE = 1; // we write the stack trace too since 1.4.0
     static final int VERSION_START = 0;
     static final int VERSION = VERSION_WRITE_THROWABLE;
-    static final String CORRUPTED = "corrupted_";
+    // public is for test purposes
+    public static final String CORRUPTED = "corrupted_";
     public static final Setting<TimeValue> INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING =
         Setting.timeSetting("index.store.stats_refresh_interval", TimeValue.timeValueSeconds(10), Property.IndexScope);
@@ -360,18 +361,6 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
         }
     }

-    /**
-     * Repairs the index using the previous returned status from {@link #checkIndex(PrintStream)}.
-     */
-    public void exorciseIndex(CheckIndex.Status status) throws IOException {
-        metadataLock.writeLock().lock();
-        try (CheckIndex checkIndex = new CheckIndex(directory)) {
-            checkIndex.exorciseIndex(status);
-        } finally {
-            metadataLock.writeLock().unlock();
-        }
-    }
-
     public StoreStats stats() throws IOException {
         ensureOpen();
         return new StoreStats(directory.estimateSize());
@@ -69,7 +69,7 @@ public class MetaDataIndexTemplateServiceTests extends ESSingleNodeTestCase {
             containsString("Failed to parse value [0] for setting [index.number_of_shards] must be >= 1"));
         assertThat(throwables.get(0).getMessage(),
             containsString("unknown value for [index.shard.check_on_startup] " +
-                "must be one of [true, false, fix, checksum] but was: blargh"));
+                "must be one of [true, false, checksum] but was: blargh"));
     }

     public void testIndexTemplateValidationAccumulatesValidationErrors() {
@@ -23,6 +23,7 @@ import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
@@ -118,6 +119,7 @@ import org.elasticsearch.snapshots.Snapshot;
 import org.elasticsearch.snapshots.SnapshotId;
 import org.elasticsearch.snapshots.SnapshotInfo;
 import org.elasticsearch.snapshots.SnapshotShardFailure;
+import org.elasticsearch.test.CorruptionUtils;
 import org.elasticsearch.test.DummyShardLock;
 import org.elasticsearch.test.FieldMaskingReader;
 import org.elasticsearch.test.VersionUtils;
@@ -126,7 +128,11 @@ import org.elasticsearch.ElasticsearchException;

 import java.io.IOException;
 import java.nio.charset.Charset;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
 import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -1239,7 +1245,7 @@ public class IndexShardTests extends IndexShardTestCase {
         };

         try (Store store = createStore(shardId, new IndexSettings(metaData, Settings.EMPTY), directory)) {
-            IndexShard shard = newShard(shardRouting, shardPath, metaData, store,
+            IndexShard shard = newShard(shardRouting, shardPath, metaData, i -> store,
                 null, new InternalEngineFactory(), () -> {
                 }, EMPTY_EVENT_LISTENER);
             AtomicBoolean failureCallbackTriggered = new AtomicBoolean(false);
@@ -2590,6 +2596,143 @@ public class IndexShardTests extends IndexShardTestCase {
         closeShards(newShard);
     }

+    public void testIndexCheckOnStartup() throws Exception {
+        final IndexShard indexShard = newStartedShard(true);
+
+        final long numDocs = between(10, 100);
+        for (long i = 0; i < numDocs; i++) {
+            indexDoc(indexShard, "_doc", Long.toString(i), "{}");
+        }
+        indexShard.flush(new FlushRequest());
+        closeShards(indexShard);
+
+        final ShardPath shardPath = indexShard.shardPath();
+
+        final Path indexPath = corruptIndexFile(shardPath);
+
+        final AtomicInteger corruptedMarkerCount = new AtomicInteger();
+        final SimpleFileVisitor<Path> corruptedVisitor = new SimpleFileVisitor<Path>() {
+            @Override
+            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+                if (Files.isRegularFile(file) && file.getFileName().toString().startsWith(Store.CORRUPTED)) {
+                    corruptedMarkerCount.incrementAndGet();
+                }
+                return FileVisitResult.CONTINUE;
+            }
+        };
+        Files.walkFileTree(indexPath, corruptedVisitor);
+
+        assertThat("corruption marker should not be there", corruptedMarkerCount.get(), equalTo(0));
+
+        final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),
+            RecoverySource.StoreRecoverySource.EXISTING_STORE_INSTANCE
+        );
+        // start shard and perform index check on startup. It enforce shard to fail due to corrupted index files
+        final IndexMetaData indexMetaData = IndexMetaData.builder(indexShard.indexSettings().getIndexMetaData())
+            .settings(Settings.builder()
+                .put(indexShard.indexSettings.getSettings())
+                .put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("true", "checksum")))
+            .build();
+
+        IndexShard corruptedShard = newShard(shardRouting, shardPath, indexMetaData,
+            null, null, indexShard.engineFactory,
+            indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
+
+        final IndexShardRecoveryException indexShardRecoveryException =
+            expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
+        assertThat(indexShardRecoveryException.getMessage(), equalTo("failed recovery"));
+
+        // check that corrupt marker is there
+        Files.walkFileTree(indexPath, corruptedVisitor);
+        assertThat("store has to be marked as corrupted", corruptedMarkerCount.get(), equalTo(1));
+
+        try {
+            closeShards(corruptedShard);
+        } catch (RuntimeException e) {
+            assertThat(e.getMessage(), equalTo("CheckIndex failed"));
+        }
+    }
+
+    public void testShardDoesNotStartIfCorruptedMarkerIsPresent() throws Exception {
+        final IndexShard indexShard = newStartedShard(true);
+
+        final long numDocs = between(10, 100);
+        for (long i = 0; i < numDocs; i++) {
+            indexDoc(indexShard, "_doc", Long.toString(i), "{}");
+        }
+        indexShard.flush(new FlushRequest());
+        closeShards(indexShard);
+
+        final ShardPath shardPath = indexShard.shardPath();
+
+        final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),
+            RecoverySource.StoreRecoverySource.EXISTING_STORE_INSTANCE
+        );
+        final IndexMetaData indexMetaData = indexShard.indexSettings().getIndexMetaData();
+
+        final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME);
+
+        // create corrupted marker
+        final String corruptionMessage = "fake ioexception";
+        try(Store store = createStore(indexShard.indexSettings(), shardPath)) {
+            store.markStoreCorrupted(new IOException(corruptionMessage));
+        }
+
+        // try to start shard on corrupted files
+        final IndexShard corruptedShard = newShard(shardRouting, shardPath, indexMetaData,
+            null, null, indexShard.engineFactory,
+            indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
+
+        final IndexShardRecoveryException exception1 = expectThrows(IndexShardRecoveryException.class,
+            () -> newStartedShard(p -> corruptedShard, true));
+        assertThat(exception1.getCause().getMessage(), equalTo(corruptionMessage + " (resource=preexisting_corruption)"));
+        closeShards(corruptedShard);
+
+        final AtomicInteger corruptedMarkerCount = new AtomicInteger();
+        final SimpleFileVisitor<Path> corruptedVisitor = new SimpleFileVisitor<Path>() {
+            @Override
+            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+                if (Files.isRegularFile(file) && file.getFileName().toString().startsWith(Store.CORRUPTED)) {
+                    corruptedMarkerCount.incrementAndGet();
+                }
+                return FileVisitResult.CONTINUE;
+            }
+        };
+        Files.walkFileTree(indexPath, corruptedVisitor);
+        assertThat("store has to be marked as corrupted", corruptedMarkerCount.get(), equalTo(1));
+
+        // try to start another time shard on corrupted files
+        final IndexShard corruptedShard2 = newShard(shardRouting, shardPath, indexMetaData,
+            null, null, indexShard.engineFactory,
+            indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
+
+        final IndexShardRecoveryException exception2 = expectThrows(IndexShardRecoveryException.class,
+            () -> newStartedShard(p -> corruptedShard2, true));
+        assertThat(exception2.getCause().getMessage(), equalTo(corruptionMessage + " (resource=preexisting_corruption)"));
+        closeShards(corruptedShard2);
+
+        // check that corrupt marker is there
+        corruptedMarkerCount.set(0);
+        Files.walkFileTree(indexPath, corruptedVisitor);
+        assertThat("store still has a single corrupt marker", corruptedMarkerCount.get(), equalTo(1));
+    }
+
+    private Path corruptIndexFile(ShardPath shardPath) throws IOException {
+        final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME);
+        final Path[] filesToCorrupt =
+            Files.walk(indexPath)
+                .filter(p -> {
+                    final String name = p.getFileName().toString();
+                    return Files.isRegularFile(p)
+                        && name.startsWith("extra") == false // Skip files added by Lucene's ExtrasFS
+                        && IndexWriter.WRITE_LOCK_NAME.equals(name) == false
+                        && name.startsWith("segments_") == false && name.endsWith(".si") == false;
+                })
+                .toArray(Path[]::new);
+        CorruptionUtils.corruptFile(random(), filesToCorrupt);
+        return indexPath;
+    }
+
     /**
      * Simulates a scenario that happens when we are async fetching snapshot metadata from GatewayService
      * and checking index concurrently. This should always be possible without any exception.
@@ -2613,7 +2756,7 @@ public class IndexShardTests extends IndexShardTestCase {
         final IndexMetaData indexMetaData = IndexMetaData.builder(indexShard.indexSettings().getIndexMetaData())
             .settings(Settings.builder()
                 .put(indexShard.indexSettings.getSettings())
-                .put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("false", "true", "checksum", "fix")))
+                .put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("false", "true", "checksum")))
             .build();
         final IndexShard newShard = newShard(shardRouting, indexShard.shardPath(), indexMetaData,
             null, null, indexShard.engineFactory, indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
@@ -32,6 +32,7 @@ import org.elasticsearch.cluster.routing.ShardRouting;
 import org.elasticsearch.cluster.routing.ShardRoutingHelper;
 import org.elasticsearch.cluster.routing.ShardRoutingState;
 import org.elasticsearch.cluster.routing.TestShardRouting;
+import org.elasticsearch.common.CheckedFunction;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.lucene.uid.Versions;
@@ -156,7 +157,6 @@ public abstract class IndexShardTestCase extends ESTestCase {
         return Settings.EMPTY;
     }

-
     protected Store createStore(IndexSettings indexSettings, ShardPath shardPath) throws IOException {
         return createStore(shardPath.getShardId(), indexSettings, newFSDirectory(shardPath.resolveIndex()));
     }
@@ -169,7 +169,6 @@ public abstract class IndexShardTestCase extends ESTestCase {
             }
         };
         return new Store(shardId, indexSettings, directoryService, new DummyShardLock(shardId));
-
     }

     /**
@@ -179,7 +178,17 @@ public abstract class IndexShardTestCase extends ESTestCase {
      *                another shard)
      */
     protected IndexShard newShard(boolean primary) throws IOException {
-        return newShard(primary, Settings.EMPTY, new InternalEngineFactory());
+        return newShard(primary, Settings.EMPTY);
+    }
+
+    /**
+     * Creates a new initializing shard. The shard will have its own unique data path.
+     *
+     * @param primary indicates whether to a primary shard (ready to recover from an empty store) or a replica (ready to recover from
+     *                another shard)
+     */
+    protected IndexShard newShard(final boolean primary, final Settings settings) throws IOException {
+        return newShard(primary, settings, new InternalEngineFactory());
     }

     /**
@@ -318,23 +327,25 @@ public abstract class IndexShardTestCase extends ESTestCase {
      * @param routing                shard routing to use
      * @param shardPath              path to use for shard data
      * @param indexMetaData          indexMetaData for the shard, including any mapping
-     * @param store                  an optional custom store to use. If null a default file based store will be created
+     * @param storeProvider          an optional custom store provider to use. If null a default file based store will be created
     * @param indexSearcherWrapper   an optional wrapper to be used during searchers
     * @param globalCheckpointSyncer callback for syncing global checkpoints
     * @param indexEventListener     index event listener
     * @param listeners              an optional set of listeners to add to the shard
     */
    protected IndexShard newShard(ShardRouting routing, ShardPath shardPath, IndexMetaData indexMetaData,
-                                  @Nullable Store store, @Nullable IndexSearcherWrapper indexSearcherWrapper,
+                                  @Nullable CheckedFunction<IndexSettings, Store, IOException> storeProvider,
+                                  @Nullable IndexSearcherWrapper indexSearcherWrapper,
                                   @Nullable EngineFactory engineFactory,
                                   Runnable globalCheckpointSyncer,
                                   IndexEventListener indexEventListener, IndexingOperationListener... listeners) throws IOException {
        final Settings nodeSettings = Settings.builder().put("node.name", routing.currentNodeId()).build();
        final IndexSettings indexSettings = new IndexSettings(indexMetaData, nodeSettings);
        final IndexShard indexShard;
-        if (store == null) {
-            store = createStore(indexSettings, shardPath);
+        if (storeProvider == null) {
+            storeProvider = is -> createStore(is, shardPath);
        }
+        final Store store = storeProvider.apply(indexSettings);
        boolean success = false;
        try {
            IndexCache indexCache = new IndexCache(indexSettings, new DisabledQueryCache(indexSettings), null);
@@ -424,7 +435,18 @@ public abstract class IndexShardTestCase extends ESTestCase {
     */
    protected IndexShard newStartedShard(
        final boolean primary, final Settings settings, final EngineFactory engineFactory) throws IOException {
-        IndexShard shard = newShard(primary, settings, engineFactory);
+        return newStartedShard(p -> newShard(p, settings, engineFactory), primary);
+    }
+
+    /**
+     * creates a new empty shard and starts it.
+     *
+     * @param shardFunction shard factory function
+     * @param primary controls whether the shard will be a primary or a replica.
+     */
+    protected IndexShard newStartedShard(CheckedFunction<Boolean, IndexShard, IOException> shardFunction,
+                                         boolean primary) throws IOException {
+        IndexShard shard = shardFunction.apply(primary);
        if (primary) {
            recoverShardFromStore(shard);
        } else {
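The `newShard`/`newStartedShard` changes above replace direct `Store` and `IndexShard` arguments with factory functions (`CheckedFunction`), so the object is built lazily from the fully-resolved settings. A self-contained sketch of that provider pattern (illustrative names, not the Elasticsearch test classes):

[source,java]
--------------------------------------------------
import java.io.IOException;

// Hedged sketch of the "pass a provider, not the object" refactoring.
public class ProviderPatternSketch {

    @FunctionalInterface
    interface CheckedFunction<T, R, E extends Exception> {
        R apply(T t) throws E;
    }

    static class IndexSettings { final String name = "test"; }

    static class Store {
        final IndexSettings settings;
        Store(IndexSettings settings) { this.settings = settings; }
    }

    // Mirrors the shape of newShard(...): fall back to a default factory when no provider is given.
    static Store openStore(CheckedFunction<IndexSettings, Store, IOException> storeProvider,
                           IndexSettings indexSettings) throws IOException {
        if (storeProvider == null) {
            storeProvider = Store::new; // default file-based store in the real test harness
        }
        return storeProvider.apply(indexSettings);
    }

    public static void main(String[] args) throws IOException {
        IndexSettings settings = new IndexSettings();
        Store byDefault = openStore(null, settings);
        Store custom = openStore(is -> new Store(is), settings); // like "i -> store" in IndexShardTests
        System.out.println(byDefault.settings.name + " " + custom.settings.name);
    }
}
--------------------------------------------------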
@@ -1,102 +0,0 @@
-[role="xpack"]
-[[ml-api-quickref]]
-== API quick reference
-
-All {ml} endpoints have the following base:
-
-[source,js]
-----
-/_xpack/ml/
-----
-// NOTCONSOLE
-
-The main {ml} resources can be accessed with a variety of endpoints:
-
-* <<ml-api-jobs,+/anomaly_detectors/+>>: Create and manage {ml} jobs
-* <<ml-api-datafeeds,+/datafeeds/+>>: Select data from {es} to be analyzed
-* <<ml-api-results,+/results/+>>: Access the results of a {ml} job
-* <<ml-api-snapshots,+/model_snapshots/+>>: Manage model snapshots
-//* <<ml-api-validate,+/validate/+>>: Validate subsections of job configurations
-
-[float]
-[[ml-api-jobs]]
-=== /anomaly_detectors/
-
-* {ref}/ml-put-job.html[PUT /anomaly_detectors/<job_id+++>+++]: Create a job
-* {ref}/ml-open-job.html[POST /anomaly_detectors/<job_id>/_open]: Open a job
-* {ref}/ml-post-data.html[POST /anomaly_detectors/<job_id>/_data]: Send data to a job
-* {ref}/ml-get-job.html[GET /anomaly_detectors]: List jobs
-* {ref}/ml-get-job.html[GET /anomaly_detectors/<job_id+++>+++]: Get job details
-* {ref}/ml-get-job-stats.html[GET /anomaly_detectors/<job_id>/_stats]: Get job statistics
-* {ref}/ml-update-job.html[POST /anomaly_detectors/<job_id>/_update]: Update certain properties of the job configuration
-* {ref}/ml-flush-job.html[POST anomaly_detectors/<job_id>/_flush]: Force a job to analyze buffered data
-* {ref}/ml-forecast.html[POST anomaly_detectors/<job_id>/_forecast]: Forecast future job behavior
-* {ref}/ml-close-job.html[POST /anomaly_detectors/<job_id>/_close]: Close a job
-* {ref}/ml-delete-job.html[DELETE /anomaly_detectors/<job_id+++>+++]: Delete a job
-
-[float]
-[[ml-api-calendars]]
-=== /calendars/
-
-* {ref}/ml-put-calendar.html[PUT /calendars/<calendar_id+++>+++]: Create a calendar
-* {ref}/ml-post-calendar-event.html[POST /calendars/<calendar_id+++>+++/events]: Add a scheduled event to a calendar
-* {ref}/ml-put-calendar-job.html[PUT /calendars/<calendar_id+++>+++/jobs/<job_id+++>+++]: Associate a job with a calendar
-* {ref}/ml-get-calendar.html[GET /calendars/<calendar_id+++>+++]: Get calendar details
-* {ref}/ml-get-calendar-event.html[GET /calendars/<calendar_id+++>+++/events]: Get scheduled event details
-* {ref}/ml-delete-calendar-event.html[DELETE /calendars/<calendar_id+++>+++/events/<event_id+++>+++]: Remove a scheduled event from a calendar
-* {ref}/ml-delete-calendar-job.html[DELETE /calendars/<calendar_id+++>+++/jobs/<job_id+++>+++]: Disassociate a job from a calendar
-* {ref}/ml-delete-calendar.html[DELETE /calendars/<calendar_id+++>+++]: Delete a calendar
-
-[float]
-[[ml-api-filters]]
-=== /filters/
-
-* {ref}/ml-put-filter.html[PUT /filters/<filter_id+++>+++]: Create a filter
-* {ref}/ml-update-filter.html[POST /filters/<filter_id+++>+++/_update]: Update a filter
-* {ref}/ml-get-filter.html[GET /filters/<filter_id+++>+++]: List filters
-* {ref}/ml-delete-filter.html[DELETE /filter/<filter_id+++>+++]: Delete a filter
-
-[float]
-[[ml-api-datafeeds]]
-=== /datafeeds/
-
-* {ref}/ml-put-datafeed.html[PUT /datafeeds/<datafeed_id+++>+++]: Create a {dfeed}
-* {ref}/ml-start-datafeed.html[POST /datafeeds/<datafeed_id>/_start]: Start a {dfeed}
-* {ref}/ml-get-datafeed.html[GET /datafeeds]: List {dfeeds}
-* {ref}/ml-get-datafeed.html[GET /datafeeds/<datafeed_id+++>+++]: Get {dfeed} details
-* {ref}/ml-get-datafeed-stats.html[GET /datafeeds/<datafeed_id>/_stats]: Get statistical information for {dfeeds}
-* {ref}/ml-preview-datafeed.html[GET /datafeeds/<datafeed_id>/_preview]: Get a preview of a {dfeed}
-* {ref}/ml-update-datafeed.html[POST /datafeeds/<datafeedid>/_update]: Update certain settings for a {dfeed}
-* {ref}/ml-stop-datafeed.html[POST /datafeeds/<datafeed_id>/_stop]: Stop a {dfeed}
-* {ref}/ml-delete-datafeed.html[DELETE /datafeeds/<datafeed_id+++>+++]: Delete {dfeed}
-
-[float]
-[[ml-api-results]]
-=== /results/
-
-* {ref}/ml-get-bucket.html[GET /results/buckets]: List the buckets in the results
-* {ref}/ml-get-bucket.html[GET /results/buckets/<bucket_id+++>+++]: Get bucket details
-* {ref}/ml-get-overall-buckets.html[GET /results/overall_buckets]: Get overall bucket results for multiple jobs
-* {ref}/ml-get-category.html[GET /results/categories]: List the categories in the results
-* {ref}/ml-get-category.html[GET /results/categories/<category_id+++>+++]: Get category details
-* {ref}/ml-get-influencer.html[GET /results/influencers]: Get influencer details
-* {ref}/ml-get-record.html[GET /results/records]: Get records from the results
-
-[float]
-[[ml-api-snapshots]]
-=== /model_snapshots/
-
-* {ref}/ml-get-snapshot.html[GET /model_snapshots]: List model snapshots
-* {ref}/ml-get-snapshot.html[GET /model_snapshots/<snapshot_id+++>+++]: Get model snapshot details
-* {ref}/ml-revert-snapshot.html[POST /model_snapshots/<snapshot_id>/_revert]: Revert a model snapshot
-* {ref}/ml-update-snapshot.html[POST /model_snapshots/<snapshot_id>/_update]: Update certain settings for a model snapshot
-* {ref}/ml-delete-snapshot.html[DELETE /model_snapshots/<snapshot_id+++>+++]: Delete a model snapshot
-
-////
-[float]
-[[ml-api-validate]]
-=== /validate/
-
-* {ref}/ml-valid-detector.html[POST /anomaly_detectors/_validate/detector]: Validate a detector
-* {ref}/ml-valid-job.html[POST /anomaly_detectors/_validate]: Validate a job
-////
@@ -1,35 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.ml.logstructurefinder;
-
-import org.supercsv.prefs.CsvPreference;
-
-import java.io.IOException;
-import java.util.List;
-
-public class CsvLogStructureFinderFactory implements LogStructureFinderFactory {
-
-    /**
-     * Rules are:
-     * - The file must be valid CSV
-     * - It must contain at least two complete records
-     * - There must be at least two fields per record (otherwise files with no commas could be treated as CSV!)
-     * - Every CSV record except the last must have the same number of fields
-     *   The reason the last record is allowed to have fewer fields than the others is that
-     *   it could have been truncated when the file was sampled.
-     */
-    @Override
-    public boolean canCreateFromSample(List<String> explanation, String sample) {
-        return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.EXCEL_PREFERENCE, "CSV");
-    }
-
-    @Override
-    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
-        throws IOException {
-        return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
-            CsvPreference.EXCEL_PREFERENCE, false);
-    }
-}
@@ -29,17 +29,16 @@ import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;

-public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
+public class DelimitedLogStructureFinder implements LogStructureFinder {

     private static final int MAX_LEVENSHTEIN_COMPARISONS = 100;

     private final List<String> sampleMessages;
     private final LogStructure structure;

-    static SeparatedValuesLogStructureFinder makeSeparatedValuesLogStructureFinder(List<String> explanation, String sample,
-                                                                                   String charsetName, Boolean hasByteOrderMarker,
-                                                                                   CsvPreference csvPreference, boolean trimFields)
-        throws IOException {
+    static DelimitedLogStructureFinder makeDelimitedLogStructureFinder(List<String> explanation, String sample, String charsetName,
+                                                                       Boolean hasByteOrderMarker, CsvPreference csvPreference,
+                                                                       boolean trimFields) throws IOException {

         Tuple<List<List<String>>, List<Integer>> parsed = readRows(sample, csvPreference);
         List<List<String>> rows = parsed.v1();
@@ -73,13 +72,14 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
         String preamble = Pattern.compile("\n").splitAsStream(sample).limit(lineNumbers.get(1)).collect(Collectors.joining("\n", "", "\n"));

         char delimiter = (char) csvPreference.getDelimiterChar();
-        LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.fromSeparator(delimiter))
+        LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.DELIMITED)
             .setCharset(charsetName)
             .setHasByteOrderMarker(hasByteOrderMarker)
             .setSampleStart(preamble)
             .setNumLinesAnalyzed(lineNumbers.get(lineNumbers.size() - 1))
             .setNumMessagesAnalyzed(sampleRecords.size())
             .setHasHeaderRow(isHeaderInFile)
+            .setDelimiter(delimiter)
             .setInputFields(Arrays.stream(headerWithNamedBlanks).collect(Collectors.toList()));

         if (trimFields) {
@@ -131,10 +131,10 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
             .setExplanation(explanation)
             .build();

-        return new SeparatedValuesLogStructureFinder(sampleMessages, structure);
+        return new DelimitedLogStructureFinder(sampleMessages, structure);
     }

-    private SeparatedValuesLogStructureFinder(List<String> sampleMessages, LogStructure structure) {
+    private DelimitedLogStructureFinder(List<String> sampleMessages, LogStructure structure) {
         this.sampleMessages = Collections.unmodifiableList(sampleMessages);
         this.structure = structure;
     }
@@ -0,0 +1,57 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.logstructurefinder;
+
+import org.supercsv.prefs.CsvPreference;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Locale;
+
+public class DelimitedLogStructureFinderFactory implements LogStructureFinderFactory {
+
+    private final CsvPreference csvPreference;
+    private final int minFieldsPerRow;
+    private final boolean trimFields;
+
+    DelimitedLogStructureFinderFactory(char delimiter, int minFieldsPerRow, boolean trimFields) {
+        csvPreference = new CsvPreference.Builder('"', delimiter, "\n").build();
+        this.minFieldsPerRow = minFieldsPerRow;
+        this.trimFields = trimFields;
+    }
+
+    /**
+     * Rules are:
+     * - It must contain at least two complete records
+     * - There must be a minimum number of fields per record (otherwise files with no commas could be treated as CSV!)
+     * - Every record except the last must have the same number of fields
+     *   The reason the last record is allowed to have fewer fields than the others is that
+     *   it could have been truncated when the file was sampled.
+     */
+    @Override
+    public boolean canCreateFromSample(List<String> explanation, String sample) {
+        String formatName;
+        switch ((char) csvPreference.getDelimiterChar()) {
+            case ',':
+                formatName = "CSV";
+                break;
+            case '\t':
+                formatName = "TSV";
+                break;
+            default:
+                formatName = Character.getName(csvPreference.getDelimiterChar()).toLowerCase(Locale.ROOT) + " delimited values";
+                break;
+        }
+        return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, minFieldsPerRow, csvPreference, formatName);
+    }
+
+    @Override
+    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
+        throws IOException {
+        return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
+            csvPreference, trimFields);
+    }
+}
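One delimiter-parameterised factory now covers the formats that previously each had their own class (the deleted `CsvLogStructureFinderFactory` being one of them). The wiring of concrete delimiters is not part of this diff; the sketch below is a hypothetical same-package helper showing how such factories could be instantiated, with placeholder minimum-fields/trim settings:

[source,java]
--------------------------------------------------
package org.elasticsearch.xpack.ml.logstructurefinder;

import java.util.Arrays;
import java.util.List;

// Hedged sketch: the factory constructor is package-private, so this hypothetical
// helper lives in the same package. Delimiters are taken from the formats the old
// code knew about; the "2, false" arguments are placeholders, not real thresholds.
final class DelimitedFactoriesSketch {

    private DelimitedFactoriesSketch() {}

    static List<LogStructureFinderFactory> candidateFactories() {
        return Arrays.asList(
            new DelimitedLogStructureFinderFactory(',', 2, false),   // CSV
            new DelimitedLogStructureFinderFactory('\t', 2, false),  // TSV
            new DelimitedLogStructureFinderFactory(';', 2, false),   // semicolon-delimited
            new DelimitedLogStructureFinderFactory('|', 2, false));  // pipe-delimited
    }
}
--------------------------------------------------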
@@ -27,37 +27,14 @@ public class LogStructure implements ToXContentObject {

     public enum Format {

-        JSON, XML, CSV, TSV, SEMI_COLON_SEPARATED_VALUES, PIPE_SEPARATED_VALUES, SEMI_STRUCTURED_TEXT;
+        JSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT;

-        public Character separator() {
-            switch (this) {
-                case JSON:
-                case XML:
-                    return null;
-                case CSV:
-                    return ',';
-                case TSV:
-                    return '\t';
-                case SEMI_COLON_SEPARATED_VALUES:
-                    return ';';
-                case PIPE_SEPARATED_VALUES:
-                    return '|';
-                case SEMI_STRUCTURED_TEXT:
-                    return null;
-                default:
-                    throw new IllegalStateException("enum value [" + this + "] missing from switch.");
-            }
-        }
-
         public boolean supportsNesting() {
             switch (this) {
                 case JSON:
                 case XML:
                     return true;
-                case CSV:
-                case TSV:
-                case SEMI_COLON_SEPARATED_VALUES:
-                case PIPE_SEPARATED_VALUES:
+                case DELIMITED:
                 case SEMI_STRUCTURED_TEXT:
                     return false;
                 default:
@@ -69,10 +46,7 @@ public class LogStructure implements ToXContentObject {
             switch (this) {
                 case JSON:
                 case XML:
-                case CSV:
-                case TSV:
-                case SEMI_COLON_SEPARATED_VALUES:
-                case PIPE_SEPARATED_VALUES:
+                case DELIMITED:
                     return true;
                 case SEMI_STRUCTURED_TEXT:
                     return false;
@@ -85,10 +59,7 @@ public class LogStructure implements ToXContentObject {
             switch (this) {
                 case JSON:
                 case XML:
-                case CSV:
-                case TSV:
-                case SEMI_COLON_SEPARATED_VALUES:
-                case PIPE_SEPARATED_VALUES:
+                case DELIMITED:
                     return false;
                 case SEMI_STRUCTURED_TEXT:
                     return true;
@@ -97,38 +68,6 @@ public class LogStructure implements ToXContentObject {
             }
         }

-        public boolean isSeparatedValues() {
-            switch (this) {
-                case JSON:
-                case XML:
-                    return false;
-                case CSV:
-                case TSV:
-                case SEMI_COLON_SEPARATED_VALUES:
-                case PIPE_SEPARATED_VALUES:
-                    return true;
-                case SEMI_STRUCTURED_TEXT:
-                    return false;
-                default:
-                    throw new IllegalStateException("enum value [" + this + "] missing from switch.");
-            }
-        }
-
-        public static Format fromSeparator(char separator) {
-            switch (separator) {
-                case ',':
-                    return CSV;
-                case '\t':
-                    return TSV;
-                case ';':
-                    return SEMI_COLON_SEPARATED_VALUES;
-                case '|':
-                    return PIPE_SEPARATED_VALUES;
-                default:
-                    throw new IllegalArgumentException("No known format has separator [" + separator + "]");
-            }
-        }
-
         public static Format fromString(String name) {
             return valueOf(name.trim().toUpperCase(Locale.ROOT));
         }
@@ -149,7 +88,7 @@ public class LogStructure implements ToXContentObject {
     static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern");
     static final ParseField INPUT_FIELDS = new ParseField("input_fields");
     static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row");
-    static final ParseField SEPARATOR = new ParseField("separator");
+    static final ParseField DELIMITER = new ParseField("delimiter");
     static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields");
     static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
     static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field");
@@ -171,7 +110,7 @@ public class LogStructure implements ToXContentObject {
         PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN);
         PARSER.declareStringArray(Builder::setInputFields, INPUT_FIELDS);
         PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW);
-        PARSER.declareString((p, c) -> p.setSeparator(c.charAt(0)), SEPARATOR);
+        PARSER.declareString((p, c) -> p.setDelimiter(c.charAt(0)), DELIMITER);
         PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS);
         PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN);
         PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD);
@@ -191,7 +130,7 @@ public class LogStructure implements ToXContentObject {
     private final String excludeLinesPattern;
     private final List<String> inputFields;
     private final Boolean hasHeaderRow;
-    private final Character separator;
+    private final Character delimiter;
     private final Boolean shouldTrimFields;
     private final String grokPattern;
     private final List<String> timestampFormats;
@@ -202,7 +141,7 @@ public class LogStructure implements ToXContentObject {

     public LogStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker,
                         Format format, String multilineStartPattern, String excludeLinesPattern, List<String> inputFields,
-                        Boolean hasHeaderRow, Character separator, Boolean shouldTrimFields, String grokPattern, String timestampField,
+                        Boolean hasHeaderRow, Character delimiter, Boolean shouldTrimFields, String grokPattern, String timestampField,
                         List<String> timestampFormats, boolean needClientTimezone, Map<String, Object> mappings,
                         List<String> explanation) {

@@ -216,7 +155,7 @@ public class LogStructure implements ToXContentObject {
         this.excludeLinesPattern = excludeLinesPattern;
         this.inputFields = (inputFields == null) ? null : Collections.unmodifiableList(new ArrayList<>(inputFields));
         this.hasHeaderRow = hasHeaderRow;
-        this.separator = separator;
+        this.delimiter = delimiter;
         this.shouldTrimFields = shouldTrimFields;
         this.grokPattern = grokPattern;
         this.timestampField = timestampField;
@@ -266,8 +205,8 @@ public class LogStructure implements ToXContentObject {
         return hasHeaderRow;
     }

-    public Character getSeparator() {
-        return separator;
+    public Character getDelimiter() {
+        return delimiter;
     }

     public Boolean getShouldTrimFields() {
@@ -322,8 +261,8 @@ public class LogStructure implements ToXContentObject {
         if (hasHeaderRow != null) {
             builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue());
         }
-        if (separator != null) {
-            builder.field(SEPARATOR.getPreferredName(), String.valueOf(separator));
+        if (delimiter != null) {
+            builder.field(DELIMITER.getPreferredName(), String.valueOf(delimiter));
         }
         if (shouldTrimFields != null) {
             builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue());
@@ -349,7 +288,7 @@ public class LogStructure implements ToXContentObject {
     public int hashCode() {

         return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
-            multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern, timestampField,
+            multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern, timestampField,
|
||||||
timestampFormats, needClientTimezone, mappings, explanation);
|
timestampFormats, needClientTimezone, mappings, explanation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -376,7 +315,7 @@ public class LogStructure implements ToXContentObject {
|
||||||
Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) &&
|
Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) &&
|
||||||
Objects.equals(this.inputFields, that.inputFields) &&
|
Objects.equals(this.inputFields, that.inputFields) &&
|
||||||
Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
|
Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
|
||||||
Objects.equals(this.separator, that.separator) &&
|
Objects.equals(this.delimiter, that.delimiter) &&
|
||||||
Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
|
Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
|
||||||
Objects.equals(this.grokPattern, that.grokPattern) &&
|
Objects.equals(this.grokPattern, that.grokPattern) &&
|
||||||
Objects.equals(this.timestampField, that.timestampField) &&
|
Objects.equals(this.timestampField, that.timestampField) &&
|
||||||
|
@ -397,7 +336,7 @@ public class LogStructure implements ToXContentObject {
|
||||||
private String excludeLinesPattern;
|
private String excludeLinesPattern;
|
||||||
private List<String> inputFields;
|
private List<String> inputFields;
|
||||||
private Boolean hasHeaderRow;
|
private Boolean hasHeaderRow;
|
||||||
private Character separator;
|
private Character delimiter;
|
||||||
private Boolean shouldTrimFields;
|
private Boolean shouldTrimFields;
|
||||||
private String grokPattern;
|
private String grokPattern;
|
||||||
private String timestampField;
|
private String timestampField;
|
||||||
|
@ -441,7 +380,6 @@ public class LogStructure implements ToXContentObject {
|
||||||
|
|
||||||
public Builder setFormat(Format format) {
|
public Builder setFormat(Format format) {
|
||||||
this.format = Objects.requireNonNull(format);
|
this.format = Objects.requireNonNull(format);
|
||||||
this.separator = format.separator();
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -465,13 +403,13 @@ public class LogStructure implements ToXContentObject {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder setShouldTrimFields(Boolean shouldTrimFields) {
|
public Builder setDelimiter(Character delimiter) {
|
||||||
this.shouldTrimFields = shouldTrimFields;
|
this.delimiter = delimiter;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Builder setSeparator(Character separator) {
|
public Builder setShouldTrimFields(Boolean shouldTrimFields) {
|
||||||
this.separator = separator;
|
this.shouldTrimFields = shouldTrimFields;
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -542,28 +480,22 @@ public class LogStructure implements ToXContentObject {
|
||||||
if (hasHeaderRow != null) {
|
if (hasHeaderRow != null) {
|
||||||
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
|
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
|
||||||
}
|
}
|
||||||
if (separator != null) {
|
if (delimiter != null) {
|
||||||
throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures.");
|
throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures.");
|
||||||
}
|
}
|
||||||
if (grokPattern != null) {
|
if (grokPattern != null) {
|
||||||
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
|
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CSV:
|
case DELIMITED:
|
||||||
case TSV:
|
|
||||||
case SEMI_COLON_SEPARATED_VALUES:
|
|
||||||
case PIPE_SEPARATED_VALUES:
|
|
||||||
if (inputFields == null || inputFields.isEmpty()) {
|
if (inputFields == null || inputFields.isEmpty()) {
|
||||||
throw new IllegalArgumentException("Input fields must be specified for [" + format + "] structures.");
|
throw new IllegalArgumentException("Input fields must be specified for [" + format + "] structures.");
|
||||||
}
|
}
|
||||||
if (hasHeaderRow == null) {
|
if (hasHeaderRow == null) {
|
||||||
throw new IllegalArgumentException("Has header row must be specified for [" + format + "] structures.");
|
throw new IllegalArgumentException("Has header row must be specified for [" + format + "] structures.");
|
||||||
}
|
}
|
||||||
Character expectedSeparator = format.separator();
|
if (delimiter == null) {
|
||||||
assert expectedSeparator != null;
|
throw new IllegalArgumentException("Delimiter must be specified for [" + format + "] structures.");
|
||||||
if (expectedSeparator.equals(separator) == false) {
|
|
||||||
throw new IllegalArgumentException("Separator must be [" + expectedSeparator + "] for [" + format +
|
|
||||||
"] structures.");
|
|
||||||
}
|
}
|
||||||
if (grokPattern != null) {
|
if (grokPattern != null) {
|
||||||
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
|
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
|
||||||
|
@ -576,8 +508,8 @@ public class LogStructure implements ToXContentObject {
|
||||||
if (hasHeaderRow != null) {
|
if (hasHeaderRow != null) {
|
||||||
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
|
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
|
||||||
}
|
}
|
||||||
if (separator != null) {
|
if (delimiter != null) {
|
||||||
throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures.");
|
throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures.");
|
||||||
}
|
}
|
||||||
if (shouldTrimFields != null) {
|
if (shouldTrimFields != null) {
|
||||||
throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures.");
|
throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures.");
|
||||||
|
@ -607,7 +539,7 @@ public class LogStructure implements ToXContentObject {
|
||||||
}
|
}
|
||||||
|
|
||||||
return new LogStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
|
return new LogStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
|
||||||
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern,
|
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern,
|
||||||
timestampField, timestampFormats, needClientTimezone, mappings, explanation);
|
timestampField, timestampFormats, needClientTimezone, mappings, explanation);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
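Editor's note on the hunks above: the per-delimiter parse fields, getters and builder setters are renamed from "separator" to "delimiter", and the delimiter is no longer implied by the format. Format.fromString() itself is unchanged; the stand-alone mirror below only illustrates its trim/upper-case parsing. It is not the real enum, and the constant set listed here is an assumption for the example.

    import java.util.Locale;

    // Cut-down mirror of the fromString(...) body shown in the context lines above.
    enum FormatSketch {
        JSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT;

        public static FormatSketch fromString(String name) {
            return valueOf(name.trim().toUpperCase(Locale.ROOT));
        }

        public static void main(String[] args) {
            System.out.println(fromString("  delimited "));   // prints DELIMITED
        }
    }
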
@@ -69,10 +69,10 @@ public final class LogStructureFinderManager {
         new JsonLogStructureFinderFactory(),
         new XmlLogStructureFinderFactory(),
         // ND-JSON will often also be valid (although utterly weird) CSV, so JSON must come before CSV
-        new CsvLogStructureFinderFactory(),
-        new TsvLogStructureFinderFactory(),
-        new SemiColonSeparatedValuesLogStructureFinderFactory(),
-        new PipeSeparatedValuesLogStructureFinderFactory(),
+        new DelimitedLogStructureFinderFactory(',', 2, false),
+        new DelimitedLogStructureFinderFactory('\t', 2, false),
+        new DelimitedLogStructureFinderFactory(';', 4, false),
+        new DelimitedLogStructureFinderFactory('|', 5, true),
         new TextLogStructureFinderFactory()
     ));
 
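Editor's note: the four bespoke factories collapse into one parameterized DelimitedLogStructureFinderFactory. Reading the replacement lines together with the javadoc of the removed factories further down this diff, the constructor arguments appear to be the delimiter character, the minimum number of fields per record for a sample to qualify (2 for CSV/TSV, 4 for semi-colon, 5 for pipe), and whether field values are trimmed (only the pipe variant). That reading is an inference, not stated in the diff; the holder class below is illustrative data for scanning, not part of the change.

    // Illustrative only: the registered configurations copied from the hunk above.
    final class DelimiterConfigSketch {
        final char delimiter;
        final int minFieldsPerRecord;
        final boolean trimFields;

        DelimiterConfigSketch(char delimiter, int minFieldsPerRecord, boolean trimFields) {
            this.delimiter = delimiter;
            this.minFieldsPerRecord = minFieldsPerRecord;
            this.trimFields = trimFields;
        }

        static final DelimiterConfigSketch[] REGISTERED = {
            new DelimiterConfigSketch(',', 2, false),   // CSV
            new DelimiterConfigSketch('\t', 2, false),  // TSV
            new DelimiterConfigSketch(';', 4, false),   // semi-colon delimited
            new DelimiterConfigSketch('|', 5, true),    // pipe delimited
        };
    }
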
@@ -21,12 +21,12 @@ import java.util.TreeMap;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
-final class LogStructureUtils {
+public final class LogStructureUtils {
 
-    static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
-    static final String MAPPING_TYPE_SETTING = "type";
-    static final String MAPPING_FORMAT_SETTING = "format";
-    static final String MAPPING_PROPERTIES_SETTING = "properties";
+    public static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
+    public static final String MAPPING_TYPE_SETTING = "type";
+    public static final String MAPPING_FORMAT_SETTING = "format";
+    public static final String MAPPING_PROPERTIES_SETTING = "properties";
 
     // NUMBER Grok pattern doesn't support scientific notation, so we extend it
     private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$");
 
@@ -1,38 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.ml.logstructurefinder;
-
-import org.supercsv.prefs.CsvPreference;
-
-import java.io.IOException;
-import java.util.List;
-
-public class PipeSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory {
-
-    private static final CsvPreference PIPE_PREFERENCE = new CsvPreference.Builder('"', '|', "\n").build();
-
-    /**
-     * Rules are:
-     * - The file must be valid pipe (<code>|</code>) separated values
-     * - It must contain at least two complete records
-     * - There must be at least five fields per record (otherwise files with coincidental
-     *   or no pipe characters could be treated as pipe separated)
-     * - Every pipe separated value record except the last must have the same number of fields
-     *   The reason the last record is allowed to have fewer fields than the others is that
-     *   it could have been truncated when the file was sampled.
-     */
-    @Override
-    public boolean canCreateFromSample(List<String> explanation, String sample) {
-        return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 5, PIPE_PREFERENCE, "pipe separated values");
-    }
-
-    @Override
-    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
-        throws IOException {
-        return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
-            PIPE_PREFERENCE, true);
-    }
-}
 
@@ -1,37 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.ml.logstructurefinder;
-
-import org.supercsv.prefs.CsvPreference;
-
-import java.io.IOException;
-import java.util.List;
-
-public class SemiColonSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory {
-
-    /**
-     * Rules are:
-     * - The file must be valid semi-colon separated values
-     * - It must contain at least two complete records
-     * - There must be at least four fields per record (otherwise files with coincidental
-     *   or no semi-colons could be treated as semi-colon separated)
-     * - Every semi-colon separated value record except the last must have the same number of fields
-     *   The reason the last record is allowed to have fewer fields than the others is that
-     *   it could have been truncated when the file was sampled.
-     */
-    @Override
-    public boolean canCreateFromSample(List<String> explanation, String sample) {
-        return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 4,
-            CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, "semi-colon separated values");
-    }
-
-    @Override
-    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
-        throws IOException {
-        return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
-            CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, false);
-    }
-}
 
@@ -23,13 +23,13 @@ public class TsvLogStructureFinderFactory implements LogStructureFinderFactory {
     */
    @Override
    public boolean canCreateFromSample(List<String> explanation, String sample) {
-        return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV");
+        return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV");
    }
 
    @Override
    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
        throws IOException {
-        return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
+        return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
            CsvPreference.TAB_PREFERENCE, false);
    }
 }
 
@@ -1,38 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.ml.logstructurefinder;
-
-public class CsvLogStructureFinderFactoryTests extends LogStructureTestCase {
-
-    private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory();
-
-    // No need to check JSON or XML because they come earlier in the order we check formats
-
-    public void testCanCreateFromSampleGivenCsv() {
-
-        assertTrue(factory.canCreateFromSample(explanation, CSV_SAMPLE));
-    }
-
-    public void testCanCreateFromSampleGivenTsv() {
-
-        assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
-    }
-
-    public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
-
-        assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
-    }
-
-    public void testCanCreateFromSampleGivenPipeSeparatedValues() {
-
-        assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
-    }
-
-    public void testCanCreateFromSampleGivenText() {
-
-        assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
-    }
-}
 
@@ -0,0 +1,93 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.logstructurefinder;
+
+public class DelimitedLogStructureFinderFactoryTests extends LogStructureTestCase {
+
+    private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);
+    private LogStructureFinderFactory tsvFactory = new DelimitedLogStructureFinderFactory('\t', 2, false);
+    private LogStructureFinderFactory semiColonDelimitedfactory = new DelimitedLogStructureFinderFactory(';', 4, false);
+    private LogStructureFinderFactory pipeDelimitedFactory = new DelimitedLogStructureFinderFactory('|', 5, true);
+
+    // CSV - no need to check JSON or XML because they come earlier in the order we check formats
+
+    public void testCanCreateCsvFromSampleGivenCsv() {
+
+        assertTrue(csvFactory.canCreateFromSample(explanation, CSV_SAMPLE));
+    }
+
+    public void testCanCreateCsvFromSampleGivenTsv() {
+
+        assertFalse(csvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
+    }
+
+    public void testCanCreateCsvFromSampleGivenSemiColonDelimited() {
+
+        assertFalse(csvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
+    }
+
+    public void testCanCreateCsvFromSampleGivenPipeDelimited() {
+
+        assertFalse(csvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
+    }
+
+    public void testCanCreateCsvFromSampleGivenText() {
+
+        assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
+    }
+
+    // TSV - no need to check JSON, XML or CSV because they come earlier in the order we check formats
+
+    public void testCanCreateTsvFromSampleGivenTsv() {
+
+        assertTrue(tsvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
+    }
+
+    public void testCanCreateTsvFromSampleGivenSemiColonDelimited() {
+
+        assertFalse(tsvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
+    }
+
+    public void testCanCreateTsvFromSampleGivenPipeDelimited() {
+
+        assertFalse(tsvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
+    }
+
+    public void testCanCreateTsvFromSampleGivenText() {
+
+        assertFalse(tsvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
+    }
+
+    // Semi-colon delimited - no need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats
+
+    public void testCanCreateSemiColonDelimitedFromSampleGivenSemiColonDelimited() {
+
+        assertTrue(semiColonDelimitedfactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
+    }
+
+    public void testCanCreateSemiColonDelimitedFromSampleGivenPipeDelimited() {
+
+        assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
+    }
+
+    public void testCanCreateSemiColonDelimitedFromSampleGivenText() {
+
+        assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, TEXT_SAMPLE));
+    }
+
+    // Pipe delimited - no need to check JSON, XML, CSV, TSV or semi-colon delimited
+    // values because they come earlier in the order we check formats
+
+    public void testCanCreatePipeDelimitedFromSampleGivenPipeDelimited() {
+
+        assertTrue(pipeDelimitedFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
+    }
+
+    public void testCanCreatePipeDelimitedFromSampleGivenText() {
+
+        assertFalse(pipeDelimitedFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
+    }
+}
 
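Editor's note: the new test class only asserts rejections for formats that come later in the probe order, matching the "come earlier in the order we check formats" comments above. The sketch below restates that first-match contract; SampleSniffer is a stand-in so the snippet is self-contained (the real interface is LogStructureFinderFactory), and the loop is an illustration, not the actual LogStructureFinderManager code.

    import java.util.List;

    // Declares only the method the sketch uses.
    interface SampleSniffer {
        boolean canCreateFromSample(List<String> explanation, String sample);
    }

    final class FirstMatchSketch {
        // Try each registered factory in order and keep the first one that accepts the sample.
        static SampleSniffer firstMatch(List<SampleSniffer> factories, List<String> explanation, String sample) {
            for (SampleSniffer factory : factories) {
                if (factory.canCreateFromSample(explanation, sample)) {
                    return factory;
                }
            }
            throw new IllegalArgumentException("no known log structure matched the sample");
        }
    }
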
@@ -12,27 +12,27 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
 
-import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinFieldwiseCompareRows;
-import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinDistance;
+import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinFieldwiseCompareRows;
+import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinDistance;
 import static org.hamcrest.Matchers.arrayContaining;
 
-public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase {
+public class DelimitedLogStructureFinderTests extends LogStructureTestCase {
 
-    private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory();
+    private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);
 
     public void testCreateConfigsGivenCompleteCsv() throws Exception {
         String sample = "time,message\n" +
            "2018-05-17T13:41:23,hello\n" +
            "2018-05-17T13:41:32,hello again\n";
-        assertTrue(factory.canCreateFromSample(explanation, sample));
+        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
 
         String charset = randomFrom(POSSIBLE_CHARSETS);
         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
-        LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
+        LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
 
         LogStructure structure = structureFinder.getStructure();
 
-        assertEquals(LogStructure.Format.CSV, structure.getFormat());
+        assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
         assertEquals(charset, structure.getCharset());
         if (hasByteOrderMarker == null) {
             assertNull(structure.getHasByteOrderMarker());
@@ -41,7 +41,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
         }
         assertEquals("^\"?time\"?,\"?message\"?", structure.getExcludeLinesPattern());
         assertEquals("^\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
-        assertEquals(Character.valueOf(','), structure.getSeparator());
+        assertEquals(Character.valueOf(','), structure.getDelimiter());
         assertTrue(structure.getHasHeaderRow());
         assertNull(structure.getShouldTrimFields());
         assertEquals(Arrays.asList("time", "message"), structure.getInputFields());
@@ -55,15 +55,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
            "\"hello\n" +
            "world\",2018-05-17T13:41:23,1\n" +
            "\"hello again\n"; // note that this last record is truncated
-        assertTrue(factory.canCreateFromSample(explanation, sample));
+        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
 
         String charset = randomFrom(POSSIBLE_CHARSETS);
         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
-        LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
+        LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
 
         LogStructure structure = structureFinder.getStructure();
 
-        assertEquals(LogStructure.Format.CSV, structure.getFormat());
+        assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
         assertEquals(charset, structure.getCharset());
         if (hasByteOrderMarker == null) {
             assertNull(structure.getHasByteOrderMarker());
@@ -72,7 +72,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
         }
         assertEquals("^\"?message\"?,\"?time\"?,\"?count\"?", structure.getExcludeLinesPattern());
         assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
-        assertEquals(Character.valueOf(','), structure.getSeparator());
+        assertEquals(Character.valueOf(','), structure.getDelimiter());
         assertTrue(structure.getHasHeaderRow());
         assertNull(structure.getShouldTrimFields());
         assertEquals(Arrays.asList("message", "time", "count"), structure.getInputFields());
@@ -88,15 +88,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
            "2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
            "1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
            "1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
-        assertTrue(factory.canCreateFromSample(explanation, sample));
+        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
 
         String charset = randomFrom(POSSIBLE_CHARSETS);
         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
-        LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
+        LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
 
         LogStructure structure = structureFinder.getStructure();
 
-        assertEquals(LogStructure.Format.CSV, structure.getFormat());
+        assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
         assertEquals(charset, structure.getCharset());
         if (hasByteOrderMarker == null) {
             assertNull(structure.getHasByteOrderMarker());
@@ -108,7 +108,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
            "\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?,\"?\"?,\"?\"?",
            structure.getExcludeLinesPattern());
         assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
-        assertEquals(Character.valueOf(','), structure.getSeparator());
+        assertEquals(Character.valueOf(','), structure.getDelimiter());
         assertTrue(structure.getHasHeaderRow());
         assertNull(structure.getShouldTrimFields());
         assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance",
@@ -126,15 +126,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
            "2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
            "1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
            "1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
-        assertTrue(factory.canCreateFromSample(explanation, sample));
+        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
 
         String charset = randomFrom(POSSIBLE_CHARSETS);
         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
-        LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
+        LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
 
         LogStructure structure = structureFinder.getStructure();
 
-        assertEquals(LogStructure.Format.CSV, structure.getFormat());
+        assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
         assertEquals(charset, structure.getCharset());
         if (hasByteOrderMarker == null) {
             assertNull(structure.getHasByteOrderMarker());
@@ -146,7 +146,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
            "\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?",
            structure.getExcludeLinesPattern());
         assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
-        assertEquals(Character.valueOf(','), structure.getSeparator());
+        assertEquals(Character.valueOf(','), structure.getDelimiter());
         assertTrue(structure.getHasHeaderRow());
         assertNull(structure.getShouldTrimFields());
         assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance",
@@ -161,15 +161,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
         String sample = "\"pos_id\",\"trip_id\",\"latitude\",\"longitude\",\"altitude\",\"timestamp\"\n" +
            "\"1\",\"3\",\"4703.7815\",\"1527.4713\",\"359.9\",\"2017-01-19 16:19:04.742113\"\n" +
            "\"2\",\"3\",\"4703.7815\",\"1527.4714\",\"359.9\",\"2017-01-19 16:19:05.741890\"\n";
-        assertTrue(factory.canCreateFromSample(explanation, sample));
+        assertTrue(csvFactory.canCreateFromSample(explanation, sample));
 
         String charset = randomFrom(POSSIBLE_CHARSETS);
         Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
-        LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
+        LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
 
         LogStructure structure = structureFinder.getStructure();
 
-        assertEquals(LogStructure.Format.CSV, structure.getFormat());
+        assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
         assertEquals(charset, structure.getCharset());
         if (hasByteOrderMarker == null) {
             assertNull(structure.getHasByteOrderMarker());
@@ -179,7 +179,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
         assertEquals("^\"?pos_id\"?,\"?trip_id\"?,\"?latitude\"?,\"?longitude\"?,\"?altitude\"?,\"?timestamp\"?",
            structure.getExcludeLinesPattern());
         assertNull(structure.getMultilineStartPattern());
-        assertEquals(Character.valueOf(','), structure.getSeparator());
+        assertEquals(Character.valueOf(','), structure.getDelimiter());
         assertTrue(structure.getHasHeaderRow());
         assertNull(structure.getShouldTrimFields());
         assertEquals(Arrays.asList("pos_id", "trip_id", "latitude", "longitude", "altitude", "timestamp"), structure.getInputFields());
@@ -195,8 +195,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
            "2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" +
            "2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n";
 
-        Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation,
-            SeparatedValuesLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1());
+        Tuple<Boolean, String[]> header = DelimitedLogStructureFinder.findHeaderFromSample(explanation,
+            DelimitedLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1());
 
         assertTrue(header.v1());
         assertThat(header.v2(), arrayContaining("time", "airline", "responsetime", "sourcetype"));
@@ -208,8 +208,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
            "2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" +
            "2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n";
 
-        Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation,
-            SeparatedValuesLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1());
+        Tuple<Boolean, String[]> header = DelimitedLogStructureFinder.findHeaderFromSample(explanation,
+            DelimitedLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1());
 
         assertFalse(header.v1());
         assertThat(header.v2(), arrayContaining("column1", "column2", "column3", "column4"));
@@ -251,43 +251,43 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
 
     public void testLineHasUnescapedQuote() {
 
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE));
-        assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE));
-        assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE));
+        assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE));
+        assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE));
 
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE));
-        assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE));
-        assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE));
-        assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+        assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE));
+        assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE));
+        assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE));
     }
 
     public void testRowContainsDuplicateNonEmptyValues() {
 
-        assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a")));
-        assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("")));
-        assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c")));
-        assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a")));
-        assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b")));
-        assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));
-        assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", "")));
+        assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a")));
+        assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("")));
+        assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c")));
+        assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a")));
+        assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b")));
+        assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));
+        assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", "")));
     }
 }
 
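Editor's note: the header-detection tests above pin down a small contract: findHeaderFromSample(...) returns a Tuple whose v1() says whether the first row is a real header and whose v2() carries the column names, falling back to generated names when there is none. The helper below restates that fallback; it is not the actual DelimitedLogStructureFinder code.

    import java.util.stream.IntStream;

    // Reproduces the naming the assertions above expect when no header row is found.
    final class ColumnNameSketch {
        static String[] defaultColumnNames(int fieldCount) {
            return IntStream.rangeClosed(1, fieldCount)
                .mapToObj(i -> "column" + i)
                .toArray(String[]::new);
        }

        public static void main(String[] args) {
            // Matches arrayContaining("column1", "column2", "column3", "column4") in the test above.
            System.out.println(String.join(",", defaultColumnNames(4)));
        }
    }
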
@@ -29,14 +29,14 @@ public class JsonLogStructureFinderFactoryTests extends LogStructureTestCase {
         assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
     }
 
-    public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
+    public void testCanCreateFromSampleGivenSemiColonDelimited() {
 
-        assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
+        assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
     }
 
-    public void testCanCreateFromSampleGivenPipeSeparatedValues() {
+    public void testCanCreateFromSampleGivenPipeDelimited() {
 
-        assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
+        assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
     }
 
     public void testCanCreateFromSampleGivenText() {
 
@@ -29,7 +29,7 @@ public class JsonLogStructureFinderTests extends LogStructureTestCase {
         }
         assertNull(structure.getExcludeLinesPattern());
         assertNull(structure.getMultilineStartPattern());
-        assertNull(structure.getSeparator());
+        assertNull(structure.getDelimiter());
         assertNull(structure.getHasHeaderRow());
         assertNull(structure.getShouldTrimFields());
         assertNull(structure.getGrokPattern());
 
@@ -61,7 +61,7 @@ public class LogStructureFinderManagerTests extends LogStructureTestCase {
     public void testMakeBestStructureGivenCsv() throws Exception {
         assertThat(structureFinderManager.makeBestStructureFinder(explanation, "time,message\n" +
            "2018-05-17T13:41:23,hello\n", StandardCharsets.UTF_8.name(), randomBoolean()),
-            instanceOf(SeparatedValuesLogStructureFinder.class));
+            instanceOf(DelimitedLogStructureFinder.class));
     }
 
     public void testMakeBestStructureGivenText() throws Exception {
 
@@ -34,14 +34,14 @@ public abstract class LogStructureTestCase extends ESTestCase {
         "\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
         "\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";
 
-    protected static final String PIPE_SEPARATED_VALUES_SAMPLE = "2018-01-06 16:56:14.295748|INFO    |VirtualServer |1  |" +
+    protected static final String PIPE_DELIMITED_SAMPLE = "2018-01-06 16:56:14.295748|INFO    |VirtualServer |1  |" +
         "listening on 0.0.0.0:9987, :::9987\n" +
         "2018-01-06 17:19:44.465252|INFO    |VirtualServer |1  |client " +
         "'User1'(id:2) changed default admin channelgroup to 'Guest'(id:8)\n" +
         "2018-01-06 17:21:25.764368|INFO    |VirtualServer |1  |client " +
         "'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel 'Default Channel'(id:1)";
 
-    protected static final String SEMI_COLON_SEPARATED_VALUES_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" +
+    protected static final String SEMI_COLON_DELIMITED_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" +
         "\"timestamp\"\n" +
         "\"1\";\"3\";\"4703.7815\";\"1527.4713\";\"359.9\";\"2017-01-19 16:19:04.742113\"\n" +
         "\"2\";\"3\";\"4703.7815\";\"1527.4714\";\"359.9\";\"2017-01-19 16:19:05.741890\"\n" +
 
@@ -43,14 +43,12 @@ public class LogStructureTests extends AbstractXContentTestCase<LogStructure> {
             builder.setExcludeLinesPattern(randomAlphaOfLength(100));
         }
 
-        if (format.isSeparatedValues() || (format.supportsNesting() && randomBoolean())) {
+        if (format == LogStructure.Format.DELIMITED || (format.supportsNesting() && randomBoolean())) {
             builder.setInputFields(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
         }
-        if (format.isSeparatedValues()) {
+        if (format == LogStructure.Format.DELIMITED) {
             builder.setHasHeaderRow(randomBoolean());
-            if (rarely()) {
-                builder.setSeparator(format.separator());
-            }
+            builder.setDelimiter(randomFrom(',', '\t', ';', '|'));
         }
         if (format.isSemiStructured()) {
             builder.setGrokPattern(randomAlphaOfLength(100));
 
@@ -1,23 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.ml.logstructurefinder;
-
-public class PipeSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase {
-
-    private LogStructureFinderFactory factory = new PipeSeparatedValuesLogStructureFinderFactory();
-
-    // No need to check JSON, XML, CSV, TSV or semi-colon separated values because they come earlier in the order we check formats
-
-    public void testCanCreateFromSampleGivenPipeSeparatedValues() {
-
-        assertTrue(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
-    }
-
-    public void testCanCreateFromSampleGivenText() {
-
-        assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
-    }
-}
 
@@ -1,28 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.ml.logstructurefinder;
-
-public class SemiColonSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase {
-
-    private LogStructureFinderFactory factory = new SemiColonSeparatedValuesLogStructureFinderFactory();
-
-    // No need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats
-
-    public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
-
-        assertTrue(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
-    }
-
-    public void testCanCreateFromSampleGivenPipeSeparatedValues() {
-
-        assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
-    }
-
-    public void testCanCreateFromSampleGivenText() {
-
-        assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
-    }
-}