Merge branch 'master' into ccr
* master:
  Mute test watcher usage stats output
  [Rollup] Fix FullClusterRestart test
  Adjust soft-deletes version after backport into 6.5
  completely drop `index.shard.check_on_startup: fix` for 7.0 (#33194)
  Fix AwaitsFix issue number
  Mute SmokeTestWatcherWithSecurityIT testsi
  drop `index.shard.check_on_startup: fix` (#32279) tracked at
  [DOCS] Moves ml folder from x-pack/docs to docs (#33248)
  [DOCS] Move rollup APIs to docs (#31450)
  [DOCS] Rename X-Pack Commands section (#33005)
  TEST: Disable soft-deletes in ParentChildTestCase
  Fixes SecurityIntegTestCase so it always adds at least one alias (#33296)
  Fix pom for build-tools (#33300)
  Lazy evaluate java9home (#33301)
  SQL: test coverage for JdbcResultSet (#32813)
  Work around to be able to generate eclipse projects (#33295)
  Highlight that index_phrases only works if no slop is used (#33303)
  Different handling for security specific errors in the CLI. Fix for https://github.com/elastic/elasticsearch/issues/33230 (#33255)
  [ML] Refactor delimited file structure detection (#33233)
  SQL: Support multi-index format as table identifier (#33278)
  MINOR: Remove Dead Code from PathTrie (#33280)
  Enable forbiddenapis server java9 (#33245)
build.gradle
|
@ -16,7 +16,9 @@
|
|||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
import com.github.jengelman.gradle.plugins.shadow.ShadowPlugin
|
||||
import org.apache.tools.ant.taskdefs.condition.Os
|
||||
import org.elasticsearch.gradle.BuildPlugin
|
||||
import org.elasticsearch.gradle.LoggedExec
|
||||
import org.elasticsearch.gradle.Version
|
||||
|
@ -24,14 +26,9 @@ import org.elasticsearch.gradle.VersionCollection
|
|||
import org.elasticsearch.gradle.VersionProperties
|
||||
import org.elasticsearch.gradle.plugin.PluginBuildPlugin
|
||||
import org.gradle.plugins.ide.eclipse.model.SourceFolder
|
||||
import org.gradle.util.GradleVersion
|
||||
import org.gradle.util.DistributionLocator
|
||||
import org.apache.tools.ant.taskdefs.condition.Os
|
||||
import org.apache.tools.ant.filters.ReplaceTokens
|
||||
|
||||
import java.nio.file.Files
|
||||
import java.nio.file.Path
|
||||
import java.security.MessageDigest
|
||||
|
||||
plugins {
|
||||
id 'com.gradle.build-scan' version '1.13.2'
|
||||
|
@ -512,6 +509,16 @@ allprojects {
|
|||
tasks.cleanEclipse.dependsOn(wipeEclipseSettings)
|
||||
// otherwise the eclipse merging is *super confusing*
|
||||
tasks.eclipse.dependsOn(cleanEclipse, copyEclipseSettings)
|
||||
|
||||
// work around https://github.com/gradle/gradle/issues/6582
|
||||
tasks.eclipseProject.mustRunAfter tasks.cleanEclipseProject
|
||||
tasks.matching { it.name == 'eclipseClasspath' }.all {
|
||||
it.mustRunAfter { tasks.cleanEclipseClasspath }
|
||||
}
|
||||
tasks.matching { it.name == 'eclipseJdt' }.all {
|
||||
it.mustRunAfter { tasks.cleanEclipseJdt }
|
||||
}
|
||||
tasks.copyEclipseSettings.mustRunAfter tasks.wipeEclipseSettings
|
||||
}
|
||||
|
||||
allprojects {
|
||||
|
|
|
@ -24,15 +24,6 @@ plugins {
|
|||
id 'groovy'
|
||||
}
|
||||
|
||||
gradlePlugin {
|
||||
plugins {
|
||||
simplePlugin {
|
||||
id = 'elasticsearch.clusterformation'
|
||||
implementationClass = 'org.elasticsearch.gradle.clusterformation.ClusterformationPlugin'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group = 'org.elasticsearch.gradle'
|
||||
|
||||
String minimumGradleVersion = file('src/main/resources/minimumGradleVersion').text.trim()
|
||||
|
|
|
@ -38,7 +38,6 @@ import org.gradle.api.artifacts.ModuleDependency
|
|||
import org.gradle.api.artifacts.ModuleVersionIdentifier
|
||||
import org.gradle.api.artifacts.ProjectDependency
|
||||
import org.gradle.api.artifacts.ResolvedArtifact
|
||||
import org.gradle.api.artifacts.SelfResolvingDependency
|
||||
import org.gradle.api.artifacts.dsl.RepositoryHandler
|
||||
import org.gradle.api.execution.TaskExecutionGraph
|
||||
import org.gradle.api.plugins.JavaPlugin
|
||||
|
@ -212,6 +211,7 @@ class BuildPlugin implements Plugin<Project> {
|
|||
project.rootProject.ext.minimumRuntimeVersion = minimumRuntimeVersion
|
||||
project.rootProject.ext.inFipsJvm = inFipsJvm
|
||||
project.rootProject.ext.gradleJavaVersion = JavaVersion.toVersion(gradleJavaVersion)
|
||||
project.rootProject.ext.java9Home = "${-> findJavaHome("9")}"
|
||||
}
|
||||
|
||||
project.targetCompatibility = project.rootProject.ext.minimumRuntimeVersion
|
||||
|
@ -225,6 +225,7 @@ class BuildPlugin implements Plugin<Project> {
|
|||
project.ext.javaVersions = project.rootProject.ext.javaVersions
|
||||
project.ext.inFipsJvm = project.rootProject.ext.inFipsJvm
|
||||
project.ext.gradleJavaVersion = project.rootProject.ext.gradleJavaVersion
|
||||
project.ext.java9Home = project.rootProject.ext.java9Home
|
||||
}
|
||||
|
||||
private static String getPaddedMajorVersion(JavaVersion compilerJavaVersionEnum) {
|
||||
|
|
|
@ -100,7 +100,7 @@ class PrecommitTasks {
|
|||
|
||||
private static Task configureForbiddenApisCli(Project project) {
|
||||
Task forbiddenApisCli = project.tasks.create('forbiddenApis')
|
||||
project.sourceSets.forEach { sourceSet ->
|
||||
project.sourceSets.all { sourceSet ->
|
||||
forbiddenApisCli.dependsOn(
|
||||
project.tasks.create(sourceSet.getTaskName('forbiddenApis', null), ForbiddenApisCliTask) {
|
||||
ExportElasticsearchBuildResourcesTask buildResources = project.tasks.getByName('buildResources')
|
||||
|
|
|
@ -51,7 +51,8 @@ public class ForbiddenApisCliTask extends DefaultTask {
|
|||
private JavaVersion targetCompatibility;
|
||||
private FileCollection classesDirs;
|
||||
private SourceSet sourceSet;
|
||||
private String javaHome;
|
||||
// This needs to be an object so it can hold Groovy GStrings
|
||||
private Object javaHome;
|
||||
|
||||
@Input
|
||||
public JavaVersion getTargetCompatibility() {
|
||||
|
@ -142,11 +143,11 @@ public class ForbiddenApisCliTask extends DefaultTask {
|
|||
}
|
||||
|
||||
@Input
|
||||
public String getJavaHome() {
|
||||
public Object getJavaHome() {
|
||||
return javaHome;
|
||||
}
|
||||
|
||||
public void setJavaHome(String javaHome) {
|
||||
public void setJavaHome(Object javaHome) {
|
||||
this.javaHome = javaHome;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
implementation-class=org.elasticsearch.gradle.clusterformation.ClusterformationPlugin
|
|
@ -19,6 +19,12 @@
|
|||
|
||||
apply plugin: 'elasticsearch.docs-test'
|
||||
|
||||
/* List of files that have snippets that require a gold or platinum licence
|
||||
and therefore cannot be tested yet... */
|
||||
buildRestTests.expectedUnconvertedCandidates = [
|
||||
'reference/ml/transforms.asciidoc',
|
||||
]
|
||||
|
||||
integTestCluster {
|
||||
/* Enable regexes in painless so our tests don't complain about example
|
||||
* snippets that use them. */
|
||||
|
@ -74,6 +80,17 @@ buildRestTests.docs = fileTree(projectDir) {
|
|||
exclude 'build'
|
||||
// Just syntax examples
|
||||
exclude 'README.asciidoc'
|
||||
// Broken code snippet tests
|
||||
exclude 'reference/rollup/rollup-getting-started.asciidoc'
|
||||
exclude 'reference/rollup/apis/rollup-job-config.asciidoc'
|
||||
exclude 'reference/rollup/apis/rollup-index-caps.asciidoc'
|
||||
exclude 'reference/rollup/apis/put-job.asciidoc'
|
||||
exclude 'reference/rollup/apis/stop-job.asciidoc'
|
||||
exclude 'reference/rollup/apis/start-job.asciidoc'
|
||||
exclude 'reference/rollup/apis/rollup-search.asciidoc'
|
||||
exclude 'reference/rollup/apis/delete-job.asciidoc'
|
||||
exclude 'reference/rollup/apis/get-job.asciidoc'
|
||||
exclude 'reference/rollup/apis/rollup-caps.asciidoc'
|
||||
}
|
||||
|
||||
listSnippets.docs = buildRestTests.docs
|
||||
|
@ -594,3 +611,259 @@ buildRestTests.setups['library'] = '''
|
|||
{"name": "The Moon is a Harsh Mistress", "author": "Robert A. Heinlein", "release_date": "1966-04-01", "page_count": 288}
|
||||
|
||||
'''
|
||||
buildRestTests.setups['sensor_rollup_job'] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: sensor-1
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
_doc:
|
||||
properties:
|
||||
timestamp:
|
||||
type: date
|
||||
temperature:
|
||||
type: long
|
||||
voltage:
|
||||
type: float
|
||||
node:
|
||||
type: keyword
|
||||
- do:
|
||||
xpack.rollup.put_job:
|
||||
id: "sensor"
|
||||
body: >
|
||||
{
|
||||
"index_pattern": "sensor-*",
|
||||
"rollup_index": "sensor_rollup",
|
||||
"cron": "*/30 * * * * ?",
|
||||
"page_size" :1000,
|
||||
"groups" : {
|
||||
"date_histogram": {
|
||||
"field": "timestamp",
|
||||
"interval": "1h",
|
||||
"delay": "7d"
|
||||
},
|
||||
"terms": {
|
||||
"fields": ["node"]
|
||||
}
|
||||
},
|
||||
"metrics": [
|
||||
{
|
||||
"field": "temperature",
|
||||
"metrics": ["min", "max", "sum"]
|
||||
},
|
||||
{
|
||||
"field": "voltage",
|
||||
"metrics": ["avg"]
|
||||
}
|
||||
]
|
||||
}
|
||||
'''
|
||||
buildRestTests.setups['sensor_started_rollup_job'] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: sensor-1
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
_doc:
|
||||
properties:
|
||||
timestamp:
|
||||
type: date
|
||||
temperature:
|
||||
type: long
|
||||
voltage:
|
||||
type: float
|
||||
node:
|
||||
type: keyword
|
||||
|
||||
- do:
|
||||
bulk:
|
||||
index: sensor-1
|
||||
type: _doc
|
||||
refresh: true
|
||||
body: |
|
||||
{"index":{}}
|
||||
{"timestamp": 1516729294000, "temperature": 200, "voltage": 5.2, "node": "a"}
|
||||
{"index":{}}
|
||||
{"timestamp": 1516642894000, "temperature": 201, "voltage": 5.8, "node": "b"}
|
||||
{"index":{}}
|
||||
{"timestamp": 1516556494000, "temperature": 202, "voltage": 5.1, "node": "a"}
|
||||
{"index":{}}
|
||||
{"timestamp": 1516470094000, "temperature": 198, "voltage": 5.6, "node": "b"}
|
||||
{"index":{}}
|
||||
{"timestamp": 1516383694000, "temperature": 200, "voltage": 4.2, "node": "c"}
|
||||
{"index":{}}
|
||||
{"timestamp": 1516297294000, "temperature": 202, "voltage": 4.0, "node": "c"}
|
||||
|
||||
- do:
|
||||
xpack.rollup.put_job:
|
||||
id: "sensor"
|
||||
body: >
|
||||
{
|
||||
"index_pattern": "sensor-*",
|
||||
"rollup_index": "sensor_rollup",
|
||||
"cron": "* * * * * ?",
|
||||
"page_size" :1000,
|
||||
"groups" : {
|
||||
"date_histogram": {
|
||||
"field": "timestamp",
|
||||
"interval": "1h",
|
||||
"delay": "7d"
|
||||
},
|
||||
"terms": {
|
||||
"fields": ["node"]
|
||||
}
|
||||
},
|
||||
"metrics": [
|
||||
{
|
||||
"field": "temperature",
|
||||
"metrics": ["min", "max", "sum"]
|
||||
},
|
||||
{
|
||||
"field": "voltage",
|
||||
"metrics": ["avg"]
|
||||
}
|
||||
]
|
||||
}
|
||||
- do:
|
||||
xpack.rollup.start_job:
|
||||
id: "sensor"
|
||||
'''
|
||||
|
||||
buildRestTests.setups['sensor_index'] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: sensor-1
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
_doc:
|
||||
properties:
|
||||
timestamp:
|
||||
type: date
|
||||
temperature:
|
||||
type: long
|
||||
voltage:
|
||||
type: float
|
||||
node:
|
||||
type: keyword
|
||||
load:
|
||||
type: double
|
||||
net_in:
|
||||
type: long
|
||||
net_out:
|
||||
type: long
|
||||
hostname:
|
||||
type: keyword
|
||||
datacenter:
|
||||
type: keyword
|
||||
'''
|
||||
|
||||
buildRestTests.setups['sensor_prefab_data'] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: sensor-1
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
_doc:
|
||||
properties:
|
||||
timestamp:
|
||||
type: date
|
||||
temperature:
|
||||
type: long
|
||||
voltage:
|
||||
type: float
|
||||
node:
|
||||
type: keyword
|
||||
- do:
|
||||
indices.create:
|
||||
index: sensor_rollup
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 0
|
||||
mappings:
|
||||
_doc:
|
||||
properties:
|
||||
node.terms.value:
|
||||
type: keyword
|
||||
temperature.sum.value:
|
||||
type: double
|
||||
temperature.max.value:
|
||||
type: double
|
||||
temperature.min.value:
|
||||
type: double
|
||||
timestamp.date_histogram.time_zone:
|
||||
type: keyword
|
||||
timestamp.date_histogram.interval:
|
||||
type: keyword
|
||||
timestamp.date_histogram.timestamp:
|
||||
type: date
|
||||
timestamp.date_histogram._count:
|
||||
type: long
|
||||
voltage.avg.value:
|
||||
type: double
|
||||
voltage.avg._count:
|
||||
type: long
|
||||
_rollup.id:
|
||||
type: keyword
|
||||
_rollup.version:
|
||||
type: long
|
||||
_meta:
|
||||
_rollup:
|
||||
sensor:
|
||||
cron: "* * * * * ?"
|
||||
rollup_index: "sensor_rollup"
|
||||
index_pattern: "sensor-*"
|
||||
timeout: "20s"
|
||||
page_size: 1000
|
||||
groups:
|
||||
date_histogram:
|
||||
delay: "7d"
|
||||
field: "timestamp"
|
||||
interval: "1h"
|
||||
time_zone: "UTC"
|
||||
terms:
|
||||
fields:
|
||||
- "node"
|
||||
id: sensor
|
||||
metrics:
|
||||
- field: "temperature"
|
||||
metrics:
|
||||
- min
|
||||
- max
|
||||
- sum
|
||||
- field: "voltage"
|
||||
metrics:
|
||||
- avg
|
||||
|
||||
- do:
|
||||
bulk:
|
||||
index: sensor_rollup
|
||||
type: _doc
|
||||
refresh: true
|
||||
body: |
|
||||
{"index":{}}
|
||||
{"node.terms.value":"b","temperature.sum.value":201.0,"temperature.max.value":201.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":201.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.800000190734863,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516640400000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
{"index":{}}
|
||||
{"node.terms.value":"c","temperature.sum.value":200.0,"temperature.max.value":200.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":200.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":4.199999809265137,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516381200000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
{"index":{}}
|
||||
{"node.terms.value":"a","temperature.sum.value":202.0,"temperature.max.value":202.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":202.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.099999904632568,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516554000000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
{"index":{}}
|
||||
{"node.terms.value":"a","temperature.sum.value":200.0,"temperature.max.value":200.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":200.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.199999809265137,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516726800000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
{"index":{}}
|
||||
{"node.terms.value":"b","temperature.sum.value":198.0,"temperature.max.value":198.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":198.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":5.599999904632568,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516467600000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
{"index":{}}
|
||||
{"node.terms.value":"c","temperature.sum.value":202.0,"temperature.max.value":202.0,"timestamp.date_histogram.time_zone":"UTC","temperature.min.value":202.0,"timestamp.date_histogram._count":1,"timestamp.date_histogram.interval":"1h","_rollup.computed":["temperature.sum","temperature.min","voltage.avg","temperature.max","node.terms","timestamp.date_histogram"],"voltage.avg.value":4.0,"node.terms._count":1,"_rollup.version":1,"timestamp.date_histogram.timestamp":1516294800000,"voltage.avg._count":1.0,"_rollup.id":"sensor"}
|
||||
|
||||
'''
|
||||
|
|
|
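The `sensor_prefab_data` fixture above pre-builds the `sensor_rollup` index that the rollup reference pages run against. As an illustrative sketch only (the aggregation name below is chosen here, not taken from those pages), data shaped like this is typically queried through the Rollup Search endpoint:

[source,js]
--------------------------------------------------
GET /sensor_rollup/_rollup_search
{
  "size": 0,
  "aggregations": {
    "max_temperature": {
      "max": {
        "field": "temperature"
      }
    }
  }
}
--------------------------------------------------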
@ -1,11 +1,11 @@
|
|||
[role="xpack"]
|
||||
[[xpack-commands]]
|
||||
= {xpack} Commands
|
||||
[[commands]]
|
||||
= Command line tools
|
||||
|
||||
[partintro]
|
||||
--
|
||||
|
||||
{xpack} includes commands that help you configure security:
|
||||
{es} provides the following tools for configuring security and performing other
|
||||
tasks from the command line:
|
||||
|
||||
* <<certgen>>
|
||||
* <<certutil>>
|
||||
|
|
|
@ -63,12 +63,6 @@ corruption is detected, it will prevent the shard from being opened. Accepts:
|
|||
Check for both physical and logical corruption. This is much more
|
||||
expensive in terms of CPU and memory usage.
|
||||
|
||||
`fix`::
|
||||
|
||||
Check for both physical and logical corruption. Segments that were reported
|
||||
as corrupted will be automatically removed. This option *may result in data loss*.
|
||||
Use with extreme caution!
|
||||
|
||||
WARNING: Expert only. Checking shards may take a lot of time on large indices.
|
||||
--
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ include::sql/index.asciidoc[]
|
|||
|
||||
include::monitoring/index.asciidoc[]
|
||||
|
||||
include::{xes-repo-dir}/rollup/index.asciidoc[]
|
||||
include::rollup/index.asciidoc[]
|
||||
|
||||
include::rest-api/index.asciidoc[]
|
||||
|
||||
|
|
|
@ -99,7 +99,7 @@ The following parameters are accepted by `text` fields:

`index_phrases`::

If enabled, two-term word combinations ('shingles') are indexed into a separate
field. This allows exact phrase queries to run more efficiently, at the expense
field. This allows exact phrase queries (no slop) to run more efficiently, at the expense
of a larger index. Note that this works best when stopwords are not removed,
as phrases containing stopwords will not use the subsidiary field and will fall
back to a standard phrase query. Accepts `true` or `false` (default).
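The hunk above only touches wording, but for orientation, here is a minimal, hypothetical sketch of the feature it describes (the index and field names are illustrative and not part of this diff): a `text` field with `index_phrases` enabled, queried with a `match_phrase` query that sets no `slop`, which is the only case that can use the extra phrase field.

[source,js]
--------------------------------------------------
PUT my_text_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "body": {
          "type": "text",
          "index_phrases": true
        }
      }
    }
  }
}

GET my_text_index/_search
{
  "query": {
    "match_phrase": {
      "body": "quick brown fox"
    }
  }
}
--------------------------------------------------

Adding a non-zero `slop` to the `match_phrase` query would bypass the phrase field and fall back to a standard phrase query, which is exactly the limitation the new wording highlights.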
@ -171,4 +171,4 @@ PUT my_index
|
|||
--------------------------------
|
||||
// CONSOLE
|
||||
<1> `min_chars` must be greater than zero, defaults to 2
|
||||
<2> `max_chars` must be greater than or equal to `min_chars` and less than 20, defaults to 5
|
||||
<2> `max_chars` must be greater than or equal to `min_chars` and less than 20, defaults to 5
|
||||
|
|
|
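The two callouts above refer to the `index_prefixes` snippet in the surrounding page, which is not part of this hunk. As a rough sketch only (the field name and the chosen values are illustrative, within the documented bounds), such a mapping looks like:

[source,js]
--------------------------------------------------
PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "subject": {
          "type": "text",
          "index_prefixes": {
            "min_chars": 1,
            "max_chars": 10
          }
        }
      }
    }
  }
}
--------------------------------------------------

Here `min_chars` is at least 1 and `max_chars` stays below 20, matching the constraints in callouts <1> and <2>.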
@ -78,3 +78,7 @@ The parent circuit breaker defines a new setting `indices.breaker.total.use_real
heap memory instead of only considering the reserved memory by child circuit breakers. When this
setting is `true`, the default parent breaker limit also changes from 70% to 95% of the JVM heap size.
The previous behavior can be restored by setting `indices.breaker.total.use_real_memory` to `false`.

==== `fix` value for `index.shard.check_on_startup` is removed

The deprecated option value `fix` for the setting `index.shard.check_on_startup` is no longer supported.
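As a small sketch of what remains valid after this removal (the index name is illustrative), an index can still request verification at startup with `true` or `checksum`, while `fix` is now rejected by the settings validation shown later in this diff in `IndexSettings`:

[source,js]
--------------------------------------------------
PUT my_index
{
  "settings": {
    "index.shard.check_on_startup": "checksum"
  }
}
--------------------------------------------------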
@ -41,7 +41,7 @@ PUT _xpack/ml/anomaly_detectors/farequote
|
|||
}
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[setup:farequote_data]
|
||||
// TEST[skip:setup:farequote_data]
|
||||
|
||||
In this example, the `airline`, `responsetime`, and `time` fields are
|
||||
aggregations.
|
||||
|
@ -90,7 +90,7 @@ PUT _xpack/ml/datafeeds/datafeed-farequote
|
|||
}
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[setup:farequote_job]
|
||||
// TEST[skip:setup:farequote_job]
|
||||
|
||||
In this example, the aggregations have names that match the fields that they
|
||||
operate on. That is to say, the `max` aggregation is named `time` and its
|
|
@ -44,6 +44,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs
|
|||
}
|
||||
----------------------------------
|
||||
//CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
<1> The `categorization_field_name` property indicates which field will be
|
||||
categorized.
|
||||
<2> The resulting categories are used in a detector by setting `by_field_name`,
|
||||
|
@ -127,6 +128,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs2
|
|||
}
|
||||
----------------------------------
|
||||
//CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
<1> The
|
||||
{ref}/analysis-pattern-replace-charfilter.html[`pattern_replace` character filter]
|
||||
here achieves exactly the same as the `categorization_filters` in the first
|
||||
|
@ -193,6 +195,7 @@ PUT _xpack/ml/anomaly_detectors/it_ops_new_logs3
|
|||
}
|
||||
----------------------------------
|
||||
//CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
<1> Tokens basically consist of hyphens, digits, letters, underscores and dots.
|
||||
<2> By default, categorization ignores tokens that begin with a digit.
|
||||
<3> By default, categorization also ignores tokens that are hexadecimal numbers.
|
|
@ -36,20 +36,20 @@ The scenarios in this section describe some best practices for generating useful
|
|||
* <<ml-configuring-transform>>
|
||||
* <<ml-configuring-detector-custom-rules>>
|
||||
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/customurl.asciidoc
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/customurl.asciidoc
|
||||
include::customurl.asciidoc[]
|
||||
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/aggregations.asciidoc
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/aggregations.asciidoc
|
||||
include::aggregations.asciidoc[]
|
||||
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/categories.asciidoc
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/categories.asciidoc
|
||||
include::categories.asciidoc[]
|
||||
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/populations.asciidoc
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/populations.asciidoc
|
||||
include::populations.asciidoc[]
|
||||
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/transforms.asciidoc
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/transforms.asciidoc
|
||||
include::transforms.asciidoc[]
|
||||
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/x-pack/docs/en/ml/detector-custom-rules.asciidoc
|
||||
:edit_url: https://github.com/elastic/elasticsearch/edit/{branch}/docs/reference/ml/detector-custom-rules.asciidoc
|
||||
include::detector-custom-rules.asciidoc[]
|
|
@ -106,7 +106,7 @@ POST _xpack/ml/anomaly_detectors/sample_job/_update
|
|||
}
|
||||
----------------------------------
|
||||
//CONSOLE
|
||||
//TEST[setup:sample_job]
|
||||
//TEST[skip:setup:sample_job]
|
||||
|
||||
When you click this custom URL in the anomalies table in {kib}, it opens up the
|
||||
*Discover* page and displays source data for the period one hour before and
|
|
@ -39,6 +39,7 @@ PUT _xpack/ml/filters/safe_domains
|
|||
}
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
Now, we can create our job specifying a scope that uses the `safe_domains`
|
||||
filter for the `highest_registered_domain` field:
|
||||
|
@ -70,6 +71,7 @@ PUT _xpack/ml/anomaly_detectors/dns_exfiltration_with_rule
|
|||
}
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
As time advances and we see more data and more results, we might encounter new
|
||||
domains that we want to add in the filter. We can do that by using the
|
||||
|
@ -83,7 +85,7 @@ POST _xpack/ml/filters/safe_domains/_update
|
|||
}
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[setup:ml_filter_safe_domains]
|
||||
// TEST[skip:setup:ml_filter_safe_domains]
|
||||
|
||||
Note that we can use any of the `partition_field_name`, `over_field_name`, or
|
||||
`by_field_name` fields in the `scope`.
|
||||
|
@ -123,6 +125,7 @@ PUT _xpack/ml/anomaly_detectors/scoping_multiple_fields
|
|||
}
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
Such a detector will skip results when the values of all 3 scoped fields
|
||||
are included in the referenced filters.
|
||||
|
@ -166,6 +169,7 @@ PUT _xpack/ml/anomaly_detectors/cpu_with_rule
|
|||
}
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
When there are multiple conditions they are combined with a logical `and`.
|
||||
This is useful when we want the rule to apply to a range. We simply create
|
||||
|
@ -205,6 +209,7 @@ PUT _xpack/ml/anomaly_detectors/rule_with_range
|
|||
}
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
==== Custom rules in the life-cycle of a job
|
||||
|
|
@ -59,6 +59,7 @@ PUT _xpack/ml/anomaly_detectors/example1
|
|||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
This example is probably the simplest possible analysis. It identifies
|
||||
time buckets during which the overall count of events is higher or lower than
|
||||
|
@ -86,6 +87,7 @@ PUT _xpack/ml/anomaly_detectors/example2
|
|||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
If you use this `high_count` function in a detector in your job, it
|
||||
models the event rate for each error code. It detects users that generate an
|
||||
|
@ -110,6 +112,7 @@ PUT _xpack/ml/anomaly_detectors/example3
|
|||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
In this example, the function detects when the count of events for a
|
||||
status code is lower than usual.
|
||||
|
@ -136,6 +139,7 @@ PUT _xpack/ml/anomaly_detectors/example4
|
|||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
If you are analyzing an aggregated `events_per_min` field, do not use a sum
|
||||
function (for example, `sum(events_per_min)`). Instead, use the count function
|
||||
|
@ -200,6 +204,7 @@ PUT _xpack/ml/anomaly_detectors/example5
|
|||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
If you use this `high_non_zero_count` function in a detector in your job, it
|
||||
models the count of events for the `signaturename` field. It ignores any buckets
|
||||
|
@ -253,6 +258,7 @@ PUT _xpack/ml/anomaly_detectors/example6
|
|||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
This `distinct_count` function detects when a system has an unusual number
|
||||
of logged in users. When you use this function in a detector in your job, it
|
||||
|
@ -278,6 +284,7 @@ PUT _xpack/ml/anomaly_detectors/example7
|
|||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
This example detects instances of port scanning. When you use this function in a
|
||||
detector in your job, it models the distinct count of ports. It also detects the
|
|
@ -47,6 +47,7 @@ PUT _xpack/ml/anomaly_detectors/example1
|
|||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
If you use this `lat_long` function in a detector in your job, it
|
||||
detects anomalies where the geographic location of a credit card transaction is
|
||||
|
@ -98,6 +99,6 @@ PUT _xpack/ml/datafeeds/datafeed-test2
|
|||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[setup:farequote_job]
|
||||
// TEST[skip:setup:farequote_job]
|
||||
|
||||
For more information, see <<ml-configuring-transform>>.
|
|
@ -51,14 +51,11 @@ PUT _xpack/ml/anomaly_detectors/population
|
|||
}
|
||||
----------------------------------
|
||||
//CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
<1> This `over_field_name` property indicates that the metrics for each user (
|
||||
as identified by their `username` value) are analyzed relative to other users
|
||||
in each bucket.
|
||||
|
||||
//TO-DO: Per sophiec20 "Perhaps add the datafeed config and add a query filter to
|
||||
//include only workstations as servers and printers would behave differently
|
||||
//from the population
|
||||
|
||||
If your data is stored in {es}, you can use the population job wizard in {kib}
|
||||
to create a job with these same properties. For example, the population job
|
||||
wizard provides the following job settings:
|
|
@ -28,7 +28,7 @@ request stops the `feed1` {dfeed}:
|
|||
POST _xpack/ml/datafeeds/datafeed-total-requests/_stop
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[setup:server_metrics_startdf]
|
||||
// TEST[skip:setup:server_metrics_startdf]
|
||||
|
||||
NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}.
|
||||
For more information, see <<security-privileges>>.
|
||||
|
@ -49,6 +49,7 @@ If you are upgrading your cluster, you can use the following request to stop all
|
|||
POST _xpack/ml/datafeeds/_all/_stop
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
[float]
|
||||
[[closing-ml-jobs]]
|
||||
|
@ -67,7 +68,7 @@ example, the following request closes the `job1` job:
|
|||
POST _xpack/ml/anomaly_detectors/total-requests/_close
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[setup:server_metrics_openjob]
|
||||
// TEST[skip:setup:server_metrics_openjob]
|
||||
|
||||
NOTE: You must have `manage_ml`, or `manage` cluster privileges to stop {dfeeds}.
|
||||
For more information, see <<security-privileges>>.
|
||||
|
@ -86,3 +87,4 @@ all open jobs on the cluster:
|
|||
POST _xpack/ml/anomaly_detectors/_all/_close
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:needs-licence]
|
|
@ -95,7 +95,7 @@ PUT /my_index/my_type/1
|
|||
}
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TESTSETUP
|
||||
// TEST[skip:SETUP]
|
||||
<1> In this example, string fields are mapped as `keyword` fields to support
|
||||
aggregation. If you want both a full text (`text`) and a keyword (`keyword`)
|
||||
version of the same field, use multi-fields. For more information, see
|
||||
|
@ -144,7 +144,7 @@ PUT _xpack/ml/datafeeds/datafeed-test1
|
|||
}
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:broken]
|
||||
// TEST[skip:needs-licence]
|
||||
<1> A script field named `total_error_count` is referenced in the detector
|
||||
within the job.
|
||||
<2> The script field is defined in the {dfeed}.
|
||||
|
@ -163,7 +163,7 @@ You can preview the contents of the {dfeed} by using the following API:
|
|||
GET _xpack/ml/datafeeds/datafeed-test1/_preview
|
||||
----------------------------------
|
||||
// CONSOLE
|
||||
// TEST[continued]
|
||||
// TEST[skip:continued]
|
||||
|
||||
In this example, the API returns the following results, which contain a sum of
|
||||
the `error_count` and `aborted_count` values:
|
||||
|
@ -177,8 +177,6 @@ the `error_count` and `aborted_count` values:
|
|||
}
|
||||
]
|
||||
----------------------------------
|
||||
// TESTRESPONSE
|
||||
|
||||
|
||||
NOTE: This example demonstrates how to use script fields, but it contains
|
||||
insufficient data to generate meaningful results. For a full demonstration of
|
||||
|
@ -254,7 +252,7 @@ PUT _xpack/ml/datafeeds/datafeed-test2
|
|||
GET _xpack/ml/datafeeds/datafeed-test2/_preview
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:broken]
|
||||
// TEST[skip:needs-licence]
|
||||
<1> The script field has a rather generic name in this case, since it will
|
||||
be used for various tests in the subsequent examples.
|
||||
<2> The script field uses the plus (+) operator to concatenate strings.
|
||||
|
@ -271,7 +269,6 @@ and "SMITH " have been concatenated and an underscore was added:
|
|||
}
|
||||
]
|
||||
----------------------------------
|
||||
// TESTRESPONSE
|
||||
|
||||
[[ml-configuring-transform3]]
|
||||
.Example 3: Trimming strings
|
||||
|
@ -292,7 +289,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
|
|||
GET _xpack/ml/datafeeds/datafeed-test2/_preview
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[continued]
|
||||
// TEST[skip:continued]
|
||||
<1> This script field uses the `trim()` function to trim extra white space from a
|
||||
string.
|
||||
|
||||
|
@ -308,7 +305,6 @@ has been trimmed to "SMITH":
|
|||
}
|
||||
]
|
||||
----------------------------------
|
||||
// TESTRESPONSE
|
||||
|
||||
[[ml-configuring-transform4]]
|
||||
.Example 4: Converting strings to lowercase
|
||||
|
@ -329,7 +325,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
|
|||
GET _xpack/ml/datafeeds/datafeed-test2/_preview
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[continued]
|
||||
// TEST[skip:continued]
|
||||
<1> This script field uses the `toLowerCase` function to convert a string to all
|
||||
lowercase letters. Likewise, you can use the `toUpperCase{}` function to convert
|
||||
a string to uppercase letters.
|
||||
|
@ -346,7 +342,6 @@ has been converted to "joe":
|
|||
}
|
||||
]
|
||||
----------------------------------
|
||||
// TESTRESPONSE
|
||||
|
||||
[[ml-configuring-transform5]]
|
||||
.Example 5: Converting strings to mixed case formats
|
||||
|
@ -367,7 +362,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
|
|||
GET _xpack/ml/datafeeds/datafeed-test2/_preview
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[continued]
|
||||
// TEST[skip:continued]
|
||||
<1> This script field is a more complicated example of case manipulation. It uses
|
||||
the `subString()` function to capitalize the first letter of a string and
|
||||
converts the remaining characters to lowercase.
|
||||
|
@ -384,7 +379,6 @@ has been converted to "Joe":
|
|||
}
|
||||
]
|
||||
----------------------------------
|
||||
// TESTRESPONSE
|
||||
|
||||
[[ml-configuring-transform6]]
|
||||
.Example 6: Replacing tokens
|
||||
|
@ -405,7 +399,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
|
|||
GET _xpack/ml/datafeeds/datafeed-test2/_preview
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[continued]
|
||||
// TEST[skip:continued]
|
||||
<1> This script field uses regular expressions to replace white
|
||||
space with underscores.
|
||||
|
||||
|
@ -421,7 +415,6 @@ The preview {dfeed} API returns the following results, which show that
|
|||
}
|
||||
]
|
||||
----------------------------------
|
||||
// TESTRESPONSE
|
||||
|
||||
[[ml-configuring-transform7]]
|
||||
.Example 7: Regular expression matching and concatenation
|
||||
|
@ -442,7 +435,7 @@ POST _xpack/ml/datafeeds/datafeed-test2/_update
|
|||
GET _xpack/ml/datafeeds/datafeed-test2/_preview
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[continued]
|
||||
// TEST[skip:continued]
|
||||
<1> This script field looks for a specific regular expression pattern and emits the
|
||||
matched groups as a concatenated string. If no match is found, it emits an empty
|
||||
string.
|
||||
|
@ -459,7 +452,6 @@ The preview {dfeed} API returns the following results, which show that
|
|||
}
|
||||
]
|
||||
----------------------------------
|
||||
// TESTRESPONSE
|
||||
|
||||
[[ml-configuring-transform8]]
|
||||
.Example 8: Splitting strings by domain name
|
||||
|
@ -509,7 +501,7 @@ PUT _xpack/ml/datafeeds/datafeed-test3
|
|||
GET _xpack/ml/datafeeds/datafeed-test3/_preview
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:broken]
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
If you have a single field that contains a well-formed DNS domain name, you can
|
||||
use the `domainSplit()` function to split the string into its highest registered
|
||||
|
@ -537,7 +529,6 @@ The preview {dfeed} API returns the following results, which show that
|
|||
}
|
||||
]
|
||||
----------------------------------
|
||||
// TESTRESPONSE
|
||||
|
||||
[[ml-configuring-transform9]]
|
||||
.Example 9: Transforming geo_point data
|
||||
|
@ -583,7 +574,7 @@ PUT _xpack/ml/datafeeds/datafeed-test4
|
|||
GET _xpack/ml/datafeeds/datafeed-test4/_preview
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[skip:broken]
|
||||
// TEST[skip:needs-licence]
|
||||
|
||||
In {es}, location data can be stored in `geo_point` fields but this data type is
|
||||
not supported natively in {xpackml} analytics. This example of a script field
|
||||
|
@ -602,4 +593,4 @@ The preview {dfeed} API returns the following results, which show that
|
|||
}
|
||||
]
|
||||
----------------------------------
|
||||
// TESTRESPONSE
|
||||
|
|
@ -544,3 +544,8 @@ You can use the following APIs to add, remove, and retrieve role mappings:
|
|||
=== Privilege APIs
|
||||
|
||||
See <<security-api-has-privileges>>.
|
||||
|
||||
[role="exclude",id="xpack-commands"]
|
||||
=== X-Pack commands
|
||||
|
||||
See <<commands>>.
|
||||
|
|
|
@ -23,7 +23,7 @@ include::{xes-repo-dir}/rest-api/graph/explore.asciidoc[]
|
|||
include::{es-repo-dir}/licensing/index.asciidoc[]
|
||||
include::{es-repo-dir}/migration/migration.asciidoc[]
|
||||
include::{xes-repo-dir}/rest-api/ml-api.asciidoc[]
|
||||
include::{xes-repo-dir}/rest-api/rollup-api.asciidoc[]
|
||||
include::{es-repo-dir}/rollup/rollup-api.asciidoc[]
|
||||
include::{xes-repo-dir}/rest-api/security.asciidoc[]
|
||||
include::{xes-repo-dir}/rest-api/watcher.asciidoc[]
|
||||
include::{xes-repo-dir}/rest-api/defs.asciidoc[]
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-api-quickref]]
|
||||
== API Quick Reference
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-delete-job]]
|
||||
=== Delete Job API
|
||||
++++
|
|
@ -1,4 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-get-job]]
|
||||
=== Get Rollup Jobs API
|
||||
++++
|
|
@ -1,4 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-put-job]]
|
||||
=== Create Job API
|
||||
++++
|
|
@ -1,4 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-get-rollup-caps]]
|
||||
=== Get Rollup Job Capabilities
|
||||
++++
|
|
@ -1,4 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-job-config]]
|
||||
=== Rollup Job Configuration
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-search]]
|
||||
=== Rollup Search
|
||||
++++
|
|
@ -1,4 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-start-job]]
|
||||
=== Start Job API
|
||||
++++
|
|
@ -1,4 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-stop-job]]
|
||||
=== Stop Job API
|
||||
++++
|
|
@ -1,3 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[xpack-rollup]]
|
||||
= Rolling up historical data
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-overview]]
|
||||
== Overview
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-agg-limitations]]
|
||||
== Rollup Aggregation Limitations
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-apis]]
|
||||
== Rollup APIs
|
||||
|
||||
|
@ -26,12 +27,12 @@
|
|||
|
||||
|
||||
|
||||
include::rollup/delete-job.asciidoc[]
|
||||
include::rollup/get-job.asciidoc[]
|
||||
include::rollup/put-job.asciidoc[]
|
||||
include::rollup/start-job.asciidoc[]
|
||||
include::rollup/stop-job.asciidoc[]
|
||||
include::rollup/rollup-caps.asciidoc[]
|
||||
include::rollup/rollup-index-caps.asciidoc[]
|
||||
include::rollup/rollup-search.asciidoc[]
|
||||
include::rollup/rollup-job-config.asciidoc[]
|
||||
include::apis/delete-job.asciidoc[]
|
||||
include::apis/get-job.asciidoc[]
|
||||
include::apis/put-job.asciidoc[]
|
||||
include::apis/start-job.asciidoc[]
|
||||
include::apis/stop-job.asciidoc[]
|
||||
include::apis/rollup-caps.asciidoc[]
|
||||
include::apis/rollup-index-caps.asciidoc[]
|
||||
include::apis/rollup-search.asciidoc[]
|
||||
include::apis/rollup-job-config.asciidoc[]
|
|
@ -1,3 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-getting-started]]
|
||||
== Getting Started
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-search-limitations]]
|
||||
== Rollup Search Limitations
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[rollup-understanding-groups]]
|
||||
== Understanding Groups
|
||||
|
|
@ -22,6 +22,15 @@ the first parameter:
|
|||
$ ./bin/elasticsearch-sql-cli https://some.server:9200
|
||||
--------------------------------------------------
|
||||
|
||||
If security is enabled on your cluster, you can pass the username
|
||||
and password in the form `username:password@host_name:port`
|
||||
to the SQL CLI:
|
||||
|
||||
[source,bash]
|
||||
--------------------------------------------------
|
||||
$ ./bin/elasticsearch-sql-cli https://sql_user:strongpassword@some.server:9200
|
||||
--------------------------------------------------
|
||||
|
||||
Once the CLI is running you can use any <<sql-spec,query>> that
|
||||
Elasticsearch supports:
|
||||
|
||||
|
|
|
@ -46,12 +46,13 @@ if (!isEclipse && !isIdea) {
|
|||
targetCompatibility = 9
|
||||
}
|
||||
|
||||
/* Enable this when forbiddenapis was updated to 2.6.
|
||||
* See: https://github.com/elastic/elasticsearch/issues/29292
|
||||
forbiddenApisJava9 {
|
||||
targetCompatibility = 9
|
||||
if (project.runtimeJavaVersion < JavaVersion.VERSION_1_9) {
|
||||
targetCompatibility = JavaVersion.VERSION_1_9
|
||||
javaHome = project.java9Home
|
||||
}
|
||||
replaceSignatureFiles 'jdk-signatures'
|
||||
}
|
||||
*/
|
||||
|
||||
jar {
|
||||
metaInf {
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
|
|||
import org.elasticsearch.common.xcontent.XContentHelper;
|
||||
import org.elasticsearch.common.xcontent.json.JsonXContent;
|
||||
import org.elasticsearch.index.IndexModule;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.join.ParentJoinPlugin;
|
||||
import org.elasticsearch.plugins.Plugin;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
|
@ -58,6 +59,8 @@ public abstract class ParentChildTestCase extends ESIntegTestCase {
|
|||
@Override
|
||||
public Settings indexSettings() {
|
||||
Settings.Builder builder = Settings.builder().put(super.indexSettings())
|
||||
// AwaitsFix: https://github.com/elastic/elasticsearch/issues/33318
|
||||
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), false)
|
||||
// aggressive filter caching so that we can assert on the filter cache size
|
||||
.put(IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING.getKey(), true)
|
||||
.put(IndexModule.INDEX_QUERY_CACHE_EVERYTHING_SETTING.getKey(), true);
|
||||
|
|
|
@ -58,13 +58,13 @@ if (!isEclipse && !isIdea) {
|
|||
sourceCompatibility = 9
|
||||
targetCompatibility = 9
|
||||
}
|
||||
|
||||
/* Enable this when forbiddenapis was updated to 2.6.
|
||||
* See: https://github.com/elastic/elasticsearch/issues/29292
|
||||
|
||||
forbiddenApisJava9 {
|
||||
targetCompatibility = 9
|
||||
if (project.runtimeJavaVersion < JavaVersion.VERSION_1_9) {
|
||||
targetCompatibility = JavaVersion.VERSION_1_9
|
||||
javaHome = project.java9Home
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
jar {
|
||||
metaInf {
|
||||
|
|
|
@ -104,24 +104,12 @@ public class PathTrie<T> {
|
|||
namedWildcard = key.substring(key.indexOf('{') + 1, key.indexOf('}'));
|
||||
}
|
||||
|
||||
public boolean isWildcard() {
|
||||
return isWildcard;
|
||||
}
|
||||
|
||||
public synchronized void addChild(TrieNode child) {
|
||||
addInnerChild(child.key, child);
|
||||
}
|
||||
|
||||
private void addInnerChild(String key, TrieNode child) {
|
||||
Map<String, TrieNode> newChildren = new HashMap<>(children);
|
||||
newChildren.put(key, child);
|
||||
children = unmodifiableMap(newChildren);
|
||||
}
|
||||
|
||||
public TrieNode getChild(String key) {
|
||||
return children.get(key);
|
||||
}
|
||||
|
||||
public synchronized void insert(String[] path, int index, T value) {
|
||||
if (index >= path.length)
|
||||
return;
|
||||
|
@ -302,7 +290,7 @@ public class PathTrie<T> {
|
|||
}
|
||||
int index = 0;
|
||||
// Supports initial delimiter.
|
||||
if (strings.length > 0 && strings[0].isEmpty()) {
|
||||
if (strings[0].isEmpty()) {
|
||||
index = 1;
|
||||
}
|
||||
root.insert(strings, index, value);
|
||||
|
@ -327,7 +315,7 @@ public class PathTrie<T> {
|
|||
}
|
||||
int index = 0;
|
||||
// Supports initial delimiter.
|
||||
if (strings.length > 0 && strings[0].isEmpty()) {
|
||||
if (strings[0].isEmpty()) {
|
||||
index = 1;
|
||||
}
|
||||
root.insertOrUpdate(strings, index, value, updater);
|
||||
|
@ -352,7 +340,7 @@ public class PathTrie<T> {
|
|||
int index = 0;
|
||||
|
||||
// Supports initial delimiter.
|
||||
if (strings.length > 0 && strings[0].isEmpty()) {
|
||||
if (strings[0].isEmpty()) {
|
||||
index = 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -75,11 +75,10 @@ public final class IndexSettings {
|
|||
switch(s) {
|
||||
case "false":
|
||||
case "true":
|
||||
case "fix":
|
||||
case "checksum":
|
||||
return s;
|
||||
default:
|
||||
throw new IllegalArgumentException("unknown value for [index.shard.check_on_startup] must be one of [true, false, fix, checksum] but was: " + s);
|
||||
throw new IllegalArgumentException("unknown value for [index.shard.check_on_startup] must be one of [true, false, checksum] but was: " + s);
|
||||
}
|
||||
}, Property.IndexScope);
|
||||
|
||||
|
|
|
@ -1332,7 +1332,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
|
|||
}
|
||||
recoveryState.setStage(RecoveryState.Stage.VERIFY_INDEX);
|
||||
// also check here, before we apply the translog
|
||||
if (Booleans.isTrue(checkIndexOnStartup)) {
|
||||
if (Booleans.isTrue(checkIndexOnStartup) || "checksum".equals(checkIndexOnStartup)) {
|
||||
try {
|
||||
checkIndex();
|
||||
} catch (IOException ex) {
|
||||
|
@ -1955,6 +1955,9 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
|
|||
if (store.tryIncRef()) {
|
||||
try {
|
||||
doCheckIndex();
|
||||
} catch (IOException e) {
|
||||
store.markStoreCorrupted(e);
|
||||
throw e;
|
||||
} finally {
|
||||
store.decRef();
|
||||
}
|
||||
|
@ -1998,18 +2001,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
|
|||
return;
|
||||
}
|
||||
logger.warn("check index [failure]\n{}", os.bytes().utf8ToString());
|
||||
if ("fix".equals(checkIndexOnStartup)) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("fixing index, writing new segments file ...");
|
||||
}
|
||||
store.exorciseIndex(status);
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("index fixed, wrote new segments file \"{}\"", status.segmentsFileName);
|
||||
}
|
||||
} else {
|
||||
// only throw a failure if we are not going to fix the index
|
||||
throw new IllegalStateException("index check failure but can't fix it");
|
||||
}
|
||||
throw new IOException("index check failure");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -134,7 +134,8 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
|
|||
static final int VERSION_STACK_TRACE = 1; // we write the stack trace too since 1.4.0
|
||||
static final int VERSION_START = 0;
|
||||
static final int VERSION = VERSION_WRITE_THROWABLE;
|
||||
static final String CORRUPTED = "corrupted_";
|
||||
// public is for test purposes
|
||||
public static final String CORRUPTED = "corrupted_";
|
||||
public static final Setting<TimeValue> INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING =
|
||||
Setting.timeSetting("index.store.stats_refresh_interval", TimeValue.timeValueSeconds(10), Property.IndexScope);
|
||||
|
||||
|
@ -360,18 +361,6 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Repairs the index using the previous returned status from {@link #checkIndex(PrintStream)}.
|
||||
*/
|
||||
public void exorciseIndex(CheckIndex.Status status) throws IOException {
|
||||
metadataLock.writeLock().lock();
|
||||
try (CheckIndex checkIndex = new CheckIndex(directory)) {
|
||||
checkIndex.exorciseIndex(status);
|
||||
} finally {
|
||||
metadataLock.writeLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
public StoreStats stats() throws IOException {
|
||||
ensureOpen();
|
||||
return new StoreStats(directory.estimateSize());
|
||||
|
|
|
@ -69,7 +69,7 @@ public class MetaDataIndexTemplateServiceTests extends ESSingleNodeTestCase {
|
|||
containsString("Failed to parse value [0] for setting [index.number_of_shards] must be >= 1"));
|
||||
assertThat(throwables.get(0).getMessage(),
|
||||
containsString("unknown value for [index.shard.check_on_startup] " +
|
||||
"must be one of [true, false, fix, checksum] but was: blargh"));
|
||||
"must be one of [true, false, checksum] but was: blargh"));
|
||||
}
|
||||
|
||||
public void testIndexTemplateValidationAccumulatesValidationErrors() {
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.lucene.index.CorruptIndexException;
|
|||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexCommit;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
@ -118,6 +119,7 @@ import org.elasticsearch.snapshots.Snapshot;
|
|||
import org.elasticsearch.snapshots.SnapshotId;
|
||||
import org.elasticsearch.snapshots.SnapshotInfo;
|
||||
import org.elasticsearch.snapshots.SnapshotShardFailure;
|
||||
import org.elasticsearch.test.CorruptionUtils;
|
||||
import org.elasticsearch.test.DummyShardLock;
|
||||
import org.elasticsearch.test.FieldMaskingReader;
|
||||
import org.elasticsearch.test.VersionUtils;
|
||||
|
@ -126,7 +128,11 @@ import org.elasticsearch.ElasticsearchException;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.file.FileVisitResult;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.SimpleFileVisitor;
|
||||
import java.nio.file.attribute.BasicFileAttributes;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
@ -1239,7 +1245,7 @@ public class IndexShardTests extends IndexShardTestCase {
|
|||
};
|
||||
|
||||
try (Store store = createStore(shardId, new IndexSettings(metaData, Settings.EMPTY), directory)) {
|
||||
IndexShard shard = newShard(shardRouting, shardPath, metaData, store,
|
||||
IndexShard shard = newShard(shardRouting, shardPath, metaData, i -> store,
|
||||
null, new InternalEngineFactory(), () -> {
|
||||
}, EMPTY_EVENT_LISTENER);
|
||||
AtomicBoolean failureCallbackTriggered = new AtomicBoolean(false);
|
||||
|
@ -2590,6 +2596,143 @@ public class IndexShardTests extends IndexShardTestCase {
|
|||
closeShards(newShard);
|
||||
}
|
||||
|
||||
public void testIndexCheckOnStartup() throws Exception {
|
||||
final IndexShard indexShard = newStartedShard(true);
|
||||
|
||||
final long numDocs = between(10, 100);
|
||||
for (long i = 0; i < numDocs; i++) {
|
||||
indexDoc(indexShard, "_doc", Long.toString(i), "{}");
|
||||
}
|
||||
indexShard.flush(new FlushRequest());
|
||||
closeShards(indexShard);
|
||||
|
||||
final ShardPath shardPath = indexShard.shardPath();
|
||||
|
||||
final Path indexPath = corruptIndexFile(shardPath);
|
||||
|
||||
final AtomicInteger corruptedMarkerCount = new AtomicInteger();
|
||||
final SimpleFileVisitor<Path> corruptedVisitor = new SimpleFileVisitor<Path>() {
|
||||
@Override
|
||||
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
|
||||
if (Files.isRegularFile(file) && file.getFileName().toString().startsWith(Store.CORRUPTED)) {
|
||||
corruptedMarkerCount.incrementAndGet();
|
||||
}
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
};
|
||||
Files.walkFileTree(indexPath, corruptedVisitor);
|
||||
|
||||
assertThat("corruption marker should not be there", corruptedMarkerCount.get(), equalTo(0));
|
||||
|
||||
final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),
|
||||
RecoverySource.StoreRecoverySource.EXISTING_STORE_INSTANCE
|
||||
);
|
||||
// start shard and perform index check on startup; this forces the shard to fail because of the corrupted index files
|
||||
final IndexMetaData indexMetaData = IndexMetaData.builder(indexShard.indexSettings().getIndexMetaData())
|
||||
.settings(Settings.builder()
|
||||
.put(indexShard.indexSettings.getSettings())
|
||||
.put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("true", "checksum")))
|
||||
.build();
|
||||
|
||||
IndexShard corruptedShard = newShard(shardRouting, shardPath, indexMetaData,
|
||||
null, null, indexShard.engineFactory,
|
||||
indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
|
||||
|
||||
final IndexShardRecoveryException indexShardRecoveryException =
|
||||
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
|
||||
assertThat(indexShardRecoveryException.getMessage(), equalTo("failed recovery"));
|
||||
|
||||
// check that corrupt marker is there
|
||||
Files.walkFileTree(indexPath, corruptedVisitor);
|
||||
assertThat("store has to be marked as corrupted", corruptedMarkerCount.get(), equalTo(1));
|
||||
|
||||
try {
|
||||
closeShards(corruptedShard);
|
||||
} catch (RuntimeException e) {
|
||||
assertThat(e.getMessage(), equalTo("CheckIndex failed"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testShardDoesNotStartIfCorruptedMarkerIsPresent() throws Exception {
|
||||
final IndexShard indexShard = newStartedShard(true);
|
||||
|
||||
final long numDocs = between(10, 100);
|
||||
for (long i = 0; i < numDocs; i++) {
|
||||
indexDoc(indexShard, "_doc", Long.toString(i), "{}");
|
||||
}
|
||||
indexShard.flush(new FlushRequest());
|
||||
closeShards(indexShard);
|
||||
|
||||
final ShardPath shardPath = indexShard.shardPath();
|
||||
|
||||
final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),
|
||||
RecoverySource.StoreRecoverySource.EXISTING_STORE_INSTANCE
|
||||
);
|
||||
final IndexMetaData indexMetaData = indexShard.indexSettings().getIndexMetaData();
|
||||
|
||||
final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME);
|
||||
|
||||
// create corrupted marker
|
||||
final String corruptionMessage = "fake ioexception";
|
||||
try(Store store = createStore(indexShard.indexSettings(), shardPath)) {
|
||||
store.markStoreCorrupted(new IOException(corruptionMessage));
|
||||
}
|
||||
|
||||
// try to start shard on corrupted files
|
||||
final IndexShard corruptedShard = newShard(shardRouting, shardPath, indexMetaData,
|
||||
null, null, indexShard.engineFactory,
|
||||
indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
|
||||
|
||||
final IndexShardRecoveryException exception1 = expectThrows(IndexShardRecoveryException.class,
|
||||
() -> newStartedShard(p -> corruptedShard, true));
|
||||
assertThat(exception1.getCause().getMessage(), equalTo(corruptionMessage + " (resource=preexisting_corruption)"));
|
||||
closeShards(corruptedShard);
|
||||
|
||||
final AtomicInteger corruptedMarkerCount = new AtomicInteger();
|
||||
final SimpleFileVisitor<Path> corruptedVisitor = new SimpleFileVisitor<Path>() {
|
||||
@Override
|
||||
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
|
||||
if (Files.isRegularFile(file) && file.getFileName().toString().startsWith(Store.CORRUPTED)) {
|
||||
corruptedMarkerCount.incrementAndGet();
|
||||
}
|
||||
return FileVisitResult.CONTINUE;
|
||||
}
|
||||
};
|
||||
Files.walkFileTree(indexPath, corruptedVisitor);
|
||||
assertThat("store has to be marked as corrupted", corruptedMarkerCount.get(), equalTo(1));
|
||||
|
||||
// try to start another time shard on corrupted files
|
||||
final IndexShard corruptedShard2 = newShard(shardRouting, shardPath, indexMetaData,
|
||||
null, null, indexShard.engineFactory,
|
||||
indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
|
||||
|
||||
final IndexShardRecoveryException exception2 = expectThrows(IndexShardRecoveryException.class,
|
||||
() -> newStartedShard(p -> corruptedShard2, true));
|
||||
assertThat(exception2.getCause().getMessage(), equalTo(corruptionMessage + " (resource=preexisting_corruption)"));
|
||||
closeShards(corruptedShard2);
|
||||
|
||||
// check that corrupt marker is there
|
||||
corruptedMarkerCount.set(0);
|
||||
Files.walkFileTree(indexPath, corruptedVisitor);
|
||||
assertThat("store still has a single corrupt marker", corruptedMarkerCount.get(), equalTo(1));
|
||||
}
|
||||
|
||||
private Path corruptIndexFile(ShardPath shardPath) throws IOException {
|
||||
final Path indexPath = shardPath.getDataPath().resolve(ShardPath.INDEX_FOLDER_NAME);
|
||||
final Path[] filesToCorrupt =
|
||||
Files.walk(indexPath)
|
||||
.filter(p -> {
|
||||
final String name = p.getFileName().toString();
|
||||
return Files.isRegularFile(p)
|
||||
&& name.startsWith("extra") == false // Skip files added by Lucene's ExtrasFS
|
||||
&& IndexWriter.WRITE_LOCK_NAME.equals(name) == false
|
||||
&& name.startsWith("segments_") == false && name.endsWith(".si") == false;
|
||||
})
|
||||
.toArray(Path[]::new);
|
||||
CorruptionUtils.corruptFile(random(), filesToCorrupt);
|
||||
return indexPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Simulates a scenario that happens when we are async fetching snapshot metadata from GatewayService
|
||||
* and checking index concurrently. This should always be possible without any exception.
|
||||
|
@@ -2613,7 +2756,7 @@ public class IndexShardTests extends IndexShardTestCase {
        final IndexMetaData indexMetaData = IndexMetaData.builder(indexShard.indexSettings().getIndexMetaData())
            .settings(Settings.builder()
                .put(indexShard.indexSettings.getSettings())
                .put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("false", "true", "checksum", "fix")))
                .put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(), randomFrom("false", "true", "checksum")))
            .build();
        final IndexShard newShard = newShard(shardRouting, indexShard.shardPath(), indexMetaData,
            null, null, indexShard.engineFactory, indexShard.getGlobalCheckpointSyncer(), EMPTY_EVENT_LISTENER);
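For reference, after the hunks above the startup check setting exercised by these tests only takes the values false, true, and checksum; fix is no longer among them. A minimal sketch of building such settings for a test shard, assuming the same Settings and IndexMetaData builders used above (existingIndexSettings and existingMetaData are illustrative names, not from the diff):

    // Hypothetical test helper: copy the shard's settings and enable one of the
    // remaining valid values of index.shard.check_on_startup.
    Settings checkedSettings = Settings.builder()
        .put(existingIndexSettings)                          // settings of the shard under test
        .put(IndexSettings.INDEX_CHECK_ON_STARTUP.getKey(),
             randomFrom("true", "checksum"))                 // "fix" is no longer a supported value
        .build();
    IndexMetaData checkedMetaData = IndexMetaData.builder(existingMetaData)
        .settings(checkedSettings)
        .build();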
@@ -32,6 +32,7 @@ import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.ShardRoutingHelper;
import org.elasticsearch.cluster.routing.ShardRoutingState;
import org.elasticsearch.cluster.routing.TestShardRouting;
import org.elasticsearch.common.CheckedFunction;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.lucene.uid.Versions;
@@ -156,7 +157,6 @@ public abstract class IndexShardTestCase extends ESTestCase {
        return Settings.EMPTY;
    }


    protected Store createStore(IndexSettings indexSettings, ShardPath shardPath) throws IOException {
        return createStore(shardPath.getShardId(), indexSettings, newFSDirectory(shardPath.resolveIndex()));
    }
@@ -169,7 +169,6 @@ public abstract class IndexShardTestCase extends ESTestCase {
            }
        };
        return new Store(shardId, indexSettings, directoryService, new DummyShardLock(shardId));

    }

    /**
@@ -179,7 +178,17 @@ public abstract class IndexShardTestCase extends ESTestCase {
     * another shard)
     */
    protected IndexShard newShard(boolean primary) throws IOException {
        return newShard(primary, Settings.EMPTY, new InternalEngineFactory());
        return newShard(primary, Settings.EMPTY);
    }

    /**
     * Creates a new initializing shard. The shard will have its own unique data path.
     *
     * @param primary indicates whether to create a primary shard (ready to recover from an empty store) or a replica (ready to recover
     *                from another shard)
     */
    protected IndexShard newShard(final boolean primary, final Settings settings) throws IOException {
        return newShard(primary, settings, new InternalEngineFactory());
    }

    /**
@@ -318,23 +327,25 @@ public abstract class IndexShardTestCase extends ESTestCase {
     * @param routing                shard routing to use
     * @param shardPath              path to use for shard data
     * @param indexMetaData          indexMetaData for the shard, including any mapping
     * @param store                  an optional custom store to use. If null a default file based store will be created
     * @param storeProvider          an optional custom store provider to use. If null a default file based store will be created
     * @param indexSearcherWrapper   an optional wrapper to be applied to the shard's searchers
     * @param globalCheckpointSyncer callback for syncing global checkpoints
     * @param indexEventListener     index event listener
     * @param listeners              an optional set of listeners to add to the shard
     */
    protected IndexShard newShard(ShardRouting routing, ShardPath shardPath, IndexMetaData indexMetaData,
                                  @Nullable Store store, @Nullable IndexSearcherWrapper indexSearcherWrapper,
                                  @Nullable CheckedFunction<IndexSettings, Store, IOException> storeProvider,
                                  @Nullable IndexSearcherWrapper indexSearcherWrapper,
                                  @Nullable EngineFactory engineFactory,
                                  Runnable globalCheckpointSyncer,
                                  IndexEventListener indexEventListener, IndexingOperationListener... listeners) throws IOException {
        final Settings nodeSettings = Settings.builder().put("node.name", routing.currentNodeId()).build();
        final IndexSettings indexSettings = new IndexSettings(indexMetaData, nodeSettings);
        final IndexShard indexShard;
        if (store == null) {
            store = createStore(indexSettings, shardPath);
        if (storeProvider == null) {
            storeProvider = is -> createStore(is, shardPath);
        }
        final Store store = storeProvider.apply(indexSettings);
        boolean success = false;
        try {
            IndexCache indexCache = new IndexCache(indexSettings, new DisabledQueryCache(indexSettings), null);
@@ -424,7 +435,18 @@ public abstract class IndexShardTestCase extends ESTestCase {
     */
    protected IndexShard newStartedShard(
        final boolean primary, final Settings settings, final EngineFactory engineFactory) throws IOException {
        IndexShard shard = newShard(primary, settings, engineFactory);
        return newStartedShard(p -> newShard(p, settings, engineFactory), primary);
    }

    /**
     * creates a new empty shard and starts it.
     *
     * @param shardFunction shard factory function
     * @param primary       controls whether the shard will be a primary or a replica.
     */
    protected IndexShard newStartedShard(CheckedFunction<Boolean, IndexShard, IOException> shardFunction,
                                         boolean primary) throws IOException {
        IndexShard shard = shardFunction.apply(primary);
        if (primary) {
            recoverShardFromStore(shard);
        } else {
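The else branch continues in the unchanged lines of this hunk; the point of the new overload is that a test can supply the shard itself. A brief sketch of both uses, mirroring calls already present in this change (corruptedShard refers to a shard prepared as in the tests above):

    // The convenience overloads now delegate to the shard-function variant:
    IndexShard started = newStartedShard(p -> newShard(p, Settings.EMPTY, new InternalEngineFactory()), true);

    // Tests that need a specially prepared shard, for example one sitting on a corrupted store,
    // hand it back from the function and let newStartedShard attempt recovery:
    expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));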
@@ -1,102 +0,0 @@
[role="xpack"]
[[ml-api-quickref]]
== API quick reference

All {ml} endpoints have the following base:

[source,js]
----
/_xpack/ml/
----
// NOTCONSOLE

The main {ml} resources can be accessed with a variety of endpoints:

* <<ml-api-jobs,+/anomaly_detectors/+>>: Create and manage {ml} jobs
* <<ml-api-datafeeds,+/datafeeds/+>>: Select data from {es} to be analyzed
* <<ml-api-results,+/results/+>>: Access the results of a {ml} job
* <<ml-api-snapshots,+/model_snapshots/+>>: Manage model snapshots
//* <<ml-api-validate,+/validate/+>>: Validate subsections of job configurations

[float]
[[ml-api-jobs]]
=== /anomaly_detectors/

* {ref}/ml-put-job.html[PUT /anomaly_detectors/<job_id+++>+++]: Create a job
* {ref}/ml-open-job.html[POST /anomaly_detectors/<job_id>/_open]: Open a job
* {ref}/ml-post-data.html[POST /anomaly_detectors/<job_id>/_data]: Send data to a job
* {ref}/ml-get-job.html[GET /anomaly_detectors]: List jobs
* {ref}/ml-get-job.html[GET /anomaly_detectors/<job_id+++>+++]: Get job details
* {ref}/ml-get-job-stats.html[GET /anomaly_detectors/<job_id>/_stats]: Get job statistics
* {ref}/ml-update-job.html[POST /anomaly_detectors/<job_id>/_update]: Update certain properties of the job configuration
* {ref}/ml-flush-job.html[POST /anomaly_detectors/<job_id>/_flush]: Force a job to analyze buffered data
* {ref}/ml-forecast.html[POST /anomaly_detectors/<job_id>/_forecast]: Forecast future job behavior
* {ref}/ml-close-job.html[POST /anomaly_detectors/<job_id>/_close]: Close a job
* {ref}/ml-delete-job.html[DELETE /anomaly_detectors/<job_id+++>+++]: Delete a job

[float]
[[ml-api-calendars]]
=== /calendars/

* {ref}/ml-put-calendar.html[PUT /calendars/<calendar_id+++>+++]: Create a calendar
* {ref}/ml-post-calendar-event.html[POST /calendars/<calendar_id+++>+++/events]: Add a scheduled event to a calendar
* {ref}/ml-put-calendar-job.html[PUT /calendars/<calendar_id+++>+++/jobs/<job_id+++>+++]: Associate a job with a calendar
* {ref}/ml-get-calendar.html[GET /calendars/<calendar_id+++>+++]: Get calendar details
* {ref}/ml-get-calendar-event.html[GET /calendars/<calendar_id+++>+++/events]: Get scheduled event details
* {ref}/ml-delete-calendar-event.html[DELETE /calendars/<calendar_id+++>+++/events/<event_id+++>+++]: Remove a scheduled event from a calendar
* {ref}/ml-delete-calendar-job.html[DELETE /calendars/<calendar_id+++>+++/jobs/<job_id+++>+++]: Disassociate a job from a calendar
* {ref}/ml-delete-calendar.html[DELETE /calendars/<calendar_id+++>+++]: Delete a calendar

[float]
[[ml-api-filters]]
=== /filters/

* {ref}/ml-put-filter.html[PUT /filters/<filter_id+++>+++]: Create a filter
* {ref}/ml-update-filter.html[POST /filters/<filter_id+++>+++/_update]: Update a filter
* {ref}/ml-get-filter.html[GET /filters/<filter_id+++>+++]: List filters
* {ref}/ml-delete-filter.html[DELETE /filters/<filter_id+++>+++]: Delete a filter

[float]
[[ml-api-datafeeds]]
=== /datafeeds/

* {ref}/ml-put-datafeed.html[PUT /datafeeds/<datafeed_id+++>+++]: Create a {dfeed}
* {ref}/ml-start-datafeed.html[POST /datafeeds/<datafeed_id>/_start]: Start a {dfeed}
* {ref}/ml-get-datafeed.html[GET /datafeeds]: List {dfeeds}
* {ref}/ml-get-datafeed.html[GET /datafeeds/<datafeed_id+++>+++]: Get {dfeed} details
* {ref}/ml-get-datafeed-stats.html[GET /datafeeds/<datafeed_id>/_stats]: Get statistical information for {dfeeds}
* {ref}/ml-preview-datafeed.html[GET /datafeeds/<datafeed_id>/_preview]: Get a preview of a {dfeed}
* {ref}/ml-update-datafeed.html[POST /datafeeds/<datafeed_id>/_update]: Update certain settings for a {dfeed}
* {ref}/ml-stop-datafeed.html[POST /datafeeds/<datafeed_id>/_stop]: Stop a {dfeed}
* {ref}/ml-delete-datafeed.html[DELETE /datafeeds/<datafeed_id+++>+++]: Delete a {dfeed}

[float]
[[ml-api-results]]
=== /results/

* {ref}/ml-get-bucket.html[GET /results/buckets]: List the buckets in the results
* {ref}/ml-get-bucket.html[GET /results/buckets/<bucket_id+++>+++]: Get bucket details
* {ref}/ml-get-overall-buckets.html[GET /results/overall_buckets]: Get overall bucket results for multiple jobs
* {ref}/ml-get-category.html[GET /results/categories]: List the categories in the results
* {ref}/ml-get-category.html[GET /results/categories/<category_id+++>+++]: Get category details
* {ref}/ml-get-influencer.html[GET /results/influencers]: Get influencer details
* {ref}/ml-get-record.html[GET /results/records]: Get records from the results

[float]
[[ml-api-snapshots]]
=== /model_snapshots/

* {ref}/ml-get-snapshot.html[GET /model_snapshots]: List model snapshots
* {ref}/ml-get-snapshot.html[GET /model_snapshots/<snapshot_id+++>+++]: Get model snapshot details
* {ref}/ml-revert-snapshot.html[POST /model_snapshots/<snapshot_id>/_revert]: Revert a model snapshot
* {ref}/ml-update-snapshot.html[POST /model_snapshots/<snapshot_id>/_update]: Update certain settings for a model snapshot
* {ref}/ml-delete-snapshot.html[DELETE /model_snapshots/<snapshot_id+++>+++]: Delete a model snapshot

////
[float]
[[ml-api-validate]]
=== /validate/

* {ref}/ml-valid-detector.html[POST /anomaly_detectors/_validate/detector]: Validate a detector
* {ref}/ml-valid-job.html[POST /anomaly_detectors/_validate]: Validate a job
////
@@ -1,35 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.logstructurefinder;

import org.supercsv.prefs.CsvPreference;

import java.io.IOException;
import java.util.List;

public class CsvLogStructureFinderFactory implements LogStructureFinderFactory {

    /**
     * Rules are:
     * - The file must be valid CSV
     * - It must contain at least two complete records
     * - There must be at least two fields per record (otherwise files with no commas could be treated as CSV!)
     * - Every CSV record except the last must have the same number of fields
     * The reason the last record is allowed to have fewer fields than the others is that
     * it could have been truncated when the file was sampled.
     */
    @Override
    public boolean canCreateFromSample(List<String> explanation, String sample) {
        return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.EXCEL_PREFERENCE, "CSV");
    }

    @Override
    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
        throws IOException {
        return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
            CsvPreference.EXCEL_PREFERENCE, false);
    }
}
@@ -29,17 +29,16 @@ import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
public class DelimitedLogStructureFinder implements LogStructureFinder {

    private static final int MAX_LEVENSHTEIN_COMPARISONS = 100;

    private final List<String> sampleMessages;
    private final LogStructure structure;

    static SeparatedValuesLogStructureFinder makeSeparatedValuesLogStructureFinder(List<String> explanation, String sample,
                                                                                   String charsetName, Boolean hasByteOrderMarker,
                                                                                   CsvPreference csvPreference, boolean trimFields)
        throws IOException {
    static DelimitedLogStructureFinder makeDelimitedLogStructureFinder(List<String> explanation, String sample, String charsetName,
                                                                       Boolean hasByteOrderMarker, CsvPreference csvPreference,
                                                                       boolean trimFields) throws IOException {

        Tuple<List<List<String>>, List<Integer>> parsed = readRows(sample, csvPreference);
        List<List<String>> rows = parsed.v1();
@@ -73,13 +72,14 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
        String preamble = Pattern.compile("\n").splitAsStream(sample).limit(lineNumbers.get(1)).collect(Collectors.joining("\n", "", "\n"));

        char delimiter = (char) csvPreference.getDelimiterChar();
        LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.fromSeparator(delimiter))
        LogStructure.Builder structureBuilder = new LogStructure.Builder(LogStructure.Format.DELIMITED)
            .setCharset(charsetName)
            .setHasByteOrderMarker(hasByteOrderMarker)
            .setSampleStart(preamble)
            .setNumLinesAnalyzed(lineNumbers.get(lineNumbers.size() - 1))
            .setNumMessagesAnalyzed(sampleRecords.size())
            .setHasHeaderRow(isHeaderInFile)
            .setDelimiter(delimiter)
            .setInputFields(Arrays.stream(headerWithNamedBlanks).collect(Collectors.toList()));

        if (trimFields) {
@@ -131,10 +131,10 @@ public class SeparatedValuesLogStructureFinder implements LogStructureFinder {
            .setExplanation(explanation)
            .build();

        return new SeparatedValuesLogStructureFinder(sampleMessages, structure);
        return new DelimitedLogStructureFinder(sampleMessages, structure);
    }

    private SeparatedValuesLogStructureFinder(List<String> sampleMessages, LogStructure structure) {
    private DelimitedLogStructureFinder(List<String> sampleMessages, LogStructure structure) {
        this.sampleMessages = Collections.unmodifiableList(sampleMessages);
        this.structure = structure;
    }
@@ -0,0 +1,57 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.logstructurefinder;

import org.supercsv.prefs.CsvPreference;

import java.io.IOException;
import java.util.List;
import java.util.Locale;

public class DelimitedLogStructureFinderFactory implements LogStructureFinderFactory {

    private final CsvPreference csvPreference;
    private final int minFieldsPerRow;
    private final boolean trimFields;

    DelimitedLogStructureFinderFactory(char delimiter, int minFieldsPerRow, boolean trimFields) {
        csvPreference = new CsvPreference.Builder('"', delimiter, "\n").build();
        this.minFieldsPerRow = minFieldsPerRow;
        this.trimFields = trimFields;
    }

    /**
     * Rules are:
     * - It must contain at least two complete records
     * - There must be a minimum number of fields per record (otherwise files with no commas could be treated as CSV!)
     * - Every record except the last must have the same number of fields
     * The reason the last record is allowed to have fewer fields than the others is that
     * it could have been truncated when the file was sampled.
     */
    @Override
    public boolean canCreateFromSample(List<String> explanation, String sample) {
        String formatName;
        switch ((char) csvPreference.getDelimiterChar()) {
            case ',':
                formatName = "CSV";
                break;
            case '\t':
                formatName = "TSV";
                break;
            default:
                formatName = Character.getName(csvPreference.getDelimiterChar()).toLowerCase(Locale.ROOT) + " delimited values";
                break;
        }
        return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, minFieldsPerRow, csvPreference, formatName);
    }

    @Override
    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
        throws IOException {
        return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
            csvPreference, trimFields);
    }
}
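A minimal usage sketch for the new factory, reusing the CSV sample shape that appears later in this change; it assumes same-package access (the constructor is package-private) and an enclosing method that declares IOException:

    // Each factory is configured with a delimiter, a minimum field count, and whether
    // fields should be trimmed; detection is then a two-step check/create.
    List<String> explanation = new ArrayList<>();
    LogStructureFinderFactory csv = new DelimitedLogStructureFinderFactory(',', 2, false);

    String sample = "time,message\n2018-05-17T13:41:23,hello\n2018-05-17T13:41:32,hello again\n";
    if (csv.canCreateFromSample(explanation, sample)) {
        LogStructureFinder finder = csv.createFromSample(explanation, sample, "UTF-8", false);
        LogStructure structure = finder.getStructure();   // format is DELIMITED, delimiter ','
    }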
@@ -27,37 +27,14 @@ public class LogStructure implements ToXContentObject {

    public enum Format {

        JSON, XML, CSV, TSV, SEMI_COLON_SEPARATED_VALUES, PIPE_SEPARATED_VALUES, SEMI_STRUCTURED_TEXT;

        public Character separator() {
            switch (this) {
                case JSON:
                case XML:
                    return null;
                case CSV:
                    return ',';
                case TSV:
                    return '\t';
                case SEMI_COLON_SEPARATED_VALUES:
                    return ';';
                case PIPE_SEPARATED_VALUES:
                    return '|';
                case SEMI_STRUCTURED_TEXT:
                    return null;
                default:
                    throw new IllegalStateException("enum value [" + this + "] missing from switch.");
            }
        }
        JSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT;

        public boolean supportsNesting() {
            switch (this) {
                case JSON:
                case XML:
                    return true;
                case CSV:
                case TSV:
                case SEMI_COLON_SEPARATED_VALUES:
                case PIPE_SEPARATED_VALUES:
                case DELIMITED:
                case SEMI_STRUCTURED_TEXT:
                    return false;
                default:
@ -69,10 +46,7 @@ public class LogStructure implements ToXContentObject {
|
|||
switch (this) {
|
||||
case JSON:
|
||||
case XML:
|
||||
case CSV:
|
||||
case TSV:
|
||||
case SEMI_COLON_SEPARATED_VALUES:
|
||||
case PIPE_SEPARATED_VALUES:
|
||||
case DELIMITED:
|
||||
return true;
|
||||
case SEMI_STRUCTURED_TEXT:
|
||||
return false;
|
||||
|
@ -85,10 +59,7 @@ public class LogStructure implements ToXContentObject {
|
|||
switch (this) {
|
||||
case JSON:
|
||||
case XML:
|
||||
case CSV:
|
||||
case TSV:
|
||||
case SEMI_COLON_SEPARATED_VALUES:
|
||||
case PIPE_SEPARATED_VALUES:
|
||||
case DELIMITED:
|
||||
return false;
|
||||
case SEMI_STRUCTURED_TEXT:
|
||||
return true;
|
||||
|
@ -97,38 +68,6 @@ public class LogStructure implements ToXContentObject {
|
|||
}
|
||||
}
|
||||
|
||||
public boolean isSeparatedValues() {
|
||||
switch (this) {
|
||||
case JSON:
|
||||
case XML:
|
||||
return false;
|
||||
case CSV:
|
||||
case TSV:
|
||||
case SEMI_COLON_SEPARATED_VALUES:
|
||||
case PIPE_SEPARATED_VALUES:
|
||||
return true;
|
||||
case SEMI_STRUCTURED_TEXT:
|
||||
return false;
|
||||
default:
|
||||
throw new IllegalStateException("enum value [" + this + "] missing from switch.");
|
||||
}
|
||||
}
|
||||
|
||||
public static Format fromSeparator(char separator) {
|
||||
switch (separator) {
|
||||
case ',':
|
||||
return CSV;
|
||||
case '\t':
|
||||
return TSV;
|
||||
case ';':
|
||||
return SEMI_COLON_SEPARATED_VALUES;
|
||||
case '|':
|
||||
return PIPE_SEPARATED_VALUES;
|
||||
default:
|
||||
throw new IllegalArgumentException("No known format has separator [" + separator + "]");
|
||||
}
|
||||
}
|
||||
|
||||
public static Format fromString(String name) {
|
||||
return valueOf(name.trim().toUpperCase(Locale.ROOT));
|
||||
}
|
||||
|
@ -149,7 +88,7 @@ public class LogStructure implements ToXContentObject {
|
|||
static final ParseField EXCLUDE_LINES_PATTERN = new ParseField("exclude_lines_pattern");
|
||||
static final ParseField INPUT_FIELDS = new ParseField("input_fields");
|
||||
static final ParseField HAS_HEADER_ROW = new ParseField("has_header_row");
|
||||
static final ParseField SEPARATOR = new ParseField("separator");
|
||||
static final ParseField DELIMITER = new ParseField("delimiter");
|
||||
static final ParseField SHOULD_TRIM_FIELDS = new ParseField("should_trim_fields");
|
||||
static final ParseField GROK_PATTERN = new ParseField("grok_pattern");
|
||||
static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp_field");
|
||||
|
@ -171,7 +110,7 @@ public class LogStructure implements ToXContentObject {
|
|||
PARSER.declareString(Builder::setExcludeLinesPattern, EXCLUDE_LINES_PATTERN);
|
||||
PARSER.declareStringArray(Builder::setInputFields, INPUT_FIELDS);
|
||||
PARSER.declareBoolean(Builder::setHasHeaderRow, HAS_HEADER_ROW);
|
||||
PARSER.declareString((p, c) -> p.setSeparator(c.charAt(0)), SEPARATOR);
|
||||
PARSER.declareString((p, c) -> p.setDelimiter(c.charAt(0)), DELIMITER);
|
||||
PARSER.declareBoolean(Builder::setShouldTrimFields, SHOULD_TRIM_FIELDS);
|
||||
PARSER.declareString(Builder::setGrokPattern, GROK_PATTERN);
|
||||
PARSER.declareString(Builder::setTimestampField, TIMESTAMP_FIELD);
|
||||
|
@ -191,7 +130,7 @@ public class LogStructure implements ToXContentObject {
|
|||
private final String excludeLinesPattern;
|
||||
private final List<String> inputFields;
|
||||
private final Boolean hasHeaderRow;
|
||||
private final Character separator;
|
||||
private final Character delimiter;
|
||||
private final Boolean shouldTrimFields;
|
||||
private final String grokPattern;
|
||||
private final List<String> timestampFormats;
|
||||
|
@ -202,7 +141,7 @@ public class LogStructure implements ToXContentObject {
|
|||
|
||||
public LogStructure(int numLinesAnalyzed, int numMessagesAnalyzed, String sampleStart, String charset, Boolean hasByteOrderMarker,
|
||||
Format format, String multilineStartPattern, String excludeLinesPattern, List<String> inputFields,
|
||||
Boolean hasHeaderRow, Character separator, Boolean shouldTrimFields, String grokPattern, String timestampField,
|
||||
Boolean hasHeaderRow, Character delimiter, Boolean shouldTrimFields, String grokPattern, String timestampField,
|
||||
List<String> timestampFormats, boolean needClientTimezone, Map<String, Object> mappings,
|
||||
List<String> explanation) {
|
||||
|
||||
|
@ -216,7 +155,7 @@ public class LogStructure implements ToXContentObject {
|
|||
this.excludeLinesPattern = excludeLinesPattern;
|
||||
this.inputFields = (inputFields == null) ? null : Collections.unmodifiableList(new ArrayList<>(inputFields));
|
||||
this.hasHeaderRow = hasHeaderRow;
|
||||
this.separator = separator;
|
||||
this.delimiter = delimiter;
|
||||
this.shouldTrimFields = shouldTrimFields;
|
||||
this.grokPattern = grokPattern;
|
||||
this.timestampField = timestampField;
|
||||
|
@ -266,8 +205,8 @@ public class LogStructure implements ToXContentObject {
|
|||
return hasHeaderRow;
|
||||
}
|
||||
|
||||
public Character getSeparator() {
|
||||
return separator;
|
||||
public Character getDelimiter() {
|
||||
return delimiter;
|
||||
}
|
||||
|
||||
public Boolean getShouldTrimFields() {
|
||||
|
@ -322,8 +261,8 @@ public class LogStructure implements ToXContentObject {
|
|||
if (hasHeaderRow != null) {
|
||||
builder.field(HAS_HEADER_ROW.getPreferredName(), hasHeaderRow.booleanValue());
|
||||
}
|
||||
if (separator != null) {
|
||||
builder.field(SEPARATOR.getPreferredName(), String.valueOf(separator));
|
||||
if (delimiter != null) {
|
||||
builder.field(DELIMITER.getPreferredName(), String.valueOf(delimiter));
|
||||
}
|
||||
if (shouldTrimFields != null) {
|
||||
builder.field(SHOULD_TRIM_FIELDS.getPreferredName(), shouldTrimFields.booleanValue());
|
||||
|
@ -349,7 +288,7 @@ public class LogStructure implements ToXContentObject {
|
|||
public int hashCode() {
|
||||
|
||||
return Objects.hash(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
|
||||
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern, timestampField,
|
||||
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern, timestampField,
|
||||
timestampFormats, needClientTimezone, mappings, explanation);
|
||||
}
|
||||
|
||||
|
@ -376,7 +315,7 @@ public class LogStructure implements ToXContentObject {
|
|||
Objects.equals(this.excludeLinesPattern, that.excludeLinesPattern) &&
|
||||
Objects.equals(this.inputFields, that.inputFields) &&
|
||||
Objects.equals(this.hasHeaderRow, that.hasHeaderRow) &&
|
||||
Objects.equals(this.separator, that.separator) &&
|
||||
Objects.equals(this.delimiter, that.delimiter) &&
|
||||
Objects.equals(this.shouldTrimFields, that.shouldTrimFields) &&
|
||||
Objects.equals(this.grokPattern, that.grokPattern) &&
|
||||
Objects.equals(this.timestampField, that.timestampField) &&
|
||||
|
@ -397,7 +336,7 @@ public class LogStructure implements ToXContentObject {
|
|||
private String excludeLinesPattern;
|
||||
private List<String> inputFields;
|
||||
private Boolean hasHeaderRow;
|
||||
private Character separator;
|
||||
private Character delimiter;
|
||||
private Boolean shouldTrimFields;
|
||||
private String grokPattern;
|
||||
private String timestampField;
|
||||
|
@ -441,7 +380,6 @@ public class LogStructure implements ToXContentObject {
|
|||
|
||||
public Builder setFormat(Format format) {
|
||||
this.format = Objects.requireNonNull(format);
|
||||
this.separator = format.separator();
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -465,13 +403,13 @@ public class LogStructure implements ToXContentObject {
|
|||
return this;
|
||||
}
|
||||
|
||||
public Builder setShouldTrimFields(Boolean shouldTrimFields) {
|
||||
this.shouldTrimFields = shouldTrimFields;
|
||||
public Builder setDelimiter(Character delimiter) {
|
||||
this.delimiter = delimiter;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder setSeparator(Character separator) {
|
||||
this.separator = separator;
|
||||
public Builder setShouldTrimFields(Boolean shouldTrimFields) {
|
||||
this.shouldTrimFields = shouldTrimFields;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -542,28 +480,22 @@ public class LogStructure implements ToXContentObject {
|
|||
if (hasHeaderRow != null) {
|
||||
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (separator != null) {
|
||||
throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures.");
|
||||
if (delimiter != null) {
|
||||
throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (grokPattern != null) {
|
||||
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
|
||||
}
|
||||
break;
|
||||
case CSV:
|
||||
case TSV:
|
||||
case SEMI_COLON_SEPARATED_VALUES:
|
||||
case PIPE_SEPARATED_VALUES:
|
||||
case DELIMITED:
|
||||
if (inputFields == null || inputFields.isEmpty()) {
|
||||
throw new IllegalArgumentException("Input fields must be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (hasHeaderRow == null) {
|
||||
throw new IllegalArgumentException("Has header row must be specified for [" + format + "] structures.");
|
||||
}
|
||||
Character expectedSeparator = format.separator();
|
||||
assert expectedSeparator != null;
|
||||
if (expectedSeparator.equals(separator) == false) {
|
||||
throw new IllegalArgumentException("Separator must be [" + expectedSeparator + "] for [" + format +
|
||||
"] structures.");
|
||||
if (delimiter == null) {
|
||||
throw new IllegalArgumentException("Delimiter must be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (grokPattern != null) {
|
||||
throw new IllegalArgumentException("Grok pattern may not be specified for [" + format + "] structures.");
|
||||
|
@ -576,8 +508,8 @@ public class LogStructure implements ToXContentObject {
|
|||
if (hasHeaderRow != null) {
|
||||
throw new IllegalArgumentException("Has header row may not be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (separator != null) {
|
||||
throw new IllegalArgumentException("Separator may not be specified for [" + format + "] structures.");
|
||||
if (delimiter != null) {
|
||||
throw new IllegalArgumentException("Delimiter may not be specified for [" + format + "] structures.");
|
||||
}
|
||||
if (shouldTrimFields != null) {
|
||||
throw new IllegalArgumentException("Should trim fields may not be specified for [" + format + "] structures.");
|
||||
|
@ -607,7 +539,7 @@ public class LogStructure implements ToXContentObject {
|
|||
}
|
||||
|
||||
return new LogStructure(numLinesAnalyzed, numMessagesAnalyzed, sampleStart, charset, hasByteOrderMarker, format,
|
||||
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, separator, shouldTrimFields, grokPattern,
|
||||
multilineStartPattern, excludeLinesPattern, inputFields, hasHeaderRow, delimiter, shouldTrimFields, grokPattern,
|
||||
timestampField, timestampFormats, needClientTimezone, mappings, explanation);
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -69,10 +69,10 @@ public final class LogStructureFinderManager {
        new JsonLogStructureFinderFactory(),
        new XmlLogStructureFinderFactory(),
        // ND-JSON will often also be valid (although utterly weird) CSV, so JSON must come before CSV
        new CsvLogStructureFinderFactory(),
        new TsvLogStructureFinderFactory(),
        new SemiColonSeparatedValuesLogStructureFinderFactory(),
        new PipeSeparatedValuesLogStructureFinderFactory(),
        new DelimitedLogStructureFinderFactory(',', 2, false),
        new DelimitedLogStructureFinderFactory('\t', 2, false),
        new DelimitedLogStructureFinderFactory(';', 4, false),
        new DelimitedLogStructureFinderFactory('|', 5, true),
        new TextLogStructureFinderFactory()
    ));

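The ordering of this list is significant: conceptually the manager walks the factories in order and the first one whose canCreateFromSample accepts the sample wins, which is why JSON stays ahead of the comma-delimited factory (ND-JSON lines usually parse as "valid" CSV too). An illustrative sketch of that selection loop, not the manager's literal code (the collection name and the exception are assumptions):

    LogStructureFinder findFirstMatch(List<String> explanation, String sample,
                                      String charsetName, Boolean hasByteOrderMarker) throws IOException {
        for (LogStructureFinderFactory factory : ORDERED_STRUCTURE_FACTORIES) {
            if (factory.canCreateFromSample(explanation, sample)) {
                return factory.createFromSample(explanation, sample, charsetName, hasByteOrderMarker);
            }
        }
        throw new IllegalArgumentException("input did not match any known log structure format");
    }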
@@ -21,12 +21,12 @@ import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;

final class LogStructureUtils {
public final class LogStructureUtils {

    static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
    static final String MAPPING_TYPE_SETTING = "type";
    static final String MAPPING_FORMAT_SETTING = "format";
    static final String MAPPING_PROPERTIES_SETTING = "properties";
    public static final String DEFAULT_TIMESTAMP_FIELD = "@timestamp";
    public static final String MAPPING_TYPE_SETTING = "type";
    public static final String MAPPING_FORMAT_SETTING = "format";
    public static final String MAPPING_PROPERTIES_SETTING = "properties";

    // NUMBER Grok pattern doesn't support scientific notation, so we extend it
    private static final Grok NUMBER_GROK = new Grok(Grok.getBuiltinPatterns(), "^%{NUMBER}(?:[eE][+-]?[0-3]?[0-9]{1,2})?$");
@@ -1,38 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.logstructurefinder;

import org.supercsv.prefs.CsvPreference;

import java.io.IOException;
import java.util.List;

public class PipeSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory {

    private static final CsvPreference PIPE_PREFERENCE = new CsvPreference.Builder('"', '|', "\n").build();

    /**
     * Rules are:
     * - The file must be valid pipe (<code>|</code>) separated values
     * - It must contain at least two complete records
     * - There must be at least five fields per record (otherwise files with coincidental
     *   or no pipe characters could be treated as pipe separated)
     * - Every pipe separated value record except the last must have the same number of fields
     * The reason the last record is allowed to have fewer fields than the others is that
     * it could have been truncated when the file was sampled.
     */
    @Override
    public boolean canCreateFromSample(List<String> explanation, String sample) {
        return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 5, PIPE_PREFERENCE, "pipe separated values");
    }

    @Override
    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
        throws IOException {
        return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
            PIPE_PREFERENCE, true);
    }
}
@@ -1,37 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.logstructurefinder;

import org.supercsv.prefs.CsvPreference;

import java.io.IOException;
import java.util.List;

public class SemiColonSeparatedValuesLogStructureFinderFactory implements LogStructureFinderFactory {

    /**
     * Rules are:
     * - The file must be valid semi-colon separated values
     * - It must contain at least two complete records
     * - There must be at least four fields per record (otherwise files with coincidental
     *   or no semi-colons could be treated as semi-colon separated)
     * - Every semi-colon separated value record except the last must have the same number of fields
     * The reason the last record is allowed to have fewer fields than the others is that
     * it could have been truncated when the file was sampled.
     */
    @Override
    public boolean canCreateFromSample(List<String> explanation, String sample) {
        return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 4,
            CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, "semi-colon separated values");
    }

    @Override
    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
        throws IOException {
        return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
            CsvPreference.EXCEL_NORTH_EUROPE_PREFERENCE, false);
    }
}
@@ -23,13 +23,13 @@ public class TsvLogStructureFinderFactory implements LogStructureFinderFactory {
     */
    @Override
    public boolean canCreateFromSample(List<String> explanation, String sample) {
        return SeparatedValuesLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV");
        return DelimitedLogStructureFinder.canCreateFromSample(explanation, sample, 2, CsvPreference.TAB_PREFERENCE, "TSV");
    }

    @Override
    public LogStructureFinder createFromSample(List<String> explanation, String sample, String charsetName, Boolean hasByteOrderMarker)
        throws IOException {
        return SeparatedValuesLogStructureFinder.makeSeparatedValuesLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
        return DelimitedLogStructureFinder.makeDelimitedLogStructureFinder(explanation, sample, charsetName, hasByteOrderMarker,
            CsvPreference.TAB_PREFERENCE, false);
    }
}

@@ -1,38 +0,0 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.logstructurefinder;

public class CsvLogStructureFinderFactoryTests extends LogStructureTestCase {

    private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory();

    // No need to check JSON or XML because they come earlier in the order we check formats

    public void testCanCreateFromSampleGivenCsv() {

        assertTrue(factory.canCreateFromSample(explanation, CSV_SAMPLE));
    }

    public void testCanCreateFromSampleGivenTsv() {

        assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
    }

    public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {

        assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
    }

    public void testCanCreateFromSampleGivenPipeSeparatedValues() {

        assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
    }

    public void testCanCreateFromSampleGivenText() {

        assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
    }
}
@ -0,0 +1,93 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
package org.elasticsearch.xpack.ml.logstructurefinder;
|
||||
|
||||
public class DelimitedLogStructureFinderFactoryTests extends LogStructureTestCase {
|
||||
|
||||
private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);
|
||||
private LogStructureFinderFactory tsvFactory = new DelimitedLogStructureFinderFactory('\t', 2, false);
|
||||
private LogStructureFinderFactory semiColonDelimitedfactory = new DelimitedLogStructureFinderFactory(';', 4, false);
|
||||
private LogStructureFinderFactory pipeDelimitedFactory = new DelimitedLogStructureFinderFactory('|', 5, true);
|
||||
|
||||
// CSV - no need to check JSON or XML because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateCsvFromSampleGivenCsv() {
|
||||
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, CSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateCsvFromSampleGivenTsv() {
|
||||
|
||||
assertFalse(csvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateCsvFromSampleGivenSemiColonDelimited() {
|
||||
|
||||
assertFalse(csvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateCsvFromSampleGivenPipeDelimited() {
|
||||
|
||||
assertFalse(csvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateCsvFromSampleGivenText() {
|
||||
|
||||
assertFalse(csvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
|
||||
// TSV - no need to check JSON, XML or CSV because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateTsvFromSampleGivenTsv() {
|
||||
|
||||
assertTrue(tsvFactory.canCreateFromSample(explanation, TSV_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateTsvFromSampleGivenSemiColonDelimited() {
|
||||
|
||||
assertFalse(tsvFactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateTsvFromSampleGivenPipeDelimited() {
|
||||
|
||||
assertFalse(tsvFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateTsvFromSampleGivenText() {
|
||||
|
||||
assertFalse(tsvFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
|
||||
// Semi-colon delimited - no need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreateSemiColonDelimitedFromSampleGivenSemiColonDelimited() {
|
||||
|
||||
assertTrue(semiColonDelimitedfactory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateSemiColonDelimitedFromSampleGivenPipeDelimited() {
|
||||
|
||||
assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreateSemiColonDelimitedFromSampleGivenText() {
|
||||
|
||||
assertFalse(semiColonDelimitedfactory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
|
||||
// Pipe delimited - no need to check JSON, XML, CSV, TSV or semi-colon delimited
|
||||
// values because they come earlier in the order we check formats
|
||||
|
||||
public void testCanCreatePipeDelimitedFromSampleGivenPipeDelimited() {
|
||||
|
||||
assertTrue(pipeDelimitedFactory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
|
||||
}
|
||||
|
||||
public void testCanCreatePipeDelimitedFromSampleGivenText() {
|
||||
|
||||
assertFalse(pipeDelimitedFactory.canCreateFromSample(explanation, TEXT_SAMPLE));
|
||||
}
|
||||
}
|
|
@ -12,27 +12,27 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
|
||||
import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinFieldwiseCompareRows;
|
||||
import static org.elasticsearch.xpack.ml.logstructurefinder.SeparatedValuesLogStructureFinder.levenshteinDistance;
|
||||
import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinFieldwiseCompareRows;
|
||||
import static org.elasticsearch.xpack.ml.logstructurefinder.DelimitedLogStructureFinder.levenshteinDistance;
|
||||
import static org.hamcrest.Matchers.arrayContaining;
|
||||
|
||||
public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase {
|
||||
public class DelimitedLogStructureFinderTests extends LogStructureTestCase {
|
||||
|
||||
private LogStructureFinderFactory factory = new CsvLogStructureFinderFactory();
|
||||
private LogStructureFinderFactory csvFactory = new DelimitedLogStructureFinderFactory(',', 2, false);
|
||||
|
||||
public void testCreateConfigsGivenCompleteCsv() throws Exception {
|
||||
String sample = "time,message\n" +
|
||||
"2018-05-17T13:41:23,hello\n" +
|
||||
"2018-05-17T13:41:32,hello again\n";
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
|
||||
LogStructure structure = structureFinder.getStructure();
|
||||
|
||||
assertEquals(LogStructure.Format.CSV, structure.getFormat());
|
||||
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
|
||||
assertEquals(charset, structure.getCharset());
|
||||
if (hasByteOrderMarker == null) {
|
||||
assertNull(structure.getHasByteOrderMarker());
|
||||
|
@ -41,7 +41,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
}
|
||||
assertEquals("^\"?time\"?,\"?message\"?", structure.getExcludeLinesPattern());
|
||||
assertEquals("^\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
||||
assertEquals(Character.valueOf(','), structure.getSeparator());
|
||||
assertEquals(Character.valueOf(','), structure.getDelimiter());
|
||||
assertTrue(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertEquals(Arrays.asList("time", "message"), structure.getInputFields());
|
||||
|
@ -55,15 +55,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"\"hello\n" +
|
||||
"world\",2018-05-17T13:41:23,1\n" +
|
||||
"\"hello again\n"; // note that this last record is truncated
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
|
||||
LogStructure structure = structureFinder.getStructure();
|
||||
|
||||
assertEquals(LogStructure.Format.CSV, structure.getFormat());
|
||||
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
|
||||
assertEquals(charset, structure.getCharset());
|
||||
if (hasByteOrderMarker == null) {
|
||||
assertNull(structure.getHasByteOrderMarker());
|
||||
|
@ -72,7 +72,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
}
|
||||
assertEquals("^\"?message\"?,\"?time\"?,\"?count\"?", structure.getExcludeLinesPattern());
|
||||
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
||||
assertEquals(Character.valueOf(','), structure.getSeparator());
|
||||
assertEquals(Character.valueOf(','), structure.getDelimiter());
|
||||
assertTrue(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertEquals(Arrays.asList("message", "time", "count"), structure.getInputFields());
|
||||
|
@ -88,15 +88,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
|
||||
"1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
|
||||
"1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
|
||||
LogStructure structure = structureFinder.getStructure();
|
||||
|
||||
assertEquals(LogStructure.Format.CSV, structure.getFormat());
|
||||
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
|
||||
assertEquals(charset, structure.getCharset());
|
||||
if (hasByteOrderMarker == null) {
|
||||
assertNull(structure.getHasByteOrderMarker());
|
||||
|
@ -108,7 +108,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?,\"?\"?,\"?\"?",
|
||||
structure.getExcludeLinesPattern());
|
||||
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
||||
assertEquals(Character.valueOf(','), structure.getSeparator());
|
||||
assertEquals(Character.valueOf(','), structure.getDelimiter());
|
||||
assertTrue(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance",
|
||||
|
@ -126,15 +126,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"2,2016-12-31 15:15:01,2016-12-31 15:15:09,1,.00,1,N,264,264,2,1,0,0.5,0,0,0.3,1.8,,\n" +
|
||||
"1,2016-12-01 00:00:01,2016-12-01 00:10:22,1,1.60,1,N,163,143,2,9,0.5,0.5,0,0,0.3,10.3,,\n" +
|
||||
"1,2016-12-01 00:00:01,2016-12-01 00:11:01,1,1.40,1,N,164,229,1,9,0.5,0.5,2.05,0,0.3,12.35,,\n";
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
|
||||
LogStructure structure = structureFinder.getStructure();
|
||||
|
||||
assertEquals(LogStructure.Format.CSV, structure.getFormat());
|
||||
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
|
||||
assertEquals(charset, structure.getCharset());
|
||||
if (hasByteOrderMarker == null) {
|
||||
assertNull(structure.getHasByteOrderMarker());
|
||||
|
@ -146,7 +146,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
"\"?extra\"?,\"?mta_tax\"?,\"?tip_amount\"?,\"?tolls_amount\"?,\"?improvement_surcharge\"?,\"?total_amount\"?",
|
||||
structure.getExcludeLinesPattern());
|
||||
assertEquals("^.*?,\"?\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}", structure.getMultilineStartPattern());
|
||||
assertEquals(Character.valueOf(','), structure.getSeparator());
|
||||
assertEquals(Character.valueOf(','), structure.getDelimiter());
|
||||
assertTrue(structure.getHasHeaderRow());
|
||||
assertNull(structure.getShouldTrimFields());
|
||||
assertEquals(Arrays.asList("VendorID", "tpep_pickup_datetime", "tpep_dropoff_datetime", "passenger_count", "trip_distance",
|
||||
|
@ -161,15 +161,15 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
|
|||
String sample = "\"pos_id\",\"trip_id\",\"latitude\",\"longitude\",\"altitude\",\"timestamp\"\n" +
|
||||
"\"1\",\"3\",\"4703.7815\",\"1527.4713\",\"359.9\",\"2017-01-19 16:19:04.742113\"\n" +
|
||||
"\"2\",\"3\",\"4703.7815\",\"1527.4714\",\"359.9\",\"2017-01-19 16:19:05.741890\"\n";
|
||||
assertTrue(factory.canCreateFromSample(explanation, sample));
|
||||
assertTrue(csvFactory.canCreateFromSample(explanation, sample));
|
||||
|
||||
String charset = randomFrom(POSSIBLE_CHARSETS);
|
||||
Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset);
|
||||
LogStructureFinder structureFinder = factory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
LogStructureFinder structureFinder = csvFactory.createFromSample(explanation, sample, charset, hasByteOrderMarker);
|
||||
|
||||
LogStructure structure = structureFinder.getStructure();
|
||||
|
||||
assertEquals(LogStructure.Format.CSV, structure.getFormat());
|
||||
assertEquals(LogStructure.Format.DELIMITED, structure.getFormat());
|
||||
assertEquals(charset, structure.getCharset());
|
||||
if (hasByteOrderMarker == null) {
|
||||
assertNull(structure.getHasByteOrderMarker());
|
||||
|
@@ -179,7 +179,7 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
assertEquals("^\"?pos_id\"?,\"?trip_id\"?,\"?latitude\"?,\"?longitude\"?,\"?altitude\"?,\"?timestamp\"?",
structure.getExcludeLinesPattern());
assertNull(structure.getMultilineStartPattern());
- assertEquals(Character.valueOf(','), structure.getSeparator());
+ assertEquals(Character.valueOf(','), structure.getDelimiter());
assertTrue(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertEquals(Arrays.asList("pos_id", "trip_id", "latitude", "longitude", "altitude", "timestamp"), structure.getInputFields());
@@ -195,8 +195,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" +
"2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n";

- Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation,
- SeparatedValuesLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1());
+ Tuple<Boolean, String[]> header = DelimitedLogStructureFinder.findHeaderFromSample(explanation,
+ DelimitedLogStructureFinder.readRows(withHeader, CsvPreference.EXCEL_PREFERENCE).v1());

assertTrue(header.v1());
assertThat(header.v2(), arrayContaining("time", "airline", "responsetime", "sourcetype"));
@@ -208,8 +208,8 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase
"2014-06-23 00:00:01Z,JBU,877.5927,farequote\n" +
"2014-06-23 00:00:01Z,KLM,1355.4812,farequote\n";

- Tuple<Boolean, String[]> header = SeparatedValuesLogStructureFinder.findHeaderFromSample(explanation,
- SeparatedValuesLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1());
+ Tuple<Boolean, String[]> header = DelimitedLogStructureFinder.findHeaderFromSample(explanation,
+ DelimitedLogStructureFinder.readRows(withoutHeader, CsvPreference.EXCEL_PREFERENCE).v1());

assertFalse(header.v1());
assertThat(header.v2(), arrayContaining("column1", "column2", "column3", "column4"));
@@ -251,43 +251,43 @@ public class SeparatedValuesLogStructureFinderTests extends LogStructureTestCase

public void testLineHasUnescapedQuote() {

- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE));
- assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE));
- assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,c", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b\",c", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,b,c\"", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\",c", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,\"b\"\"\",c", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a,b,\"c\"\"\"", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\",b,c", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\",b,c", CsvPreference.EXCEL_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a,\"\"b\",c", CsvPreference.EXCEL_PREFERENCE));
+ assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words,b,c", CsvPreference.EXCEL_PREFERENCE));
+ assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\",b,c", CsvPreference.EXCEL_PREFERENCE));

- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE));
- assertFalse(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE));
- assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE));
- assertTrue(SeparatedValuesLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\tc", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\"\tc", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\tb\tc\"", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\tc", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\t\"b\"\"\"\tc", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("a\tb\t\"c\"\"\"", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"\"\"a\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\"\"\"\tb\tc", CsvPreference.TAB_PREFERENCE));
+ assertFalse(DelimitedLogStructureFinder.lineHasUnescapedQuote("\"a\t\"\"b\"\tc", CsvPreference.TAB_PREFERENCE));
+ assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("between\"words\tb\tc", CsvPreference.TAB_PREFERENCE));
+ assertTrue(DelimitedLogStructureFinder.lineHasUnescapedQuote("x and \"y\"\tb\tc", CsvPreference.TAB_PREFERENCE));
}

public void testRowContainsDuplicateNonEmptyValues() {

- assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a")));
- assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("")));
- assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c")));
- assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a")));
- assertTrue(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b")));
- assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));
- assertFalse(SeparatedValuesLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", "")));
+ assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("a")));
+ assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Collections.singletonList("")));
+ assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "c")));
+ assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a")));
+ assertTrue(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "b")));
+ assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));
+ assertFalse(DelimitedLogStructureFinder.rowContainsDuplicateNonEmptyValues(Arrays.asList("", "a", "")));
}
}
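Note: the rowContainsDuplicateNonEmptyValues assertions above pin down a simple rule used when deciding whether a sample row can be a header. The following is a standalone re-implementation sketch for illustration only, not the Elasticsearch class; it assumes nothing beyond the behaviour those assertions state (empty cells may repeat, non-empty cells may not).

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Illustrative sketch (hypothetical class name), mirroring the asserted behaviour.
public final class DuplicateValueCheckSketch {

    static boolean rowContainsDuplicateNonEmptyValues(List<String> row) {
        Set<String> seen = new HashSet<>();
        for (String value : row) {
            // Empty cells may repeat freely; only a repeated non-empty value counts as a duplicate.
            if (value.isEmpty() == false && seen.add(value) == false) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        System.out.println(rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "b", "a"))); // true
        System.out.println(rowContainsDuplicateNonEmptyValues(Arrays.asList("a", "", "")));   // false
    }
}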
@@ -29,14 +29,14 @@ public class JsonLogStructureFinderFactoryTests extends LogStructureTestCase {
assertFalse(factory.canCreateFromSample(explanation, TSV_SAMPLE));
}

- public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
+ public void testCanCreateFromSampleGivenSemiColonDelimited() {

- assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
+ assertFalse(factory.canCreateFromSample(explanation, SEMI_COLON_DELIMITED_SAMPLE));
}

- public void testCanCreateFromSampleGivenPipeSeparatedValues() {
+ public void testCanCreateFromSampleGivenPipeDelimited() {

- assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
+ assertFalse(factory.canCreateFromSample(explanation, PIPE_DELIMITED_SAMPLE));
}

public void testCanCreateFromSampleGivenText() {

@@ -29,7 +29,7 @@ public class JsonLogStructureFinderTests extends LogStructureTestCase {
}
assertNull(structure.getExcludeLinesPattern());
assertNull(structure.getMultilineStartPattern());
- assertNull(structure.getSeparator());
+ assertNull(structure.getDelimiter());
assertNull(structure.getHasHeaderRow());
assertNull(structure.getShouldTrimFields());
assertNull(structure.getGrokPattern());

@@ -61,7 +61,7 @@ public class LogStructureFinderManagerTests extends LogStructureTestCase {
public void testMakeBestStructureGivenCsv() throws Exception {
assertThat(structureFinderManager.makeBestStructureFinder(explanation, "time,message\n" +
"2018-05-17T13:41:23,hello\n", StandardCharsets.UTF_8.name(), randomBoolean()),
- instanceOf(SeparatedValuesLogStructureFinder.class));
+ instanceOf(DelimitedLogStructureFinder.class));
}

public void testMakeBestStructureGivenText() throws Exception {

@@ -34,14 +34,14 @@ public abstract class LogStructureTestCase extends ESTestCase {
"\"level\":\"INFO\",\"pid\":42,\"thread\":\"0x7fff7d2a8000\",\"message\":\"message 2\",\"class\":\"ml\"," +
"\"method\":\"core::SomeNoiseMaker\",\"file\":\"Noisemaker.cc\",\"line\":333}\n";

- protected static final String PIPE_SEPARATED_VALUES_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" +
+ protected static final String PIPE_DELIMITED_SAMPLE = "2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |" +
"listening on 0.0.0.0:9987, :::9987\n" +
"2018-01-06 17:19:44.465252|INFO |VirtualServer |1 |client " +
"'User1'(id:2) changed default admin channelgroup to 'Guest'(id:8)\n" +
"2018-01-06 17:21:25.764368|INFO |VirtualServer |1 |client " +
"'User1'(id:2) was added to channelgroup 'Channel Admin'(id:5) by client 'User1'(id:2) in channel 'Default Channel'(id:1)";

- protected static final String SEMI_COLON_SEPARATED_VALUES_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" +
+ protected static final String SEMI_COLON_DELIMITED_SAMPLE = "\"pos_id\";\"trip_id\";\"latitude\";\"longitude\";\"altitude\";" +
"\"timestamp\"\n" +
"\"1\";\"3\";\"4703.7815\";\"1527.4713\";\"359.9\";\"2017-01-19 16:19:04.742113\"\n" +
"\"2\";\"3\";\"4703.7815\";\"1527.4714\";\"359.9\";\"2017-01-19 16:19:05.741890\"\n" +

@@ -43,14 +43,12 @@ public class LogStructureTests extends AbstractXContentTestCase<LogStructure> {
builder.setExcludeLinesPattern(randomAlphaOfLength(100));
}

- if (format.isSeparatedValues() || (format.supportsNesting() && randomBoolean())) {
+ if (format == LogStructure.Format.DELIMITED || (format.supportsNesting() && randomBoolean())) {
builder.setInputFields(Arrays.asList(generateRandomStringArray(10, 10, false, false)));
}
- if (format.isSeparatedValues()) {
+ if (format == LogStructure.Format.DELIMITED) {
builder.setHasHeaderRow(randomBoolean());
- if (rarely()) {
- builder.setSeparator(format.separator());
- }
+ builder.setDelimiter(randomFrom(',', '\t', ';', '|'));
}
if (format.isSemiStructured()) {
builder.setGrokPattern(randomAlphaOfLength(100));

@@ -1,23 +0,0 @@
- /*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
- package org.elasticsearch.xpack.ml.logstructurefinder;
-
- public class PipeSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase {
-
- private LogStructureFinderFactory factory = new PipeSeparatedValuesLogStructureFinderFactory();
-
- // No need to check JSON, XML, CSV, TSV or semi-colon separated values because they come earlier in the order we check formats
-
- public void testCanCreateFromSampleGivenPipeSeparatedValues() {
-
- assertTrue(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
- }
-
- public void testCanCreateFromSampleGivenText() {
-
- assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
- }
- }

@@ -1,28 +0,0 @@
- /*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
- package org.elasticsearch.xpack.ml.logstructurefinder;
-
- public class SemiColonSeparatedValuesLogStructureFinderFactoryTests extends LogStructureTestCase {
-
- private LogStructureFinderFactory factory = new SemiColonSeparatedValuesLogStructureFinderFactory();
-
- // No need to check JSON, XML, CSV or TSV because they come earlier in the order we check formats
-
- public void testCanCreateFromSampleGivenSemiColonSeparatedValues() {
-
- assertTrue(factory.canCreateFromSample(explanation, SEMI_COLON_SEPARATED_VALUES_SAMPLE));
- }
-
- public void testCanCreateFromSampleGivenPipeSeparatedValues() {
-
- assertFalse(factory.canCreateFromSample(explanation, PIPE_SEPARATED_VALUES_SAMPLE));
- }
-
- public void testCanCreateFromSampleGivenText() {
-
- assertFalse(factory.canCreateFromSample(explanation, TEXT_SAMPLE));
- }
- }
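Taken together, the hunks above fold the CSV, TSV, semi-colon and pipe variants into a single DELIMITED format that differs only in its delimiter character (',', '\t', ';' or '|'), which is why the per-character factory test classes are deleted. A hypothetical condensed model of that shape, for illustration only; the class name, enum values and constructor here are assumptions, not the real LogStructure API:

import java.util.Arrays;
import java.util.List;

// Illustrative sketch of a delimiter-parameterised structure (not the Elasticsearch class).
public final class DelimitedStructureSketch {

    // SEMI_STRUCTURED_TEXT is an assumed placeholder; JSON, XML and DELIMITED appear in the diff above.
    enum Format { JSON, XML, DELIMITED, SEMI_STRUCTURED_TEXT }

    static final List<Character> SUPPORTED_DELIMITERS = Arrays.asList(',', '\t', ';', '|');

    final Format format = Format.DELIMITED;
    final char delimiter;

    DelimitedStructureSketch(char delimiter) {
        // One format, many delimiters, instead of one format per delimiter.
        if (SUPPORTED_DELIMITERS.contains(delimiter) == false) {
            throw new IllegalArgumentException("unsupported delimiter: " + delimiter);
        }
        this.delimiter = delimiter;
    }

    public static void main(String[] args) {
        DelimitedStructureSketch semiColon = new DelimitedStructureSketch(';');
        System.out.println(semiColon.format + " delimited by '" + semiColon.delimiter + "'");
    }
}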