diff --git a/docs/reference/ml/apis/find-file-structure.asciidoc b/docs/reference/ml/apis/find-file-structure.asciidoc index f9a583a027a..61987221a76 100644 --- a/docs/reference/ml/apis/find-file-structure.asciidoc +++ b/docs/reference/ml/apis/find-file-structure.asciidoc @@ -365,49 +365,49 @@ If the request does not encounter errors, you receive the following result: "page_count" : { "count" : 24, "cardinality" : 24, - "min_value" : 180.0, - "max_value" : 768.0, + "min_value" : 180, + "max_value" : 768, "mean_value" : 387.0833333333333, "median_value" : 329.5, "top_hits" : [ { - "value" : 180.0, + "value" : 180, "count" : 1 }, { - "value" : 208.0, + "value" : 208, "count" : 1 }, { - "value" : 224.0, + "value" : 224, "count" : 1 }, { - "value" : 227.0, + "value" : 227, "count" : 1 }, { - "value" : 268.0, + "value" : 268, "count" : 1 }, { - "value" : 271.0, + "value" : 271, "count" : 1 }, { - "value" : 275.0, + "value" : 275, "count" : 1 }, { - "value" : 288.0, + "value" : 288, "count" : 1 }, { - "value" : 304.0, + "value" : 304, "count" : 1 }, { - "value" : 311.0, + "value" : 311, "count" : 1 } ] diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java index 8f624d000cc..b371ca739bb 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStats.java @@ -123,16 +123,16 @@ public class FieldStats implements ToXContentObject, Writeable { builder.field(COUNT.getPreferredName(), count); builder.field(CARDINALITY.getPreferredName(), cardinality); if (minValue != null) { - builder.field(MIN_VALUE.getPreferredName(), minValue); + builder.field(MIN_VALUE.getPreferredName(), toIntegerIfInteger(minValue)); } if (maxValue != null) { - 
builder.field(MAX_VALUE.getPreferredName(), maxValue); + builder.field(MAX_VALUE.getPreferredName(), toIntegerIfInteger(maxValue)); } if (meanValue != null) { - builder.field(MEAN_VALUE.getPreferredName(), meanValue); + builder.field(MEAN_VALUE.getPreferredName(), toIntegerIfInteger(meanValue)); } if (medianValue != null) { - builder.field(MEDIAN_VALUE.getPreferredName(), medianValue); + builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue)); } if (topHits.isEmpty() == false) { builder.field(TOP_HITS.getPreferredName(), topHits); @@ -142,6 +142,15 @@ public class FieldStats implements ToXContentObject, Writeable { return builder; } + public static Number toIntegerIfInteger(double d) { + + if (d >= Integer.MIN_VALUE && d <= Integer.MAX_VALUE && Double.compare(d, StrictMath.rint(d)) == 0) { + return (int) d; + } + + return d; + } + @Override public int hashCode() { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java index 30f7c8f5576..ec46d25edd4 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/filestructurefinder/FieldStatsTests.java @@ -32,8 +32,13 @@ public class FieldStatsTests extends AbstractSerializingTestCase { Double medianValue = null; boolean isMetric = randomBoolean(); if (isMetric) { - minValue = randomDouble(); - maxValue = randomDouble(); + if (randomBoolean()) { + minValue = randomDouble(); + maxValue = randomDouble(); + } else { + minValue = (double) randomInt(); + maxValue = (double) randomInt(); + } meanValue = randomDouble(); medianValue = randomDouble(); } @@ -42,7 +47,7 @@ public class FieldStatsTests extends AbstractSerializingTestCase { for (int i = 0; i < Math.min(10, cardinality); ++i) { Map topHit = new 
LinkedHashMap<>(); if (isMetric) { - topHit.put("value", randomDouble()); + topHit.put("value", randomBoolean() ? randomDouble() : (double) randomInt()); } else { topHit.put("value", randomAlphaOfLength(20)); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FieldStatsCalculator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FieldStatsCalculator.java index 130a37dbc19..40dff9116d7 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FieldStatsCalculator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/filestructurefinder/FieldStatsCalculator.java @@ -15,6 +15,7 @@ import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; +import java.util.function.Function; import java.util.stream.Collectors; /** @@ -152,18 +153,20 @@ public class FieldStatsCalculator { List> findNumericTopHits(int numTopHits) { assert countsByNumericValue != null; - return findTopHits(numTopHits, countsByNumericValue, Comparator.comparing(Map.Entry::getKey)); + return findTopHits(numTopHits, countsByNumericValue, Comparator.comparing(Map.Entry::getKey), + FieldStats::toIntegerIfInteger); } List> findStringTopHits(int numTopHits) { - return findTopHits(numTopHits, countsByStringValue, Comparator.comparing(Map.Entry::getKey)); + return findTopHits(numTopHits, countsByStringValue, Comparator.comparing(Map.Entry::getKey), s -> s); } /** * Order by descending count, with a secondary sort to ensure reproducibility of results. 
*/ private static List> findTopHits(int numTopHits, Map countsByValue, - Comparator> secondarySort) { + Comparator> secondarySort, + Function outputMapper) { List> sortedByCount = countsByValue.entrySet().stream() .sorted(Comparator.comparing(Map.Entry::getValue, Comparator.reverseOrder()).thenComparing(secondarySort)) @@ -174,7 +177,7 @@ public class FieldStatsCalculator { for (Map.Entry entry : sortedByCount) { Map topHit = new LinkedHashMap<>(3); - topHit.put("value", entry.getKey()); + topHit.put("value", outputMapper.apply(entry.getKey())); topHit.put("count", entry.getValue()); topHits.add(topHit); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FieldStatsCalculatorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FieldStatsCalculatorTests.java index 08035dc741d..30445a4a77c 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FieldStatsCalculatorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FieldStatsCalculatorTests.java @@ -73,16 +73,16 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase { FieldStatsCalculator calculator = new FieldStatsCalculator(); - calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5", "6", "5", "16", "4", "5")); + calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5.2", "6", "5.2", "16", "4", "5.2")); List> topHits = calculator.findNumericTopHits(3); assertEquals(3, topHits.size()); - assertEquals(4.0, topHits.get(0).get("value")); + assertEquals(4, topHits.get(0).get("value")); assertEquals(4, topHits.get(0).get("count")); - assertEquals(5.0, topHits.get(1).get("value")); + assertEquals(5.2, topHits.get(1).get("value")); assertEquals(3, topHits.get(1).get("count")); - assertEquals(6.0, topHits.get(2).get("value")); + assertEquals(6, topHits.get(2).get("value")); assertEquals(2, topHits.get(2).get("count")); } @@ -124,25 +124,25 @@ 
public class FieldStatsCalculatorTests extends FileStructureTestCase { FieldStatsCalculator calculator = new FieldStatsCalculator(); - calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5", "6", "5", "16", "4", "5")); + calculator.accept(Arrays.asList("4.5", "4.5", "7", "4.5", "6", "5", "6", "5", "25", "4.5", "5")); FieldStats stats = calculator.calculate(3); assertEquals(11L, stats.getCount()); assertEquals(5, stats.getCardinality()); - assertEquals(4.0, stats.getMinValue(), 1e-10); - assertEquals(16.0, stats.getMaxValue(), 1e-10); - assertEquals(6.0, stats.getMeanValue(), 1e-10); + assertEquals(4.5, stats.getMinValue(), 1e-10); + assertEquals(25.0, stats.getMaxValue(), 1e-10); + assertEquals(7.0, stats.getMeanValue(), 1e-10); assertEquals(5.0, stats.getMedianValue(), 1e-10); List> topHits = stats.getTopHits(); assertEquals(3, topHits.size()); - assertEquals(4.0, topHits.get(0).get("value")); + assertEquals(4.5, topHits.get(0).get("value")); assertEquals(4, topHits.get(0).get("count")); - assertEquals(5.0, topHits.get(1).get("value")); + assertEquals(5, topHits.get(1).get("value")); assertEquals(3, topHits.get(1).get("count")); - assertEquals(6.0, topHits.get(2).get("value")); + assertEquals(6, topHits.get(2).get("value")); assertEquals(2, topHits.get(2).get("count")); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java index 8dbfb6a8047..901fca83ffd 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/filestructurefinder/FileStructureUtilsTests.java @@ -338,7 +338,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase { assertEquals(3, fieldStats.size()); assertEquals(new FieldStats(2, 2, makeTopHits("not a time", 1, "whatever", 1)), 
fieldStats.get("foo")); assertEquals(new FieldStats(2, 2, makeTopHits("2018-05-24 17:28:31,735", 1, "2018-05-29 11:53:02,837", 1)), fieldStats.get("time")); - assertEquals(new FieldStats(2, 2, 17.0, 42.0, 29.5, 29.5, makeTopHits(17.0, 1, 42.0, 1)), fieldStats.get("bar")); + assertEquals(new FieldStats(2, 2, 17.0, 42.0, 29.5, 29.5, makeTopHits(17, 1, 42, 1)), fieldStats.get("bar")); assertNull(fieldStats.get("nothing")); } diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml index 1f6964b9193..fcacdb9cdc7 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/find_file_structure.yml @@ -38,6 +38,11 @@ - match: { field_stats.airline.cardinality: 2 } - match: { field_stats.responsetime.count: 3 } - match: { field_stats.responsetime.cardinality: 3 } + - match: { field_stats.responsetime.min_value: 132.2046 } + - match: { field_stats.responsetime.max_value: 990.4628 } + # Not asserting on field_stats.responsetime.mean_value as it's a recurring decimal + # so its representation in the response could cause spurious failures + - match: { field_stats.responsetime.median_value: 134.2046 } - match: { field_stats.sourcetype.count: 3 } - match: { field_stats.sourcetype.cardinality: 1 } - match: { field_stats.time.count: 3 } @@ -89,6 +94,11 @@ - match: { field_stats.airline.cardinality: 2 } - match: { field_stats.responsetime.count: 3 } - match: { field_stats.responsetime.cardinality: 3 } + - match: { field_stats.responsetime.min_value: 132.2046 } + - match: { field_stats.responsetime.max_value: 990.4628 } + # Not asserting on field_stats.responsetime.mean_value as it's a recurring decimal + # so its representation in the response could cause spurious failures + - match: { field_stats.responsetime.median_value: 134.2046 } - match: { field_stats.sourcetype.count: 3 } - match: {
field_stats.sourcetype.cardinality: 1 } - match: { field_stats.time.count: 3 }