[ML] Display integers without .0 in file structure field stats (#33947)

Previously numeric values in the field_stats created by the
find_file_structure endpoint were always output with a
decimal point.  This looked unfriendly and unnatural for
fields that clearly store integer values.  This change
converts integer values to type Integer before output in
the file structure field stats.
This commit is contained in:
David Roberts 2018-09-22 15:48:59 +01:00 committed by GitHub
parent 7944a0cb25
commit b89551c452
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 62 additions and 35 deletions

View File

@ -365,49 +365,49 @@ If the request does not encounter errors, you receive the following result:
"page_count" : {
"count" : 24,
"cardinality" : 24,
"min_value" : 180.0,
"max_value" : 768.0,
"min_value" : 180,
"max_value" : 768,
"mean_value" : 387.0833333333333,
"median_value" : 329.5,
"top_hits" : [
{
"value" : 180.0,
"value" : 180,
"count" : 1
},
{
"value" : 208.0,
"value" : 208,
"count" : 1
},
{
"value" : 224.0,
"value" : 224,
"count" : 1
},
{
"value" : 227.0,
"value" : 227,
"count" : 1
},
{
"value" : 268.0,
"value" : 268,
"count" : 1
},
{
"value" : 271.0,
"value" : 271,
"count" : 1
},
{
"value" : 275.0,
"value" : 275,
"count" : 1
},
{
"value" : 288.0,
"value" : 288,
"count" : 1
},
{
"value" : 304.0,
"value" : 304,
"count" : 1
},
{
"value" : 311.0,
"value" : 311,
"count" : 1
}
]

View File

@ -123,16 +123,16 @@ public class FieldStats implements ToXContentObject, Writeable {
builder.field(COUNT.getPreferredName(), count);
builder.field(CARDINALITY.getPreferredName(), cardinality);
if (minValue != null) {
builder.field(MIN_VALUE.getPreferredName(), minValue);
builder.field(MIN_VALUE.getPreferredName(), toIntegerIfInteger(minValue));
}
if (maxValue != null) {
builder.field(MAX_VALUE.getPreferredName(), maxValue);
builder.field(MAX_VALUE.getPreferredName(), toIntegerIfInteger(maxValue));
}
if (meanValue != null) {
builder.field(MEAN_VALUE.getPreferredName(), meanValue);
builder.field(MEAN_VALUE.getPreferredName(), toIntegerIfInteger(meanValue));
}
if (medianValue != null) {
builder.field(MEDIAN_VALUE.getPreferredName(), medianValue);
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
}
if (topHits.isEmpty() == false) {
builder.field(TOP_HITS.getPreferredName(), topHits);
@ -142,6 +142,15 @@ public class FieldStats implements ToXContentObject, Writeable {
return builder;
}
public static Number toIntegerIfInteger(double d) {
if (d >= Integer.MIN_VALUE && d <= Integer.MAX_VALUE && Double.compare(d, StrictMath.rint(d)) == 0) {
return (int) d;
}
return d;
}
@Override
public int hashCode() {

View File

@ -32,8 +32,13 @@ public class FieldStatsTests extends AbstractSerializingTestCase<FieldStats> {
Double medianValue = null;
boolean isMetric = randomBoolean();
if (isMetric) {
if (randomBoolean()) {
minValue = randomDouble();
maxValue = randomDouble();
} else {
minValue = (double) randomInt();
maxValue = (double) randomInt();
}
meanValue = randomDouble();
medianValue = randomDouble();
}
@ -42,7 +47,7 @@ public class FieldStatsTests extends AbstractSerializingTestCase<FieldStats> {
for (int i = 0; i < Math.min(10, cardinality); ++i) {
Map<String, Object> topHit = new LinkedHashMap<>();
if (isMetric) {
topHit.put("value", randomDouble());
topHit.put("value", randomBoolean() ? randomDouble() : (double) randomInt());
} else {
topHit.put("value", randomAlphaOfLength(20));
}

View File

@ -15,6 +15,7 @@ import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;
/**
@ -152,18 +153,20 @@ public class FieldStatsCalculator {
List<Map<String, Object>> findNumericTopHits(int numTopHits) {
assert countsByNumericValue != null;
return findTopHits(numTopHits, countsByNumericValue, Comparator.comparing(Map.Entry<Double, Integer>::getKey));
return findTopHits(numTopHits, countsByNumericValue, Comparator.comparing(Map.Entry<Double, Integer>::getKey),
FieldStats::toIntegerIfInteger);
}
List<Map<String, Object>> findStringTopHits(int numTopHits) {
return findTopHits(numTopHits, countsByStringValue, Comparator.comparing(Map.Entry<String, Integer>::getKey));
return findTopHits(numTopHits, countsByStringValue, Comparator.comparing(Map.Entry<String, Integer>::getKey), s -> s);
}
/**
* Order by descending count, with a secondary sort to ensure reproducibility of results.
*/
private static <T> List<Map<String, Object>> findTopHits(int numTopHits, Map<T, Integer> countsByValue,
Comparator<Map.Entry<T, Integer>> secondarySort) {
Comparator<Map.Entry<T, Integer>> secondarySort,
Function<T, Object> outputMapper) {
List<Map.Entry<T, Integer>> sortedByCount = countsByValue.entrySet().stream()
.sorted(Comparator.comparing(Map.Entry<T, Integer>::getValue, Comparator.reverseOrder()).thenComparing(secondarySort))
@ -174,7 +177,7 @@ public class FieldStatsCalculator {
for (Map.Entry<T, Integer> entry : sortedByCount) {
Map<String, Object> topHit = new LinkedHashMap<>(3);
topHit.put("value", entry.getKey());
topHit.put("value", outputMapper.apply(entry.getKey()));
topHit.put("count", entry.getValue());
topHits.add(topHit);
}

View File

@ -73,16 +73,16 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
FieldStatsCalculator calculator = new FieldStatsCalculator();
calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5", "6", "5", "16", "4", "5"));
calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5.2", "6", "5.2", "16", "4", "5.2"));
List<Map<String, Object>> topHits = calculator.findNumericTopHits(3);
assertEquals(3, topHits.size());
assertEquals(4.0, topHits.get(0).get("value"));
assertEquals(4, topHits.get(0).get("value"));
assertEquals(4, topHits.get(0).get("count"));
assertEquals(5.0, topHits.get(1).get("value"));
assertEquals(5.2, topHits.get(1).get("value"));
assertEquals(3, topHits.get(1).get("count"));
assertEquals(6.0, topHits.get(2).get("value"));
assertEquals(6, topHits.get(2).get("value"));
assertEquals(2, topHits.get(2).get("count"));
}
@ -124,25 +124,25 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
FieldStatsCalculator calculator = new FieldStatsCalculator();
calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5", "6", "5", "16", "4", "5"));
calculator.accept(Arrays.asList("4.5", "4.5", "7", "4.5", "6", "5", "6", "5", "25", "4.5", "5"));
FieldStats stats = calculator.calculate(3);
assertEquals(11L, stats.getCount());
assertEquals(5, stats.getCardinality());
assertEquals(4.0, stats.getMinValue(), 1e-10);
assertEquals(16.0, stats.getMaxValue(), 1e-10);
assertEquals(6.0, stats.getMeanValue(), 1e-10);
assertEquals(4.5, stats.getMinValue(), 1e-10);
assertEquals(25.0, stats.getMaxValue(), 1e-10);
assertEquals(7.0, stats.getMeanValue(), 1e-10);
assertEquals(5.0, stats.getMedianValue(), 1e-10);
List<Map<String, Object>> topHits = stats.getTopHits();
assertEquals(3, topHits.size());
assertEquals(4.0, topHits.get(0).get("value"));
assertEquals(4.5, topHits.get(0).get("value"));
assertEquals(4, topHits.get(0).get("count"));
assertEquals(5.0, topHits.get(1).get("value"));
assertEquals(5, topHits.get(1).get("value"));
assertEquals(3, topHits.get(1).get("count"));
assertEquals(6.0, topHits.get(2).get("value"));
assertEquals(6, topHits.get(2).get("value"));
assertEquals(2, topHits.get(2).get("count"));
}

View File

@ -338,7 +338,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
assertEquals(3, fieldStats.size());
assertEquals(new FieldStats(2, 2, makeTopHits("not a time", 1, "whatever", 1)), fieldStats.get("foo"));
assertEquals(new FieldStats(2, 2, makeTopHits("2018-05-24 17:28:31,735", 1, "2018-05-29 11:53:02,837", 1)), fieldStats.get("time"));
assertEquals(new FieldStats(2, 2, 17.0, 42.0, 29.5, 29.5, makeTopHits(17.0, 1, 42.0, 1)), fieldStats.get("bar"));
assertEquals(new FieldStats(2, 2, 17.0, 42.0, 29.5, 29.5, makeTopHits(17, 1, 42, 1)), fieldStats.get("bar"));
assertNull(fieldStats.get("nothing"));
}

View File

@ -38,6 +38,11 @@
- match: { field_stats.airline.cardinality: 2 }
- match: { field_stats.responsetime.count: 3 }
- match: { field_stats.responsetime.cardinality: 3 }
- match: { field_stats.responsetime.min_value: 132.2046 }
- match: { field_stats.responsetime.max_value: 990.4628 }
# Not asserting on field_stats.responsetime.mean as it's a recurring decimal
# so its representation in the response could cause spurious failures
- match: { field_stats.responsetime.median_value: 134.2046 }
- match: { field_stats.sourcetype.count: 3 }
- match: { field_stats.sourcetype.cardinality: 1 }
- match: { field_stats.time.count: 3 }
@ -89,6 +94,11 @@
- match: { field_stats.airline.cardinality: 2 }
- match: { field_stats.responsetime.count: 3 }
- match: { field_stats.responsetime.cardinality: 3 }
- match: { field_stats.responsetime.min_value: 132.2046 }
- match: { field_stats.responsetime.max_value: 990.4628 }
# Not asserting on field_stats.responsetime.mean as it's a recurring decimal
# so its representation in the response could cause spurious failures
- match: { field_stats.responsetime.median_value: 134.2046 }
- match: { field_stats.sourcetype.count: 3 }
- match: { field_stats.sourcetype.cardinality: 1 }
- match: { field_stats.time.count: 3 }