[ML] Display integers without .0 in file structure field stats (#33947)
Previously numeric values in the field_stats created by the find_file_structure endpoint were always output with a decimal point. This looked unfriendly and unnatural for fields that clearly store integer values. This change converts integer values to type Integer before output in the file structure field stats.
This commit is contained in:
parent
7944a0cb25
commit
b89551c452
|
@ -365,49 +365,49 @@ If the request does not encounter errors, you receive the following result:
|
|||
"page_count" : {
|
||||
"count" : 24,
|
||||
"cardinality" : 24,
|
||||
"min_value" : 180.0,
|
||||
"max_value" : 768.0,
|
||||
"min_value" : 180,
|
||||
"max_value" : 768,
|
||||
"mean_value" : 387.0833333333333,
|
||||
"median_value" : 329.5,
|
||||
"top_hits" : [
|
||||
{
|
||||
"value" : 180.0,
|
||||
"value" : 180,
|
||||
"count" : 1
|
||||
},
|
||||
{
|
||||
"value" : 208.0,
|
||||
"value" : 208,
|
||||
"count" : 1
|
||||
},
|
||||
{
|
||||
"value" : 224.0,
|
||||
"value" : 224,
|
||||
"count" : 1
|
||||
},
|
||||
{
|
||||
"value" : 227.0,
|
||||
"value" : 227,
|
||||
"count" : 1
|
||||
},
|
||||
{
|
||||
"value" : 268.0,
|
||||
"value" : 268,
|
||||
"count" : 1
|
||||
},
|
||||
{
|
||||
"value" : 271.0,
|
||||
"value" : 271,
|
||||
"count" : 1
|
||||
},
|
||||
{
|
||||
"value" : 275.0,
|
||||
"value" : 275,
|
||||
"count" : 1
|
||||
},
|
||||
{
|
||||
"value" : 288.0,
|
||||
"value" : 288,
|
||||
"count" : 1
|
||||
},
|
||||
{
|
||||
"value" : 304.0,
|
||||
"value" : 304,
|
||||
"count" : 1
|
||||
},
|
||||
{
|
||||
"value" : 311.0,
|
||||
"value" : 311,
|
||||
"count" : 1
|
||||
}
|
||||
]
|
||||
|
|
|
@ -123,16 +123,16 @@ public class FieldStats implements ToXContentObject, Writeable {
|
|||
builder.field(COUNT.getPreferredName(), count);
|
||||
builder.field(CARDINALITY.getPreferredName(), cardinality);
|
||||
if (minValue != null) {
|
||||
builder.field(MIN_VALUE.getPreferredName(), minValue);
|
||||
builder.field(MIN_VALUE.getPreferredName(), toIntegerIfInteger(minValue));
|
||||
}
|
||||
if (maxValue != null) {
|
||||
builder.field(MAX_VALUE.getPreferredName(), maxValue);
|
||||
builder.field(MAX_VALUE.getPreferredName(), toIntegerIfInteger(maxValue));
|
||||
}
|
||||
if (meanValue != null) {
|
||||
builder.field(MEAN_VALUE.getPreferredName(), meanValue);
|
||||
builder.field(MEAN_VALUE.getPreferredName(), toIntegerIfInteger(meanValue));
|
||||
}
|
||||
if (medianValue != null) {
|
||||
builder.field(MEDIAN_VALUE.getPreferredName(), medianValue);
|
||||
builder.field(MEDIAN_VALUE.getPreferredName(), toIntegerIfInteger(medianValue));
|
||||
}
|
||||
if (topHits.isEmpty() == false) {
|
||||
builder.field(TOP_HITS.getPreferredName(), topHits);
|
||||
|
@ -142,6 +142,15 @@ public class FieldStats implements ToXContentObject, Writeable {
|
|||
return builder;
|
||||
}
|
||||
|
||||
public static Number toIntegerIfInteger(double d) {
|
||||
|
||||
if (d >= Integer.MIN_VALUE && d <= Integer.MAX_VALUE && Double.compare(d, StrictMath.rint(d)) == 0) {
|
||||
return (int) d;
|
||||
}
|
||||
|
||||
return d;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
|
||||
|
|
|
@ -32,8 +32,13 @@ public class FieldStatsTests extends AbstractSerializingTestCase<FieldStats> {
|
|||
Double medianValue = null;
|
||||
boolean isMetric = randomBoolean();
|
||||
if (isMetric) {
|
||||
if (randomBoolean()) {
|
||||
minValue = randomDouble();
|
||||
maxValue = randomDouble();
|
||||
} else {
|
||||
minValue = (double) randomInt();
|
||||
maxValue = (double) randomInt();
|
||||
}
|
||||
meanValue = randomDouble();
|
||||
medianValue = randomDouble();
|
||||
}
|
||||
|
@ -42,7 +47,7 @@ public class FieldStatsTests extends AbstractSerializingTestCase<FieldStats> {
|
|||
for (int i = 0; i < Math.min(10, cardinality); ++i) {
|
||||
Map<String, Object> topHit = new LinkedHashMap<>();
|
||||
if (isMetric) {
|
||||
topHit.put("value", randomDouble());
|
||||
topHit.put("value", randomBoolean() ? randomDouble() : (double) randomInt());
|
||||
} else {
|
||||
topHit.put("value", randomAlphaOfLength(20));
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
|
@ -152,18 +153,20 @@ public class FieldStatsCalculator {
|
|||
|
||||
List<Map<String, Object>> findNumericTopHits(int numTopHits) {
|
||||
assert countsByNumericValue != null;
|
||||
return findTopHits(numTopHits, countsByNumericValue, Comparator.comparing(Map.Entry<Double, Integer>::getKey));
|
||||
return findTopHits(numTopHits, countsByNumericValue, Comparator.comparing(Map.Entry<Double, Integer>::getKey),
|
||||
FieldStats::toIntegerIfInteger);
|
||||
}
|
||||
|
||||
List<Map<String, Object>> findStringTopHits(int numTopHits) {
|
||||
return findTopHits(numTopHits, countsByStringValue, Comparator.comparing(Map.Entry<String, Integer>::getKey));
|
||||
return findTopHits(numTopHits, countsByStringValue, Comparator.comparing(Map.Entry<String, Integer>::getKey), s -> s);
|
||||
}
|
||||
|
||||
/**
|
||||
* Order by descending count, with a secondary sort to ensure reproducibility of results.
|
||||
*/
|
||||
private static <T> List<Map<String, Object>> findTopHits(int numTopHits, Map<T, Integer> countsByValue,
|
||||
Comparator<Map.Entry<T, Integer>> secondarySort) {
|
||||
Comparator<Map.Entry<T, Integer>> secondarySort,
|
||||
Function<T, Object> outputMapper) {
|
||||
|
||||
List<Map.Entry<T, Integer>> sortedByCount = countsByValue.entrySet().stream()
|
||||
.sorted(Comparator.comparing(Map.Entry<T, Integer>::getValue, Comparator.reverseOrder()).thenComparing(secondarySort))
|
||||
|
@ -174,7 +177,7 @@ public class FieldStatsCalculator {
|
|||
for (Map.Entry<T, Integer> entry : sortedByCount) {
|
||||
|
||||
Map<String, Object> topHit = new LinkedHashMap<>(3);
|
||||
topHit.put("value", entry.getKey());
|
||||
topHit.put("value", outputMapper.apply(entry.getKey()));
|
||||
topHit.put("count", entry.getValue());
|
||||
topHits.add(topHit);
|
||||
}
|
||||
|
|
|
@ -73,16 +73,16 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
|
|||
|
||||
FieldStatsCalculator calculator = new FieldStatsCalculator();
|
||||
|
||||
calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5", "6", "5", "16", "4", "5"));
|
||||
calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5.2", "6", "5.2", "16", "4", "5.2"));
|
||||
|
||||
List<Map<String, Object>> topHits = calculator.findNumericTopHits(3);
|
||||
|
||||
assertEquals(3, topHits.size());
|
||||
assertEquals(4.0, topHits.get(0).get("value"));
|
||||
assertEquals(4, topHits.get(0).get("value"));
|
||||
assertEquals(4, topHits.get(0).get("count"));
|
||||
assertEquals(5.0, topHits.get(1).get("value"));
|
||||
assertEquals(5.2, topHits.get(1).get("value"));
|
||||
assertEquals(3, topHits.get(1).get("count"));
|
||||
assertEquals(6.0, topHits.get(2).get("value"));
|
||||
assertEquals(6, topHits.get(2).get("value"));
|
||||
assertEquals(2, topHits.get(2).get("count"));
|
||||
}
|
||||
|
||||
|
@ -124,25 +124,25 @@ public class FieldStatsCalculatorTests extends FileStructureTestCase {
|
|||
|
||||
FieldStatsCalculator calculator = new FieldStatsCalculator();
|
||||
|
||||
calculator.accept(Arrays.asList("4", "4", "7", "4", "6", "5", "6", "5", "16", "4", "5"));
|
||||
calculator.accept(Arrays.asList("4.5", "4.5", "7", "4.5", "6", "5", "6", "5", "25", "4.5", "5"));
|
||||
|
||||
FieldStats stats = calculator.calculate(3);
|
||||
|
||||
assertEquals(11L, stats.getCount());
|
||||
assertEquals(5, stats.getCardinality());
|
||||
assertEquals(4.0, stats.getMinValue(), 1e-10);
|
||||
assertEquals(16.0, stats.getMaxValue(), 1e-10);
|
||||
assertEquals(6.0, stats.getMeanValue(), 1e-10);
|
||||
assertEquals(4.5, stats.getMinValue(), 1e-10);
|
||||
assertEquals(25.0, stats.getMaxValue(), 1e-10);
|
||||
assertEquals(7.0, stats.getMeanValue(), 1e-10);
|
||||
assertEquals(5.0, stats.getMedianValue(), 1e-10);
|
||||
|
||||
List<Map<String, Object>> topHits = stats.getTopHits();
|
||||
|
||||
assertEquals(3, topHits.size());
|
||||
assertEquals(4.0, topHits.get(0).get("value"));
|
||||
assertEquals(4.5, topHits.get(0).get("value"));
|
||||
assertEquals(4, topHits.get(0).get("count"));
|
||||
assertEquals(5.0, topHits.get(1).get("value"));
|
||||
assertEquals(5, topHits.get(1).get("value"));
|
||||
assertEquals(3, topHits.get(1).get("count"));
|
||||
assertEquals(6.0, topHits.get(2).get("value"));
|
||||
assertEquals(6, topHits.get(2).get("value"));
|
||||
assertEquals(2, topHits.get(2).get("count"));
|
||||
}
|
||||
|
||||
|
|
|
@ -338,7 +338,7 @@ public class FileStructureUtilsTests extends FileStructureTestCase {
|
|||
assertEquals(3, fieldStats.size());
|
||||
assertEquals(new FieldStats(2, 2, makeTopHits("not a time", 1, "whatever", 1)), fieldStats.get("foo"));
|
||||
assertEquals(new FieldStats(2, 2, makeTopHits("2018-05-24 17:28:31,735", 1, "2018-05-29 11:53:02,837", 1)), fieldStats.get("time"));
|
||||
assertEquals(new FieldStats(2, 2, 17.0, 42.0, 29.5, 29.5, makeTopHits(17.0, 1, 42.0, 1)), fieldStats.get("bar"));
|
||||
assertEquals(new FieldStats(2, 2, 17.0, 42.0, 29.5, 29.5, makeTopHits(17, 1, 42, 1)), fieldStats.get("bar"));
|
||||
assertNull(fieldStats.get("nothing"));
|
||||
}
|
||||
|
||||
|
|
|
@ -38,6 +38,11 @@
|
|||
- match: { field_stats.airline.cardinality: 2 }
|
||||
- match: { field_stats.responsetime.count: 3 }
|
||||
- match: { field_stats.responsetime.cardinality: 3 }
|
||||
- match: { field_stats.responsetime.min_value: 132.2046 }
|
||||
- match: { field_stats.responsetime.max_value: 990.4628 }
|
||||
# Not asserting on field_stats.responsetime.mean as it's a recurring decimal
|
||||
# so its representation in the response could cause spurious failures
|
||||
- match: { field_stats.responsetime.median_value: 134.2046 }
|
||||
- match: { field_stats.sourcetype.count: 3 }
|
||||
- match: { field_stats.sourcetype.cardinality: 1 }
|
||||
- match: { field_stats.time.count: 3 }
|
||||
|
@ -89,6 +94,11 @@
|
|||
- match: { field_stats.airline.cardinality: 2 }
|
||||
- match: { field_stats.responsetime.count: 3 }
|
||||
- match: { field_stats.responsetime.cardinality: 3 }
|
||||
- match: { field_stats.responsetime.min_value: 132.2046 }
|
||||
- match: { field_stats.responsetime.max_value: 990.4628 }
|
||||
# Not asserting on field_stats.responsetime.mean as it's a recurring decimal
|
||||
# so its representation in the response could cause spurious failures
|
||||
- match: { field_stats.responsetime.median_value: 134.2046 }
|
||||
- match: { field_stats.sourcetype.count: 3 }
|
||||
- match: { field_stats.sourcetype.cardinality: 1 }
|
||||
- match: { field_stats.time.count: 3 }
|
||||
|
|
Loading…
Reference in New Issue