HBASE-27224 HFile tool statistic sampling produces misleading results (#4638)
Signed-off-by: Duo Zhang <zhangduo@apache.org> Reviewed-by: Clay Baenziger <cwb@clayb.net>
This commit is contained in:
parent
890d6dedfe
commit
6f6857b83f
|
@ -20,21 +20,17 @@ package org.apache.hadoop.hbase.io.hfile;
|
||||||
import static com.codahale.metrics.MetricRegistry.name;
|
import static com.codahale.metrics.MetricRegistry.name;
|
||||||
|
|
||||||
import com.codahale.metrics.ConsoleReporter;
|
import com.codahale.metrics.ConsoleReporter;
|
||||||
import com.codahale.metrics.Counter;
|
|
||||||
import com.codahale.metrics.Gauge;
|
|
||||||
import com.codahale.metrics.Histogram;
|
import com.codahale.metrics.Histogram;
|
||||||
import com.codahale.metrics.Meter;
|
|
||||||
import com.codahale.metrics.MetricFilter;
|
|
||||||
import com.codahale.metrics.MetricRegistry;
|
import com.codahale.metrics.MetricRegistry;
|
||||||
import com.codahale.metrics.ScheduledReporter;
|
|
||||||
import com.codahale.metrics.Snapshot;
|
import com.codahale.metrics.Snapshot;
|
||||||
import com.codahale.metrics.Timer;
|
import com.codahale.metrics.UniformReservoir;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.DataInput;
|
import java.io.DataInput;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.text.DateFormat;
|
import java.text.DateFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
|
@ -43,10 +39,8 @@ import java.util.Locale;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.SortedMap;
|
|
||||||
import java.util.TimeZone;
|
import java.util.TimeZone;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.atomic.LongAdder;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.conf.Configured;
|
import org.apache.hadoop.conf.Configured;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
@ -107,6 +101,7 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
private boolean printBlockIndex;
|
private boolean printBlockIndex;
|
||||||
private boolean printBlockHeaders;
|
private boolean printBlockHeaders;
|
||||||
private boolean printStats;
|
private boolean printStats;
|
||||||
|
private boolean printStatRanges;
|
||||||
private boolean checkRow;
|
private boolean checkRow;
|
||||||
private boolean checkFamily;
|
private boolean checkFamily;
|
||||||
private boolean isSeekToRow = false;
|
private boolean isSeekToRow = false;
|
||||||
|
@ -150,6 +145,8 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
options.addOption("w", "seekToRow", true,
|
options.addOption("w", "seekToRow", true,
|
||||||
"Seek to this row and print all the kvs for this row only");
|
"Seek to this row and print all the kvs for this row only");
|
||||||
options.addOption("s", "stats", false, "Print statistics");
|
options.addOption("s", "stats", false, "Print statistics");
|
||||||
|
options.addOption("d", "details", false,
|
||||||
|
"Print detailed statistics, including counts by range");
|
||||||
options.addOption("i", "checkMobIntegrity", false,
|
options.addOption("i", "checkMobIntegrity", false,
|
||||||
"Print all cells whose mob files are missing");
|
"Print all cells whose mob files are missing");
|
||||||
|
|
||||||
|
@ -181,7 +178,8 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
shouldPrintMeta = cmd.hasOption("m");
|
shouldPrintMeta = cmd.hasOption("m");
|
||||||
printBlockIndex = cmd.hasOption("b");
|
printBlockIndex = cmd.hasOption("b");
|
||||||
printBlockHeaders = cmd.hasOption("h");
|
printBlockHeaders = cmd.hasOption("h");
|
||||||
printStats = cmd.hasOption("s");
|
printStatRanges = cmd.hasOption("d");
|
||||||
|
printStats = cmd.hasOption("s") || printStatRanges;
|
||||||
checkRow = cmd.hasOption("k");
|
checkRow = cmd.hasOption("k");
|
||||||
checkFamily = cmd.hasOption("a");
|
checkFamily = cmd.hasOption("a");
|
||||||
checkMobIntegrity = cmd.hasOption("i");
|
checkMobIntegrity = cmd.hasOption("i");
|
||||||
|
@ -356,7 +354,7 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (printStats) {
|
if (printStats) {
|
||||||
fileStats.finish();
|
fileStats.finish(printStatRanges);
|
||||||
out.println("Stats:\n" + fileStats);
|
out.println("Stats:\n" + fileStats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -382,7 +380,7 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
}
|
}
|
||||||
// collect stats
|
// collect stats
|
||||||
if (printStats) {
|
if (printStats) {
|
||||||
fileStats.collect(cell);
|
fileStats.collect(cell, printStatRanges);
|
||||||
}
|
}
|
||||||
// dump key value
|
// dump key value
|
||||||
if (printKey) {
|
if (printKey) {
|
||||||
|
@ -581,18 +579,101 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Default reservoir is exponentially decaying, but we're doing a point-in-time analysis
|
||||||
|
// of a store file. It doesn't make sense to prefer keys later in the store file.
|
||||||
|
private static final MetricRegistry.MetricSupplier<Histogram> UNIFORM_RESERVOIR =
|
||||||
|
() -> new Histogram(new UniformReservoir());
|
||||||
|
|
||||||
|
// Useful ranges for viewing distribution of small to large keys, values, and rows.
|
||||||
|
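// Only ranges that actually contain values are printed, so listing more ranges here adds
// little overhead. Keep this array sorted ascending: buckets are located with
// Arrays.binarySearch.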
|
||||||
|
private static final long[] RANGES = new long[] { 1, 3, 10, 50, 100, 500, 1_000, 5_000, 10_000,
|
||||||
|
50_000, 100_000, 500_000, 750_000, 1_000_000, 5_000_000, 10_000_000, 50_000_000, 100_000_000 };
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds a Histogram and supporting min/max and range buckets for analyzing distribution of key
|
||||||
|
* bytes, value bytes, row bytes, and row columns. Supports adding values, getting the histogram,
|
||||||
|
* and getting counts per range.
|
||||||
|
*/
|
||||||
|
static class KeyValueStats {
|
||||||
|
private final Histogram histogram;
|
||||||
|
private final String name;
|
||||||
|
private long max = Long.MIN_VALUE;
|
||||||
|
private long min = Long.MAX_VALUE;
|
||||||
|
private boolean collectRanges = false;
|
||||||
|
private final LongAdder[] rangeCounts;
|
||||||
|
|
||||||
|
KeyValueStats(MetricRegistry metricRegistry, String statName) {
|
||||||
|
this.histogram =
|
||||||
|
metricRegistry.histogram(name(HFilePrettyPrinter.class, statName), UNIFORM_RESERVOIR);
|
||||||
|
this.name = statName;
|
||||||
|
this.rangeCounts = new LongAdder[RANGES.length];
|
||||||
|
for (int i = 0; i < rangeCounts.length; i++) {
|
||||||
|
rangeCounts[i] = new LongAdder();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void update(long value, boolean collectRanges) {
|
||||||
|
histogram.update(value);
|
||||||
|
min = Math.min(value, min);
|
||||||
|
max = Math.max(value, max);
|
||||||
|
|
||||||
|
if (collectRanges) {
|
||||||
|
this.collectRanges = true;
|
||||||
|
int result = Arrays.binarySearch(RANGES, value);
|
||||||
|
int idx = result >= 0 ? result : Math.abs(result) - 1;
|
||||||
|
rangeCounts[idx].increment();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Histogram getHistogram() {
|
||||||
|
return histogram;
|
||||||
|
}
|
||||||
|
|
||||||
|
String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
long getMax() {
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
|
||||||
|
long getMin() {
|
||||||
|
return min;
|
||||||
|
}
|
||||||
|
|
||||||
|
long[] getRanges() {
|
||||||
|
return RANGES;
|
||||||
|
}
|
||||||
|
|
||||||
|
long getCountAtOrBelow(long range) {
|
||||||
|
long count = 0;
|
||||||
|
for (int i = 0; i < RANGES.length; i++) {
|
||||||
|
if (RANGES[i] <= range) {
|
||||||
|
count += rangeCounts[i].sum();
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean hasRangeCounts() {
|
||||||
|
return collectRanges;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static class KeyValueStatsCollector {
|
private static class KeyValueStatsCollector {
|
||||||
private final MetricRegistry metricsRegistry = new MetricRegistry();
|
private final MetricRegistry metricsRegistry = new MetricRegistry();
|
||||||
private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
|
private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
|
||||||
private final SimpleReporter simpleReporter = SimpleReporter.forRegistry(metricsRegistry)
|
|
||||||
.outputTo(new PrintStream(metricsOutput)).filter(MetricFilter.ALL).build();
|
|
||||||
|
|
||||||
Histogram keyLen = metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Key length"));
|
KeyValueStats keyLen = new KeyValueStats(metricsRegistry, "Key length");
|
||||||
Histogram valLen = metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Val length"));
|
KeyValueStats valLen = new KeyValueStats(metricsRegistry, "Val length");
|
||||||
Histogram rowSizeBytes =
|
KeyValueStats rowSizeBytes = new KeyValueStats(metricsRegistry, "Row size (bytes)");
|
||||||
metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Row size (bytes)"));
|
KeyValueStats rowSizeCols = new KeyValueStats(metricsRegistry, "Row size (columns)");
|
||||||
Histogram rowSizeCols =
|
|
||||||
metricsRegistry.histogram(name(HFilePrettyPrinter.class, "Row size (columns)"));
|
private final SimpleReporter simpleReporter =
|
||||||
|
SimpleReporter.newBuilder().outputTo(new PrintStream(metricsOutput)).addStats(keyLen)
|
||||||
|
.addStats(valLen).addStats(rowSizeBytes).addStats(rowSizeCols).build();
|
||||||
|
|
||||||
long curRowBytes = 0;
|
long curRowBytes = 0;
|
||||||
long curRowCols = 0;
|
long curRowCols = 0;
|
||||||
|
@ -603,11 +684,11 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
private long maxRowBytes = 0;
|
private long maxRowBytes = 0;
|
||||||
private long curRowKeyLength;
|
private long curRowKeyLength;
|
||||||
|
|
||||||
public void collect(Cell cell) {
|
public void collect(Cell cell, boolean printStatRanges) {
|
||||||
valLen.update(cell.getValueLength());
|
valLen.update(cell.getValueLength(), printStatRanges);
|
||||||
if (prevCell != null && CellComparator.getInstance().compareRows(prevCell, cell) != 0) {
|
if (prevCell != null && CellComparator.getInstance().compareRows(prevCell, cell) != 0) {
|
||||||
// new row
|
// new row
|
||||||
collectRow();
|
collectRow(printStatRanges);
|
||||||
}
|
}
|
||||||
curRowBytes += cell.getSerializedSize();
|
curRowBytes += cell.getSerializedSize();
|
||||||
curRowKeyLength = KeyValueUtil.keyLength(cell);
|
curRowKeyLength = KeyValueUtil.keyLength(cell);
|
||||||
|
@ -615,10 +696,10 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
prevCell = cell;
|
prevCell = cell;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void collectRow() {
|
private void collectRow(boolean printStatRanges) {
|
||||||
rowSizeBytes.update(curRowBytes);
|
rowSizeBytes.update(curRowBytes, printStatRanges);
|
||||||
rowSizeCols.update(curRowCols);
|
rowSizeCols.update(curRowCols, printStatRanges);
|
||||||
keyLen.update(curRowKeyLength);
|
keyLen.update(curRowKeyLength, printStatRanges);
|
||||||
|
|
||||||
if (curRowBytes > maxRowBytes && prevCell != null) {
|
if (curRowBytes > maxRowBytes && prevCell != null) {
|
||||||
biggestRow = CellUtil.cloneRow(prevCell);
|
biggestRow = CellUtil.cloneRow(prevCell);
|
||||||
|
@ -629,9 +710,9 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
curRowCols = 0;
|
curRowCols = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void finish() {
|
public void finish(boolean printStatRanges) {
|
||||||
if (curRowCols > 0) {
|
if (curRowCols > 0) {
|
||||||
collectRow();
|
collectRow(printStatRanges);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -640,7 +721,6 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
if (prevCell == null) return "no data available for statistics";
|
if (prevCell == null) return "no data available for statistics";
|
||||||
|
|
||||||
// Dump the metrics to the output stream
|
// Dump the metrics to the output stream
|
||||||
simpleReporter.stop();
|
|
||||||
simpleReporter.report();
|
simpleReporter.report();
|
||||||
|
|
||||||
return metricsOutput.toString() + "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
|
return metricsOutput.toString() + "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
|
||||||
|
@ -648,41 +728,32 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Almost identical to ConsoleReporter, but extending ScheduledReporter, as extending
|
* Simple reporter which collects registered histograms for printing to an output stream in
|
||||||
* ConsoleReporter in this version of dropwizard is now too much trouble.
|
* {@link #report()}.
|
||||||
*/
|
*/
|
||||||
private static class SimpleReporter extends ScheduledReporter {
|
private static final class SimpleReporter {
|
||||||
/**
|
/**
|
||||||
* Returns a new {@link Builder} for {@link ConsoleReporter}.
|
* Returns a new {@link Builder} for {@link SimpleReporter}.
|
||||||
* @param registry the registry to report
|
* @return a {@link Builder} instance for a {@link SimpleReporter}
|
||||||
* @return a {@link Builder} instance for a {@link ConsoleReporter}
|
|
||||||
*/
|
*/
|
||||||
public static Builder forRegistry(MetricRegistry registry) {
|
public static Builder newBuilder() {
|
||||||
return new Builder(registry);
|
return new Builder();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A builder for {@link SimpleReporter} instances. Defaults to using the default locale and time
|
* A builder for {@link SimpleReporter} instances. Defaults to using the default locale and time
|
||||||
* zone, writing to {@code System.out}, converting rates to events/second, converting durations
|
* zone, writing to {@code System.out}.
|
||||||
* to milliseconds, and not filtering metrics.
|
|
||||||
*/
|
*/
|
||||||
public static class Builder {
|
public static class Builder {
|
||||||
private final MetricRegistry registry;
|
private final List<KeyValueStats> stats = new ArrayList<>();
|
||||||
private PrintStream output;
|
private PrintStream output;
|
||||||
private Locale locale;
|
private Locale locale;
|
||||||
private TimeZone timeZone;
|
private TimeZone timeZone;
|
||||||
private TimeUnit rateUnit;
|
|
||||||
private TimeUnit durationUnit;
|
|
||||||
private MetricFilter filter;
|
|
||||||
|
|
||||||
private Builder(MetricRegistry registry) {
|
private Builder() {
|
||||||
this.registry = registry;
|
|
||||||
this.output = System.out;
|
this.output = System.out;
|
||||||
this.locale = Locale.getDefault();
|
this.locale = Locale.getDefault();
|
||||||
this.timeZone = TimeZone.getDefault();
|
this.timeZone = TimeZone.getDefault();
|
||||||
this.rateUnit = TimeUnit.SECONDS;
|
|
||||||
this.durationUnit = TimeUnit.MILLISECONDS;
|
|
||||||
this.filter = MetricFilter.ALL;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -696,12 +767,12 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Only report metrics which match the given filter.
|
* Add the given {@link KeyValueStats} to be reported
|
||||||
* @param filter a {@link MetricFilter}
|
* @param stat the stat to be reported
|
||||||
* @return {@code this}
|
* @return {@code this}
|
||||||
*/
|
*/
|
||||||
public Builder filter(MetricFilter filter) {
|
public Builder addStats(KeyValueStats stat) {
|
||||||
this.filter = filter;
|
this.stats.add(stat);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -710,35 +781,31 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
* @return a {@link ConsoleReporter}
|
* @return a {@link SimpleReporter}
|
||||||
*/
|
*/
|
||||||
public SimpleReporter build() {
|
public SimpleReporter build() {
|
||||||
return new SimpleReporter(registry, output, locale, timeZone, rateUnit, durationUnit,
|
return new SimpleReporter(output, stats, locale, timeZone);
|
||||||
filter);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final PrintStream output;
|
private final PrintStream output;
|
||||||
|
private final List<KeyValueStats> stats;
|
||||||
private final Locale locale;
|
private final Locale locale;
|
||||||
private final DateFormat dateFormat;
|
private final DateFormat dateFormat;
|
||||||
|
|
||||||
private SimpleReporter(MetricRegistry registry, PrintStream output, Locale locale,
|
private SimpleReporter(PrintStream output, List<KeyValueStats> stats, Locale locale,
|
||||||
TimeZone timeZone, TimeUnit rateUnit, TimeUnit durationUnit, MetricFilter filter) {
|
TimeZone timeZone) {
|
||||||
super(registry, "simple-reporter", filter, rateUnit, durationUnit);
|
|
||||||
this.output = output;
|
this.output = output;
|
||||||
|
this.stats = stats;
|
||||||
this.locale = locale;
|
this.locale = locale;
|
||||||
|
|
||||||
this.dateFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.MEDIUM, locale);
|
this.dateFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.MEDIUM, locale);
|
||||||
dateFormat.setTimeZone(timeZone);
|
dateFormat.setTimeZone(timeZone);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
public void report() {
|
||||||
public void report(SortedMap<String, Gauge> gauges, SortedMap<String, Counter> counters,
|
|
||||||
SortedMap<String, Histogram> histograms, SortedMap<String, Meter> meters,
|
|
||||||
SortedMap<String, Timer> timers) {
|
|
||||||
// we know we only have histograms
|
// we know we only have histograms
|
||||||
if (!histograms.isEmpty()) {
|
if (!stats.isEmpty()) {
|
||||||
for (Map.Entry<String, Histogram> entry : histograms.entrySet()) {
|
for (KeyValueStats stat : stats) {
|
||||||
output.print(" " + StringUtils.substringAfterLast(entry.getKey(), "."));
|
output.print(" " + stat.getName());
|
||||||
output.println(':');
|
output.println(':');
|
||||||
printHistogram(entry.getValue());
|
printHistogram(stat);
|
||||||
}
|
}
|
||||||
output.println();
|
output.println();
|
||||||
}
|
}
|
||||||
|
@ -747,10 +814,12 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
output.flush();
|
output.flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void printHistogram(Histogram histogram) {
|
private void printHistogram(KeyValueStats stats) {
|
||||||
|
Histogram histogram = stats.getHistogram();
|
||||||
Snapshot snapshot = histogram.getSnapshot();
|
Snapshot snapshot = histogram.getSnapshot();
|
||||||
output.printf(locale, " min = %d%n", snapshot.getMin());
|
|
||||||
output.printf(locale, " max = %d%n", snapshot.getMax());
|
output.printf(locale, " min = %d%n", stats.getMin());
|
||||||
|
output.printf(locale, " max = %d%n", stats.getMax());
|
||||||
output.printf(locale, " mean = %2.2f%n", snapshot.getMean());
|
output.printf(locale, " mean = %2.2f%n", snapshot.getMean());
|
||||||
output.printf(locale, " stddev = %2.2f%n", snapshot.getStdDev());
|
output.printf(locale, " stddev = %2.2f%n", snapshot.getStdDev());
|
||||||
output.printf(locale, " median = %2.2f%n", snapshot.getMedian());
|
output.printf(locale, " median = %2.2f%n", snapshot.getMedian());
|
||||||
|
@ -760,6 +829,37 @@ public class HFilePrettyPrinter extends Configured implements Tool {
|
||||||
output.printf(locale, " 99%% <= %2.2f%n", snapshot.get99thPercentile());
|
output.printf(locale, " 99%% <= %2.2f%n", snapshot.get99thPercentile());
|
||||||
output.printf(locale, " 99.9%% <= %2.2f%n", snapshot.get999thPercentile());
|
output.printf(locale, " 99.9%% <= %2.2f%n", snapshot.get999thPercentile());
|
||||||
output.printf(locale, " count = %d%n", histogram.getCount());
|
output.printf(locale, " count = %d%n", histogram.getCount());
|
||||||
|
|
||||||
|
// if printStatRanges was enabled with -d arg, below we'll create an approximate histogram
|
||||||
|
// of counts based on the configured ranges in RANGES. Each range of sizes (i.e. <= 50, <=
|
||||||
|
// 100, etc) will have a count printed if any values were seen in that range. If no values
|
||||||
|
// were seen for a range, that range will be excluded to keep the output small.
|
||||||
|
if (stats.hasRangeCounts()) {
|
||||||
|
output.printf(locale, " (range <= count):%n");
|
||||||
|
long lastVal = 0;
|
||||||
|
long lastRange = 0;
|
||||||
|
for (long range : stats.getRanges()) {
|
||||||
|
long val = stats.getCountAtOrBelow(range);
|
||||||
|
if (val - lastVal > 0) {
|
||||||
|
// print the last zero value before this one, to give context
|
||||||
|
if (lastVal == 0 && lastRange != 0) {
|
||||||
|
printRangeCount(lastRange, lastVal);
|
||||||
|
}
|
||||||
|
printRangeCount(range, val - lastVal);
|
||||||
|
}
|
||||||
|
lastVal = val;
|
||||||
|
lastRange = range;
|
||||||
|
}
|
||||||
|
if (histogram.getCount() - lastVal > 0) {
|
||||||
|
// print any remaining that might have been outside our buckets
|
||||||
|
printRangeCount(Long.MAX_VALUE, histogram.getCount() - lastVal);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printRangeCount(long range, long countAtOrBelow) {
|
||||||
|
String rangeString = range == Long.MAX_VALUE ? "inf" : Long.toString(range);
|
||||||
|
output.printf(locale, "%17s <= %d%n", rangeString, countAtOrBelow);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotEquals;
|
import static org.junit.Assert.assertNotEquals;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import com.codahale.metrics.MetricRegistry;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
@ -126,4 +127,59 @@ public class TestHFilePrettyPrinter {
|
||||||
String expectedResult = "Scanning -> " + fileNotInRootDir + "\n" + "Scanned kv count -> 1\n";
|
String expectedResult = "Scanning -> " + fileNotInRootDir + "\n" + "Scanned kv count -> 1\n";
|
||||||
assertEquals(expectedResult, result);
|
assertEquals(expectedResult, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testHistograms() throws Exception {
|
||||||
|
Path fileNotInRootDir = UTIL.getDataTestDir("hfile");
|
||||||
|
TestHRegionServerBulkLoad.createHFile(fs, fileNotInRootDir, cf, fam, value, 1000);
|
||||||
|
assertNotEquals("directory used is not an HBase root dir", UTIL.getDefaultRootDirPath(),
|
||||||
|
fileNotInRootDir);
|
||||||
|
|
||||||
|
System.setOut(ps);
|
||||||
|
new HFilePrettyPrinter(conf).run(new String[] { "-s", "-d", String.valueOf(fileNotInRootDir) });
|
||||||
|
String result = stream.toString();
|
||||||
|
LOG.info(result);
|
||||||
|
|
||||||
|
// split out the output into sections based on the headers
|
||||||
|
String[] headers =
|
||||||
|
new String[] { "Key length", "Val length", "Row size (bytes)", "Row size (columns)" };
|
||||||
|
// for each section, there is a corresponding expected (count, range) pairs
|
||||||
|
int[][] expectations = new int[][] { new int[] { 0, 10, 1000, 50 }, new int[] { 0, 1, 1000, 3 },
|
||||||
|
new int[] { 0, 10, 1000, 50 }, new int[] { 1000, 1 }, };
|
||||||
|
|
||||||
|
for (int i = 0; i < headers.length - 1; i++) {
|
||||||
|
int idx = result.indexOf(headers[i]);
|
||||||
|
int nextIdx = result.indexOf(headers[i + 1]);
|
||||||
|
|
||||||
|
assertContainsRanges(result.substring(idx, nextIdx), expectations[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertContainsRanges(String result, int... rangeCountPairs) {
|
||||||
|
for (int i = 0; i < rangeCountPairs.length - 1; i += 2) {
|
||||||
|
String expected = rangeCountPairs[i + 1] + " <= " + rangeCountPairs[i];
|
||||||
|
assertTrue("expected:\n" + result + "\nto contain: '" + expected + "'",
|
||||||
|
result.contains(expected));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testKeyValueStats() {
|
||||||
|
HFilePrettyPrinter.KeyValueStats stats =
|
||||||
|
new HFilePrettyPrinter.KeyValueStats(new MetricRegistry(), "test");
|
||||||
|
long[] ranges = stats.getRanges();
|
||||||
|
for (long range : ranges) {
|
||||||
|
stats.update(range - 1, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals(ranges[ranges.length - 1] - 1, stats.getMax());
|
||||||
|
assertEquals(ranges[0] - 1, stats.getMin());
|
||||||
|
|
||||||
|
int total = 1;
|
||||||
|
for (long range : ranges) {
|
||||||
|
long val = stats.getCountAtOrBelow(range);
|
||||||
|
assertEquals(total++, val);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue