HBASE-1333 RowCounter updates

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@767802 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2009-04-23 05:29:10 +00:00
parent 9a5f753e95
commit 6f966f2caa
2 changed files with 38 additions and 43 deletions

View File

@ -151,6 +151,7 @@ Release 0.20.0 - Unreleased
HBASE-1331 Lower the default scanner caching value HBASE-1331 Lower the default scanner caching value
HBASE-1235 Add table enabled status to shell and UI HBASE-1235 Add table enabled status to shell and UI
(Lars George via Stack) (Lars George via Stack)
HBASE-1333 RowCounter updates
Release 0.19.0 - 01/21/2009 Release 0.19.0 - 01/21/2009
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -22,18 +22,15 @@ package org.apache.hadoop.hbase.mapred;
import java.io.IOException; import java.io.IOException;
import java.util.Map; import java.util.Map;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.io.Cell; import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult; import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.IdentityReducer; import org.apache.hadoop.mapred.lib.IdentityReducer;
@ -45,36 +42,41 @@ import org.apache.hadoop.util.ToolRunner;
* Map outputs table rows IF the input row has columns that have content. * Map outputs table rows IF the input row has columns that have content.
* Uses an {@link IdentityReducer} * Uses an {@link IdentityReducer}
*/ */
public class RowCounter public class RowCounter extends Configured implements Tool {
extends MapReduceBase // Name of this 'program'
implements TableMap<ImmutableBytesWritable, RowResult>, Tool {
/* Name of this 'program'
*/
static final String NAME = "rowcounter"; static final String NAME = "rowcounter";
private Configuration conf; static class RowCounterMapper
private final RowResult EMPTY_RESULT_VALUE = implements TableMap<ImmutableBytesWritable, RowResult> {
new RowResult(Bytes.toBytes("dummy"),new HbaseMapWritable<byte [], Cell>()); private static enum Counters {ROWS}
private static enum Counters {ROWS}
public void map(ImmutableBytesWritable row, RowResult value,
public void map(ImmutableBytesWritable row, RowResult value, OutputCollector<ImmutableBytesWritable, RowResult> output,
OutputCollector<ImmutableBytesWritable, RowResult> output, Reporter reporter)
Reporter reporter) throws IOException {
throws IOException { boolean content = false;
boolean content = false; for (Map.Entry<byte [], Cell> e: value.entrySet()) {
for (Map.Entry<byte [], Cell> e: value.entrySet()) { Cell cell = e.getValue();
Cell cell = e.getValue(); if (cell != null && cell.getValue().length > 0) {
if (cell != null && cell.getValue().length > 0) { content = true;
content = true; break;
break; }
} }
if (!content) {
// Don't count rows that are all empty values.
return;
}
// Give out same value every time. We're only interested in the row/key
reporter.incrCounter(Counters.ROWS, 1);
} }
if (!content) {
return; public void configure(JobConf jc) {
// Nothing to do.
}
public void close() throws IOException {
// Nothing to do.
} }
// Give out same value every time. We're only interested in the row/key
reporter.incrCounter(Counters.ROWS, 1);
output.collect(row, EMPTY_RESULT_VALUE);
} }
/** /**
@ -83,7 +85,7 @@ implements TableMap<ImmutableBytesWritable, RowResult>, Tool {
* @throws IOException * @throws IOException
*/ */
public JobConf createSubmittableJob(String[] args) throws IOException { public JobConf createSubmittableJob(String[] args) throws IOException {
JobConf c = new JobConf(getConf(), RowCounter.class); JobConf c = new JobConf(getConf(), getClass());
c.setJobName(NAME); c.setJobName(NAME);
// Columns are space delimited // Columns are space delimited
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
@ -95,9 +97,9 @@ implements TableMap<ImmutableBytesWritable, RowResult>, Tool {
sb.append(args[i]); sb.append(args[i]);
} }
// Second argument is the table name. // Second argument is the table name.
TableMapReduceUtil.initTableMapJob(args[1], sb.toString(), this.getClass(), TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
ImmutableBytesWritable.class, RowResult.class, c); RowCounterMapper.class, ImmutableBytesWritable.class, RowResult.class, c);
c.setReducerClass(IdentityReducer.class); c.setNumReduceTasks(0);
// First arg is the output directory. // First arg is the output directory.
FileOutputFormat.setOutputPath(c, new Path(args[0])); FileOutputFormat.setOutputPath(c, new Path(args[0]));
return c; return c;
@ -119,14 +121,6 @@ implements TableMap<ImmutableBytesWritable, RowResult>, Tool {
return 0; return 0;
} }
public Configuration getConf() {
return this.conf;
}
public void setConf(final Configuration c) {
this.conf = c;
}
/** /**
* @param args * @param args
* @throws Exception * @throws Exception
@ -136,4 +130,4 @@ implements TableMap<ImmutableBytesWritable, RowResult>, Tool {
int errCode = ToolRunner.run(c, new RowCounter(), args); int errCode = ToolRunner.run(c, new RowCounter(), args);
System.exit(errCode); System.exit(errCode);
} }
} }