HBASE-559 MR example job to count table rows
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@648422 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
82901024c9
commit
863ac0c605
|
@ -7,6 +7,10 @@ Hbase Change Log
|
|||
HBASE-12 When hbase regionserver restarts, it says "impossible state for
|
||||
createLease()"
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-559 MR example job to count table rows
|
||||
|
||||
|
||||
Release 0.1.1 - 04/11/2008
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
10
build.xml
10
build.xml
|
@ -148,12 +148,20 @@
|
|||
</javac>
|
||||
</target>
|
||||
|
||||
<!-- Override jar target to specify main class -->
|
||||
<target name="jar" depends="compile">
|
||||
<!--Copy over any properties under src-->
|
||||
<copy todir="${build.classes}">
|
||||
<fileset dir="${src.dir}">
|
||||
<include name="**/*.properties" />
|
||||
</fileset>
|
||||
</copy>
|
||||
<jar jarfile="${build.dir}/${final.name}.jar"
|
||||
basedir="${build.classes}" >
|
||||
<fileset file="${basedir}/conf/hbase-default.xml"/>
|
||||
<zipfileset dir="${build.webapps}" prefix="webapps"/>
|
||||
<manifest>
|
||||
<attribute name="Main-Class" value="org/apache/hadoop/hbase/mapred/Driver" />
|
||||
</manifest>
|
||||
</jar>
|
||||
</target>
|
||||
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import org.apache.hadoop.util.ProgramDriver;
|
||||
|
||||
/**
|
||||
* Driver for hbase mapreduce jobs. Select which to run by passing
|
||||
* name of job to this main.
|
||||
*/
|
||||
public class Driver {
|
||||
/**
|
||||
* @param args
|
||||
* @throws Throwable
|
||||
*/
|
||||
public static void main(String[] args) throws Throwable {
|
||||
ProgramDriver pgd = new ProgramDriver();
|
||||
pgd.addClass(RowCounter.NAME, RowCounter.class,
|
||||
"Count rows in HBase table");
|
||||
pgd.driver(args);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,126 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapred.JobClient;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.OutputCollector;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.mapred.lib.IdentityReducer;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
|
||||
/**
|
||||
* A job with a map to count rows.
|
||||
* Map outputs table rows IF the input row has columns that have content.
|
||||
* Uses an {@link IdentityReducer}
|
||||
*/
|
||||
public class RowCounter extends TableMap<Text, RowResult> implements Tool {
|
||||
/* Name of this 'program'
|
||||
*/
|
||||
static final String NAME = "rowcounter";
|
||||
|
||||
private Configuration conf;
|
||||
private final RowResult EMPTY_RESULT_VALUE = new RowResult();
|
||||
private static enum Counters {ROWS}
|
||||
|
||||
@Override
|
||||
public void map(Text row, RowResult value,
|
||||
OutputCollector<Text, RowResult> output,
|
||||
@SuppressWarnings("unused") Reporter reporter)
|
||||
throws IOException {
|
||||
boolean content = false;
|
||||
for (Map.Entry<Text, Cell> e: value.entrySet()) {
|
||||
Cell cell = e.getValue();
|
||||
if (cell != null && cell.getValue().length > 0) {
|
||||
content = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!content) {
|
||||
return;
|
||||
}
|
||||
// Give out same value every time. We're only interested in the row/key
|
||||
reporter.incrCounter(Counters.ROWS, 1);
|
||||
output.collect(row, EMPTY_RESULT_VALUE);
|
||||
}
|
||||
|
||||
@SuppressWarnings({ "unused", "deprecation" })
|
||||
public JobConf createSubmittableJob(String[] args) throws IOException {
|
||||
JobConf c = new JobConf(getConf(), RowCounter.class);
|
||||
c.setJobName(NAME);
|
||||
// Columns are space delimited
|
||||
StringBuilder sb = new StringBuilder();
|
||||
final int columnoffset = 2;
|
||||
for (int i = columnoffset; i < args.length; i++) {
|
||||
if (i > columnoffset) {
|
||||
sb.append(" ");
|
||||
}
|
||||
sb.append(args[i]);
|
||||
}
|
||||
// Second argument is the table name.
|
||||
TableMap.initJob(args[1], sb.toString(), this.getClass(), Text.class,
|
||||
RowResult.class, c);
|
||||
c.setReducerClass(IdentityReducer.class);
|
||||
// First arg is the output directory.
|
||||
c.setOutputPath(new Path(args[0]));
|
||||
return c;
|
||||
}
|
||||
|
||||
static int printUsage() {
|
||||
System.out.println(NAME +
|
||||
" <outputdir> <tablename> <column1> [<column2>...]");
|
||||
return -1;
|
||||
}
|
||||
|
||||
public int run(final String[] args) throws Exception {
|
||||
// Make sure there are at least 3 parameters
|
||||
if (args.length < 3) {
|
||||
System.err.println("ERROR: Wrong number of parameters: " + args.length);
|
||||
return printUsage();
|
||||
}
|
||||
JobClient.runJob(createSubmittableJob(args));
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Configuration getConf() {
|
||||
return this.conf;
|
||||
}
|
||||
|
||||
public void setConf(final Configuration c) {
|
||||
this.conf = c;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
HBaseConfiguration c = new HBaseConfiguration();
|
||||
c.set("hbase.master", args[0]);
|
||||
int errCode = ToolRunner.run(c, new RowCounter(), args);
|
||||
System.exit(errCode);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
|
||||
# ResourceBundle properties file for RowCounter MR job
|
||||
|
||||
CounterGroupName= RowCounter
|
||||
|
||||
ROWS.name= Rows
|
|
@ -75,7 +75,15 @@ regions and makes a map-per-region. Writing, it's better to have lots of
|
|||
reducers so load is spread across the hbase cluster.
|
||||
</p>
|
||||
|
||||
<h2> Sample MR Bulk Uploader </h2>
|
||||
<h2>Example Code</h2>
|
||||
<h3>Sample Row Counter</h3>
|
||||
<p>See {@link org.apache.hadoop.hbase.mapred.RowCounter}. You should be able to run
|
||||
it by doing: <code>% ./bin/hadoop jar hbase-X.X.X.jar</code>. This will invoke
|
||||
the hbase MapReduce Driver class. Select 'rowcounter' from the choice of jobs
|
||||
offered.
|
||||
</p>
|
||||
|
||||
<h3> Sample MR Bulk Uploader </h3>
|
||||
<p>Read the class comment below for specification of inputs, prerequisites, etc.
|
||||
</p>
|
||||
<blockquote><pre>package org.apache.hadoop.hbase.mapred;
|
||||
|
|
Loading…
Reference in New Issue