HBASE-1850 src/examples/mapred do not compile after HBASE-1822

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@816323 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Gray 2009-09-17 18:37:23 +00:00
parent 4f294977d2
commit dfd1091da5
2 changed files with 122 additions and 112 deletions

View File

@ -33,6 +33,7 @@ Release 0.21.0 - Unreleased
for when Writable is not Configurable (Stack via jgray)
HBASE-1847 Delete latest of a null qualifier when non-null qualifiers
exist throws a RuntimeException
HBASE-1850 src/examples/mapred do not compile after HBASE-1822
IMPROVEMENTS
HBASE-1760 Cleanup TODOs in HTable

View File

@ -1,139 +1,148 @@
/**
* Copyright 2009 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map.Entry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/*
* Sample uploader.
*
/**
* Sample Uploader MapReduce
* <p>
* This is EXAMPLE code. You will need to change it to work for your context.
*
* Uses TableReduce to put the data into hbase. Change the InputFormat to suit
* your data. Use the map to massage the input so it fits hbase. Currently its
* just a pass-through map. In the reduce, you need to output a row and a
* map of columns to cells. Change map and reduce to suit your input.
*
* <p>The below is wired up to handle an input whose format is a text file
* which has a line format as follow:
* <pre>
* row columnname columndata
* </pre>
*
* <p>The table and columnfamily we're to insert into must preexist.
*
* <p>
* Uses {@link TableReducer} to put the data into HBase. Change the InputFormat
* to suit your data. In this example, we are importing a CSV file.
* <p>
* <pre>row,family,qualifier,value</pre>
* <p>
* The table and columnfamily we're to insert into must preexist.
* <p>
* There is no reducer in this example as it is not necessary and adds
* significant overhead. If you need to do any massaging of data before
* inserting into HBase, you can do this in the map as well.
* <p>Do the following to start the MR job:
* <pre>
* ./bin/hadoop org.apache.hadoop.hbase.mapred.SampleUploader /tmp/input.txt TABLE_NAME
* ./bin/hadoop org.apache.hadoop.hbase.mapreduce.SampleUploader /tmp/input.csv TABLE_NAME
* </pre>
*
* <p>This code was written against hbase 0.1 branch.
* <p>
* This code was written against HBase 0.21 trunk.
*/
public class SampleUploader extends MapReduceBase
implements Mapper<LongWritable, Text, ImmutableBytesWritable, HbaseMapWritable<byte [], byte []>>,
Tool {
public class SampleUploader {
private static final String NAME = "SampleUploader";
private Configuration conf;
static class Uploader
extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
public JobConf createSubmittableJob(String[] args)
throws IOException {
JobConf c = new JobConf(getConf(), SampleUploader.class);
c.setJobName(NAME);
FileInputFormat.setInputPaths(c, new Path(args[0]));
c.setMapperClass(this.getClass());
c.setMapOutputKeyClass(ImmutableBytesWritable.class);
c.setMapOutputValueClass(HbaseMapWritable.class);
c.setReducerClass(TableUploader.class);
TableMapReduceUtil.initTableReduceJob(args[1], TableUploader.class, c);
return c;
}
public void map(LongWritable k, Text v,
OutputCollector<ImmutableBytesWritable, HbaseMapWritable<byte [], byte []>> output,
Reporter r)
throws IOException {
// Lines are space-delimited; first item is row, next the columnname and
// then the third the cell value.
String tmp = v.toString();
if (tmp.length() == 0) {
return;
}
String [] splits = v.toString().split(" ");
HbaseMapWritable<byte [], byte []> mw =
new HbaseMapWritable<byte [], byte []>();
mw.put(Bytes.toBytes(splits[1]), Bytes.toBytes(splits[2]));
byte [] row = Bytes.toBytes(splits[0]);
r.setStatus("Map emitting " + splits[0] + " for record " + k.toString());
output.collect(new ImmutableBytesWritable(row), mw);
}
public static class TableUploader extends MapReduceBase
implements TableReduce<ImmutableBytesWritable, HbaseMapWritable<byte [], byte []>> {
public void reduce(ImmutableBytesWritable k, Iterator<HbaseMapWritable<byte [], byte []>> v,
OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
Reporter r)
private long checkpoint = 100;
private long count = 0;
@Override
public void map(LongWritable key, Text line, Context context)
throws IOException {
while (v.hasNext()) {
r.setStatus("Reducer committing " + k);
BatchUpdate bu = new BatchUpdate(k.get());
while (v.hasNext()) {
HbaseMapWritable<byte [], byte []> hmw = v.next();
for (Entry<byte [], byte []> e: hmw.entrySet()) {
bu.put(e.getKey(), e.getValue());
}
}
output.collect(k, bu);
// Input is a CSV file
// Each map() is a single line, where the key is the line number
// Each line is comma-delimited; row,family,qualifier,value
// Split CSV line
String [] values = line.toString().split(",");
if(values.length != 4) {
return;
}
// Extract each value
byte [] row = Bytes.toBytes(values[0]);
byte [] family = Bytes.toBytes(values[1]);
byte [] qualifier = Bytes.toBytes(values[2]);
byte [] value = Bytes.toBytes(values[3]);
// Create Put
Put put = new Put(row);
put.add(family, qualifier, value);
// Uncomment below to disable WAL. This will improve performance but means
// you will experience data loss in the case of a RegionServer crash.
// put.setWriteToWAL(false);
try {
context.write(new ImmutableBytesWritable(row), put);
} catch (InterruptedException e) {
e.printStackTrace();
}
// Set status every checkpoint lines
if(++count % checkpoint == 0) {
context.setStatus("Emitting Put " + count);
}
}
}
static int printUsage() {
System.out.println(NAME + " <input> <table_name>");
return -1;
}
public int run(@SuppressWarnings("unused") String[] args) throws Exception {
// Make sure there are exactly 2 parameters left.
if (args.length != 2) {
System.out.println("ERROR: Wrong number of parameters: " +
args.length + " instead of 2.");
return printUsage();
}
JobClient.runJob(createSubmittableJob(args));
return 0;
}
public Configuration getConf() {
return this.conf;
}
public void setConf(final Configuration c) {
this.conf = c;
/**
* Job configuration.
*/
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
Path inputPath = new Path(args[0]);
String tableName = args[1];
Job job = new Job(conf, NAME + "_" + tableName);
job.setJarByClass(Uploader.class);
FileInputFormat.setInputPaths(job, inputPath);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setMapperClass(Uploader.class);
// No reducers. Just write straight to table. Call initTableReducerJob
// because it sets up the TableOutputFormat.
TableMapReduceUtil.initTableReducerJob(tableName, null, job);
job.setNumReduceTasks(0);
return job;
}
/**
* Main entry point.
*
* @param args The command line parameters.
* @throws Exception When running the job fails.
*/
public static void main(String[] args) throws Exception {
int errCode = ToolRunner.run(new Configuration(), new SampleUploader(),
args);
System.exit(errCode);
HBaseConfiguration conf = new HBaseConfiguration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if(otherArgs.length != 2) {
System.err.println("Wrong number of arguments: " + otherArgs.length);
System.err.println("Usage: " + NAME + " <input> <tablename>");
System.exit(-1);
}
Job job = configureJob(conf, otherArgs);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}