diff --git a/src/java/org/apache/hadoop/hbase/mapreduce/Driver.java b/src/java/org/apache/hadoop/hbase/mapreduce/Driver.java
index f0e96923414..06f15ce6759 100644
--- a/src/java/org/apache/hadoop/hbase/mapreduce/Driver.java
+++ b/src/java/org/apache/hadoop/hbase/mapreduce/Driver.java
@@ -19,6 +19,7 @@
  */
 package org.apache.hadoop.hbase.mapreduce;
 
+import org.apache.hadoop.hbase.migration.nineteen.HStoreFileToStoreFile;
 import org.apache.hadoop.util.ProgramDriver;
 
 /**
@@ -34,6 +35,9 @@ public class Driver {
     ProgramDriver pgd = new ProgramDriver();
     pgd.addClass(RowCounter.NAME, RowCounter.class,
       "Count rows in HBase table");
+    pgd.addClass(HStoreFileToStoreFile.JOBNAME,
+      HStoreFileToStoreFile.class,
+      "Bulk convert 0.19 HStoreFiles to 0.20 StoreFiles");
     pgd.driver(args);
   }
 }
diff --git a/src/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java b/src/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
index b4fae725c82..0d7b0fb705e 100644
--- a/src/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
+++ b/src/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
@@ -124,5 +124,4 @@ public class RowCounter {
     Job job = createSubmittableJob(conf, otherArgs);
     System.exit(job.waitForCompletion(true) ? 0 : 1);
   }
-
 }
\ No newline at end of file
diff --git a/src/java/org/apache/hadoop/hbase/migration/nineteen/HStoreFileToStoreFile.java b/src/java/org/apache/hadoop/hbase/migration/nineteen/HStoreFileToStoreFile.java
new file mode 100644
index 00000000000..6081121002c
--- /dev/null
+++ b/src/java/org/apache/hadoop/hbase/migration/nineteen/HStoreFileToStoreFile.java
@@ -0,0 +1,187 @@
+/*
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Migrate;
+import org.apache.hadoop.hbase.util.FSUtils.DirFilter;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+
+/**
+ * Mapper that rewrites hbase 0.19 HStoreFiles as 0.20 StoreFiles.
+ * Creates passed directories as input and output. On startup, it checks
+ * filesystem is 0.19 generation. It then crawls the filesystem to find the
+ * files to migrate writing a file into the input directory. Next it starts up
+ * the MR job to rewrite the 0.19 HStoreFiles as 0.20 StoreFiles deleting the
+ * old as it goes. Presumption is that there is only one file per family
+ * Store else stuff breaks; i.e. the 0.19 install was major compacted before
+ * migration began.
+ */
+public class HStoreFileToStoreFile {
+  static final Log LOG = LogFactory.getLog(HStoreFileToStoreFile.class);
+  public static final String JOBNAME = "hsf2sf";
+
+  /**
+   * Map-only task: each input value is a path that is handed to Migrate.rewrite.
+   */
+  public static class Map
+  extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
+    @Override
+    protected void map(LongWritable key, Text value, Context context)
+    throws java.io.IOException, InterruptedException {
+      HBaseConfiguration c = new HBaseConfiguration(context.getConfiguration());
+      Migrate.rewrite(c, FileSystem.get(c), new Path(value.toString()));
+    }
+  }
+
+  /**
+   * Checks the filesystem is pre-0.20 and then writes the list of mapfiles to
+   * migrate into a file named 'mapfiles' under <code>dir</code>.
+   * @param conf Used to find hbase.rootdir.
+   * @param fs Filesystem to crawl and to write the listing to.
+   * @param dir Job input directory; must not already exist.
+   * @throws IOException If not a pre-0.20 layout or <code>dir</code> exists.
+   */
+  private static void writeInputFiles(final HBaseConfiguration conf,
+      final FileSystem fs, final Path dir)
+  throws IOException {
+    if (!FSUtils.isPre020FileLayout(fs, FSUtils.getRootDir(conf))) {
+      throw new IOException("Not a pre-0.20 filesystem");
+    }
+    if (fs.exists(dir)) {
+      throw new IOException("Input exists -- please specify empty input dir");
+    }
+    FSDataOutputStream out = fs.create(new Path(dir, "mapfiles"));
+    try {
+      gathermapfiles(conf, fs, out);
+    } finally {
+      out.close();
+    }
+  }
+
+  /**
+   * Crawls hbase.rootdir writing the qualified path of each family's single
+   * mapfile to <code>out</code>, one path per line.
+   * @param conf Used to find hbase.rootdir.
+   * @param fs Filesystem to crawl.
+   * @param out Stream the mapfile paths are written to.
+   * @throws IOException If a family directory has an unexpected layout.
+   */
+  private static void gathermapfiles(final HBaseConfiguration conf,
+      final FileSystem fs, final FSDataOutputStream out)
+  throws IOException {
+    // Presumes any directory under hbase.rootdir is a table.
+    FileStatus [] tableDirs =
+      fs.listStatus(FSUtils.getRootDir(conf), new DirFilter(fs));
+    for (int i = 0; i < tableDirs.length; i++) {
+      // Inside a table, there are compaction.dir directories to skip.
+      // Otherwise, all else should be regions. Then in each region, should
+      // only be family directories. Under each of these, should be a mapfile
+      // and info directory and in these only one file.
+      Path d = tableDirs[i].getPath();
+      if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) continue;
+      FileStatus [] regionDirs = fs.listStatus(d, new DirFilter(fs));
+      for (int j = 0; j < regionDirs.length; j++) {
+        Path dd = regionDirs[j].getPath();
+        // Compare on the directory name; a Path never equals a String.
+        if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) continue;
+        // Else its a region name. Now look in region for families.
+        FileStatus [] familyDirs = fs.listStatus(dd, new DirFilter(fs));
+        for (int k = 0; k < familyDirs.length; k++) {
+          Path family = familyDirs[k].getPath();
+          FileStatus [] infoAndMapfile = fs.listStatus(family);
+          // An empty family directory has nothing to migrate.
+          if (infoAndMapfile == null || infoAndMapfile.length == 0) continue;
+          // Assert that only info and mapfile in family dir.
+          if (infoAndMapfile.length != 2) {
+            throw new IOException(family.toString() +
+              " has more than just info and mapfile: " + infoAndMapfile.length);
+          }
+          // Make sure directory named info or mapfile.
+          for (int ll = 0; ll < infoAndMapfile.length; ll++) {
+            String name = infoAndMapfile[ll].getPath().getName();
+            if (name.equals("info") || name.equals("mapfiles")) continue;
+            throw new IOException("Unexpected directory name: " +
+              infoAndMapfile[ll].getPath());
+          }
+          // Now in family, there are 'mapfile' and 'info' subdirs. Just
+          // look in the 'mapfile' subdir.
+          FileStatus [] familyStatus =
+            fs.listStatus(new Path(family, "mapfiles"));
+          // No mapfile means there is nothing to add to out for this family.
+          if (familyStatus == null || familyStatus.length == 0) continue;
+          if (familyStatus.length > 1) {
+            throw new IOException(family.toString() + " has " + familyStatus.length +
+              " files.");
+          }
+          // If we got here, then this is good. Add the mapfile to out
+          String str = familyStatus[0].getPath().makeQualified(fs).toString();
+          LOG.info(str);
+          out.write(Bytes.toBytes(str + "\n"));
+        }
+      }
+    }
+  }
+
+  /**
+   * @param args Input directory to create followed by job output directory.
+   * @throws Exception
+   */
+  public static void main(String[] args) throws Exception {
+    HBaseConfiguration conf = new HBaseConfiguration();
+    String [] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
+    if (otherArgs.length < 2) {
+      System.err.println("ERROR: Wrong number of parameters: " + otherArgs.length);
+      System.err.println("Usage: " + HStoreFileToStoreFile.class.getName() +
+        " <inputdir> <outputdir>");
+      System.exit(-1);
+    }
+    // Use the arguments left after generic hadoop options are stripped.
+    Path input = new Path(otherArgs[0]);
+    FileSystem fs = FileSystem.get(conf);
+    writeInputFiles(conf, fs, input);
+    Job job = new Job(conf);
+    job.setJarByClass(HStoreFileToStoreFile.class);
+    job.setJobName(JOBNAME);
+    job.setMapperClass(Map.class);
+    job.setNumReduceTasks(0);
+    FileInputFormat.setInputPaths(job, input);
+    Path output = new Path(otherArgs[1]);
+    FileOutputFormat.setOutputPath(job, output);
+    System.exit(job.waitForCompletion(true) ? 0 : 1);
+  }
+}
\ No newline at end of file
diff --git a/src/java/org/apache/hadoop/hbase/migration/package-info.java b/src/java/org/apache/hadoop/hbase/migration/package-info.java
index 8830496d2c3..61567f7419c 100644
--- a/src/java/org/apache/hadoop/hbase/migration/package-info.java
+++ b/src/java/org/apache/hadoop/hbase/migration/package-info.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2009The Apache Software Foundation
+ * Copyright 2009 The Apache Software Foundation
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -19,6 +19,8 @@
  */
 /**
 Provides classes from old hbase versions used migrating data.
-Nineteen package has classes from hbase 0.19.
+Nineteen package has classes from hbase 0.19. See
+<a href="http://wiki.apache.org/hadoop/Hbase/HowToMigrate">How to Migrate</a>
+for more on migrations.
 */
 package org.apache.hadoop.hbase.migration;