From 3519920373ef2d8649891b461f5b5226ee41f891 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Sat, 25 Aug 2012 04:51:41 +0000 Subject: [PATCH] HBASE-6372 Add scanner batching to Export job git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1377212 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/hbase/mapreduce/Export.java | 16 ++++++++++++++-- .../hadoop/hbase/mapreduce/TestImportExport.java | 3 +++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java index ac8740260fa..270cb26922b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/Export.java @@ -52,7 +52,8 @@ import org.apache.commons.logging.LogFactory; public class Export { private static final Log LOG = LogFactory.getLog(Export.class); final static String NAME = "export"; - final static String RAW_SCAN="hbase.mapreduce.include.deleted.rows"; + final static String RAW_SCAN = "hbase.mapreduce.include.deleted.rows"; + final static String EXPORT_BATCHING = "hbase.export.scanner.batch"; /** * Mapper. @@ -134,6 +135,15 @@ public class Export { LOG.info("Setting Scan Filter for Export."); s.setFilter(exportFilter); } + + int batching = conf.getInt(EXPORT_BATCHING, -1); + if (batching != -1){ + try{ + s.setBatch(batching); + } catch (RuntimeException e) { + LOG.error("Batching could not be set", e); + } + } LOG.info("versions=" + versions + ", starttime=" + startTime + ", endtime=" + endTime + ", keepDeletedCells=" + raw); return s; @@ -174,6 +184,8 @@ public class Export { + " -Dhbase.client.scanner.caching=100\n" + " -Dmapred.map.tasks.speculative.execution=false\n" + " -Dmapred.reduce.tasks.speculative.execution=false"); + System.err.println("For tables with very wide rows consider setting the batch size as below:\n" + + " -D" + EXPORT_BATCHING + "=10"); } /** @@ -192,4 +204,4 @@ public class Export { Job job = createSubmittableJob(conf, otherArgs); System.exit(job.waitForCompletion(true)? 0 : 1); } -} \ No newline at end of file +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java index a573cbc8d7d..2ed47d55a1a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestImportExport.java @@ -57,6 +57,7 @@ public class TestImportExport { private static final byte[] FAMILYB = Bytes.toBytes(FAMILYB_STRING); private static final byte[] QUAL = Bytes.toBytes("q"); private static final String OUTPUT_DIR = "outputdir"; + private static final String EXPORT_BATCHING = "100"; private static MiniHBaseCluster cluster; private static long now = System.currentTimeMillis(); @@ -102,6 +103,7 @@ public class TestImportExport { String[] args = new String[] { EXPORT_TABLE, OUTPUT_DIR, + EXPORT_BATCHING, "1000" }; @@ -191,6 +193,7 @@ public class TestImportExport { "-D" + Export.RAW_SCAN + "=true", EXPORT_TABLE, OUTPUT_DIR, + EXPORT_BATCHING, "1000" };