From 493ccf01e879e2323b11daa876fc90b62d192d13 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Sat, 5 Jun 2010 16:44:21 +0000 Subject: [PATCH] HBASE-2615 M/R on bulk imported tables git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@951742 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../org/apache/hadoop/hbase/KeyValue.java | 7 ++- .../hbase/mapreduce/HFileOutputFormat.java | 2 + .../hadoop/hbase/regionserver/HRegion.java | 9 ++-- .../mapreduce/TestHFileOutputFormat.java | 50 +++++++++++++++++++ 5 files changed, 62 insertions(+), 7 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 57b1874cfe4..280daa7fb5c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -374,6 +374,7 @@ Release 0.21.0 - Unreleased HBASE-2662 TestScannerResource.testScannerResource broke in trunk HBASE-2667 TestHLog.testSplit failing in trunk (Cosmin and Stack) HBASE-2614 killing server in TestMasterTransitions causes NPEs and test deadlock + HBASE-2615 M/R on bulk imported tables IMPROVEMENTS HBASE-1760 Cleanup TODOs in HTable diff --git a/src/main/java/org/apache/hadoop/hbase/KeyValue.java b/src/main/java/org/apache/hadoop/hbase/KeyValue.java index fc5494b4a9e..71284cfa1da 100644 --- a/src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ b/src/main/java/org/apache/hadoop/hbase/KeyValue.java @@ -797,8 +797,13 @@ public class KeyValue implements Writable, HeapSize { HConstants.LATEST_TIMESTAMP_BYTES, 0, Bytes.SIZEOF_LONG) == 0; } + /** + * @param now Time to set into this IFF timestamp == + * {@link HConstants#LATEST_TIMESTAMP} (else, its a noop). + * @return True is we modified this. + */ public boolean updateLatestStamp(final byte [] now) { - if(this.isLatestTimestamp()) { + if (this.isLatestTimestamp()) { int tsOffset = getTimestampOffset(); System.arraycopy(now, 0, this.bytes, tsOffset, Bytes.SIZEOF_LONG); return true; diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java index 2a272af5ed8..dd148ba28e6 100644 --- a/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java +++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java @@ -85,6 +85,7 @@ public class HFileOutputFormat extends FileOutputFormat writers = new TreeMap(Bytes.BYTES_COMPARATOR); private byte [] previousRow = HConstants.EMPTY_BYTE_ARRAY; + private final byte [] now = Bytes.toBytes(System.currentTimeMillis()); public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException { @@ -108,6 +109,7 @@ public class HFileOutputFormat extends FileOutputFormatHBASE-2615 + */ + @Test + public void test_LATEST_TIMESTAMP_isReplaced() + throws IOException, InterruptedException { + Configuration conf = new Configuration(this.util.getConfiguration()); + RecordWriter writer = null; + TaskAttemptContext context = null; + Path dir = + HBaseTestingUtility.getTestDir("test_LATEST_TIMESTAMP_isReplaced"); + try { + Job job = new Job(conf); + FileOutputFormat.setOutputPath(job, dir); + context = new TaskAttemptContext(job.getConfiguration(), + new TaskAttemptID()); + HFileOutputFormat hof = new HFileOutputFormat(); + writer = hof.getRecordWriter(context); + final byte [] b = Bytes.toBytes("b"); + + // Test 1. Pass a KV that has a ts of LATEST_TIMESTAMP. It should be + // changed by call to write. Check all in kv is same but ts. + KeyValue kv = new KeyValue(b, b, b); + KeyValue original = kv.clone(); + writer.write(new ImmutableBytesWritable(), kv); + assertFalse(original.equals(kv)); + assertTrue(Bytes.equals(original.getRow(), kv.getRow())); + assertTrue(original.matchingColumn(kv.getFamily(), kv.getQualifier())); + assertNotSame(original.getTimestamp(), kv.getTimestamp()); + assertNotSame(HConstants.LATEST_TIMESTAMP, kv.getTimestamp()); + + // Test 2. Now test passing a kv that has explicit ts. It should not be + // changed by call to record write. + kv = new KeyValue(b, b, b, System.currentTimeMillis(), b); + original = kv.clone(); + writer.write(new ImmutableBytesWritable(), kv); + assertTrue(original.equals(kv)); + } finally { + if (writer != null && context != null) writer.close(context); + dir.getFileSystem(conf).delete(dir, true); + } + } + /** * Run small MR job. */