From faada59eb7feafe7dfff8cd96ed2bb0a746d008d Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Sun, 9 Dec 2007 04:49:58 +0000 Subject: [PATCH] HADOOP-2384 Delete all members of a column family on a specific row git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@602633 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + src/java/org/apache/hadoop/hbase/HRegion.java | 29 +++ .../apache/hadoop/hbase/HRegionInterface.java | 14 ++ .../apache/hadoop/hbase/HRegionServer.java | 7 + src/java/org/apache/hadoop/hbase/HTable.java | 50 ++++++ .../apache/hadoop/hbase/TestDeleteFamily.java | 166 ++++++++++++++++++ 6 files changed, 268 insertions(+) create mode 100644 src/test/org/apache/hadoop/hbase/TestDeleteFamily.java diff --git a/CHANGES.txt b/CHANGES.txt index 5cc4562c21d..521d5378205 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -13,6 +13,8 @@ Trunk (unreleased changes) HADOOP-2316 Run REST servlet outside of master (Bryan Duxbury & Stack) HADOOP-1550 No means of deleting a'row' (Bryan Duxbuery via Stack) + HADOOP-2384 Delete all members of a column family on a specific row + (Bryan Duxbury via Stack) OPTIMIZATIONS diff --git a/src/java/org/apache/hadoop/hbase/HRegion.java b/src/java/org/apache/hadoop/hbase/HRegion.java index 975da44f7fe..15ff129de70 100644 --- a/src/java/org/apache/hadoop/hbase/HRegion.java +++ b/src/java/org/apache/hadoop/hbase/HRegion.java @@ -1275,6 +1275,35 @@ public class HRegion implements HConstants { } } + /** + * Delete all cells for a row with matching column family with timestamps + * less than or equal to timestamp. + * + * @param row The row to operate on + * @param family The column family to match + * @param timestamp Timestamp to match + */ + public void deleteFamily(Text row, Text family, long timestamp) + throws IOException{ + obtainRowLock(row); + + try { + // find the HStore for the column family + LOG.info(family); + HStore store = stores.get(HStoreKey.extractFamily(family)); + // find all the keys that match our criteria + List keys = store.getKeys(new HStoreKey(row, timestamp), ALL_VERSIONS); + + // delete all the cells + TreeMap edits = new TreeMap(); + for (HStoreKey key: keys) { + edits.put(key, HLogEdit.deleteBytes.get()); + } + update(edits); + } finally { + releaseRowLock(row); + } + } /** * Delete one or many cells. diff --git a/src/java/org/apache/hadoop/hbase/HRegionInterface.java b/src/java/org/apache/hadoop/hbase/HRegionInterface.java index 19f41fc1f3d..e4cde6e0426 100644 --- a/src/java/org/apache/hadoop/hbase/HRegionInterface.java +++ b/src/java/org/apache/hadoop/hbase/HRegionInterface.java @@ -146,6 +146,20 @@ public interface HRegionInterface extends VersionedProtocol { */ public void deleteAll(Text regionName, Text row, long timestamp) throws IOException; + + /** + * Delete all cells for a row with matching column family with timestamps + * less than or equal to timestamp. + * + * @param regionName The name of the region to operate on + * @param row The row to operate on + * @param family The column family to match + * @param timestamp Timestamp to match + */ + public void deleteFamily(Text regionName, Text row, Text family, + long timestamp) + throws IOException; + // // remote scanner interface diff --git a/src/java/org/apache/hadoop/hbase/HRegionServer.java b/src/java/org/apache/hadoop/hbase/HRegionServer.java index a3480a21d64..670ab806574 100644 --- a/src/java/org/apache/hadoop/hbase/HRegionServer.java +++ b/src/java/org/apache/hadoop/hbase/HRegionServer.java @@ -1523,6 +1523,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { region.deleteAll(row, timestamp); } + public void deleteFamily(Text regionName, Text row, Text family, + long timestamp) + throws IOException{ + getRegion(regionName).deleteFamily(row, family, timestamp); + } + + /** * @return Info on this server. */ diff --git a/src/java/org/apache/hadoop/hbase/HTable.java b/src/java/org/apache/hadoop/hbase/HTable.java index 5b95e0c4ac6..7c03ec57602 100644 --- a/src/java/org/apache/hadoop/hbase/HTable.java +++ b/src/java/org/apache/hadoop/hbase/HTable.java @@ -789,6 +789,56 @@ public class HTable implements HConstants { deleteAll(row, HConstants.LATEST_TIMESTAMP); } + /** + * Delete all cells for a row with matching column family with timestamps + * less than or equal to timestamp. + * + * @param row The row to operate on + * @param family The column family to match + * @param timestamp Timestamp to match + */ + public void deleteFamily(final Text row, final Text family, long timestamp) + throws IOException { + checkClosed(); + for(int tries = 0; tries < numRetries; tries++) { + HRegionLocation r = getRegionLocation(row); + HRegionInterface server = + connection.getHRegionConnection(r.getServerAddress()); + try { + server.deleteFamily(r.getRegionInfo().getRegionName(), row, family, timestamp); + break; + + } catch (IOException e) { + if (e instanceof RemoteException) { + e = RemoteExceptionHandler.decodeRemoteException((RemoteException) e); + } + if (tries == numRetries - 1) { + throw e; + } + if (LOG.isDebugEnabled()) { + LOG.debug("reloading table servers because: " + e.getMessage()); + } + tableServers = connection.reloadTableServers(tableName); + } + try { + Thread.sleep(this.pause); + } catch (InterruptedException x) { + // continue + } + } + } + + /** + * Delete all cells for a row with matching column family at all timestamps. + * + * @param row The row to operate on + * @param family The column family to match + */ + public void deleteFamily(final Text row, final Text family) + throws IOException{ + deleteFamily(row, family, HConstants.LATEST_TIMESTAMP); + } + /** * Abort a row mutation. * diff --git a/src/test/org/apache/hadoop/hbase/TestDeleteFamily.java b/src/test/org/apache/hadoop/hbase/TestDeleteFamily.java new file mode 100644 index 00000000000..2304bba2b0d --- /dev/null +++ b/src/test/org/apache/hadoop/hbase/TestDeleteFamily.java @@ -0,0 +1,166 @@ +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.dfs.MiniDFSCluster; +import org.apache.hadoop.io.Text; +import org.apache.commons.logging.*; + +/** + * Test the functionality of deleteFamily. + */ +public class TestDeleteFamily extends HBaseTestCase { + private MiniDFSCluster miniHdfs; + + @Override + protected void setUp() throws Exception { + super.setUp(); + this.miniHdfs = new MiniDFSCluster(this.conf, 1, true, null); + } + + /** + * Tests for HADOOP-2384. + * @throws Exception + */ + public void testDeleteFamily() throws Exception { + HRegion region = null; + HRegionIncommon region_incommon = null; + HLog hlog = new HLog(this.miniHdfs.getFileSystem(), this.testDir, + this.conf, null); + + try{ + HTableDescriptor htd = createTableDescriptor(getName()); + HRegionInfo hri = new HRegionInfo(htd, null, null); + region = new HRegion(this.testDir, hlog, this.miniHdfs.getFileSystem(), + this.conf, hri, null, null); + region_incommon = new HRegionIncommon(region); + + // test memcache + makeSureItWorks(region, region_incommon, false); + // test hstore + makeSureItWorks(region, region_incommon, true); + + } finally { + if (region != null) { + try { + region.close(); + } catch (Exception e) { + e.printStackTrace(); + } + } + hlog.closeAndDelete(); + } + } + + private void makeSureItWorks(HRegion region, HRegionIncommon region_incommon, + boolean flush) + throws Exception{ + // insert a few versions worth of data for a row + Text row = new Text("test_row"); + long t0 = System.currentTimeMillis(); + long t1 = t0 - 15000; + long t2 = t1 - 15000; + + Text colA = new Text(COLUMNS[0].toString() + "a"); + Text colB = new Text(COLUMNS[0].toString() + "b"); + Text colC = new Text(COLUMNS[1].toString() + "c"); + + long lock = region_incommon.startUpdate(row); + region_incommon.put(lock, colA, cellData(0, flush).getBytes()); + region_incommon.put(lock, colB, cellData(0, flush).getBytes()); + region_incommon.put(lock, colC, cellData(0, flush).getBytes()); + region_incommon.commit(lock, t0); + + lock = region_incommon.startUpdate(row); + region_incommon.put(lock, colA, cellData(1, flush).getBytes()); + region_incommon.put(lock, colB, cellData(1, flush).getBytes()); + region_incommon.put(lock, colC, cellData(1, flush).getBytes()); + region_incommon.commit(lock, t1); + + lock = region_incommon.startUpdate(row); + region_incommon.put(lock, colA, cellData(2, flush).getBytes()); + region_incommon.put(lock, colB, cellData(2, flush).getBytes()); + region_incommon.put(lock, colC, cellData(2, flush).getBytes()); + region_incommon.commit(lock, t2); + + if (flush) {region_incommon.flushcache();} + + // call delete family at a timestamp, make sure only the most recent stuff + // for column c is left behind + region.deleteFamily(row, COLUMNS[0], t1); + if (flush) {region_incommon.flushcache();} + // most recent for A,B,C should be fine + // A,B at older timestamps should be gone + // C should be fine for older timestamps + assertCellValueEquals(region, row, colA, t0, cellData(0, flush)); + assertCellValueEquals(region, row, colA, t1, null); + assertCellValueEquals(region, row, colA, t2, null); + assertCellValueEquals(region, row, colB, t0, cellData(0, flush)); + assertCellValueEquals(region, row, colB, t1, null); + assertCellValueEquals(region, row, colB, t2, null); + assertCellValueEquals(region, row, colC, t0, cellData(0, flush)); + assertCellValueEquals(region, row, colC, t1, cellData(1, flush)); + assertCellValueEquals(region, row, colC, t2, cellData(2, flush)); + + // call delete family w/o a timestamp, make sure nothing is left except for + // column C. + region.deleteFamily(row, COLUMNS[0], HConstants.LATEST_TIMESTAMP); + if (flush) {region_incommon.flushcache();} + // A,B for latest timestamp should be gone + // C should still be fine + assertCellValueEquals(region, row, colA, t0, null); + assertCellValueEquals(region, row, colB, t0, null); + assertCellValueEquals(region, row, colC, t0, cellData(0, flush)); + assertCellValueEquals(region, row, colC, t1, cellData(1, flush)); + assertCellValueEquals(region, row, colC, t2, cellData(2, flush)); + + } + + private void assertCellValueEquals(final HRegion region, final Text row, + final Text column, final long timestamp, final String value) + throws IOException { + Map result = region.getFull(row, timestamp); + byte[] cell_value = result.get(column); + if(value == null){ + assertEquals(column.toString() + " at timestamp " + timestamp, null, cell_value); + } else { + if (cell_value == null) { + fail(column.toString() + " at timestamp " + timestamp + + "\" was expected to be \"" + value + " but was null"); + } + assertEquals(column.toString() + " at timestamp " + + timestamp, value, new String(cell_value)); + } + } + + private String cellData(int tsNum, boolean flush){ + return "t" + tsNum + " data" + (flush ? " - with flush" : ""); + } + + protected void tearDown() throws Exception { + if (this.miniHdfs != null) { + this.miniHdfs.shutdown(); + } + super.tearDown(); + } +}