HBASE-961 Delete multiple columns by regular expression

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@712068 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2008-11-07 06:02:59 +00:00
parent e3659aa246
commit 8fd65c9fbf
10 changed files with 459 additions and 10 deletions

View File

@ -101,6 +101,8 @@ Release 0.19.0 - Unreleased
HBASE-81 When a scanner lease times out, throw a more "user friendly" exception HBASE-81 When a scanner lease times out, throw a more "user friendly" exception
HBASE-978 Remove BloomFilterDescriptor. It is no longer used. HBASE-978 Remove BloomFilterDescriptor. It is no longer used.
HBASE-975 Improve MapFile performance for start and end key HBASE-975 Improve MapFile performance for start and end key
HBASE-961 Delete multiple columns by regular expression
(Samuel Guo via Stack)
NEW FEATURES NEW FEATURES
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters] HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]

View File

@ -891,6 +891,71 @@ public class HTable {
} }
); );
} }
/**
 * Delete all cells in the passed row whose column matches the passed
 * regular expression. Delegates to
 * {@link #deleteAllByRegex(String, String, long)} with
 * {@link HConstants#LATEST_TIMESTAMP} as the timestamp.
 * @param row Row to update
 * @param colRegex column regex expression
 * @throws IOException
 */
public void deleteAllByRegex(final String row, final String colRegex)
throws IOException {
  // Must call the regex overload: plain deleteAll(row, column, ts) would
  // treat colRegex as a literal column name and never apply the pattern.
  deleteAllByRegex(row, colRegex, HConstants.LATEST_TIMESTAMP);
}
/**
 * Regex delete for a row given as a String. The row is converted with
 * {@link Bytes#toBytes(String)} and the work is handed to the byte-array
 * overload.
 * @param row Row to update
 * @param colRegex Column regex expression
 * @param ts Delete all cells of the same timestamp or older.
 * @throws IOException
 */
public void deleteAllByRegex(final String row, final String colRegex,
    final long ts) throws IOException {
  final byte [] rowKey = Bytes.toBytes(row);
  deleteAllByRegex(rowKey, colRegex, ts);
}
/**
 * Regex delete for a row without an existing row lock. Forwards to the
 * four-argument overload, passing <code>null</code> as the lock.
 * @param row Row to update
 * @param colRegex Column regex expression
 * @param ts Delete all cells of the same timestamp or older.
 * @throws IOException
 */
public void deleteAllByRegex(final byte [] row, final String colRegex,
    final long ts) throws IOException {
  final RowLock noLock = null;
  deleteAllByRegex(row, colRegex, ts, noLock);
}
/**
 * Ask the region server hosting <code>row</code> to delete every cell of
 * that row whose column matches <code>colRegex</code> and whose timestamp
 * is equal-to or older than <code>ts</code>, optionally under an existing
 * row lock.
 * @param row Row to update
 * @param colRegex Column regex expression
 * @param ts Delete all cells of the same timestamp or older.
 * @param rl Existing row lock; when <code>null</code>, a lock id of -1 is
 * sent to the server
 * @throws IOException
 */
public void deleteAllByRegex(final byte [] row, final String colRegex,
    final long ts, final RowLock rl)
throws IOException {
  final ServerCallable<Boolean> deleteCall =
    new ServerCallable<Boolean>(connection, tableName, row) {
      public Boolean call() throws IOException {
        // -1 is sent when the caller supplied no row lock.
        final long lockId = (rl == null) ? -1L : rl.getLockId();
        final byte [] regionName = location.getRegionInfo().getRegionName();
        this.server.deleteAllByRegex(regionName, row, colRegex, ts, lockId);
        // Nothing to hand back; the callable is typed only to fit the API.
        return null;
      }
    };
  connection.getRegionServerWithRetries(deleteCall);
}
/** /**
* Delete all cells for a row with matching column family at all timestamps. * Delete all cells for a row with matching column family at all timestamps.
@ -973,6 +1038,90 @@ public class HTable {
} }
); );
} }
/**
 * Delete the cells of a row in every column family whose name matches the
 * passed regex, using {@link HConstants#LATEST_TIMESTAMP} as the timestamp.
 *
 * @param row The row to operate on
 * @param familyRegex Column family regex
 * @throws IOException
 */
public void deleteFamilyByRegex(final String row, final String familyRegex)
throws IOException {
  final long latest = HConstants.LATEST_TIMESTAMP;
  deleteFamilyByRegex(row, familyRegex, latest);
}
/**
 * Byte-array row variant: delete the cells of a row in every column family
 * whose name matches the passed regex, using
 * {@link HConstants#LATEST_TIMESTAMP} as the timestamp.
 *
 * @param row The row to operate on
 * @param familyRegex Column family regex
 * @throws IOException
 */
public void deleteFamilyByRegex(final byte[] row, final String familyRegex)
throws IOException {
  final long latest = HConstants.LATEST_TIMESTAMP;
  deleteFamilyByRegex(row, familyRegex, latest);
}
/**
 * Delete the cells of a row, given as a String, in every column family
 * whose name matches the passed regex and whose timestamps are less than or
 * equal to <i>timestamp</i>. The row is converted with
 * {@link Bytes#toBytes(String)} before delegating.
 *
 * @param row The row to operate on
 * @param familyRegex Column family regex
 * @param timestamp Timestamp to match
 * @throws IOException
 */
public void deleteFamilyByRegex(final String row, final String familyRegex,
    final long timestamp)
throws IOException {
  final byte [] rowKey = Bytes.toBytes(row);
  deleteFamilyByRegex(rowKey, familyRegex, timestamp);
}
/**
 * Delete the cells of a row in every column family whose name matches the
 * passed regex and whose timestamps are less than or equal to
 * <i>timestamp</i>, without an existing row lock (<code>null</code> is
 * passed on as the lock).
 *
 * @param row The row to operate on
 * @param familyRegex Column family regex
 * @param timestamp Timestamp to match
 * @throws IOException
 */
public void deleteFamilyByRegex(final byte [] row, final String familyRegex,
    final long timestamp)
throws IOException {
  final RowLock noLock = null;
  deleteFamilyByRegex(row, familyRegex, timestamp, noLock);
}
/**
 * Ask the region server hosting <code>row</code> to delete the cells of
 * that row in every column family matching <code>familyRegex</code> with
 * timestamps less than or equal to <i>timestamp</i>, optionally under an
 * existing row lock.
 *
 * @param row The row to operate on
 * @param familyRegex Column Family Regex
 * @param timestamp Timestamp to match
 * @param r1 Existing row lock; when <code>null</code>, a lock id of -1 is
 * sent to the server
 * @throws IOException
 */
public void deleteFamilyByRegex(final byte[] row, final String familyRegex,
    final long timestamp, final RowLock r1) throws IOException {
  final ServerCallable<Boolean> deleteCall =
    new ServerCallable<Boolean>(connection, tableName, row) {
      public Boolean call() throws IOException {
        // -1 is sent when the caller supplied no row lock.
        final long lockId = (r1 == null) ? -1L : r1.getLockId();
        final byte [] regionName = location.getRegionInfo().getRegionName();
        server.deleteFamilyByRegex(regionName, row, familyRegex, timestamp,
          lockId);
        // Nothing to hand back; the callable is typed only to fit the API.
        return null;
      }
    };
  connection.getRegionServerWithRetries(deleteCall);
}
/** /**
* Commit a BatchUpdate to the table. * Commit a BatchUpdate to the table.

View File

@ -145,6 +145,21 @@ public interface HRegionInterface extends VersionedProtocol {
public void deleteAll(byte [] regionName, byte [] row, long timestamp, public void deleteAll(byte [] regionName, byte [] row, long timestamp,
long lockId) long lockId)
throws IOException; throws IOException;
/**
 * Delete all cells that match the passed row and the column regex and whose
 * timestamp is equal-to or older than the passed timestamp.
 *
 * @param regionName The name of the region to operate on
 * @param row The row to operate on
 * @param colRegex Regular expression selecting the columns to delete
 * @param timestamp Delete cells of this timestamp or older
 * @param lockId Id of an existing row lock; the client code in this change
 * sends -1 when the caller supplied no lock
 * @throws IOException
 */
public void deleteAllByRegex(byte [] regionName, byte [] row, String colRegex,
long timestamp, long lockId)
throws IOException;
/** /**
* Delete all cells for a row with matching column family with timestamps * Delete all cells for a row with matching column family with timestamps
@ -160,6 +175,21 @@ public interface HRegionInterface extends VersionedProtocol {
public void deleteFamily(byte [] regionName, byte [] row, byte [] family, public void deleteFamily(byte [] regionName, byte [] row, byte [] family,
long timestamp, long lockId) long timestamp, long lockId)
throws IOException; throws IOException;
/**
 * Delete all cells for a row with matching column family regex with
 * timestamps less than or equal to <i>timestamp</i>.
 *
 * @param regionName The name of the region to operate on
 * @param row The row to operate on
 * @param familyRegex Regular expression selecting the column families
 * whose cells are deleted
 * @param timestamp Timestamp to match
 * @param lockId Id of an existing row lock; the client code in this change
 * sends -1 when the caller supplied no lock
 * @throws IOException
 */
public void deleteFamilyByRegex(byte [] regionName, byte [] row, String familyRegex,
long timestamp, long lockId)
throws IOException;
// //

View File

@ -36,6 +36,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -1299,7 +1300,7 @@ public class HRegion implements HConstants {
if (targetStore != null) { if (targetStore != null) {
// Pass versions without modification since in the store getKeys, it // Pass versions without modification since in the store getKeys, it
// includes the size of the passed <code>keys</code> array when counting. // includes the size of the passed <code>keys</code> array when counting.
List<HStoreKey> r = targetStore.getKeys(origin, versions, now); List<HStoreKey> r = targetStore.getKeys(origin, versions, now, null);
if (r != null) { if (r != null) {
keys.addAll(r); keys.addAll(r);
} }
@ -1533,7 +1534,7 @@ public class HRegion implements HConstants {
try { try {
for (HStore store : stores.values()) { for (HStore store : stores.values()) {
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, ts, this.regionInfo), List<HStoreKey> keys = store.getKeys(new HStoreKey(row, ts, this.regionInfo),
ALL_VERSIONS, now); ALL_VERSIONS, now, null);
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>( TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
new HStoreKey.HStoreKeyWritableComparator(regionInfo)); new HStoreKey.HStoreKeyWritableComparator(regionInfo));
for (HStoreKey key: keys) { for (HStoreKey key: keys) {
@ -1545,6 +1546,39 @@ public class HRegion implements HConstants {
if(lockid == null) releaseRowLock(lid); if(lockid == null) releaseRowLock(lid);
} }
} }
/**
 * Delete all cells of a row whose column matches <code>columnRegex</code>
 * and whose timestamp is less than or equal to <i>timestamp</i>. Every
 * store of the region is asked for its matching keys, and a delete marker
 * is written for each key found.
 *
 * @param row The row to operate on
 * @param columnRegex The column regex
 * @param timestamp Timestamp to match
 * @param lockid Row lock; when <code>null</code>, the lock obtained here is
 * released before returning
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public void deleteAllByRegex(final byte [] row, final String columnRegex,
    final long timestamp, final Integer lockid) throws IOException {
  checkReadOnly();
  final Pattern columnMatcher = Pattern.compile(columnRegex);
  final Integer heldLock = getLock(lockid, row);
  final long currentTime = System.currentTimeMillis();
  try {
    for (HStore familyStore : stores.values()) {
      final HStoreKey origin = new HStoreKey(row, timestamp, this.regionInfo);
      final List<HStoreKey> matched =
        familyStore.getKeys(origin, ALL_VERSIONS, currentTime, columnMatcher);
      final TreeMap<HStoreKey, byte []> deletions =
        new TreeMap<HStoreKey, byte []>(
          new HStoreKey.HStoreKeyWritableComparator(regionInfo));
      for (HStoreKey matchedKey : matched) {
        deletions.put(matchedKey, HLogEdit.deleteBytes.get());
      }
      update(deletions);
    }
  } finally {
    // Only release the lock if the caller did not bring its own.
    if (lockid == null) {
      releaseRowLock(heldLock);
    }
  }
}
/** /**
* Delete all cells for a row with matching column family with timestamps * Delete all cells for a row with matching column family with timestamps
@ -1568,7 +1602,7 @@ public class HRegion implements HConstants {
HStore store = getStore(family); HStore store = getStore(family);
// find all the keys that match our criteria // find all the keys that match our criteria
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp, List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp,
this.regionInfo), ALL_VERSIONS, now); this.regionInfo), ALL_VERSIONS, now, null);
// delete all the cells // delete all the cells
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>( TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
new HStoreKey.HStoreKeyWritableComparator(regionInfo)); new HStoreKey.HStoreKeyWritableComparator(regionInfo));
@ -1581,6 +1615,46 @@ public class HRegion implements HConstants {
} }
} }
/**
 * Delete all cells of a row, in every column family whose name matches
 * <code>familyRegex</code>, with timestamps less than or equal to
 * <i>timestamp</i>.
 *
 * @param row The row to operate on
 * @param familyRegex The column family regex for matching. The expression
 * is matched against the family name only; it does not include the
 * <code>:</code>
 * @param timestamp Timestamp to match
 * @param lockid Row lock; when <code>null</code>, the lock obtained here is
 * released before returning
 * @throws IOException
 */
@SuppressWarnings("unchecked")
public void deleteFamilyByRegex(byte [] row, String familyRegex, long timestamp,
    final Integer lockid) throws IOException {
  checkReadOnly();
  final Pattern familyMatcher = Pattern.compile(familyRegex);
  final Integer heldLock = getLock(lockid, row);
  final long currentTime = System.currentTimeMillis();
  try {
    for (HStore familyStore : stores.values()) {
      final String familyName =
        Bytes.toString(familyStore.getFamily().getName());
      // Stores whose family name does not match the pattern are left alone.
      if (familyMatcher.matcher(familyName).matches()) {
        final HStoreKey origin =
          new HStoreKey(row, timestamp, this.regionInfo);
        final List<HStoreKey> matched =
          familyStore.getKeys(origin, ALL_VERSIONS, currentTime, null);
        final TreeMap<HStoreKey, byte []> deletions =
          new TreeMap<HStoreKey, byte []>(
            new HStoreKey.HStoreKeyWritableComparator(regionInfo));
        for (HStoreKey matchedKey : matched) {
          deletions.put(matchedKey, HLogEdit.deleteBytes.get());
        }
        update(deletions);
      }
    }
  } finally {
    // Only release the lock if the caller did not bring its own.
    if (lockid == null) {
      releaseRowLock(heldLock);
    }
  }
}
/* /*
* Delete one or many cells. * Delete one or many cells.
* Used to support {@link #deleteAll(byte [], byte [], long)} and deletion of * Used to support {@link #deleteAll(byte [], byte [], long)} and deletion of

View File

@ -1388,6 +1388,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
region.deleteAll(row, timestamp, getLockFromId(lockId)); region.deleteAll(row, timestamp, getLockFromId(lockId));
} }
/**
 * Look up the named region and forward the regex delete to it, translating
 * the numeric lock id with <code>getLockFromId</code>.
 */
@Override
public void deleteAllByRegex(byte[] regionName, byte[] row, String colRegex,
    long timestamp, long lockId) throws IOException {
  HRegion region = getRegion(regionName);
  region.deleteAllByRegex(row, colRegex, timestamp, getLockFromId(lockId));
}
public void deleteFamily(byte [] regionName, byte [] row, byte [] family, public void deleteFamily(byte [] regionName, byte [] row, byte [] family,
long timestamp, final long lockId) long timestamp, final long lockId)
throws IOException{ throws IOException{
@ -1395,6 +1402,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
getLockFromId(lockId)); getLockFromId(lockId));
} }
/**
 * Look up the named region and forward the family-regex delete to it,
 * translating the numeric lock id with <code>getLockFromId</code>.
 */
@Override
public void deleteFamilyByRegex(byte[] regionName, byte[] row, String familyRegex,
    long timestamp, long lockId) throws IOException {
  HRegion region = getRegion(regionName);
  region.deleteFamilyByRegex(row, familyRegex, timestamp,
    getLockFromId(lockId));
}
public long lockRow(byte [] regionName, byte [] row) public long lockRow(byte [] regionName, byte [] row)
throws IOException { throws IOException {
checkOpen(); checkOpen();

View File

@ -1475,11 +1475,14 @@ public class HStore implements HConstants {
* @param versions How many versions to return. Pass * @param versions How many versions to return. Pass
* {@link HConstants#ALL_VERSIONS} to retrieve all. * {@link HConstants#ALL_VERSIONS} to retrieve all.
* @param now * @param now
* @param columnPattern regex pattern for column matching. if columnPattern
* is not null, we use column pattern to match columns. And the columnPattern
* only works when origin's column is null or its length is zero.
* @return Matching keys. * @return Matching keys.
* @throws IOException * @throws IOException
*/ */
public List<HStoreKey> getKeys(final HStoreKey origin, final int versions, public List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
final long now) final long now, final Pattern columnPattern)
throws IOException { throws IOException {
// This code below is very close to the body of the get method. Any // This code below is very close to the body of the get method. Any
// changes in the flow below should also probably be done in get. TODO: // changes in the flow below should also probably be done in get. TODO:
@ -1489,7 +1492,7 @@ public class HStore implements HConstants {
try { try {
// Check the memcache // Check the memcache
List<HStoreKey> keys = List<HStoreKey> keys =
this.memcache.getKeys(origin, versions, deletes, now); this.memcache.getKeys(origin, versions, deletes, now, columnPattern);
// If we got sufficient versions from memcache, return. // If we got sufficient versions from memcache, return.
if (keys.size() >= versions) { if (keys.size() >= versions) {
return keys; return keys;
@ -1514,6 +1517,13 @@ public class HStore implements HConstants {
do { do {
// if the row matches, we might want this one. // if the row matches, we might want this one.
if (rowMatches(origin, readkey)) { if (rowMatches(origin, readkey)) {
// if the column pattern is not null, we use it for column matching.
// we will skip the keys whose column doesn't match the pattern.
if (columnPattern != null) {
if (!(columnPattern.matcher(Bytes.toString(readkey.getColumn())).matches())) {
continue;
}
}
// if the cell address matches, then we definitely want this key. // if the cell address matches, then we definitely want this key.
if (cellMatches(origin, readkey)) { if (cellMatches(origin, readkey)) {
// Store key if isn't deleted or superceded by memcache // Store key if isn't deleted or superceded by memcache

View File

@ -33,6 +33,7 @@ import java.util.SortedMap;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -601,21 +602,26 @@ class Memcache {
* {@link HConstants.ALL_VERSIONS} to retrieve all. * {@link HConstants.ALL_VERSIONS} to retrieve all.
* @param now * @param now
* @param deletes Accumulating list of deletes * @param deletes Accumulating list of deletes
* @param columnPattern regex pattern for column matching. if columnPattern
* is not null, we use column pattern to match columns. And the columnPattern
* only works when origin's column is null or its length is zero.
* @return Ordered list of <code>versions</code> keys going from newest back. * @return Ordered list of <code>versions</code> keys going from newest back.
* @throws IOException * @throws IOException
*/ */
List<HStoreKey> getKeys(final HStoreKey origin, final int versions, List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
final Set<HStoreKey> deletes, final long now) { final Set<HStoreKey> deletes, final long now,
final Pattern columnPattern) {
this.lock.readLock().lock(); this.lock.readLock().lock();
try { try {
List<HStoreKey> results; List<HStoreKey> results;
synchronized (memcache) { synchronized (memcache) {
results = getKeys(this.memcache, origin, versions, deletes, now); results =
getKeys(this.memcache, origin, versions, deletes, now, columnPattern);
} }
synchronized (snapshot) { synchronized (snapshot) {
results.addAll(results.size(), getKeys(snapshot, origin, results.addAll(results.size(), getKeys(snapshot, origin,
versions == HConstants.ALL_VERSIONS ? versions : versions == HConstants.ALL_VERSIONS ? versions :
(versions - results.size()), deletes, now)); (versions - results.size()), deletes, now, columnPattern));
} }
return results; return results;
} finally { } finally {
@ -629,13 +635,17 @@ class Memcache {
* {@link HConstants.ALL_VERSIONS} to retrieve all. * {@link HConstants.ALL_VERSIONS} to retrieve all.
* @param now * @param now
* @param deletes * @param deletes
* @param columnPattern regex pattern for column matching. if columnPattern
* is not null, we use column pattern to match columns. And the columnPattern
* only works when origin's column is null or its length is zero.
* @return List of all keys that are of the same row and column and of * @return List of all keys that are of the same row and column and of
* equal or older timestamp. If no keys, returns an empty List. Does not * equal or older timestamp. If no keys, returns an empty List. Does not
* return null. * return null.
*/ */
private List<HStoreKey> getKeys(final SortedMap<HStoreKey, private List<HStoreKey> getKeys(final SortedMap<HStoreKey,
byte []> map, final HStoreKey origin, final int versions, byte []> map, final HStoreKey origin, final int versions,
final Set<HStoreKey> deletes, final long now) { final Set<HStoreKey> deletes, final long now,
final Pattern columnPattern) {
List<HStoreKey> result = new ArrayList<HStoreKey>(); List<HStoreKey> result = new ArrayList<HStoreKey>();
List<HStoreKey> victims = new ArrayList<HStoreKey>(); List<HStoreKey> victims = new ArrayList<HStoreKey>();
SortedMap<HStoreKey, byte []> tailMap = map.tailMap(origin); SortedMap<HStoreKey, byte []> tailMap = map.tailMap(origin);
@ -649,6 +659,13 @@ class Memcache {
origin.getRow())) { origin.getRow())) {
break; break;
} }
// if the column pattern is not null, we use it for column matching.
// we will skip the keys whose column doesn't match the pattern.
if (columnPattern != null) {
if (!(columnPattern.matcher(Bytes.toString(key.getColumn())).matches())) {
continue;
}
}
// if the rows match but the timestamp is newer, skip it so we can // if the rows match but the timestamp is newer, skip it so we can
// get to the ones we actually want. // get to the ones we actually want.
if (key.getTimestamp() > origin.getTimestamp()) { if (key.getTimestamp() > origin.getTimestamp()) {

View File

@ -371,7 +371,7 @@ class TransactionalRegion extends HRegion {
for (HStore store : super.stores.values()) { for (HStore store : super.stores.values()) {
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp), List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp),
ALL_VERSIONS, now); ALL_VERSIONS, now, null);
BatchUpdate deleteUpdate = new BatchUpdate(row, timestamp); BatchUpdate deleteUpdate = new BatchUpdate(row, timestamp);
for (HStoreKey key : keys) { for (HStoreKey key : keys) {

View File

@ -33,6 +33,9 @@ import org.apache.hadoop.hbase.util.Bytes;
*/ */
public class TestDeleteAll extends HBaseTestCase { public class TestDeleteAll extends HBaseTestCase {
static final Log LOG = LogFactory.getLog(TestDeleteAll.class); static final Log LOG = LogFactory.getLog(TestDeleteAll.class);
// Matches "<family>:" optionally followed by a single 'b' or 'c' qualifier.
// Written as [bc]: inside a character class '|' is a literal, so the former
// [b|c] would also have matched a qualifier of "|".
private static final String COLUMN_REGEX = "[a-zA-Z0-9]*:[bc]?";
private MiniDFSCluster miniHdfs; private MiniDFSCluster miniHdfs;
@Override @Override
@ -66,6 +69,11 @@ public class TestDeleteAll extends HBaseTestCase {
// test hstore // test hstore
makeSureItWorks(region, region_incommon, true); makeSureItWorks(region, region_incommon, true);
// regex test memcache
makeSureRegexWorks(region, region_incommon, false);
// regex test hstore
makeSureRegexWorks(region, region_incommon, true);
} finally { } finally {
if (region != null) { if (region != null) {
try { try {
@ -137,6 +145,79 @@ public class TestDeleteAll extends HBaseTestCase {
} }
/*
 * Writes three versions (t0 newest, t2 oldest) of four columns in one row,
 * then checks deleteAllByRegex with COLUMN_REGEX: first with a cutoff of t1,
 * then with LATEST_TIMESTAMP. Column "a" is expected to survive both
 * deletes; columns "b", "c" and the bare family column are expected to go.
 */
private void makeSureRegexWorks(HRegion region, HRegionIncommon region_incommon,
    boolean flush)
throws Exception{
  // insert a few versions worth of data for a row
  byte [] row = Bytes.toBytes("test_row");
  long t0 = System.currentTimeMillis();
  long t1 = t0 - 15000;
  long t2 = t1 - 15000;
  long [] stamps = {t0, t1, t2};

  String family = Bytes.toString(COLUMNS[0]);
  byte [] colA = Bytes.toBytes(family + "a");
  byte [] colB = Bytes.toBytes(family + "b");
  byte [] colC = Bytes.toBytes(family + "c");
  byte [] colD = Bytes.toBytes(family);
  byte [][] allColumns = {colA, colB, colC, colD};
  byte [][] deletedColumns = {colB, colC, colD};

  for (int version = 0; version < stamps.length; version++) {
    BatchUpdate versionUpdate = new BatchUpdate(row, stamps[version]);
    for (byte [] column : allColumns) {
      versionUpdate.put(column, cellData(version, flush).getBytes());
    }
    region_incommon.commit(versionUpdate);
  }

  if (flush) {
    region_incommon.flushcache();
  }

  // delete the matching columns at timestamp t1; only the t0 version of
  // those columns should be left behind
  region.deleteAllByRegex(row, COLUMN_REGEX, t1, null);
  if (flush) {
    region_incommon.flushcache();
  }
  for (int version = 0; version < stamps.length; version++) {
    assertCellEquals(region, row, colA, stamps[version],
      cellData(version, flush));
  }
  for (byte [] column : deletedColumns) {
    assertCellEquals(region, row, column, t0, cellData(0, flush));
    assertCellEquals(region, row, column, t1, null);
    assertCellEquals(region, row, column, t2, null);
  }

  // delete the matching columns w/o a timestamp; every version of those
  // columns should be gone while colA is untouched
  region.deleteAllByRegex(row, COLUMN_REGEX,
    HConstants.LATEST_TIMESTAMP, null);
  if (flush) {
    region_incommon.flushcache();
  }
  for (int version = 0; version < stamps.length; version++) {
    assertCellEquals(region, row, colA, stamps[version],
      cellData(version, flush));
  }
  for (byte [] column : deletedColumns) {
    for (long stamp : stamps) {
      assertCellEquals(region, row, column, stamp, null);
    }
  }
}
private String cellData(int tsNum, boolean flush){ private String cellData(int tsNum, boolean flush){
return "t" + tsNum + " data" + (flush ? " - with flush" : ""); return "t" + tsNum + " data" + (flush ? " - with flush" : "");
} }

View File

@ -35,6 +35,9 @@ public class TestDeleteFamily extends HBaseTestCase {
static final Log LOG = LogFactory.getLog(TestDeleteFamily.class); static final Log LOG = LogFactory.getLog(TestDeleteFamily.class);
private MiniDFSCluster miniHdfs; private MiniDFSCluster miniHdfs;
//for family regex deletion test
protected static final String COLFAMILY_REGEX = "col[a-zA-Z]*1";
@Override @Override
protected void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
@ -60,6 +63,10 @@ public class TestDeleteFamily extends HBaseTestCase {
makeSureItWorks(region, region_incommon, false); makeSureItWorks(region, region_incommon, false);
// test hstore // test hstore
makeSureItWorks(region, region_incommon, true); makeSureItWorks(region, region_incommon, true);
// family regex test memcache
makeSureRegexWorks(region, region_incommon, false);
// family regex test hstore
makeSureRegexWorks(region, region_incommon, true);
} finally { } finally {
if (region != null) { if (region != null) {
@ -138,6 +145,71 @@ public class TestDeleteFamily extends HBaseTestCase {
} }
/*
 * Writes three versions (t0 newest, t2 oldest) of columns A and B in
 * COLUMNS[0] and column C in COLUMNS[1], then checks deleteFamilyByRegex
 * with COLFAMILY_REGEX: first with a cutoff of t1, then with
 * LATEST_TIMESTAMP. Column C is expected to survive both deletes.
 */
private void makeSureRegexWorks(HRegion region, HRegionIncommon region_incommon,
    boolean flush)
throws Exception{
  // insert a few versions worth of data for a row
  byte [] row = Bytes.toBytes("test_row");
  long t0 = System.currentTimeMillis();
  long t1 = t0 - 15000;
  long t2 = t1 - 15000;
  long [] stamps = {t0, t1, t2};

  byte [] colA = Bytes.toBytes(Bytes.toString(COLUMNS[0]) + "a");
  byte [] colB = Bytes.toBytes(Bytes.toString(COLUMNS[0]) + "b");
  byte [] colC = Bytes.toBytes(Bytes.toString(COLUMNS[1]) + "c");
  byte [][] allColumns = {colA, colB, colC};
  byte [][] deletedColumns = {colA, colB};

  for (int version = 0; version < stamps.length; version++) {
    BatchUpdate versionUpdate = new BatchUpdate(row, stamps[version]);
    for (byte [] column : allColumns) {
      versionUpdate.put(column, cellData(version, flush).getBytes());
    }
    region_incommon.commit(versionUpdate);
  }

  if (flush) {
    region_incommon.flushcache();
  }

  // delete the matching family at timestamp t1
  region.deleteFamilyByRegex(row, COLFAMILY_REGEX, t1, null);
  if (flush) {
    region_incommon.flushcache();
  }
  // most recent for A,B,C should be fine
  // A,B at older timestamps should be gone
  // C should be fine for older timestamps
  for (byte [] column : deletedColumns) {
    assertCellEquals(region, row, column, t0, cellData(0, flush));
    assertCellEquals(region, row, column, t1, null);
    assertCellEquals(region, row, column, t2, null);
  }
  for (int version = 0; version < stamps.length; version++) {
    assertCellEquals(region, row, colC, stamps[version],
      cellData(version, flush));
  }

  // delete the matching family w/o a timestamp; nothing should be left
  // except for column C
  region.deleteFamilyByRegex(row, COLFAMILY_REGEX,
    HConstants.LATEST_TIMESTAMP, null);
  if (flush) {
    region_incommon.flushcache();
  }
  // A,B for latest timestamp should be gone
  // C should still be fine
  for (byte [] column : deletedColumns) {
    assertCellEquals(region, row, column, t0, null);
  }
  for (int version = 0; version < stamps.length; version++) {
    assertCellEquals(region, row, colC, stamps[version],
      cellData(version, flush));
  }
}
private String cellData(int tsNum, boolean flush){ private String cellData(int tsNum, boolean flush){
return "t" + tsNum + " data" + (flush ? " - with flush" : ""); return "t" + tsNum + " data" + (flush ? " - with flush" : "");
} }