HBASE-961 Delete multiple columns by regular expression
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@712068 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e3659aa246
commit
8fd65c9fbf
@ -101,6 +101,8 @@ Release 0.19.0 - Unreleased
|
|||||||
HBASE-81 When a scanner lease times out, throw a more "user friendly" exception
|
HBASE-81 When a scanner lease times out, throw a more "user friendly" exception
|
||||||
HBASE-978 Remove BloomFilterDescriptor. It is no longer used.
|
HBASE-978 Remove BloomFilterDescriptor. It is no longer used.
|
||||||
HBASE-975 Improve MapFile performance for start and end key
|
HBASE-975 Improve MapFile performance for start and end key
|
||||||
|
HBASE-961 Delete multiple columns by regular expression
|
||||||
|
(Samuel Guo via Stack)
|
||||||
|
|
||||||
NEW FEATURES
|
NEW FEATURES
|
||||||
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]
|
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]
|
||||||
|
@ -891,6 +891,71 @@ public class HTable {
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells that match the passed row and column.
|
||||||
|
* @param row Row to update
|
||||||
|
* @param colRegex column regex expression
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteAllByRegex(final String row, final String colRegex)
|
||||||
|
throws IOException {
|
||||||
|
deleteAll(row, colRegex, HConstants.LATEST_TIMESTAMP);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells that match the passed row and column and whose
|
||||||
|
* timestamp is equal-to or older than the passed timestamp.
|
||||||
|
* @param row Row to update
|
||||||
|
* @param colRegex Column Regex expression
|
||||||
|
* @param ts Delete all cells of the same timestamp or older.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteAllByRegex(final String row, final String colRegex,
|
||||||
|
final long ts) throws IOException {
|
||||||
|
deleteAllByRegex(Bytes.toBytes(row), colRegex, ts);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells that match the passed row and column and whose
|
||||||
|
* timestamp is equal-to or older than the passed timestamp.
|
||||||
|
* @param row Row to update
|
||||||
|
* @param colRegex Column Regex expression
|
||||||
|
* @param ts Delete all cells of the same timestamp or older.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteAllByRegex(final byte [] row, final String colRegex,
|
||||||
|
final long ts) throws IOException {
|
||||||
|
deleteAllByRegex(row, colRegex, ts, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells that match the passed row and column and whose
|
||||||
|
* timestamp is equal-to or older than the passed timestamp, using an
|
||||||
|
* existing row lock.
|
||||||
|
* @param row Row to update
|
||||||
|
* @param colRegex Column regex expression
|
||||||
|
* @param ts Delete all cells of the same timestamp or older.
|
||||||
|
* @param rl Existing row lock
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteAllByRegex(final byte [] row, final String colRegex,
|
||||||
|
final long ts, final RowLock rl)
|
||||||
|
throws IOException {
|
||||||
|
connection.getRegionServerWithRetries(
|
||||||
|
new ServerCallable<Boolean>(connection, tableName, row) {
|
||||||
|
public Boolean call() throws IOException {
|
||||||
|
long lockId = -1L;
|
||||||
|
if(rl != null) {
|
||||||
|
lockId = rl.getLockId();
|
||||||
|
}
|
||||||
|
this.server.deleteAllByRegex(location.getRegionInfo().getRegionName(),
|
||||||
|
row, colRegex, ts, lockId);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete all cells for a row with matching column family at all timestamps.
|
* Delete all cells for a row with matching column family at all timestamps.
|
||||||
@ -973,6 +1038,90 @@ public class HTable {
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells for a row with matching column family regex
|
||||||
|
* at all timestamps.
|
||||||
|
*
|
||||||
|
* @param row The row to operate on
|
||||||
|
* @param familyRegex Column family regex
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteFamilyByRegex(final String row, final String familyRegex)
|
||||||
|
throws IOException {
|
||||||
|
deleteFamilyByRegex(row, familyRegex, HConstants.LATEST_TIMESTAMP);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells for a row with matching column family regex
|
||||||
|
* at all timestamps.
|
||||||
|
*
|
||||||
|
* @param row The row to operate on
|
||||||
|
* @param familyRegex Column family regex
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteFamilyByRegex(final byte[] row, final String familyRegex)
|
||||||
|
throws IOException {
|
||||||
|
deleteFamilyByRegex(row, familyRegex, HConstants.LATEST_TIMESTAMP);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells for a row with matching column family regex
|
||||||
|
* with timestamps less than or equal to <i>timestamp</i>.
|
||||||
|
*
|
||||||
|
* @param row The row to operate on
|
||||||
|
* @param familyRegex Column family regex
|
||||||
|
* @param timestamp Timestamp to match
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteFamilyByRegex(final String row, final String familyRegex,
|
||||||
|
final long timestamp)
|
||||||
|
throws IOException{
|
||||||
|
deleteFamilyByRegex(Bytes.toBytes(row), familyRegex, timestamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells for a row with matching column family regex
|
||||||
|
* with timestamps less than or equal to <i>timestamp</i>.
|
||||||
|
*
|
||||||
|
* @param row The row to operate on
|
||||||
|
* @param familyRegex Column family regex
|
||||||
|
* @param timestamp Timestamp to match
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteFamilyByRegex(final byte [] row, final String familyRegex,
|
||||||
|
final long timestamp)
|
||||||
|
throws IOException {
|
||||||
|
deleteFamilyByRegex(row,familyRegex,timestamp,null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells for a row with matching column family regex with
|
||||||
|
* timestamps less than or equal to <i>timestamp</i>, using existing
|
||||||
|
* row lock.
|
||||||
|
*
|
||||||
|
* @param row The row to operate on
|
||||||
|
* @param familyRegex Column Family Regex
|
||||||
|
* @param timestamp Timestamp to match
|
||||||
|
* @param r1 Existing row lock
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteFamilyByRegex(final byte[] row, final String familyRegex,
|
||||||
|
final long timestamp, final RowLock r1) throws IOException {
|
||||||
|
connection.getRegionServerWithRetries(
|
||||||
|
new ServerCallable<Boolean>(connection, tableName, row) {
|
||||||
|
public Boolean call() throws IOException {
|
||||||
|
long lockId = -1L;
|
||||||
|
if(r1 != null) {
|
||||||
|
lockId = r1.getLockId();
|
||||||
|
}
|
||||||
|
server.deleteFamilyByRegex(location.getRegionInfo().getRegionName(),
|
||||||
|
row, familyRegex, timestamp, lockId);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Commit a BatchUpdate to the table.
|
* Commit a BatchUpdate to the table.
|
||||||
|
@ -145,6 +145,21 @@ public interface HRegionInterface extends VersionedProtocol {
|
|||||||
public void deleteAll(byte [] regionName, byte [] row, long timestamp,
|
public void deleteAll(byte [] regionName, byte [] row, long timestamp,
|
||||||
long lockId)
|
long lockId)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells that match the passed row and the column regex and whose
|
||||||
|
* timestamp is equal-to or older than the passed timestamp.
|
||||||
|
*
|
||||||
|
* @param regionName The name of the region to operate on
|
||||||
|
* @param row The row to operate on
|
||||||
|
* @param colRegex Column regular expression used to select the columns
|
||||||
|
* @param timestamp Delete cells of this timestamp or older
|
||||||
|
* @param lockId lock id (the HTable client sends -1 when it has no row lock)
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteAllByRegex(byte [] regionName, byte [] row, String colRegex,
|
||||||
|
long timestamp, long lockId)
|
||||||
|
throws IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete all cells for a row with matching column family with timestamps
|
* Delete all cells for a row with matching column family with timestamps
|
||||||
@ -160,6 +175,21 @@ public interface HRegionInterface extends VersionedProtocol {
|
|||||||
public void deleteFamily(byte [] regionName, byte [] row, byte [] family,
|
public void deleteFamily(byte [] regionName, byte [] row, byte [] family,
|
||||||
long timestamp, long lockId)
|
long timestamp, long lockId)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells for a row in every column family matching a regex, with
|
||||||
|
* timestamps less than or equal to <i>timestamp</i>.
|
||||||
|
*
|
||||||
|
* @param regionName The name of the region to operate on
|
||||||
|
* @param row The row to operate on
|
||||||
|
* @param familyRegex Regular expression matched against column family names
|
||||||
|
* @param timestamp Timestamp to match
|
||||||
|
* @param lockId lock id (the HTable client sends -1 when it has no row lock)
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public void deleteFamilyByRegex(byte [] regionName, byte [] row, String familyRegex,
|
||||||
|
long timestamp, long lockId)
|
||||||
|
throws IOException;
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
|
@ -36,6 +36,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
@ -1299,7 +1300,7 @@ public class HRegion implements HConstants {
|
|||||||
if (targetStore != null) {
|
if (targetStore != null) {
|
||||||
// Pass versions without modification since in the store getKeys, it
|
// Pass versions without modification since in the store getKeys, it
|
||||||
// includes the size of the passed <code>keys</code> array when counting.
|
// includes the size of the passed <code>keys</code> array when counting.
|
||||||
List<HStoreKey> r = targetStore.getKeys(origin, versions, now);
|
List<HStoreKey> r = targetStore.getKeys(origin, versions, now, null);
|
||||||
if (r != null) {
|
if (r != null) {
|
||||||
keys.addAll(r);
|
keys.addAll(r);
|
||||||
}
|
}
|
||||||
@ -1533,7 +1534,7 @@ public class HRegion implements HConstants {
|
|||||||
try {
|
try {
|
||||||
for (HStore store : stores.values()) {
|
for (HStore store : stores.values()) {
|
||||||
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, ts, this.regionInfo),
|
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, ts, this.regionInfo),
|
||||||
ALL_VERSIONS, now);
|
ALL_VERSIONS, now, null);
|
||||||
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
|
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
|
||||||
new HStoreKey.HStoreKeyWritableComparator(regionInfo));
|
new HStoreKey.HStoreKeyWritableComparator(regionInfo));
|
||||||
for (HStoreKey key: keys) {
|
for (HStoreKey key: keys) {
|
||||||
@ -1545,6 +1546,39 @@ public class HRegion implements HConstants {
|
|||||||
if(lockid == null) releaseRowLock(lid);
|
if(lockid == null) releaseRowLock(lid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells for a row with matching columns with timestamps
|
||||||
|
* less than or equal to <i>timestamp</i>.
|
||||||
|
*
|
||||||
|
* @param row The row to operate on
|
||||||
|
* @param columnRegex The column regex
|
||||||
|
* @param timestamp Timestamp to match
|
||||||
|
* @param lockid Row lock
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public void deleteAllByRegex(final byte [] row, final String columnRegex,
|
||||||
|
final long timestamp, final Integer lockid) throws IOException {
|
||||||
|
checkReadOnly();
|
||||||
|
Pattern columnPattern = Pattern.compile(columnRegex);
|
||||||
|
Integer lid = getLock(lockid, row);
|
||||||
|
long now = System.currentTimeMillis();
|
||||||
|
try {
|
||||||
|
for (HStore store : stores.values()) {
|
||||||
|
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp, this.regionInfo),
|
||||||
|
ALL_VERSIONS, now, columnPattern);
|
||||||
|
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
|
||||||
|
new HStoreKey.HStoreKeyWritableComparator(regionInfo));
|
||||||
|
for (HStoreKey key: keys) {
|
||||||
|
edits.put(key, HLogEdit.deleteBytes.get());
|
||||||
|
}
|
||||||
|
update(edits);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if(lockid == null) releaseRowLock(lid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete all cells for a row with matching column family with timestamps
|
* Delete all cells for a row with matching column family with timestamps
|
||||||
@ -1568,7 +1602,7 @@ public class HRegion implements HConstants {
|
|||||||
HStore store = getStore(family);
|
HStore store = getStore(family);
|
||||||
// find all the keys that match our criteria
|
// find all the keys that match our criteria
|
||||||
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp,
|
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp,
|
||||||
this.regionInfo), ALL_VERSIONS, now);
|
this.regionInfo), ALL_VERSIONS, now, null);
|
||||||
// delete all the cells
|
// delete all the cells
|
||||||
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
|
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
|
||||||
new HStoreKey.HStoreKeyWritableComparator(regionInfo));
|
new HStoreKey.HStoreKeyWritableComparator(regionInfo));
|
||||||
@ -1581,6 +1615,46 @@ public class HRegion implements HConstants {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete all cells for a row with all the matching column families by
|
||||||
|
* familyRegex with timestamps less than or equal to <i>timestamp</i>.
|
||||||
|
*
|
||||||
|
* @param row The row to operate on
|
||||||
|
* @param familyRegex The column family regex for matching. This regex
|
||||||
|
* expression just match the family name, it didn't include <code>:<code>
|
||||||
|
* @param timestamp Timestamp to match
|
||||||
|
* @param lockid Row lock
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public void deleteFamilyByRegex(byte [] row, String familyRegex, long timestamp,
|
||||||
|
final Integer lockid) throws IOException {
|
||||||
|
checkReadOnly();
|
||||||
|
// construct the family regex pattern
|
||||||
|
Pattern familyPattern = Pattern.compile(familyRegex);
|
||||||
|
Integer lid = getLock(lockid, row);
|
||||||
|
long now = System.currentTimeMillis();
|
||||||
|
try {
|
||||||
|
for(HStore store : stores.values()) {
|
||||||
|
String familyName = Bytes.toString(store.getFamily().getName());
|
||||||
|
// check the family name match the family pattern.
|
||||||
|
if(!(familyPattern.matcher(familyName).matches()))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp,
|
||||||
|
this.regionInfo), ALL_VERSIONS, now, null);
|
||||||
|
TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>(
|
||||||
|
new HStoreKey.HStoreKeyWritableComparator(regionInfo));
|
||||||
|
for (HStoreKey key: keys) {
|
||||||
|
edits.put(key, HLogEdit.deleteBytes.get());
|
||||||
|
}
|
||||||
|
update(edits);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if(lockid == null) releaseRowLock(lid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Delete one or many cells.
|
* Delete one or many cells.
|
||||||
* Used to support {@link #deleteAll(byte [], byte [], long)} and deletion of
|
* Used to support {@link #deleteAll(byte [], byte [], long)} and deletion of
|
||||||
|
@ -1388,6 +1388,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||||||
region.deleteAll(row, timestamp, getLockFromId(lockId));
|
region.deleteAll(row, timestamp, getLockFromId(lockId));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void deleteAllByRegex(byte[] regionName, byte[] row, String colRegex,
|
||||||
|
long timestamp, long lockId) throws IOException {
|
||||||
|
getRegion(regionName).deleteAllByRegex(row, colRegex, timestamp,
|
||||||
|
getLockFromId(lockId));
|
||||||
|
}
|
||||||
|
|
||||||
public void deleteFamily(byte [] regionName, byte [] row, byte [] family,
|
public void deleteFamily(byte [] regionName, byte [] row, byte [] family,
|
||||||
long timestamp, final long lockId)
|
long timestamp, final long lockId)
|
||||||
throws IOException{
|
throws IOException{
|
||||||
@ -1395,6 +1402,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||||||
getLockFromId(lockId));
|
getLockFromId(lockId));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void deleteFamilyByRegex(byte[] regionName, byte[] row, String familyRegex,
|
||||||
|
long timestamp, long lockId) throws IOException {
|
||||||
|
getRegion(regionName).deleteFamilyByRegex(row, familyRegex, timestamp,
|
||||||
|
getLockFromId(lockId));
|
||||||
|
}
|
||||||
|
|
||||||
public long lockRow(byte [] regionName, byte [] row)
|
public long lockRow(byte [] regionName, byte [] row)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
checkOpen();
|
checkOpen();
|
||||||
|
@ -1475,11 +1475,14 @@ public class HStore implements HConstants {
|
|||||||
* @param versions How many versions to return. Pass
|
* @param versions How many versions to return. Pass
|
||||||
* {@link HConstants#ALL_VERSIONS} to retrieve all.
|
* {@link HConstants#ALL_VERSIONS} to retrieve all.
|
||||||
* @param now
|
* @param now
|
||||||
|
* @param columnPattern Regex pattern for column matching. If columnPattern
|
||||||
|
* is not null, keys whose column does not match the pattern are skipped. The
|
||||||
|
* pattern only works when origin's column is null or its length is zero.
|
||||||
* @return Matching keys.
|
* @return Matching keys.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
|
public List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
|
||||||
final long now)
|
final long now, final Pattern columnPattern)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
// This code below is very close to the body of the get method. Any
|
// This code below is very close to the body of the get method. Any
|
||||||
// changes in the flow below should also probably be done in get. TODO:
|
// changes in the flow below should also probably be done in get. TODO:
|
||||||
@ -1489,7 +1492,7 @@ public class HStore implements HConstants {
|
|||||||
try {
|
try {
|
||||||
// Check the memcache
|
// Check the memcache
|
||||||
List<HStoreKey> keys =
|
List<HStoreKey> keys =
|
||||||
this.memcache.getKeys(origin, versions, deletes, now);
|
this.memcache.getKeys(origin, versions, deletes, now, columnPattern);
|
||||||
// If we got sufficient versions from memcache, return.
|
// If we got sufficient versions from memcache, return.
|
||||||
if (keys.size() >= versions) {
|
if (keys.size() >= versions) {
|
||||||
return keys;
|
return keys;
|
||||||
@ -1514,6 +1517,13 @@ public class HStore implements HConstants {
|
|||||||
do {
|
do {
|
||||||
// if the row matches, we might want this one.
|
// if the row matches, we might want this one.
|
||||||
if (rowMatches(origin, readkey)) {
|
if (rowMatches(origin, readkey)) {
|
||||||
|
// if the column pattern is not null, we use it for column matching.
|
||||||
|
// we will skip the keys whose column doesn't match the pattern.
|
||||||
|
if (columnPattern != null) {
|
||||||
|
if (!(columnPattern.matcher(Bytes.toString(readkey.getColumn())).matches())) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
// if the cell address matches, then we definitely want this key.
|
// if the cell address matches, then we definitely want this key.
|
||||||
if (cellMatches(origin, readkey)) {
|
if (cellMatches(origin, readkey)) {
|
||||||
// Store key if isn't deleted or superceded by memcache
|
// Store key if isn't deleted or superceded by memcache
|
||||||
|
@ -33,6 +33,7 @@ import java.util.SortedMap;
|
|||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
@ -601,21 +602,26 @@ class Memcache {
|
|||||||
* {@link HConstants.ALL_VERSIONS} to retrieve all.
|
* {@link HConstants.ALL_VERSIONS} to retrieve all.
|
||||||
* @param now
|
* @param now
|
||||||
* @param deletes Accumulating list of deletes
|
* @param deletes Accumulating list of deletes
|
||||||
|
* @param columnPattern Regex pattern for column matching. If columnPattern
|
||||||
|
* is not null, keys whose column does not match the pattern are skipped. The
|
||||||
|
* pattern only works when origin's column is null or its length is zero.
|
||||||
* @return Ordered list of <code>versions</code> keys going from newest back.
|
* @return Ordered list of <code>versions</code> keys going from newest back.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
|
List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
|
||||||
final Set<HStoreKey> deletes, final long now) {
|
final Set<HStoreKey> deletes, final long now,
|
||||||
|
final Pattern columnPattern) {
|
||||||
this.lock.readLock().lock();
|
this.lock.readLock().lock();
|
||||||
try {
|
try {
|
||||||
List<HStoreKey> results;
|
List<HStoreKey> results;
|
||||||
synchronized (memcache) {
|
synchronized (memcache) {
|
||||||
results = getKeys(this.memcache, origin, versions, deletes, now);
|
results =
|
||||||
|
getKeys(this.memcache, origin, versions, deletes, now, columnPattern);
|
||||||
}
|
}
|
||||||
synchronized (snapshot) {
|
synchronized (snapshot) {
|
||||||
results.addAll(results.size(), getKeys(snapshot, origin,
|
results.addAll(results.size(), getKeys(snapshot, origin,
|
||||||
versions == HConstants.ALL_VERSIONS ? versions :
|
versions == HConstants.ALL_VERSIONS ? versions :
|
||||||
(versions - results.size()), deletes, now));
|
(versions - results.size()), deletes, now, columnPattern));
|
||||||
}
|
}
|
||||||
return results;
|
return results;
|
||||||
} finally {
|
} finally {
|
||||||
@ -629,13 +635,17 @@ class Memcache {
|
|||||||
* {@link HConstants.ALL_VERSIONS} to retrieve all.
|
* {@link HConstants.ALL_VERSIONS} to retrieve all.
|
||||||
* @param now
|
* @param now
|
||||||
* @param deletes
|
* @param deletes
|
||||||
|
* @param columnPattern Regex pattern for column matching. If columnPattern
|
||||||
|
* is not null, keys whose column does not match the pattern are skipped. The
|
||||||
|
* pattern only works when origin's column is null or its length is zero.
|
||||||
* @return List of all keys that are of the same row and column and of
|
* @return List of all keys that are of the same row and column and of
|
||||||
* equal or older timestamp. If no keys, returns an empty List. Does not
|
* equal or older timestamp. If no keys, returns an empty List. Does not
|
||||||
* return null.
|
* return null.
|
||||||
*/
|
*/
|
||||||
private List<HStoreKey> getKeys(final SortedMap<HStoreKey,
|
private List<HStoreKey> getKeys(final SortedMap<HStoreKey,
|
||||||
byte []> map, final HStoreKey origin, final int versions,
|
byte []> map, final HStoreKey origin, final int versions,
|
||||||
final Set<HStoreKey> deletes, final long now) {
|
final Set<HStoreKey> deletes, final long now,
|
||||||
|
final Pattern columnPattern) {
|
||||||
List<HStoreKey> result = new ArrayList<HStoreKey>();
|
List<HStoreKey> result = new ArrayList<HStoreKey>();
|
||||||
List<HStoreKey> victims = new ArrayList<HStoreKey>();
|
List<HStoreKey> victims = new ArrayList<HStoreKey>();
|
||||||
SortedMap<HStoreKey, byte []> tailMap = map.tailMap(origin);
|
SortedMap<HStoreKey, byte []> tailMap = map.tailMap(origin);
|
||||||
@ -649,6 +659,13 @@ class Memcache {
|
|||||||
origin.getRow())) {
|
origin.getRow())) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// if the column pattern is not null, we use it for column matching.
|
||||||
|
// we will skip the keys whose column doesn't match the pattern.
|
||||||
|
if (columnPattern != null) {
|
||||||
|
if (!(columnPattern.matcher(Bytes.toString(key.getColumn())).matches())) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
// if the rows match but the timestamp is newer, skip it so we can
|
// if the rows match but the timestamp is newer, skip it so we can
|
||||||
// get to the ones we actually want.
|
// get to the ones we actually want.
|
||||||
if (key.getTimestamp() > origin.getTimestamp()) {
|
if (key.getTimestamp() > origin.getTimestamp()) {
|
||||||
|
@ -371,7 +371,7 @@ class TransactionalRegion extends HRegion {
|
|||||||
|
|
||||||
for (HStore store : super.stores.values()) {
|
for (HStore store : super.stores.values()) {
|
||||||
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp),
|
List<HStoreKey> keys = store.getKeys(new HStoreKey(row, timestamp),
|
||||||
ALL_VERSIONS, now);
|
ALL_VERSIONS, now, null);
|
||||||
BatchUpdate deleteUpdate = new BatchUpdate(row, timestamp);
|
BatchUpdate deleteUpdate = new BatchUpdate(row, timestamp);
|
||||||
|
|
||||||
for (HStoreKey key : keys) {
|
for (HStoreKey key : keys) {
|
||||||
|
@ -33,6 +33,9 @@ import org.apache.hadoop.hbase.util.Bytes;
|
|||||||
*/
|
*/
|
||||||
public class TestDeleteAll extends HBaseTestCase {
|
public class TestDeleteAll extends HBaseTestCase {
|
||||||
static final Log LOG = LogFactory.getLog(TestDeleteAll.class);
|
static final Log LOG = LogFactory.getLog(TestDeleteAll.class);
|
||||||
|
|
||||||
|
// Column regex for the regex delete test: any alphanumeric family, a colon,
// then an optional qualifier of 'b' or 'c'. Inside a character class '|' is
// a literal character, not alternation, so the class is written as [bc].
private static final String COLUMN_REGEX = "[a-zA-Z0-9]*:[bc]?";
|
||||||
|
|
||||||
private MiniDFSCluster miniHdfs;
|
private MiniDFSCluster miniHdfs;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -66,6 +69,11 @@ public class TestDeleteAll extends HBaseTestCase {
|
|||||||
// test hstore
|
// test hstore
|
||||||
makeSureItWorks(region, region_incommon, true);
|
makeSureItWorks(region, region_incommon, true);
|
||||||
|
|
||||||
|
// regex test memcache
|
||||||
|
makeSureRegexWorks(region, region_incommon, false);
|
||||||
|
// regex test hstore
|
||||||
|
makeSureRegexWorks(region, region_incommon, true);
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
if (region != null) {
|
if (region != null) {
|
||||||
try {
|
try {
|
||||||
@ -137,6 +145,79 @@ public class TestDeleteAll extends HBaseTestCase {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void makeSureRegexWorks(HRegion region, HRegionIncommon region_incommon,
|
||||||
|
boolean flush)
|
||||||
|
throws Exception{
|
||||||
|
// insert a few versions worth of data for a row
|
||||||
|
byte [] row = Bytes.toBytes("test_row");
|
||||||
|
long t0 = System.currentTimeMillis();
|
||||||
|
long t1 = t0 - 15000;
|
||||||
|
long t2 = t1 - 15000;
|
||||||
|
|
||||||
|
byte [] colA = Bytes.toBytes(Bytes.toString(COLUMNS[0]) + "a");
|
||||||
|
byte [] colB = Bytes.toBytes(Bytes.toString(COLUMNS[0]) + "b");
|
||||||
|
byte [] colC = Bytes.toBytes(Bytes.toString(COLUMNS[0]) + "c");
|
||||||
|
byte [] colD = Bytes.toBytes(Bytes.toString(COLUMNS[0]));
|
||||||
|
|
||||||
|
BatchUpdate batchUpdate = new BatchUpdate(row, t0);
|
||||||
|
batchUpdate.put(colA, cellData(0, flush).getBytes());
|
||||||
|
batchUpdate.put(colB, cellData(0, flush).getBytes());
|
||||||
|
batchUpdate.put(colC, cellData(0, flush).getBytes());
|
||||||
|
batchUpdate.put(colD, cellData(0, flush).getBytes());
|
||||||
|
region_incommon.commit(batchUpdate);
|
||||||
|
|
||||||
|
batchUpdate = new BatchUpdate(row, t1);
|
||||||
|
batchUpdate.put(colA, cellData(1, flush).getBytes());
|
||||||
|
batchUpdate.put(colB, cellData(1, flush).getBytes());
|
||||||
|
batchUpdate.put(colC, cellData(1, flush).getBytes());
|
||||||
|
batchUpdate.put(colD, cellData(1, flush).getBytes());
|
||||||
|
region_incommon.commit(batchUpdate);
|
||||||
|
|
||||||
|
batchUpdate = new BatchUpdate(row, t2);
|
||||||
|
batchUpdate.put(colA, cellData(2, flush).getBytes());
|
||||||
|
batchUpdate.put(colB, cellData(2, flush).getBytes());
|
||||||
|
batchUpdate.put(colC, cellData(2, flush).getBytes());
|
||||||
|
batchUpdate.put(colD, cellData(2, flush).getBytes());
|
||||||
|
region_incommon.commit(batchUpdate);
|
||||||
|
|
||||||
|
if (flush) {region_incommon.flushcache();}
|
||||||
|
|
||||||
|
// call delete the matching columns at a timestamp,
|
||||||
|
// make sure only the most recent stuff is left behind
|
||||||
|
region.deleteAllByRegex(row, COLUMN_REGEX, t1, null);
|
||||||
|
if (flush) {region_incommon.flushcache();}
|
||||||
|
assertCellEquals(region, row, colA, t0, cellData(0, flush));
|
||||||
|
assertCellEquals(region, row, colA, t1, cellData(1, flush));
|
||||||
|
assertCellEquals(region, row, colA, t2, cellData(2, flush));
|
||||||
|
assertCellEquals(region, row, colB, t0, cellData(0, flush));
|
||||||
|
assertCellEquals(region, row, colB, t1, null);
|
||||||
|
assertCellEquals(region, row, colB, t2, null);
|
||||||
|
assertCellEquals(region, row, colC, t0, cellData(0, flush));
|
||||||
|
assertCellEquals(region, row, colC, t1, null);
|
||||||
|
assertCellEquals(region, row, colC, t2, null);
|
||||||
|
assertCellEquals(region, row, colD, t0, cellData(0, flush));
|
||||||
|
assertCellEquals(region, row, colD, t1, null);
|
||||||
|
assertCellEquals(region, row, colD, t2, null);
|
||||||
|
|
||||||
|
// call delete all w/o a timestamp, make sure nothing is left.
|
||||||
|
region.deleteAllByRegex(row, COLUMN_REGEX,
|
||||||
|
HConstants.LATEST_TIMESTAMP, null);
|
||||||
|
if (flush) {region_incommon.flushcache();}
|
||||||
|
assertCellEquals(region, row, colA, t0, cellData(0, flush));
|
||||||
|
assertCellEquals(region, row, colA, t1, cellData(1, flush));
|
||||||
|
assertCellEquals(region, row, colA, t2, cellData(2, flush));
|
||||||
|
assertCellEquals(region, row, colB, t0, null);
|
||||||
|
assertCellEquals(region, row, colB, t1, null);
|
||||||
|
assertCellEquals(region, row, colB, t2, null);
|
||||||
|
assertCellEquals(region, row, colC, t0, null);
|
||||||
|
assertCellEquals(region, row, colC, t1, null);
|
||||||
|
assertCellEquals(region, row, colC, t2, null);
|
||||||
|
assertCellEquals(region, row, colD, t0, null);
|
||||||
|
assertCellEquals(region, row, colD, t1, null);
|
||||||
|
assertCellEquals(region, row, colD, t2, null);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private String cellData(int tsNum, boolean flush){
|
private String cellData(int tsNum, boolean flush){
|
||||||
return "t" + tsNum + " data" + (flush ? " - with flush" : "");
|
return "t" + tsNum + " data" + (flush ? " - with flush" : "");
|
||||||
}
|
}
|
||||||
|
@ -35,6 +35,9 @@ public class TestDeleteFamily extends HBaseTestCase {
|
|||||||
static final Log LOG = LogFactory.getLog(TestDeleteFamily.class);
|
static final Log LOG = LogFactory.getLog(TestDeleteFamily.class);
|
||||||
private MiniDFSCluster miniHdfs;
|
private MiniDFSCluster miniHdfs;
|
||||||
|
|
||||||
|
//for family regex deletion test
|
||||||
|
protected static final String COLFAMILY_REGEX = "col[a-zA-Z]*1";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
@ -60,6 +63,10 @@ public class TestDeleteFamily extends HBaseTestCase {
|
|||||||
makeSureItWorks(region, region_incommon, false);
|
makeSureItWorks(region, region_incommon, false);
|
||||||
// test hstore
|
// test hstore
|
||||||
makeSureItWorks(region, region_incommon, true);
|
makeSureItWorks(region, region_incommon, true);
|
||||||
|
// family regex test memcache
|
||||||
|
makeSureRegexWorks(region, region_incommon, false);
|
||||||
|
// family regex test hstore
|
||||||
|
makeSureRegexWorks(region, region_incommon, true);
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
if (region != null) {
|
if (region != null) {
|
||||||
@ -138,6 +145,71 @@ public class TestDeleteFamily extends HBaseTestCase {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void makeSureRegexWorks(HRegion region, HRegionIncommon region_incommon,
|
||||||
|
boolean flush)
|
||||||
|
throws Exception{
|
||||||
|
// insert a few versions worth of data for a row
|
||||||
|
byte [] row = Bytes.toBytes("test_row");
|
||||||
|
long t0 = System.currentTimeMillis();
|
||||||
|
long t1 = t0 - 15000;
|
||||||
|
long t2 = t1 - 15000;
|
||||||
|
|
||||||
|
byte [] colA = Bytes.toBytes(Bytes.toString(COLUMNS[0]) + "a");
|
||||||
|
byte [] colB = Bytes.toBytes(Bytes.toString(COLUMNS[0]) + "b");
|
||||||
|
byte [] colC = Bytes.toBytes(Bytes.toString(COLUMNS[1]) + "c");
|
||||||
|
|
||||||
|
BatchUpdate batchUpdate = null;
|
||||||
|
batchUpdate = new BatchUpdate(row, t0);
|
||||||
|
batchUpdate.put(colA, cellData(0, flush).getBytes());
|
||||||
|
batchUpdate.put(colB, cellData(0, flush).getBytes());
|
||||||
|
batchUpdate.put(colC, cellData(0, flush).getBytes());
|
||||||
|
region_incommon.commit(batchUpdate);
|
||||||
|
|
||||||
|
batchUpdate = new BatchUpdate(row, t1);
|
||||||
|
batchUpdate.put(colA, cellData(1, flush).getBytes());
|
||||||
|
batchUpdate.put(colB, cellData(1, flush).getBytes());
|
||||||
|
batchUpdate.put(colC, cellData(1, flush).getBytes());
|
||||||
|
region_incommon.commit(batchUpdate);
|
||||||
|
|
||||||
|
batchUpdate = new BatchUpdate(row, t2);
|
||||||
|
batchUpdate.put(colA, cellData(2, flush).getBytes());
|
||||||
|
batchUpdate.put(colB, cellData(2, flush).getBytes());
|
||||||
|
batchUpdate.put(colC, cellData(2, flush).getBytes());
|
||||||
|
region_incommon.commit(batchUpdate);
|
||||||
|
|
||||||
|
if (flush) {region_incommon.flushcache();}
|
||||||
|
|
||||||
|
// call delete family by regex at timestamp t1, make sure colA and colB keep only
|
||||||
|
// their most recent (t0) cells and that every version of colC is left untouched
|
||||||
|
region.deleteFamilyByRegex(row, COLFAMILY_REGEX, t1, null);
|
||||||
|
if (flush) {region_incommon.flushcache();}
|
||||||
|
// most recent for A,B,C should be fine
|
||||||
|
// A,B at older timestamps should be gone
|
||||||
|
// C should be fine for older timestamps
|
||||||
|
assertCellEquals(region, row, colA, t0, cellData(0, flush));
|
||||||
|
assertCellEquals(region, row, colA, t1, null);
|
||||||
|
assertCellEquals(region, row, colA, t2, null);
|
||||||
|
assertCellEquals(region, row, colB, t0, cellData(0, flush));
|
||||||
|
assertCellEquals(region, row, colB, t1, null);
|
||||||
|
assertCellEquals(region, row, colB, t2, null);
|
||||||
|
assertCellEquals(region, row, colC, t0, cellData(0, flush));
|
||||||
|
assertCellEquals(region, row, colC, t1, cellData(1, flush));
|
||||||
|
assertCellEquals(region, row, colC, t2, cellData(2, flush));
|
||||||
|
|
||||||
|
// call delete family w/o a timestamp, make sure nothing is left except for
|
||||||
|
// column C.
|
||||||
|
region.deleteFamilyByRegex(row, COLFAMILY_REGEX, HConstants.LATEST_TIMESTAMP, null);
|
||||||
|
if (flush) {region_incommon.flushcache();}
|
||||||
|
// A,B for latest timestamp should be gone
|
||||||
|
// C should still be fine
|
||||||
|
assertCellEquals(region, row, colA, t0, null);
|
||||||
|
assertCellEquals(region, row, colB, t0, null);
|
||||||
|
assertCellEquals(region, row, colC, t0, cellData(0, flush));
|
||||||
|
assertCellEquals(region, row, colC, t1, cellData(1, flush));
|
||||||
|
assertCellEquals(region, row, colC, t2, cellData(2, flush));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private String cellData(int tsNum, boolean flush){
|
private String cellData(int tsNum, boolean flush){
|
||||||
return "t" + tsNum + " data" + (flush ? " - with flush" : "");
|
return "t" + tsNum + " data" + (flush ? " - with flush" : "");
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user