HBASE-10885 Support visibility expressions on Deletes (Ram)

This commit is contained in:
Ramkrishna 2014-07-04 23:55:05 +05:30
parent 94d3dbea86
commit 62c048660b
12 changed files with 3735 additions and 66 deletions

View File

@ -40,4 +40,13 @@ public final class VisibilityConstants {
/** Qualifier for the internal storage table for visibility labels */
public static final byte[] LABEL_QUALIFIER = new byte[1];
/**
* Version of the visibility tag serialization format. This version indicates
* that the visibility labels are sorted based on their ordinal.
**/
public static final byte VISIBILITY_SERIALIZATION_VERSION = 1;
/**
* Byte-array representation of {@link #VISIBILITY_SERIALIZATION_VERSION}, as
* produced by {@code Bytes.toBytes}.
* NOTE(review): the argument is a {@code byte}; if {@code Bytes} has no
* {@code toBytes(byte)} overload, Java overload resolution widens it (most
* likely to {@code short}) and the resulting array is longer than one byte.
* Confirm the intended length of this value before relying on it.
**/
public static final byte[] SORTED_ORDINAL_SERIALIZATION_FORMAT = Bytes
.toBytes(VISIBILITY_SERIALIZATION_VERSION);
}

View File

@ -428,6 +428,14 @@ public final class CellUtil {
return cell.getTypeByte() == Type.DeleteFamilyVersion.getCode();
}
/**
 * @param cell the cell whose type byte is inspected
 * @return True if the type byte of this cell equals the code of {@code Type.DeleteColumn}.
 */
public static boolean isDeleteColumns(final Cell cell) {
  final byte typeByte = cell.getTypeByte();
  return typeByte == Type.DeleteColumn.getCode();
}
/**
 * @param cell the cell whose type byte is inspected
 * @return True if the type byte of this cell equals the code of {@code Type.Delete}.
 */
public static boolean isDeleteColumnVersion(final Cell cell) {
  final byte typeByte = cell.getTypeByte();
  return typeByte == Type.Delete.getCode();
}
/**
*
* @return True if this cell is a delete family or column type.

View File

@ -27,4 +27,5 @@ public final class TagType {
public static final byte ACL_TAG_TYPE = (byte) 1;
public static final byte VISIBILITY_TAG_TYPE = (byte) 2;
public static final byte LOG_REPLAY_TAG_TYPE = (byte) 3;
public static final byte VISIBILITY_EXP_SERIALIZATION_TAG_TYPE = (byte)4;
}

View File

@ -25,6 +25,7 @@ import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -38,10 +39,12 @@ import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.util.StreamUtils;
import org.apache.hadoop.hbase.mapreduce.ImportTsv.TsvParser.BadTsvLineException;
import org.apache.hadoop.hbase.security.visibility.Authorizations;
import org.apache.hadoop.hbase.security.visibility.ExpressionExpander;
import org.apache.hadoop.hbase.security.visibility.ExpressionParser;
import org.apache.hadoop.hbase.security.visibility.InvalidLabelException;
import org.apache.hadoop.hbase.security.visibility.ParseException;
import org.apache.hadoop.hbase.security.visibility.VisibilityUtils;
import org.apache.hadoop.hbase.security.visibility.expression.ExpressionNode;
@ -49,7 +52,6 @@ import org.apache.hadoop.hbase.security.visibility.expression.LeafExpressionNode
import org.apache.hadoop.hbase.security.visibility.expression.NonLeafExpressionNode;
import org.apache.hadoop.hbase.security.visibility.expression.Operator;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableUtils;
/**
* An utility class that helps the mapper and reducers used with visibility to
@ -71,32 +73,37 @@ public class LabelExpander {
// TODO : The code repeats from that in Visibility Controller.. Refactoring
// may be needed
public List<Tag> createVisibilityTags(String visibilityLabelsExp) throws IOException,
BadTsvLineException {
private List<Tag> createVisibilityTags(String visibilityLabelsExp) throws IOException,
ParseException, InvalidLabelException {
ExpressionNode node = null;
try {
node = parser.parse(visibilityLabelsExp);
} catch (ParseException e) {
throw new BadTsvLineException(e.getMessage());
}
node = parser.parse(visibilityLabelsExp);
node = expander.expand(node);
List<Tag> tags = new ArrayList<Tag>();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
List<Integer> labelOrdinals = new ArrayList<Integer>();
// We will be adding this tag before the visibility tags and the presence of
// this
// tag indicates we are supporting deletes with cell visibility
tags.add(VisibilityUtils.VIS_SERIALIZATION_TAG);
if (node.isSingleNode()) {
writeLabelOrdinalsToStream(node, dos);
getLabelOrdinals(node, labelOrdinals);
writeLabelOrdinalsToStream(labelOrdinals, dos);
tags.add(new Tag(VisibilityUtils.VISIBILITY_TAG_TYPE, baos.toByteArray()));
baos.reset();
} else {
NonLeafExpressionNode nlNode = (NonLeafExpressionNode) node;
if (nlNode.getOperator() == Operator.OR) {
for (ExpressionNode child : nlNode.getChildExps()) {
writeLabelOrdinalsToStream(child, dos);
getLabelOrdinals(child, labelOrdinals);
writeLabelOrdinalsToStream(labelOrdinals, dos);
tags.add(new Tag(VisibilityUtils.VISIBILITY_TAG_TYPE, baos.toByteArray()));
baos.reset();
labelOrdinals.clear();
}
} else {
writeLabelOrdinalsToStream(nlNode, dos);
getLabelOrdinals(nlNode, labelOrdinals);
writeLabelOrdinalsToStream(labelOrdinals, dos);
tags.add(new Tag(VisibilityUtils.VISIBILITY_TAG_TYPE, baos.toByteArray()));
baos.reset();
}
@ -104,34 +111,38 @@ public class LabelExpander {
return tags;
}
private void writeLabelOrdinalsToStream(ExpressionNode node, DataOutputStream dos)
throws IOException, BadTsvLineException {
private void writeLabelOrdinalsToStream(List<Integer> labelOrdinals, DataOutputStream dos)
throws IOException {
Collections.sort(labelOrdinals);
for (Integer labelOrdinal : labelOrdinals) {
StreamUtils.writeRawVInt32(dos, labelOrdinal);
}
}
/**
 * Collects the ordinals of every label referenced by the given expression node into
 * {@code labelOrdinals}. A leaf node contributes the ordinal registered for its identifier;
 * a single NOT node contributes the negated ordinal of its only child. Any other node is
 * walked recursively through its child expressions.
 *
 * @param node the expression node to walk
 * @param labelOrdinals the list the resolved ordinals are appended to
 * @throws IOException declared for callers; not raised directly by this method
 * @throws InvalidLabelException if an identifier has no ordinal in the label map
 */
private void getLabelOrdinals(ExpressionNode node, List<Integer> labelOrdinals)
    throws IOException, InvalidLabelException {
  if (node.isSingleNode()) {
    String identifier = null;
    int labelOrdinal = 0;
    if (node instanceof LeafExpressionNode) {
      identifier = ((LeafExpressionNode) node).getIdentifier();
      // Look the ordinal up as a boxed value: assigning a missing (null) entry straight
      // to an int would fail with a NullPointerException instead of the intended
      // InvalidLabelException below.
      Integer ordinal = this.labels.get(identifier);
      if (ordinal != null) {
        labelOrdinal = ordinal;
      }
    } else {
      // This is a NOT node.
      LeafExpressionNode lNode = (LeafExpressionNode) ((NonLeafExpressionNode) node)
          .getChildExps().get(0);
      identifier = lNode.getIdentifier();
      Integer ordinal = this.labels.get(identifier);
      if (ordinal != null) {
        labelOrdinal = -1 * ordinal; // Store NOT node as -ve ordinal.
      }
    }
    // 0 means no ordinal was found for the identifier.
    if (labelOrdinal == 0) {
      throw new InvalidLabelException("Invalid visibility label " + identifier);
    }
    labelOrdinals.add(labelOrdinal);
  } else {
    List<ExpressionNode> childExps = ((NonLeafExpressionNode) node).getChildExps();
    for (ExpressionNode child : childExps) {
      getLabelOrdinals(child, labelOrdinals);
    }
  }
}
@ -190,6 +201,7 @@ public class LabelExpander {
* @return KeyValue from the cell visibility expr
* @throws IOException
* @throws BadTsvLineException
* @throws ParseException
*/
public KeyValue createKVFromCellVisibilityExpr(int rowKeyOffset, int rowKeyLength, byte[] family,
int familyOffset, int familyLength, byte[] qualifier, int qualifierOffset,
@ -201,10 +213,14 @@ public class LabelExpander {
KeyValue kv = null;
if (cellVisibilityExpr != null) {
// Apply the expansion and parsing here
List<Tag> visibilityTags = createVisibilityTags(cellVisibilityExpr);
kv = new KeyValue(lineBytes, rowKeyOffset, rowKeyLength, family, familyOffset, familyLength,
qualifier, qualifierOffset, qualifierLength, ts, KeyValue.Type.Put, lineBytes, columnOffset,
columnLength, visibilityTags);
try {
List<Tag> visibilityTags = createVisibilityTags(cellVisibilityExpr);
kv = new KeyValue(lineBytes, rowKeyOffset, rowKeyLength, family, familyOffset,
familyLength, qualifier, qualifierOffset, qualifierLength, ts, KeyValue.Type.Put,
lineBytes, columnOffset, columnLength, visibilityTags);
} catch (ParseException e) {
throw new BadTsvLineException("Parse Exception " + e.getMessage());
}
} else {
kv = new KeyValue(lineBytes, rowKeyOffset, rowKeyLength, family, familyOffset, familyLength,
qualifier, qualifierOffset, qualifierLength, ts, KeyValue.Type.Put, lineBytes, columnOffset,

View File

@ -45,7 +45,7 @@ public interface DeleteTracker {
/**
* Check if the specified cell buffer has been deleted by a previously
* seen delete.
* @param cell - current cell to check if deleted by a previously deleted cell
* @param cell - current cell to check if deleted by a previously seen delete
* @return deleteResult The result tells whether the KeyValue is deleted and why
*/
DeleteResult isDeleted(Cell cell);

View File

@ -2079,8 +2079,8 @@ public class HRegion implements HeapSize { // , Writable{
get.setMaxVersions(count);
get.addColumn(family, qual);
if (coprocessorHost != null) {
if (!coprocessorHost.prePrepareTimeStampForDeleteVersion(mutation, cell, byteNow,
get)) {
if (!coprocessorHost.prePrepareTimeStampForDeleteVersion(mutation, cell,
byteNow, get)) {
updateDeleteLatestVersionTimeStamp(kv, get, count, byteNow);
}
} else {
@ -4759,7 +4759,7 @@ public class HRegion implements HeapSize { // , Writable{
* @param withCoprocessor invoke coprocessor or not. We don't want to
* always invoke cp for this private method.
*/
private List<Cell> get(Get get, boolean withCoprocessor)
public List<Cell> get(Get get, boolean withCoprocessor)
throws IOException {
List<Cell> results = new ArrayList<Cell>();

View File

@ -45,14 +45,14 @@ import org.apache.hadoop.hbase.util.Bytes;
@InterfaceAudience.Private
public class ScanDeleteTracker implements DeleteTracker {
private boolean hasFamilyStamp = false;
private long familyStamp = 0L;
private SortedSet<Long> familyVersionStamps = new TreeSet<Long>();
private byte [] deleteBuffer = null;
private int deleteOffset = 0;
private int deleteLength = 0;
private byte deleteType = 0;
private long deleteTimestamp = 0L;
protected boolean hasFamilyStamp = false;
protected long familyStamp = 0L;
protected SortedSet<Long> familyVersionStamps = new TreeSet<Long>();
protected byte [] deleteBuffer = null;
protected int deleteOffset = 0;
protected int deleteLength = 0;
protected byte deleteType = 0;
protected long deleteTimestamp = 0L;
/**
* Constructor for ScanDeleteTracker
@ -65,7 +65,7 @@ public class ScanDeleteTracker implements DeleteTracker {
* Add the specified KeyValue to the list of deletes to check against for
* this row operation.
* <p>
* This is called when a Delete is encountered in a StoreFile.
* This is called when a Delete is encountered.
* @param cell - the delete cell
*/
@Override

View File

@ -30,6 +30,7 @@ import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@ -75,6 +76,7 @@ import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.coprocessor.RegionServerCoprocessorEnvironment;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterBase;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.util.StreamUtils;
@ -93,6 +95,7 @@ import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.Visibil
import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.VisibilityLabelsResponse;
import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.VisibilityLabelsService;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.DeleteTracker;
import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
@ -149,7 +152,6 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
private boolean acOn = false;
private Configuration conf;
private volatile boolean initialized = false;
/** Mapping of scanner instances to the user who created them */
private Map<InternalScanner,String> scannerOwners =
new MapMaker().weakKeys().makeMap();
@ -167,6 +169,13 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
LABELS_TABLE_TAGS[0] = new Tag(VisibilityUtils.VISIBILITY_TAG_TYPE, baos.toByteArray());
}
// Tag types reserved for visibility handling. Add to this list if there are any
// further reserved tag types. The reference is final so the shared list cannot be
// swapped out after class initialization.
private static final List<Byte> reservedVisTagTypes = new ArrayList<Byte>();
static {
  reservedVisTagTypes.add(VisibilityUtils.VISIBILITY_TAG_TYPE);
  reservedVisTagTypes.add(VisibilityUtils.VISIBILITY_EXP_SERIALIZATION_TAG_TYPE);
}
@Override
public void start(CoprocessorEnvironment env) throws IOException {
this.conf = env.getConfiguration();
@ -690,10 +699,8 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
new OperationStatus(SANITY_CHECK_FAILURE, de.getMessage()));
continue;
}
if (m instanceof Put) {
Put p = (Put) m;
boolean sanityFailure = false;
for (CellScanner cellScanner = p.cellScanner(); cellScanner.advance();) {
for (CellScanner cellScanner = m.cellScanner(); cellScanner.advance();) {
if (!checkForReservedVisibilityTagPresence(cellScanner.current())) {
miniBatchOp.setOperationStatus(i, new OperationStatus(SANITY_CHECK_FAILURE,
"Mutation contains cell with reserved type tag"));
@ -707,7 +714,7 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
List<Tag> visibilityTags = labelCache.get(labelsExp);
if (visibilityTags == null) {
try {
visibilityTags = createVisibilityTags(labelsExp);
visibilityTags = createVisibilityTags(labelsExp, true);
} catch (ParseException e) {
miniBatchOp.setOperationStatus(i,
new OperationStatus(SANITY_CHECK_FAILURE, e.getMessage()));
@ -719,7 +726,7 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
if (visibilityTags != null) {
labelCache.put(labelsExp, visibilityTags);
List<Cell> updatedCells = new ArrayList<Cell>();
for (CellScanner cellScanner = p.cellScanner(); cellScanner.advance();) {
for (CellScanner cellScanner = m.cellScanner(); cellScanner.advance();) {
Cell cell = cellScanner.current();
List<Tag> tags = Tag.asList(cell.getTagsArray(), cell.getTagsOffset(),
cell.getTagsLength());
@ -732,22 +739,71 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
cell.getValueLength(), tags);
updatedCells.add(updatedCell);
}
p.getFamilyCellMap().clear();
// Clear and add new Cells to the Mutation.
for (Cell cell : updatedCells) {
m.getFamilyCellMap().clear();
// Clear and add new Cells to the Mutation.
for (Cell cell : updatedCells) {
if (m instanceof Put) {
Put p = (Put) m;
p.add(cell);
} else if (m instanceof Delete) {
// TODO : Cells without visibility tags would be handled in follow up issue
Delete d = (Delete) m;
d.addDeleteMarker(cell);
}
}
}
}
} else if (cellVisibility != null) {
// CellVisibility in a Delete is not legal! Fail the operation
miniBatchOp.setOperationStatus(i, new OperationStatus(SANITY_CHECK_FAILURE,
"CellVisibility cannot be set on Delete mutation"));
}
}
}
/**
 * Determines the timestamp for a delete-version marker, taking the visibility expression
 * of the delete into account. The supplied {@code get} is given a
 * {@link DeleteVersionVisibilityExpressionFilter} built from the delete's visibility tags
 * and is run against the region. If fewer cells than {@code get.getMaxVersions()} come
 * back, the marker's latest timestamp is replaced with {@code Long.MIN_VALUE} and the hook
 * returns without bypassing. Otherwise the timestamp bytes of the result at index
 * {@code maxVersions - 1} are copied into the marker and the default processing is bypassed.
 *
 * @param ctx the observer context; used to reach the region and to request the bypass
 * @param delete the delete mutation whose cell visibility is consulted
 * @param cell the delete marker cell whose timestamp is updated
 * @param byteNow the current time as bytes; not used by this implementation
 * @param get the get used to look up the matching versions
 * @throws IOException if the cell visibility cannot be deserialized, parsed or resolved
 */
@Override
public void prePrepareTimeStampForDeleteVersion(
    ObserverContext<RegionCoprocessorEnvironment> ctx, Mutation delete, Cell cell,
    byte[] byteNow, Get get) throws IOException {
  KeyValue markerKv = KeyValueUtil.ensureKeyValue(cell);
  CellVisibility deleteVisibility;
  try {
    deleteVisibility = delete.getCellVisibility();
  } catch (DeserializationException de) {
    throw new IOException("Invalid cell visibility specified " + delete, de);
  }
  // The check for reserved visibility tags (checkForReservedVisibilityTagPresence) is
  // done in preBatchMutate for every mutation, so it is not repeated here.
  List<Tag> expectedTags;
  if (deleteVisibility == null) {
    expectedTags = new ArrayList<Tag>();
  } else {
    String expression = deleteVisibility.getExpression();
    try {
      expectedTags = createVisibilityTags(expression, false);
    } catch (ParseException e) {
      throw new IOException("Invalid cell visibility expression " + expression, e);
    } catch (InvalidLabelException e) {
      throw new IOException("Invalid cell visibility specified " + expression, e);
    }
  }
  get.setFilter(new DeleteVersionVisibilityExpressionFilter(expectedTags));
  List<Cell> matches = ctx.getEnvironment().getRegion().get(get, false);
  int found = matches.size();
  int wanted = get.getMaxVersions();

  if (found < wanted) {
    // Nothing to delete
    markerKv.updateLatestStamp(Bytes.toBytes(Long.MIN_VALUE));
    return;
  }
  if (found > wanted) {
    throw new RuntimeException("Unexpected size: " + found
        + ". Results more than the max versions obtained.");
  }
  KeyValue target = KeyValueUtil.ensureKeyValue(matches.get(wanted - 1));
  Bytes.putBytes(markerKv.getBuffer(), markerKv.getTimestampOffset(), target.getBuffer(),
      target.getTimestampOffset(), Bytes.SIZEOF_LONG);
  // We are bypassing here because HRegion.updateDeleteLatestVersionTimeStamp would update
  // with the current timestamp after doing a get again. As the hook has already determined
  // the needed timestamp, we need to bypass here.
  // TODO : See if HRegion.updateDeleteLatestVersionTimeStamp() could be
  // called only if the hook is not called.
  ctx.bypass();
}
@Override
public void postBatchMutate(ObserverContext<RegionCoprocessorEnvironment> c,
MiniBatchOperationInProgress<Mutation> miniBatchOp) throws IOException {
@ -844,7 +900,7 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
Iterator<Tag> tagsItr = CellUtil.tagsIterator(cell.getTagsArray(), cell.getTagsOffset(),
cell.getTagsLength());
while (tagsItr.hasNext()) {
if (tagsItr.next().getType() == VisibilityUtils.VISIBILITY_TAG_TYPE) {
if (reservedVisTagTypes.contains(tagsItr.next().getType())) {
return false;
}
}
@ -852,28 +908,38 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
return true;
}
private List<Tag> createVisibilityTags(String visibilityLabelsExp) throws IOException,
ParseException, InvalidLabelException {
private List<Tag> createVisibilityTags(String visibilityLabelsExp, boolean addSerializationTag)
throws IOException, ParseException, InvalidLabelException {
ExpressionNode node = null;
node = this.expressionParser.parse(visibilityLabelsExp);
node = this.expressionExpander.expand(node);
List<Tag> tags = new ArrayList<Tag>();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
List<Integer> labelOrdinals = new ArrayList<Integer>();
// We will be adding this tag before the visibility tags and the presence of this
// tag indicates we are supporting deletes with cell visibility
if (addSerializationTag) {
tags.add(VisibilityUtils.VIS_SERIALIZATION_TAG);
}
if (node.isSingleNode()) {
writeLabelOrdinalsToStream(node, dos);
getLabelOrdinals(node, labelOrdinals);
writeLabelOrdinalsToStream(labelOrdinals, dos);
tags.add(new Tag(VisibilityUtils.VISIBILITY_TAG_TYPE, baos.toByteArray()));
baos.reset();
} else {
NonLeafExpressionNode nlNode = (NonLeafExpressionNode) node;
if (nlNode.getOperator() == Operator.OR) {
for (ExpressionNode child : nlNode.getChildExps()) {
writeLabelOrdinalsToStream(child, dos);
getLabelOrdinals(child, labelOrdinals);
writeLabelOrdinalsToStream(labelOrdinals, dos);
tags.add(new Tag(VisibilityUtils.VISIBILITY_TAG_TYPE, baos.toByteArray()));
baos.reset();
labelOrdinals.clear();
}
} else {
writeLabelOrdinalsToStream(nlNode, dos);
getLabelOrdinals(nlNode, labelOrdinals);
writeLabelOrdinalsToStream(labelOrdinals, dos);
tags.add(new Tag(VisibilityUtils.VISIBILITY_TAG_TYPE, baos.toByteArray()));
baos.reset();
}
@ -881,7 +947,15 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
return tags;
}
private void writeLabelOrdinalsToStream(ExpressionNode node, DataOutputStream dos)
private void writeLabelOrdinalsToStream(List<Integer> labelOrdinals, DataOutputStream dos)
throws IOException {
Collections.sort(labelOrdinals);
for (Integer labelOrdinal : labelOrdinals) {
StreamUtils.writeRawVInt32(dos, labelOrdinal);
}
}
private void getLabelOrdinals(ExpressionNode node, List<Integer> labelOrdinals)
throws IOException, InvalidLabelException {
if (node.isSingleNode()) {
String identifier = null;
@ -904,11 +978,11 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
if (labelOrdinal == 0) {
throw new InvalidLabelException("Invalid visibility label " + identifier);
}
StreamUtils.writeRawVInt32(dos, labelOrdinal);
labelOrdinals.add(labelOrdinal);
} else {
List<ExpressionNode> childExps = ((NonLeafExpressionNode) node).getChildExps();
for (ExpressionNode child : childExps) {
writeLabelOrdinalsToStream(child, dos);
getLabelOrdinals(child, labelOrdinals);
}
}
}
@ -949,6 +1023,22 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
return false;
}
/**
 * Swaps the delete tracker for one that also considers visibility tags, except on system
 * tables where the supplied tracker is returned unchanged.
 *
 * @param ctx the observer context used to find the table of the current region
 * @param delTracker the delete tracker that was instantiated for the scan
 * @return {@code delTracker} for system tables, otherwise a new
 *         {@link VisibilityScanDeleteTracker}
 * @throws IOException declared by the hook signature
 */
@Override
public DeleteTracker postInstantiateDeleteTracker(
    ObserverContext<RegionCoprocessorEnvironment> ctx, DeleteTracker delTracker)
    throws IOException {
  TableName regionTable = ctx.getEnvironment().getRegion().getRegionInfo().getTable();
  // For user tables a new type of delete tracker is created which is able to track the
  // timestamps and also the visibility tags per cell. The covering cells are determined
  // not only based on the delete type and ts but also on the visibility expression
  // matching.
  return regionTable.isSystemTable() ? delTracker : new VisibilityScanDeleteTracker();
}
@Override
public RegionScanner postScannerOpen(final ObserverContext<RegionCoprocessorEnvironment> c,
final Scan scan, final RegionScanner s) throws IOException {
@ -1126,7 +1216,7 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
}
}
try {
tags.addAll(createVisibilityTags(cellVisibility.getExpression()));
tags.addAll(createVisibilityTags(cellVisibility.getExpression(), true));
} catch (ParseException e) {
throw new IOException(e);
}
@ -1416,4 +1506,22 @@ public class VisibilityController extends BaseRegionObserver implements MasterOb
}
}
}
/**
 * Filter that includes only the cells for which
 * {@code VisibilityUtils.checkForMatchingVisibilityTags} reports a match against the
 * visibility tags handed to the constructor; every other cell is skipped.
 */
static class DeleteVersionVisibilityExpressionFilter extends FilterBase {
  private List<Tag> visibilityTags;

  /**
   * @param visibilityTags the visibility tags each cell is compared against
   */
  public DeleteVersionVisibilityExpressionFilter(List<Tag> visibilityTags) {
    this.visibilityTags = visibilityTags;
  }

  @Override
  public ReturnCode filterKeyValue(Cell kv) throws IOException {
    return VisibilityUtils.checkForMatchingVisibilityTags(kv, visibilityTags)
        ? ReturnCode.INCLUDE
        : ReturnCode.SKIP;
  }
}
}

View File

@ -0,0 +1,276 @@
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.security.visibility;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.regionserver.ScanDeleteTracker;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Similar to ScanDeleteTracker but tracks the visibility expression also before
 * deciding if a Cell can be considered deleted.
 * <p>
 * Convention used by the tracking fields: a {@code null} collection means that no tagged
 * delete of that type is currently tracked (for example because the last delete of that
 * type carried no tags at all). In that state {@link #isDeleted(Cell)} treats only cells
 * without visibility tags as covered by the delete.
 */
@InterfaceAudience.Private
public class VisibilityScanDeleteTracker extends ScanDeleteTracker {

  // Its better to track the visibility tags in delete based on each type. Create individual
  // data structures for tracking each of them. This would ensure that there is no tracking based
  // on time and also would handle all cases where deletefamily or deletecolumns is specified with
  // Latest_timestamp. In such cases the ts in the delete marker and the masking
  // put will not be same. So going with individual data structures for different delete
  // type would solve this problem and also ensure that the combination of different type
  // of deletes with diff ts would also work fine
  // Track per TS
  private Map<Long, List<Tag>> visibilityTagsDeleteFamily = new HashMap<Long, List<Tag>>();
  // Delete family version with different ts and different visibility expression could come.
  // Need to track it per ts.
  private Map<Long, List<Tag>> visibilityTagsDeleteFamilyVersion =
      new HashMap<Long, List<Tag>>();
  private List<List<Tag>> visibilityTagsDeleteColumns;
  // Tracking as List<List> is to handle same ts cell but different visibility tag.
  // TODO : Need to handle puts with same ts but different vis tags.
  private List<List<Tag>> visibilityTagsDeleteColumnVersion = new ArrayList<List<Tag>>();

  public VisibilityScanDeleteTracker() {
    super();
  }

  /**
   * Records a delete marker together with its visibility tags.
   *
   * @param delCell the delete cell
   */
  @Override
  public void add(Cell delCell) {
    // Cannot call super.add because need to find if the delete needs to be considered
    long timestamp = delCell.getTimestamp();
    int qualifierOffset = delCell.getQualifierOffset();
    int qualifierLength = delCell.getQualifierLength();
    byte type = delCell.getTypeByte();
    if (type == KeyValue.Type.DeleteFamily.getCode()) {
      hasFamilyStamp = true;
      extractDeleteTags(delCell, KeyValue.Type.DeleteFamily);
      return;
    } else if (type == KeyValue.Type.DeleteFamilyVersion.getCode()) {
      familyVersionStamps.add(timestamp);
      extractDeleteTags(delCell, KeyValue.Type.DeleteFamilyVersion);
      return;
    }
    // new column, or more general delete type
    if (deleteBuffer != null) {
      if (Bytes.compareTo(deleteBuffer, deleteOffset, deleteLength, delCell.getQualifierArray(),
          qualifierOffset, qualifierLength) != 0) {
        // A case where there are deletes for a column qualifier but there are
        // no corresponding puts for them. Rare case.
        visibilityTagsDeleteColumns = null;
        visibilityTagsDeleteColumnVersion = null;
      } else if (type == KeyValue.Type.Delete.getCode() && (deleteTimestamp != timestamp)) {
        // there is a timestamp change which means we could clear the list
        // when ts is same and the vis tags are different we need to collect
        // them all. Interesting part is that in the normal case of puts if
        // there are 2 cells with same ts and diff vis tags only one of them is
        // returned. Handling with a single List<Tag> would mean that only one
        // of the cell would be considered. Doing this as a precaution.
        // Rare cases.
        visibilityTagsDeleteColumnVersion = null;
      }
    }
    deleteBuffer = delCell.getQualifierArray();
    deleteOffset = qualifierOffset;
    deleteLength = qualifierLength;
    deleteType = type;
    deleteTimestamp = timestamp;
    extractDeleteTags(delCell, KeyValue.Type.codeToType(type));
  }

  /**
   * Updates the per-type tracking structure for the given delete marker. A marker that
   * carries tags has its visibility tags recorded (when there are any); a marker without
   * any tags switches the structure for its type to {@code null}.
   *
   * @param delCell the delete marker
   * @param type the delete type of the marker
   * @throws IllegalArgumentException if {@code type} is not one of the four delete types
   */
  private void extractDeleteTags(Cell delCell, Type type) {
    boolean tagged = delCell.getTagsLength() > 0;
    switch (type) {
      case DeleteFamily:
        if (!tagged) {
          visibilityTagsDeleteFamily = null;
        } else if (visibilityTagsDeleteFamily != null) {
          List<Tag> familyTags = visibilityTagsOf(delCell);
          if (!familyTags.isEmpty()) {
            visibilityTagsDeleteFamily.put(delCell.getTimestamp(), familyTags);
          }
        }
        break;
      case DeleteFamilyVersion:
        if (!tagged) {
          visibilityTagsDeleteFamilyVersion = null;
        } else if (visibilityTagsDeleteFamilyVersion != null) {
          // Guarded like the DeleteFamily case: an earlier untagged marker sets the map
          // to null, and writing into it unconditionally threw a NullPointerException.
          // NOTE(review): a tagged marker that follows an untagged one of the same type is
          // therefore not tracked until reset(); confirm this is the desired semantics.
          List<Tag> familyVersionTags = visibilityTagsOf(delCell);
          if (!familyVersionTags.isEmpty()) {
            visibilityTagsDeleteFamilyVersion.put(delCell.getTimestamp(), familyVersionTags);
          }
        }
        break;
      case DeleteColumn:
        if (!tagged) {
          visibilityTagsDeleteColumns = null;
        } else {
          if (visibilityTagsDeleteColumns == null) {
            visibilityTagsDeleteColumns = new ArrayList<List<Tag>>();
          }
          List<Tag> columnTags = visibilityTagsOf(delCell);
          if (!columnTags.isEmpty()) {
            visibilityTagsDeleteColumns.add(columnTags);
          }
        }
        break;
      case Delete:
        if (!tagged) {
          visibilityTagsDeleteColumnVersion = null;
        } else {
          if (visibilityTagsDeleteColumnVersion == null) {
            visibilityTagsDeleteColumnVersion = new ArrayList<List<Tag>>();
          }
          List<Tag> versionTags = visibilityTagsOf(delCell);
          if (!versionTags.isEmpty()) {
            visibilityTagsDeleteColumnVersion.add(versionTags);
          }
        }
        break;
      default:
        throw new IllegalArgumentException("Invalid delete type");
    }
  }

  /** Returns the visibility tags that VisibilityUtils extracts from the given cell. */
  private static List<Tag> visibilityTagsOf(Cell cell) {
    List<Tag> tags = new ArrayList<Tag>();
    VisibilityUtils.getVisibilityTags(cell, tags);
    return tags;
  }

  /**
   * Decides whether a column-level delete covers the cell: with tracked tag lists the cell
   * must match one of them, without any (null) the cell must carry no visibility tags.
   */
  private static boolean isCoveredByColumnDelete(Cell cell, List<List<Tag>> trackedTags) {
    if (trackedTags == null) {
      // No tags
      return !VisibilityUtils.isVisibilityTagsPresent(cell);
    }
    for (List<Tag> tags : trackedTags) {
      if (VisibilityUtils.checkForMatchingVisibilityTags(cell, tags)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Checks the cell against the delete markers seen so far, in the order family delete,
   * family version delete, column delete and column version delete.
   *
   * @param cell the current cell to check
   * @return the delete result; {@code NOT_DELETED} when no tracked delete covers the cell
   * @throws IllegalStateException if the tracked delete qualifier sorts after the qualifier
   *           of the cell
   */
  @Override
  public DeleteResult isDeleted(Cell cell) {
    long timestamp = cell.getTimestamp();
    int qualifierOffset = cell.getQualifierOffset();
    int qualifierLength = cell.getQualifierLength();
    if (hasFamilyStamp) {
      if (visibilityTagsDeleteFamily != null) {
        for (Entry<Long, List<Tag>> entry : visibilityTagsDeleteFamily.entrySet()) {
          if (timestamp <= entry.getKey()
              && VisibilityUtils.checkForMatchingVisibilityTags(cell, entry.getValue())) {
            return DeleteResult.FAMILY_VERSION_DELETED;
          }
        }
      } else if (!VisibilityUtils.isVisibilityTagsPresent(cell)) {
        // No tags
        return DeleteResult.FAMILY_VERSION_DELETED;
      }
    }
    if (familyVersionStamps.contains(Long.valueOf(timestamp))) {
      if (visibilityTagsDeleteFamilyVersion != null) {
        List<Tag> tags = visibilityTagsDeleteFamilyVersion.get(Long.valueOf(timestamp));
        if (tags != null && VisibilityUtils.checkForMatchingVisibilityTags(cell, tags)) {
          return DeleteResult.FAMILY_VERSION_DELETED;
        }
      } else if (!VisibilityUtils.isVisibilityTagsPresent(cell)) {
        // No tags
        return DeleteResult.FAMILY_VERSION_DELETED;
      }
    }
    if (deleteBuffer != null) {
      int ret = Bytes.compareTo(deleteBuffer, deleteOffset, deleteLength,
          cell.getQualifierArray(), qualifierOffset, qualifierLength);
      if (ret == 0) {
        if (deleteType == KeyValue.Type.DeleteColumn.getCode()
            && isCoveredByColumnDelete(cell, visibilityTagsDeleteColumns)) {
          return DeleteResult.VERSION_DELETED;
        }
        // Delete (aka DeleteVersion)
        // If the timestamp is the same, keep this one
        if (timestamp == deleteTimestamp
            && isCoveredByColumnDelete(cell, visibilityTagsDeleteColumnVersion)) {
          return DeleteResult.VERSION_DELETED;
        }
      } else if (ret < 0) {
        // Next column case.
        deleteBuffer = null;
        visibilityTagsDeleteColumns = null;
        visibilityTagsDeleteColumnVersion = null;
      } else {
        throw new IllegalStateException("isDeleted failed: deleteBuffer="
            + Bytes.toStringBinary(deleteBuffer, deleteOffset, deleteLength) + ", qualifier="
            + Bytes.toStringBinary(cell.getQualifierArray(), qualifierOffset, qualifierLength)
            + ", timestamp=" + timestamp + ", comparison result: " + ret);
      }
    }
    return DeleteResult.NOT_DELETED;
  }

  /**
   * Resets the inherited tracker state and all visibility tag tracking: the family-level
   * maps start empty again and the column-level lists are cleared to {@code null}.
   */
  @Override
  public void reset() {
    super.reset();
    visibilityTagsDeleteColumns = null;
    visibilityTagsDeleteFamily = new HashMap<Long, List<Tag>>();
    visibilityTagsDeleteFamilyVersion = new HashMap<Long, List<Tag>>();
    visibilityTagsDeleteColumnVersion = null;
  }
}

View File

@ -19,25 +19,31 @@ package org.apache.hadoop.hbase.security.visibility;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import com.google.protobuf.HBaseZeroCopyByteString;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.io.util.StreamUtils;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.MultiUserAuthorizations;
import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.UserAuthorizations;
import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.VisibilityLabel;
import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.VisibilityLabelsRequest;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.util.ReflectionUtils;
import com.google.protobuf.HBaseZeroCopyByteString;
import com.google.protobuf.InvalidProtocolBufferException;
/**
@ -46,10 +52,14 @@ import com.google.protobuf.InvalidProtocolBufferException;
@InterfaceAudience.Private
public class VisibilityUtils {
public static final String VISIBILITY_LABEL_GENERATOR_CLASS =
public static final String VISIBILITY_LABEL_GENERATOR_CLASS =
"hbase.regionserver.scan.visibility.label.generator.class";
/** Tag type identifying a visibility tag on a cell; alias of TagType.VISIBILITY_TAG_TYPE. */
public static final byte VISIBILITY_TAG_TYPE = TagType.VISIBILITY_TAG_TYPE;
/**
 * Tag type of the tag that records how the visibility expression was serialized; alias of
 * TagType.VISIBILITY_EXP_SERIALIZATION_TAG_TYPE.
 */
public static final byte VISIBILITY_EXP_SERIALIZATION_TAG_TYPE =
TagType.VISIBILITY_EXP_SERIALIZATION_TAG_TYPE;
/** The label name "system". NOTE(review): its users are not visible in this part of the file. */
public static final String SYSTEM_LABEL = "system";
/**
 * Pre-built serialization-format tag whose value is
 * VisibilityConstants.SORTED_ORDINAL_SERIALIZATION_FORMAT. getVisibilityTags() reports a cell
 * carrying a serialization tag with that version as having its visibility tags in sorted order.
 */
public static final Tag VIS_SERIALIZATION_TAG = new Tag(VISIBILITY_EXP_SERIALIZATION_TAG_TYPE,
VisibilityConstants.SORTED_ORDINAL_SERIALIZATION_FORMAT);
/** Comma separator. NOTE(review): its users are not visible in this part of the file. */
private static final String COMMA = ",";
/**
@ -156,4 +166,162 @@ public class VisibilityUtils {
}
return slgs;
}
/**
 * Collects the visibility tags of the given cell and reports whether the cell declares the
 * sorted-ordinal serialization format.
 * @param cell - the cell whose tags are scanned
 * @param tags - output list; every tag of type VISIBILITY_TAG_TYPE found in the cell is
 *          appended to it
 * @return true if the cell carries a serialization-format tag whose version equals
 *         VisibilityConstants.VISIBILITY_SERIALIZATION_VERSION (i.e. the visibility tags are
 *         in sorted order), false otherwise
 */
public static boolean getVisibilityTags(Cell cell, List<Tag> tags) {
  boolean sortedOrder = false;
  for (Iterator<Tag> cellTags = CellUtil.tagsIterator(cell.getTagsArray(),
      cell.getTagsOffset(), cell.getTagsLength()); cellTags.hasNext();) {
    Tag current = cellTags.next();
    byte type = current.getType();
    if (type == VisibilityUtils.VISIBILITY_TAG_TYPE) {
      tags.add(current);
    } else if (type == VisibilityUtils.VISIBILITY_EXP_SERIALIZATION_TAG_TYPE
        && Bytes.toShort(current.getValue())
            == VisibilityConstants.VISIBILITY_SERIALIZATION_VERSION) {
      // The marker tag itself is never collected; it only flags the format.
      sortedOrder = true;
    }
  }
  return sortedOrder;
}
/**
 * Tells whether the cell carries at least one visibility tag.
 * @param cell - the cell whose tags are scanned
 * @return true as soon as a tag of type VISIBILITY_TAG_TYPE is found, false if the cell has
 *         none
 */
public static boolean isVisibilityTagsPresent(Cell cell) {
  for (Iterator<Tag> cellTags = CellUtil.tagsIterator(cell.getTagsArray(),
      cell.getTagsOffset(), cell.getTagsLength()); cellTags.hasNext();) {
    if (cellTags.next().getType() == VisibilityUtils.VISIBILITY_TAG_TYPE) {
      return true;
    }
  }
  return false;
}
/**
 * Decides whether the visibility tags of a cell match the visibility tags of a delete
 * mutation.
 * @param cell - the cell under consideration
 * @param visibilityTagsInDeleteCell - the visibility tags of the delete mutation
 *          (the specified Cell Visibility)
 * @return true if matching tags are found; always false when the cell has no visibility tags
 * @throws RuntimeException if the cell's tags cannot be decoded while sorting them
 */
public static boolean checkForMatchingVisibilityTags(Cell cell,
    List<Tag> visibilityTagsInDeleteCell) {
  List<Tag> cellVisibilityTags = new ArrayList<Tag>();
  boolean cellTagsSorted = getVisibilityTags(cell, cellVisibilityTags);
  if (cellVisibilityTags.isEmpty()) {
    // A cell without visibility tags can never match a delete that specifies them.
    return false;
  }
  if (!cellTagsSorted) {
    // No sorted-format marker on the cell: decode and sort the ordinals before comparing.
    try {
      return checkForMatchingVisibilityTagsWithOutSortedOrder(cell, visibilityTagsInDeleteCell);
    } catch (IOException e) {
      // Should not happen
      throw new RuntimeException("Exception while sorting the tags from the cell", e);
    }
  }
  // Sorted format: the tags can be compared directly.
  return checkForMatchingVisibilityTagsWithSortedOrder(visibilityTagsInDeleteCell,
    cellVisibilityTags);
}
/**
 * Compares the delete's visibility tags with the cell's visibility tags when the cell is not
 * marked as sorted: both sides are decoded into sorted ordinal lists first.
 * @param cell - the cell whose visibility tags are decoded
 * @param visibilityTagsInDeleteCell - the visibility tags of the delete mutation
 * @return the result of comparing the two decoded ordinal lists
 * @throws IOException if a tag cannot be decoded
 */
private static boolean checkForMatchingVisibilityTagsWithOutSortedOrder(Cell cell,
    List<Tag> visibilityTagsInDeleteCell) throws IOException {
  // Arguments are evaluated left to right: delete tags are decoded before the cell's tags.
  return compareTagsOrdinals(sortTagsBasedOnOrdinal(visibilityTagsInDeleteCell),
    sortTagsBasedOnOrdinal(cell));
}
/**
 * Compares the delete's visibility tags with the cell's visibility tags byte for byte. Used
 * when the cell's tags are already serialized in sorted order.
 * <p>
 * Every tag of the delete must have a byte-identical counterpart among the cell's tags.
 * Previously only the outcome for the last delete tag was returned, so a delete whose earlier
 * tags had no counterpart was still reported as a match.
 * @param visibilityTagsInDeleteCell - the visibility tags of the delete mutation
 * @param tags - the visibility tags of the cell
 * @return true only if both lists have the same size and every delete tag equals some cell
 *         tag; false for size mismatch, for any unmatched delete tag, and for empty lists
 */
private static boolean checkForMatchingVisibilityTagsWithSortedOrder(
    List<Tag> visibilityTagsInDeleteCell, List<Tag> tags) {
  if (visibilityTagsInDeleteCell.size() != tags.size()) {
    // Different number of tags: the two visibility expressions cannot be equal.
    return false;
  }
  boolean matchFound = false;
  for (Tag tag : visibilityTagsInDeleteCell) {
    matchFound = false;
    for (Tag givenTag : tags) {
      if (Bytes.equals(tag.getBuffer(), tag.getTagOffset(), tag.getTagLength(),
        givenTag.getBuffer(), givenTag.getTagOffset(), givenTag.getTagLength())) {
        matchFound = true;
        break;
      }
    }
    if (!matchFound) {
      // One unmatched delete tag is enough to reject; do not let a later tag overwrite this.
      return false;
    }
  }
  return matchFound;
}
/**
 * Decodes every visibility tag of the cell into a sorted list of the values it encodes.
 * @param cell - the cell whose tags are scanned
 * @return one sorted value list per visibility tag, in the order the tags appear in the cell
 * @throws IOException if a tag cannot be decoded
 */
private static List<List<Integer>> sortTagsBasedOnOrdinal(Cell cell) throws IOException {
  List<List<Integer>> ordinalsPerTag = new ArrayList<List<Integer>>();
  for (Iterator<Tag> cellTags = CellUtil.tagsIterator(cell.getTagsArray(),
      cell.getTagsOffset(), cell.getTagsLength()); cellTags.hasNext();) {
    Tag candidate = cellTags.next();
    if (candidate.getType() != VisibilityUtils.VISIBILITY_TAG_TYPE) {
      continue;
    }
    getSortedTagOrdinals(ordinalsPerTag, candidate);
  }
  return ordinalsPerTag;
}
/**
 * Decodes every visibility tag in the given list into a sorted list of the values it encodes.
 * @param tags - the tags to decode; tags of any other type are ignored
 * @return one sorted value list per visibility tag, in the order the tags appear in the list
 * @throws IOException if a tag cannot be decoded
 */
private static List<List<Integer>> sortTagsBasedOnOrdinal(List<Tag> tags) throws IOException {
  List<List<Integer>> ordinalsPerTag = new ArrayList<List<Integer>>();
  for (Tag candidate : tags) {
    if (candidate.getType() != VisibilityUtils.VISIBILITY_TAG_TYPE) {
      continue;
    }
    getSortedTagOrdinals(ordinalsPerTag, candidate);
  }
  return ordinalsPerTag;
}
/**
 * Reads the consecutive raw varint32 values stored in the tag, sorts them in ascending order
 * and appends the resulting list as a single entry to fullTagsList.
 * @param fullTagsList - output list that receives one sorted value list for this tag
 * @param tag - the tag to decode
 * @throws IOException if a value cannot be read from the tag
 */
private static void getSortedTagOrdinals(List<List<Integer>> fullTagsList, Tag tag)
    throws IOException {
  final int start = tag.getTagOffset();
  final int end = start + tag.getTagLength();
  List<Integer> ordinals = new ArrayList<Integer>();
  for (int position = start; position < end;) {
    // The pair holds the decoded value first and the number of bytes it occupied second.
    Pair<Integer, Integer> decoded = StreamUtils.readRawVarint32(tag.getBuffer(), position);
    ordinals.add(decoded.getFirst());
    position += decoded.getSecond();
  }
  Collections.sort(ordinals);
  fullTagsList.add(ordinals);
}
/**
 * Compares the decoded, sorted ordinal lists of a delete with those of a cell.
 * <p>
 * Every ordinal list of the delete must be equal to some ordinal list of the cell.
 * Previously only the outcome for the last delete list was returned, so a delete whose
 * earlier lists had no counterpart was still reported as a match.
 * @param tagsInDeletes - one sorted ordinal list per visibility tag of the delete
 * @param tags - one sorted ordinal list per visibility tag of the cell
 * @return true only if both lists have the same size and every delete entry equals some cell
 *         entry; false for size mismatch, for any unmatched delete entry, and for empty lists
 */
private static boolean compareTagsOrdinals(List<List<Integer>> tagsInDeletes,
    List<List<Integer>> tags) {
  if (tagsInDeletes.size() != tags.size()) {
    // Different number of entries: the two visibility expressions cannot be equal.
    return false;
  }
  boolean matchFound = false;
  for (List<Integer> deleteTagOrdinals : tagsInDeletes) {
    matchFound = false;
    for (List<Integer> tagOrdinals : tags) {
      if (deleteTagOrdinals.equals(tagOrdinals)) {
        matchFound = true;
        break;
      }
    }
    if (!matchFound) {
      // One unmatched delete entry is enough to reject; a later match must not override it.
      return false;
    }
  }
  return matchFound;
}
}

View File

@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
@ -50,6 +51,7 @@ import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.protobuf.generated.VisibilityLabelsProtos.VisibilityLabelsResponse;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.visibility.Authorizations;
import org.apache.hadoop.hbase.security.visibility.CellVisibility;
import org.apache.hadoop.hbase.security.visibility.ScanLabelGenerator;
import org.apache.hadoop.hbase.security.visibility.SimpleScanLabelGenerator;
import org.apache.hadoop.hbase.security.visibility.VisibilityClient;
@ -161,6 +163,58 @@ public class TestImportTSVWithVisibilityLabels implements Configurable {
util.deleteTable(tableName);
}
/**
 * Imports a single row labelled "secret&private" through ImportTsv, then issues a delete with
 * a cell visibility and verifies the outcome via issueDeleteAndVerifyData().
 */
@Test
public void testMROnTableWithDeletes() throws Exception {
  final String table = "test-" + UUID.randomUUID();

  // Prepare the arguments required for the test.
  final String[] importArgs = new String[] {
      "-D" + ImportTsv.MAPPER_CONF_KEY + "=org.apache.hadoop.hbase.mapreduce.TsvImporterMapper",
      "-D" + ImportTsv.COLUMNS_CONF_KEY + "=HBASE_ROW_KEY,FAM:A,FAM:B,HBASE_CELL_VISIBILITY",
      "-D" + ImportTsv.SEPARATOR_CONF_KEY + "=\u001b", table };
  final String input = "KEY\u001bVALUE1\u001bVALUE2\u001bsecret&private\n";

  util.createTable(table, FAMILY);
  doMROnTableTest(util, FAMILY, input, importArgs, 1);
  issueDeleteAndVerifyData(table);
  util.deleteTable(table);
}
/**
 * Deletes the family of row "KEY" with cell visibility "private&secret" and verifies that a
 * scan authorized for "secret" and "private" no longer returns any row. The delete and scan
 * are retried (with a pause in between) when a NullPointerException is hit.
 * <p>
 * The scanner and the table are closed in finally blocks so they are released even when the
 * assertion fails or an IOException escapes.
 * @param tableName - name of the table to delete from and scan
 * @throws IOException if the delete, the scan or closing the table fails
 */
private void issueDeleteAndVerifyData(String tableName) throws IOException {
  LOG.debug("Validating table after delete.");
  HTable table = new HTable(conf, tableName);
  boolean verified = false;
  try {
    long pause = conf.getLong("hbase.client.pause", 5 * 1000);
    int numRetries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
    for (int i = 0; i < numRetries; i++) {
      try {
        Delete d = new Delete(Bytes.toBytes("KEY"));
        d.deleteFamily(Bytes.toBytes(FAMILY));
        d.setCellVisibility(new CellVisibility("private&secret"));
        table.delete(d);

        Scan scan = new Scan();
        // Scan entire family.
        scan.addFamily(Bytes.toBytes(FAMILY));
        scan.setAuthorizations(new Authorizations("secret", "private"));
        ResultScanner resScanner = table.getScanner(scan);
        try {
          Result[] next = resScanner.next(5);
          assertEquals(0, next.length);
        } finally {
          // Release the scanner whether or not the assertion passed.
          resScanner.close();
        }
        verified = true;
        break;
      } catch (NullPointerException e) {
        // If here, a cell was empty. Presume its because updates came in
        // after the scanner had been opened. Wait a while and retry.
      }
      try {
        Thread.sleep(pause);
      } catch (InterruptedException e) {
        // Preserve the interrupt for the caller and stop retrying.
        Thread.currentThread().interrupt();
        break;
      }
    }
  } finally {
    table.close();
  }
  assertTrue(verified);
}
@Test
public void testMROnTableWithBulkload() throws Exception {
String tableName = "test-" + UUID.randomUUID();