HBASE-1822 Remove the deprecated APIs (incompatible change)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@816014 13f79535-47bb-0310-9956-ffa450edef68
commit b37eca06af
parent 6cd712c7be
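The change that repeats throughout the diff below is the retirement of the deprecated single "family:qualifier" column addressing in favour of explicit family/qualifier pairs. A minimal caller-side sketch of that migration pattern, using a hypothetical column name (the names below are illustrative, not taken from this commit):

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.util.Bytes;

public class ColumnMigrationSketch {
  // Old style: one colon-delimited column name (hypothetical).
  static final byte[] OLD_COLUMN = Bytes.toBytes("info:status");

  static Get buildGet(byte[] row) {
    Get get = new Get(row);
    // New style: split once, then branch on whether a qualifier is present.
    byte[][] famQf = KeyValue.parseColumn(OLD_COLUMN);
    if (famQf.length == 1) {
      get.addFamily(famQf[0]);           // family-only specification
    } else {
      get.addColumn(famQf[0], famQf[1]); // explicit family + qualifier
    }
    return get;
  }
}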
@@ -1,6 +1,7 @@
HBase Change Log
Release 0.21.0 - Unreleased
INCOMPATIBLE CHANGES
HBASE-1822 Remove the deprecated APIs

BUG FIXES
HBASE-1791 Timeout in IndexRecordWriter (Bradford Stephens via Andrew

@@ -27,7 +27,6 @@ import java.util.Map;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;

@@ -91,9 +91,7 @@ public class RowResource implements Constants {
rowKey = value.getRow();
rowModel = new RowModel(rowKey);
}
rowModel.addCell(
new CellModel(value.getColumn(), value.getTimestamp(),
value.getValue()));
rowModel.addCell(new CellModel(value));
value = generator.next();
} while (value != null);
model.addRow(rowModel);
@@ -148,7 +146,11 @@ public class RowResource implements Constants {
Put put = new Put(row.getKey());
for (CellModel cell: row.getCells()) {
byte [][] parts = KeyValue.parseColumn(cell.getColumn());
put.add(parts[0], parts[1], cell.getTimestamp(), cell.getValue());
if(parts.length == 1) {
put.add(parts[0], new byte[0], cell.getTimestamp(), cell.getValue());
} else {
put.add(parts[0], parts[1], cell.getTimestamp(), cell.getValue());
}
}
table.put(put);
if (LOG.isDebugEnabled()) {
@@ -203,7 +205,11 @@ public class RowResource implements Constants {
}
Put put = new Put(row);
byte parts[][] = KeyValue.parseColumn(column);
put.add(parts[0], parts[1], timestamp, message);
if(parts.length == 1) {
put.add(parts[0], new byte[0], timestamp, message);
} else {
put.add(parts[0], parts[1], timestamp, message);
}
table = pool.getTable(this.table);
table.put(put);
if (LOG.isDebugEnabled()) {
@@ -272,13 +278,13 @@ public class RowResource implements Constants {
for (byte[] column: rowspec.getColumns()) {
byte[][] split = KeyValue.parseColumn(column);
if (rowspec.hasTimestamp()) {
if (split[1] != null) {
if (split.length == 2) {
delete.deleteColumns(split[0], split[1], rowspec.getTimestamp());
} else {
delete.deleteFamily(split[0], rowspec.getTimestamp());
}
} else {
if (split[1] != null) {
if (split.length == 2) {
delete.deleteColumns(split[0], split[1]);
} else {
delete.deleteFamily(split[0]);

@@ -41,7 +41,15 @@ public class RowResultGenerator extends ResultGenerator {
try {
Get get = new Get(rowspec.getRow());
if (rowspec.hasColumns()) {
get.addColumns(rowspec.getColumns());
byte [][] columns = rowspec.getColumns();
for(byte [] column : columns) {
byte [][] famQf = KeyValue.parseColumn(column);
if(famQf.length == 1) {
get.addFamily(famQf[0]);
} else {
get.addColumn(famQf[0], famQf[1]);
}
}
} else {
// rowspec does not explicitly specify columns, return them all
for (HColumnDescriptor family:

@@ -99,9 +99,7 @@ public class ScannerInstanceResource implements Constants {
rowKey = value.getRow();
rowModel = new RowModel(rowKey);
}
rowModel.addCell(
new CellModel(value.getColumn(), value.getTimestamp(),
value.getValue()));
rowModel.addCell(new CellModel(value));
} while (--count > 0);
model.addRow(rowModel);
ResponseBuilder response = Response.ok(model);
@@ -122,10 +120,12 @@ public class ScannerInstanceResource implements Constants {
LOG.info("generator exhausted");
return Response.noContent().build();
}
byte [] column = KeyValue.makeColumn(value.getFamily(),
value.getQualifier());
ResponseBuilder response = Response.ok(value.getValue());
response.cacheControl(cacheControl);
response.header("X-Row", Base64.encode(value.getRow()));
response.header("X-Column", Base64.encode(value.getColumn()));
response.header("X-Column", Base64.encode(column));
response.header("X-Timestamp", value.getTimestamp());
return response.build();
} catch (IllegalStateException e) {

@@ -59,7 +59,7 @@ public class ScannerResultGenerator extends ResultGenerator {
byte[][] columns = rowspec.getColumns();
for (byte[] column: columns) {
byte[][] split = KeyValue.parseColumn(column);
if (split[1] != null) {
if (split.length == 2) {
scan.addColumn(split[0], split[1]);
} else {
scan.addFamily(split[0]);

@@ -29,6 +29,7 @@ import javax.xml.bind.annotation.XmlType;
import javax.xml.bind.annotation.XmlValue;

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.stargate.ProtobufMessageHandler;
import org.apache.hadoop.hbase.stargate.protobuf.generated.CellMessage.Cell;

@@ -76,6 +77,15 @@ public class CellModel implements ProtobufMessageHandler, Serializable {
this(column, HConstants.LATEST_TIMESTAMP, value);
}

/**
* Constructor from KeyValue
* @param kv
*/
public CellModel(KeyValue kv) {
this(KeyValue.makeColumn(kv.getFamily(), kv.getQualifier()),
kv.getTimestamp(), kv.getValue());
}

/**
* Constructor
* @param column

@@ -32,6 +32,7 @@ import javax.xml.bind.Unmarshaller;
import org.apache.commons.httpclient.Header;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.stargate.client.Client;
import org.apache.hadoop.hbase.stargate.client.Cluster;
@@ -79,8 +80,10 @@ public class TestRowResource extends MiniClusterTestCase {
return;
}
HTableDescriptor htd = new HTableDescriptor(TABLE);
htd.addFamily(new HColumnDescriptor(COLUMN_1));
htd.addFamily(new HColumnDescriptor(COLUMN_2));
htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(
Bytes.toBytes(COLUMN_1))[0]));
htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(
Bytes.toBytes(COLUMN_2))[0]));
admin.createTable(htd);
}

@@ -76,7 +76,11 @@ public class TestScannerResource extends MiniClusterTestCase {
k[1] = b2;
k[2] = b3;
Put put = new Put(k);
put.add(famAndQf[0], famAndQf[1], k);
if(famAndQf.length == 1) {
put.add(famAndQf[0], new byte[0], k);
} else {
put.add(famAndQf[0], famAndQf[1], k);
}
table.put(put);
count++;
}
@@ -107,8 +111,10 @@ public class TestScannerResource extends MiniClusterTestCase {
return;
}
HTableDescriptor htd = new HTableDescriptor(TABLE);
htd.addFamily(new HColumnDescriptor(COLUMN_1));
htd.addFamily(new HColumnDescriptor(COLUMN_2));
htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(
Bytes.toBytes(COLUMN_1))[0]));
htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(
Bytes.toBytes(COLUMN_2))[0]));
admin.createTable(htd);
expectedRows1 = insertData(TABLE, COLUMN_1, 1.0);
expectedRows2 = insertData(TABLE, COLUMN_2, 0.5);

@@ -29,6 +29,7 @@ import javax.xml.bind.JAXBException;

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.stargate.client.Client;
@@ -38,6 +39,7 @@ import org.apache.hadoop.hbase.stargate.model.TableModel;
import org.apache.hadoop.hbase.stargate.model.TableInfoModel;
import org.apache.hadoop.hbase.stargate.model.TableListModel;
import org.apache.hadoop.hbase.stargate.model.TableRegionModel;
import org.apache.hadoop.hbase.util.Bytes;

public class TestTableResource extends MiniClusterTestCase {
private static String TABLE = "TestTableResource";
@@ -65,7 +67,8 @@ public class TestTableResource extends MiniClusterTestCase {
return;
}
HTableDescriptor htd = new HTableDescriptor(TABLE);
htd.addFamily(new HColumnDescriptor(COLUMN));
htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(
Bytes.toBytes(COLUMN))[0]));
admin.createTable(htd);
new HTable(conf, TABLE);
}

@@ -29,7 +29,6 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
@@ -46,7 +45,7 @@ public class IndexedTable extends TransactionalTable {
// TODO move these schema constants elsewhere
public static final byte[] INDEX_COL_FAMILY_NAME = Bytes.toBytes("__INDEX__");
public static final byte[] INDEX_COL_FAMILY = Bytes.add(
INDEX_COL_FAMILY_NAME, new byte[] { HStoreKey.COLUMN_FAMILY_DELIMITER });
INDEX_COL_FAMILY_NAME, KeyValue.COLUMN_FAMILY_DELIM_ARRAY);
public static final byte[] INDEX_BASE_ROW = Bytes.toBytes("ROW");
public static final byte[] INDEX_BASE_ROW_COLUMN = Bytes.add(
INDEX_COL_FAMILY, INDEX_BASE_ROW);
@@ -114,7 +113,14 @@ public class IndexedTable extends TransactionalTable {

Scan indexScan = new Scan();
indexScan.setFilter(indexFilter);
indexScan.addColumns(allIndexColumns);
for(byte [] column : allIndexColumns) {
byte [][] famQf = KeyValue.parseColumn(column);
if(famQf.length == 1) {
indexScan.addFamily(famQf[0]);
} else {
indexScan.addColumn(famQf[0], famQf[1]);
}
}
if (indexStartRow != null) {
indexScan.setStartRow(indexStartRow);
}
@@ -174,24 +180,32 @@ public class IndexedTable extends TransactionalTable {
for (int i = 0; i < indexResult.length; i++) {
Result row = indexResult[i];

byte[] baseRow = row.getValue(INDEX_BASE_ROW_COLUMN);
byte[] baseRow = row.getValue(INDEX_COL_FAMILY_NAME, INDEX_BASE_ROW);
LOG.debug("next index row [" + Bytes.toString(row.getRow())
+ "] -> base row [" + Bytes.toString(baseRow) + "]");
Result baseResult = null;
if (columns != null && columns.length > 0) {
LOG.debug("Going to base table for remaining columns");
Get baseGet = new Get(baseRow);
baseGet.addColumns(columns);
for(byte [] column : columns) {
byte [][] famQf = KeyValue.parseColumn(column);
if(famQf.length == 1) {
baseGet.addFamily(famQf[0]);
} else {
baseGet.addColumn(famQf[0], famQf[1]);
}
}
baseResult = IndexedTable.this.get(baseGet);
}

List<KeyValue> results = new ArrayList<KeyValue>();
for (KeyValue indexKV : row.list()) {
byte[] col = indexKV.getColumn();
if (HStoreKey.matchingFamily(INDEX_COL_FAMILY_NAME, col)) {
if (indexKV.matchingFamily(INDEX_COL_FAMILY_NAME)) {
continue;
}
results.add(new KeyValue(baseRow, indexKV.getColumn(), indexKV.getTimestamp(), KeyValue.Type.Put, indexKV.getValue()));
results.add(new KeyValue(baseRow, indexKV.getFamily(),
indexKV.getQualifier(), indexKV.getTimestamp(), KeyValue.Type.Put,
indexKV.getValue()));
}

if (baseResult != null) {

@@ -24,22 +24,20 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.ColumnNameParseException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.tableindexed.IndexMaintenanceUtils;
import org.apache.hadoop.hbase.util.Bytes;

@@ -88,16 +86,13 @@ public class IndexedTableAdmin extends HBaseAdmin {
HTableDescriptor indexTableDesc = new HTableDescriptor(indexSpec
.getIndexedTableName(baseTableName));
Set<byte[]> families = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
families.add(IndexedTable.INDEX_COL_FAMILY);
families.add(IndexedTable.INDEX_COL_FAMILY_NAME);
for (byte[] column : indexSpec.getAllColumns()) {
families.add(Bytes.add(HStoreKey.getFamily(column),
new byte[] { HStoreKey.COLUMN_FAMILY_DELIMITER }));
families.add(KeyValue.parseColumn(column)[0]);
}

for (byte[] colFamily : families) {
indexTableDesc.addFamily(new HColumnDescriptor(colFamily));
}

return indexTableDesc;
}

@@ -135,13 +130,23 @@ public class IndexedTableAdmin extends HBaseAdmin {
private void reIndexTable(byte[] baseTableName, IndexSpecification indexSpec) throws IOException {
HTable baseTable = new HTable(baseTableName);
HTable indexTable = new HTable(indexSpec.getIndexedTableName(baseTableName));
for (RowResult rowResult : baseTable.getScanner(indexSpec.getAllColumns())) {
Scan baseScan = new Scan();
for(byte [] column : indexSpec.getAllColumns()) {
byte [][] famQf = KeyValue.parseColumn(column);
if(famQf.length == 1) {
baseScan.addFamily(famQf[0]);
} else {
baseScan.addColumn(famQf[0], famQf[1]);
}
}
for (Result result : baseTable.getScanner(baseScan)) {
SortedMap<byte[], byte[]> columnValues = new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);
for (Entry<byte[], Cell> entry : rowResult.entrySet()) {
columnValues.put(entry.getKey(), entry.getValue().getValue());
for(KeyValue kv : result.sorted()) {
columnValues.put(Bytes.add(kv.getFamily(), KeyValue.COLUMN_FAMILY_DELIM_ARRAY,
kv.getQualifier()), kv.getValue());
}
if (IndexMaintenanceUtils.doesApplyToIndex(indexSpec, columnValues)) {
Put indexUpdate = IndexMaintenanceUtils.createIndexUpdate(indexSpec, rowResult.getRow(), columnValues);
Put indexUpdate = IndexMaintenanceUtils.createIndexUpdate(indexSpec, result.getRow(), columnValues);
indexTable.put(indexUpdate);
}
}

@@ -25,11 +25,12 @@ import java.util.Random;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseBackedTransactionLogger implements TransactionLogger {

@@ -37,16 +38,12 @@ public class HBaseBackedTransactionLogger implements TransactionLogger {
/** The name of the transaction status table. */
public static final String TABLE_NAME = "__GLOBAL_TRX_LOG__";

private static final String INFO_FAMILY = "Info:";

/**
* Column which holds the transaction status.
*
*/
private static final String STATUS_COLUMN = INFO_FAMILY + "Status";
private static final byte[] STATUS_COLUMN_BYTES = Bytes
.toBytes(STATUS_COLUMN);

private static final byte [] STATUS_FAMILY = Bytes.toBytes("Info");
private static final byte [] STATUS_QUALIFIER = Bytes.toBytes("Status");
/**
* Create the table.
*
@@ -55,7 +52,7 @@ public class HBaseBackedTransactionLogger implements TransactionLogger {
*/
public static void createTable() throws IOException {
HTableDescriptor tableDesc = new HTableDescriptor(TABLE_NAME);
tableDesc.addFamily(new HColumnDescriptor(INFO_FAMILY));
tableDesc.addFamily(new HColumnDescriptor(STATUS_FAMILY));
HBaseAdmin admin = new HBaseAdmin(new HBaseConfiguration());
admin.createTable(tableDesc);
}
@@ -93,15 +90,15 @@ public class HBaseBackedTransactionLogger implements TransactionLogger {

public TransactionStatus getStatusForTransaction(long transactionId) {
try {
RowResult result = table.getRow(getRow(transactionId));
Result result = table.get(new Get(getRow(transactionId)));
if (result == null || result.isEmpty()) {
return null;
}
Cell statusCell = result.get(STATUS_COLUMN_BYTES);
if (statusCell == null) {
byte [] statusValue = result.getValue(STATUS_FAMILY, STATUS_QUALIFIER);
if (statusValue == null) {
throw new RuntimeException("No status cell for row " + transactionId);
}
String statusString = Bytes.toString(statusCell.getValue());
String statusString = Bytes.toString(statusValue);
return TransactionStatus.valueOf(statusString);

} catch (IOException e) {
@@ -115,22 +112,20 @@ public class HBaseBackedTransactionLogger implements TransactionLogger {

public void setStatusForTransaction(long transactionId,
TransactionStatus status) {
BatchUpdate update = new BatchUpdate(getRow(transactionId));
update.put(STATUS_COLUMN, Bytes.toBytes(status.name()));

Put put = new Put(getRow(transactionId));
put.add(STATUS_FAMILY, STATUS_QUALIFIER, Bytes.toBytes(status.name()));
try {
table.commit(update);
table.put(put);
} catch (IOException e) {
throw new RuntimeException(e);
}
}

public void forgetTransaction(long transactionId) {
BatchUpdate update = new BatchUpdate(getRow(transactionId));
update.delete(STATUS_COLUMN);

Delete delete = new Delete(getRow(transactionId));
delete.deleteColumns(STATUS_FAMILY, STATUS_QUALIFIER);
try {
table.commit(update);
table.delete(delete);
} catch (IOException e) {
throw new RuntimeException(e);
}

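The transaction logger above also shows the second half of the removal: BatchUpdate, RowResult and Cell give way to Put, Delete, Get and Result. A rough, self-contained sketch of that client-side write migration (the row key and family/qualifier names here are hypothetical, not from this commit):

import java.io.IOException;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class WriteMigrationSketch {
  // Previously: BatchUpdate update = new BatchUpdate(row);
  //             update.put("Info:Status", value); table.commit(update);
  static void writeStatus(HTable table, byte[] row, byte[] value) throws IOException {
    Put put = new Put(row);
    put.add(Bytes.toBytes("Info"), Bytes.toBytes("Status"), value);
    table.put(put);
  }

  // Previously: update.delete("Info:Status"); table.commit(update);
  static void clearStatus(HTable table, byte[] row) throws IOException {
    Delete delete = new Delete(row);
    delete.deleteColumns(Bytes.toBytes("Info"), Bytes.toBytes("Status"));
    table.delete(delete);
  }
}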
@@ -32,7 +32,6 @@ import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ScannerCallable;
import org.apache.hadoop.hbase.client.ServerCallable;
import org.apache.hadoop.hbase.ipc.HBaseRPC;
import org.apache.hadoop.hbase.ipc.TransactionalRegionInterface;
import org.apache.hadoop.hbase.util.Bytes;

@@ -24,7 +24,7 @@ import java.util.SortedMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.ColumnNameParseException;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.tableindexed.IndexSpecification;
import org.apache.hadoop.hbase.client.tableindexed.IndexedTable;
@@ -45,22 +45,28 @@ public class IndexMaintenanceUtils {
update.add(IndexedTable.INDEX_COL_FAMILY_NAME, IndexedTable.INDEX_BASE_ROW, row);

try {
for (byte[] col : indexSpec.getIndexedColumns()) {
byte[] val = columnValues.get(col);
if (val == null) {
throw new RuntimeException("Unexpected missing column value. [" + Bytes.toString(col) + "]");
}
byte [][] colSeperated = HStoreKey.parseColumn(col);
update.add(colSeperated[0], colSeperated[1], val);
}

for (byte[] col : indexSpec.getAdditionalColumns()) {
byte[] val = columnValues.get(col);
if (val != null) {
byte [][] colSeperated = HStoreKey.parseColumn(col);
update.add(colSeperated[0], colSeperated[1], val);
}
}
for (byte[] col : indexSpec.getIndexedColumns()) {
byte[] val = columnValues.get(col);
if (val == null) {
throw new RuntimeException("Unexpected missing column value. [" + Bytes.toString(col) + "]");
}
byte [][] colSeparated = KeyValue.parseColumn(col);
if(colSeparated.length == 1) {
throw new ColumnNameParseException("Expected family:qualifier but only got a family");
}
update.add(colSeparated[0], colSeparated[1], val);
}

for (byte[] col : indexSpec.getAdditionalColumns()) {
byte[] val = columnValues.get(col);
if (val != null) {
byte [][] colSeparated = KeyValue.parseColumn(col);
if(colSeparated.length == 1) {
throw new ColumnNameParseException("Expected family:qualifier but only got a family");
}
update.add(colSeparated[0], colSeparated[1], val);
}
}
} catch (ColumnNameParseException e) {
throw new RuntimeException(e);
}

@@ -116,7 +116,12 @@ class IndexedRegion extends TransactionalRegion {

Get oldGet = new Get(put.getRow());
for (byte [] neededCol : neededColumns) {
oldGet.addColumn(neededCol);
byte [][] famQf = KeyValue.parseColumn(neededCol);
if(famQf.length == 1) {
oldGet.addFamily(famQf[0]);
} else {
oldGet.addColumn(famQf[0], famQf[1]);
}
}

Result oldResult = super.get(oldGet, lockId);
@@ -124,8 +129,10 @@ class IndexedRegion extends TransactionalRegion {
// Add the old values to the new if they are not there
if (oldResult != null && oldResult.raw() != null) {
for (KeyValue oldKV : oldResult.raw()) {
if (!newColumnValues.containsKey(oldKV.getColumn())) {
newColumnValues.put(oldKV.getColumn(), oldKV.getValue());
byte [] column = KeyValue.makeColumn(oldKV.getFamily(),
oldKV.getQualifier());
if (!newColumnValues.containsKey(column)) {
newColumnValues.put(column, oldKV.getValue());
}
}
}
@@ -181,7 +188,8 @@ class IndexedRegion extends TransactionalRegion {
Bytes.BYTES_COMPARATOR);
for (List<KeyValue> familyPuts : put.getFamilyMap().values()) {
for (KeyValue kv : familyPuts) {
columnValues.put(kv.getColumn(), kv.getValue());
byte [] column = KeyValue.makeColumn(kv.getFamily(), kv.getQualifier());
columnValues.put(column, kv.getValue());
}
}
return columnValues;
@@ -196,7 +204,8 @@ class IndexedRegion extends TransactionalRegion {
private boolean possiblyAppliesToIndex(IndexSpecification indexSpec, Put put) {
for (List<KeyValue> familyPuts : put.getFamilyMap().values()) {
for (KeyValue kv : familyPuts) {
if (indexSpec.containsColumn(kv.getColumn())) {
byte [] column = KeyValue.makeColumn(kv.getFamily(), kv.getQualifier());
if (indexSpec.containsColumn(column)) {
return true;
}
}
@@ -227,7 +236,12 @@ class IndexedRegion extends TransactionalRegion {

Get get = new Get(delete.getRow());
for (byte [] col : neededColumns) {
get.addColumn(col);
byte [][] famQf = KeyValue.parseColumn(col);
if(famQf.length == 1) {
get.addFamily(famQf[0]);
} else {
get.addColumn(famQf[0], famQf[1]);
}
}

Result oldRow = super.get(get, null);
@@ -263,7 +277,8 @@ class IndexedRegion extends TransactionalRegion {
List<KeyValue> list = result.list();
if (list != null) {
for(KeyValue kv : result.list()) {
currentColumnValues.put(kv.getColumn(), kv.getValue());
byte [] column = KeyValue.makeColumn(kv.getFamily(), kv.getQualifier());
currentColumnValues.put(column, kv.getValue());
}
}
return currentColumnValues;

@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
@@ -71,7 +72,7 @@ public class TestIndexedTable extends HBaseClusterTestCase {
super.setUp();

desc = new HTableDescriptor(TABLE_NAME);
desc.addFamily(new HColumnDescriptor(FAMILY_COLON));
desc.addFamily(new HColumnDescriptor(FAMILY));

IndexedTableDescriptor indexDesc = new IndexedTableDescriptor(desc);
// Create a new index that does lexicographic ordering on COL_A
@@ -107,7 +108,7 @@ public class TestIndexedTable extends HBaseClusterTestCase {
int numRows = 0;
byte[] lastColA = null;
for (Result rowResult : scanner) {
byte[] colA = rowResult.getValue(COL_A);
byte[] colA = rowResult.getValue(FAMILY, QUAL_A);
LOG.info("index scan : row [" + Bytes.toString(rowResult.getRow())
+ "] value [" + Bytes.toString(colA) + "]");
if (lastColA != null) {
@@ -127,7 +128,7 @@ public class TestIndexedTable extends HBaseClusterTestCase {
byte[] persistedRowValue = null;
for (Result rowResult : scanner) {
byte[] row = rowResult.getRow();
byte[] value = rowResult.getValue(COL_A);
byte[] value = rowResult.getValue(FAMILY, QUAL_A);
if (Bytes.toString(row).equals(Bytes.toString(PerformanceEvaluation.format(updatedRow)))) {
persistedRowValue = value;
LOG.info("update found: row [" + Bytes.toString(row)
@@ -191,7 +192,7 @@ public class TestIndexedTable extends HBaseClusterTestCase {
public void testDelete() throws IOException {
writeInitalRows();
// Delete the first row;
table.deleteAll(PerformanceEvaluation.format(0));
table.delete(new Delete(PerformanceEvaluation.format(0)));

assertRowsInOrder(NUM_ROWS - 1);
}

@@ -232,9 +232,9 @@ public class StressTestTransactions extends HBaseClusterTestCase {

TransactionState transactionState = transactionManager.beginTransaction();
int row1Amount = Bytes.toInt(table.get(transactionState,
new Get(row1).addColumn(COL)).getValue(COL));
new Get(row1).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
int row2Amount = Bytes.toInt(table.get(transactionState,
new Get(row2).addColumn(COL)).getValue(COL));
new Get(row2).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));

row1Amount -= transferAmount;
row2Amount += transferAmount;
@@ -257,7 +257,8 @@ public class StressTestTransactions extends HBaseClusterTestCase {
int totalSum = 0;
for (int i = 0; i < NUM_ST_ROWS; i++) {
totalSum += Bytes.toInt(table.get(transactionState,
new Get(makeSTRow(i)).addColumn(COL)).getValue(COL));
new Get(makeSTRow(i)).addColumn(FAMILY, QUAL_A)).getValue(FAMILY,
QUAL_A));
}

transactionManager.tryCommit(transactionState);
@@ -309,9 +310,9 @@ public class StressTestTransactions extends HBaseClusterTestCase {

TransactionState transactionState = transactionManager.beginTransaction();
int table1Amount = Bytes.toInt(table1.get(transactionState,
new Get(row).addColumn(COL)).getValue(COL));
new Get(row).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
int table2Amount = Bytes.toInt(table2.get(transactionState,
new Get(row).addColumn(COL)).getValue(COL));
new Get(row).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));

table1Amount -= transferAmount;
table2Amount += transferAmount;
@@ -337,7 +338,7 @@ public class StressTestTransactions extends HBaseClusterTestCase {
int[] amounts = new int[tables.length];
for (int i = 0; i < tables.length; i++) {
int amount = Bytes.toInt(tables[i].get(transactionState,
new Get(row).addColumn(COL)).getValue(COL));
new Get(row).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
amounts[i] = amount;
totalSum += amount;
}
@@ -397,15 +398,15 @@ public class StressTestTransactions extends HBaseClusterTestCase {
int thisTableSum = 0;
for (int i = 0; i < NUM_ST_ROWS; i++) {
byte[] row = makeSTRow(i);
thisTableSum += Bytes.toInt(table.get(new Get(row).addColumn(COL))
.getValue(COL));
thisTableSum += Bytes.toInt(table.get(new Get(row).addColumn(FAMILY, QUAL_A))
.getValue(FAMILY, QUAL_A));
}
Assert.assertEquals(SingleTableTransactionThread.TOTAL_SUM, thisTableSum);

for (int i = 0; i < NUM_MT_ROWS; i++) {
byte[] row = makeMTRow(i);
mtSums[i] += Bytes.toInt(table.get(new Get(row).addColumn(COL))
.getValue(COL));
mtSums[i] += Bytes.toInt(table.get(new Get(row).addColumn(FAMILY, QUAL_A))
.getValue(FAMILY, QUAL_A));
}
}

@@ -71,7 +71,7 @@ public class TestTransactions extends HBaseClusterTestCase {
super.setUp();

HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
desc.addFamily(new HColumnDescriptor(FAMILY_COLON));
desc.addFamily(new HColumnDescriptor(FAMILY));
admin = new HBaseAdmin(conf);
admin.createTable(desc);
table = new TransactionalTable(conf, desc.getName());
@@ -118,13 +118,14 @@ public class TestTransactions extends HBaseClusterTestCase {
TransactionState transactionState = transactionManager.beginTransaction();

int originalValue = Bytes.toInt(table.get(transactionState,
new Get(ROW1).addColumn(COL_A)).value());
new Get(ROW1).addColumn(FAMILY, QUAL_A)).value());
int newValue = originalValue + 1;

table.put(transactionState, new Put(ROW1).add(FAMILY, QUAL_A, Bytes
.toBytes(newValue)));

Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(COL_A));
Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(FAMILY,
QUAL_A));
Assert.assertEquals(newValue, Bytes.toInt(row1_A.value()));
}

@@ -132,7 +133,7 @@ public class TestTransactions extends HBaseClusterTestCase {
TransactionState transactionState = transactionManager.beginTransaction();

int originalValue = Bytes.toInt(table.get(transactionState,
new Get(ROW1).addColumn(COL_A)).value());
new Get(ROW1).addColumn(FAMILY, QUAL_A)).value());
int newValue = originalValue + 1;
table.put(transactionState, new Put(ROW1).add(FAMILY, QUAL_A, Bytes
.toBytes(newValue)));
@@ -175,12 +176,13 @@ public class TestTransactions extends HBaseClusterTestCase {
private TransactionState makeTransaction1() throws IOException {
TransactionState transactionState = transactionManager.beginTransaction();

Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(COL_A));
Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(FAMILY,
QUAL_A));

table.put(transactionState, new Put(ROW2).add(FAMILY, QUAL_A, row1_A
.getValue(COL_A)));
.getValue(FAMILY, QUAL_A)));
table.put(transactionState, new Put(ROW3).add(FAMILY, QUAL_A, row1_A
.getValue(COL_A)));
.getValue(FAMILY, QUAL_A)));

return transactionState;
}
@@ -189,9 +191,10 @@ public class TestTransactions extends HBaseClusterTestCase {
private TransactionState makeTransaction2() throws IOException {
TransactionState transactionState = transactionManager.beginTransaction();

Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(COL_A));
Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(FAMILY,
QUAL_A));

int value = Bytes.toInt(row1_A.getValue(COL_A));
int value = Bytes.toInt(row1_A.getValue(FAMILY, QUAL_A));

table.put(transactionState, new Put(ROW1).add(FAMILY, QUAL_A, Bytes
.toBytes(value + 1)));

@@ -93,7 +93,7 @@ public class TestTHLogRecovery extends HBaseClusterTestCase {
super.setUp();

HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
desc.addFamily(new HColumnDescriptor(FAMILY_COLON));
desc.addFamily(new HColumnDescriptor(FAMILY));
admin = new HBaseAdmin(conf);
admin.createTable(desc);
table = new TransactionalTable(conf, desc.getName());
@@ -203,12 +203,12 @@ public class TestTHLogRecovery extends HBaseClusterTestCase {

private void verify(final int numRuns) throws IOException {
// Reads
int row1 = Bytes.toInt(table.get(new Get(ROW1).addColumn(COL_A)).getValue(
COL_A));
int row2 = Bytes.toInt(table.get(new Get(ROW2).addColumn(COL_A)).getValue(
COL_A));
int row3 = Bytes.toInt(table.get(new Get(ROW3).addColumn(COL_A)).getValue(
COL_A));
int row1 = Bytes.toInt(table.get(new Get(ROW1).addColumn(FAMILY, QUAL_A))
.getValue(FAMILY, QUAL_A));
int row2 = Bytes.toInt(table.get(new Get(ROW2).addColumn(FAMILY, QUAL_A))
.getValue(FAMILY, QUAL_A));
int row3 = Bytes.toInt(table.get(new Get(ROW3).addColumn(FAMILY, QUAL_A))
.getValue(FAMILY, QUAL_A));

assertEquals(TOTAL_VALUE - 2 * numRuns, row1);
assertEquals(numRuns, row2);
@@ -222,11 +222,11 @@ public class TestTHLogRecovery extends HBaseClusterTestCase {

// Reads
int row1 = Bytes.toInt(table.get(transactionState,
new Get(ROW1).addColumn(COL_A)).getValue(COL_A));
new Get(ROW1).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
int row2 = Bytes.toInt(table.get(transactionState,
new Get(ROW2).addColumn(COL_A)).getValue(COL_A));
new Get(ROW2).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
int row3 = Bytes.toInt(table.get(transactionState,
new Get(ROW3).addColumn(COL_A)).getValue(COL_A));
new Get(ROW3).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));

row1 -= 2;
row2 += 1;

@@ -26,7 +26,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

@@ -29,9 +29,6 @@ import java.util.Map;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.rest.exception.HBaseRestException;
import org.apache.hadoop.hbase.rest.serializer.IRestSerializer;
import org.apache.hadoop.hbase.rest.serializer.ISerializable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

@@ -47,7 +44,7 @@ import agilejson.TOJSON;
* column and recreating it. If there is data stored in the column, it will be
* deleted when the column is deleted.
*/
public class HColumnDescriptor implements ISerializable, WritableComparable<HColumnDescriptor> {
public class HColumnDescriptor implements WritableComparable<HColumnDescriptor> {
// For future backward compatibility

// Version 3 was when column names become byte arrays and when we picked up
@@ -150,7 +147,7 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
* The other attributes are defaulted.
*
* @param familyName Column family name. Must be 'printable' -- digit or
* letter -- and end in a <code>:<code>
* letter -- and may not contain a <code>:<code>
*/
public HColumnDescriptor(final String familyName) {
this(Bytes.toBytes(familyName));
@@ -161,7 +158,7 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
* The other attributes are defaulted.
*
* @param familyName Column family name. Must be 'printable' -- digit or
* letter -- and end in a <code>:<code>
* letter -- and may not contain a <code>:<code>
*/
public HColumnDescriptor(final byte [] familyName) {
this (familyName == null || familyName.length <= 0?
@@ -188,7 +185,7 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
/**
* Constructor
* @param familyName Column family name. Must be 'printable' -- digit or
* letter -- and end in a <code>:<code>
* letter -- and may not contain a <code>:<code>
* @param maxVersions Maximum number of versions to keep
* @param compression Compression type
* @param inMemory If true, column data should be kept in an HRegionServer's
@@ -199,8 +196,8 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
* @param bloomFilter Enable the specified bloom filter for this column
*
* @throws IllegalArgumentException if passed a family name that is made of
* other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> and does not
* end in a <code>:</code>
* other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> or contains
* a <code>:</code>
* @throws IllegalArgumentException if the number of versions is <= 0
*/
public HColumnDescriptor(final byte [] familyName, final int maxVersions,
@@ -210,43 +207,11 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
this(familyName, maxVersions, compression, inMemory, blockCacheEnabled,
DEFAULT_BLOCKSIZE, timeToLive, bloomFilter);
}

/**
* Backwards compatible Constructor. Maximum value length is no longer
* configurable.
*
* @param familyName Column family name. Must be 'printable' -- digit or
* letter -- and end in a <code>:<code>
* @param maxVersions Maximum number of versions to keep
* @param compression Compression type
* @param inMemory If true, column data should be kept in an HRegionServer's
* cache
* @param blockCacheEnabled If true, MapFile blocks should be cached
* @param blocksize
* @param maxValueLength Restrict values to <= this value (UNSUPPORTED)
* @param timeToLive Time-to-live of cell contents, in seconds
* (use HConstants.FOREVER for unlimited TTL)
* @param bloomFilter Enable the specified bloom filter for this column
*
* @throws IllegalArgumentException if passed a family name that is made of
* other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> and does not
* end in a <code>:</code>
* @throws IllegalArgumentException if the number of versions is <= 0
* @deprecated As of hbase 0.20.0, max value length no longer supported
*/
// public HColumnDescriptor(final byte [] familyName, final int maxVersions,
// final String compression, final boolean inMemory,
// final boolean blockCacheEnabled, final int blocksize,
// final int maxValueLength,
// final int timeToLive, final boolean bloomFilter) {
// this(familyName, maxVersions, compression, inMemory, blockCacheEnabled,
// blocksize, timeToLive, bloomFilter);
// }

/**
* Constructor
* @param familyName Column family name. Must be 'printable' -- digit or
* letter -- and end in a <code>:<code>
* letter -- and may not contain a <code>:<code>
* @param maxVersions Maximum number of versions to keep
* @param compression Compression type
* @param inMemory If true, column data should be kept in an HRegionServer's
@@ -258,16 +223,16 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
* @param bloomFilter Enable the specified bloom filter for this column
*
* @throws IllegalArgumentException if passed a family name that is made of
* other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> and does not
* end in a <code>:</code>
* other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> or contains
* a <code>:</code>
* @throws IllegalArgumentException if the number of versions is <= 0
*/
public HColumnDescriptor(final byte [] familyName, final int maxVersions,
final String compression, final boolean inMemory,
final boolean blockCacheEnabled, final int blocksize,
final int timeToLive, final boolean bloomFilter) {
this.name = stripColon(familyName);
isLegalFamilyName(this.name);
isLegalFamilyName(familyName);
this.name = familyName;

if (maxVersions <= 0) {
// TODO: Allow maxVersion of 0 to be the way you say "Keep all versions".
@@ -284,17 +249,6 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
setBlocksize(blocksize);
}

private static byte [] stripColon(final byte [] n) {
byte col = n[n.length-1];
if (col == ':') {
// strip.
byte [] res = new byte[n.length-1];
System.arraycopy(n, 0, res, 0, n.length-1);
return res;
}
return n;
}

/**
* @param b Family name.
* @return <code>b</code>
@@ -311,7 +265,7 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
throw new IllegalArgumentException("Family names cannot start with a " +
"period: " + Bytes.toString(b));
}
for (int i = 0; i < (b.length - 1); i++) {
for (int i = 0; i < b.length; i++) {
if (Character.isISOControl(b[i]) || b[i] == ':') {
throw new IllegalArgumentException("Illegal character <" + b[i] +
">. Family names cannot contain control characters or colons: " +
@@ -324,18 +278,11 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
/**
* @return Name of this column family
*/
@TOJSON(fieldName = "name", base64=true)
public byte [] getName() {
return name;
}

/**
* @return Name of this column family with colon as required by client API
*/
@TOJSON(fieldName = "name", base64=true)
public byte [] getNameWithColon() {
return Bytes.add(this.name, new byte[]{':'});
}

/**
* @return Name of this column family
*/
@@ -684,11 +631,4 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
}
return result;
}

/* (non-Javadoc)
* @see org.apache.hadoop.hbase.rest.xml.IOutputXML#toXML()
*/
public void restSerialize(IRestSerializer serializer) throws HBaseRestException {
serializer.serializeColumnDescriptor(this);
}
}

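With the hunks above, a column family name is stored and validated without a trailing colon: stripColon() and getNameWithColon() are gone, and isLegalFamilyName() now checks every byte, so a name containing ':' is rejected rather than silently trimmed. A small illustrative sketch of the expected constructor behaviour (an inference from the modified validation, not an excerpt of the commit):

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.util.Bytes;

public class FamilyNameSketch {
  public static void main(String[] args) {
    // Plain family name: accepted.
    HColumnDescriptor info = new HColumnDescriptor("info");
    System.out.println(Bytes.toString(info.getName()));
    // "info:" would now be expected to throw IllegalArgumentException
    // instead of having the colon stripped.
  }
}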
[File diff suppressed because it is too large]
@@ -33,9 +33,6 @@ import java.util.TreeMap;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.rest.exception.HBaseRestException;
import org.apache.hadoop.hbase.rest.serializer.IRestSerializer;
import org.apache.hadoop.hbase.rest.serializer.ISerializable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableComparable;

@@ -45,8 +42,7 @@ import agilejson.TOJSON;
* HTableDescriptor contains the name of an HTable, and its
* column families.
*/
public class HTableDescriptor implements WritableComparable<HTableDescriptor>,
ISerializable {
public class HTableDescriptor implements WritableComparable<HTableDescriptor> {

// Changes prior to version 3 were not recorded here.
// Version 3 adds metadata as a map where keys and values are byte[].
@@ -658,11 +654,4 @@ ISerializable {
HConstants.ALL_VERSIONS, Compression.Algorithm.NONE.getName(),
false, false, 8 * 1024,
HConstants.WEEK_IN_SECONDS, false)});

/* (non-Javadoc)
* @see org.apache.hadoop.hbase.rest.xml.IOutputXML#toXML()
*/
public void restSerialize(IRestSerializer serializer) throws HBaseRestException {
serializer.serializeTableDescriptor(this);
}
}

@@ -22,6 +22,7 @@ package org.apache.hadoop.hbase;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Comparator;

import org.apache.commons.logging.Log;

@@ -236,35 +237,6 @@ public class KeyValue implements Writable, HeapSize {
this.length = length;
}

/** Temporary constructors until 880/1249 is committed to remove deps */

/**
* Temporary.
*/
public KeyValue(final byte [] row, final byte [] column) {
this(row, column, HConstants.LATEST_TIMESTAMP, null);
}

public KeyValue(final byte [] row, final byte [] column, long ts) {
this(row, column, ts, null);
}

public KeyValue(final byte [] row, final byte [] column, long ts,
byte [] value) {
this(row, column, ts, Type.Put, value);
}

public KeyValue(final byte [] row, final byte [] column, long ts, Type type,
byte [] value) {
int rlength = row == null ? 0 : row.length;
int vlength = value == null ? 0 : value.length;
int clength = column == null ? 0 : column.length;
this.bytes = createByteArray(row, 0, rlength, column, 0, clength,
ts, type, value, 0, vlength);
this.length = this.bytes.length;
this.offset = 0;
}

/** Constructors that build a new backing byte array from fields */

/**

@@ -911,25 +883,6 @@ public class KeyValue implements Writable, HeapSize {
return getType() == Type.DeleteFamily.getCode();
}

/**
* Primarily for use client-side. Returns the column of this KeyValue in the
* deprecated format: <i>family:qualifier</i>, and in a new byte array.<p>
*
* If server-side, use {@link #getBuffer()} with appropriate offsets and
* lengths instead.
* @return Returns column. Makes a copy. Inserts delimiter.
*/
public byte [] getColumn() {
int fo = getFamilyOffset();
int fl = getFamilyLength(fo);
int ql = getQualifierLength();
byte [] result = new byte[fl + 1 + ql];
System.arraycopy(this.bytes, fo, result, 0, fl);
result[fl] = COLUMN_FAMILY_DELIMITER;
System.arraycopy(this.bytes, fo + fl, result, fl + 1, ql);
return result;
}

/**
* Primarily for use client-side. Returns the family of this KeyValue in a
* new byte array.<p>

@@ -1073,23 +1026,6 @@ public class KeyValue implements Writable, HeapSize {
return Bytes.compareTo(column, 0, column.length, this.bytes, o, l) == 0;
}

/**
* @param column Column with delimiter
* @return True if column matches.
*/
public boolean matchingColumn(final byte [] column) {
int index = getFamilyDelimiterIndex(column, 0, column.length);
int rl = getRowLength();
int o = getFamilyOffset(rl);
int fl = getFamilyLength(o);
int ql = getQualifierLength(rl,fl);
if(Bytes.compareTo(column, 0, index, this.bytes, o, fl) != 0) {
return false;
}
return Bytes.compareTo(column, index + 1, column.length - (index + 1),
this.bytes, o + fl, ql) == 0;
}

/**
*
* @param family column family

@@ -1158,19 +1094,24 @@ public class KeyValue implements Writable, HeapSize {

/**
* Splits a column in family:qualifier form into separate byte arrays.
*
* <p>
* Not recommend to be used as this is old-style API.
* @param c The column.
* @return The parsed column.
*/
public static byte [][] parseColumn(byte [] c) {
final byte [][] result = new byte [2][];
final int index = getDelimiter(c, 0, c.length, COLUMN_FAMILY_DELIMITER);
if (index == -1) {
// If no delimiter, return <code>c</code> as family and null qualifier.
result[0] = c;
result[1] = null;
return result;
// If no delimiter, return array of size 1
return new byte [][] { c };
} else if(index == c.length - 1) {
// Only a family, return array size 1
byte [] family = new byte[c.length-1];
System.arraycopy(c, 0, family, 0, family.length);
return new byte [][] { family };
}
// Family and column, return array size 2
final byte [][] result = new byte [2][];
result[0] = new byte [index];
System.arraycopy(c, 0, result[0], 0, index);
final int len = c.length - (index + 1);

@@ -1180,6 +1121,18 @@ public class KeyValue implements Writable, HeapSize {
return result;
}

/**
* Makes a column in family:qualifier form from separate byte arrays.
* <p>
* Not recommended for usage as this is old-style API.
* @param family
* @param qualifier
* @return family:qualifier
*/
public static byte [] makeColumn(byte [] family, byte [] qualifier) {
return Bytes.add(family, COLUMN_FAMILY_DELIM_ARRAY, qualifier);
}

/**
* @param b
* @return Index of the family-qualifier colon delimiter character in passed

@@ -1551,6 +1504,24 @@ public class KeyValue implements Writable, HeapSize {
return new KeyValue(row, f, q, ts, Type.Maximum);
}

/**
* @param b
* @return A KeyValue made of a byte array that holds the key-only part.
* Needed to convert hfile index members to KeyValues.
*/
public static KeyValue createKeyValueFromKey(final byte [] b) {
return createKeyValueFromKey(b, 0, b.length);
}

/**
* @param b
* @return A KeyValue made of a byte buffer that holds the key-only part.
* Needed to convert hfile index members to KeyValues.
*/
public static KeyValue createKeyValueFromKey(final ByteBuffer bb) {
return createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit());
}

/**
* @param b
* @param o

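Note the new contract of KeyValue.parseColumn() in the hunk above: with no qualifier (with or without a trailing ':') it now returns a one-element array instead of padding a null qualifier, and makeColumn() re-joins a family/qualifier pair. A short usage sketch under that assumption (the column names are illustrative, not part of the commit):

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class ParseColumnSketch {
  public static void main(String[] args) {
    byte[][] familyOnly = KeyValue.parseColumn(Bytes.toBytes("info"));        // length 1
    byte[][] familyQual = KeyValue.parseColumn(Bytes.toBytes("info:status")); // length 2
    byte[] rejoined = KeyValue.makeColumn(familyQual[0], familyQual[1]);      // "info:status"
    System.out.println(familyOnly.length + " " + familyQual.length + " " + Bytes.toString(rejoined));
  }
}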
@@ -195,19 +195,6 @@ public class Delete implements Writable, Row, Comparable<Row> {
return this;
}

/**
* Delete all versions of the specified column, given in
* <code>family:qualifier</code> notation, and with a timestamp less than
* or equal to the specified timestamp.
* @param column colon-delimited family and qualifier
* @param timestamp maximum version timestamp
*/
public Delete deleteColumns(byte [] column, long timestamp) {
byte [][] parts = KeyValue.parseColumn(column);
this.deleteColumns(parts[0], parts[1], timestamp);
return this;
}

/**
* Delete the latest version of the specified column.
* This is an expensive call in that on the server-side, it first does a
@@ -237,22 +224,6 @@ public class Delete implements Writable, Row, Comparable<Row> {
familyMap.put(family, list);
return this;
}

public void deleteColumns(byte [] column) {
byte [][] parts = KeyValue.parseColumn(column);
this.deleteColumns(parts[0], parts[1]);
}

/**
* Delete the latest version of the specified column, given in
* <code>family:qualifier</code> notation.
* @param column colon-delimited family and qualifier
*/
public Delete deleteColumn(byte [] column) {
byte [][] parts = KeyValue.parseColumn(column);
this.deleteColumn(parts[0], parts[1], HConstants.LATEST_TIMESTAMP);
return this;
}

/**
* Method for retrieving the delete's familyMap

@@ -127,38 +127,7 @@ public class Get implements Writable {
familyMap.put(family, set);
return this;
}

/**
* Adds an array of columns specified the old format, family:qualifier.
* <p>
* Overrides previous calls to addFamily for any families in the input.
* @param columns array of columns, formatted as <pre>family:qualifier</pre>
*/
public Get addColumns(byte [][] columns) {
if (columns == null) return this;
for(int i = 0; i < columns.length; i++) {
try {
addColumn(columns[i]);
} catch(Exception e) {}
}
return this;
}

/**
* @param column Old format column.
* @return This.
*/
public Get addColumn(final byte [] column) {
if (column == null) return this;
byte [][] split = KeyValue.parseColumn(column);
if (split.length > 1 && split[1] != null && split[1].length > 0) {
addColumn(split[0], split[1]);
} else {
addFamily(split[0]);
}
return this;
}

/**
* Get versions of columns only within the specified timestamp range,
* [minStamp, maxStamp).

[File diff suppressed because it is too large]
@@ -109,19 +109,6 @@ public class Put implements HeapSize, Writable, Row, Comparable<Row> {
return add(family, qualifier, this.timestamp, value);
}

/**
* Add the specified column and value, with the specified timestamp as
* its version to this Put operation.
* @param column Old style column name with family and qualifier put together
* with a colon.
* @param ts version timestamp
* @param value column value
*/
public Put add(byte [] column, long ts, byte [] value) {
byte [][] parts = KeyValue.parseColumn(column);
return add(parts[0], parts[1], ts, value);
}

/**
* Add the specified column and value, with the specified timestamp as
* its version to this Put operation.

@@ -30,13 +30,11 @@ import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.SplitKeyValue;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;

import org.apache.hadoop.io.Writable;

/**

@@ -275,32 +273,6 @@ public class Result implements Writable {
return entry == null? null: entry.getValue();
}

public Cell getCellValue(byte[] family, byte[] qualifier) {
Map.Entry<Long,byte[]> val = getKeyValue(family, qualifier);
if (val == null)
return null;
return new Cell(val.getValue(), val.getKey());
}

/**
* @return First KeyValue in this Result as a Cell or null if empty.
*/
public Cell getCellValue() {
return isEmpty()? null: new Cell(kvs[0].getValue(), kvs[0].getTimestamp());
}

/**
* @return This Result as array of Cells or null if empty.
*/
public Cell [] getCellValues() {
if (isEmpty()) return null;
Cell [] results = new Cell[kvs.length];
for (int i = 0; i < kvs.length; i++) {
results[i] = new Cell(kvs[i].getValue(), kvs[i].getTimestamp());
}
return results;
}

private Map.Entry<Long,byte[]> getKeyValue(byte[] family, byte[] qualifier) {
if(this.familyMap == null) {
getMap();

@@ -327,21 +299,6 @@ public class Result implements Writable {
qualifierMap.get(qualifier): qualifierMap.get(new byte[0]);
}

/**
* Get the latest version of the specified column,
* using <pre>family:qualifier</pre> notation.
* @param column column in family:qualifier notation
* @return value of latest version of column, null if none found
*/
public byte [] getValue(byte [] column) {
try {
byte [][] split = KeyValue.parseColumn(column);
return getValue(split[0], split[1]);
} catch(Exception e) {
return null;
}
}

/**
* Checks for existence of the specified column.
* @param family family name

@@ -366,18 +323,7 @@ public class Result implements Writable {
}
return true;
}

/**
* Returns this Result in the old return format, {@link RowResult}.
* @return a RowResult
*/
public RowResult getRowResult() {
if(this.kvs == null) {
readFields();
}
return RowResult.createRowResult(Arrays.asList(kvs));
}

/**
* Returns the value of the first column in the Result.
* @return value of the first column

@ -30,9 +30,7 @@ import java.util.TreeSet;
|
|||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.filter.Filter;
|
||||
import org.apache.hadoop.hbase.filter.RowFilterInterface;
|
||||
import org.apache.hadoop.hbase.io.TimeRange;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
@ -80,7 +78,6 @@ public class Scan implements Writable {
|
|||
private int caching = -1;
|
||||
private boolean cacheBlocks = true;
|
||||
private Filter filter = null;
|
||||
private RowFilterInterface oldFilter = null;
|
||||
private TimeRange tr = new TimeRange();
|
||||
private Map<byte [], NavigableSet<byte []>> familyMap =
|
||||
new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
|
||||
|
@ -128,7 +125,6 @@ public class Scan implements Writable {
|
|||
maxVersions = scan.getMaxVersions();
|
||||
caching = scan.getCaching();
|
||||
filter = scan.getFilter(); // clone?
|
||||
oldFilter = scan.getOldFilter(); // clone?
|
||||
TimeRange ctr = scan.getTimeRange();
|
||||
tr = new TimeRange(ctr.getMin(), ctr.getMax());
|
||||
Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
|
||||
|
@ -173,89 +169,6 @@ public class Scan implements Writable {
|
|||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a combined family and qualifier and adds either both or just the
|
||||
* family in case there is not qualifier. This assumes the older colon
|
||||
* divided notation, e.g. "data:contents" or "meta:".
|
||||
* <p>
|
||||
* Note: It will through an error when the colon is missing.
|
||||
*
|
||||
* @param familyAndQualifier
|
||||
* @return A reference to this instance.
|
||||
* @throws IllegalArgumentException When the colon is missing.
|
||||
*/
|
||||
public Scan addColumn(byte[] familyAndQualifier) {
|
||||
byte [][] fq = KeyValue.parseColumn(familyAndQualifier);
|
||||
if (fq.length > 1 && fq[1] != null && fq[1].length > 0) {
|
||||
addColumn(fq[0], fq[1]);
|
||||
} else {
|
||||
addFamily(fq[0]);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds an array of columns specified using old format, family:qualifier.
|
||||
* <p>
|
||||
* Overrides previous calls to addFamily for any families in the input.
|
||||
*
|
||||
* @param columns array of columns, formatted as <pre>family:qualifier</pre>
|
||||
*/
|
||||
public Scan addColumns(byte [][] columns) {
|
||||
for (int i = 0; i < columns.length; i++) {
|
||||
addColumn(columns[i]);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method to help parse old style (or rather user entry on the
|
||||
* command line) column definitions, e.g. "data:contents mime:". The columns
|
||||
* must be space delimited and always have a colon (":") to denote family
|
||||
* and qualifier.
|
||||
*
|
||||
* @param columns The columns to parse.
|
||||
* @return A reference to this instance.
|
||||
*/
|
||||
public Scan addColumns(String columns) {
|
||||
String[] cols = columns.split(" ");
|
||||
for (String col : cols) {
|
||||
addColumn(Bytes.toBytes(col));
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helps to convert the binary column families and qualifiers to a text
|
||||
* representation, e.g. "data:mimetype data:contents meta:". Binary values
|
||||
* are properly encoded using {@link Bytes#toBytesBinary(String)}.
|
||||
*
|
||||
* @return The columns in an old style string format.
|
||||
*/
|
||||
public String getInputColumns() {
|
||||
String cols = "";
|
||||
for (Map.Entry<byte[], NavigableSet<byte[]>> e :
|
||||
familyMap.entrySet()) {
|
||||
byte[] fam = e.getKey();
|
||||
if (cols.length() > 0) cols += " ";
|
||||
NavigableSet<byte[]> quals = e.getValue();
|
||||
// check if this family has qualifiers
|
||||
if (quals != null && quals.size() > 0) {
|
||||
String cs = "";
|
||||
for (byte[] qual : quals) {
|
||||
if (cs.length() > 0) cs += " ";
|
||||
// encode values to make parsing easier later
|
||||
cs += Bytes.toStringBinary(fam) + ":" + Bytes.toStringBinary(qual);
|
||||
}
|
||||
cols += cs;
|
||||
} else {
|
||||
// only add the family but with old style delimiter
|
||||
cols += Bytes.toStringBinary(fam) + ":";
|
||||
}
|
||||
}
|
||||
return cols;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get versions of columns only within the specified timestamp range,
|
||||
|
@ -337,19 +250,6 @@ public class Scan implements Writable {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set an old-style filter interface to use. Note: not all features of the
|
||||
* old style filters are supported.
|
||||
*
|
||||
* @deprecated
|
||||
* @param filter
|
||||
* @return The scan instance.
|
||||
*/
|
||||
public Scan setOldFilter(RowFilterInterface filter) {
|
||||
oldFilter = filter;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Setting the familyMap
|
||||
* @param familyMap
|
||||
|
@ -436,20 +336,11 @@ public class Scan implements Writable {
|
|||
return filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the old style filter, if there is one.
|
||||
* @deprecated
|
||||
* @return null or instance
|
||||
*/
|
||||
public RowFilterInterface getOldFilter() {
|
||||
return oldFilter;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true is a filter has been specified, false if not
|
||||
*/
|
||||
public boolean hasFilter() {
|
||||
return filter != null || oldFilter != null;
|
||||
return filter != null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -550,11 +441,6 @@ public class Scan implements Writable {
|
|||
this.filter = (Filter)createForName(Bytes.toString(Bytes.readByteArray(in)));
|
||||
this.filter.readFields(in);
|
||||
}
|
||||
if (in.readBoolean()) {
|
||||
this.oldFilter =
|
||||
(RowFilterInterface)createForName(Bytes.toString(Bytes.readByteArray(in)));
|
||||
this.oldFilter.readFields(in);
|
||||
}
|
||||
this.tr = new TimeRange();
|
||||
tr.readFields(in);
|
||||
int numFamilies = in.readInt();
|
||||
|
@ -586,13 +472,6 @@ public class Scan implements Writable {
|
|||
Bytes.writeByteArray(out, Bytes.toBytes(filter.getClass().getName()));
|
||||
filter.write(out);
|
||||
}
|
||||
if (this.oldFilter == null) {
|
||||
out.writeBoolean(false);
|
||||
} else {
|
||||
out.writeBoolean(true);
|
||||
Bytes.writeByteArray(out, Bytes.toBytes(oldFilter.getClass().getName()));
|
||||
oldFilter.write(out);
|
||||
}
|
||||
tr.write(out);
|
||||
out.writeInt(familyMap.size());
|
||||
for(Map.Entry<byte [], NavigableSet<byte []>> entry : familyMap.entrySet()) {
|
||||
|
|
|
@ -1,52 +0,0 @@
|
|||
/**
|
||||
* Copyright 2009 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.client;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
|
||||
/**
|
||||
* Interface for client-side scanning.
|
||||
* Go to {@link HTable} to obtain instances.
|
||||
* @deprecated See {@link ResultScanner}
|
||||
*/
|
||||
public interface Scanner extends Closeable, Iterable<RowResult> {
|
||||
/**
|
||||
* Grab the next row's worth of values.
|
||||
* @return RowResult object if there is another row, null if the scanner is
|
||||
* exhausted.
|
||||
* @throws IOException
|
||||
*/
|
||||
public RowResult next() throws IOException;
|
||||
|
||||
/**
|
||||
* @param nbRows number of rows to return
|
||||
* @return Between zero and <param>nbRows</param> Results
|
||||
* @throws IOException
|
||||
*/
|
||||
public RowResult [] next(int nbRows) throws IOException;
|
||||
|
||||
/**
|
||||
* Closes the scanner and releases any resources it has allocated
|
||||
*/
|
||||
public void close();
|
||||
}
|
|
@ -1,286 +0,0 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.SortedMap;
|
||||
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.io.ObjectWritable;
|
||||
|
||||
/**
|
||||
* This filter is a no-op in HBase 0.20. Don't use it.
|
||||
*
|
||||
* This filter is used to filter based on the value of a given column. It takes
|
||||
* an operator (equal, greater, not equal, etc) and either a byte [] value or a
|
||||
* byte [] comparator. If we have a byte [] value then we just do a
|
||||
* lexicographic compare. If this is not sufficient (eg you want to deserialize
|
||||
* a long and then compare it to a fixed long value), then you can pass in your
|
||||
* own comparator instead.
|
||||
* @deprecated Use filters that are rooted on @{link Filter} instead.
|
||||
*/
|
||||
public class ColumnValueFilter implements RowFilterInterface {
|
||||
/** Comparison operators. */
|
||||
public enum CompareOp {
|
||||
/** less than */
|
||||
LESS,
|
||||
/** less than or equal to */
|
||||
LESS_OR_EQUAL,
|
||||
/** equals */
|
||||
EQUAL,
|
||||
/** not equal */
|
||||
NOT_EQUAL,
|
||||
/** greater than or equal to */
|
||||
GREATER_OR_EQUAL,
|
||||
/** greater than */
|
||||
GREATER;
|
||||
}
|
||||
|
||||
private byte[] columnName;
|
||||
private CompareOp compareOp;
|
||||
private byte[] value;
|
||||
private WritableByteArrayComparable comparator;
|
||||
private boolean filterIfColumnMissing;
|
||||
|
||||
ColumnValueFilter() {
|
||||
// for Writable
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param columnName name of column
|
||||
* @param compareOp operator
|
||||
* @param value value to compare column values against
|
||||
*/
|
||||
public ColumnValueFilter(final byte[] columnName, final CompareOp compareOp,
|
||||
final byte[] value) {
|
||||
this(columnName, compareOp, value, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param columnName name of column
|
||||
* @param compareOp operator
|
||||
* @param value value to compare column values against
|
||||
* @param filterIfColumnMissing if true then we will filter rows that don't have the column.
|
||||
*/
|
||||
public ColumnValueFilter(final byte[] columnName, final CompareOp compareOp,
|
||||
final byte[] value, boolean filterIfColumnMissing) {
|
||||
this.columnName = columnName;
|
||||
this.compareOp = compareOp;
|
||||
this.value = value;
|
||||
this.filterIfColumnMissing = filterIfColumnMissing;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param columnName name of column
|
||||
* @param compareOp operator
|
||||
* @param comparator Comparator to use.
|
||||
*/
|
||||
public ColumnValueFilter(final byte[] columnName, final CompareOp compareOp,
|
||||
final WritableByteArrayComparable comparator) {
|
||||
this(columnName, compareOp, comparator, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param columnName name of column
|
||||
* @param compareOp operator
|
||||
* @param comparator Comparator to use.
|
||||
* @param filterIfColumnMissing if true then we will filter rows that don't have the column.
|
||||
*/
|
||||
public ColumnValueFilter(final byte[] columnName, final CompareOp compareOp,
|
||||
final WritableByteArrayComparable comparator, boolean filterIfColumnMissing) {
|
||||
this.columnName = columnName;
|
||||
this.compareOp = compareOp;
|
||||
this.comparator = comparator;
|
||||
this.filterIfColumnMissing = filterIfColumnMissing;
|
||||
}
|
||||
|
||||
public boolean filterRowKey(final byte[] rowKey) {
|
||||
return filterRowKey(rowKey, 0, rowKey.length);
|
||||
}
|
||||
|
||||
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public boolean filterColumn(final byte[] rowKey,
|
||||
final byte[] colKey, final byte[] data) {
|
||||
if (!filterIfColumnMissing) {
|
||||
return false; // Must filter on the whole row
|
||||
}
|
||||
if (!Arrays.equals(colKey, columnName)) {
|
||||
return false;
|
||||
}
|
||||
return filterColumnValue(data, 0, data.length);
|
||||
}
|
||||
|
||||
|
||||
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
|
||||
byte[] cn, int coffset, int clength, byte[] columnValue,
|
||||
int voffset, int vlength) {
|
||||
if (!filterIfColumnMissing) {
|
||||
return false; // Must filter on the whole row
|
||||
}
|
||||
if (Bytes.compareTo(cn, coffset, clength,
|
||||
this.columnName, 0, this.columnName.length) != 0) {
|
||||
return false;
|
||||
}
|
||||
return filterColumnValue(columnValue, voffset, vlength);
|
||||
}
|
||||
|
||||
private boolean filterColumnValue(final byte [] data, final int offset,
|
||||
final int length) {
|
||||
int compareResult;
|
||||
if (comparator != null) {
|
||||
compareResult = comparator.compareTo(Arrays.copyOfRange(data, offset, offset+length));
|
||||
} else {
|
||||
compareResult = Bytes.compareTo(value, 0, value.length, data, offset, length);
|
||||
}
|
||||
|
||||
switch (compareOp) {
|
||||
case LESS:
|
||||
return compareResult <= 0;
|
||||
case LESS_OR_EQUAL:
|
||||
return compareResult < 0;
|
||||
case EQUAL:
|
||||
return compareResult != 0;
|
||||
case NOT_EQUAL:
|
||||
return compareResult == 0;
|
||||
case GREATER_OR_EQUAL:
|
||||
return compareResult > 0;
|
||||
case GREATER:
|
||||
return compareResult >= 0;
|
||||
default:
|
||||
throw new RuntimeException("Unknown Compare op " + compareOp.name());
|
||||
}
|
||||
}
|
||||
|
||||
public boolean filterAllRemaining() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterRow(final SortedMap<byte[], Cell> columns) {
|
||||
if (columns == null)
|
||||
return false;
|
||||
if (filterIfColumnMissing) {
|
||||
return !columns.containsKey(columnName);
|
||||
}
|
||||
// Otherwise we must do the filter here
|
||||
Cell colCell = columns.get(columnName);
|
||||
if (colCell == null) {
|
||||
return false;
|
||||
}
|
||||
byte [] v = colCell.getValue();
|
||||
return this.filterColumnValue(v, 0, v.length);
|
||||
}
|
||||
|
||||
public boolean filterRow(List<KeyValue> results) {
|
||||
if (results == null) return false;
|
||||
KeyValue found = null;
|
||||
if (filterIfColumnMissing) {
|
||||
boolean doesntHaveIt = true;
|
||||
for (KeyValue kv: results) {
|
||||
if (kv.matchingColumn(columnName)) {
|
||||
doesntHaveIt = false;
|
||||
found = kv;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (doesntHaveIt) return doesntHaveIt;
|
||||
}
|
||||
if (found == null) {
|
||||
for (KeyValue kv: results) {
|
||||
if (kv.matchingColumn(columnName)) {
|
||||
found = kv;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (found == null) {
|
||||
return false;
|
||||
}
|
||||
return this.filterColumnValue(found.getBuffer(), found.getValueOffset(),
|
||||
found.getValueLength());
|
||||
}
|
||||
|
||||
public boolean processAlways() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
// Nothing.
|
||||
}
|
||||
|
||||
public void rowProcessed(final boolean filtered,
|
||||
final byte[] key) {
|
||||
// Nothing
|
||||
}
|
||||
|
||||
|
||||
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
|
||||
// Nothing
|
||||
}
|
||||
|
||||
public void validate(final byte[][] columns) {
|
||||
// Nothing
|
||||
}
|
||||
|
||||
public void readFields(final DataInput in) throws IOException {
|
||||
int valueLen = in.readInt();
|
||||
if (valueLen > 0) {
|
||||
value = new byte[valueLen];
|
||||
in.readFully(value);
|
||||
}
|
||||
columnName = Bytes.readByteArray(in);
|
||||
compareOp = CompareOp.valueOf(in.readUTF());
|
||||
comparator = (WritableByteArrayComparable) ObjectWritable.readObject(in,
|
||||
new HBaseConfiguration());
|
||||
filterIfColumnMissing = in.readBoolean();
|
||||
}
|
||||
|
||||
public void write(final DataOutput out) throws IOException {
|
||||
if (value == null) {
|
||||
out.writeInt(0);
|
||||
} else {
|
||||
out.writeInt(value.length);
|
||||
out.write(value);
|
||||
}
|
||||
Bytes.writeByteArray(out, columnName);
|
||||
out.writeUTF(compareOp.name());
|
||||
ObjectWritable.writeObject(out, comparator,
|
||||
WritableByteArrayComparable.class, new HBaseConfiguration());
|
||||
out.writeBoolean(filterIfColumnMissing);
|
||||
}
|
||||
}
|
|
@ -1,64 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
/**
|
||||
* Subclass of StopRowFilter that filters rows > the stop row,
|
||||
* making it include up to the last row but no further.
|
||||
*
|
||||
* @deprecated Use filters that are rooted on @{link Filter} instead
|
||||
*/
|
||||
public class InclusiveStopRowFilter extends StopRowFilter {
|
||||
/**
|
||||
* Default constructor, filters nothing. Required though for RPC
|
||||
* deserialization.
|
||||
*/
|
||||
public InclusiveStopRowFilter() {super();}
|
||||
|
||||
/**
|
||||
* Constructor that takes a stopRowKey on which to filter
|
||||
*
|
||||
* @param stopRowKey rowKey to filter on.
|
||||
*/
|
||||
public InclusiveStopRowFilter(final byte [] stopRowKey) {
|
||||
super(stopRowKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see org.apache.hadoop.hbase.filter.StopRowFilter#filterRowKey(byte[])
|
||||
*/
|
||||
@Override
|
||||
public boolean filterRowKey(final byte [] rowKey) {
|
||||
return filterRowKey(rowKey, 0, rowKey.length);
|
||||
}
|
||||
|
||||
public boolean filterRowKey(byte []rowKey, int offset, int length) {
|
||||
if (rowKey == null) {
|
||||
if (getStopRowKey() == null) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return Bytes.compareTo(getStopRowKey(), 0, getStopRowKey().length,
|
||||
rowKey, offset, length) < 0;
|
||||
}
|
||||
}
|
|
@ -1,132 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.SortedMap;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
|
||||
/**
|
||||
* Implementation of RowFilterInterface that limits results to a specific page
|
||||
* size. It terminates scanning once the number of filter-passed results is >=
|
||||
* the given page size.
|
||||
*
|
||||
* <p>
|
||||
* Note that this filter cannot guarantee that the number of results returned
|
||||
* to a client are <= page size. This is because the filter is applied
|
||||
* separately on different region servers. It does however optimize the scan of
|
||||
* individual HRegions by making sure that the page size is never exceeded
|
||||
* locally.
|
||||
* </p>
|
||||
*
|
||||
* @deprecated Use filters that are rooted on @{link Filter} instead
|
||||
*/
|
||||
public class PageRowFilter implements RowFilterInterface {
|
||||
|
||||
private long pageSize = Long.MAX_VALUE;
|
||||
private int rowsAccepted = 0;
|
||||
|
||||
/**
|
||||
* Default constructor, filters nothing. Required though for RPC
|
||||
* deserialization.
|
||||
*/
|
||||
public PageRowFilter() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor that takes a maximum page size.
|
||||
*
|
||||
* @param pageSize Maximum result size.
|
||||
*/
|
||||
public PageRowFilter(final long pageSize) {
|
||||
this.pageSize = pageSize;
|
||||
}
|
||||
|
||||
public void validate(final byte [][] columns) {
|
||||
// Doesn't filter columns
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
rowsAccepted = 0;
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered,
|
||||
byte [] rowKey) {
|
||||
rowProcessed(filtered, rowKey, 0, rowKey.length);
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
|
||||
if (!filtered) {
|
||||
this.rowsAccepted++;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean processAlways() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterAllRemaining() {
|
||||
return this.rowsAccepted > this.pageSize;
|
||||
}
|
||||
|
||||
public boolean filterRowKey(final byte [] r) {
|
||||
return filterRowKey(r, 0, r.length);
|
||||
}
|
||||
|
||||
|
||||
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
public boolean filterColumn(final byte [] rowKey,
|
||||
final byte [] colKey,
|
||||
final byte[] data) {
|
||||
return filterColumn(rowKey, 0, rowKey.length, colKey, 0, colKey.length,
|
||||
data, 0, data.length);
|
||||
}
|
||||
|
||||
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
|
||||
byte[] colunmName, int coffset, int clength, byte[] columnValue,
|
||||
int voffset, int vlength) {
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
public boolean filterRow(List<KeyValue> results) {
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
public void readFields(final DataInput in) throws IOException {
|
||||
this.pageSize = in.readLong();
|
||||
}
|
||||
|
||||
public void write(final DataOutput out) throws IOException {
|
||||
out.writeLong(pageSize);
|
||||
}
|
||||
}
|
|
@ -1,120 +0,0 @@
|
|||
/**
|
||||
* Copyright 2009 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.SortedMap;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
/**
|
||||
* RowFilterInterface that filters everything that does not match a prefix
|
||||
*
|
||||
* @deprecated Use filters that are rooted on @{link Filter} instead
|
||||
*/
|
||||
public class PrefixRowFilter implements RowFilterInterface {
|
||||
protected byte[] prefix;
|
||||
|
||||
/**
|
||||
* Constructor that takes a row prefix to filter on
|
||||
* @param prefix
|
||||
*/
|
||||
public PrefixRowFilter(byte[] prefix) {
|
||||
this.prefix = prefix;
|
||||
}
|
||||
|
||||
/**
|
||||
* Default Constructor, filters nothing. Required for RPC
|
||||
* deserialization
|
||||
*/
|
||||
public PrefixRowFilter() { }
|
||||
|
||||
public void reset() {
|
||||
// Nothing to reset
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered, byte [] key) {
|
||||
rowProcessed(filtered, key, 0, key.length);
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
|
||||
// does not care
|
||||
}
|
||||
|
||||
public boolean processAlways() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterAllRemaining() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterRowKey(final byte [] rowKey) {
|
||||
return filterRowKey(rowKey, 0, rowKey.length);
|
||||
}
|
||||
|
||||
|
||||
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
|
||||
if (rowKey == null)
|
||||
return true;
|
||||
if (length < prefix.length)
|
||||
return true;
|
||||
for(int i = 0;i < prefix.length;i++)
|
||||
if (prefix[i] != rowKey[i + offset])
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterColumn(final byte [] rowKey, final byte [] colunmName,
|
||||
final byte[] columnValue) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
|
||||
byte[] colunmName, int coffset, int clength, byte[] columnValue,
|
||||
int voffset, int vlength) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterRow(List<KeyValue> results) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void validate(final byte [][] columns) {
|
||||
// does not do this
|
||||
}
|
||||
|
||||
public void readFields(final DataInput in) throws IOException {
|
||||
prefix = Bytes.readByteArray(in);
|
||||
}
|
||||
|
||||
public void write(final DataOutput out) throws IOException {
|
||||
Bytes.writeByteArray(out, prefix);
|
||||
}
|
||||
}
|
|
@ -1,344 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
/**
|
||||
* Implementation of RowFilterInterface that can filter by rowkey regular
|
||||
* expression and/or individual column values (equals comparison only). Multiple
|
||||
* column filters imply an implicit conjunction of filter criteria.
|
||||
*
|
||||
* Note that column value filtering in this interface has been replaced by
|
||||
* {@link ColumnValueFilter}.
|
||||
* @deprecated This interface doesn't work well in new KeyValue world.
|
||||
* Use filters based on new {@link Filter} instead.
|
||||
*/
|
||||
public class RegExpRowFilter implements RowFilterInterface {
|
||||
|
||||
private Pattern rowKeyPattern = null;
|
||||
private String rowKeyRegExp = null;
|
||||
private Map<byte [], byte[]> equalsMap =
|
||||
new TreeMap<byte [], byte[]>(Bytes.BYTES_COMPARATOR);
|
||||
private Set<byte []> nullColumns =
|
||||
new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
|
||||
|
||||
/**
|
||||
* Default constructor, filters nothing. Required though for RPC
|
||||
* deserialization.
|
||||
*/
|
||||
public RegExpRowFilter() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor that takes a row key regular expression to filter on.
|
||||
*
|
||||
* @param rowKeyRegExp
|
||||
*/
|
||||
public RegExpRowFilter(final String rowKeyRegExp) {
|
||||
this.rowKeyRegExp = rowKeyRegExp;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Column filtering has been replaced by {@link ColumnValueFilter}
|
||||
* Constructor that takes a row key regular expression to filter on.
|
||||
*
|
||||
* @param rowKeyRegExp
|
||||
* @param columnFilter
|
||||
*/
|
||||
@Deprecated
|
||||
public RegExpRowFilter(final String rowKeyRegExp,
|
||||
final Map<byte [], Cell> columnFilter) {
|
||||
this.rowKeyRegExp = rowKeyRegExp;
|
||||
this.setColumnFilters(columnFilter);
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered, byte [] rowKey) {
|
||||
rowProcessed(filtered, rowKey, 0, rowKey.length);
|
||||
}
|
||||
|
||||
|
||||
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
|
||||
//doesn't care
|
||||
}
|
||||
|
||||
public boolean processAlways() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Column filtering has been replaced by {@link ColumnValueFilter}
|
||||
* Specify a value that must be matched for the given column.
|
||||
*
|
||||
* @param colKey
|
||||
* the column to match on
|
||||
* @param value
|
||||
* the value that must equal the stored value.
|
||||
*/
|
||||
@Deprecated
|
||||
public void setColumnFilter(final byte [] colKey, final byte[] value) {
|
||||
if (value == null) {
|
||||
nullColumns.add(colKey);
|
||||
} else {
|
||||
equalsMap.put(colKey, value);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Column filtering has been replaced by {@link ColumnValueFilter}
|
||||
* Set column filters for a number of columns.
|
||||
*
|
||||
* @param columnFilter
|
||||
* Map of columns with value criteria.
|
||||
*/
|
||||
@Deprecated
|
||||
public void setColumnFilters(final Map<byte [], Cell> columnFilter) {
|
||||
if (null == columnFilter) {
|
||||
nullColumns.clear();
|
||||
equalsMap.clear();
|
||||
} else {
|
||||
for (Entry<byte [], Cell> entry : columnFilter.entrySet()) {
|
||||
setColumnFilter(entry.getKey(), entry.getValue().getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
// Nothing to reset
|
||||
}
|
||||
|
||||
public boolean filterAllRemaining() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterRowKey(final byte [] rowKey) {
|
||||
return filterRowKey(rowKey, 0, rowKey.length);
|
||||
}
|
||||
|
||||
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
|
||||
return (filtersByRowKey() && rowKey != null)?
|
||||
!getRowKeyPattern().matcher(Bytes.toString(rowKey, offset, length)).matches():
|
||||
false;
|
||||
}
|
||||
|
||||
public boolean filterColumn(final byte [] rowKey, final byte [] colKey,
|
||||
final byte[] data) {
|
||||
if (filterRowKey(rowKey)) {
|
||||
return true;
|
||||
}
|
||||
if (filtersByColumnValue()) {
|
||||
byte[] filterValue = equalsMap.get(colKey);
|
||||
if (null != filterValue) {
|
||||
return !Arrays.equals(filterValue, data);
|
||||
}
|
||||
}
|
||||
if (nullColumns.contains(colKey)) {
|
||||
if (data != null /* DELETE IS IN KEY NOW && !HLogEdit.isDeleted(data)*/) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
|
||||
byte [] colunmName, int coffset, int clength, byte[] columnValue,
|
||||
int voffset, int vlength) {
|
||||
if (filterRowKey(rowKey, roffset, rlength)) {
|
||||
return true;
|
||||
}
|
||||
byte [] colkey = null;
|
||||
if (filtersByColumnValue()) {
|
||||
colkey = getColKey(colunmName, coffset, clength);
|
||||
byte [] filterValue = equalsMap.get(colkey);
|
||||
if (null != filterValue) {
|
||||
return Bytes.compareTo(filterValue, 0, filterValue.length, columnValue,
|
||||
voffset, vlength) != 0;
|
||||
}
|
||||
}
|
||||
if (colkey == null) {
|
||||
colkey = getColKey(colunmName, coffset, clength);
|
||||
}
|
||||
if (nullColumns.contains(colkey)) {
|
||||
if (columnValue != null /* TODO: FIX!!! && !HLogEdit.isDeleted(data)*/) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private byte [] getColKey(final byte [] c, final int offset, final int length) {
|
||||
byte [] colkey = null;
|
||||
if (offset == 0) {
|
||||
colkey = c;
|
||||
} else {
|
||||
colkey = new byte [length];
|
||||
System.arraycopy(c, offset, colkey, 0, length);
|
||||
}
|
||||
return colkey;
|
||||
}
|
||||
|
||||
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
|
||||
for (Entry<byte [], Cell> col : columns.entrySet()) {
|
||||
if (nullColumns.contains(col.getKey())
|
||||
/* DELETE IS IN KEY NOW && !HLogEdit.isDeleted(col.getValue().getValue())*/) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
for (byte [] col : equalsMap.keySet()) {
|
||||
if (!columns.containsKey(col)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// THIS METHOD IS HORRIDLY EXPENSIVE TO RUN. NEEDS FIXUP.
|
||||
public boolean filterRow(List<KeyValue> kvs) {
|
||||
for (KeyValue kv: kvs) {
|
||||
byte [] column = kv.getColumn();
|
||||
if (nullColumns.contains(column) && !kv.isDeleteType()) {
|
||||
return true;
|
||||
}
|
||||
if (!equalsMap.containsKey(column)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean filtersByColumnValue() {
|
||||
return equalsMap != null && equalsMap.size() > 0;
|
||||
}
|
||||
|
||||
private boolean filtersByRowKey() {
|
||||
return null != rowKeyPattern || null != rowKeyRegExp;
|
||||
}
|
||||
|
||||
private String getRowKeyRegExp() {
|
||||
if (null == rowKeyRegExp && rowKeyPattern != null) {
|
||||
rowKeyRegExp = rowKeyPattern.toString();
|
||||
}
|
||||
return rowKeyRegExp;
|
||||
}
|
||||
|
||||
private Pattern getRowKeyPattern() {
|
||||
if (rowKeyPattern == null && rowKeyRegExp != null) {
|
||||
rowKeyPattern = Pattern.compile(rowKeyRegExp);
|
||||
}
|
||||
return rowKeyPattern;
|
||||
}
|
||||
|
||||
public void readFields(final DataInput in) throws IOException {
|
||||
boolean hasRowKeyPattern = in.readBoolean();
|
||||
if (hasRowKeyPattern) {
|
||||
rowKeyRegExp = in.readUTF();
|
||||
}
|
||||
// equals map
|
||||
equalsMap.clear();
|
||||
int size = in.readInt();
|
||||
for (int i = 0; i < size; i++) {
|
||||
byte [] key = Bytes.readByteArray(in);
|
||||
int len = in.readInt();
|
||||
byte[] value = null;
|
||||
if (len >= 0) {
|
||||
value = new byte[len];
|
||||
in.readFully(value);
|
||||
}
|
||||
setColumnFilter(key, value);
|
||||
}
|
||||
// nullColumns
|
||||
nullColumns.clear();
|
||||
size = in.readInt();
|
||||
for (int i = 0; i < size; i++) {
|
||||
setColumnFilter(Bytes.readByteArray(in), null);
|
||||
}
|
||||
}
|
||||
|
||||
public void validate(final byte [][] columns) {
|
||||
Set<byte []> invalids = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
|
||||
for (byte [] colKey : getFilterColumns()) {
|
||||
boolean found = false;
|
||||
for (byte [] col : columns) {
|
||||
if (Bytes.equals(col, colKey)) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
invalids.add(colKey);
|
||||
}
|
||||
}
|
||||
|
||||
if (invalids.size() > 0) {
|
||||
throw new InvalidRowFilterException(String.format(
|
||||
"RowFilter contains criteria on columns %s not in %s", invalids,
|
||||
Arrays.toString(columns)));
|
||||
}
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
private Set<byte []> getFilterColumns() {
|
||||
Set<byte []> cols = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
|
||||
cols.addAll(equalsMap.keySet());
|
||||
cols.addAll(nullColumns);
|
||||
return cols;
|
||||
}
|
||||
|
||||
public void write(final DataOutput out) throws IOException {
|
||||
if (!filtersByRowKey()) {
|
||||
out.writeBoolean(false);
|
||||
} else {
|
||||
out.writeBoolean(true);
|
||||
out.writeUTF(getRowKeyRegExp());
|
||||
}
|
||||
|
||||
// equalsMap
|
||||
out.writeInt(equalsMap.size());
|
||||
for (Entry<byte [], byte[]> entry : equalsMap.entrySet()) {
|
||||
Bytes.writeByteArray(out, entry.getKey());
|
||||
byte[] value = entry.getValue();
|
||||
out.writeInt(value.length);
|
||||
out.write(value);
|
||||
}
|
||||
|
||||
// null columns
|
||||
out.writeInt(nullColumns.size());
|
||||
for (byte [] col : nullColumns) {
|
||||
Bytes.writeByteArray(out, col);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,181 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.SortedMap;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
/**
|
||||
*
|
||||
* Interface used for row-level filters applied to HRegion.HScanner scan
|
||||
* results during calls to next().
|
||||
*
|
||||
* In HBase 0.20, not all of the functions will be called, thus filters which depend
|
||||
* on them will not work as advertised!
|
||||
*
|
||||
* Specifically, you can only count on the following methods to be called:
|
||||
* boolean filterRowKey(final byte [] rowKey, final int offset, final int length);
|
||||
* boolean filterAllRemaining();
|
||||
*
|
||||
* Complex filters that depend in more need to be rewritten to work with @{link Filter}
|
||||
*
|
||||
* Write new filters to use the @{link Filter} API instead.
|
||||
* @deprecated Use filters that are rooted on @{link Filter} instead
|
||||
*/
|
||||
public interface RowFilterInterface extends Writable {
|
||||
/**
|
||||
* Resets the state of the filter. Used prior to the start of a Region scan.
|
||||
*
|
||||
*/
|
||||
void reset();
|
||||
|
||||
/**
|
||||
* Called to let filter know the final decision (to pass or filter) on a
|
||||
* given row. With out HScanner calling this, the filter does not know if a
|
||||
* row passed filtering even if it passed the row itself because other
|
||||
* filters may have failed the row. E.g. when this filter is a member of a
|
||||
* RowFilterSet with an OR operator.
|
||||
*
|
||||
* @see RowFilterSet
|
||||
* @param filtered
|
||||
* @param key
|
||||
* @deprecated Use {@link #rowProcessed(boolean, byte[], int, int)} instead.
|
||||
*/
|
||||
void rowProcessed(boolean filtered, byte [] key);
|
||||
|
||||
/**
|
||||
* Called to let filter know the final decision (to pass or filter) on a
|
||||
* given row. With out HScanner calling this, the filter does not know if a
|
||||
* row passed filtering even if it passed the row itself because other
|
||||
* filters may have failed the row. E.g. when this filter is a member of a
|
||||
* RowFilterSet with an OR operator.
|
||||
*
|
||||
* @see RowFilterSet
|
||||
* @param filtered
|
||||
* @param key
|
||||
* @param offset
|
||||
* @param length
|
||||
*/
|
||||
void rowProcessed(boolean filtered, byte [] key, int offset, int length);
|
||||
|
||||
/**
|
||||
* Returns whether or not the filter should always be processed in any
|
||||
* filtering call. This precaution is necessary for filters that maintain
|
||||
* state and need to be updated according to their response to filtering
|
||||
* calls (see WhileMatchRowFilter for an example). At times, filters nested
|
||||
* in RowFilterSets may or may not be called because the RowFilterSet
|
||||
* determines a result as fast as possible. Returning true for
|
||||
* processAlways() ensures that the filter will always be called.
|
||||
*
|
||||
* @return whether or not to always process the filter
|
||||
*/
|
||||
boolean processAlways();
|
||||
|
||||
/**
|
||||
* Determines if the filter has decided that all remaining results should be
|
||||
* filtered (skipped). This is used to prevent the scanner from scanning a
|
||||
* the rest of the HRegion when for sure the filter will exclude all
|
||||
* remaining rows.
|
||||
*
|
||||
* @return true if the filter intends to filter all remaining rows.
|
||||
*/
|
||||
boolean filterAllRemaining();
|
||||
|
||||
/**
|
||||
* Filters on just a row key. This is the first chance to stop a row.
|
||||
*
|
||||
* @param rowKey
|
||||
* @return true if given row key is filtered and row should not be processed.
|
||||
* @deprecated Use {@link #filterRowKey(byte[], int, int)} instead.
|
||||
*/
|
||||
boolean filterRowKey(final byte [] rowKey);
|
||||
|
||||
/**
|
||||
* Filters on just a row key. This is the first chance to stop a row.
|
||||
*
|
||||
* @param rowKey
|
||||
* @param offset
|
||||
* @param length
|
||||
* @return true if given row key is filtered and row should not be processed.
|
||||
*/
|
||||
boolean filterRowKey(final byte [] rowKey, final int offset, final int length);
|
||||
|
||||
/**
|
||||
* Filters on row key, column name, and column value. This will take individual columns out of a row,
|
||||
* but the rest of the row will still get through.
|
||||
*
|
||||
* @param rowKey row key to filter on.
|
||||
* @param columnName column name to filter on
|
||||
* @param columnValue column value to filter on
|
||||
* @return true if row filtered and should not be processed.
|
||||
* @deprecated Use {@link #filterColumn(byte[], int, int, byte[], int, int, byte[], int, int)}
|
||||
* instead.
|
||||
*/
|
||||
@Deprecated
|
||||
boolean filterColumn(final byte [] rowKey, final byte [] columnName,
|
||||
final byte [] columnValue);
|
||||
|
||||
/**
|
||||
* Filters on row key, column name, and column value. This will take individual columns out of a row,
|
||||
* but the rest of the row will still get through.
|
||||
*
|
||||
* @param rowKey row key to filter on.
|
||||
* @param colunmName column name to filter on
|
||||
* @param columnValue column value to filter on
|
||||
* @return true if row filtered and should not be processed.
|
||||
*/
|
||||
boolean filterColumn(final byte [] rowKey, final int roffset,
|
||||
final int rlength, final byte [] colunmName, final int coffset,
|
||||
final int clength, final byte [] columnValue, final int voffset,
|
||||
final int vlength);
|
||||
|
||||
/**
|
||||
* Filter on the fully assembled row. This is the last chance to stop a row.
|
||||
*
|
||||
* @param columns
|
||||
* @return true if row filtered and should not be processed.
|
||||
*/
|
||||
boolean filterRow(final SortedMap<byte [], Cell> columns);
|
||||
|
||||
/**
|
||||
* Filter on the fully assembled row. This is the last chance to stop a row.
|
||||
*
|
||||
* @param results
|
||||
* @return true if row filtered and should not be processed.
|
||||
*/
|
||||
boolean filterRow(final List<KeyValue> results);
|
||||
|
||||
/**
|
||||
* Validates that this filter applies only to a subset of the given columns.
|
||||
* This check is done prior to opening of scanner due to the limitation that
|
||||
* filtering of columns is dependent on the retrieval of those columns within
|
||||
* the HRegion. Criteria on columns that are not part of a scanner's column
|
||||
* list will be ignored. In the case of null value filters, all rows will pass
|
||||
* the filter. This behavior should be 'undefined' for the user and therefore
|
||||
* not permitted.
|
||||
*
|
||||
* @param columns
|
||||
*/
|
||||
void validate(final byte [][] columns);
|
||||
}
|
|
@ -1,295 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.SortedMap;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.io.ObjectWritable;
|
||||
|
||||
/**
|
||||
* Implementation of RowFilterInterface that represents a set of RowFilters
|
||||
* which will be evaluated with a specified boolean operator MUST_PASS_ALL
|
||||
* (!AND) or MUST_PASS_ONE (!OR). Since you can use RowFilterSets as children
|
||||
* of RowFilterSet, you can create a hierarchy of filters to be evaluated.
|
||||
*
|
||||
* It is highly likely this construct will no longer work!
|
||||
*
|
||||
* @deprecated Use filters that are rooted on @{link Filter} instead
|
||||
*/
|
||||
public class RowFilterSet implements RowFilterInterface {
|
||||
|
||||
/** set operator */
|
||||
public static enum Operator {
|
||||
/** !AND */
|
||||
MUST_PASS_ALL,
|
||||
/** !OR */
|
||||
MUST_PASS_ONE
|
||||
}
|
||||
|
||||
private Operator operator = Operator.MUST_PASS_ALL;
|
||||
private Set<RowFilterInterface> filters = new HashSet<RowFilterInterface>();
|
||||
|
||||
/**
|
||||
* Default constructor, filters nothing. Required though for RPC
|
||||
* deserialization.
|
||||
*/
|
||||
public RowFilterSet() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor that takes a set of RowFilters. The default operator
|
||||
* MUST_PASS_ALL is assumed.
|
||||
*
|
||||
* @param rowFilters
|
||||
*/
|
||||
public RowFilterSet(final Set<RowFilterInterface> rowFilters) {
|
||||
this.filters = rowFilters;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor that takes a set of RowFilters and an operator.
|
||||
*
|
||||
* @param operator Operator to process filter set with.
|
||||
* @param rowFilters Set of row filters.
|
||||
*/
|
||||
public RowFilterSet(final Operator operator,
|
||||
final Set<RowFilterInterface> rowFilters) {
|
||||
this.filters = rowFilters;
|
||||
this.operator = operator;
|
||||
}
|
||||
|
||||
/** Get the operator.
|
||||
*
|
||||
* @return operator
|
||||
*/
|
||||
public Operator getOperator() {
|
||||
return operator;
|
||||
}
|
||||
|
||||
/** Get the filters.
|
||||
*
|
||||
* @return filters
|
||||
*/
|
||||
public Set<RowFilterInterface> getFilters() {
|
||||
return filters;
|
||||
}
|
||||
|
||||
/** Add a filter.
|
||||
*
|
||||
* @param filter
|
||||
*/
|
||||
public void addFilter(RowFilterInterface filter) {
|
||||
this.filters.add(filter);
|
||||
}
|
||||
|
||||
public void validate(final byte [][] columns) {
|
||||
for (RowFilterInterface filter : filters) {
|
||||
filter.validate(columns);
|
||||
}
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
for (RowFilterInterface filter : filters) {
|
||||
filter.reset();
|
||||
}
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered, byte [] rowKey) {
|
||||
rowProcessed(filtered, rowKey, 0, rowKey.length);
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
|
||||
for (RowFilterInterface filter : filters) {
|
||||
filter.rowProcessed(filtered, key, offset, length);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean processAlways() {
|
||||
for (RowFilterInterface filter : filters) {
|
||||
if (filter.processAlways()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterAllRemaining() {
|
||||
boolean result = operator == Operator.MUST_PASS_ONE;
|
||||
for (RowFilterInterface filter : filters) {
|
||||
if (operator == Operator.MUST_PASS_ALL) {
|
||||
if (filter.filterAllRemaining()) {
|
||||
return true;
|
||||
}
|
||||
} else if (operator == Operator.MUST_PASS_ONE) {
|
||||
if (!filter.filterAllRemaining()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public boolean filterRowKey(final byte [] rowKey) {
|
||||
return filterRowKey(rowKey, 0, rowKey.length);
|
||||
}
|
||||
|
||||
|
||||
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
|
||||
boolean resultFound = false;
|
||||
boolean result = operator == Operator.MUST_PASS_ONE;
|
||||
for (RowFilterInterface filter : filters) {
|
||||
if (!resultFound) {
|
||||
if (operator == Operator.MUST_PASS_ALL) {
|
||||
if (filter.filterAllRemaining() ||
|
||||
filter.filterRowKey(rowKey, offset, length)) {
|
||||
result = true;
|
||||
resultFound = true;
|
||||
}
|
||||
} else if (operator == Operator.MUST_PASS_ONE) {
|
||||
if (!filter.filterAllRemaining() &&
|
||||
!filter.filterRowKey(rowKey, offset, length)) {
|
||||
result = false;
|
||||
resultFound = true;
|
||||
}
|
||||
}
|
||||
} else if (filter.processAlways()) {
|
||||
filter.filterRowKey(rowKey, offset, length);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public boolean filterColumn(final byte [] rowKey, final byte [] colKey,
|
||||
final byte[] data) {
|
||||
return filterColumn(rowKey, 0, rowKey.length, colKey, 0, colKey.length,
|
||||
data, 0, data.length);
|
||||
}
|
||||
|
||||
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
|
||||
byte[] columnName, int coffset, int clength, byte[] columnValue,
|
||||
int voffset, int vlength) {
|
||||
boolean resultFound = false;
|
||||
boolean result = operator == Operator.MUST_PASS_ONE;
|
||||
for (RowFilterInterface filter : filters) {
|
||||
if (!resultFound) {
|
||||
if (operator == Operator.MUST_PASS_ALL) {
|
||||
if (filter.filterAllRemaining() ||
|
||||
filter.filterColumn(rowKey, roffset, rlength, columnName, coffset,
|
||||
clength, columnValue, voffset, vlength)) {
|
||||
result = true;
|
||||
resultFound = true;
|
||||
}
|
||||
} else if (operator == Operator.MUST_PASS_ONE) {
|
||||
if (!filter.filterAllRemaining() &&
|
||||
!filter.filterColumn(rowKey, roffset, rlength, columnName, coffset,
|
||||
clength, columnValue, voffset, vlength)) {
|
||||
result = false;
|
||||
resultFound = true;
|
||||
}
|
||||
}
|
||||
} else if (filter.processAlways()) {
|
||||
filter.filterColumn(rowKey, roffset, rlength, columnName, coffset,
|
||||
clength, columnValue, voffset, vlength);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
|
||||
boolean resultFound = false;
|
||||
boolean result = operator == Operator.MUST_PASS_ONE;
|
||||
for (RowFilterInterface filter : filters) {
|
||||
if (!resultFound) {
|
||||
if (operator == Operator.MUST_PASS_ALL) {
|
||||
if (filter.filterAllRemaining() || filter.filterRow(columns)) {
|
||||
result = true;
|
||||
resultFound = true;
|
||||
}
|
||||
} else if (operator == Operator.MUST_PASS_ONE) {
|
||||
if (!filter.filterAllRemaining() && !filter.filterRow(columns)) {
|
||||
result = false;
|
||||
resultFound = true;
|
||||
}
|
||||
}
|
||||
} else if (filter.processAlways()) {
|
||||
filter.filterRow(columns);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public boolean filterRow(List<KeyValue> results) {
|
||||
boolean resultFound = false;
|
||||
boolean result = operator == Operator.MUST_PASS_ONE;
|
||||
for (RowFilterInterface filter : filters) {
|
||||
if (!resultFound) {
|
||||
if (operator == Operator.MUST_PASS_ALL) {
|
||||
if (filter.filterAllRemaining() || filter.filterRow(results)) {
|
||||
result = true;
|
||||
resultFound = true;
|
||||
}
|
||||
} else if (operator == Operator.MUST_PASS_ONE) {
|
||||
if (!filter.filterAllRemaining() && !filter.filterRow(results)) {
|
||||
result = false;
|
||||
resultFound = true;
|
||||
}
|
||||
}
|
||||
} else if (filter.processAlways()) {
|
||||
filter.filterRow(results);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public void readFields(final DataInput in) throws IOException {
|
||||
Configuration conf = new HBaseConfiguration();
|
||||
byte opByte = in.readByte();
|
||||
operator = Operator.values()[opByte];
|
||||
int size = in.readInt();
|
||||
if (size > 0) {
|
||||
filters = new HashSet<RowFilterInterface>();
|
||||
for (int i = 0; i < size; i++) {
|
||||
RowFilterInterface filter = (RowFilterInterface) ObjectWritable
|
||||
.readObject(in, conf);
|
||||
filters.add(filter);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void write(final DataOutput out) throws IOException {
|
||||
Configuration conf = new HBaseConfiguration();
|
||||
out.writeByte(operator.ordinal());
|
||||
out.writeInt(filters.size());
|
||||
for (RowFilterInterface filter : filters) {
|
||||
ObjectWritable.writeObject(out, filter, RowFilterInterface.class, conf);
|
||||
}
|
||||
}
|
||||
}
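For reference, a minimal sketch of how the removed RowFilterSet was typically combined with other RowFilterInterface implementations. This is not taken from the patch; it assumes the pre-0.21 classes deleted in this change, the nested Operator enum referenced above, and the Bytes utility. The class name RowFilterSetSketch is made up for illustration.

import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.hbase.filter.RowFilterSet;
import org.apache.hadoop.hbase.filter.StopRowFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class RowFilterSetSketch {
  public static void main(String[] args) {
    // Two member filters; a StopRowFilter filters rows at or past its stop key.
    Set<RowFilterInterface> members = new HashSet<RowFilterInterface>();
    members.add(new StopRowFilter(Bytes.toBytes("row-0500")));
    members.add(new StopRowFilter(Bytes.toBytes("row-0900")));

    // MUST_PASS_ONE: a row key is filtered only when every member filters it.
    RowFilterSet set = new RowFilterSet(
        RowFilterSet.Operator.MUST_PASS_ONE, members);

    byte [] row = Bytes.toBytes("row-0700");
    // Filtered by the "row-0500" member but passed by the "row-0900" member,
    // so the set as a whole lets it through.
    System.out.println("filtered=" + set.filterRowKey(row)); // false
  }
}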
|
|
@@ -1,146 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.SortedMap;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
/**
|
||||
* Implementation of RowFilterInterface that filters out rows greater than or
|
||||
* equal to a specified rowKey.
|
||||
*
|
||||
* @deprecated Use filters that are rooted on {@link Filter} instead
|
||||
*/
|
||||
public class StopRowFilter implements RowFilterInterface {
|
||||
private byte [] stopRowKey;
|
||||
|
||||
/**
|
||||
* Default constructor, filters nothing. Required though for RPC
|
||||
* deserialization.
|
||||
*/
|
||||
public StopRowFilter() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor that takes a stopRowKey on which to filter
|
||||
*
|
||||
* @param stopRowKey rowKey to filter on.
|
||||
*/
|
||||
public StopRowFilter(final byte [] stopRowKey) {
|
||||
this.stopRowKey = stopRowKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* An accessor for the stopRowKey
|
||||
*
|
||||
* @return the filter's stopRowKey
|
||||
*/
|
||||
public byte [] getStopRowKey() {
|
||||
return this.stopRowKey;
|
||||
}
|
||||
|
||||
public void validate(final byte [][] columns) {
|
||||
// Doesn't filter columns
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
// Nothing to reset
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered, byte [] rowKey) {
|
||||
// Doesn't care
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
|
||||
// Doesn't care
|
||||
}
|
||||
|
||||
public boolean processAlways() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterAllRemaining() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean filterRowKey(final byte [] rowKey) {
|
||||
return filterRowKey(rowKey, 0, rowKey.length);
|
||||
}
|
||||
|
||||
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
|
||||
if (rowKey == null) {
|
||||
if (this.stopRowKey == null) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return Bytes.compareTo(stopRowKey, 0, stopRowKey.length, rowKey, offset,
|
||||
length) <= 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Because StopRowFilter does not examine column information, this method
|
||||
* defaults to calling the rowKey-only version of filter.
|
||||
* @param rowKey
|
||||
* @param colKey
|
||||
* @param data
|
||||
* @return boolean
|
||||
*/
|
||||
public boolean filterColumn(final byte [] rowKey, final byte [] colKey,
|
||||
final byte[] data) {
|
||||
return filterRowKey(rowKey);
|
||||
}
|
||||
|
||||
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
|
||||
byte[] columnName, int coffset, int clength, byte[] columnValue,
|
||||
int voffset, int vlength) {
|
||||
return filterRowKey(rowKey, roffset, rlength);
|
||||
}
|
||||
|
||||
/**
|
||||
* Because StopRowFilter does not examine column information, this method
|
||||
* defaults to calling filterAllRemaining().
|
||||
* @param columns
|
||||
* @return boolean
|
||||
*/
|
||||
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
public boolean filterRow(List<KeyValue> results) {
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
this.stopRowKey = Bytes.readByteArray(in);
|
||||
}
|
||||
|
||||
public void write(DataOutput out) throws IOException {
|
||||
Bytes.writeByteArray(out, this.stopRowKey);
|
||||
}
|
||||
}
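A quick sketch of the comparison semantics above: rows at or after the stop key are filtered, rows strictly before it pass. Assumes only the deleted StopRowFilter and the Bytes utility; the class name StopRowFilterSketch is made up.

import org.apache.hadoop.hbase.filter.StopRowFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class StopRowFilterSketch {
  public static void main(String[] args) {
    StopRowFilter filter = new StopRowFilter(Bytes.toBytes("row-0500"));
    // Rows strictly before the stop key pass; the stop key itself and
    // anything after it are filtered out.
    System.out.println(filter.filterRowKey(Bytes.toBytes("row-0400"))); // false
    System.out.println(filter.filterRowKey(Bytes.toBytes("row-0500"))); // true
    System.out.println(filter.filterRowKey(Bytes.toBytes("row-0600"))); // true
  }
}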
|
|
@@ -21,7 +21,6 @@
|
|||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.client.Get;
|
||||
|
||||
/**
|
||||
* This filter is used to filter based on column value. It takes an
|
||||
|
|
|
@@ -1,167 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.SortedMap;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
|
||||
/**
|
||||
* WhileMatchRowFilter is a wrapper filter that filters everything after the
|
||||
* first filtered row. Once the nested filter returns true for either of its
|
||||
* filter(..) methods or filterNotNull(SortedMap<Text, byte[]>), this wrapper's
|
||||
* filterAllRemaining() will return true. All filtering methods will
|
||||
* thereafter defer to the result of filterAllRemaining().
|
||||
*
|
||||
* @deprecated Use filters that are rooted on {@link Filter} instead
|
||||
*/
|
||||
public class WhileMatchRowFilter implements RowFilterInterface {
|
||||
private boolean filterAllRemaining = false;
|
||||
private RowFilterInterface filter;
|
||||
|
||||
/**
|
||||
* Default constructor, filters nothing. Required though for RPC
|
||||
* deserialization.
|
||||
*/
|
||||
public WhileMatchRowFilter() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @param filter
|
||||
*/
|
||||
public WhileMatchRowFilter(RowFilterInterface filter) {
|
||||
this.filter = filter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the internal filter being wrapped
|
||||
*
|
||||
* @return the internal filter
|
||||
*/
|
||||
public RowFilterInterface getInternalFilter() {
|
||||
return this.filter;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
this.filterAllRemaining = false;
|
||||
this.filter.reset();
|
||||
}
|
||||
|
||||
public boolean processAlways() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true once the nested filter has filtered out a row (returned true
|
||||
* on a call to one of its filtering methods). Until then it returns false.
|
||||
*
|
||||
* @return true/false whether the nested filter has returned true on a filter
|
||||
* call.
|
||||
*/
|
||||
public boolean filterAllRemaining() {
|
||||
return this.filterAllRemaining || this.filter.filterAllRemaining();
|
||||
}
|
||||
|
||||
public boolean filterRowKey(final byte [] rowKey) {
|
||||
changeFAR(this.filter.filterRowKey(rowKey, 0, rowKey.length));
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
|
||||
changeFAR(this.filter.filterRowKey(rowKey, offset, length));
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
public boolean filterColumn(final byte [] rowKey, final byte [] colKey,
|
||||
final byte[] data) {
|
||||
changeFAR(this.filter.filterColumn(rowKey, colKey, data));
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
|
||||
changeFAR(this.filter.filterRow(columns));
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
public boolean filterRow(List<KeyValue> results) {
|
||||
changeFAR(this.filter.filterRow(results));
|
||||
return filterAllRemaining();
|
||||
}
|
||||
|
||||
/**
|
||||
* Change filterAllRemaining from false to true if value is true, otherwise
|
||||
* leave as is.
|
||||
*
|
||||
* @param value
|
||||
*/
|
||||
private void changeFAR(boolean value) {
|
||||
this.filterAllRemaining = this.filterAllRemaining || value;
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered, byte [] rowKey) {
|
||||
this.filter.rowProcessed(filtered, rowKey, 0, rowKey.length);
|
||||
}
|
||||
|
||||
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
|
||||
this.filter.rowProcessed(filtered, key, offset, length);
|
||||
}
|
||||
|
||||
public void validate(final byte [][] columns) {
|
||||
this.filter.validate(columns);
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
String className = in.readUTF();
|
||||
|
||||
try {
|
||||
this.filter = (RowFilterInterface)(Class.forName(className).
|
||||
newInstance());
|
||||
this.filter.readFields(in);
|
||||
} catch (InstantiationException e) {
|
||||
throw new RuntimeException("Failed to deserialize WhileMatchRowFilter.",
|
||||
e);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new RuntimeException("Failed to deserialize WhileMatchRowFilter.",
|
||||
e);
|
||||
} catch (ClassNotFoundException e) {
|
||||
throw new RuntimeException("Failed to deserialize WhileMatchRowFilter.",
|
||||
e);
|
||||
}
|
||||
}
|
||||
|
||||
public void write(DataOutput out) throws IOException {
|
||||
out.writeUTF(this.filter.getClass().getName());
|
||||
this.filter.write(out);
|
||||
}
|
||||
|
||||
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
|
||||
byte[] columnName, int coffset, int clength, byte[] columnValue,
|
||||
int voffset, int vlength) {
|
||||
changeFAR(this.filter.filterColumn(rowKey, roffset, rlength, columnName, coffset, clength, columnValue, voffset, vlength));
|
||||
return filterAllRemaining();
|
||||
}
|
||||
}
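A short sketch of the wrapping behavior described in the class comment: once the nested filter filters a row, the wrapper reports filterAllRemaining() as true for everything that follows. Assumes the deleted WhileMatchRowFilter and StopRowFilter classes; WhileMatchRowFilterSketch is an illustrative name only.

import org.apache.hadoop.hbase.filter.StopRowFilter;
import org.apache.hadoop.hbase.filter.WhileMatchRowFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class WhileMatchRowFilterSketch {
  public static void main(String[] args) {
    // Once the wrapped StopRowFilter filters a row, the wrapper answers true
    // to filterAllRemaining() and short-circuits everything after it.
    WhileMatchRowFilter filter =
        new WhileMatchRowFilter(new StopRowFilter(Bytes.toBytes("row-0500")));

    System.out.println(filter.filterRowKey(Bytes.toBytes("row-0100"))); // false
    System.out.println(filter.filterAllRemaining());                    // false
    System.out.println(filter.filterRowKey(Bytes.toBytes("row-0500"))); // true
    System.out.println(filter.filterAllRemaining());                    // true
  }
}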
|
|
@@ -1,150 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.io;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
/**
|
||||
* Batch update operation.
|
||||
*
|
||||
* If value is null, it's a DELETE operation. If it's non-null, it's a PUT.
|
||||
* This object is purposely bare-bones because many instances are created
|
||||
* during bulk uploads. We have one class for DELETEs and PUTs rather than
|
||||
* a class per type because it makes the serialization easier.
|
||||
* @see BatchUpdate
|
||||
* @deprecated As of hbase 0.20.0, replaced by new Get/Put/Delete/Result-based API.
|
||||
*/
|
||||
public class BatchOperation implements Writable, HeapSize {
|
||||
/**
|
||||
* Estimated size of this object.
|
||||
*/
|
||||
// JHat says this is 32 bytes.
|
||||
public final int ESTIMATED_HEAP_TAX = 36;
|
||||
|
||||
private byte [] column = null;
|
||||
|
||||
// A null value defines DELETE operations.
|
||||
private byte [] value = null;
|
||||
|
||||
/**
|
||||
* Default constructor
|
||||
*/
|
||||
public BatchOperation() {
|
||||
this((byte [])null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a DELETE batch operation.
|
||||
* @param column column name
|
||||
*/
|
||||
public BatchOperation(final byte [] column) {
|
||||
this(column, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a DELETE batch operation.
|
||||
* @param column column name
|
||||
*/
|
||||
public BatchOperation(final String column) {
|
||||
this(Bytes.toBytes(column), null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a batch operation.
|
||||
* @param column column name
|
||||
* @param value column value. If non-null, this is a PUT operation.
|
||||
*/
|
||||
public BatchOperation(final String column, String value) {
|
||||
this(Bytes.toBytes(column), Bytes.toBytes(value));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a batch operation.
|
||||
* @param column column name
|
||||
* @param value column value. If non-null, this is a PUT operation.
|
||||
*/
|
||||
public BatchOperation(final byte [] column, final byte [] value) {
|
||||
this.column = column;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the column
|
||||
*/
|
||||
public byte [] getColumn() {
|
||||
return this.column;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the value
|
||||
*/
|
||||
public byte[] getValue() {
|
||||
return this.value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if this is a PUT operation (this.value is not null).
|
||||
*/
|
||||
public boolean isPut() {
|
||||
return this.value != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see java.lang.Object#toString()
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return "column => " + Bytes.toString(this.column) + ", value => '...'";
|
||||
}
|
||||
|
||||
// Writable methods
|
||||
|
||||
// This is a hotspot when deserializing incoming client submissions.
|
||||
// In Performance Evaluation sequentialWrite, 70% of object allocations are
|
||||
// done in here.
|
||||
public void readFields(final DataInput in) throws IOException {
|
||||
this.column = Bytes.readByteArray(in);
|
||||
// Is there a value to read?
|
||||
if (in.readBoolean()) {
|
||||
this.value = new byte[in.readInt()];
|
||||
in.readFully(this.value);
|
||||
}
|
||||
}
|
||||
|
||||
public void write(final DataOutput out) throws IOException {
|
||||
Bytes.writeByteArray(out, this.column);
|
||||
boolean p = isPut();
|
||||
out.writeBoolean(p);
|
||||
if (p) {
|
||||
out.writeInt(value.length);
|
||||
out.write(value);
|
||||
}
|
||||
}
|
||||
|
||||
public long heapSize() {
|
||||
return Bytes.ESTIMATED_HEAP_TAX * 2 + this.column.length +
|
||||
this.value.length + ESTIMATED_HEAP_TAX;
|
||||
}
|
||||
}
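A minimal sketch of the PUT/DELETE distinction and the Writable round trip mentioned in the comments above. It uses only constructors and methods shown in the removed class plus standard java.io streams; BatchOperationSketch is an invented example class, not part of the patch.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.hbase.io.BatchOperation;
import org.apache.hadoop.hbase.util.Bytes;

public class BatchOperationSketch {
  public static void main(String[] args) throws IOException {
    // A non-null value makes this a PUT; a null value would make it a DELETE.
    BatchOperation put = new BatchOperation(
        Bytes.toBytes("info:name"), Bytes.toBytes("value-1"));
    System.out.println("isPut=" + put.isPut()); // true

    // Writable round trip, as exercised on the RPC path described above.
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    put.write(new DataOutputStream(bos));

    BatchOperation copy = new BatchOperation();
    copy.readFields(new DataInputStream(
        new ByteArrayInputStream(bos.toByteArray())));
    System.out.println(Bytes.toString(copy.getColumn())); // info:name
  }
}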
|
|
@@ -1,405 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.io;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.RuntimeMXBean;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
|
||||
/**
|
||||
* A Writable object that contains a series of BatchOperations
|
||||
*
|
||||
* There is one BatchUpdate object per server, so a series of batch operations
|
||||
* can result in multiple BatchUpdate objects if the batch contains rows that
|
||||
* are served by multiple region servers.
|
||||
* @deprecated As of hbase 0.20.0, replaced by new Get/Put/Delete/Result-based API.
|
||||
*/
|
||||
public class BatchUpdate
|
||||
implements WritableComparable<BatchUpdate>, Iterable<BatchOperation>, HeapSize {
|
||||
private static final Log LOG = LogFactory.getLog(BatchUpdate.class);
|
||||
|
||||
/**
|
||||
* Estimated 'shallow size' of this object not counting payload.
|
||||
*/
|
||||
// Shallow size is 56. Add 32 for the arraylist below.
|
||||
public static final int ESTIMATED_HEAP_TAX = 56 + 32;
|
||||
|
||||
// the row being updated
|
||||
private byte [] row = null;
|
||||
private long size = 0;
|
||||
|
||||
// the batched operations
|
||||
private ArrayList<BatchOperation> operations =
|
||||
new ArrayList<BatchOperation>();
|
||||
|
||||
private long timestamp = HConstants.LATEST_TIMESTAMP;
|
||||
|
||||
private long rowLock = -1l;
|
||||
|
||||
/**
|
||||
* Default constructor used for serialization. Do not use directly.
|
||||
*/
|
||||
public BatchUpdate() {
|
||||
this ((byte [])null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize a BatchUpdate operation on a row. Timestamp is assumed to be
|
||||
* now.
|
||||
*
|
||||
* @param row
|
||||
*/
|
||||
public BatchUpdate(final String row) {
|
||||
this(Bytes.toBytes(row), HConstants.LATEST_TIMESTAMP);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize a BatchUpdate operation on a row. Timestamp is assumed to be
|
||||
* now.
|
||||
*
|
||||
* @param row
|
||||
*/
|
||||
public BatchUpdate(final byte [] row) {
|
||||
this(row, HConstants.LATEST_TIMESTAMP);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize a BatchUpdate operation on a row with a specific timestamp.
|
||||
*
|
||||
* @param row
|
||||
* @param timestamp
|
||||
*/
|
||||
public BatchUpdate(final String row, long timestamp){
|
||||
this(Bytes.toBytes(row), timestamp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor
|
||||
* @param buToCopy BatchUpdate to copy
|
||||
*/
|
||||
public BatchUpdate(BatchUpdate buToCopy) {
|
||||
this(buToCopy.getRow(), buToCopy.getTimestamp());
|
||||
for(BatchOperation bo : buToCopy) {
|
||||
byte [] val = bo.getValue();
|
||||
if (val == null) {
|
||||
// Presume a delete is intended.
|
||||
this.delete(bo.getColumn());
|
||||
} else {
|
||||
this.put(bo.getColumn(), val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize a BatchUpdate operation on a row with a specific timestamp.
|
||||
*
|
||||
* @param row
|
||||
* @param timestamp
|
||||
*/
|
||||
public BatchUpdate(final byte [] row, long timestamp){
|
||||
this.row = row;
|
||||
this.timestamp = timestamp;
|
||||
this.operations = new ArrayList<BatchOperation>();
|
||||
this.size = (row == null)? 0: row.length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a BatchUpdate from a RowResult.
|
||||
* @param rr the RowResult
|
||||
*/
|
||||
public BatchUpdate(final RowResult rr) {
|
||||
this(rr.getRow());
|
||||
for(Map.Entry<byte[], Cell> entry : rr.entrySet()){
|
||||
this.put(entry.getKey(), entry.getValue().getValue());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the row lock associated with this update
|
||||
* @return the row lock
|
||||
*/
|
||||
public long getRowLock() {
|
||||
return rowLock;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the lock to be used for this update
|
||||
* @param rowLock the row lock
|
||||
*/
|
||||
public void setRowLock(long rowLock) {
|
||||
this.rowLock = rowLock;
|
||||
}
|
||||
|
||||
|
||||
/** @return the row */
|
||||
public byte [] getRow() {
|
||||
return row;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the timestamp this BatchUpdate will be committed with.
|
||||
*/
|
||||
public long getTimestamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set this BatchUpdate's timestamp.
|
||||
*
|
||||
* @param timestamp
|
||||
*/
|
||||
public void setTimestamp(long timestamp) {
|
||||
this.timestamp = timestamp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current value of the specified column
|
||||
*
|
||||
* @param column column name
|
||||
* @return byte[] the cell value, returns null if the column does not exist.
|
||||
*/
|
||||
public synchronized byte[] get(final String column) {
|
||||
return get(Bytes.toBytes(column));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current value of the specified column
|
||||
*
|
||||
* @param column column name
|
||||
* @return byte[] the cell value, returns null if the column does not exist.
|
||||
*/
|
||||
public synchronized byte[] get(final byte[] column) {
|
||||
for (BatchOperation operation: operations) {
|
||||
if (Arrays.equals(column, operation.getColumn())) {
|
||||
return operation.getValue();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current columns
|
||||
*
|
||||
* @return byte[][] an array of byte[] columns
|
||||
*/
|
||||
public synchronized byte[][] getColumns() {
|
||||
byte[][] columns = new byte[operations.size()][];
|
||||
for (int i = 0; i < operations.size(); i++) {
|
||||
columns[i] = operations.get(i).getColumn();
|
||||
}
|
||||
return columns;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the specified column is currently assigned a value
|
||||
*
|
||||
* @param column column to check for
|
||||
* @return boolean true if the given column exists
|
||||
*/
|
||||
public synchronized boolean hasColumn(String column) {
|
||||
return hasColumn(Bytes.toBytes(column));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the specified column is currently assigned a value
|
||||
*
|
||||
* @param column column to check for
|
||||
* @return boolean true if the given column exists
|
||||
*/
|
||||
public synchronized boolean hasColumn(byte[] column) {
|
||||
byte[] getColumn = get(column);
|
||||
if (getColumn == null) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Change a value for the specified column
|
||||
*
|
||||
* @param column column whose value is being set
|
||||
* @param val new value for column. Cannot be null (can be empty).
|
||||
*/
|
||||
public synchronized void put(final String column, final byte val[]) {
|
||||
put(Bytes.toBytes(column), val);
|
||||
}
|
||||
|
||||
/**
|
||||
* Change a value for the specified column
|
||||
*
|
||||
* @param column column whose value is being set
|
||||
* @param val new value for column. Cannot be null (can be empty).
|
||||
*/
|
||||
public synchronized void put(final byte [] column, final byte val[]) {
|
||||
if (val == null) {
|
||||
// If null, the PUT becomes a DELETE operation.
|
||||
throw new IllegalArgumentException("Passed value cannot be null");
|
||||
}
|
||||
BatchOperation bo = new BatchOperation(column, val);
|
||||
this.size += bo.heapSize();
|
||||
operations.add(bo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete the value for a column
|
||||
* Deletes the cell whose row/column/commit-timestamp match those of the
|
||||
* delete.
|
||||
* @param column name of column whose value is to be deleted
|
||||
*/
|
||||
public void delete(final String column) {
|
||||
delete(Bytes.toBytes(column));
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete the value for a column
|
||||
* Deletes the cell whose row/column/commit-timestamp match those of the
|
||||
* delete.
|
||||
* @param column name of column whose value is to be deleted
|
||||
*/
|
||||
public synchronized void delete(final byte [] column) {
|
||||
operations.add(new BatchOperation(column));
|
||||
}
|
||||
|
||||
//
|
||||
// Iterable
|
||||
//
|
||||
|
||||
/**
|
||||
* @return Iterator<BatchOperation>
|
||||
*/
|
||||
public Iterator<BatchOperation> iterator() {
|
||||
return operations.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* @see java.lang.Object#toString()
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("row => ");
|
||||
sb.append(row == null? "": Bytes.toString(row));
|
||||
sb.append(", {");
|
||||
boolean morethanone = false;
|
||||
for (BatchOperation bo: this.operations) {
|
||||
if (morethanone) {
|
||||
sb.append(", ");
|
||||
}
|
||||
morethanone = true;
|
||||
sb.append(bo.toString());
|
||||
}
|
||||
sb.append("}");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
//
|
||||
// Writable
|
||||
//
|
||||
|
||||
public void readFields(final DataInput in) throws IOException {
|
||||
// Clear any existing operations; may be hangovers from previous use of
|
||||
// this instance.
|
||||
if (this.operations.size() != 0) {
|
||||
this.operations.clear();
|
||||
}
|
||||
this.row = Bytes.readByteArray(in);
|
||||
timestamp = in.readLong();
|
||||
this.size = in.readLong();
|
||||
int nOps = in.readInt();
|
||||
for (int i = 0; i < nOps; i++) {
|
||||
BatchOperation op = new BatchOperation();
|
||||
op.readFields(in);
|
||||
this.operations.add(op);
|
||||
}
|
||||
this.rowLock = in.readLong();
|
||||
}
|
||||
|
||||
public void write(final DataOutput out) throws IOException {
|
||||
Bytes.writeByteArray(out, this.row);
|
||||
out.writeLong(timestamp);
|
||||
out.writeLong(this.size);
|
||||
out.writeInt(operations.size());
|
||||
for (BatchOperation op: operations) {
|
||||
op.write(out);
|
||||
}
|
||||
out.writeLong(this.rowLock);
|
||||
}
|
||||
|
||||
public int compareTo(BatchUpdate o) {
|
||||
return Bytes.compareTo(this.row, o.getRow());
|
||||
}
|
||||
|
||||
public long heapSize() {
|
||||
return this.row.length + Bytes.ESTIMATED_HEAP_TAX + this.size +
|
||||
ESTIMATED_HEAP_TAX;
|
||||
}
|
||||
|
||||
/**
|
||||
* Code to test sizes of BatchUpdate arrays.
|
||||
* @param args
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
public static void main(String[] args) throws InterruptedException {
|
||||
RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
|
||||
LOG.info("vmName=" + runtime.getVmName() + ", vmVendor="
|
||||
+ runtime.getVmVendor() + ", vmVersion=" + runtime.getVmVersion());
|
||||
LOG.info("vmInputArguments=" + runtime.getInputArguments());
|
||||
final int count = 10000;
|
||||
BatchUpdate[] batch1 = new BatchUpdate[count];
|
||||
// TODO: x32 vs x64
|
||||
long size = 0;
|
||||
for (int i = 0; i < count; i++) {
|
||||
BatchUpdate bu = new BatchUpdate(HConstants.EMPTY_BYTE_ARRAY);
|
||||
bu.put(HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY);
|
||||
batch1[i] = bu;
|
||||
size += bu.heapSize();
|
||||
}
|
||||
LOG.info("batch1 estimated size=" + size);
|
||||
// Make a variably sized memcache.
|
||||
size = 0;
|
||||
BatchUpdate[] batch2 = new BatchUpdate[count];
|
||||
for (int i = 0; i < count; i++) {
|
||||
BatchUpdate bu = new BatchUpdate(Bytes.toBytes(i));
|
||||
bu.put(Bytes.toBytes(i), new byte[i]);
|
||||
batch2[i] = bu;
|
||||
size += bu.heapSize();
|
||||
}
|
||||
LOG.info("batch2 estimated size=" + size);
|
||||
final int seconds = 30;
|
||||
LOG.info("Waiting " + seconds + " seconds while heap dump is taken");
|
||||
for (int i = 0; i < seconds; i++) {
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
LOG.info("Exiting.");
|
||||
}
|
||||
}
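For context, a small sketch of building up a BatchUpdate with puts and a delete, then walking the batched operations. It relies only on methods visible in the removed class (put, delete, iterator, heapSize); BatchUpdateSketch is an illustrative name and no commit/flush call is shown, since none appears in this diff.

import org.apache.hadoop.hbase.io.BatchOperation;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.util.Bytes;

public class BatchUpdateSketch {
  public static void main(String[] args) {
    BatchUpdate bu = new BatchUpdate(Bytes.toBytes("row-0001"));
    bu.put(Bytes.toBytes("info:name"), Bytes.toBytes("value-1"));
    bu.put(Bytes.toBytes("info:age"), Bytes.toBytes("42"));
    bu.delete(Bytes.toBytes("info:stale"));

    // Each put/delete is batched as a BatchOperation; a null value marks a DELETE.
    for (BatchOperation op : bu) {
      String kind = op.isPut() ? "PUT" : "DELETE";
      System.out.println(kind + " " + Bytes.toString(op.getColumn()));
    }
    System.out.println("estimated heap size=" + bu.heapSize());
  }
}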
|
|
@@ -1,280 +0,0 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.io;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
import java.util.Map;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.rest.exception.HBaseRestException;
|
||||
import org.apache.hadoop.hbase.rest.serializer.IRestSerializer;
|
||||
import org.apache.hadoop.hbase.rest.serializer.ISerializable;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
import agilejson.TOJSON;
|
||||
|
||||
/**
|
||||
* Cell - Used to transport a cell value (byte[]) and the timestamp it was
|
||||
* stored with together as a result for get and getRow methods. This promotes
|
||||
* the timestamp of a cell to a first-class value, making it easy to take note
|
||||
* of temporal data. Cell is used all the way from HStore up to HTable.
|
||||
* @deprecated As of hbase 0.20.0, replaced by new Get/Put/Delete/Result-based API.
|
||||
*/
|
||||
public class Cell implements Writable, Iterable<Map.Entry<Long, byte[]>>,
|
||||
ISerializable {
|
||||
protected final SortedMap<Long, byte[]> valueMap = new TreeMap<Long, byte[]>(
|
||||
new Comparator<Long>() {
|
||||
public int compare(Long l1, Long l2) {
|
||||
return l2.compareTo(l1);
|
||||
}
|
||||
});
|
||||
|
||||
/** For Writable compatibility */
|
||||
public Cell() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new Cell with a given value and timestamp. Used by HStore.
|
||||
*
|
||||
* @param value
|
||||
* @param timestamp
|
||||
*/
|
||||
public Cell(String value, long timestamp) {
|
||||
this(Bytes.toBytes(value), timestamp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new Cell with a given value and timestamp. Used by HStore.
|
||||
*
|
||||
* @param value
|
||||
* @param timestamp
|
||||
*/
|
||||
public Cell(byte[] value, long timestamp) {
|
||||
valueMap.put(timestamp, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new Cell with a given value and timestamp. Used by HStore.
|
||||
*
|
||||
* @param bb
|
||||
* @param timestamp
|
||||
*/
|
||||
public Cell(final ByteBuffer bb, long timestamp) {
|
||||
this.valueMap.put(timestamp, Bytes.toBytes(bb));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param vals
|
||||
* array of values
|
||||
* @param ts
|
||||
* array of timestamps
|
||||
*/
|
||||
public Cell(String [] vals, long[] ts) {
|
||||
this(Bytes.toByteArrays(vals), ts);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param vals
|
||||
* array of values
|
||||
* @param ts
|
||||
* array of timestamps
|
||||
*/
|
||||
public Cell(byte[][] vals, long[] ts) {
|
||||
if (vals.length != ts.length) {
|
||||
throw new IllegalArgumentException(
|
||||
"number of values must be the same as the number of timestamps");
|
||||
}
|
||||
for (int i = 0; i < vals.length; i++) {
|
||||
valueMap.put(ts[i], vals[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/** @return the current cell's value */
|
||||
@TOJSON(base64=true)
|
||||
public byte[] getValue() {
|
||||
return valueMap.get(valueMap.firstKey());
|
||||
}
|
||||
|
||||
/** @return the current cell's timestamp */
|
||||
@TOJSON
|
||||
public long getTimestamp() {
|
||||
return valueMap.firstKey();
|
||||
}
|
||||
|
||||
/** @return the number of values this cell holds */
|
||||
public int getNumValues() {
|
||||
return valueMap.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new timestamp and value to this cell, provided the timestamp does not
|
||||
* already exist
|
||||
*
|
||||
* @param val
|
||||
* @param ts
|
||||
*/
|
||||
public void add(byte[] val, long ts) {
|
||||
if (!valueMap.containsKey(ts)) {
|
||||
valueMap.put(ts, val);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see java.lang.Object#toString()
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
if (valueMap.size() == 1) {
|
||||
return "timestamp=" + getTimestamp() + ", value="
|
||||
+ Bytes.toString(getValue());
|
||||
}
|
||||
StringBuilder s = new StringBuilder("{ ");
|
||||
int i = 0;
|
||||
for (Map.Entry<Long, byte[]> entry : valueMap.entrySet()) {
|
||||
if (i > 0) {
|
||||
s.append(", ");
|
||||
}
|
||||
s.append("[timestamp=");
|
||||
s.append(entry.getKey());
|
||||
s.append(", value=");
|
||||
s.append(Bytes.toString(entry.getValue()));
|
||||
s.append("]");
|
||||
i++;
|
||||
}
|
||||
s.append(" }");
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
//
|
||||
// Writable
|
||||
//
|
||||
|
||||
public void readFields(final DataInput in) throws IOException {
|
||||
int nvalues = in.readInt();
|
||||
for (int i = 0; i < nvalues; i++) {
|
||||
long timestamp = in.readLong();
|
||||
byte[] value = Bytes.readByteArray(in);
|
||||
valueMap.put(timestamp, value);
|
||||
}
|
||||
}
|
||||
|
||||
public void write(final DataOutput out) throws IOException {
|
||||
out.writeInt(valueMap.size());
|
||||
for (Map.Entry<Long, byte[]> entry : valueMap.entrySet()) {
|
||||
out.writeLong(entry.getKey());
|
||||
Bytes.writeByteArray(out, entry.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Iterable
|
||||
//
|
||||
|
||||
public Iterator<Entry<Long, byte[]>> iterator() {
|
||||
return new CellIterator();
|
||||
}
|
||||
|
||||
private class CellIterator implements Iterator<Entry<Long, byte[]>> {
|
||||
private Iterator<Entry<Long, byte[]>> it;
|
||||
|
||||
CellIterator() {
|
||||
it = valueMap.entrySet().iterator();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return it.hasNext();
|
||||
}
|
||||
|
||||
public Entry<Long, byte[]> next() {
|
||||
return it.next();
|
||||
}
|
||||
|
||||
public void remove() throws UnsupportedOperationException {
|
||||
throw new UnsupportedOperationException("remove is not supported");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param results
|
||||
* @return
|
||||
* TODO: This is the glue between old way of doing things and the new.
|
||||
* Herein we are converting our clean KeyValues to Map of Cells.
|
||||
*/
|
||||
public static HbaseMapWritable<byte [], Cell> createCells(
|
||||
final List<KeyValue> results) {
|
||||
HbaseMapWritable<byte [], Cell> cells =
|
||||
new HbaseMapWritable<byte [], Cell>();
|
||||
// Walking backward through the list of results though it has no effect
|
||||
// because we're inserting into a sorted map.
|
||||
for (ListIterator<KeyValue> i = results.listIterator(results.size());
|
||||
i.hasPrevious();) {
|
||||
KeyValue kv = i.previous();
|
||||
byte [] column = kv.getColumn();
|
||||
Cell c = cells.get(column);
|
||||
if (c == null) {
|
||||
c = new Cell(kv.getValue(), kv.getTimestamp());
|
||||
cells.put(column, c);
|
||||
} else {
|
||||
c.add(kv.getValue(), kv.getTimestamp());
|
||||
}
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param results
|
||||
* @return Array of Cells.
|
||||
* TODO: This is the glue between old way of doing things and the new.
|
||||
* Herein we are converting our clean KeyValues to Map of Cells.
|
||||
*/
|
||||
public static Cell [] createSingleCellArray(final List<KeyValue> results) {
|
||||
if (results == null) return null;
|
||||
int index = 0;
|
||||
Cell [] cells = new Cell[results.size()];
|
||||
for (KeyValue kv: results) {
|
||||
cells[index++] = new Cell(kv.getValue(), kv.getTimestamp());
|
||||
}
|
||||
return cells;
|
||||
}
|
||||
|
||||
/*
|
||||
* (non-Javadoc)
|
||||
*
|
||||
* @see
|
||||
* org.apache.hadoop.hbase.rest.serializer.ISerializable#restSerialize(org
|
||||
* .apache.hadoop.hbase.rest.serializer.IRestSerializer)
|
||||
*/
|
||||
public void restSerialize(IRestSerializer serializer)
|
||||
throws HBaseRestException {
|
||||
serializer.serializeCell(this);
|
||||
}
|
||||
}
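A brief sketch of the multi-version behavior described above: values are keyed by timestamp in descending order, so getValue() and getTimestamp() return the newest entry. Only constructors and methods from the removed Cell class are used; CellSketch is an invented example class.

import java.util.Map;

import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.util.Bytes;

public class CellSketch {
  public static void main(String[] args) {
    // The internal map sorts timestamps descending, so the newest value is first.
    Cell cell = new Cell(Bytes.toBytes("v1"), 1000L);
    cell.add(Bytes.toBytes("v2"), 2000L);
    cell.add(Bytes.toBytes("ignored"), 2000L); // duplicate timestamp is dropped

    System.out.println(Bytes.toString(cell.getValue())); // v2
    System.out.println(cell.getTimestamp());             // 2000
    System.out.println(cell.getNumValues());             // 2

    // Iterates newest to oldest.
    for (Map.Entry<Long, byte[]> e : cell) {
      System.out.println(e.getKey() + " => " + Bytes.toString(e.getValue()));
    }
  }
}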
|
|
@@ -45,7 +45,7 @@ public interface CodeToClassAndBack {
|
|||
/**
|
||||
* Class list for supported classes
|
||||
*/
|
||||
public Class<?>[] classList = {byte[].class, Cell.class};
|
||||
public Class<?>[] classList = {byte[].class};
|
||||
|
||||
/**
|
||||
* The static loader that is used instead of the static constructor in
|
||||
|
|
|
@@ -113,8 +113,6 @@ public class HbaseObjectWritable implements Writable, Configurable {
|
|||
addToMap(HConstants.Modify.class, code++);
|
||||
addToMap(HMsg.class, code++);
|
||||
addToMap(HMsg[].class, code++);
|
||||
addToMap(RowFilterInterface.class, code++);
|
||||
addToMap(RowFilterSet.class, code++);
|
||||
addToMap(HRegion.class, code++);
|
||||
addToMap(HRegion[].class, code++);
|
||||
addToMap(HRegionInfo.class, code++);
|
||||
|
|
|
@@ -1,342 +0,0 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.io;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.rest.descriptors.RestCell;
|
||||
import org.apache.hadoop.hbase.rest.exception.HBaseRestException;
|
||||
import org.apache.hadoop.hbase.rest.serializer.IRestSerializer;
|
||||
import org.apache.hadoop.hbase.rest.serializer.ISerializable;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
import agilejson.TOJSON;
|
||||
|
||||
/**
|
||||
* Holds row name and then a map of columns to cells.
|
||||
* @deprecated As of hbase 0.20.0, replaced by new Get/Put/Delete/Result-based API.
|
||||
*/
|
||||
public class RowResult implements Writable, SortedMap<byte [], Cell>,
|
||||
Comparable<RowResult>, ISerializable {
|
||||
private byte [] row = null;
|
||||
private final HbaseMapWritable<byte [], Cell> cells;
|
||||
private final byte [] COL_REGIONINFO = Bytes.toBytes("info:regioninfo");
|
||||
|
||||
/** default constructor for writable */
|
||||
public RowResult() {
|
||||
this(null, new HbaseMapWritable<byte [], Cell>());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a RowResult from a row and Cell map
|
||||
* @param row
|
||||
* @param m
|
||||
*/
|
||||
public RowResult (final byte [] row,
|
||||
final HbaseMapWritable<byte [], Cell> m) {
|
||||
this.row = row;
|
||||
this.cells = m;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the row for this RowResult
|
||||
* @return the row
|
||||
*/
|
||||
@TOJSON(base64=true)
|
||||
public byte [] getRow() {
|
||||
return row;
|
||||
}
|
||||
|
||||
//
|
||||
// Map interface
|
||||
//
|
||||
public Cell put(byte [] key,
|
||||
Cell value) {
|
||||
throw new UnsupportedOperationException("RowResult is read-only!");
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public void putAll(Map map) {
|
||||
throw new UnsupportedOperationException("RowResult is read-only!");
|
||||
}
|
||||
|
||||
public Cell get(Object key) {
|
||||
return this.cells.get(key);
|
||||
}
|
||||
|
||||
public Cell remove(Object key) {
|
||||
throw new UnsupportedOperationException("RowResult is read-only!");
|
||||
}
|
||||
|
||||
public boolean containsKey(Object key) {
|
||||
return cells.containsKey(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the key can be found in this RowResult
|
||||
* @param key
|
||||
* @return true if key is found, false if not
|
||||
*/
|
||||
public boolean containsKey(String key) {
|
||||
return cells.containsKey(Bytes.toBytes(key));
|
||||
}
|
||||
|
||||
public boolean containsValue(Object value) {
|
||||
throw new UnsupportedOperationException("Don't support containsValue!");
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return cells.isEmpty();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return cells.size();
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
throw new UnsupportedOperationException("RowResult is read-only!");
|
||||
}
|
||||
|
||||
public Set<byte []> keySet() {
|
||||
Set<byte []> result = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
|
||||
for (byte [] w : cells.keySet()) {
|
||||
result.add(w);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public Set<Map.Entry<byte [], Cell>> entrySet() {
|
||||
return Collections.unmodifiableSet(this.cells.entrySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* This method used solely for the REST serialization
|
||||
*
|
||||
* @return Cells
|
||||
*/
|
||||
@TOJSON
|
||||
public RestCell[] getCells() {
|
||||
RestCell[] restCells = new RestCell[this.cells.size()];
|
||||
int i = 0;
|
||||
for (Map.Entry<byte[], Cell> entry : this.cells.entrySet()) {
|
||||
restCells[i] = new RestCell(entry.getKey(), entry.getValue());
|
||||
i++;
|
||||
}
|
||||
return restCells;
|
||||
}
|
||||
|
||||
public Collection<Cell> values() {
|
||||
ArrayList<Cell> result = new ArrayList<Cell>();
|
||||
for (Writable w : cells.values()) {
|
||||
result.add((Cell)w);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Cell that corresponds to column
|
||||
* @param column
|
||||
* @return the Cell
|
||||
*/
|
||||
public Cell get(byte [] column) {
|
||||
return this.cells.get(column);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Cell that corresponds to column, using a String key
|
||||
* @param key
|
||||
* @return the Cell
|
||||
*/
|
||||
public Cell get(String key) {
|
||||
return get(Bytes.toBytes(key));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a cell using separate family and columnQualifier arguments.
|
||||
* @param family
|
||||
* @param columnQualifier
|
||||
* @return The cell.
|
||||
*/
|
||||
public Cell get(byte [] family, byte [] columnQualifier) {
|
||||
return get(Bytes.add(family, KeyValue.COLUMN_FAMILY_DELIM_ARRAY, columnQualifier));
|
||||
}
|
||||
|
||||
|
||||
public Comparator<? super byte[]> comparator() {
|
||||
return this.cells.comparator();
|
||||
}
|
||||
|
||||
public byte[] firstKey() {
|
||||
return this.cells.firstKey();
|
||||
}
|
||||
|
||||
public SortedMap<byte[], Cell> headMap(byte[] toKey) {
|
||||
return this.cells.headMap(toKey);
|
||||
}
|
||||
|
||||
public byte[] lastKey() {
|
||||
return this.cells.lastKey();
|
||||
}
|
||||
|
||||
public SortedMap<byte[], Cell> subMap(byte[] fromKey, byte[] toKey) {
|
||||
return this.cells.subMap(fromKey, toKey);
|
||||
}
|
||||
|
||||
public SortedMap<byte[], Cell> tailMap(byte[] fromKey) {
|
||||
return this.cells.tailMap(fromKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Row entry.
|
||||
*/
|
||||
public class Entry implements Map.Entry<byte [], Cell> {
|
||||
private final byte [] column;
|
||||
private final Cell cell;
|
||||
|
||||
Entry(byte [] row, Cell cell) {
|
||||
this.column = row;
|
||||
this.cell = cell;
|
||||
}
|
||||
|
||||
public Cell setValue(Cell c) {
|
||||
throw new UnsupportedOperationException("RowResult is read-only!");
|
||||
}
|
||||
|
||||
public byte [] getKey() {
|
||||
return column;
|
||||
}
|
||||
|
||||
public Cell getValue() {
|
||||
return cell;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("row=");
|
||||
sb.append(Bytes.toString(this.row));
|
||||
sb.append(", cells={");
|
||||
boolean moreThanOne = false;
|
||||
for (Map.Entry<byte [], Cell> e: this.cells.entrySet()) {
|
||||
if (moreThanOne) {
|
||||
sb.append(", ");
|
||||
} else {
|
||||
moreThanOne = true;
|
||||
}
|
||||
sb.append("(column=");
|
||||
sb.append(Bytes.toString(e.getKey()));
|
||||
sb.append(", timestamp=");
|
||||
sb.append(Long.toString(e.getValue().getTimestamp()));
|
||||
sb.append(", value=");
|
||||
byte [] v = e.getValue().getValue();
|
||||
if (Bytes.equals(e.getKey(), this.COL_REGIONINFO)) {
|
||||
try {
|
||||
sb.append(Writables.getHRegionInfo(v).toString());
|
||||
} catch (IOException ioe) {
|
||||
sb.append(ioe.toString());
|
||||
}
|
||||
} else {
|
||||
sb.append(v);
|
||||
}
|
||||
sb.append(")");
|
||||
}
|
||||
sb.append("}");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.hadoop.hbase.rest.xml.IOutputXML#toXML()
|
||||
*/
|
||||
public void restSerialize(IRestSerializer serializer) throws HBaseRestException {
|
||||
serializer.serializeRowResult(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param l
|
||||
* @return
|
||||
* TODO: This is the glue between old way of doing things and the new.
|
||||
* Herein we are converting our clean KeyValues to old RowResult.
|
||||
*/
|
||||
public static RowResult [] createRowResultArray(final List<List<KeyValue>> l) {
|
||||
RowResult [] results = new RowResult[l.size()];
|
||||
int i = 0;
|
||||
for (List<KeyValue> kvl: l) {
|
||||
results[i++] = createRowResult(kvl);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param results
|
||||
* @return
|
||||
* TODO: This is the glue between old way of doing things and the new.
|
||||
* Herein we are converting our clean KeyValues to old RowResult.
|
||||
*/
|
||||
public static RowResult createRowResult(final List<KeyValue> results) {
|
||||
if (results.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
HbaseMapWritable<byte [], Cell> cells = Cell.createCells(results);
|
||||
byte [] row = results.get(0).getRow();
|
||||
return new RowResult(row, cells);
|
||||
}
|
||||
|
||||
//
|
||||
// Writable
|
||||
//
|
||||
|
||||
public void readFields(final DataInput in) throws IOException {
|
||||
this.row = Bytes.readByteArray(in);
|
||||
this.cells.readFields(in);
|
||||
}
|
||||
|
||||
public void write(final DataOutput out) throws IOException {
|
||||
Bytes.writeByteArray(out, this.row);
|
||||
this.cells.write(out);
|
||||
}
|
||||
|
||||
//
|
||||
// Comparable
|
||||
//
|
||||
/**
|
||||
* Comparing this RowResult with another one by
|
||||
* comparing the row in it.
|
||||
* @param o the RowResult Object to compare to
|
||||
* @return the compare number
|
||||
*/
|
||||
public int compareTo(RowResult o){
|
||||
return Bytes.compareTo(this.row, o.getRow());
|
||||
}
|
||||
}
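A minimal sketch of assembling a read-only RowResult from a cell map and reading it back. It assumes the deleted RowResult and Cell classes together with HbaseMapWritable as used in this diff; RowResultSketch is an illustrative name only.

import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;

public class RowResultSketch {
  public static void main(String[] args) {
    HbaseMapWritable<byte [], Cell> cells = new HbaseMapWritable<byte [], Cell>();
    cells.put(Bytes.toBytes("info:name"), new Cell(Bytes.toBytes("value-1"), 1000L));

    RowResult rr = new RowResult(Bytes.toBytes("row-0001"), cells);
    Cell c = rr.get("info:name");
    System.out.println(Bytes.toString(rr.getRow()) + " => "
        + Bytes.toString(c.getValue()) + " @ " + c.getTimestamp());

    // The map view is read-only; mutators such as put() or clear() throw
    // UnsupportedOperationException.
  }
}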
|
|
@@ -910,7 +910,7 @@ public abstract class HBaseServer {
|
|||
|
||||
CurCall.set(call);
|
||||
UserGroupInformation previous = UserGroupInformation.getCurrentUGI();
|
||||
UserGroupInformation.setCurrentUGI(call.connection.ticket);
|
||||
UserGroupInformation.setCurrentUser(call.connection.ticket);
|
||||
try {
|
||||
value = call(call.param, call.timestamp); // make the call
|
||||
} catch (Throwable e) {
|
||||
|
@@ -918,7 +918,7 @@ public abstract class HBaseServer {
|
|||
errorClass = e.getClass().getName();
|
||||
error = StringUtils.stringifyException(e);
|
||||
}
|
||||
UserGroupInformation.setCurrentUGI(previous);
|
||||
UserGroupInformation.setCurrentUser(previous);
|
||||
CurCall.set(null);
|
||||
|
||||
if (buf.size() > buffersize) {
|
||||
|
|
|
@@ -1,206 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.mapred.FileOutputFormat;
|
||||
import org.apache.hadoop.mapred.JobClient;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
|
||||
/**
|
||||
* Example table column indexing class. Runs a mapreduce job to index
|
||||
* specified table columns.
|
||||
* <ul><li>Each row is modeled as a Lucene document: row key is indexed in
|
||||
* its untokenized form, column name-value pairs are Lucene field name-value
|
||||
* pairs.</li>
|
||||
* <li>A file passed on command line is used to populate an
|
||||
* {@link IndexConfiguration} which is used to set various Lucene parameters,
|
||||
* specify whether to optimize an index and which columns to index and/or
|
||||
* store, in tokenized or untokenized form, etc. For an example, see the
|
||||
* <code>createIndexConfContent</code> method in TestTableIndex
|
||||
* </li>
|
||||
* <li>The number of reduce tasks decides the number of indexes (partitions).
|
||||
* The index(es) is stored in the output path of job configuration.</li>
|
||||
* <li>The index build process is done in the reduce phase. Users can use
|
||||
* the map phase to join rows from different tables or to pre-parse/analyze
|
||||
* column content, etc.</li>
|
||||
* </ul>
|
||||
*/
|
||||
@Deprecated
|
||||
public class BuildTableIndex {
|
||||
private static final String USAGE = "Usage: BuildTableIndex " +
|
||||
"-m <numMapTasks> -r <numReduceTasks>\n -indexConf <iconfFile> " +
|
||||
"-indexDir <indexDir>\n -table <tableName> -columns <columnName1> " +
|
||||
"[<columnName2> ...]";
|
||||
|
||||
private static void printUsage(String message) {
|
||||
System.err.println(message);
|
||||
System.err.println(USAGE);
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
/** default constructor */
|
||||
public BuildTableIndex() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws IOException
|
||||
*/
|
||||
public void run(String[] args) throws IOException {
|
||||
if (args.length < 6) {
|
||||
printUsage("Too few arguments");
|
||||
}
|
||||
|
||||
int numMapTasks = 1;
|
||||
int numReduceTasks = 1;
|
||||
String iconfFile = null;
|
||||
String indexDir = null;
|
||||
String tableName = null;
|
||||
StringBuffer columnNames = null;
|
||||
|
||||
// parse args
|
||||
for (int i = 0; i < args.length - 1; i++) {
|
||||
if ("-m".equals(args[i])) {
|
||||
numMapTasks = Integer.parseInt(args[++i]);
|
||||
} else if ("-r".equals(args[i])) {
|
||||
numReduceTasks = Integer.parseInt(args[++i]);
|
||||
} else if ("-indexConf".equals(args[i])) {
|
||||
iconfFile = args[++i];
|
||||
} else if ("-indexDir".equals(args[i])) {
|
||||
indexDir = args[++i];
|
||||
} else if ("-table".equals(args[i])) {
|
||||
tableName = args[++i];
|
||||
} else if ("-columns".equals(args[i])) {
|
||||
columnNames = new StringBuffer(args[++i]);
|
||||
while (i + 1 < args.length && !args[i + 1].startsWith("-")) {
|
||||
columnNames.append(" ");
|
||||
columnNames.append(args[++i]);
|
||||
}
|
||||
} else {
|
||||
printUsage("Unsupported option " + args[i]);
|
||||
}
|
||||
}
|
||||
|
||||
if (indexDir == null || tableName == null || columnNames == null) {
|
||||
printUsage("Index directory, table name and at least one column must " +
|
||||
"be specified");
|
||||
}
|
||||
|
||||
Configuration conf = new HBaseConfiguration();
|
||||
if (iconfFile != null) {
|
||||
// set index configuration content from a file
|
||||
String content = readContent(iconfFile);
|
||||
IndexConfiguration iconf = new IndexConfiguration();
|
||||
// purely to validate, exception will be thrown if not valid
|
||||
iconf.addFromXML(content);
|
||||
conf.set("hbase.index.conf", content);
|
||||
}
|
||||
|
||||
if (columnNames != null) {
|
||||
JobConf jobConf = createJob(conf, numMapTasks, numReduceTasks, indexDir,
|
||||
tableName, columnNames.toString());
|
||||
JobClient.runJob(jobConf);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param conf
|
||||
* @param numMapTasks
|
||||
* @param numReduceTasks
|
||||
* @param indexDir
|
||||
* @param tableName
|
||||
* @param columnNames
|
||||
* @return JobConf
|
||||
*/
|
||||
public JobConf createJob(Configuration conf, int numMapTasks,
|
||||
int numReduceTasks, String indexDir, String tableName,
|
||||
String columnNames) {
|
||||
JobConf jobConf = new JobConf(conf, BuildTableIndex.class);
|
||||
jobConf.setJobName("build index for table " + tableName);
|
||||
jobConf.setNumMapTasks(numMapTasks);
|
||||
// number of indexes to partition into
|
||||
jobConf.setNumReduceTasks(numReduceTasks);
|
||||
|
||||
// use identity map (a waste, but just as an example)
|
||||
IdentityTableMap.initJob(tableName, columnNames, IdentityTableMap.class,
|
||||
jobConf);
|
||||
|
||||
// use IndexTableReduce to build a Lucene index
|
||||
jobConf.setReducerClass(IndexTableReduce.class);
|
||||
FileOutputFormat.setOutputPath(jobConf, new Path(indexDir));
|
||||
jobConf.setOutputFormat(IndexOutputFormat.class);
|
||||
|
||||
return jobConf;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read xml file of indexing configurations. The xml format is similar to
|
||||
* hbase-default.xml and hadoop-default.xml. For an example configuration,
|
||||
* see the <code>createIndexConfContent</code> method in TestTableIndex
|
||||
* @param fileName File to read.
|
||||
* @return XML configuration read from file
|
||||
* @throws IOException
|
||||
*/
|
||||
private String readContent(String fileName) throws IOException {
|
||||
File file = new File(fileName);
|
||||
int length = (int) file.length();
|
||||
if (length == 0) {
|
||||
printUsage("Index configuration file " + fileName + " does not exist");
|
||||
}
|
||||
|
||||
int bytesRead = 0;
|
||||
byte[] bytes = new byte[length];
|
||||
FileInputStream fis = new FileInputStream(file);
|
||||
|
||||
try {
|
||||
// read entire file into content
|
||||
while (bytesRead < length) {
|
||||
int read = fis.read(bytes, bytesRead, length - bytesRead);
|
||||
if (read > 0) {
|
||||
bytesRead += read;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
fis.close();
|
||||
}
|
||||
|
||||
return new String(bytes, 0, bytesRead, HConstants.UTF8_ENCODING);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws IOException
|
||||
*/
|
||||
public static void main(String[] args) throws IOException {
|
||||
BuildTableIndex build = new BuildTableIndex();
|
||||
build.run(args);
|
||||
}
|
||||
}
|
|
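For context, a rough sketch of how this removed class was driven; it is just a fragment mirroring the USAGE string and the run(String[]) method above, and the map/reduce counts, directory, table name, and column are hypothetical, illustrative values only.
// Hypothetical invocation of the removed BuildTableIndex job; all names below are made up.
String[] args = new String[] {
    "-m", "2", "-r", "2",
    "-indexDir", "/tmp/example-index",
    "-table", "exampleTable",
    "-columns", "contents:"
};
new BuildTableIndex().run(args);  // parses the flags, builds the JobConf, and submits the job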
@ -1,40 +0,0 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import org.apache.hadoop.util.ProgramDriver;
|
||||
|
||||
/**
|
||||
* Driver for HBase MapReduce jobs. Select which job to run by passing
|
||||
* the name of the job to this main.
|
||||
*/
|
||||
@Deprecated
|
||||
public class Driver {
|
||||
/**
|
||||
* @param args
|
||||
* @throws Throwable
|
||||
*/
|
||||
public static void main(String[] args) throws Throwable {
|
||||
ProgramDriver pgd = new ProgramDriver();
|
||||
pgd.addClass(RowCounter.NAME, RowCounter.class,
|
||||
"Count rows in HBase table");
|
||||
pgd.driver(args);
|
||||
}
|
||||
}
|
|
@ -1,161 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.MapReduceBase;
|
||||
import org.apache.hadoop.mapred.OutputCollector;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
|
||||
|
||||
/**
|
||||
* Extract grouping columns from input record
|
||||
*/
|
||||
@Deprecated
|
||||
public class GroupingTableMap
|
||||
extends MapReduceBase
|
||||
implements TableMap<ImmutableBytesWritable,RowResult> {
|
||||
|
||||
/**
|
||||
* JobConf parameter to specify the columns used to produce the key passed to
|
||||
* collect from the map phase
|
||||
*/
|
||||
public static final String GROUP_COLUMNS =
|
||||
"hbase.mapred.groupingtablemap.columns";
|
||||
|
||||
protected byte [][] m_columns;
|
||||
|
||||
/**
|
||||
* Use this before submitting a TableMap job. It will appropriately set up the
|
||||
* JobConf.
|
||||
*
|
||||
* @param table table to be processed
|
||||
* @param columns space separated list of columns to fetch
|
||||
* @param groupColumns space separated list of columns used to form the key
|
||||
* used in collect
|
||||
* @param mapper map class
|
||||
* @param job job configuration object
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public static void initJob(String table, String columns, String groupColumns,
|
||||
Class<? extends TableMap> mapper, JobConf job) {
|
||||
|
||||
TableMapReduceUtil.initTableMapJob(table, columns, mapper,
|
||||
ImmutableBytesWritable.class, RowResult.class, job);
|
||||
job.set(GROUP_COLUMNS, groupColumns);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void configure(JobConf job) {
|
||||
super.configure(job);
|
||||
String[] cols = job.get(GROUP_COLUMNS, "").split(" ");
|
||||
m_columns = new byte[cols.length][];
|
||||
for(int i = 0; i < cols.length; i++) {
|
||||
m_columns[i] = Bytes.toBytes(cols[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the grouping columns from value to construct a new key.
|
||||
*
|
||||
* Pass the new key and value to reduce.
|
||||
* If any of the grouping columns are not found in the value, the record is skipped.
|
||||
* @param key
|
||||
* @param value
|
||||
* @param output
|
||||
* @param reporter
|
||||
* @throws IOException
|
||||
*/
|
||||
public void map(ImmutableBytesWritable key, RowResult value,
|
||||
OutputCollector<ImmutableBytesWritable,RowResult> output,
|
||||
Reporter reporter) throws IOException {
|
||||
|
||||
byte[][] keyVals = extractKeyValues(value);
|
||||
if(keyVals != null) {
|
||||
ImmutableBytesWritable tKey = createGroupKey(keyVals);
|
||||
output.collect(tKey, value);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract column values from the current record. This method returns
|
||||
* null if any of the columns are not found.
|
||||
*
|
||||
* Override this method if you want to deal with nulls differently.
|
||||
*
|
||||
* @param r
|
||||
* @return array of byte values
|
||||
*/
|
||||
protected byte[][] extractKeyValues(RowResult r) {
|
||||
byte[][] keyVals = null;
|
||||
ArrayList<byte[]> foundList = new ArrayList<byte[]>();
|
||||
int numCols = m_columns.length;
|
||||
if(numCols > 0) {
|
||||
for (Map.Entry<byte [], Cell> e: r.entrySet()) {
|
||||
byte [] column = e.getKey();
|
||||
for (int i = 0; i < numCols; i++) {
|
||||
if (Bytes.equals(column, m_columns[i])) {
|
||||
foundList.add(e.getValue().getValue());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(foundList.size() == numCols) {
|
||||
keyVals = foundList.toArray(new byte[numCols][]);
|
||||
}
|
||||
}
|
||||
return keyVals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a key by concatenating multiple column values.
|
||||
* Override this function in order to produce different types of keys.
|
||||
*
|
||||
* @param vals
|
||||
* @return key generated by concatenating multiple column values
|
||||
*/
|
||||
protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
|
||||
if(vals == null) {
|
||||
return null;
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for(int i = 0; i < vals.length; i++) {
|
||||
if(i > 0) {
|
||||
sb.append(" ");
|
||||
}
|
||||
try {
|
||||
sb.append(new String(vals[i], HConstants.UTF8_ENCODING));
|
||||
} catch (UnsupportedEncodingException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
return new ImmutableBytesWritable(Bytes.toBytes(sb.toString()));
|
||||
}
|
||||
}
|
|
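A minimal sketch of wiring the removed GroupingTableMap into a JobConf, simply mirroring the initJob signature above; the table and column names are hypothetical and only illustrate the space-delimited column lists the class expects.
// Hypothetical setup; table and column names are illustrative only.
JobConf job = new JobConf(new HBaseConfiguration(), GroupingTableMap.class);
GroupingTableMap.initJob("exampleTable", "info:a info:b", "info:a",
    GroupingTableMap.class, job);
// Rows missing either grouping column are skipped by map(), as noted above.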
@ -1,91 +0,0 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.Partitioner;
|
||||
|
||||
|
||||
/**
|
||||
* This is used to partition the output keys into groups of keys.
|
||||
* Keys are grouped according to the regions that currently exist
|
||||
* so that each reducer fills a single region and load is distributed.
|
||||
*
|
||||
* @param <K2>
|
||||
* @param <V2>
|
||||
*/
|
||||
@Deprecated
|
||||
public class HRegionPartitioner<K2,V2>
|
||||
implements Partitioner<ImmutableBytesWritable, V2> {
|
||||
private final Log LOG = LogFactory.getLog(TableInputFormat.class);
|
||||
private HTable table;
|
||||
private byte[][] startKeys;
|
||||
|
||||
public void configure(JobConf job) {
|
||||
try {
|
||||
this.table = new HTable(new HBaseConfiguration(job),
|
||||
job.get(TableOutputFormat.OUTPUT_TABLE));
|
||||
} catch (IOException e) {
|
||||
LOG.error(e);
|
||||
}
|
||||
|
||||
try {
|
||||
this.startKeys = this.table.getStartKeys();
|
||||
} catch (IOException e) {
|
||||
LOG.error(e);
|
||||
}
|
||||
}
|
||||
|
||||
public int getPartition(ImmutableBytesWritable key,
|
||||
V2 value, int numPartitions) {
|
||||
byte[] region = null;
|
||||
// Only one region, so return partition 0
|
||||
if (this.startKeys.length == 1){
|
||||
return 0;
|
||||
}
|
||||
try {
|
||||
// Not sure if this is cached after a split so we could have problems
|
||||
// here if a region splits while mapping
|
||||
region = table.getRegionLocation(key.get()).getRegionInfo().getStartKey();
|
||||
} catch (IOException e) {
|
||||
LOG.error(e);
|
||||
}
|
||||
for (int i = 0; i < this.startKeys.length; i++){
|
||||
if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
|
||||
if (i >= numPartitions-1){
|
||||
// handle the case where we have fewer reducers than regions.
|
||||
return (Integer.toString(i).hashCode()
|
||||
& Integer.MAX_VALUE) % numPartitions;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
}
|
||||
// if the above fails to find a matching start key, we need to return something
|
||||
return 0;
|
||||
}
|
||||
}
|
|
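As a sketch of how this partitioner was typically attached to a reduce-to-table job: configure() above reads the output table name from TableOutputFormat.OUTPUT_TABLE, so the job only needs that property plus the partitioner class set. The table name here is hypothetical.
// Hypothetical job wiring; "exampleTable" is illustrative only.
JobConf job = new JobConf(new HBaseConfiguration(), HRegionPartitioner.class);
job.set(TableOutputFormat.OUTPUT_TABLE, "exampleTable");
job.setPartitionerClass(HRegionPartitioner.class);
// Matching the reducer count to the region count keeps each reducer writing to one region.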
@ -1,76 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.MapReduceBase;
|
||||
import org.apache.hadoop.mapred.OutputCollector;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
|
||||
/**
|
||||
* Pass the given key and record as-is to reduce
|
||||
*/
|
||||
@Deprecated
|
||||
public class IdentityTableMap
|
||||
extends MapReduceBase
|
||||
implements TableMap<ImmutableBytesWritable, RowResult> {
|
||||
|
||||
/** constructor */
|
||||
public IdentityTableMap() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this before submitting a TableMap job. It will
|
||||
* appropriately set up the JobConf.
|
||||
*
|
||||
* @param table table name
|
||||
* @param columns columns to scan
|
||||
* @param mapper mapper class
|
||||
* @param job job configuration
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public static void initJob(String table, String columns,
|
||||
Class<? extends TableMap> mapper, JobConf job) {
|
||||
TableMapReduceUtil.initTableMapJob(table, columns, mapper,
|
||||
ImmutableBytesWritable.class,
|
||||
RowResult.class, job);
|
||||
}
|
||||
|
||||
/**
|
||||
* Pass the key, value to reduce
|
||||
* @param key
|
||||
* @param value
|
||||
* @param output
|
||||
* @param reporter
|
||||
* @throws IOException
|
||||
*/
|
||||
public void map(ImmutableBytesWritable key, RowResult value,
|
||||
OutputCollector<ImmutableBytesWritable,RowResult> output,
|
||||
Reporter reporter) throws IOException {
|
||||
|
||||
// pass the key and value through unchanged
|
||||
output.collect(key, value);
|
||||
}
|
||||
}
|
|
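A minimal sketch of the initJob call above; the table and column names are hypothetical placeholders for a space-delimited column list.
// Hypothetical setup; names are illustrative only.
JobConf job = new JobConf(new HBaseConfiguration(), IdentityTableMap.class);
IdentityTableMap.initJob("exampleTable", "info:a info:b",
    IdentityTableMap.class, job);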
@ -1,61 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.mapred.MapReduceBase;
|
||||
import org.apache.hadoop.mapred.OutputCollector;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
|
||||
/**
|
||||
* Write each (key, record) pair to the table
|
||||
*/
|
||||
@Deprecated
|
||||
public class IdentityTableReduce
|
||||
extends MapReduceBase
|
||||
implements TableReduce<ImmutableBytesWritable, BatchUpdate> {
|
||||
@SuppressWarnings("unused")
|
||||
private static final Log LOG =
|
||||
LogFactory.getLog(IdentityTableReduce.class.getName());
|
||||
|
||||
/**
|
||||
* No aggregation, output pairs of (key, record)
|
||||
* @param key
|
||||
* @param values
|
||||
* @param output
|
||||
* @param reporter
|
||||
* @throws IOException
|
||||
*/
|
||||
public void reduce(ImmutableBytesWritable key, Iterator<BatchUpdate> values,
|
||||
OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
|
||||
Reporter reporter)
|
||||
throws IOException {
|
||||
|
||||
while(values.hasNext()) {
|
||||
output.collect(key, values.next());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,423 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.io.StringWriter;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.w3c.dom.Text;
|
||||
|
||||
/**
|
||||
* Configuration parameters for building a Lucene index
|
||||
*/
|
||||
@Deprecated
|
||||
public class IndexConfiguration extends Configuration {
|
||||
private static final Log LOG = LogFactory.getLog(IndexConfiguration.class);
|
||||
|
||||
static final String HBASE_COLUMN_NAME = "hbase.column.name";
|
||||
static final String HBASE_COLUMN_STORE = "hbase.column.store";
|
||||
static final String HBASE_COLUMN_INDEX = "hbase.column.index";
|
||||
static final String HBASE_COLUMN_TOKENIZE = "hbase.column.tokenize";
|
||||
static final String HBASE_COLUMN_BOOST = "hbase.column.boost";
|
||||
static final String HBASE_COLUMN_OMIT_NORMS = "hbase.column.omit.norms";
|
||||
static final String HBASE_INDEX_ROWKEY_NAME = "hbase.index.rowkey.name";
|
||||
static final String HBASE_INDEX_ANALYZER_NAME = "hbase.index.analyzer.name";
|
||||
static final String HBASE_INDEX_MAX_BUFFERED_DOCS =
|
||||
"hbase.index.max.buffered.docs";
|
||||
static final String HBASE_INDEX_MAX_BUFFERED_DELS =
|
||||
"hbase.index.max.buffered.dels";
|
||||
static final String HBASE_INDEX_MAX_FIELD_LENGTH =
|
||||
"hbase.index.max.field.length";
|
||||
static final String HBASE_INDEX_MAX_MERGE_DOCS =
|
||||
"hbase.index.max.merge.docs";
|
||||
static final String HBASE_INDEX_MERGE_FACTOR = "hbase.index.merge.factor";
|
||||
// double ramBufferSizeMB;
|
||||
static final String HBASE_INDEX_SIMILARITY_NAME =
|
||||
"hbase.index.similarity.name";
|
||||
static final String HBASE_INDEX_USE_COMPOUND_FILE =
|
||||
"hbase.index.use.compound.file";
|
||||
static final String HBASE_INDEX_OPTIMIZE = "hbase.index.optimize";
|
||||
|
||||
public static class ColumnConf extends Properties {
|
||||
|
||||
private static final long serialVersionUID = 7419012290580607821L;
|
||||
|
||||
boolean getBoolean(String name, boolean defaultValue) {
|
||||
String valueString = getProperty(name);
|
||||
if ("true".equals(valueString))
|
||||
return true;
|
||||
else if ("false".equals(valueString))
|
||||
return false;
|
||||
else
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
void setBoolean(String name, boolean value) {
|
||||
setProperty(name, Boolean.toString(value));
|
||||
}
|
||||
|
||||
float getFloat(String name, float defaultValue) {
|
||||
String valueString = getProperty(name);
|
||||
if (valueString == null)
|
||||
return defaultValue;
|
||||
try {
|
||||
return Float.parseFloat(valueString);
|
||||
} catch (NumberFormatException e) {
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
|
||||
void setFloat(String name, float value) {
|
||||
setProperty(name, Float.toString(value));
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, ColumnConf> columnMap =
|
||||
new ConcurrentHashMap<String, ColumnConf>();
|
||||
|
||||
public Iterator<String> columnNameIterator() {
|
||||
return columnMap.keySet().iterator();
|
||||
}
|
||||
|
||||
public boolean isIndex(String columnName) {
|
||||
return getColumn(columnName).getBoolean(HBASE_COLUMN_INDEX, true);
|
||||
}
|
||||
|
||||
public void setIndex(String columnName, boolean index) {
|
||||
getColumn(columnName).setBoolean(HBASE_COLUMN_INDEX, index);
|
||||
}
|
||||
|
||||
public boolean isStore(String columnName) {
|
||||
return getColumn(columnName).getBoolean(HBASE_COLUMN_STORE, false);
|
||||
}
|
||||
|
||||
public void setStore(String columnName, boolean store) {
|
||||
getColumn(columnName).setBoolean(HBASE_COLUMN_STORE, store);
|
||||
}
|
||||
|
||||
public boolean isTokenize(String columnName) {
|
||||
return getColumn(columnName).getBoolean(HBASE_COLUMN_TOKENIZE, true);
|
||||
}
|
||||
|
||||
public void setTokenize(String columnName, boolean tokenize) {
|
||||
getColumn(columnName).setBoolean(HBASE_COLUMN_TOKENIZE, tokenize);
|
||||
}
|
||||
|
||||
public float getBoost(String columnName) {
|
||||
return getColumn(columnName).getFloat(HBASE_COLUMN_BOOST, 1.0f);
|
||||
}
|
||||
|
||||
public void setBoost(String columnName, float boost) {
|
||||
getColumn(columnName).setFloat(HBASE_COLUMN_BOOST, boost);
|
||||
}
|
||||
|
||||
public boolean isOmitNorms(String columnName) {
|
||||
return getColumn(columnName).getBoolean(HBASE_COLUMN_OMIT_NORMS, true);
|
||||
}
|
||||
|
||||
public void setOmitNorms(String columnName, boolean omitNorms) {
|
||||
getColumn(columnName).setBoolean(HBASE_COLUMN_OMIT_NORMS, omitNorms);
|
||||
}
|
||||
|
||||
private ColumnConf getColumn(String columnName) {
|
||||
ColumnConf column = columnMap.get(columnName);
|
||||
if (column == null) {
|
||||
column = new ColumnConf();
|
||||
columnMap.put(columnName, column);
|
||||
}
|
||||
return column;
|
||||
}
|
||||
|
||||
public String getAnalyzerName() {
|
||||
return get(HBASE_INDEX_ANALYZER_NAME,
|
||||
"org.apache.lucene.analysis.standard.StandardAnalyzer");
|
||||
}
|
||||
|
||||
public void setAnalyzerName(String analyzerName) {
|
||||
set(HBASE_INDEX_ANALYZER_NAME, analyzerName);
|
||||
}
|
||||
|
||||
public int getMaxBufferedDeleteTerms() {
|
||||
return getInt(HBASE_INDEX_MAX_BUFFERED_DELS, 1000);
|
||||
}
|
||||
|
||||
public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
|
||||
setInt(HBASE_INDEX_MAX_BUFFERED_DELS, maxBufferedDeleteTerms);
|
||||
}
|
||||
|
||||
public int getMaxBufferedDocs() {
|
||||
return getInt(HBASE_INDEX_MAX_BUFFERED_DOCS, 10);
|
||||
}
|
||||
|
||||
public void setMaxBufferedDocs(int maxBufferedDocs) {
|
||||
setInt(HBASE_INDEX_MAX_BUFFERED_DOCS, maxBufferedDocs);
|
||||
}
|
||||
|
||||
public int getMaxFieldLength() {
|
||||
return getInt(HBASE_INDEX_MAX_FIELD_LENGTH, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
public void setMaxFieldLength(int maxFieldLength) {
|
||||
setInt(HBASE_INDEX_MAX_FIELD_LENGTH, maxFieldLength);
|
||||
}
|
||||
|
||||
public int getMaxMergeDocs() {
|
||||
return getInt(HBASE_INDEX_MAX_MERGE_DOCS, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
public void setMaxMergeDocs(int maxMergeDocs) {
|
||||
setInt(HBASE_INDEX_MAX_MERGE_DOCS, maxMergeDocs);
|
||||
}
|
||||
|
||||
public int getMergeFactor() {
|
||||
return getInt(HBASE_INDEX_MERGE_FACTOR, 10);
|
||||
}
|
||||
|
||||
public void setMergeFactor(int mergeFactor) {
|
||||
setInt(HBASE_INDEX_MERGE_FACTOR, mergeFactor);
|
||||
}
|
||||
|
||||
public String getRowkeyName() {
|
||||
return get(HBASE_INDEX_ROWKEY_NAME, "ROWKEY");
|
||||
}
|
||||
|
||||
public void setRowkeyName(String rowkeyName) {
|
||||
set(HBASE_INDEX_ROWKEY_NAME, rowkeyName);
|
||||
}
|
||||
|
||||
public String getSimilarityName() {
|
||||
return get(HBASE_INDEX_SIMILARITY_NAME, null);
|
||||
}
|
||||
|
||||
public void setSimilarityName(String similarityName) {
|
||||
set(HBASE_INDEX_SIMILARITY_NAME, similarityName);
|
||||
}
|
||||
|
||||
public boolean isUseCompoundFile() {
|
||||
return getBoolean(HBASE_INDEX_USE_COMPOUND_FILE, false);
|
||||
}
|
||||
|
||||
public void setUseCompoundFile(boolean useCompoundFile) {
|
||||
setBoolean(HBASE_INDEX_USE_COMPOUND_FILE, useCompoundFile);
|
||||
}
|
||||
|
||||
public boolean doOptimize() {
|
||||
return getBoolean(HBASE_INDEX_OPTIMIZE, true);
|
||||
}
|
||||
|
||||
public void setDoOptimize(boolean doOptimize) {
|
||||
setBoolean(HBASE_INDEX_OPTIMIZE, doOptimize);
|
||||
}
|
||||
|
||||
public void addFromXML(String content) {
|
||||
try {
|
||||
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
|
||||
.newDocumentBuilder();
|
||||
|
||||
Document doc = builder
|
||||
.parse(new ByteArrayInputStream(content.getBytes()));
|
||||
|
||||
Element root = doc.getDocumentElement();
|
||||
if (!"configuration".equals(root.getTagName())) {
|
||||
LOG.fatal("bad conf file: top-level element not <configuration>");
|
||||
}
|
||||
|
||||
NodeList props = root.getChildNodes();
|
||||
for (int i = 0; i < props.getLength(); i++) {
|
||||
Node propNode = props.item(i);
|
||||
if (!(propNode instanceof Element)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Element prop = (Element) propNode;
|
||||
if ("property".equals(prop.getTagName())) {
|
||||
propertyFromXML(prop, null);
|
||||
} else if ("column".equals(prop.getTagName())) {
|
||||
columnConfFromXML(prop);
|
||||
} else {
|
||||
LOG.warn("bad conf content: element neither <property> nor <column>");
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.fatal("error parsing conf content: " + e);
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void propertyFromXML(Element prop, Properties properties) {
|
||||
NodeList fields = prop.getChildNodes();
|
||||
String attr = null;
|
||||
String value = null;
|
||||
|
||||
for (int j = 0; j < fields.getLength(); j++) {
|
||||
Node fieldNode = fields.item(j);
|
||||
if (!(fieldNode instanceof Element)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Element field = (Element) fieldNode;
|
||||
if ("name".equals(field.getTagName())) {
|
||||
attr = ((Text) field.getFirstChild()).getData();
|
||||
}
|
||||
if ("value".equals(field.getTagName()) && field.hasChildNodes()) {
|
||||
value = ((Text) field.getFirstChild()).getData();
|
||||
}
|
||||
}
|
||||
|
||||
if (attr != null && value != null) {
|
||||
if (properties == null) {
|
||||
set(attr, value);
|
||||
} else {
|
||||
properties.setProperty(attr, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void columnConfFromXML(Element column) {
|
||||
ColumnConf columnConf = new ColumnConf();
|
||||
NodeList props = column.getChildNodes();
|
||||
for (int i = 0; i < props.getLength(); i++) {
|
||||
Node propNode = props.item(i);
|
||||
if (!(propNode instanceof Element)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Element prop = (Element) propNode;
|
||||
if ("property".equals(prop.getTagName())) {
|
||||
propertyFromXML(prop, columnConf);
|
||||
} else {
|
||||
LOG.warn("bad conf content: element not <property>");
|
||||
}
|
||||
}
|
||||
|
||||
if (columnConf.getProperty(HBASE_COLUMN_NAME) != null) {
|
||||
columnMap.put(columnConf.getProperty(HBASE_COLUMN_NAME), columnConf);
|
||||
} else {
|
||||
LOG.warn("bad column conf: name not specified");
|
||||
}
|
||||
}
|
||||
|
||||
public void write(OutputStream out) {
|
||||
try {
|
||||
Document doc = writeDocument();
|
||||
DOMSource source = new DOMSource(doc);
|
||||
StreamResult result = new StreamResult(out);
|
||||
TransformerFactory transFactory = TransformerFactory.newInstance();
|
||||
Transformer transformer = transFactory.newTransformer();
|
||||
transformer.transform(source, result);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private Document writeDocument() {
|
||||
Iterator<Map.Entry<String, String>> iter = iterator();
|
||||
try {
|
||||
Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
|
||||
.newDocument();
|
||||
Element conf = doc.createElement("configuration");
|
||||
doc.appendChild(conf);
|
||||
conf.appendChild(doc.createTextNode("\n"));
|
||||
|
||||
Map.Entry<String, String> entry;
|
||||
while (iter.hasNext()) {
|
||||
entry = iter.next();
|
||||
String name = entry.getKey();
|
||||
String value = entry.getValue();
|
||||
writeProperty(doc, conf, name, value);
|
||||
}
|
||||
|
||||
Iterator<String> columnIter = columnNameIterator();
|
||||
while (columnIter.hasNext()) {
|
||||
writeColumn(doc, conf, columnIter.next());
|
||||
}
|
||||
|
||||
return doc;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeProperty(Document doc, Element parent, String name,
|
||||
String value) {
|
||||
Element propNode = doc.createElement("property");
|
||||
parent.appendChild(propNode);
|
||||
|
||||
Element nameNode = doc.createElement("name");
|
||||
nameNode.appendChild(doc.createTextNode(name));
|
||||
propNode.appendChild(nameNode);
|
||||
|
||||
Element valueNode = doc.createElement("value");
|
||||
valueNode.appendChild(doc.createTextNode(value));
|
||||
propNode.appendChild(valueNode);
|
||||
|
||||
parent.appendChild(doc.createTextNode("\n"));
|
||||
}
|
||||
|
||||
private void writeColumn(Document doc, Element parent, String columnName) {
|
||||
Element column = doc.createElement("column");
|
||||
parent.appendChild(column);
|
||||
column.appendChild(doc.createTextNode("\n"));
|
||||
|
||||
ColumnConf columnConf = getColumn(columnName);
|
||||
for (Map.Entry<Object, Object> entry : columnConf.entrySet()) {
|
||||
if (entry.getKey() instanceof String
|
||||
&& entry.getValue() instanceof String) {
|
||||
writeProperty(doc, column, (String) entry.getKey(), (String) entry
|
||||
.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringWriter writer = new StringWriter();
|
||||
try {
|
||||
Document doc = writeDocument();
|
||||
DOMSource source = new DOMSource(doc);
|
||||
StreamResult result = new StreamResult(writer);
|
||||
TransformerFactory transFactory = TransformerFactory.newInstance();
|
||||
Transformer transformer = transFactory.newTransformer();
|
||||
transformer.transform(source, result);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return writer.toString();
|
||||
}
|
||||
}
|
|
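As an illustration of the XML shape addFromXML() accepts: the element names (configuration, property, column) and property keys come from the parsing code and constants above, while the column name and values themselves are hypothetical.
// Minimal, hypothetical index configuration accepted by addFromXML().
String content =
    "<configuration>" +
    "  <column>" +
    "    <property><name>hbase.column.name</name><value>contents:</value></property>" +
    "    <property><name>hbase.column.store</name><value>true</value></property>" +
    "    <property><name>hbase.column.tokenize</name><value>true</value></property>" +
    "  </column>" +
    "  <property><name>hbase.index.optimize</name><value>true</value></property>" +
    "</configuration>";
IndexConfiguration iconf = new IndexConfiguration();
iconf.addFromXML(content);  // per-column settings land in columnMap, the rest in the Configuration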
@ -1,164 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.FileOutputFormat;
|
||||
import org.apache.hadoop.mapred.RecordWriter;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
|
||||
/**
|
||||
* Create a local index, unwrap Lucene documents created by reduce, add them to
|
||||
* the index, and copy the index to the destination.
|
||||
*/
|
||||
@Deprecated
|
||||
public class IndexOutputFormat extends
|
||||
FileOutputFormat<ImmutableBytesWritable, LuceneDocumentWrapper> {
|
||||
static final Log LOG = LogFactory.getLog(IndexOutputFormat.class);
|
||||
|
||||
private Random random = new Random();
|
||||
|
||||
@Override
|
||||
public RecordWriter<ImmutableBytesWritable, LuceneDocumentWrapper>
|
||||
getRecordWriter(final FileSystem fs, JobConf job, String name,
|
||||
final Progressable progress)
|
||||
throws IOException {
|
||||
|
||||
final Path perm = new Path(FileOutputFormat.getOutputPath(job), name);
|
||||
final Path temp = job.getLocalPath("index/_"
|
||||
+ Integer.toString(random.nextInt()));
|
||||
|
||||
LOG.info("To index into " + perm);
|
||||
|
||||
// delete old, if any
|
||||
fs.delete(perm, true);
|
||||
|
||||
final IndexConfiguration indexConf = new IndexConfiguration();
|
||||
String content = job.get("hbase.index.conf");
|
||||
if (content != null) {
|
||||
indexConf.addFromXML(content);
|
||||
}
|
||||
|
||||
String analyzerName = indexConf.getAnalyzerName();
|
||||
Analyzer analyzer;
|
||||
try {
|
||||
Class<?> analyzerClass = Class.forName(analyzerName);
|
||||
analyzer = (Analyzer) analyzerClass.newInstance();
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Error in creating an analyzer object "
|
||||
+ analyzerName);
|
||||
}
|
||||
|
||||
// build locally first
|
||||
final IndexWriter writer = new IndexWriter(fs.startLocalOutput(perm, temp)
|
||||
.toString(), analyzer, true);
|
||||
|
||||
// no delete, so no need for maxBufferedDeleteTerms
|
||||
writer.setMaxBufferedDocs(indexConf.getMaxBufferedDocs());
|
||||
writer.setMaxFieldLength(indexConf.getMaxFieldLength());
|
||||
writer.setMaxMergeDocs(indexConf.getMaxMergeDocs());
|
||||
writer.setMergeFactor(indexConf.getMergeFactor());
|
||||
String similarityName = indexConf.getSimilarityName();
|
||||
if (similarityName != null) {
|
||||
try {
|
||||
Class<?> similarityClass = Class.forName(similarityName);
|
||||
Similarity similarity = (Similarity) similarityClass.newInstance();
|
||||
writer.setSimilarity(similarity);
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Error in creating a similarty object "
|
||||
+ similarityName);
|
||||
}
|
||||
}
|
||||
writer.setUseCompoundFile(indexConf.isUseCompoundFile());
|
||||
|
||||
return new RecordWriter<ImmutableBytesWritable, LuceneDocumentWrapper>() {
|
||||
boolean closed;
|
||||
private long docCount = 0;
|
||||
|
||||
public void write(ImmutableBytesWritable key,
|
||||
LuceneDocumentWrapper value)
|
||||
throws IOException {
|
||||
// unwrap and index doc
|
||||
Document doc = value.get();
|
||||
writer.addDocument(doc);
|
||||
docCount++;
|
||||
progress.progress();
|
||||
}
|
||||
|
||||
public void close(final Reporter reporter) throws IOException {
|
||||
// spawn a thread to give progress heartbeats
|
||||
Thread prog = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
while (!closed) {
|
||||
try {
|
||||
reporter.setStatus("closing");
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException e) {
|
||||
continue;
|
||||
} catch (Throwable e) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
prog.start();
|
||||
|
||||
// optimize index
|
||||
if (indexConf.doOptimize()) {
|
||||
if (LOG.isInfoEnabled()) {
|
||||
LOG.info("Optimizing index.");
|
||||
}
|
||||
writer.optimize();
|
||||
}
|
||||
|
||||
// close index
|
||||
writer.close();
|
||||
if (LOG.isInfoEnabled()) {
|
||||
LOG.info("Done indexing " + docCount + " docs.");
|
||||
}
|
||||
|
||||
// copy to perm destination in dfs
|
||||
fs.completeLocalOutput(perm, temp);
|
||||
if (LOG.isInfoEnabled()) {
|
||||
LOG.info("Copy done.");
|
||||
}
|
||||
} finally {
|
||||
closed = true;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -1,111 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.MapReduceBase;
|
||||
import org.apache.hadoop.mapred.OutputCollector;
|
||||
import org.apache.hadoop.mapred.Reducer;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
/**
|
||||
* Construct a Lucene document per row, which is consumed by IndexOutputFormat
|
||||
* to build a Lucene index
|
||||
*/
|
||||
@Deprecated
|
||||
public class IndexTableReduce extends MapReduceBase implements
|
||||
Reducer<ImmutableBytesWritable, RowResult, ImmutableBytesWritable, LuceneDocumentWrapper> {
|
||||
private static final Log LOG = LogFactory.getLog(IndexTableReduce.class);
|
||||
private IndexConfiguration indexConf;
|
||||
|
||||
@Override
|
||||
public void configure(JobConf job) {
|
||||
super.configure(job);
|
||||
indexConf = new IndexConfiguration();
|
||||
String content = job.get("hbase.index.conf");
|
||||
if (content != null) {
|
||||
indexConf.addFromXML(content);
|
||||
}
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Index conf: " + indexConf);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
}
|
||||
|
||||
public void reduce(ImmutableBytesWritable key, Iterator<RowResult> values,
|
||||
OutputCollector<ImmutableBytesWritable, LuceneDocumentWrapper> output,
|
||||
Reporter reporter)
|
||||
throws IOException {
|
||||
if (!values.hasNext()) {
|
||||
return;
|
||||
}
|
||||
|
||||
Document doc = new Document();
|
||||
|
||||
// index and store row key, row key already UTF-8 encoded
|
||||
Field keyField = new Field(indexConf.getRowkeyName(),
|
||||
Bytes.toString(key.get(), key.getOffset(), key.getLength()),
|
||||
Field.Store.YES, Field.Index.UN_TOKENIZED);
|
||||
keyField.setOmitNorms(true);
|
||||
doc.add(keyField);
|
||||
|
||||
while (values.hasNext()) {
|
||||
RowResult value = values.next();
|
||||
|
||||
// each column (name-value pair) is a field (name-value pair)
|
||||
for (Map.Entry<byte [], Cell> entry : value.entrySet()) {
|
||||
// name is already UTF-8 encoded
|
||||
String column = Bytes.toString(entry.getKey());
|
||||
byte[] columnValue = entry.getValue().getValue();
|
||||
Field.Store store = indexConf.isStore(column)?
|
||||
Field.Store.YES: Field.Store.NO;
|
||||
Field.Index index = indexConf.isIndex(column)?
|
||||
(indexConf.isTokenize(column)?
|
||||
Field.Index.TOKENIZED: Field.Index.UN_TOKENIZED):
|
||||
Field.Index.NO;
|
||||
|
||||
// UTF-8 encode value
|
||||
Field field = new Field(column, Bytes.toString(columnValue),
|
||||
store, index);
|
||||
field.setBoost(indexConf.getBoost(column));
|
||||
field.setOmitNorms(indexConf.isOmitNorms(column));
|
||||
|
||||
doc.add(field);
|
||||
}
|
||||
}
|
||||
output.collect(key, new LuceneDocumentWrapper(doc));
|
||||
}
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
||||
/**
|
||||
* A utility class used to pass a Lucene document from reduce to OutputFormat.
|
||||
* It doesn't really serialize/deserialize a Lucene document.
|
||||
*/
|
||||
@Deprecated
|
||||
public class LuceneDocumentWrapper implements Writable {
|
||||
protected Document doc;
|
||||
|
||||
/**
|
||||
* @param doc
|
||||
*/
|
||||
public LuceneDocumentWrapper(Document doc) {
|
||||
this.doc = doc;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the document
|
||||
*/
|
||||
public Document get() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) {
|
||||
// intentionally left blank
|
||||
}
|
||||
|
||||
public void write(DataOutput out) {
|
||||
// intentionally left blank
|
||||
}
|
||||
}
|
|
@ -1,137 +0,0 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.io.Cell;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
import org.apache.hadoop.mapred.FileOutputFormat;
|
||||
import org.apache.hadoop.mapred.JobClient;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.OutputCollector;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.mapred.lib.IdentityReducer;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
|
||||
/**
|
||||
* A job with a map to count rows.
|
||||
* Map outputs table rows IF the input row has columns that have content.
|
||||
* Uses an {@link IdentityReducer}
|
||||
*/
|
||||
@Deprecated
|
||||
public class RowCounter extends Configured implements Tool {
|
||||
// Name of this 'program'
|
||||
static final String NAME = "rowcounter";
|
||||
|
||||
/**
|
||||
* Mapper that runs the count.
|
||||
*/
|
||||
static class RowCounterMapper
|
||||
implements TableMap<ImmutableBytesWritable, RowResult> {
|
||||
private static enum Counters {ROWS}
|
||||
|
||||
public void map(ImmutableBytesWritable row, RowResult value,
|
||||
OutputCollector<ImmutableBytesWritable, RowResult> output,
|
||||
Reporter reporter)
|
||||
throws IOException {
|
||||
boolean content = false;
|
||||
for (Map.Entry<byte [], Cell> e: value.entrySet()) {
|
||||
Cell cell = e.getValue();
|
||||
if (cell != null && cell.getValue().length > 0) {
|
||||
content = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!content) {
|
||||
// Don't count rows that are all empty values.
|
||||
return;
|
||||
}
|
||||
// Give out same value every time. We're only interested in the row/key
|
||||
reporter.incrCounter(Counters.ROWS, 1);
|
||||
}
|
||||
|
||||
public void configure(JobConf jc) {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
// Nothing to do.
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @return the JobConf
|
||||
* @throws IOException
|
||||
*/
|
||||
public JobConf createSubmittableJob(String[] args) throws IOException {
|
||||
JobConf c = new JobConf(getConf(), getClass());
|
||||
c.setJobName(NAME);
|
||||
// Columns are space delimited
|
||||
StringBuilder sb = new StringBuilder();
|
||||
final int columnoffset = 2;
|
||||
for (int i = columnoffset; i < args.length; i++) {
|
||||
if (i > columnoffset) {
|
||||
sb.append(" ");
|
||||
}
|
||||
sb.append(args[i]);
|
||||
}
|
||||
// Second argument is the table name.
|
||||
TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
|
||||
RowCounterMapper.class, ImmutableBytesWritable.class, RowResult.class, c);
|
||||
c.setNumReduceTasks(0);
|
||||
// First arg is the output directory.
|
||||
FileOutputFormat.setOutputPath(c, new Path(args[0]));
|
||||
return c;
|
||||
}
|
||||
|
||||
static int printUsage() {
|
||||
System.out.println(NAME +
|
||||
" <outputdir> <tablename> <column1> [<column2>...]");
|
||||
return -1;
|
||||
}
|
||||
|
||||
public int run(final String[] args) throws Exception {
|
||||
// Make sure there are at least 3 parameters
|
||||
if (args.length < 3) {
|
||||
System.err.println("ERROR: Wrong number of parameters: " + args.length);
|
||||
return printUsage();
|
||||
}
|
||||
JobClient.runJob(createSubmittableJob(args));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* @throws Exception
|
||||
*/
|
||||
public static void main(String[] args) throws Exception {
|
||||
HBaseConfiguration c = new HBaseConfiguration();
|
||||
int errCode = ToolRunner.run(c, new RowCounter(), args);
|
||||
System.exit(errCode);
|
||||
}
|
||||
}
|
|
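For reference, a fragment showing how the removed RowCounter could be invoked from code, matching both the usage string printed above and the main() method; the output directory, table, and column are hypothetical.
// Hypothetical invocation; paths and names are illustrative only.
String[] args = new String[] {"/tmp/rowcounter-out", "exampleTable", "info:a"};
int errCode = ToolRunner.run(new HBaseConfiguration(), new RowCounter(), args);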
@ -1,6 +0,0 @@
|
|||
|
||||
# ResourceBundle properties file for RowCounter MR job
|
||||
|
||||
CounterGroupName= RowCounter
|
||||
|
||||
ROWS.name= Rows
|
|
@ -1,83 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.mapred.FileInputFormat;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.JobConfigurable;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
/**
|
||||
* Convert HBase tabular data into a format that is consumable by Map/Reduce.
|
||||
*/
|
||||
@Deprecated
|
||||
public class TableInputFormat extends TableInputFormatBase implements
|
||||
JobConfigurable {
|
||||
private final Log LOG = LogFactory.getLog(TableInputFormat.class);
|
||||
|
||||
/**
|
||||
* space delimited list of columns
|
||||
*/
|
||||
public static final String COLUMN_LIST = "hbase.mapred.tablecolumns";
|
||||
|
||||
public void configure(JobConf job) {
|
||||
Path[] tableNames = FileInputFormat.getInputPaths(job);
|
||||
String colArg = job.get(COLUMN_LIST);
|
||||
String[] colNames = colArg.split(" ");
|
||||
byte [][] m_cols = new byte[colNames.length][];
|
||||
for (int i = 0; i < m_cols.length; i++) {
|
||||
m_cols[i] = Bytes.toBytes(colNames[i]);
|
||||
}
|
||||
setInputColumns(m_cols);
|
||||
try {
|
||||
setHTable(new HTable(new HBaseConfiguration(job), tableNames[0].getName()));
|
||||
} catch (Exception e) {
|
||||
LOG.error(StringUtils.stringifyException(e));
|
||||
}
|
||||
}
|
||||
|
||||
public void validateInput(JobConf job) throws IOException {
|
||||
// expecting exactly one path
|
||||
Path [] tableNames = FileInputFormat.getInputPaths(job);
|
||||
if (tableNames == null || tableNames.length > 1) {
|
||||
throw new IOException("expecting one table name");
|
||||
}
|
||||
|
||||
// connected to table?
|
||||
if (getHTable() == null) {
|
||||
throw new IOException("could not connect to table '" +
|
||||
tableNames[0].getName() + "'");
|
||||
}
|
||||
|
||||
// expecting at least one column
|
||||
String colArg = job.get(COLUMN_LIST);
|
||||
if (colArg == null || colArg.length() == 0) {
|
||||
throw new IOException("expecting at least one column");
|
||||
}
|
||||
}
|
||||
}
|
|
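A sketch of the JobConf settings that TableInputFormat.configure() above reads: the input path carries the table name and COLUMN_LIST carries the space-delimited columns. The table and column names are hypothetical.
// Hypothetical job setup; the table and column names are illustrative only.
JobConf job = new JobConf(new HBaseConfiguration(), TableInputFormat.class);
job.setInputFormat(TableInputFormat.class);
FileInputFormat.setInputPaths(job, new Path("exampleTable"));
job.set(TableInputFormat.COLUMN_LIST, "info:a info:b");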
@ -1,352 +0,0 @@
|
|||
/**
|
||||
* Copyright 2009 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.UnknownScannerException;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
import org.apache.hadoop.hbase.filter.RowFilterInterface;
|
||||
import org.apache.hadoop.hbase.filter.RowFilterSet;
|
||||
import org.apache.hadoop.hbase.filter.StopRowFilter;
|
||||
import org.apache.hadoop.hbase.filter.WhileMatchRowFilter;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.mapred.InputFormat;
|
||||
import org.apache.hadoop.mapred.InputSplit;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.RecordReader;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
|
||||
/**
|
||||
* A Base for {@link TableInputFormat}s. Receives a {@link HTable}, a
|
||||
 * byte [][] of input columns and optionally a {@link RowFilterInterface}.
|
||||
* Subclasses may use other TableRecordReader implementations.
|
||||
* <p>
|
||||
* An example of a subclass:
|
||||
* <pre>
|
||||
* class ExampleTIF extends TableInputFormatBase implements JobConfigurable {
|
||||
*
|
||||
* public void configure(JobConf job) {
|
||||
* HTable exampleTable = new HTable(new HBaseConfiguration(job),
|
||||
* Bytes.toBytes("exampleTable"));
|
||||
* // mandatory
|
||||
* setHTable(exampleTable);
|
||||
 *     byte [][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
|
||||
* Bytes.toBytes("columnB") };
|
||||
* // mandatory
|
||||
* setInputColumns(inputColumns);
|
||||
* RowFilterInterface exampleFilter = new RegExpRowFilter("keyPrefix.*");
|
||||
* // optional
|
||||
* setRowFilter(exampleFilter);
|
||||
* }
|
||||
*
|
||||
* public void validateInput(JobConf job) throws IOException {
|
||||
* }
|
||||
* }
|
||||
* </pre>
|
||||
*/
|
||||
|
||||
@Deprecated
|
||||
public abstract class TableInputFormatBase
|
||||
implements InputFormat<ImmutableBytesWritable, RowResult> {
|
||||
final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
|
||||
private byte [][] inputColumns;
|
||||
private HTable table;
|
||||
private TableRecordReader tableRecordReader;
|
||||
private RowFilterInterface rowFilter;
|
||||
|
||||
/**
|
||||
 * Iterate over an HBase table's data, returning (ImmutableBytesWritable, RowResult) pairs
|
||||
*/
|
||||
protected class TableRecordReader
|
||||
implements RecordReader<ImmutableBytesWritable, RowResult> {
|
||||
private byte [] startRow;
|
||||
private byte [] endRow;
|
||||
private byte [] lastRow;
|
||||
private RowFilterInterface trrRowFilter;
|
||||
private ResultScanner scanner;
|
||||
private HTable htable;
|
||||
private byte [][] trrInputColumns;
|
||||
|
||||
/**
|
||||
* Restart from survivable exceptions by creating a new scanner.
|
||||
*
|
||||
* @param firstRow
|
||||
* @throws IOException
|
||||
*/
|
||||
public void restart(byte[] firstRow) throws IOException {
|
||||
if ((endRow != null) && (endRow.length > 0)) {
|
||||
if (trrRowFilter != null) {
|
||||
Scan scan = new Scan(firstRow, endRow);
|
||||
scan.addColumns(trrInputColumns);
|
||||
scan.setOldFilter(trrRowFilter);
|
||||
this.scanner = this.htable.getScanner(scan);
|
||||
} else {
|
||||
LOG.debug("TIFB.restart, firstRow: " +
|
||||
Bytes.toStringBinary(firstRow) + ", endRow: " +
|
||||
Bytes.toStringBinary(endRow));
|
||||
Scan scan = new Scan(firstRow, endRow);
|
||||
scan.addColumns(trrInputColumns);
|
||||
this.scanner = this.htable.getScanner(scan);
|
||||
}
|
||||
} else {
|
||||
LOG.debug("TIFB.restart, firstRow: " +
|
||||
Bytes.toStringBinary(firstRow) + ", no endRow");
|
||||
|
||||
Scan scan = new Scan(firstRow);
|
||||
scan.addColumns(trrInputColumns);
|
||||
// scan.setFilter(trrRowFilter);
|
||||
this.scanner = this.htable.getScanner(scan);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the scanner. Not done in constructor to allow for extension.
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public void init() throws IOException {
|
||||
restart(startRow);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param htable the {@link HTable} to scan.
|
||||
*/
|
||||
public void setHTable(HTable htable) {
|
||||
this.htable = htable;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param inputColumns the columns to be placed in {@link RowResult}.
|
||||
*/
|
||||
public void setInputColumns(final byte [][] inputColumns) {
|
||||
this.trrInputColumns = inputColumns;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param startRow the first row in the split
|
||||
*/
|
||||
public void setStartRow(final byte [] startRow) {
|
||||
this.startRow = startRow;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param endRow the last row in the split
|
||||
*/
|
||||
public void setEndRow(final byte [] endRow) {
|
||||
this.endRow = endRow;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param rowFilter the {@link RowFilterInterface} to be used.
|
||||
*/
|
||||
public void setRowFilter(RowFilterInterface rowFilter) {
|
||||
this.trrRowFilter = rowFilter;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
this.scanner.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return ImmutableBytesWritable
|
||||
*
|
||||
* @see org.apache.hadoop.mapred.RecordReader#createKey()
|
||||
*/
|
||||
public ImmutableBytesWritable createKey() {
|
||||
return new ImmutableBytesWritable();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return RowResult
|
||||
*
|
||||
* @see org.apache.hadoop.mapred.RecordReader#createValue()
|
||||
*/
|
||||
public RowResult createValue() {
|
||||
return new RowResult();
|
||||
}
|
||||
|
||||
public long getPos() {
|
||||
// This should be the ordinal tuple in the range;
|
||||
// not clear how to calculate...
|
||||
return 0;
|
||||
}
|
||||
|
||||
public float getProgress() {
|
||||
// Depends on the total number of tuples and getPos
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
 * @param key ImmutableBytesWritable as input key.
 * @param value RowResult as input value
|
||||
* @return true if there was more data
|
||||
* @throws IOException
|
||||
*/
|
||||
public boolean next(ImmutableBytesWritable key, RowResult value)
|
||||
throws IOException {
|
||||
Result result;
|
||||
try {
|
||||
result = this.scanner.next();
|
||||
} catch (UnknownScannerException e) {
|
||||
LOG.debug("recovered from " + StringUtils.stringifyException(e));
|
||||
restart(lastRow);
|
||||
this.scanner.next(); // skip presumed already mapped row
|
||||
result = this.scanner.next();
|
||||
}
|
||||
|
||||
if (result != null && result.size() > 0) {
|
||||
key.set(result.getRow());
|
||||
lastRow = key.get();
|
||||
Writables.copyWritable(result.getRowResult(), value);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a TableRecordReader. If no TableRecordReader was provided, uses
|
||||
* the default.
|
||||
*
|
||||
* @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,
|
||||
* JobConf, Reporter)
|
||||
*/
|
||||
public RecordReader<ImmutableBytesWritable, RowResult> getRecordReader(
|
||||
InputSplit split, JobConf job, Reporter reporter)
|
||||
throws IOException {
|
||||
TableSplit tSplit = (TableSplit) split;
|
||||
TableRecordReader trr = this.tableRecordReader;
|
||||
// if no table record reader was provided use default
|
||||
if (trr == null) {
|
||||
trr = new TableRecordReader();
|
||||
}
|
||||
trr.setStartRow(tSplit.getStartRow());
|
||||
trr.setEndRow(tSplit.getEndRow());
|
||||
trr.setHTable(this.table);
|
||||
trr.setInputColumns(this.inputColumns);
|
||||
trr.setRowFilter(this.rowFilter);
|
||||
trr.init();
|
||||
return trr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the splits that will serve as input for the map tasks.
|
||||
 * <p>
|
||||
* Splits are created in number equal to the smallest between numSplits and
|
||||
* the number of {@link HRegion}s in the table. If the number of splits is
|
||||
* smaller than the number of {@link HRegion}s then splits are spanned across
|
||||
* multiple {@link HRegion}s and are grouped the most evenly possible. In the
|
||||
* case splits are uneven the bigger splits are placed first in the
|
||||
* {@link InputSplit} array.
|
||||
*
|
||||
* @param job the map task {@link JobConf}
|
||||
* @param numSplits a hint to calculate the number of splits (mapred.map.tasks).
|
||||
*
|
||||
* @return the input splits
|
||||
*
|
||||
* @see org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred.JobConf, int)
|
||||
*/
|
||||
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    if (this.table == null) {
      throw new IOException("No table was provided");
    }
    byte [][] startKeys = this.table.getStartKeys();
    if (startKeys == null || startKeys.length == 0) {
      throw new IOException("Expecting at least one region");
    }
|
||||
if (this.inputColumns == null || this.inputColumns.length == 0) {
|
||||
throw new IOException("Expecting at least one column");
|
||||
}
|
||||
int realNumSplits = numSplits > startKeys.length? startKeys.length:
|
||||
numSplits;
|
||||
InputSplit[] splits = new InputSplit[realNumSplits];
|
||||
int middle = startKeys.length / realNumSplits;
|
||||
int startPos = 0;
|
||||
for (int i = 0; i < realNumSplits; i++) {
|
||||
int lastPos = startPos + middle;
|
||||
lastPos = startKeys.length % realNumSplits > i ? lastPos + 1 : lastPos;
|
||||
String regionLocation = table.getRegionLocation(startKeys[startPos]).
|
||||
getServerAddress().getHostname();
|
||||
splits[i] = new TableSplit(this.table.getTableName(),
|
||||
startKeys[startPos], ((i + 1) < realNumSplits) ? startKeys[lastPos]:
|
||||
HConstants.EMPTY_START_ROW, regionLocation);
|
||||
LOG.info("split: " + i + "->" + splits[i]);
|
||||
startPos = lastPos;
|
||||
}
|
||||
return splits;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param inputColumns to be passed in {@link RowResult} to the map task.
|
||||
*/
|
||||
protected void setInputColumns(byte [][] inputColumns) {
|
||||
this.inputColumns = inputColumns;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows subclasses to get the {@link HTable}.
|
||||
*/
|
||||
protected HTable getHTable() {
|
||||
return this.table;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows subclasses to set the {@link HTable}.
|
||||
*
|
||||
* @param table to get the data from
|
||||
*/
|
||||
protected void setHTable(HTable table) {
|
||||
this.table = table;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows subclasses to set the {@link TableRecordReader}.
|
||||
*
|
||||
* @param tableRecordReader
|
||||
* to provide other {@link TableRecordReader} implementations.
|
||||
*/
|
||||
protected void setTableRecordReader(TableRecordReader tableRecordReader) {
|
||||
this.tableRecordReader = tableRecordReader;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows subclasses to set the {@link RowFilterInterface} to be used.
|
||||
*
|
||||
* @param rowFilter
|
||||
*/
|
||||
protected void setRowFilter(RowFilterInterface rowFilter) {
|
||||
this.rowFilter = rowFilter;
|
||||
}
|
||||
}
|
|
@ -1,39 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
import org.apache.hadoop.mapred.Mapper;
|
||||
|
||||
/**
|
||||
* Scan an HBase table to sort by a specified sort column.
|
||||
* If the column does not exist, the record is not passed to Reduce.
|
||||
*
|
||||
* @param <K> WritableComparable key class
|
||||
* @param <V> Writable value class
|
||||
*/
|
||||
@Deprecated
|
||||
public interface TableMap<K extends WritableComparable<? super K>, V extends Writable>
|
||||
extends Mapper<ImmutableBytesWritable, RowResult, K, V> {
|
||||
|
||||
}
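
TableMap only narrows the generic Mapper signature, so a concrete implementation supplies the map() body. Below is a hypothetical sketch against the deprecated RowResult-based API above; the class name and the choice of Text output types are illustrative only, and it assumes the 0.20-era RowResult/Cell map API shown elsewhere in this change.

package org.apache.hadoop.hbase.mapred;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/** Sketch only: emits each cell of a row as a (row, value) Text pair. */
public class ExampleCellMap extends MapReduceBase
    implements TableMap<Text, Text> {

  public void map(ImmutableBytesWritable row, RowResult value,
      OutputCollector<Text, Text> output, Reporter reporter)
      throws IOException {
    // RowResult is a SortedMap of column name to Cell; re-key every cell
    // value by its row so downstream reduces group by row.
    for (Map.Entry<byte [], Cell> e : value.entrySet()) {
      output.collect(new Text(row.get()), new Text(e.getValue().getValue()));
    }
  }
}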
|
|
@ -1,184 +0,0 @@
|
|||
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
import org.apache.hadoop.mapred.FileInputFormat;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
|
||||
/**
|
||||
* Utility for {@link TableMap} and {@link TableReduce}
|
||||
*/
|
||||
@Deprecated
|
||||
@SuppressWarnings("unchecked")
|
||||
public class TableMapReduceUtil {
|
||||
|
||||
/**
|
||||
* Use this before submitting a TableMap job. It will
|
||||
* appropriately set up the JobConf.
|
||||
*
|
||||
* @param table The table name to read from.
|
||||
* @param columns The columns to scan.
|
||||
* @param mapper The mapper class to use.
|
||||
* @param outputKeyClass The class of the output key.
|
||||
* @param outputValueClass The class of the output value.
|
||||
* @param job The current job configuration to adjust.
|
||||
*/
|
||||
public static void initTableMapJob(String table, String columns,
|
||||
Class<? extends TableMap> mapper,
|
||||
Class<? extends WritableComparable> outputKeyClass,
|
||||
Class<? extends Writable> outputValueClass, JobConf job) {
|
||||
|
||||
job.setInputFormat(TableInputFormat.class);
|
||||
job.setMapOutputValueClass(outputValueClass);
|
||||
job.setMapOutputKeyClass(outputKeyClass);
|
||||
job.setMapperClass(mapper);
|
||||
FileInputFormat.addInputPaths(job, table);
|
||||
job.set(TableInputFormat.COLUMN_LIST, columns);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this before submitting a TableReduce job. It will
|
||||
* appropriately set up the JobConf.
|
||||
*
|
||||
* @param table The output table.
|
||||
* @param reducer The reducer class to use.
|
||||
* @param job The current job configuration to adjust.
|
||||
* @throws IOException When determining the region count fails.
|
||||
*/
|
||||
public static void initTableReduceJob(String table,
|
||||
Class<? extends TableReduce> reducer, JobConf job)
|
||||
throws IOException {
|
||||
initTableReduceJob(table, reducer, job, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this before submitting a TableReduce job. It will
|
||||
* appropriately set up the JobConf.
|
||||
*
|
||||
* @param table The output table.
|
||||
* @param reducer The reducer class to use.
|
||||
* @param job The current job configuration to adjust.
|
||||
* @param partitioner Partitioner to use. Pass <code>null</code> to use
|
||||
* default partitioner.
|
||||
* @throws IOException When determining the region count fails.
|
||||
*/
|
||||
public static void initTableReduceJob(String table,
|
||||
Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
|
||||
throws IOException {
|
||||
job.setOutputFormat(TableOutputFormat.class);
|
||||
job.setReducerClass(reducer);
|
||||
job.set(TableOutputFormat.OUTPUT_TABLE, table);
|
||||
job.setOutputKeyClass(ImmutableBytesWritable.class);
|
||||
job.setOutputValueClass(BatchUpdate.class);
|
||||
if (partitioner == HRegionPartitioner.class) {
|
||||
job.setPartitionerClass(HRegionPartitioner.class);
|
||||
HTable outputTable = new HTable(new HBaseConfiguration(job), table);
|
||||
int regions = outputTable.getRegionsInfo().size();
|
||||
if (job.getNumReduceTasks() > regions) {
|
||||
job.setNumReduceTasks(outputTable.getRegionsInfo().size());
|
||||
}
|
||||
} else if (partitioner != null) {
|
||||
job.setPartitionerClass(partitioner);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures that the given number of reduce tasks for the given job
|
||||
* configuration does not exceed the number of regions for the given table.
|
||||
*
|
||||
* @param table The table to get the region count for.
|
||||
* @param job The current job configuration to adjust.
|
||||
* @throws IOException When retrieving the table details fails.
|
||||
*/
|
||||
public static void limitNumReduceTasks(String table, JobConf job)
|
||||
throws IOException {
|
||||
HTable outputTable = new HTable(new HBaseConfiguration(job), table);
|
||||
int regions = outputTable.getRegionsInfo().size();
|
||||
if (job.getNumReduceTasks() > regions)
|
||||
job.setNumReduceTasks(regions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures that the given number of map tasks for the given job
|
||||
* configuration does not exceed the number of regions for the given table.
|
||||
*
|
||||
* @param table The table to get the region count for.
|
||||
* @param job The current job configuration to adjust.
|
||||
* @throws IOException When retrieving the table details fails.
|
||||
*/
|
||||
public static void limitNumMapTasks(String table, JobConf job)
|
||||
throws IOException {
|
||||
HTable outputTable = new HTable(new HBaseConfiguration(job), table);
|
||||
int regions = outputTable.getRegionsInfo().size();
|
||||
if (job.getNumMapTasks() > regions)
|
||||
job.setNumMapTasks(regions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of reduce tasks for the given job configuration to the
|
||||
* number of regions the given table has.
|
||||
*
|
||||
* @param table The table to get the region count for.
|
||||
* @param job The current job configuration to adjust.
|
||||
* @throws IOException When retrieving the table details fails.
|
||||
*/
|
||||
public static void setNumReduceTasks(String table, JobConf job)
|
||||
throws IOException {
|
||||
HTable outputTable = new HTable(new HBaseConfiguration(job), table);
|
||||
int regions = outputTable.getRegionsInfo().size();
|
||||
job.setNumReduceTasks(regions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of map tasks for the given job configuration to the
|
||||
* number of regions the given table has.
|
||||
*
|
||||
* @param table The table to get the region count for.
|
||||
* @param job The current job configuration to adjust.
|
||||
* @throws IOException When retrieving the table details fails.
|
||||
*/
|
||||
public static void setNumMapTasks(String table, JobConf job)
|
||||
throws IOException {
|
||||
HTable outputTable = new HTable(new HBaseConfiguration(job), table);
|
||||
int regions = outputTable.getRegionsInfo().size();
|
||||
job.setNumMapTasks(regions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of rows to return and cache with each scanner iteration.
|
||||
* Higher caching values will enable faster mapreduce jobs at the expense of
|
||||
* requiring more heap to contain the cached rows.
|
||||
*
|
||||
* @param job The current job configuration to adjust.
|
||||
* @param batchSize The number of rows to return in batch with each scanner
|
||||
* iteration.
|
||||
*/
|
||||
public static void setScannerCaching(JobConf job, int batchSize) {
|
||||
job.setInt("hbase.client.scanner.caching", batchSize);
|
||||
}
|
||||
}
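
Putting the helpers above together, a hypothetical driver for a map-only scan might look like the sketch below. IdentityTableMap is the do-nothing mapper referenced in this package's documentation; the table name, column spec, caching value, class names, and the SomeTableReduce mentioned in a comment are placeholders, not part of this change.

package org.apache.hadoop.hbase.mapred;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.NullOutputFormat;

/** Sketch only: a map-only scan job wired with the helpers above. */
public class ExampleScanDriver {
  public static void main(String [] args) throws Exception {
    JobConf job = new JobConf(new HBaseConfiguration(), ExampleScanDriver.class);
    job.setJobName("exampleScan");
    // Scan the whole (placeholder) "info" family of sourceTable with the
    // do-nothing IdentityTableMap referenced in this package's documentation.
    TableMapReduceUtil.initTableMapJob("sourceTable", "info:",
      IdentityTableMap.class, ImmutableBytesWritable.class, RowResult.class, job);
    // Larger scanner caching trades map-task heap for fewer scanner round trips.
    TableMapReduceUtil.setScannerCaching(job, 100);
    // Map-only here; a job writing back to HBase would instead call
    // initTableReduceJob("destTable", SomeTableReduce.class, job,
    // HRegionPartitioner.class) with its own TableReduce implementation.
    job.setNumReduceTasks(0);
    job.setOutputFormat(NullOutputFormat.class);
    JobClient.runJob(job);
  }
}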
|
|
@ -1,106 +0,0 @@
|
|||
/**
|
||||
* Copyright 2009 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.mapred.FileAlreadyExistsException;
|
||||
import org.apache.hadoop.mapred.InvalidJobConfException;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.FileOutputFormat;
|
||||
import org.apache.hadoop.mapred.RecordWriter;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.util.Progressable;
|
||||
|
||||
/**
|
||||
* Convert Map/Reduce output and write it to an HBase table
|
||||
*/
|
||||
@Deprecated
|
||||
public class TableOutputFormat extends
|
||||
FileOutputFormat<ImmutableBytesWritable, BatchUpdate> {
|
||||
|
||||
/** JobConf parameter that specifies the output table */
|
||||
public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
|
||||
private final Log LOG = LogFactory.getLog(TableOutputFormat.class);
|
||||
|
||||
/**
|
||||
   * Convert Reduce output (key, value) to (ImmutableBytesWritable, BatchUpdate)
   * and write to an HBase table.
|
||||
*/
|
||||
protected static class TableRecordWriter
|
||||
implements RecordWriter<ImmutableBytesWritable, BatchUpdate> {
|
||||
private HTable m_table;
|
||||
|
||||
/**
|
||||
     * Instantiate a TableRecordWriter with an open HTable instance for writing.
|
||||
*
|
||||
* @param table
|
||||
*/
|
||||
public TableRecordWriter(HTable table) {
|
||||
m_table = table;
|
||||
}
|
||||
|
||||
public void close(Reporter reporter)
|
||||
throws IOException {
|
||||
m_table.flushCommits();
|
||||
}
|
||||
|
||||
public void write(ImmutableBytesWritable key,
|
||||
BatchUpdate value) throws IOException {
|
||||
m_table.commit(new BatchUpdate(value));
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public RecordWriter getRecordWriter(FileSystem ignored,
|
||||
JobConf job, String name, Progressable progress) throws IOException {
|
||||
|
||||
    // Expecting the output table name in the OUTPUT_TABLE job property.
|
||||
|
||||
String tableName = job.get(OUTPUT_TABLE);
|
||||
HTable table = null;
|
||||
try {
|
||||
table = new HTable(new HBaseConfiguration(job), tableName);
|
||||
} catch(IOException e) {
|
||||
LOG.error(e);
|
||||
throw e;
|
||||
}
|
||||
table.setAutoFlush(false);
|
||||
return new TableRecordWriter(table);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkOutputSpecs(FileSystem ignored, JobConf job)
|
||||
throws FileAlreadyExistsException, InvalidJobConfException, IOException {
|
||||
|
||||
String tableName = job.get(OUTPUT_TABLE);
|
||||
if(tableName == null) {
|
||||
throw new IOException("Must specify table name");
|
||||
}
|
||||
}
|
||||
}
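
As the package documentation removed later in this change suggests, a job can skip the reduce step and write to HBase straight from the map by pairing zero reduces with TableOutputFormat. Below is a hypothetical sketch of just that output wiring; the mapper itself is not shown, and the table and class names are placeholders.

package org.apache.hadoop.hbase.mapred;

import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapred.JobConf;

/** Sketch only: output-side wiring for a map-only write into a table. */
public class ExampleMapOnlyWriteSetup {
  static void configureSink(JobConf job) {
    // The mapper (not shown) must emit (ImmutableBytesWritable, BatchUpdate).
    job.setNumReduceTasks(0);                             // write from the map side
    job.setOutputFormat(TableOutputFormat.class);
    job.set(TableOutputFormat.OUTPUT_TABLE, "destTable"); // placeholder table name
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(BatchUpdate.class);
  }
}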
|
|
@ -1,39 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
import org.apache.hadoop.mapred.Reducer;
|
||||
|
||||
/**
|
||||
* Write a table, sorting by the input key
|
||||
*
|
||||
* @param <K> key class
|
||||
* @param <V> value class
|
||||
*/
|
||||
@Deprecated
|
||||
@SuppressWarnings("unchecked")
|
||||
public interface TableReduce<K extends WritableComparable, V extends Writable>
|
||||
extends Reducer<K, V, ImmutableBytesWritable, BatchUpdate> {
|
||||
|
||||
}
|
|
@ -1,113 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.mapred.InputSplit;
|
||||
|
||||
/**
|
||||
* A table split corresponds to a key range [low, high)
|
||||
*/
|
||||
@Deprecated
|
||||
public class TableSplit implements InputSplit, Comparable<TableSplit> {
|
||||
private byte [] m_tableName;
|
||||
private byte [] m_startRow;
|
||||
private byte [] m_endRow;
|
||||
private String m_regionLocation;
|
||||
|
||||
/** default constructor */
|
||||
public TableSplit() {
|
||||
this(HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY,
|
||||
HConstants.EMPTY_BYTE_ARRAY, "");
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @param tableName
|
||||
* @param startRow
|
||||
* @param endRow
|
||||
* @param location
|
||||
*/
|
||||
public TableSplit(byte [] tableName, byte [] startRow, byte [] endRow,
|
||||
final String location) {
|
||||
this.m_tableName = tableName;
|
||||
this.m_startRow = startRow;
|
||||
this.m_endRow = endRow;
|
||||
this.m_regionLocation = location;
|
||||
}
|
||||
|
||||
/** @return table name */
|
||||
public byte [] getTableName() {
|
||||
return this.m_tableName;
|
||||
}
|
||||
|
||||
/** @return starting row key */
|
||||
public byte [] getStartRow() {
|
||||
return this.m_startRow;
|
||||
}
|
||||
|
||||
/** @return end row key */
|
||||
public byte [] getEndRow() {
|
||||
return this.m_endRow;
|
||||
}
|
||||
|
||||
/** @return the region's hostname */
|
||||
public String getRegionLocation() {
|
||||
return this.m_regionLocation;
|
||||
}
|
||||
|
||||
public String[] getLocations() {
|
||||
return new String[] {this.m_regionLocation};
|
||||
}
|
||||
|
||||
public long getLength() {
|
||||
// Not clear how to obtain this... seems to be used only for sorting splits
|
||||
return 0;
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
this.m_tableName = Bytes.readByteArray(in);
|
||||
this.m_startRow = Bytes.readByteArray(in);
|
||||
this.m_endRow = Bytes.readByteArray(in);
|
||||
this.m_regionLocation = Bytes.toString(Bytes.readByteArray(in));
|
||||
}
|
||||
|
||||
public void write(DataOutput out) throws IOException {
|
||||
Bytes.writeByteArray(out, this.m_tableName);
|
||||
Bytes.writeByteArray(out, this.m_startRow);
|
||||
Bytes.writeByteArray(out, this.m_endRow);
|
||||
Bytes.writeByteArray(out, Bytes.toBytes(this.m_regionLocation));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return m_regionLocation + ":" +
|
||||
Bytes.toStringBinary(m_startRow) + "," + Bytes.toStringBinary(m_endRow);
|
||||
}
|
||||
|
||||
public int compareTo(TableSplit o) {
|
||||
return Bytes.compareTo(getStartRow(), o.getStartRow());
|
||||
}
|
||||
}
|
|
@ -1,267 +0,0 @@
|
|||
/*
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/**
|
||||
Provides HBase <a href="http://wiki.apache.org/hadoop/HadoopMapReduce">MapReduce</a>
Input/OutputFormats, a table indexing MapReduce job, and utility methods.

<h2>Table of Contents</h2>
<ul>
<li><a href="#classpath">HBase, MapReduce and the CLASSPATH</a></li>
<li><a href="#sink">HBase as MapReduce job data source and sink</a></li>
<li><a href="#examples">Example Code</a></li>
</ul>

<h2><a name="classpath">HBase, MapReduce and the CLASSPATH</a></h2>

<p>MapReduce jobs deployed to a MapReduce cluster do not by default have access
to the HBase configuration under <code>$HBASE_CONF_DIR</code> nor to HBase classes.
You could add <code>hbase-site.xml</code> to <code>$HADOOP_HOME/conf</code> and add
<code>hbase-X.X.X.jar</code> to <code>$HADOOP_HOME/lib</code> and copy these
changes across your cluster, but the cleanest means of adding hbase configuration
and classes to the cluster <code>CLASSPATH</code> is by uncommenting
<code>HADOOP_CLASSPATH</code> in <code>$HADOOP_HOME/conf/hadoop-env.sh</code>
and adding the path to the hbase jar and the <code>$HBASE_CONF_DIR</code> directory.
Then copy the amended configuration around the cluster.
You'll probably need to restart the MapReduce cluster if you want it to notice
the new configuration.
</p>

<p>For example, here is how you would amend <code>hadoop-env.sh</code> adding the
built hbase jar, hbase conf, and the <code>PerformanceEvaluation</code> class from
the built hbase test jar to the hadoop <code>CLASSPATH</code>:

<blockquote><pre># Extra Java CLASSPATH elements. Optional.
# export HADOOP_CLASSPATH=
export HADOOP_CLASSPATH=$HBASE_HOME/build/test:$HBASE_HOME/build/hbase-X.X.X.jar:$HBASE_HOME/build/hbase-X.X.X-test.jar:$HBASE_HOME/conf</pre></blockquote>

<p>Expand <code>$HBASE_HOME</code> in the above appropriately to suit your
local environment.</p>

<p>After copying the above change around your cluster, this is how you would run
the PerformanceEvaluation MR job to put up 4 clients (presumes a ready mapreduce
cluster):

<blockquote><pre>$HADOOP_HOME/bin/hadoop org.apache.hadoop.hbase.PerformanceEvaluation sequentialWrite 4</pre></blockquote>

The PerformanceEvaluation class will be found on the CLASSPATH because you
added <code>$HBASE_HOME/build/test</code> to <code>HADOOP_CLASSPATH</code>.
</p>

<p>Another possibility, if for example you do not have access to hadoop-env.sh or
are unable to restart the hadoop cluster, is bundling the hbase jar into a mapreduce
job jar, adding it and its dependencies under the job jar <code>lib/</code>
directory and the hbase conf into a job jar <code>conf/</code> directory.
</p>
|
||||
|
||||
<h2><a name="sink">HBase as MapReduce job data source and sink</a></h2>
|
||||
|
||||
<p>HBase can be used as a data source, {@link org.apache.hadoop.hbase.mapred.TableInputFormat TableInputFormat},
|
||||
and data sink, {@link org.apache.hadoop.hbase.mapred.TableOutputFormat TableOutputFormat}, for MapReduce jobs.
|
||||
Writing MapReduce jobs that read or write HBase, you'll probably want to subclass
|
||||
{@link org.apache.hadoop.hbase.mapred.TableMap TableMap} and/or
|
||||
{@link org.apache.hadoop.hbase.mapred.TableReduce TableReduce}. See the do-nothing
|
||||
pass-through classes {@link org.apache.hadoop.hbase.mapred.IdentityTableMap IdentityTableMap} and
|
||||
{@link org.apache.hadoop.hbase.mapred.IdentityTableReduce IdentityTableReduce} for basic usage. For a more
|
||||
involved example, see {@link org.apache.hadoop.hbase.mapred.BuildTableIndex BuildTableIndex}
|
||||
or review the <code>org.apache.hadoop.hbase.mapred.TestTableMapReduce</code> unit test.
|
||||
</p>
|
||||
|
||||
<p>Running mapreduce jobs that have hbase as source or sink, you'll need to
|
||||
specify source/sink table and column names in your configuration.</p>
|
||||
|
||||
<p>Reading from hbase, the TableInputFormat asks hbase for the list of
|
||||
regions and makes a map-per-region or <code>mapred.map.tasks maps</code>,
|
||||
whichever is smaller (If your job only has two maps, up mapred.map.tasks
|
||||
to a number > number of regions). Maps will run on the adjacent TaskTracker
|
||||
if you are running a TaskTracer and RegionServer per node.
|
||||
Writing, it may make sense to avoid the reduce step and write yourself back into
|
||||
hbase from inside your map. You'd do this when your job does not need the sort
|
||||
and collation that mapreduce does on the map emitted data; on insert,
|
||||
hbase 'sorts' so there is no point double-sorting (and shuffling data around
|
||||
your mapreduce cluster) unless you need to. If you do not need the reduce,
|
||||
you might just have your map emit counts of records processed just so the
|
||||
framework's report at the end of your job has meaning or set the number of
|
||||
reduces to zero and use TableOutputFormat. See example code
|
||||
below. If running the reduce step makes sense in your case, its usually better
|
||||
to have lots of reducers so load is spread across the hbase cluster.</p>
|
||||
|
||||
<p>There is also a new hbase partitioner that will run as many reducers as
|
||||
currently existing regions. The
|
||||
{@link org.apache.hadoop.hbase.mapred.HRegionPartitioner} is suitable
|
||||
when your table is large and your upload is not such that it will greatly
|
||||
alter the number of existing regions when done; other use the default
|
||||
partitioner.
|
||||
</p>
|
||||
|
||||
<h2><a name="examples">Example Code</a></h2>
|
||||
<h3>Sample Row Counter</h3>
|
||||
<p>See {@link org.apache.hadoop.hbase.mapred.RowCounter}. You should be able to run
|
||||
it by doing: <code>% ./bin/hadoop jar hbase-X.X.X.jar</code>. This will invoke
|
||||
the hbase MapReduce Driver class. Select 'rowcounter' from the choice of jobs
|
||||
offered. You may need to add the hbase conf directory to <code>$HADOOP_HOME/conf/hadoop-env.sh#HADOOP_CLASSPATH</code>
|
||||
so the rowcounter gets pointed at the right hbase cluster (or, build a new jar
|
||||
with an appropriate hbase-site.xml built into your job jar).
|
||||
</p>
|
||||
<h3>PerformanceEvaluation</h3>
|
||||
<p>See org.apache.hadoop.hbase.PerformanceEvaluation from hbase src/test. It runs
|
||||
a mapreduce job to run concurrent clients reading and writing hbase.
|
||||
</p>
|
||||
|
||||
<h3>Sample MR Bulk Uploader</h3>
|
||||
<p>A students/classes example based on a contribution by Naama Kraus with logs of
|
||||
documentation can be found over in src/examples/mapred.
|
||||
Its the <code>org.apache.hadoop.hbase.mapred.SampleUploader</code> class.
|
||||
Just copy it under src/java/org/apache/hadoop/hbase/mapred to compile and try it
|
||||
(until we start generating an hbase examples jar). The class reads a data file
|
||||
from HDFS and per line, does an upload to HBase using TableReduce.
|
||||
Read the class comment for specification of inputs, prerequisites, etc.
|
||||
</p>
|
||||
|
||||
<h3>Example to bulk import/load a text file into an HTable
|
||||
</h3>
|
||||
|
||||
<p>Here's a sample program from
|
||||
<a href="http://www.spicylogic.com/allenday/blog/category/computing/distributed-systems/hadoop/hbase/">Allen Day</a>
|
||||
that takes an HDFS text file path and an HBase table name as inputs, and loads the contents of the text file to the table
|
||||
all up in the map phase.
|
||||
</p>
|
||||
|
||||
<blockquote><pre>
|
||||
package com.spicylogic.hbase;
|
||||
package org.apache.hadoop.hbase.mapred;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapred.FileInputFormat;
|
||||
import org.apache.hadoop.mapred.JobClient;
|
||||
import org.apache.hadoop.mapred.JobConf;
|
||||
import org.apache.hadoop.mapred.MapReduceBase;
|
||||
import org.apache.hadoop.mapred.Mapper;
|
||||
import org.apache.hadoop.mapred.OutputCollector;
|
||||
import org.apache.hadoop.mapred.Reporter;
|
||||
import org.apache.hadoop.mapred.lib.NullOutputFormat;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
|
||||
/**
|
||||
* Class that adds the parsed line from the input to hbase
|
||||
* in the map function. Map has no emissions and job
|
||||
* has no reduce.
|
||||
*/
|
||||
public class BulkImport implements Tool {
|
||||
private static final String NAME = "BulkImport";
|
||||
private Configuration conf;
|
||||
|
||||
public static class InnerMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
|
||||
private HTable table;
|
||||
private HBaseConfiguration HBconf;
|
||||
|
||||
public void map(LongWritable key, Text value,
|
||||
OutputCollector<Text, Text> output, Reporter reporter)
|
||||
throws IOException {
|
||||
if ( table == null )
|
||||
throw new IOException("table is null");
|
||||
|
||||
// Split input line on tab character
|
||||
String [] splits = value.toString().split("\t");
|
||||
if ( splits.length != 4 )
|
||||
return;
|
||||
|
||||
String rowID = splits[0];
|
||||
int timestamp = Integer.parseInt( splits[1] );
|
||||
String colID = splits[2];
|
||||
String cellValue = splits[3];
|
||||
|
||||
reporter.setStatus("Map emitting cell for row='" + rowID +
|
||||
"', column='" + colID + "', time='" + timestamp + "'");
|
||||
|
||||
BatchUpdate bu = new BatchUpdate( rowID );
|
||||
if ( timestamp > 0 )
|
||||
bu.setTimestamp( timestamp );
|
||||
|
||||
bu.put(colID, cellValue.getBytes());
|
||||
table.commit( bu );
|
||||
}
|
||||
|
||||
public void configure(JobConf job) {
|
||||
HBconf = new HBaseConfiguration(job);
|
||||
try {
|
||||
table = new HTable( HBconf, job.get("input.table") );
|
||||
} catch (IOException e) {
|
||||
        // Leave table null; map() will then fail fast with "table is null".
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public JobConf createSubmittableJob(String[] args) {
|
||||
JobConf c = new JobConf(getConf(), BulkImport.class);
|
||||
c.setJobName(NAME);
|
||||
FileInputFormat.setInputPaths(c, new Path(args[0]));
|
||||
|
||||
c.set("input.table", args[1]);
|
||||
c.setMapperClass(InnerMap.class);
|
||||
c.setNumReduceTasks(0);
|
||||
c.setOutputFormat(NullOutputFormat.class);
|
||||
return c;
|
||||
}
|
||||
|
||||
static int printUsage() {
|
||||
System.err.println("Usage: " + NAME + " <input> <table_name>");
|
||||
System.err.println("\twhere <input> is a tab-delimited text file with 4 columns.");
|
||||
System.err.println("\t\tcolumn 1 = row ID");
|
||||
System.err.println("\t\tcolumn 2 = timestamp (use a negative value for current time)");
|
||||
System.err.println("\t\tcolumn 3 = column ID");
|
||||
System.err.println("\t\tcolumn 4 = cell value");
|
||||
return -1;
|
||||
}
|
||||
|
||||
  public int run(String[] args) throws Exception {
|
||||
    // Make sure there are exactly 2 parameters.
|
||||
if (args.length != 2) {
|
||||
return printUsage();
|
||||
}
|
||||
JobClient.runJob(createSubmittableJob(args));
|
||||
return 0;
|
||||
}
|
||||
|
||||
public Configuration getConf() {
|
||||
return this.conf;
|
||||
}
|
||||
|
||||
public void setConf(final Configuration c) {
|
||||
this.conf = c;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
int errCode = ToolRunner.run(new Configuration(), new BulkImport(), args);
|
||||
System.exit(errCode);
|
||||
}
|
||||
}
|
||||
</pre></blockquote>
|
||||
|
||||
*/
|
||||
package org.apache.hadoop.hbase.mapred;
|
|
@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path;
|
|||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.util.GenericOptionsParser;
|
||||
|
@ -127,7 +128,14 @@ public class BuildTableIndex {
|
|||
// number of indexes to partition into
|
||||
job.setNumReduceTasks(numReduceTasks);
|
||||
Scan scan = new Scan();
|
||||
scan.addColumns(columnNames.toString());
|
||||
for(String columnName : columnNames.toString().split(" ")) {
|
||||
String [] fields = columnName.split(":");
|
||||
if(fields.length == 1) {
|
||||
scan.addFamily(Bytes.toBytes(fields[0]));
|
||||
} else {
|
||||
scan.addColumn(Bytes.toBytes(fields[0]), Bytes.toBytes(fields[1]));
|
||||
}
|
||||
}
|
||||
// use identity map (a waste, but just as an example)
|
||||
IdentityTableMapper.initJob(tableName, scan,
|
||||
IdentityTableMapper.class, job);
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.mapreduce;
|
||||
|
||||
import org.apache.hadoop.hbase.migration.nineteen.HStoreFileToStoreFile;
|
||||
import org.apache.hadoop.util.ProgramDriver;
|
||||
|
||||
/**
|
||||
|
@ -35,9 +34,6 @@ public class Driver {
|
|||
ProgramDriver pgd = new ProgramDriver();
|
||||
pgd.addClass(RowCounter.NAME, RowCounter.class,
|
||||
"Count rows in HBase table");
|
||||
pgd.addClass(HStoreFileToStoreFile.JOBNAME,
|
||||
HStoreFileToStoreFile.class,
|
||||
"Bulk convert 0.19 HStoreFiles to 0.20 StoreFiles");
|
||||
pgd.driver(args);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -107,7 +107,8 @@ extends TableMapper<ImmutableBytesWritable,Result> implements Configurable {
|
|||
int numCols = columns.length;
|
||||
if (numCols > 0) {
|
||||
for (KeyValue value: r.list()) {
|
||||
byte [] column = value.getColumn();
|
||||
byte [] column = KeyValue.makeColumn(value.getFamily(),
|
||||
value.getQualifier());
|
||||
for (int i = 0; i < numCols; i++) {
|
||||
if (Bytes.equals(column, columns[i])) {
|
||||
foundList.add(value.getValue());
|
||||
|
|
|
@ -75,7 +75,8 @@ implements Configurable {
|
|||
// each column (name-value pair) is a field (name-value pair)
|
||||
for (KeyValue kv: r.list()) {
|
||||
// name is already UTF-8 encoded
|
||||
String column = Bytes.toString(kv.getColumn());
|
||||
String column = Bytes.toString(KeyValue.makeColumn(kv.getFamily(),
|
||||
kv.getQualifier()));
|
||||
byte[] columnValue = kv.getValue();
|
||||
Field.Store store = indexConf.isStore(column)?
|
||||
Field.Store.YES: Field.Store.NO;
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
|
|||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.util.GenericOptionsParser;
|
||||
|
@ -96,7 +97,14 @@ public class RowCounter {
|
|||
sb.append(args[i]);
|
||||
}
|
||||
Scan scan = new Scan();
|
||||
scan.addColumns(sb.toString());
|
||||
for(String columnName : sb.toString().split(" ")) {
|
||||
String [] fields = columnName.split(":");
|
||||
if(fields.length == 1) {
|
||||
scan.addFamily(Bytes.toBytes(fields[0]));
|
||||
} else {
|
||||
scan.addColumn(Bytes.toBytes(fields[0]), Bytes.toBytes(fields[1]));
|
||||
}
|
||||
}
|
||||
// Second argument is the table name.
|
||||
TableMapReduceUtil.initTableMapperJob(args[1], scan,
|
||||
RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
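
Both this RowCounter hunk and the BuildTableIndex hunk above replace Scan.addColumns(String) with the same family:qualifier parsing loop. A small helper capturing that pattern might look like the sketch below; the class and method names are illustrative and not part of this change.

package org.apache.hadoop.hbase.mapreduce;

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

/** Sketch only: the column-spec parsing shared by the hunks above. */
final class ColumnSpecs {
  private ColumnSpecs() {}

  static void addColumnSpecs(Scan scan, String spaceDelimitedColumns) {
    for (String columnName : spaceDelimitedColumns.split(" ")) {
      String [] fields = columnName.split(":");
      if (fields.length == 1) {
        scan.addFamily(Bytes.toBytes(fields[0]));   // bare family name
      } else {
        scan.addColumn(Bytes.toBytes(fields[0]),    // family
          Bytes.toBytes(fields[1]));                // qualifier
      }
    }
  }
}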
|
||||
|
|
|
@ -28,10 +28,9 @@ import org.apache.commons.logging.LogFactory;
|
|||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
import org.apache.hadoop.mapreduce.InputFormat;
|
||||
import org.apache.hadoop.mapreduce.InputSplit;
|
||||
import org.apache.hadoop.mapreduce.JobContext;
|
||||
|
|
|
@ -30,7 +30,6 @@ import org.apache.hadoop.hbase.HRegionInfo;
|
|||
import org.apache.hadoop.hbase.client.Delete;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
|
||||
/** Instantiated to enable or disable a table */
|
||||
|
|
|
@ -19,11 +19,8 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HServerAddress;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HServerAddress;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
|
||||
|
|
|
@ -28,7 +28,6 @@ import org.apache.hadoop.hbase.HTableDescriptor;
|
|||
import org.apache.hadoop.hbase.TableNotDisabledException;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.io.BatchUpdate;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
|
||||
/**
|
||||
|
|
|
@ -26,19 +26,16 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HServerAddress;
|
||||
import org.apache.hadoop.hbase.HServerInfo;
|
||||
import org.apache.hadoop.hbase.RemoteExceptionHandler;
|
||||
import org.apache.hadoop.hbase.HServerAddress;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.regionserver.HLog;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.io.RowResult;
|
||||
|
||||
/**
|
||||
* Instantiated when a server's lease has expired, meaning it has crashed.
|
||||
|
|
|
@ -734,8 +734,6 @@ class RegionManager implements HConstants {
|
|||
byte [] regionName = region.getRegionName();
|
||||
|
||||
Put put = new Put(regionName);
|
||||
byte [] infoBytes = Writables.getBytes(info);
|
||||
String infoString = new String(infoBytes);
|
||||
put.add(CATALOG_FAMILY, REGIONINFO_QUALIFIER, Writables.getBytes(info));
|
||||
server.put(metaRegionName, put);
|
||||
|
||||
|
|
|
@ -1,188 +0,0 @@
|
|||
/*
|
||||
* Copyright 2009 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.migration.nineteen;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.Migrate;
|
||||
import org.apache.hadoop.hbase.util.FSUtils.DirFilter;
|
||||
import org.apache.hadoop.io.LongWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.util.Tool;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
|
||||
/**
|
||||
* Mapper that rewrites hbase 0.19 HStoreFiles as 0.20 StoreFiles.
|
||||
* Creates passed directories as input and output. On startup, it does not
|
||||
* check filesystem is 0.19 generation just in case it fails part way so it
|
||||
* should be possible to rerun the MR job. It'll just fix the 0.19 regions
|
||||
* found.
|
||||
* If the input dir does not exist, it first crawls the filesystem to find the
|
||||
* files to migrate writing a file into the input directory. Next it starts up
|
||||
* the MR job to rewrite the 0.19 HStoreFiles as 0.20 StoreFiles deleting the
|
||||
 * old as it goes. The presumption is that there is only
 * one file per family Store, else stuff breaks; i.e. the 0.19 install
 * was major compacted before migration began. If this job fails, fix why, then
|
||||
* it should be possible to rerun the job. You may want to edit the
|
||||
* generated file in the input dir first.
|
||||
*/
|
||||
public class HStoreFileToStoreFile extends Configured implements Tool {
|
||||
static final Log LOG = LogFactory.getLog(HStoreFileToStoreFile.class);
|
||||
public static final String JOBNAME = "hsf2sf";
|
||||
|
||||
HStoreFileToStoreFile() {
|
||||
super();
|
||||
}
|
||||
|
||||
public static class Map extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
|
||||
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, LongWritable, LongWritable>.Context context)
|
||||
throws java.io.IOException, InterruptedException {
|
||||
HBaseConfiguration c = new HBaseConfiguration(context.getConfiguration());
|
||||
Path p = new Path(value.toString());
|
||||
context.setStatus(key.toString() + " " + p.toString());
|
||||
Migrate.rewrite(c, FileSystem.get(c), p);
|
||||
}
|
||||
}
|
||||
|
||||
private static void writeInputFiles(final HBaseConfiguration conf,
|
||||
final FileSystem fs, final Path dir)
|
||||
throws IOException {
|
||||
if (fs.exists(dir)) {
|
||||
LOG.warn("Input directory already exits. Using content for this MR job.");
|
||||
return;
|
||||
}
|
||||
FSDataOutputStream out = fs.create(new Path(dir, "mapfiles"));
|
||||
try {
|
||||
gathermapfiles(conf, fs, out);
|
||||
} finally {
|
||||
if (out != null) out.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static void gathermapfiles(final HBaseConfiguration conf,
|
||||
final FileSystem fs, final FSDataOutputStream out)
|
||||
throws IOException {
|
||||
// Presumes any directory under hbase.rootdir is a table.
|
||||
FileStatus [] tableDirs =
|
||||
fs.listStatus(FSUtils.getRootDir(conf), new DirFilter(fs));
|
||||
for (int i = 0; i < tableDirs.length; i++) {
|
||||
// Inside a table, there are compaction.dir directories to skip.
|
||||
// Otherwise, all else should be regions. Then in each region, should
|
||||
// only be family directories. Under each of these, should be a mapfile
|
||||
// and info directory and in these only one file.
|
||||
Path d = tableDirs[i].getPath();
|
||||
if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) continue;
|
||||
FileStatus [] regionDirs = fs.listStatus(d, new DirFilter(fs));
|
||||
for (int j = 0; j < regionDirs.length; j++) {
|
||||
Path dd = regionDirs[j].getPath();
|
||||
if (dd.equals(HConstants.HREGION_COMPACTIONDIR_NAME)) continue;
|
||||
// Else its a region name. Now look in region for families.
|
||||
FileStatus [] familyDirs = fs.listStatus(dd, new DirFilter(fs));
|
||||
for (int k = 0; k < familyDirs.length; k++) {
|
||||
Path family = familyDirs[k].getPath();
|
||||
FileStatus [] infoAndMapfile = fs.listStatus(family);
|
||||
// Assert that only info and mapfile in family dir.
|
||||
if (infoAndMapfile.length != 2) {
|
||||
LOG.warn(family.toString() + " has more than just info and mapfile: " +
|
||||
infoAndMapfile.length + ". Continuing...");
|
||||
continue;
|
||||
}
|
||||
// Make sure directory named info or mapfile.
|
||||
for (int ll = 0; ll < 2; ll++) {
|
||||
if (infoAndMapfile[ll].getPath().getName().equals("info") ||
|
||||
infoAndMapfile[ll].getPath().getName().equals("mapfiles"))
|
||||
continue;
|
||||
LOG.warn("Unexpected directory name: " +
|
||||
infoAndMapfile[ll].getPath() + ". Continuing...");
|
||||
continue;
|
||||
}
|
||||
// Now in family, there are 'mapfile' and 'info' subdirs. Just
|
||||
// look in the 'mapfile' subdir.
|
||||
Path mfsdir = new Path(family, "mapfiles");
|
||||
FileStatus [] familyStatus = fs.listStatus(mfsdir);
|
||||
if (familyStatus == null || familyStatus.length > 1) {
|
||||
LOG.warn(family.toString() + " has " +
|
||||
((familyStatus == null) ? "null": familyStatus.length) +
|
||||
" files. Continuing...");
|
||||
continue;
|
||||
}
|
||||
if (familyStatus.length == 1) {
|
||||
// If we got here, then this is good. Add the mapfile to out
|
||||
String str = familyStatus[0].getPath().makeQualified(fs).toString();
|
||||
LOG.info(str);
|
||||
out.write(Bytes.toBytes(str + "\n"));
|
||||
} else {
|
||||
// Special case. Empty region. Remove the mapfiles and info dirs.
|
||||
Path infodir = new Path(family, "info");
|
||||
LOG.info("Removing " + mfsdir + " and " + infodir + " because empty");
|
||||
fs.delete(mfsdir, true);
|
||||
fs.delete(infodir, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int run(final String[] args) throws Exception {
|
||||
if (args.length < 2) {
|
||||
System.err.println("ERROR: Wrong number of arguments: " + args.length);
|
||||
System.err.println("Usage: " + getClass().getSimpleName() +
|
||||
" <inputdir> <outputdir>");
|
||||
ToolRunner.printGenericCommandUsage(System.err);
|
||||
return -1;
|
||||
}
|
||||
Path input = new Path(args[0]);
|
||||
HBaseConfiguration conf = (HBaseConfiguration)getConf();
|
||||
FileSystem fs = FileSystem.get(conf);
|
||||
writeInputFiles(conf, fs, input);
|
||||
Job job = new Job(conf);
|
||||
job.setJarByClass(HStoreFileToStoreFile.class);
|
||||
job.setJobName(JOBNAME);
|
||||
job.setInputFormatClass(TextInputFormat.class);
|
||||
job.setMapperClass(Map.class);
|
||||
job.setNumReduceTasks(0);
|
||||
FileInputFormat.addInputPath(job, input);
|
||||
Path output = new Path(args[1]);
|
||||
FileOutputFormat.setOutputPath(job, output);
|
||||
return job.waitForCompletion(true) ? 0 : 1;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
int exitCode = ToolRunner.run(new HBaseConfiguration(),
|
||||
new HStoreFileToStoreFile(), args);
|
||||
System.exit(exitCode);
|
||||
}
|
||||
}
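For orientation, a minimal invocation sketch of the tool above. It is illustrative only: the HDFS paths are hypothetical and it assumes the sketch is compiled in the same package as the tool (its constructor is package-private); it mirrors what main() already does.

public class HStoreFileToStoreFileUsage {
  public static void main(String[] args) throws Exception {
    // ToolRunner parses generic Hadoop options; run() then expects
    // <inputdir> <outputdir> exactly as the usage message states.
    int exitCode = org.apache.hadoop.util.ToolRunner.run(
        new org.apache.hadoop.hbase.HBaseConfiguration(),
        new HStoreFileToStoreFile(),
        new String[] { "/migration/input", "/migration/output" }); // hypothetical paths
    System.exit(exitCode);
  }
}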
@@ -1,738 +0,0 @@
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.migration.nineteen;
|
||||
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.hbase.ColumnNameParseException;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.io.HeapSize;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
import org.apache.hadoop.io.WritableComparator;
|
||||
|
||||
/**
|
||||
* A Key for a stored row.
|
||||
*/
|
||||
public class HStoreKey implements WritableComparable<HStoreKey>, HeapSize {
|
||||
/**
|
||||
* Colon character in UTF-8
|
||||
*/
|
||||
public static final char COLUMN_FAMILY_DELIMITER = ':';
|
||||
|
||||
private byte [] row = HConstants.EMPTY_BYTE_ARRAY;
|
||||
private byte [] column = HConstants.EMPTY_BYTE_ARRAY;
|
||||
private long timestamp = Long.MAX_VALUE;
|
||||
|
||||
/*
|
||||
* regionInfo is only used as a hack to compare HSKs.
|
||||
* It is not serialized. See https://issues.apache.org/jira/browse/HBASE-832
|
||||
*/
|
||||
private HRegionInfo regionInfo = null;
|
||||
|
||||
/**
|
||||
* Estimated size tax paid for each instance of HSK. Estimate based on
|
||||
* study of jhat and jprofiler numbers.
|
||||
*/
|
||||
// In jprofiler, says shallow size is 48 bytes. Add to it cost of two
|
||||
// byte arrays and then something for the HRI hosting.
|
||||
public static final int ESTIMATED_HEAP_TAX = 48;
|
||||
|
||||
/** Default constructor used in conjunction with Writable interface */
|
||||
public HStoreKey() {
|
||||
super();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying only the row
|
||||
* The column defaults to the empty string, the time stamp defaults to
|
||||
* Long.MAX_VALUE and the table defaults to empty string
|
||||
*
|
||||
* @param row - row key
|
||||
*/
|
||||
public HStoreKey(final byte [] row) {
|
||||
this(row, Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying only the row
|
||||
* The column defaults to the empty string, the time stamp defaults to
|
||||
* Long.MAX_VALUE and the table defaults to empty string
|
||||
*
|
||||
* @param row - row key
|
||||
*/
|
||||
public HStoreKey(final String row) {
|
||||
this(row, Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying the row and timestamp
|
||||
* The column and table names default to the empty string
|
||||
*
|
||||
* @param row row key
|
||||
* @param hri
|
||||
*/
|
||||
public HStoreKey(final byte [] row, final HRegionInfo hri) {
|
||||
this(row, HConstants.EMPTY_BYTE_ARRAY, hri);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying the row and timestamp
|
||||
* The column and table names default to the empty string
|
||||
*
|
||||
* @param row row key
|
||||
* @param timestamp timestamp value
|
||||
* @param hri HRegionInfo
|
||||
*/
|
||||
public HStoreKey(final byte [] row, long timestamp, final HRegionInfo hri) {
|
||||
this(row, HConstants.EMPTY_BYTE_ARRAY, timestamp, hri);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying the row and timestamp
|
||||
* The column and table names default to the empty string
|
||||
*
|
||||
* @param row row key
|
||||
* @param timestamp timestamp value
|
||||
*/
|
||||
public HStoreKey(final byte [] row, long timestamp) {
|
||||
this(row, HConstants.EMPTY_BYTE_ARRAY, timestamp);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying the row and timestamp
|
||||
* The column and table names default to the empty string
|
||||
*
|
||||
* @param row row key
|
||||
* @param timestamp timestamp value
|
||||
*/
|
||||
public HStoreKey(final String row, long timestamp) {
|
||||
this (row, "", timestamp, new HRegionInfo());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying the row and column names
|
||||
* The timestamp defaults to LATEST_TIMESTAMP
|
||||
* and table name defaults to the empty string
|
||||
*
|
||||
* @param row row key
|
||||
* @param column column key
|
||||
*/
|
||||
public HStoreKey(final String row, final String column) {
|
||||
this(row, column, HConstants.LATEST_TIMESTAMP, new HRegionInfo());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying the row and column names
|
||||
* The timestamp defaults to LATEST_TIMESTAMP
|
||||
* and table name defaults to the empty string
|
||||
*
|
||||
* @param row row key
|
||||
* @param column column key
|
||||
*/
|
||||
public HStoreKey(final byte [] row, final byte [] column) {
|
||||
this(row, column, HConstants.LATEST_TIMESTAMP);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying the row, column names and table name
|
||||
* The timestamp defaults to LATEST_TIMESTAMP
|
||||
*
|
||||
* @param row row key
|
||||
* @param column column key
|
||||
* @param regionInfo region info
|
||||
*/
|
||||
public HStoreKey(final byte [] row,
|
||||
final byte [] column, final HRegionInfo regionInfo) {
|
||||
this(row, column, HConstants.LATEST_TIMESTAMP, regionInfo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying all the fields
|
||||
* Does not make copies of the passed byte arrays. Presumes the passed
|
||||
* arrays immutable.
|
||||
* @param row row key
|
||||
* @param column column key
|
||||
* @param timestamp timestamp value
|
||||
* @param regionInfo region info
|
||||
*/
|
||||
public HStoreKey(final String row,
|
||||
final String column, long timestamp, final HRegionInfo regionInfo) {
|
||||
this (Bytes.toBytes(row), Bytes.toBytes(column),
|
||||
timestamp, regionInfo);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying all the fields with unspecified table
|
||||
* Does not make copies of the passed byte arrays. Presumes the passed
|
||||
* arrays immutable.
|
||||
* @param row row key
|
||||
* @param column column key
|
||||
* @param timestamp timestamp value
|
||||
*/
|
||||
public HStoreKey(final byte [] row, final byte [] column, long timestamp) {
|
||||
this(row, column, timestamp, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an HStoreKey specifying all the fields with specified table
|
||||
* Does not make copies of the passed byte arrays. Presumes the passed
|
||||
* arrays immutable.
|
||||
* @param row row key
|
||||
* @param column column key
|
||||
* @param timestamp timestamp value
|
||||
* @param regionInfo region info
|
||||
*/
|
||||
public HStoreKey(final byte [] row,
|
||||
final byte [] column, long timestamp, final HRegionInfo regionInfo) {
|
||||
// Make copies
|
||||
this.row = row;
|
||||
this.column = column;
|
||||
this.timestamp = timestamp;
|
||||
this.regionInfo = regionInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new HStoreKey from another
|
||||
*
|
||||
* @param other the source key
|
||||
*/
|
||||
public HStoreKey(HStoreKey other) {
|
||||
this(other.getRow(), other.getColumn(), other.getTimestamp(),
|
||||
other.getHRegionInfo());
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the value of the row key
|
||||
*
|
||||
* @param newrow new row key value
|
||||
*/
|
||||
public void setRow(byte [] newrow) {
|
||||
this.row = newrow;
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the value of the column in this key
|
||||
*
|
||||
* @param c new column family value
|
||||
*/
|
||||
public void setColumn(byte [] c) {
|
||||
this.column = c;
|
||||
}
|
||||
|
||||
/**
|
||||
* Change the value of the timestamp field
|
||||
*
|
||||
* @param timestamp new timestamp value
|
||||
*/
|
||||
public void setVersion(long timestamp) {
|
||||
this.timestamp = timestamp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value of this HStoreKey from the supplied key
|
||||
*
|
||||
* @param k key value to copy
|
||||
*/
|
||||
public void set(HStoreKey k) {
|
||||
this.row = k.getRow();
|
||||
this.column = k.getColumn();
|
||||
this.timestamp = k.getTimestamp();
|
||||
}
|
||||
|
||||
/** @return value of row key */
|
||||
public byte [] getRow() {
|
||||
return row;
|
||||
}
|
||||
|
||||
/** @return value of column */
|
||||
public byte [] getColumn() {
|
||||
return this.column;
|
||||
}
|
||||
|
||||
/** @return value of timestamp */
|
||||
public long getTimestamp() {
|
||||
return this.timestamp;
|
||||
}
|
||||
|
||||
/** @return value of regioninfo */
|
||||
public HRegionInfo getHRegionInfo() {
|
||||
return this.regionInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param hri
|
||||
*/
|
||||
public void setHRegionInfo(final HRegionInfo hri) {
|
||||
this.regionInfo = hri;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares the row and column of two keys
|
||||
* @param other Key to compare against. Compares row and column.
|
||||
* @return True if same row and column.
|
||||
* @see #matchesWithoutColumn(HStoreKey)
|
||||
* @see #matchesRowFamily(HStoreKey)
|
||||
*/
|
||||
public boolean matchesRowCol(HStoreKey other) {
|
||||
return HStoreKey.equalsTwoRowKeys(getHRegionInfo(), getRow(), other.getRow()) &&
|
||||
Bytes.equals(getColumn(), other.getColumn());
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares the row and timestamp of two keys
|
||||
*
|
||||
* @param other Key to compare against. Compares row and timestamp.
|
||||
*
|
||||
* @return True if same row and timestamp is greater than or equal to that of <code>other</code>
|
||||
* @see #matchesRowCol(HStoreKey)
|
||||
* @see #matchesRowFamily(HStoreKey)
|
||||
*/
|
||||
public boolean matchesWithoutColumn(HStoreKey other) {
|
||||
return equalsTwoRowKeys(getHRegionInfo(), getRow(), other.getRow()) &&
|
||||
getTimestamp() >= other.getTimestamp();
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares the row and column family of two keys
|
||||
*
|
||||
* @param that Key to compare against. Compares row and column family
|
||||
*
|
||||
* @return true if same row and column family
|
||||
* @see #matchesRowCol(HStoreKey)
|
||||
* @see #matchesWithoutColumn(HStoreKey)
|
||||
*/
|
||||
public boolean matchesRowFamily(HStoreKey that) {
|
||||
int delimiterIndex = getFamilyDelimiterIndex(getColumn());
|
||||
return equalsTwoRowKeys(getHRegionInfo(), getRow(), that.getRow()) &&
|
||||
Bytes.compareTo(getColumn(), 0, delimiterIndex, that.getColumn(), 0,
|
||||
delimiterIndex) == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return Bytes.toString(this.row) + "/" + Bytes.toString(this.column) + "/" +
|
||||
timestamp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
HStoreKey other = (HStoreKey)obj;
|
||||
// Do a quick check.
|
||||
if (this.row.length != other.row.length ||
|
||||
this.column.length != other.column.length ||
|
||||
this.timestamp != other.timestamp) {
|
||||
return false;
|
||||
}
|
||||
return compareTo(other) == 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = Bytes.hashCode(getRow());
|
||||
result ^= Bytes.hashCode(getColumn());
|
||||
result ^= getTimestamp();
|
||||
return result;
|
||||
}
|
||||
|
||||
// Comparable
|
||||
|
||||
public int compareTo(final HStoreKey o) {
|
||||
return compareTo(this.regionInfo, this, o);
|
||||
}
|
||||
|
||||
static int compareTo(final HRegionInfo hri, final HStoreKey left,
|
||||
final HStoreKey right) {
|
||||
// We can be passed null
|
||||
if (left == null && right == null) return 0;
|
||||
if (left == null) return -1;
|
||||
if (right == null) return 1;
|
||||
|
||||
int result = compareTwoRowKeys(hri, left.getRow(), right.getRow());
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
result = left.getColumn() == null && right.getColumn() == null? 0:
|
||||
left.getColumn() == null && right.getColumn() != null? -1:
|
||||
left.getColumn() != null && right.getColumn() == null? 1:
|
||||
Bytes.compareTo(left.getColumn(), right.getColumn());
|
||||
if (result != 0) {
|
||||
return result;
|
||||
}
|
||||
// The below older timestamps sorting ahead of newer timestamps looks
|
||||
// wrong but it is intentional. This way, newer timestamps are first
|
||||
// found when we iterate over a memcache and newer versions are the
|
||||
// first we trip over when reading from a store file.
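// Illustrative example: with equal row and column, a key at timestamp 200
// compares as less than a key at timestamp 100, so the newer version is the
// first one encountered by a scan.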
|
||||
if (left.getTimestamp() < right.getTimestamp()) {
|
||||
result = 1;
|
||||
} else if (left.getTimestamp() > right.getTimestamp()) {
|
||||
result = -1;
|
||||
}
|
||||
// Because of HBASE-877, our BeforeThisStoreKey trick no longer works in
|
||||
// mapfiles and so instead we need to do this weird check here below.
|
||||
return result == 0 && left instanceof BeforeThisStoreKey? -1:
|
||||
result == 0 && right instanceof BeforeThisStoreKey? 1:
|
||||
result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param column
|
||||
* @return New byte array that holds <code>column</code> family prefix only
|
||||
* (Does not include the colon DELIMITER).
|
||||
* @throws ColumnNameParseException
|
||||
* @see #parseColumn(byte[])
|
||||
*/
|
||||
public static byte [] getFamily(final byte [] column)
|
||||
throws ColumnNameParseException {
|
||||
int index = getFamilyDelimiterIndex(column);
|
||||
if (index <= 0) {
|
||||
throw new ColumnNameParseException("Missing ':' delimiter between " +
|
||||
"column family and qualifier in the passed column name <" +
|
||||
Bytes.toString(column) + ">");
|
||||
}
|
||||
byte [] result = new byte[index];
|
||||
System.arraycopy(column, 0, result, 0, index);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param column
|
||||
* @return Return hash of family portion of passed column.
|
||||
*/
|
||||
public static Integer getFamilyMapKey(final byte [] column) {
|
||||
int index = getFamilyDelimiterIndex(column);
|
||||
// If index is -1, presume the passed column is a family name missing the colon
|
||||
// delimiter
|
||||
return Bytes.mapKey(column, index > 0? index: column.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param family
|
||||
* @param column
|
||||
* @return True if <code>column</code> has a family of <code>family</code>.
|
||||
*/
|
||||
public static boolean matchingFamily(final byte [] family,
|
||||
final byte [] column) {
|
||||
// Make sure index of the ':' is at same offset.
|
||||
int index = getFamilyDelimiterIndex(column);
|
||||
if (index != family.length) {
|
||||
return false;
|
||||
}
|
||||
return Bytes.compareTo(family, 0, index, column, 0, index) == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param family
|
||||
* @return Return <code>family</code> plus the family delimiter.
|
||||
*/
|
||||
public static byte [] addDelimiter(final byte [] family) {
|
||||
// Manufacture key by adding delimiter to the passed in colFamily.
|
||||
byte [] familyPlusDelimiter = new byte [family.length + 1];
|
||||
System.arraycopy(family, 0, familyPlusDelimiter, 0, family.length);
|
||||
familyPlusDelimiter[family.length] = HStoreKey.COLUMN_FAMILY_DELIMITER;
|
||||
return familyPlusDelimiter;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param column
|
||||
* @return New byte array that holds <code>column</code> qualifier suffix.
|
||||
* @see #parseColumn(byte[])
|
||||
*/
|
||||
public static byte [] getQualifier(final byte [] column) {
|
||||
int index = getFamilyDelimiterIndex(column);
|
||||
int len = column.length - (index + 1);
|
||||
byte [] result = new byte[len];
|
||||
System.arraycopy(column, index + 1, result, 0, len);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param c Column name
|
||||
* @return Return array of size two whose first element has the family
|
||||
* prefix of passed column <code>c</code> and whose second element is the
|
||||
* column qualifier.
|
||||
* @throws ColumnNameParseException
|
||||
*/
|
||||
public static byte [][] parseColumn(final byte [] c)
|
||||
throws ColumnNameParseException {
|
||||
byte [][] result = new byte [2][];
|
||||
int index = getFamilyDelimiterIndex(c);
|
||||
if (index == -1) {
|
||||
throw new ColumnNameParseException("Impossible column name: " + c);
|
||||
}
|
||||
result[0] = new byte [index];
|
||||
System.arraycopy(c, 0, result[0], 0, index);
|
||||
int len = c.length - (index + 1);
|
||||
result[1] = new byte[len];
|
||||
System.arraycopy(c, index + 1 /*Skip delimiter*/, result[1], 0,
|
||||
len);
|
||||
return result;
|
||||
}
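// Illustration only: parseColumn(Bytes.toBytes("info:regioninfo")) returns the
// family "info" and qualifier "regioninfo" as two byte arrays; a bare family
// name with no ':' makes getFamilyDelimiterIndex return -1 and a
// ColumnNameParseException is thrown.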
|
||||
|
||||
/**
|
||||
* @param b
|
||||
* @return Index of the family-qualifier colon delimiter character in passed
|
||||
* buffer.
|
||||
*/
|
||||
public static int getFamilyDelimiterIndex(final byte [] b) {
|
||||
if (b == null) {
|
||||
throw new NullPointerException();
|
||||
}
|
||||
int result = -1;
|
||||
for (int i = 0; i < b.length; i++) {
|
||||
if (b[i] == COLUMN_FAMILY_DELIMITER) {
|
||||
result = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns row and column bytes out of an HStoreKey.
|
||||
* @param hsk Store key.
|
||||
* @return byte array encoding of HStoreKey
|
||||
*/
|
||||
public static byte[] getBytes(final HStoreKey hsk) {
|
||||
return Bytes.add(hsk.getRow(), hsk.getColumn());
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to compare two row keys.
|
||||
* This is required because of the meta delimiters.
|
||||
* This is a hack.
|
||||
* @param regionInfo
|
||||
* @param rowA
|
||||
* @param rowB
|
||||
* @return value of the comparison
|
||||
*/
|
||||
public static int compareTwoRowKeys(HRegionInfo regionInfo,
|
||||
byte[] rowA, byte[] rowB) {
|
||||
if (regionInfo != null && regionInfo.isMetaRegion()) {
|
||||
byte[][] keysA = stripStartKeyMeta(rowA);
|
||||
byte[][] KeysB = stripStartKeyMeta(rowB);
|
||||
int rowCompare = Bytes.compareTo(keysA[0], KeysB[0]);
|
||||
if(rowCompare == 0)
|
||||
rowCompare = Bytes.compareTo(keysA[1], KeysB[1]);
|
||||
return rowCompare;
|
||||
}
|
||||
return Bytes.compareTo(rowA, rowB);
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method to check if two row keys are equal.
|
||||
* This is required because of the meta delimiters
|
||||
* This is a hack
|
||||
* @param regionInfo
|
||||
* @param rowA
|
||||
* @param rowB
|
||||
* @return if it's equal
|
||||
*/
|
||||
public static boolean equalsTwoRowKeys(HRegionInfo regionInfo,
|
||||
byte[] rowA, byte[] rowB) {
|
||||
return ((rowA == null) && (rowB == null)) ? true:
|
||||
(rowA == null) || (rowB == null) || (rowA.length != rowB.length) ? false:
|
||||
compareTwoRowKeys(regionInfo,rowA,rowB) == 0;
|
||||
}
|
||||
|
||||
private static byte[][] stripStartKeyMeta(byte[] rowKey) {
|
||||
int offset = -1;
|
||||
for (int i = rowKey.length - 1; i > 0; i--) {
|
||||
if (rowKey[i] == HConstants.META_ROW_DELIMITER) {
|
||||
offset = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
byte [] row = rowKey;
|
||||
byte [] timestamp = HConstants.EMPTY_BYTE_ARRAY;
|
||||
if (offset != -1) {
|
||||
row = new byte[offset];
|
||||
System.arraycopy(rowKey, 0, row, 0, offset);
|
||||
timestamp = new byte[rowKey.length - offset - 1];
|
||||
System.arraycopy(rowKey, offset+1, timestamp, 0,rowKey.length - offset - 1);
|
||||
}
|
||||
byte[][] elements = new byte[2][];
|
||||
elements[0] = row;
|
||||
elements[1] = timestamp;
|
||||
return elements;
|
||||
}
|
||||
|
||||
// Writable
|
||||
|
||||
public void write(DataOutput out) throws IOException {
|
||||
Bytes.writeByteArray(out, this.row);
|
||||
Bytes.writeByteArray(out, this.column);
|
||||
out.writeLong(timestamp);
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
this.row = Bytes.readByteArray(in);
|
||||
this.column = Bytes.readByteArray(in);
|
||||
this.timestamp = in.readLong();
|
||||
}
|
||||
|
||||
public long heapSize() {
|
||||
return getRow().length + Bytes.ESTIMATED_HEAP_TAX +
|
||||
getColumn().length + Bytes.ESTIMATED_HEAP_TAX +
|
||||
ESTIMATED_HEAP_TAX;
|
||||
}
|
||||
|
||||
/**
|
||||
* Passed as comparator for memcache and for store files. See HBASE-868.
|
||||
*/
|
||||
public static class HStoreKeyWritableComparator extends WritableComparator {
|
||||
private final HRegionInfo hri;
|
||||
|
||||
/** @param hri */
|
||||
public HStoreKeyWritableComparator(final HRegionInfo hri) {
|
||||
super(HStoreKey.class);
|
||||
this.hri = hri;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public int compare(final WritableComparable left,
|
||||
final WritableComparable right) {
|
||||
return compareTo(this.hri, (HStoreKey)left, (HStoreKey)right);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Pass this class into {@link org.apache.hadoop.io.MapFile}.getClosest when
|
||||
* searching for the key that comes BEFORE this one but NOT this one. This
|
||||
* class will return > 0 when asked to compare against itself rather than 0.
|
||||
* This is a hack for case where getClosest returns a deleted key and we want
|
||||
* to get the previous. Can't unless we use this class; it'll just keep
|
||||
* returning us the deleted key (getClosest gets exact or nearest before when
|
||||
* you pass true argument). TODO: Throw this class away when MapFile has
|
||||
* a real 'previous' method. See HBASE-751.
|
||||
*/
|
||||
public static class BeforeThisStoreKey extends HStoreKey {
|
||||
private final HStoreKey beforeThisKey;
|
||||
|
||||
/**
|
||||
* @param beforeThisKey
|
||||
*/
|
||||
public BeforeThisStoreKey(final HStoreKey beforeThisKey) {
|
||||
super();
|
||||
this.beforeThisKey = beforeThisKey;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final HStoreKey o) {
|
||||
int result = this.beforeThisKey.compareTo(o);
|
||||
return result == 0? -1: result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getColumn() {
|
||||
return this.beforeThisKey.getColumn();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getRow() {
|
||||
return this.beforeThisKey.getRow();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long heapSize() {
|
||||
return this.beforeThisKey.heapSize();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getTimestamp() {
|
||||
return this.beforeThisKey.getTimestamp();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.beforeThisKey.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matchesRowCol(HStoreKey other) {
|
||||
return this.beforeThisKey.matchesRowCol(other);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matchesRowFamily(HStoreKey that) {
|
||||
return this.beforeThisKey.matchesRowFamily(that);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matchesWithoutColumn(HStoreKey other) {
|
||||
return this.beforeThisKey.matchesWithoutColumn(other);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
this.beforeThisKey.readFields(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void set(HStoreKey k) {
|
||||
this.beforeThisKey.set(k);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setColumn(byte[] c) {
|
||||
this.beforeThisKey.setColumn(c);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setRow(byte[] newrow) {
|
||||
this.beforeThisKey.setRow(newrow);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setVersion(long timestamp) {
|
||||
this.beforeThisKey.setVersion(timestamp);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.beforeThisKey.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void write(DataOutput out) throws IOException {
|
||||
this.beforeThisKey.write(out);
|
||||
}
|
||||
|
||||
@Override
|
||||
public HRegionInfo getHRegionInfo() {
|
||||
return this.beforeThisKey.getHRegionInfo();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setHRegionInfo(final HRegionInfo hri) {
|
||||
this.beforeThisKey.setHRegionInfo(hri);
|
||||
}
|
||||
}
|
||||
}
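A small round-trip sketch of the Writable methods above (illustrative only, not part of this change; note that the regionInfo hack is deliberately left out of serialization):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
import org.apache.hadoop.hbase.util.Bytes;

public class HStoreKeyRoundTrip {
  public static void main(String[] args) throws Exception {
    HStoreKey key = new HStoreKey(Bytes.toBytes("row1"), Bytes.toBytes("info:a"), 42L);
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    key.write(new DataOutputStream(buffer));   // writes row, column, timestamp
    HStoreKey copy = new HStoreKey();
    copy.readFields(new DataInputStream(
        new ByteArrayInputStream(buffer.toByteArray())));
    // copy now matches key on row, column and timestamp; getHRegionInfo() is null.
  }
}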
@@ -1,249 +0,0 @@
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.migration.nineteen.io;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.util.Hash;
|
||||
import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
|
||||
import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.BloomFilter;
|
||||
import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.Key;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
|
||||
/**
|
||||
* On write, all keys are added to a bloom filter. On read, all keys are
|
||||
* tested first against bloom filter. Keys are HStoreKey. If passed bloom
|
||||
* filter is null, just passes invocation to parent.
|
||||
*/
|
||||
// TODO should be fixed generic warnings from MapFile methods
|
||||
@SuppressWarnings("unchecked")
|
||||
public class BloomFilterMapFile extends HBaseMapFile {
|
||||
@SuppressWarnings("hiding")
|
||||
static final Log LOG = LogFactory.getLog(BloomFilterMapFile.class);
|
||||
protected static final String BLOOMFILTER_FILE_NAME = "filter";
|
||||
|
||||
public static class Reader extends HBaseReader {
|
||||
private final BloomFilter bloomFilter;
|
||||
|
||||
/**
|
||||
* @param fs
|
||||
* @param dirName
|
||||
* @param conf
|
||||
* @param filter
|
||||
* @param blockCacheEnabled
|
||||
* @param hri
|
||||
* @throws IOException
|
||||
*/
|
||||
public Reader(FileSystem fs, String dirName, Configuration conf,
|
||||
final boolean filter, final boolean blockCacheEnabled,
|
||||
HRegionInfo hri)
|
||||
throws IOException {
|
||||
super(fs, dirName, conf, blockCacheEnabled, hri);
|
||||
if (filter) {
|
||||
this.bloomFilter = loadBloomFilter(fs, dirName);
|
||||
} else {
|
||||
this.bloomFilter = null;
|
||||
}
|
||||
}
|
||||
|
||||
private BloomFilter loadBloomFilter(FileSystem fs, String dirName)
|
||||
throws IOException {
|
||||
Path filterFile = new Path(dirName, BLOOMFILTER_FILE_NAME);
|
||||
if(!fs.exists(filterFile)) {
|
||||
LOG.warn("FileNotFound: " + filterFile + "; proceeding without");
|
||||
return null;
|
||||
}
|
||||
BloomFilter filter = new BloomFilter();
|
||||
FSDataInputStream in = fs.open(filterFile);
|
||||
try {
|
||||
filter.readFields(in);
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
return filter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Writable get(WritableComparable key, Writable val)
|
||||
throws IOException {
|
||||
if (bloomFilter == null) {
|
||||
return super.get(key, val);
|
||||
}
|
||||
if(bloomFilter.membershipTest(getBloomFilterKey(key))) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("bloom filter reported that key exists");
|
||||
}
|
||||
return super.get(key, val);
|
||||
}
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("bloom filter reported that key does not exist");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public WritableComparable getClosest(WritableComparable key,
|
||||
Writable val) throws IOException {
|
||||
if (bloomFilter == null) {
|
||||
return super.getClosest(key, val);
|
||||
}
|
||||
// Note - the key being passed to us is always a HStoreKey
|
||||
if(bloomFilter.membershipTest(getBloomFilterKey(key))) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("bloom filter reported that key exists");
|
||||
}
|
||||
return super.getClosest(key, val);
|
||||
}
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("bloom filter reported that key does not exist");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return size of the bloom filter
|
||||
*/
|
||||
public int getBloomFilterSize() {
|
||||
return bloomFilter == null ? 0 : bloomFilter.getVectorSize();
|
||||
}
|
||||
}
|
||||
|
||||
public static class Writer extends HBaseWriter {
|
||||
private static final double DEFAULT_NUMBER_OF_HASH_FUNCTIONS = 4.0;
|
||||
private final BloomFilter bloomFilter;
|
||||
private final String dirName;
|
||||
private final FileSystem fs;
|
||||
|
||||
/**
|
||||
* @param conf
|
||||
* @param fs
|
||||
* @param dirName
|
||||
* @param compression
|
||||
* @param filter
|
||||
* @param nrows
|
||||
* @param hri
|
||||
* @throws IOException
|
||||
*/
|
||||
public Writer(Configuration conf, FileSystem fs, String dirName,
|
||||
SequenceFile.CompressionType compression, final boolean filter,
|
||||
int nrows, final HRegionInfo hri)
|
||||
throws IOException {
|
||||
super(conf, fs, dirName, compression, hri);
|
||||
this.dirName = dirName;
|
||||
this.fs = fs;
|
||||
if (filter) {
|
||||
/*
|
||||
* There is no way to automatically determine the vector size and the
|
||||
* number of hash functions to use. In particular, bloom filters are
|
||||
* very sensitive to the number of elements inserted into them. For
|
||||
* HBase, the number of entries depends on the size of the data stored
|
||||
* in the column. Currently the default region size is 256MB, so the
|
||||
* number of entries is approximately
|
||||
* 256MB / (average value size for column).
|
||||
*
|
||||
* If m denotes the number of bits in the Bloom filter (vectorSize),
|
||||
* n denotes the number of elements inserted into the Bloom filter and
|
||||
* k represents the number of hash functions used (nbHash), then
|
||||
* according to Broder and Mitzenmacher,
|
||||
*
|
||||
* ( http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/BloomFilterSurvey.pdf )
|
||||
*
|
||||
* the probability of false positives is minimized when k is
|
||||
* approximately m/n ln(2).
|
||||
*
|
||||
* If we fix the number of hash functions and know the number of
|
||||
* entries, then the optimal vector size m = (k * n) / ln(2)
|
||||
*/
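/*
 * Worked example with illustrative numbers: for nrows n = 1,000,000 and
 * k = DEFAULT_NUMBER_OF_HASH_FUNCTIONS = 4, the expression below yields
 * m = ceil((4 * 1,000,000) / ln 2) = 5,770,781 bits for the vector size.
 */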
|
||||
BloomFilter f = null;
|
||||
try {
|
||||
f = new BloomFilter(
|
||||
(int) Math.ceil(
|
||||
(DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) /
|
||||
Math.log(2.0)),
|
||||
(int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS,
|
||||
Hash.getHashType(conf)
|
||||
);
|
||||
} catch (IllegalArgumentException e) {
|
||||
LOG.warn("Failed creating bloomfilter; proceeding without", e);
|
||||
}
|
||||
this.bloomFilter = f;
|
||||
} else {
|
||||
this.bloomFilter = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(WritableComparable key, Writable val)
|
||||
throws IOException {
|
||||
if (bloomFilter != null) {
|
||||
bloomFilter.add(getBloomFilterKey(key));
|
||||
}
|
||||
super.append(key, val);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void close() throws IOException {
|
||||
super.close();
|
||||
if (this.bloomFilter != null) {
|
||||
flushBloomFilter();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Flushes bloom filter to disk
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
private void flushBloomFilter() throws IOException {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("flushing bloom filter for " + this.dirName);
|
||||
}
|
||||
FSDataOutputStream out =
|
||||
fs.create(new Path(dirName, BLOOMFILTER_FILE_NAME));
|
||||
try {
|
||||
bloomFilter.write(out);
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("flushed bloom filter for " + this.dirName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Custom bloom filter key maker.
|
||||
* @param key
|
||||
* @return Key made of bytes of row only.
|
||||
*/
|
||||
protected static Key getBloomFilterKey(WritableComparable key) {
|
||||
return new Key(((HStoreKey) key).getRow());
|
||||
}
|
||||
}
@@ -1,114 +0,0 @@
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.migration.nineteen.io;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HStoreKey;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.io.MapFile;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
/**
|
||||
* HBase customizations of MapFile.
|
||||
*/
|
||||
public class HBaseMapFile extends MapFile {
|
||||
// TODO not used. remove?!
|
||||
// private static final Log LOG = LogFactory.getLog(HBaseMapFile.class);
|
||||
|
||||
/**
|
||||
* Values are instances of this class.
|
||||
*/
|
||||
public static final Class<? extends Writable> VALUE_CLASS =
|
||||
ImmutableBytesWritable.class;
|
||||
|
||||
/**
|
||||
* A reader capable of reading and caching blocks of the data file.
|
||||
*/
|
||||
public static class HBaseReader extends MapFile.Reader {
|
||||
private final boolean blockCacheEnabled;
|
||||
|
||||
/**
|
||||
* @param fs
|
||||
* @param dirName
|
||||
* @param conf
|
||||
* @param hri
|
||||
* @throws IOException
|
||||
*/
|
||||
public HBaseReader(FileSystem fs, String dirName, Configuration conf,
|
||||
HRegionInfo hri)
|
||||
throws IOException {
|
||||
this(fs, dirName, conf, false, hri);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param fs
|
||||
* @param dirName
|
||||
* @param conf
|
||||
* @param blockCacheEnabled
|
||||
* @param hri
|
||||
* @throws IOException
|
||||
*/
|
||||
public HBaseReader(FileSystem fs, String dirName, Configuration conf,
|
||||
boolean blockCacheEnabled, HRegionInfo hri)
|
||||
throws IOException {
|
||||
super(fs, dirName, new org.apache.hadoop.hbase.migration.nineteen.HStoreKey.HStoreKeyWritableComparator(hri),
|
||||
conf, false); // defer opening streams
|
||||
this.blockCacheEnabled = blockCacheEnabled;
|
||||
open(fs, dirName, new org.apache.hadoop.hbase.migration.nineteen.HStoreKey.HStoreKeyWritableComparator(hri), conf);
|
||||
|
||||
// Force reading of the mapfile index by calling midKey. Reading the
|
||||
// index will bring the index into memory over here on the client and
|
||||
// then close the index file freeing up socket connection and resources
|
||||
// in the datanode. Usually, the first access on a MapFile.Reader will
|
||||
// load the index. We force the issue in HStoreFile MapFiles because an
|
||||
// access may not happen for some time; meantime we're using up datanode
|
||||
// resources (See HADOOP-2341). midKey() goes to index. Does not seek.
|
||||
|
||||
|
||||
// Disable for migration !!! midKey();
|
||||
}
|
||||
}
|
||||
|
||||
public static class HBaseWriter extends MapFile.Writer {
|
||||
/**
|
||||
* @param conf
|
||||
* @param fs
|
||||
* @param dirName
|
||||
* @param compression
|
||||
* @param hri
|
||||
* @throws IOException
|
||||
*/
|
||||
public HBaseWriter(Configuration conf, FileSystem fs, String dirName,
|
||||
SequenceFile.CompressionType compression, final HRegionInfo hri)
|
||||
throws IOException {
|
||||
super(conf, fs, dirName, new org.apache.hadoop.hbase.migration.nineteen.HStoreKey.HStoreKeyWritableComparator(hri),
|
||||
VALUE_CLASS, compression);
|
||||
// Default for mapfiles is 128. Makes random reads faster if we
|
||||
// have more keys indexed and we're not 'next'-ing around in the
|
||||
// mapfile.
|
||||
setIndexInterval(conf.getInt("hbase.io.index.interval", 128));
|
||||
}
|
||||
}
|
||||
}
@@ -1,228 +0,0 @@
/**
|
||||
* Copyright 2008 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.migration.nineteen.io;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
|
||||
import org.apache.hadoop.hbase.migration.nineteen.io.Reference.Range;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.hadoop.io.WritableComparable;
|
||||
|
||||
/**
|
||||
* A facade for a {@link org.apache.hadoop.io.MapFile.Reader} that serves up
|
||||
* either the top or bottom half of a MapFile where 'bottom' is the first half
|
||||
* of the file containing the keys that sort lowest and 'top' is the second half
|
||||
* of the file with keys that sort greater than those of the bottom half.
|
||||
* The top includes the split file's midkey, or the key that follows if it does
|
||||
* not exist in the file.
|
||||
*
|
||||
* <p>This type works in tandem with the {@link Reference} type. This class
|
||||
* is used reading while Reference is used writing.
|
||||
*
|
||||
* <p>This file is not splittable. Calls to {@link #midKey()} return null.
|
||||
*/
|
||||
//TODO should be fixed generic warnings from MapFile methods
|
||||
public class HalfMapFileReader extends BloomFilterMapFile.Reader {
|
||||
private static final Log LOG = LogFactory.getLog(HalfMapFileReader.class);
|
||||
|
||||
private final boolean top;
|
||||
private final HStoreKey midkey;
|
||||
private boolean firstNextCall = true;
|
||||
|
||||
/**
|
||||
* @param fs
|
||||
* @param dirName
|
||||
* @param conf
|
||||
* @param r
|
||||
* @param mk
|
||||
* @param hri
|
||||
* @throws IOException
|
||||
*/
|
||||
public HalfMapFileReader(final FileSystem fs, final String dirName,
|
||||
final Configuration conf, final Range r,
|
||||
final WritableComparable<HStoreKey> mk,
|
||||
final HRegionInfo hri)
|
||||
throws IOException {
|
||||
this(fs, dirName, conf, r, mk, false, false, hri);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param fs
|
||||
* @param dirName
|
||||
* @param conf
|
||||
* @param r
|
||||
* @param mk
|
||||
* @param filter
|
||||
* @param blockCacheEnabled
|
||||
* @param hri
|
||||
* @throws IOException
|
||||
*/
|
||||
public HalfMapFileReader(final FileSystem fs, final String dirName,
|
||||
final Configuration conf, final Range r,
|
||||
final WritableComparable<HStoreKey> mk, final boolean filter,
|
||||
final boolean blockCacheEnabled,
|
||||
final HRegionInfo hri)
|
||||
throws IOException {
|
||||
super(fs, dirName, conf, filter, blockCacheEnabled, hri);
|
||||
// This is not the actual midkey for this half-file; it's just the border
|
||||
// around which we split top and bottom. Have to look in files to find
|
||||
// actual last and first keys for bottom and top halves. Half-files don't
|
||||
// have an actual midkey themselves. No midkey is how we indicate file is
|
||||
// not splittable.
|
||||
this.midkey = new HStoreKey((HStoreKey)mk);
|
||||
this.midkey.setHRegionInfo(hri);
|
||||
// Is it top or bottom half?
|
||||
this.top = Reference.isTopFileRegion(r);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check key is not bleeding into wrong half of the file.
|
||||
* @param key
|
||||
* @throws IOException
|
||||
*/
|
||||
private void checkKey(final WritableComparable<HStoreKey> key)
|
||||
throws IOException {
|
||||
if (top) {
|
||||
if (key.compareTo(midkey) < 0) {
|
||||
throw new IOException("Illegal Access: Key is less than midKey of " +
|
||||
"backing mapfile");
|
||||
}
|
||||
} else if (key.compareTo(midkey) >= 0) {
|
||||
throw new IOException("Illegal Access: Key is greater than or equal " +
|
||||
"to midKey of backing mapfile");
|
||||
}
|
||||
}
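// Example: if the split border is "m", the top-half reader rejects keys below
// "m" and the bottom-half reader rejects keys at or above "m" with the
// IOExceptions thrown above.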
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public synchronized void finalKey(WritableComparable key)
|
||||
throws IOException {
|
||||
if (top) {
|
||||
super.finalKey(key);
|
||||
} else {
|
||||
Writable value = new ImmutableBytesWritable();
|
||||
WritableComparable found = super.getClosest(midkey, value, true);
|
||||
Writables.copyWritable(found, key);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public synchronized Writable get(WritableComparable key, Writable val)
|
||||
throws IOException {
|
||||
checkKey(key);
|
||||
return super.get(key, val);
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public synchronized WritableComparable getClosest(WritableComparable key,
|
||||
Writable val)
|
||||
throws IOException {
|
||||
WritableComparable closest = null;
|
||||
if (top) {
|
||||
// If top, the lowest possible key is first key. Do not have to check
|
||||
// what comes back from super getClosest. Will return exact match or
|
||||
// greater.
|
||||
closest = (key.compareTo(this.midkey) < 0)?
|
||||
this.midkey: super.getClosest(key, val);
|
||||
// we know that we just went past the midkey
|
||||
firstNextCall = false;
|
||||
} else {
|
||||
// We're serving bottom of the file.
|
||||
if (key.compareTo(this.midkey) < 0) {
|
||||
// Check key is within range for bottom.
|
||||
closest = super.getClosest(key, val);
|
||||
// midkey was made against largest store file at time of split. Smaller
|
||||
// store files could have anything in them. Check return value is
|
||||
// not beyond the midkey (getClosest returns exact match or next after)
|
||||
if (closest != null && closest.compareTo(this.midkey) >= 0) {
|
||||
// Don't let this value out.
|
||||
closest = null;
|
||||
}
|
||||
}
|
||||
// Else, key is > midkey so let out closest = null.
|
||||
}
|
||||
return closest;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public synchronized WritableComparable midKey() throws IOException {
|
||||
// Returns null to indicate the file is not splittable.
|
||||
return null;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public synchronized boolean next(WritableComparable key, Writable val)
|
||||
throws IOException {
|
||||
if (firstNextCall) {
|
||||
firstNextCall = false;
|
||||
if (this.top) {
|
||||
// Seek to midkey. Midkey may not exist in this file. That should be
|
||||
// fine. Then we'll either be positioned at end or start of file.
|
||||
WritableComparable nearest = getClosest(this.midkey, val);
|
||||
// Now copy the midkey into the passed key.
|
||||
if (nearest != null) {
|
||||
Writables.copyWritable(nearest, key);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
boolean result = super.next(key, val);
|
||||
int cmpresult = key.compareTo(midkey);
|
||||
|
||||
if (top && cmpresult < 0) {
|
||||
LOG.error("BUG BUG BUG. HalfMapFileReader wanted to return key out of range. DANGER");
|
||||
throw new IOException("BUG BUG BUG. HalfMapFileReader wanted to return key out of range. DANGER");
|
||||
} else if (!top && cmpresult >= 0) {
|
||||
result = false;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void reset() throws IOException {
|
||||
if (top) {
|
||||
firstNextCall = true;
|
||||
return;
|
||||
}
|
||||
super.reset();
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public synchronized boolean seek(WritableComparable key)
|
||||
throws IOException {
|
||||
checkKey(key);
|
||||
return super.seek(key);
|
||||
}
|
||||
}
@@ -1,117 +0,0 @@
/**
|
||||
*
|
||||
*/
|
||||
package org.apache.hadoop.hbase.migration.nineteen.io;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
|
||||
/**
|
||||
* A reference to a part of a store file. The file referenced usually lives
|
||||
* under a different region. The part referenced is usually the top or bottom
|
||||
* half of the file. References are made at region split time. Being lazy
|
||||
* about copying data between the parent of the split and the split daughters
|
||||
* makes splitting faster.
|
||||
*
|
||||
* <p>References work with {@link HalfMapFileReader}. References know how to
|
||||
* write out the reference format in the file system and are whats juggled when
|
||||
* references are mixed in with direct store files. The
|
||||
* {@link HalfMapFileReader} is used reading the referred to file.
|
||||
*
|
||||
* <p>References to store files located over in some other region look like
|
||||
* this in the file system
|
||||
* <code>1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184</code>:
|
||||
* i.e. an id followed by the name of the referenced region. The data
|
||||
* ('mapfiles') of references are empty. The accompanying <code>info</code> file
|
||||
* contains the <code>midkey</code> that demarks top and bottom of the
|
||||
* referenced storefile, the id of the remote store we're referencing and
|
||||
* whether we're to serve the top or bottom region of the remote store file.
|
||||
* Note, a region is itself not splittable if it has instances of store file
|
||||
* references. References are cleaned up by compactions.
|
||||
*/
|
||||
public class Reference implements Writable {
|
||||
// TODO: see if it makes sense making a ReferenceMapFile whose Writer is this
|
||||
// class and whose Reader is the {@link HalfMapFileReader}.
|
||||
|
||||
private int encodedRegionName;
|
||||
private long fileid;
|
||||
private Range region;
|
||||
private HStoreKey midkey;
|
||||
|
||||
/**
|
||||
* For split HStoreFiles, it specifies if the file covers the lower half or
|
||||
* the upper half of the key range
|
||||
*/
|
||||
public static enum Range {
|
||||
/** HStoreFile contains upper half of key range */
|
||||
top,
|
||||
/** HStoreFile contains lower half of key range */
|
||||
bottom
|
||||
}
|
||||
|
||||
public Reference(final int ern, final long fid, final HStoreKey m,
|
||||
final Range fr) {
|
||||
this.encodedRegionName = ern;
|
||||
this.fileid = fid;
|
||||
this.region = fr;
|
||||
this.midkey = m;
|
||||
}
|
||||
|
||||
public Reference() {
|
||||
this(-1, -1, null, Range.bottom);
|
||||
}
|
||||
|
||||
public long getFileId() {
|
||||
return fileid;
|
||||
}
|
||||
|
||||
public Range getFileRegion() {
|
||||
return region;
|
||||
}
|
||||
|
||||
public HStoreKey getMidkey() {
|
||||
return midkey;
|
||||
}
|
||||
|
||||
public int getEncodedRegionName() {
|
||||
return this.encodedRegionName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return encodedRegionName + "/" + fileid + "/" + region;
|
||||
}
|
||||
|
||||
// Make it serializable.
|
||||
|
||||
public void write(DataOutput out) throws IOException {
|
||||
// Write out the encoded region name as a String. Doing it as a String
|
||||
// keeps a Reference's serialization backward compatible with
|
||||
// pre-HBASE-82 serializations. The alternative is rewriting all
|
||||
// info files in hbase (Serialized References are written into the
|
||||
// 'info' file that accompanies HBase Store files).
|
||||
out.writeUTF(Integer.toString(encodedRegionName));
|
||||
out.writeLong(fileid);
|
||||
// Write true if we're doing top of the file.
|
||||
out.writeBoolean(isTopFileRegion(region));
|
||||
this.midkey.write(out);
|
||||
}
|
||||
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
this.encodedRegionName = Integer.parseInt(in.readUTF());
|
||||
fileid = in.readLong();
|
||||
boolean tmp = in.readBoolean();
|
||||
// If true, set region to top.
|
||||
region = tmp? Range.top: Range.bottom;
|
||||
midkey = new HStoreKey();
|
||||
midkey.readFields(in);
|
||||
}
|
||||
|
||||
public static boolean isTopFileRegion(final Range r) {
|
||||
return r.equals(Range.top);
|
||||
}
|
||||
}
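A hedged write-side sketch of the serialization above (values and the output file are hypothetical; the field order mirrors write()):

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
import org.apache.hadoop.hbase.migration.nineteen.io.Reference;
import org.apache.hadoop.hbase.util.Bytes;

public class ReferenceWriteSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical encoded region name, file id and split row.
    Reference ref = new Reference(1278437856, 959247014679548184L,
        new HStoreKey(Bytes.toBytes("splitrow")), Reference.Range.top);
    DataOutputStream out = new DataOutputStream(new FileOutputStream("info"));
    try {
      // On disk: UTF region name, long file id, boolean top/bottom, then the midkey.
      ref.write(out);
    } finally {
      out.close();
    }
  }
}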
@@ -1,236 +0,0 @@
/**
|
||||
*
|
||||
* Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
|
||||
* All rights reserved.
|
||||
* Redistribution and use in source and binary forms, with or
|
||||
* without modification, are permitted provided that the following
|
||||
* conditions are met:
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the distribution.
|
||||
* - Neither the name of the University Catholique de Louvain - UCL
|
||||
* nor the names of its contributors may be used to endorse or
|
||||
* promote products derived from this software without specific prior
|
||||
* written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import java.util.BitSet;

import org.apache.hadoop.hbase.util.Hash;

/**
 * Implements a <i>Bloom filter</i>, as defined by Bloom in 1970.
 * <p>
 * The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
 * the networking research community in the past decade thanks to the bandwidth efficiencies that it
 * offers for the transmission of set membership information between networked hosts. A sender encodes
 * the information into a bit vector, the Bloom filter, that is more compact than a conventional
 * representation. Computation and space costs for construction are linear in the number of elements.
 * The receiver uses the filter to test whether various elements are members of the set. Though the
 * filter will occasionally return a false positive, it will never return a false negative. When creating
 * the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
 *
 * contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
 *
 * @version 1.0 - 2 Feb. 07
 */
public class BloomFilter extends Filter {
  private static final byte[] bitvalues = new byte[] {
    (byte)0x01,
    (byte)0x02,
    (byte)0x04,
    (byte)0x08,
    (byte)0x10,
    (byte)0x20,
    (byte)0x40,
    (byte)0x80
  };

  /** The bit vector. */
  BitSet bits;

  /** Default constructor - use with readFields */
  public BloomFilter() {
    super();
  }

  /**
   * Constructor
   * @param vectorSize The vector size of <i>this</i> filter.
   * @param nbHash The number of hash functions to consider.
   * @param hashType type of the hashing function (see {@link Hash}).
   */
  public BloomFilter(int vectorSize, int nbHash, int hashType){
    super(vectorSize, nbHash, hashType);

    bits = new BitSet(this.vectorSize);
  }//end constructor

  @Override
  public void add(Key key) {
    if(key == null) {
      throw new NullPointerException("key cannot be null");
    }

    int[] h = hash.hash(key);
    hash.clear();

    for(int i = 0; i < nbHash; i++) {
      bits.set(h[i]);
    }
  }//end add()

  @Override
  public void and(Filter filter){
    if(filter == null
        || !(filter instanceof BloomFilter)
        || filter.vectorSize != this.vectorSize
        || filter.nbHash != this.nbHash) {
      throw new IllegalArgumentException("filters cannot be and-ed");
    }

    this.bits.and(((BloomFilter) filter).bits);
  }//end and()

  @Override
  public boolean membershipTest(Key key){
    if(key == null) {
      throw new NullPointerException("key cannot be null");
    }

    int[] h = hash.hash(key);
    hash.clear();
    for(int i = 0; i < nbHash; i++) {
      if(!bits.get(h[i])) {
        return false;
      }
    }
    return true;
  }//end membershipTest()

  @Override
  public void not(){
    bits.flip(0, vectorSize - 1);
  }//end not()

  @Override
  public void or(Filter filter){
    if(filter == null
        || !(filter instanceof BloomFilter)
        || filter.vectorSize != this.vectorSize
        || filter.nbHash != this.nbHash) {
      throw new IllegalArgumentException("filters cannot be or-ed");
    }
    bits.or(((BloomFilter) filter).bits);
  }//end or()

  @Override
  public void xor(Filter filter){
    if(filter == null
        || !(filter instanceof BloomFilter)
        || filter.vectorSize != this.vectorSize
        || filter.nbHash != this.nbHash) {
      throw new IllegalArgumentException("filters cannot be xor-ed");
    }
    bits.xor(((BloomFilter) filter).bits);
  }//end xor()

  @Override
  public String toString(){
    return bits.toString();
  }//end toString()

  @Override
  public Object clone(){
    BloomFilter bf = new BloomFilter(vectorSize, nbHash, hashType);
    bf.or(this);
    return bf;
  }//end clone()

  /**
   * @return size of the bloomfilter
   */
  public int getVectorSize() {
    return this.vectorSize;
  }

  // Writable

  @Override
  public void write(DataOutput out) throws IOException {
    super.write(out);
    byte[] bytes = new byte[getNBytes()];
    for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
      if (bitIndex == 8) {
        bitIndex = 0;
        byteIndex++;
      }
      if (bitIndex == 0) {
        bytes[byteIndex] = 0;
      }
      if (bits.get(i)) {
        bytes[byteIndex] |= bitvalues[bitIndex];
      }
    }
    out.write(bytes);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    bits = new BitSet(this.vectorSize);
    byte[] bytes = new byte[getNBytes()];
    in.readFully(bytes);
    for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
      if (bitIndex == 8) {
        bitIndex = 0;
        byteIndex++;
      }
      if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) {
        bits.set(i);
      }
    }
  }

  /* @return number of bytes needed to hold bit vector */
  private int getNBytes() {
    return (vectorSize + 7) / 8;
  }
}//end class
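For illustration only, a minimal sketch of how the class above was typically driven. The example class name, vector size, hash count and row keys are invented for the sketch; the filter API (constructor, add, membershipTest) and Hash.JENKINS_HASH come from the code shown in this commit.

package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import org.apache.hadoop.hbase.util.Hash;

public class BloomFilterExample {
  public static void main(String[] args) {
    // 8192-bit vector, 4 hash functions, Jenkins hashing.
    BloomFilter bf = new BloomFilter(8 * 1024, 4, Hash.JENKINS_HASH);
    bf.add(new Key("row-0001".getBytes()));
    bf.add(new Key("row-0002".getBytes()));
    // Never a false negative for an added key ...
    System.out.println(bf.membershipTest(new Key("row-0001".getBytes()))); // true
    // ... but an absent key may occasionally test true (a false positive).
    System.out.println(bf.membershipTest(new Key("row-9999".getBytes())));
  }
}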
@@ -1,311 +0,0 @@
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;   //TODO: remove

import org.apache.hadoop.hbase.util.Hash;

/**
 * Implements a <i>counting Bloom filter</i>, as defined by Fan et al. in a ToN
 * 2000 paper.
 * <p>
 * A counting Bloom filter is an improvement to a standard Bloom filter as it
 * allows dynamic additions and deletions of set membership information. This
 * is achieved through the use of a counting vector instead of a bit vector.
 *
 * contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
 *
 * @version 1.1 - 19 Jan. 08
 *
 */
public final class CountingBloomFilter extends Filter {
  /** Storage for the counting buckets */
  private long[] buckets;

  /** We are using 4bit buckets, so each bucket can count to 15 */
  private final static long BUCKET_MAX_VALUE = 15;

  /** Default constructor - use with readFields */
  public CountingBloomFilter() {}

  /**
   * Constructor
   * @param vectorSize The vector size of <i>this</i> filter.
   * @param nbHash The number of hash functions to consider.
   * @param hashType type of the hashing function (see {@link Hash}).
   */
  public CountingBloomFilter(int vectorSize, int nbHash, int hashType){
    super(vectorSize, nbHash, hashType);
    buckets = new long[buckets2words(vectorSize)];
  }//end constructor

  /** returns the number of 64 bit words it would take to hold vectorSize buckets */
  private static int buckets2words(int vectorSize) {
    return ((vectorSize - 1) >>> 4) + 1;
  }

  @Override
  public void add(Key key) {
    if(key == null) {
      throw new NullPointerException("key can not be null");
    }

    int[] h = hash.hash(key);
    hash.clear();

    for(int i = 0; i < nbHash; i++) {
      // find the bucket
      int wordNum = h[i] >> 4;               // div 16
      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4

      long bucketMask = 15L << bucketShift;
      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;

      // only increment if the count in the bucket is less than BUCKET_MAX_VALUE
      if(bucketValue < BUCKET_MAX_VALUE) {
        // increment by 1
        buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue + 1) << bucketShift);
      }
    }
  }//end add()

  /**
   * Removes a specified key from <i>this</i> counting Bloom filter.
   * <p>
   * <b>Invariant</b>: nothing happens if the specified key does not belong to <i>this</i> counting Bloom filter.
   * @param key The key to remove.
   */
  public void delete(Key key) {
    if(key == null) {
      throw new NullPointerException("Key may not be null");
    }
    if(!membershipTest(key)) {
      throw new IllegalArgumentException("Key is not a member");
    }

    int[] h = hash.hash(key);
    hash.clear();

    for(int i = 0; i < nbHash; i++) {
      // find the bucket
      int wordNum = h[i] >> 4;               // div 16
      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4

      long bucketMask = 15L << bucketShift;
      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;

      // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE
      if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) {
        // decrement by 1
        buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift);
      }
    }
  }//end delete

  @Override
  public void and(Filter filter){
    if(filter == null
        || !(filter instanceof CountingBloomFilter)
        || filter.vectorSize != this.vectorSize
        || filter.nbHash != this.nbHash) {
      throw new IllegalArgumentException("filters cannot be and-ed");
    }
    CountingBloomFilter cbf = (CountingBloomFilter)filter;

    int sizeInWords = buckets2words(vectorSize);
    for(int i = 0; i < sizeInWords; i++) {
      this.buckets[i] &= cbf.buckets[i];
    }
  }//end and()

  @Override
  public boolean membershipTest(Key key){
    if(key == null) {
      throw new NullPointerException("Key may not be null");
    }

    int[] h = hash.hash(key);
    hash.clear();

    for(int i = 0; i < nbHash; i++) {
      // find the bucket
      int wordNum = h[i] >> 4;               // div 16
      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4

      long bucketMask = 15L << bucketShift;

      if((buckets[wordNum] & bucketMask) == 0) {
        return false;
      }
    }

    return true;
  }//end membershipTest()

  /**
   * This method calculates an approximate count of the key, i.e. how many
   * times the key was added to the filter. This allows the filter to be
   * used as an approximate <code>key -> count</code> map.
   * <p>NOTE: due to the bucket size of this filter, inserting the same
   * key more than 15 times will cause an overflow at all filter positions
   * associated with this key, and it will significantly increase the error
   * rate for this and other keys. For this reason the filter can only be
   * used to store small count values <code>0 <= N << 15</code>.
   * @param key key to be tested
   * @return 0 if the key is not present. Otherwise, a positive value v will
   * be returned such that <code>v == count</code> with probability equal to the
   * error rate of this filter, and <code>v > count</code> otherwise.
   * Additionally, if the filter experienced an underflow as a result of
   * {@link #delete(Key)} operation, the return value may be lower than the
   * <code>count</code> with the probability of the false negative rate of such
   * filter.
   */
  public int approximateCount(Key key) {
    int res = Integer.MAX_VALUE;
    int[] h = hash.hash(key);
    hash.clear();
    for (int i = 0; i < nbHash; i++) {
      // find the bucket
      int wordNum = h[i] >> 4;               // div 16
      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4

      long bucketMask = 15L << bucketShift;
      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
      if (bucketValue < res) res = (int)bucketValue;
    }
    if (res != Integer.MAX_VALUE) {
      return res;
    } else {
      return 0;
    }
  }

  @Override
  public void not(){
    throw new UnsupportedOperationException("not() is undefined for "
        + this.getClass().getName());
  }//end not()

  @Override
  public void or(Filter filter){
    if(filter == null
        || !(filter instanceof CountingBloomFilter)
        || filter.vectorSize != this.vectorSize
        || filter.nbHash != this.nbHash) {
      throw new IllegalArgumentException("filters cannot be or-ed");
    }

    CountingBloomFilter cbf = (CountingBloomFilter)filter;

    int sizeInWords = buckets2words(vectorSize);
    for(int i = 0; i < sizeInWords; i++) {
      this.buckets[i] |= cbf.buckets[i];
    }
  }//end or()

  @Override
  @SuppressWarnings("unused")
  public void xor(Filter filter){
    throw new UnsupportedOperationException("xor() is undefined for "
        + this.getClass().getName());
  }//end xor()

  @Override
  public String toString(){
    StringBuilder res = new StringBuilder();

    for(int i = 0; i < vectorSize; i++) {
      if(i > 0) {
        res.append(" ");
      }

      int wordNum = i >> 4;               // div 16
      int bucketShift = (i & 0x0f) << 2;  // (mod 16) * 4

      long bucketMask = 15L << bucketShift;
      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;

      res.append(bucketValue);
    }

    return res.toString();
  }//end toString()

  @Override
  public Object clone(){
    CountingBloomFilter cbf = new CountingBloomFilter(vectorSize, nbHash, hashType);
    cbf.buckets = this.buckets.clone();
    return cbf;
  }//end clone()

  // Writable

  @Override
  public void write(DataOutput out) throws IOException {
    super.write(out);
    int sizeInWords = buckets2words(vectorSize);
    for(int i = 0; i < sizeInWords; i++) {
      out.writeLong(buckets[i]);
    }
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    int sizeInWords = buckets2words(vectorSize);
    buckets = new long[sizeInWords];
    for(int i = 0; i < sizeInWords; i++) {
      buckets[i] = in.readLong();
    }
  }
}//end class
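Each long above packs sixteen 4-bit buckets: h >> 4 selects the word and (h & 0x0f) << 2 the shift inside it, so every bucket saturates at 15. As a quick illustration (vector size, hash count and key are invented; the add/delete/approximateCount API is taken from the class above), a minimal sketch:

package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import org.apache.hadoop.hbase.util.Hash;

public class CountingBloomFilterExample {
  public static void main(String[] args) {
    CountingBloomFilter cbf = new CountingBloomFilter(8 * 1024, 4, Hash.JENKINS_HASH);
    Key k = new Key("row-0001".getBytes());
    cbf.add(k);
    cbf.add(k);
    // Small counts are approximated by the minimum bucket value over the hash positions.
    System.out.println(cbf.approximateCount(k)); // typically 2
    cbf.delete(k);                               // decrement, unlike a plain Bloom filter
    System.out.println(cbf.membershipTest(k));   // still true, one addition remains
  }
}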
@@ -1,299 +0,0 @@
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.hbase.util.Hash;

/**
 * Implements a <i>dynamic Bloom filter</i>, as defined in the INFOCOM 2006 paper.
 * <p>
 * A dynamic Bloom filter (DBF) makes use of a <code>s * m</code> bit matrix but
 * each of the <code>s</code> rows is a standard Bloom filter. The creation
 * process of a DBF is iterative. At the start, the DBF is a <code>1 * m</code>
 * bit matrix, i.e., it is composed of a single standard Bloom filter.
 * It assumes that <code>n<sub>r</sub></code> elements are recorded in the
 * initial bit vector, where <code>n<sub>r</sub> <= n</code> (<code>n</code> is
 * the cardinality of the set <code>A</code> to record in the filter).
 * <p>
 * As the size of <code>A</code> grows during the execution of the application,
 * several keys must be inserted in the DBF. When inserting a key into the DBF,
 * one must first get an active Bloom filter in the matrix. A Bloom filter is
 * active when the number of recorded keys, <code>n<sub>r</sub></code>, is
 * strictly less than the current cardinality of <code>A</code>, <code>n</code>.
 * If an active Bloom filter is found, the key is inserted and
 * <code>n<sub>r</sub></code> is incremented by one. On the other hand, if there
 * is no active Bloom filter, a new one is created (i.e., a new row is added to
 * the matrix) according to the current size of <code>A</code> and the element
 * is added in this new Bloom filter and the <code>n<sub>r</sub></code> value of
 * this new Bloom filter is set to one. A given key is said to belong to the
 * DBF if the <code>k</code> positions are set to one in one of the matrix rows.
 *
 * contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
 *
 * @version 1.0 - 6 Feb. 07
 *
 */
public class DynamicBloomFilter extends Filter {
  /**
   * Threshold for the maximum number of keys to record in a dynamic Bloom filter row.
   */
  private int nr;

  /**
   * The number of keys recorded in the current standard active Bloom filter.
   */
  private int currentNbRecord;

  /**
   * The matrix of Bloom filters.
   */
  private BloomFilter[] matrix;

  /**
   * Zero-args constructor for the serialization.
   */
  public DynamicBloomFilter() { }

  /**
   * Constructor.
   * <p>
   * Builds an empty Dynamic Bloom filter.
   * @param vectorSize The number of bits in the vector.
   * @param nbHash The number of hash functions to consider.
   * @param hashType type of the hashing function (see {@link Hash}).
   * @param nr The threshold for the maximum number of keys to record in a dynamic Bloom filter row.
   */
  public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) {
    super(vectorSize, nbHash, hashType);

    this.nr = nr;
    this.currentNbRecord = 0;

    matrix = new BloomFilter[1];
    matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType);
  }//end constructor

  @Override
  public void add(Key key){
    if(key == null) {
      throw new NullPointerException("Key can not be null");
    }

    BloomFilter bf = getActiveStandardBF();

    if(bf == null){
      addRow();
      bf = matrix[matrix.length - 1];
      currentNbRecord = 0;
    }

    bf.add(key);

    currentNbRecord++;
  }//end add()

  @Override
  public void and(Filter filter) {
    if(filter == null
        || !(filter instanceof DynamicBloomFilter)
        || filter.vectorSize != this.vectorSize
        || filter.nbHash != this.nbHash) {
      throw new IllegalArgumentException("filters cannot be and-ed");
    }

    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;

    if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
      throw new IllegalArgumentException("filters cannot be and-ed");
    }

    for(int i = 0; i < matrix.length; i++) {
      matrix[i].and(dbf.matrix[i]);
    }
  }//end and()

  @Override
  public boolean membershipTest(Key key){
    if(key == null) {
      return true;
    }

    for(int i = 0; i < matrix.length; i++) {
      if(matrix[i].membershipTest(key)) {
        return true;
      }
    }

    return false;
  }//end membershipTest()

  @Override
  public void not(){
    for(int i = 0; i < matrix.length; i++) {
      matrix[i].not();
    }
  }//end not()

  @Override
  public void or(Filter filter){
    if(filter == null
        || !(filter instanceof DynamicBloomFilter)
        || filter.vectorSize != this.vectorSize
        || filter.nbHash != this.nbHash) {
      throw new IllegalArgumentException("filters cannot be or-ed");
    }

    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;

    if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
      throw new IllegalArgumentException("filters cannot be or-ed");
    }
    for(int i = 0; i < matrix.length; i++) {
      matrix[i].or(dbf.matrix[i]);
    }
  }//end or()

  @Override
  public void xor(Filter filter){
    if(filter == null
        || !(filter instanceof DynamicBloomFilter)
        || filter.vectorSize != this.vectorSize
        || filter.nbHash != this.nbHash) {
      throw new IllegalArgumentException("filters cannot be xor-ed");
    }
    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;

    if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
      throw new IllegalArgumentException("filters cannot be xor-ed");
    }

    for(int i = 0; i < matrix.length; i++) {
      matrix[i].xor(dbf.matrix[i]);
    }
  }//end xor()

  @Override
  public String toString(){
    StringBuilder res = new StringBuilder();

    for(int i = 0; i < matrix.length; i++) {
      res.append(matrix[i]);
      res.append(Character.LINE_SEPARATOR);
    }
    return res.toString();
  }//end toString()

  @Override
  public Object clone(){
    DynamicBloomFilter dbf = new DynamicBloomFilter(vectorSize, nbHash, hashType, nr);
    dbf.currentNbRecord = this.currentNbRecord;
    dbf.matrix = new BloomFilter[this.matrix.length];
    for(int i = 0; i < this.matrix.length; i++) {
      dbf.matrix[i] = (BloomFilter)this.matrix[i].clone();
    }
    return dbf;
  }//end clone()

  // Writable

  @Override
  public void write(DataOutput out) throws IOException {
    super.write(out);
    out.writeInt(nr);
    out.writeInt(currentNbRecord);
    out.writeInt(matrix.length);
    for (int i = 0; i < matrix.length; i++) {
      matrix[i].write(out);
    }
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    super.readFields(in);
    nr = in.readInt();
    currentNbRecord = in.readInt();
    int len = in.readInt();
    matrix = new BloomFilter[len];
    for (int i = 0; i < matrix.length; i++) {
      matrix[i] = new BloomFilter();
      matrix[i].readFields(in);
    }
  }

  /**
   * Adds a new row to <i>this</i> dynamic Bloom filter.
   */
  private void addRow(){
    BloomFilter[] tmp = new BloomFilter[matrix.length + 1];

    for(int i = 0; i < matrix.length; i++) {
      tmp[i] = (BloomFilter)matrix[i].clone();
    }

    tmp[tmp.length - 1] = new BloomFilter(vectorSize, nbHash, hashType);

    matrix = tmp;
  }//end addRow()

  /**
   * Returns the active standard Bloom filter in <i>this</i> dynamic Bloom filter.
   * @return BloomFilter The active standard Bloom filter.
   * <code>Null</code> otherwise.
   */
  private BloomFilter getActiveStandardBF() {
    if(currentNbRecord >= nr) {
      return null;
    }

    return matrix[matrix.length - 1];
  }//end getActiveStandardBF()
}//end class
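A minimal sketch of the row-growth behavior described in the javadoc above: once nr keys have been recorded in the active row, the next add() appends a fresh BloomFilter row. The vector size, hash count, nr and key values are invented for the example; the constructor and method signatures are those of the class above.

package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import org.apache.hadoop.hbase.util.Hash;

public class DynamicBloomFilterExample {
  public static void main(String[] args) {
    // Each row accepts at most nr = 1000 keys before a new row is added.
    DynamicBloomFilter dbf = new DynamicBloomFilter(8 * 1024, 4, Hash.JENKINS_HASH, 1000);
    for (int i = 0; i < 2500; i++) {          // forces two addRow() calls internally
      dbf.add(new Key(("row-" + i).getBytes()));
    }
    // membershipTest() scans every row and reports true if any row matches.
    System.out.println(dbf.membershipTest(new Key("row-2499".getBytes()))); // true
  }
}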
@@ -1,211 +0,0 @@
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.io.Writable;

/**
 * Defines the general behavior of a filter.
 * <p>
 * A filter is a data structure which aims at offering a lossy summary of a set <code>A</code>. The
 * key idea is to map entries of <code>A</code> (also called <i>keys</i>) into several positions
 * in a vector through the use of several hash functions.
 * <p>
 * Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension).
 * <p>
 * It must be extended in order to define the real behavior.
 *
 * @version 1.0 - 2 Feb. 07
 */
public abstract class Filter implements Writable {
  private static final int VERSION = -1; // negative to accommodate for old format
  /** The vector size of <i>this</i> filter. */
  protected int vectorSize;

  /** The hash function used to map a key to several positions in the vector. */
  protected HashFunction hash;

  /** The number of hash functions to consider. */
  protected int nbHash;

  /** Type of hashing function to use. */
  protected int hashType;

  protected Filter() {}

  /**
   * Constructor.
   * @param vectorSize The vector size of <i>this</i> filter.
   * @param nbHash The number of hash functions to consider.
   * @param hashType type of the hashing function (see {@link Hash}).
   */
  protected Filter(int vectorSize, int nbHash, int hashType) {
    this.vectorSize = vectorSize;
    this.nbHash = nbHash;
    this.hashType = hashType;
    this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
  }//end constructor

  /**
   * Adds a key to <i>this</i> filter.
   * @param key The key to add.
   */
  public abstract void add(Key key);

  /**
   * Determines whether a specified key belongs to <i>this</i> filter.
   * @param key The key to test.
   * @return boolean True if the specified key belongs to <i>this</i> filter.
   *         False otherwise.
   */
  public abstract boolean membershipTest(Key key);

  /**
   * Performs a logical AND between <i>this</i> filter and a specified filter.
   * <p>
   * <b>Invariant</b>: The result is assigned to <i>this</i> filter.
   * @param filter The filter to AND with.
   */
  public abstract void and(Filter filter);

  /**
   * Performs a logical OR between <i>this</i> filter and a specified filter.
   * <p>
   * <b>Invariant</b>: The result is assigned to <i>this</i> filter.
   * @param filter The filter to OR with.
   */
  public abstract void or(Filter filter);

  /**
   * Performs a logical XOR between <i>this</i> filter and a specified filter.
   * <p>
   * <b>Invariant</b>: The result is assigned to <i>this</i> filter.
   * @param filter The filter to XOR with.
   */
  public abstract void xor(Filter filter);

  /**
   * Performs a logical NOT on <i>this</i> filter.
   * <p>
   * The result is assigned to <i>this</i> filter.
   */
  public abstract void not();

  /**
   * Adds a list of keys to <i>this</i> filter.
   * @param keys The list of keys.
   */
  public void add(List<Key> keys){
    if(keys == null) {
      throw new IllegalArgumentException("ArrayList<Key> may not be null");
    }

    for(Key key: keys) {
      add(key);
    }
  }//end add()

  /**
   * Adds a collection of keys to <i>this</i> filter.
   * @param keys The collection of keys.
   */
  public void add(Collection<Key> keys){
    if(keys == null) {
      throw new IllegalArgumentException("Collection<Key> may not be null");
    }
    for(Key key: keys) {
      add(key);
    }
  }//end add()

  /**
   * Adds an array of keys to <i>this</i> filter.
   * @param keys The array of keys.
   */
  public void add(Key[] keys){
    if(keys == null) {
      throw new IllegalArgumentException("Key[] may not be null");
    }
    for(int i = 0; i < keys.length; i++) {
      add(keys[i]);
    }
  }//end add()

  // Writable interface

  public void write(DataOutput out) throws IOException {
    out.writeInt(VERSION);
    out.writeInt(this.nbHash);
    out.writeByte(this.hashType);
    out.writeInt(this.vectorSize);
  }

  public void readFields(DataInput in) throws IOException {
    int ver = in.readInt();
    if (ver > 0) { // old unversioned format
      this.nbHash = ver;
      this.hashType = Hash.JENKINS_HASH;
    } else if (ver == VERSION) {
      this.nbHash = in.readInt();
      this.hashType = in.readByte();
    } else {
      throw new IOException("Unsupported version: " + ver);
    }
    this.vectorSize = in.readInt();
    this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
  }
}//end class
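The Writable methods above define a versioned on-disk layout: a negative VERSION marker, then nbHash, hashType and vectorSize, followed by the subclass payload, while a positive first int is read back as nbHash from the old unversioned format. A minimal round-trip sketch (stream setup and sizes invented; the write/readFields signatures are from the classes above):

package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.hbase.util.Hash;

public class FilterSerializationExample {
  public static void main(String[] args) throws IOException {
    BloomFilter original = new BloomFilter(1024, 4, Hash.JENKINS_HASH);
    original.add(new Key("row-0001".getBytes()));

    // Serialize: VERSION, nbHash, hashType, vectorSize, then the packed bit vector.
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    original.write(new DataOutputStream(buf));

    // Deserialize into a fresh instance created with the no-arg constructor.
    BloomFilter copy = new BloomFilter();
    copy.readFields(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));
    System.out.println(copy.membershipTest(new Key("row-0001".getBytes()))); // true
  }
}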
@@ -1,120 +0,0 @@
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import org.apache.hadoop.hbase.util.Hash;

/**
 * Implements a hash object that returns a certain number of hashed values.
 * <p>
 * It is based on the SHA-1 algorithm.
 *
 * @version 1.0 - 2 Feb. 07
 */
public final class HashFunction {
  /** The number of hashed values. */
  private int nbHash;

  /** The maximum highest returned value. */
  private int maxValue;

  /** Hashing algorithm to use. */
  private Hash hashFunction;

  /**
   * Constructor.
   * <p>
   * Builds a hash function that must obey to a given maximum number of returned values and a highest value.
   * @param maxValue The maximum highest returned value.
   * @param nbHash The number of resulting hashed values.
   * @param hashType type of the hashing function (see {@link Hash}).
   */
  public HashFunction(int maxValue, int nbHash, int hashType) {
    if(maxValue <= 0) {
      throw new IllegalArgumentException("maxValue must be > 0");
    }

    if(nbHash <= 0) {
      throw new IllegalArgumentException("nbHash must be > 0");
    }

    this.maxValue = maxValue;
    this.nbHash = nbHash;
    this.hashFunction = Hash.getInstance(hashType);
    if (this.hashFunction == null)
      throw new IllegalArgumentException("hashType must be known");
  }//end constructor

  /** Clears <i>this</i> hash function. A NOOP */
  public void clear() {
  }

  /**
   * Hashes a specified key into several integers.
   * @param k The specified key.
   * @return The array of hashed values.
   */
  public int[] hash(Key k){
    byte[] b = k.getBytes();
    if(b == null) {
      throw new NullPointerException("buffer reference is null");
    }
    if(b.length == 0) {
      throw new IllegalArgumentException("key length must be > 0");
    }
    int[] result = new int[nbHash];
    for (int i = 0, initval = 0; i < nbHash; i++) {
      initval = hashFunction.hash(b, initval);
      result[i] = Math.abs(initval) % maxValue;
    }
    return result;
  }//end hash()

}//end class
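As the loop in hash() shows, each successive position reuses the previous hash value as the seed and is folded into [0, maxValue) with a modulo. A small sketch (the range, hash count and key are invented; HashFunction, Key and Hash.JENKINS_HASH are from the code in this commit):

package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import java.util.Arrays;

import org.apache.hadoop.hbase.util.Hash;

public class HashFunctionExample {
  public static void main(String[] args) {
    // Four positions, each in the range [0, 1024).
    HashFunction fn = new HashFunction(1024, 4, Hash.JENKINS_HASH);
    int[] positions = fn.hash(new Key("row-0001".getBytes()));
    System.out.println(Arrays.toString(positions));
  }
}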
@@ -1,174 +0,0 @@
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * The general behavior of a key that must be stored in a filter.
 */
public class Key implements WritableComparable {
  /** Byte value of key */
  byte[] bytes;

  /**
   * The weight associated to <i>this</i> key.
   * <p>
   * <b>Invariant</b>: if it is not specified, each instance of
   * <code>Key</code> will have a default weight of 1.0
   */
  double weight;

  /** default constructor - use with readFields */
  public Key() {}

  /**
   * Constructor.
   * <p>
   * Builds a key with a default weight.
   * @param value The byte value of <i>this</i> key.
   */
  public Key(byte[] value) {
    this(value, 1.0);
  }//end constructor

  /**
   * Constructor.
   * <p>
   * Builds a key with a specified weight.
   * @param value The value of <i>this</i> key.
   * @param weight The weight associated to <i>this</i> key.
   */
  public Key(byte[] value, double weight) {
    set(value, weight);
  }//end constructor

  /**
   * @param value
   * @param weight
   */
  public void set(byte[] value, double weight) {
    if(value == null) {
      throw new IllegalArgumentException("value can not be null");
    }
    this.bytes = value;
    this.weight = weight;
  }

  /** @return byte[] The value of <i>this</i> key. */
  public byte[] getBytes() {
    return this.bytes;
  }

  /** @return Returns the weight associated to <i>this</i> key. */
  public double getWeight(){
    return weight;
  }//end getWeight()

  /**
   * Increments the weight of <i>this</i> key with a specified value.
   * @param weight The increment.
   */
  public void incrementWeight(double weight){
    this.weight += weight;
  }//end incrementWeight()

  /** Increments the weight of <i>this</i> key by one. */
  public void incrementWeight(){
    this.weight++;
  }//end incrementWeight()

  @Override
  public boolean equals(Object o) {
    return this.compareTo(o) == 0;
  }

  @Override
  public int hashCode() {
    int result = 0;
    for(int i = 0; i < bytes.length; i++) {
      result ^= Byte.valueOf(bytes[i]).hashCode();
    }
    result ^= Double.valueOf(weight).hashCode();
    return result;
  }

  // Writable

  public void write(DataOutput out) throws IOException {
    out.writeInt(bytes.length);
    out.write(bytes);
    out.writeDouble(weight);
  }

  public void readFields(DataInput in) throws IOException {
    this.bytes = new byte[in.readInt()];
    in.readFully(this.bytes);
    weight = in.readDouble();
  }

  // Comparable

  public int compareTo(Object o) {
    Key other = (Key)o;

    int result = this.bytes.length - other.getBytes().length;
    for(int i = 0; result == 0 && i < bytes.length; i++) {
      result = this.bytes[i] - other.bytes[i];
    }

    if(result == 0) {
      result = Double.valueOf(this.weight - other.weight).intValue();
    }
    return result;
  }
}//end class
@@ -1,91 +0,0 @@
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

/**
 * Defines the different removal schemes for retouched Bloom filters.
 *
 * contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
 *
 * @version 1.0 - 7 Feb. 07
 */
public interface RemoveScheme {
  /**
   * Random selection.
   * <p>
   * The idea is to randomly select a bit to reset.
   */
  public final static short RANDOM = 0;

  /**
   * MinimumFN Selection.
   * <p>
   * The idea is to select the bit to reset that will generate the minimum
   * number of false negatives.
   */
  public final static short MINIMUM_FN = 1;

  /**
   * MaximumFP Selection.
   * <p>
   * The idea is to select the bit to reset that will remove the maximum number
   * of false positives.
   */
  public final static short MAXIMUM_FP = 2;

  /**
   * Ratio Selection.
   * <p>
   * The idea is to select the bit to reset that will, at the same time, remove
   * the maximum number of false positives while minimizing the amount of false
   * negatives generated.
   */
  public final static short RATIO = 3;
}//end interface
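These constants are consumed by the RetouchedBloomFilter removed in the next hunk. A minimal sketch of selecting a scheme (keys and sizes are invented; selectiveClearing requires the key to test as a member, and the behavior of the randomRemove() helper is not visible in the truncated hunk below, so this is only an assumed usage pattern):

package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;

import org.apache.hadoop.hbase.util.Hash;

public class RetouchedBloomFilterExample {
  public static void main(String[] args) {
    RetouchedBloomFilter rbf = new RetouchedBloomFilter(8 * 1024, 4, Hash.JENKINS_HASH);
    rbf.add(new Key("row-0001".getBytes()));

    // A key observed to be a false positive can be recorded and selectively cleared,
    // but only if the filter currently claims it as a member.
    Key suspect = new Key("row-9999".getBytes());
    if (rbf.membershipTest(suspect)) {
      rbf.addFalsePositive(suspect);
      rbf.selectiveClearing(suspect, RemoveScheme.RANDOM);
    }
  }
}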
@@ -1,445 +0,0 @@
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
|
||||
|
||||
import java.io.DataInput;
|
||||
import java.io.DataOutput;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.hadoop.hbase.util.Hash;
|
||||
|
||||
/**
|
||||
* Implements a <i>retouched Bloom filter</i>, as defined in the CoNEXT 2006 paper.
|
||||
* <p>
|
||||
* It allows the removal of selected false positives at the cost of introducing
|
||||
* random false negatives, and with the benefit of eliminating some random false
|
||||
* positives at the same time.
|
||||
*
|
||||
* contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
|
||||
*
|
||||
* @version 1.0 - 7 Feb. 07
|
||||
*
|
||||
*/
|
||||
public final class RetouchedBloomFilter extends BloomFilter
|
||||
implements RemoveScheme {
|
||||
/**
|
||||
* KeyList vector (or ElementList Vector, as defined in the paper) of false positives.
|
||||
*/
|
||||
List<Key>[] fpVector;
|
||||
|
||||
/**
|
||||
* KeyList vector of keys recorded in the filter.
|
||||
*/
|
||||
List<Key>[] keyVector;
|
||||
|
||||
/**
|
||||
* Ratio vector.
|
||||
*/
|
||||
double[] ratio;
|
||||
|
||||
private Random rand;
|
||||
|
||||
/** Default constructor - use with readFields */
|
||||
public RetouchedBloomFilter() {}
|
||||
|
||||
/**
|
||||
* Constructor
|
||||
* @param vectorSize The vector size of <i>this</i> filter.
|
||||
 * @param nbHash The number of hash functions to consider.
|
||||
* @param hashType type of the hashing function (see {@link Hash}).
|
||||
*/
|
||||
public RetouchedBloomFilter(int vectorSize, int nbHash, int hashType) {
|
||||
super(vectorSize, nbHash, hashType);
|
||||
|
||||
this.rand = null;
|
||||
createVector();
|
||||
}//end constructor
|
||||
|
||||
@Override
|
||||
public void add(Key key){
|
||||
if(key == null) {
|
||||
throw new NullPointerException("key can not be null");
|
||||
}
|
||||
|
||||
int[] h = hash.hash(key);
|
||||
hash.clear();
|
||||
|
||||
for(int i = 0; i < nbHash; i++) {
|
||||
bits.set(h[i]);
|
||||
keyVector[h[i]].add(key);
|
||||
}//end for - i
|
||||
}//end add()
|
||||
|
||||
/**
|
||||
 * Adds false positive information to <i>this</i> retouched Bloom filter.
|
||||
* <p>
|
||||
* <b>Invariant</b>: if the false positive is <code>null</code>, nothing happens.
|
||||
* @param key The false positive key to add.
|
||||
*/
|
||||
public void addFalsePositive(Key key){
|
||||
if(key == null) {
|
||||
throw new NullPointerException("key can not be null");
|
||||
}
|
||||
|
||||
int[] h = hash.hash(key);
|
||||
hash.clear();
|
||||
|
||||
for(int i = 0; i < nbHash; i++) {
|
||||
fpVector[h[i]].add(key);
|
||||
}
|
||||
}//end addFalsePositive()
|
||||
|
||||
/**
|
||||
* Adds a collection of false positive information to <i>this</i> retouched Bloom filter.
|
||||
 * @param coll The collection of false positives to add.
|
||||
*/
|
||||
public void addFalsePositive(Collection<Key> coll) {
|
||||
if(coll == null) {
|
||||
throw new NullPointerException("Collection<Key> can not be null");
|
||||
}
|
||||
|
||||
for(Key k: coll) {
|
||||
addFalsePositive(k);
|
||||
}
|
||||
}//end addFalsePositive()
|
||||
|
||||
/**
|
||||
* Adds a list of false positive information to <i>this</i> retouched Bloom filter.
|
||||
 * @param keys The list of false positives to add.
|
||||
*/
|
||||
public void addFalsePositive(List<Key> keys){
|
||||
if(keys == null) {
|
||||
throw new NullPointerException("ArrayList<Key> can not be null");
|
||||
}
|
||||
|
||||
for(Key k: keys) {
|
||||
addFalsePositive(k);
|
||||
}
|
||||
}//end addFalsePositive()
|
||||
|
||||
/**
|
||||
* Adds an array of false positive information to <i>this</i> retouched Bloom filter.
|
||||
 * @param keys The array of false positives to add.
|
||||
*/
|
||||
public void addFalsePositive(Key[] keys){
|
||||
if(keys == null) {
|
||||
throw new NullPointerException("Key[] can not be null");
|
||||
}
|
||||
|
||||
for(int i = 0; i < keys.length; i++) {
|
||||
addFalsePositive(keys[i]);
|
||||
}
|
||||
}//end addFalsePositive()
|
||||
|
||||
/**
|
||||
* Performs the selective clearing for a given key.
|
||||
* @param k The false positive key to remove from <i>this</i> retouched Bloom filter.
|
||||
* @param scheme The selective clearing scheme to apply.
|
||||
*/
|
||||
public void selectiveClearing(Key k, short scheme) {
|
||||
if(k == null) {
|
||||
throw new NullPointerException("Key can not be null");
|
||||
}
|
||||
|
||||
if(!membershipTest(k)) {
|
||||
throw new IllegalArgumentException("Key is not a member");
|
||||
}
|
||||
|
||||
int index = 0;
|
||||
int[] h = hash.hash(k);
|
||||
|
||||
switch(scheme) {
|
||||
|
||||
case RANDOM:
|
||||
index = randomRemove();
|
||||
break;
|
||||
|
||||
case MINIMUM_FN:
|
||||
index = minimumFnRemove(h);
|
||||
break;
|
||||
|
||||
case MAXIMUM_FP:
|
||||
index = maximumFpRemove(h);
|
||||
break;
|
||||
|
||||
case RATIO:
|
||||
index = ratioRemove(h);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new AssertionError("Undefined selective clearing scheme");
|
||||
|
||||
}//end switch
|
||||
|
||||
clearBit(index);
|
||||
}//end selectiveClearing()
|
||||
|
||||
private int randomRemove() {
|
||||
if(rand == null) {
|
||||
rand = new Random();
|
||||
}
|
||||
|
||||
return rand.nextInt(nbHash);
|
||||
}//end randomRemove()
|
||||
|
||||
/**
|
||||
 * Chooses the bit position that minimizes the number of false negatives generated.
|
||||
* @param h The different bit positions.
|
||||
 * @return int The position that minimizes the number of false negatives generated.
|
||||
*/
|
||||
private int minimumFnRemove(int[] h) {
|
||||
int minIndex = Integer.MAX_VALUE;
|
||||
double minValue = Double.MAX_VALUE;
|
||||
|
||||
for(int i = 0; i < nbHash; i++) {
|
||||
double keyWeight = getWeight(keyVector[h[i]]);
|
||||
|
||||
if(keyWeight < minValue) {
|
||||
minIndex = h[i];
|
||||
minValue = keyWeight;
|
||||
}
|
||||
|
||||
}//end for - i
|
||||
|
||||
return minIndex;
|
||||
}//end minimumFnRemove()
|
||||
|
||||
/**
|
||||
 * Chooses the bit position that maximizes the number of false positives removed.
|
||||
* @param h The different bit positions.
|
||||
 * @return int The position that maximizes the number of false positives removed.
|
||||
*/
|
||||
private int maximumFpRemove(int[] h){
|
||||
int maxIndex = Integer.MIN_VALUE;
|
||||
double maxValue = Double.MIN_VALUE;
|
||||
|
||||
for(int i = 0; i < nbHash; i++) {
|
||||
double fpWeight = getWeight(fpVector[h[i]]);
|
||||
|
||||
if(fpWeight > maxValue) {
|
||||
maxValue = fpWeight;
|
||||
maxIndex = h[i];
|
||||
}
|
||||
}
|
||||
|
||||
return maxIndex;
|
||||
}//end maximumFpRemove()
|
||||
|
||||
/**
|
||||
 * Chooses the bit position that minimizes the number of false negatives generated
 * while maximizing the number of false positives removed.
 * @param h The different bit positions.
 * @return int The position that minimizes the number of false negatives
 * generated while maximizing the number of false positives removed.
 */
|
||||
private int ratioRemove(int[] h){
|
||||
computeRatio();
|
||||
int minIndex = Integer.MAX_VALUE;
|
||||
double minValue = Double.MAX_VALUE;
|
||||
|
||||
for(int i = 0; i < nbHash; i++) {
|
||||
if(ratio[h[i]] < minValue) {
|
||||
minValue = ratio[h[i]];
|
||||
minIndex = h[i];
|
||||
}
|
||||
}//end for - i
|
||||
|
||||
return minIndex;
|
||||
}//end ratioRemove()
|
||||
|
||||
/**
|
||||
 * Clears a specified bit in the bit vector and keeps the KeyList vectors up to date.
|
||||
* @param index The position of the bit to clear.
|
||||
*/
|
||||
private void clearBit(int index){
|
||||
if(index < 0 || index >= vectorSize) {
|
||||
throw new ArrayIndexOutOfBoundsException(index);
|
||||
}
|
||||
|
||||
List<Key> kl = keyVector[index];
|
||||
List<Key> fpl = fpVector[index];
|
||||
|
||||
// update key list
|
||||
int listSize = kl.size();
|
||||
for(int i = 0; i < listSize && !kl.isEmpty(); i++) {
|
||||
removeKey(kl.get(0), keyVector);
|
||||
}
|
||||
|
||||
kl.clear();
|
||||
keyVector[index].clear();
|
||||
|
||||
//update false positive list
|
||||
listSize = fpl.size();
|
||||
for(int i = 0; i < listSize && !fpl.isEmpty(); i++) {
|
||||
removeKey(fpl.get(0), fpVector);
|
||||
}
|
||||
|
||||
fpl.clear();
|
||||
fpVector[index].clear();
|
||||
|
||||
//update ratio
|
||||
ratio[index] = 0.0;
|
||||
|
||||
//update bit vector
|
||||
bits.clear(index);
|
||||
}//end clearBit()
|
||||
|
||||
/**
|
||||
 * Removes a given key from <i>this</i> filter.
|
||||
* @param k The key to remove.
|
||||
* @param vector The counting vector associated to the key.
|
||||
*/
|
||||
private void removeKey(Key k, List<Key>[] vector) {
|
||||
if(k == null) {
|
||||
throw new NullPointerException("Key can not be null");
|
||||
}
|
||||
if(vector == null) {
|
||||
throw new NullPointerException("ArrayList<Key>[] can not be null");
|
||||
}
|
||||
|
||||
int[] h = hash.hash(k);
|
||||
hash.clear();
|
||||
|
||||
for(int i = 0; i < nbHash; i++) {
|
||||
vector[h[i]].remove(k);
|
||||
}
|
||||
}//end removeKey()
|
||||
|
||||
/**
|
||||
 * Computes, for each bit position, the ratio of key weight to false positive weight (A/FP).
|
||||
*/
|
||||
private void computeRatio() {
|
||||
for(int i = 0; i < vectorSize; i++) {
|
||||
double keyWeight = getWeight(keyVector[i]);
|
||||
double fpWeight = getWeight(fpVector[i]);
|
||||
|
||||
if(keyWeight > 0 && fpWeight > 0) {
|
||||
ratio[i] = keyWeight/fpWeight;
|
||||
}
|
||||
}//end for - i
|
||||
}//end computeRatio()
|
||||
|
||||
private double getWeight(List<Key> keyList) {
|
||||
double weight = 0.0;
|
||||
for(Key k: keyList) {
|
||||
weight += k.getWeight();
|
||||
}
|
||||
return weight;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates and initialises the various vectors.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
private void createVector() {
|
||||
fpVector = new List[vectorSize];
|
||||
keyVector = new List[vectorSize];
|
||||
ratio = new double[vectorSize];
|
||||
|
||||
for(int i = 0; i < vectorSize; i++) {
|
||||
fpVector[i] = Collections.synchronizedList(new ArrayList<Key>());
|
||||
keyVector[i] = Collections.synchronizedList(new ArrayList<Key>());
|
||||
ratio[i] = 0.0;
|
||||
}//end for -i
|
||||
}//end createVector()
|
||||
|
||||
// Writable
|
||||
|
||||
@Override
|
||||
public void write(DataOutput out) throws IOException {
|
||||
super.write(out);
|
||||
for(int i = 0; i < fpVector.length; i++) {
|
||||
List<Key> list = fpVector[i];
|
||||
out.writeInt(list.size());
|
||||
for(Key k: list) {
|
||||
k.write(out);
|
||||
}
|
||||
}
|
||||
for(int i = 0; i < keyVector.length; i++) {
|
||||
List<Key> list = keyVector[i];
|
||||
out.writeInt(list.size());
|
||||
for(Key k: list) {
|
||||
k.write(out);
|
||||
}
|
||||
}
|
||||
for(int i = 0; i < ratio.length; i++) {
|
||||
out.writeDouble(ratio[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
super.readFields(in);
|
||||
createVector();
|
||||
for(int i = 0; i < fpVector.length; i++) {
|
||||
List<Key> list = fpVector[i];
|
||||
int size = in.readInt();
|
||||
for(int j = 0; j < size; j++) {
|
||||
Key k = new Key();
|
||||
k.readFields(in);
|
||||
list.add(k);
|
||||
}
|
||||
}
|
||||
for(int i = 0; i < keyVector.length; i++) {
|
||||
List<Key> list = keyVector[i];
|
||||
int size = in.readInt();
|
||||
for(int j = 0; j < size; j++) {
|
||||
Key k = new Key();
|
||||
k.readFields(in);
|
||||
list.add(k);
|
||||
}
|
||||
}
|
||||
for(int i = 0; i < ratio.length; i++) {
|
||||
ratio[i] = in.readDouble();
|
||||
}
|
||||
}
|
||||
}//end class
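A minimal usage sketch of the retouched filter above (an editor's illustration, not part of the patch): it assumes the onelab Key(byte[]) constructor, the membershipTest(Key) method inherited from BloomFilter, and Hash.JENKINS_HASH from org.apache.hadoop.hbase.util.Hash; the vector size, hash count, row keys, and the RATIO constant (from the RemoveScheme interface, as used in the switch above) are illustrative.

import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.Key;
import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.RetouchedBloomFilter;
import org.apache.hadoop.hbase.util.Hash;

public class RetouchedBloomFilterExample {
  public static void main(String[] args) {
    // 1000-bit vector, 4 hash functions, Jenkins hashing.
    RetouchedBloomFilter filter = new RetouchedBloomFilter(1000, 4, Hash.JENKINS_HASH);

    // Record a genuine member.
    filter.add(new Key("row-0001".getBytes()));

    // A key that tests positive but was never added is a false positive;
    // record it, then clear one of its bits with the RATIO scheme.
    Key falsePositive = new Key("row-9999".getBytes());
    if (filter.membershipTest(falsePositive)) {
      filter.addFalsePositive(falsePositive);
      filter.selectiveClearing(falsePositive, RetouchedBloomFilter.RATIO);
      // The false positive no longer matches, possibly at the cost of
      // false negatives for members that shared the cleared bit.
    }
  }
}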
|
|
@ -1,585 +0,0 @@
|
|||
/**
|
||||
* Copyright 2007 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.migration.nineteen.regionserver;
|
||||
|
||||
import java.io.DataInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.migration.nineteen.io.BloomFilterMapFile;
|
||||
import org.apache.hadoop.hbase.migration.nineteen.io.HalfMapFileReader;
|
||||
import org.apache.hadoop.hbase.migration.nineteen.io.Reference;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.io.MapFile;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
|
||||
/**
|
||||
* A HStore data file. HStores usually have one or more of these files. They
|
||||
* are produced by flushing the memcache to disk.
|
||||
*
|
||||
* <p>This one has been doctored to be used in migrations. Private and
|
||||
* protecteds have been made public, etc.
|
||||
*
|
||||
* <p>Each HStore maintains a bunch of different data files. The filename is a
|
||||
* mix of the parent dir, the region name, the column name, and a file
|
||||
* identifier. The name may also be a reference to a store file located
|
||||
* elsewhere. This class handles all that path-building stuff for you.
|
||||
*
|
||||
* <p>An HStoreFile usually tracks 4 things: its parent dir, the region
|
||||
* identifier, the column family, and the file identifier. If you know those
|
||||
* four things, you know how to obtain the right HStoreFile. HStoreFiles may
|
||||
* also reference store files in another region serving either from
|
||||
* the top-half of the remote file or from the bottom-half. Such references
|
||||
 * are made when regions are split.
|
||||
*
|
||||
* <p>Plain HStoreFiles are named for a randomly generated id as in:
|
||||
* <code>1278437856009925445</code> A file by this name is made in both the
|
||||
* <code>mapfiles</code> and <code>info</code> subdirectories of a
|
||||
 * HStore columnfamily directory: E.g. If the column family is 'anchor:', then
|
||||
* under the region directory there is a subdirectory named 'anchor' within
|
||||
* which is a 'mapfiles' and 'info' subdirectory. In each will be found a
|
||||
* file named something like <code>1278437856009925445</code>, one to hold the
|
||||
* data in 'mapfiles' and one under 'info' that holds the sequence id for this
|
||||
* store file.
|
||||
*
|
||||
* <p>References to store files located over in some other region look like
|
||||
* this:
|
||||
* <code>1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184</code>:
|
||||
* i.e. an id followed by the name of the referenced region. The data
|
||||
* ('mapfiles') of HStoreFile references are empty. The accompanying
|
||||
* <code>info</code> file contains the
|
||||
* midkey, the id of the remote store we're referencing and whether we're
|
||||
* to serve the top or bottom region of the remote store file. Note, a region
|
||||
 * is not splittable if it has instances of store file references (References
|
||||
* are cleaned up by compactions).
|
||||
*
|
||||
* <p>When merging or splitting HRegions, we might want to modify one of the
|
||||
* params for an HStoreFile (effectively moving it elsewhere).
|
||||
*/
|
||||
public class HStoreFile implements HConstants {
|
||||
static final Log LOG = LogFactory.getLog(HStoreFile.class.getName());
|
||||
static final byte INFO_SEQ_NUM = 0;
|
||||
static final byte MAJOR_COMPACTION = INFO_SEQ_NUM + 1;
|
||||
static final String HSTORE_DATFILE_DIR = "mapfiles";
|
||||
static final String HSTORE_INFO_DIR = "info";
|
||||
static final String HSTORE_FILTER_DIR = "filter";
|
||||
|
||||
private final static Random rand = new Random();
|
||||
|
||||
private final Path basedir;
|
||||
private final int encodedRegionName;
|
||||
private final byte [] colFamily;
|
||||
private final long fileId;
|
||||
private final HBaseConfiguration conf;
|
||||
private final FileSystem fs;
|
||||
private final Reference reference;
|
||||
private final HRegionInfo hri;
|
||||
/* If true, this file was product of a major compaction.
|
||||
*/
|
||||
private boolean majorCompaction = false;
|
||||
private long indexLength;
|
||||
|
||||
/**
|
||||
* Constructor that fully initializes the object
|
||||
* @param conf Configuration object
|
||||
* @param basedir qualified path that is parent of region directory
|
||||
* @param colFamily name of the column family
|
||||
* @param fileId file identifier
|
||||
* @param ref Reference to another HStoreFile.
|
||||
* @param encodedName Encoded name.
|
||||
* @throws IOException
|
||||
*/
|
||||
public HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir,
|
||||
final int encodedName, byte [] colFamily, long fileId,
|
||||
final Reference ref)
|
||||
throws IOException {
|
||||
this(conf, fs, basedir, encodedName, colFamily, fileId, ref, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor that fully initializes the object
|
||||
* @param conf Configuration object
|
||||
* @param basedir qualified path that is parent of region directory
|
||||
* @param colFamily name of the column family
|
||||
* @param fileId file identifier
|
||||
* @param ref Reference to another HStoreFile.
|
||||
* @param encodedName Encoded name.
|
||||
 * @param mc True if this file was the result of a major compaction.
|
||||
* @throws IOException
|
||||
*/
|
||||
HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir,
|
||||
final int encodedName, byte [] colFamily, long fileId,
|
||||
final Reference ref, final boolean mc)
|
||||
throws IOException {
|
||||
this.conf = conf;
|
||||
this.fs = fs;
|
||||
this.basedir = basedir;
|
||||
this.encodedRegionName = encodedName;
|
||||
this.colFamily = colFamily;
|
||||
// NOT PASSED IN MIGRATIONS
|
||||
this.hri = null;
|
||||
|
||||
long id = fileId;
|
||||
if (id == -1) {
|
||||
Path mapdir = HStoreFile.getMapDir(basedir, encodedRegionName, colFamily);
|
||||
Path testpath = null;
|
||||
do {
|
||||
id = Math.abs(rand.nextLong());
|
||||
testpath = new Path(mapdir, createHStoreFilename(id, -1));
|
||||
} while(fs.exists(testpath));
|
||||
}
|
||||
this.fileId = id;
|
||||
|
||||
 // If a reference, construction does not write the pointer files. That's
|
||||
// done by invocations of writeReferenceFiles(hsf, fs). Happens at split.
|
||||
this.reference = ref;
|
||||
this.majorCompaction = mc;
|
||||
}
|
||||
|
||||
 /** @return true if this file is a reference to another store file */
|
||||
boolean isReference() {
|
||||
return reference != null;
|
||||
}
|
||||
|
||||
|
||||
private static final Pattern REF_NAME_PARSER =
|
||||
Pattern.compile("^(\\d+)(?:\\.(.+))?$");
|
||||
|
||||
/**
|
||||
* @param p Path to check.
|
||||
* @return True if the path has format of a HStoreFile reference.
|
||||
*/
|
||||
public static boolean isReference(final Path p) {
|
||||
return isReference(p, REF_NAME_PARSER.matcher(p.getName()));
|
||||
}
|
||||
|
||||
private static boolean isReference(final Path p, final Matcher m) {
|
||||
if (m == null || !m.matches()) {
|
||||
LOG.warn("Failed match of store file name " + p.toString());
|
||||
throw new RuntimeException("Failed match of store file name " +
|
||||
p.toString());
|
||||
}
|
||||
return m.groupCount() > 1 && m.group(2) != null;
|
||||
}
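To make the naming convention described in the class comment concrete, here is a small self-contained illustration (not part of the patch) of how the REF_NAME_PARSER pattern above splits the two file-name shapes; the sample names are copied from the class comment.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RefNameParserDemo {
  // Same pattern as REF_NAME_PARSER above: an id, optionally followed by
  // "." and the encoded name of the referenced region.
  private static final Pattern REF_NAME_PARSER =
    Pattern.compile("^(\\d+)(?:\\.(.+))?$");

  public static void main(String[] args) {
    String plain = "1278437856009925445";
    String ref =
      "1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184";
    for (String name : new String[] { plain, ref }) {
      Matcher m = REF_NAME_PARSER.matcher(name);
      if (m.matches()) {
        // group(1) is the file id; a non-null group(2) is what marks the
        // name as a reference to another region's store file.
        System.out.println(name + " -> id=" + m.group(1)
            + ", isReference=" + (m.group(2) != null));
      }
    }
  }
}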
|
||||
|
||||
Reference getReference() {
|
||||
return reference;
|
||||
}
|
||||
|
||||
int getEncodedRegionName() {
|
||||
return this.encodedRegionName;
|
||||
}
|
||||
|
||||
/** @return the column family */
|
||||
byte [] getColFamily() {
|
||||
return colFamily;
|
||||
}
|
||||
|
||||
/** @return the file identifier */
|
||||
long getFileId() {
|
||||
return fileId;
|
||||
}
|
||||
|
||||
// Build full filenames from those components
|
||||
|
||||
/** @return path for MapFile */
|
||||
Path getMapFilePath() {
|
||||
if (isReference()) {
|
||||
return getMapFilePath(encodedRegionName, fileId,
|
||||
reference.getEncodedRegionName());
|
||||
}
|
||||
return getMapFilePath(this.encodedRegionName, fileId);
|
||||
}
|
||||
|
||||
private Path getMapFilePath(final Reference r) {
|
||||
if (r == null) {
|
||||
return getMapFilePath();
|
||||
}
|
||||
return getMapFilePath(r.getEncodedRegionName(), r.getFileId());
|
||||
}
|
||||
|
||||
private Path getMapFilePath(final int encodedName, final long fid) {
|
||||
return getMapFilePath(encodedName, fid, HRegionInfo.NO_HASH);
|
||||
}
|
||||
|
||||
private Path getMapFilePath(final int encodedName, final long fid,
|
||||
final int ern) {
|
||||
return new Path(HStoreFile.getMapDir(basedir, encodedName, colFamily),
|
||||
createHStoreFilename(fid, ern));
|
||||
}
|
||||
|
||||
/** @return path for info file */
|
||||
Path getInfoFilePath() {
|
||||
if (isReference()) {
|
||||
return getInfoFilePath(encodedRegionName, fileId,
|
||||
reference.getEncodedRegionName());
|
||||
|
||||
}
|
||||
return getInfoFilePath(encodedRegionName, fileId);
|
||||
}
|
||||
|
||||
private Path getInfoFilePath(final int encodedName, final long fid) {
|
||||
return getInfoFilePath(encodedName, fid, HRegionInfo.NO_HASH);
|
||||
}
|
||||
|
||||
private Path getInfoFilePath(final int encodedName, final long fid,
|
||||
final int ern) {
|
||||
return new Path(HStoreFile.getInfoDir(basedir, encodedName, colFamily),
|
||||
createHStoreFilename(fid, ern));
|
||||
}
|
||||
|
||||
// File handling
|
||||
|
||||
/*
|
||||
 * Split by making two new store files that reference the top and bottom halves
 * of the original store file.
 * @param dstA the file which will contain keys from the start of the source
 * @param dstB the file which will contain keys from the midkey to the end of the source
 * @param fs file system
 * @throws IOException
 */
|
||||
void splitStoreFile(final HStoreFile dstA, final HStoreFile dstB,
|
||||
final FileSystem fs)
|
||||
throws IOException {
|
||||
dstA.writeReferenceFiles(fs);
|
||||
dstB.writeReferenceFiles(fs);
|
||||
}
|
||||
|
||||
void writeReferenceFiles(final FileSystem fs)
|
||||
throws IOException {
|
||||
createOrFail(fs, getMapFilePath());
|
||||
writeSplitInfo(fs);
|
||||
}
|
||||
|
||||
/*
|
||||
 * If this is a reference, write the referenced store file's id, the midkey,
 * and whether we serve the top or bottom half of the referent out to
 * the info file.
 * @param fs file system
 * @throws IOException
|
||||
*/
|
||||
private void writeSplitInfo(final FileSystem fs) throws IOException {
|
||||
Path p = getInfoFilePath();
|
||||
if (fs.exists(p)) {
|
||||
throw new IOException("File already exists " + p.toString());
|
||||
}
|
||||
FSDataOutputStream out = fs.create(p);
|
||||
try {
|
||||
reference.write(out);
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #writeSplitInfo(FileSystem fs)
|
||||
*/
|
||||
static Reference readSplitInfo(final Path p, final FileSystem fs)
|
||||
throws IOException {
|
||||
FSDataInputStream in = fs.open(p);
|
||||
try {
|
||||
Reference r = new Reference();
|
||||
r.readFields(in);
|
||||
return r;
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void createOrFail(final FileSystem fs, final Path p)
|
||||
throws IOException {
|
||||
if (fs.exists(p)) {
|
||||
throw new IOException("File already exists " + p.toString());
|
||||
}
|
||||
if (!fs.createNewFile(p)) {
|
||||
throw new IOException("Failed create of " + p);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads in an info file
|
||||
*
|
||||
* @param filesystem file system
|
||||
* @return The sequence id contained in the info file
|
||||
* @throws IOException
|
||||
*/
|
||||
public long loadInfo(final FileSystem filesystem) throws IOException {
|
||||
Path p = null;
|
||||
if (isReference()) {
|
||||
p = getInfoFilePath(reference.getEncodedRegionName(),
|
||||
this.reference.getFileId());
|
||||
} else {
|
||||
p = getInfoFilePath();
|
||||
}
|
||||
long length = filesystem.getFileStatus(p).getLen();
|
||||
boolean hasMoreThanSeqNum = length > (Byte.SIZE + Bytes.SIZEOF_LONG);
|
||||
DataInputStream in = new DataInputStream(filesystem.open(p));
|
||||
try {
|
||||
byte flag = in.readByte();
|
||||
if (flag == INFO_SEQ_NUM) {
|
||||
if (hasMoreThanSeqNum) {
|
||||
flag = in.readByte();
|
||||
if (flag == MAJOR_COMPACTION) {
|
||||
this.majorCompaction = in.readBoolean();
|
||||
}
|
||||
}
|
||||
return in.readLong();
|
||||
}
|
||||
throw new IOException("Cannot process log file: " + p);
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the file-identifier to disk
|
||||
*
|
||||
* @param filesystem file system
|
||||
* @param infonum file id
|
||||
* @throws IOException
|
||||
*/
|
||||
void writeInfo(final FileSystem filesystem, final long infonum)
|
||||
throws IOException {
|
||||
writeInfo(filesystem, infonum, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the file-identifier to disk
|
||||
*
|
||||
* @param filesystem file system
|
||||
* @param infonum file id
|
||||
* @param mc True if this file is product of a major compaction
|
||||
* @throws IOException
|
||||
*/
|
||||
void writeInfo(final FileSystem filesystem, final long infonum,
|
||||
final boolean mc)
|
||||
throws IOException {
|
||||
Path p = getInfoFilePath();
|
||||
FSDataOutputStream out = filesystem.create(p);
|
||||
try {
|
||||
out.writeByte(INFO_SEQ_NUM);
|
||||
out.writeLong(infonum);
|
||||
if (mc) {
|
||||
// Set whether major compaction flag on this file.
|
||||
this.majorCompaction = mc;
|
||||
out.writeByte(MAJOR_COMPACTION);
|
||||
out.writeBoolean(mc);
|
||||
}
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete store map files.
|
||||
* @throws IOException
|
||||
*/
|
||||
public void delete() throws IOException {
|
||||
fs.delete(getMapFilePath(), true);
|
||||
fs.delete(getInfoFilePath(), true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Renames the mapfiles and info directories under the passed
|
||||
* <code>hsf</code> directory.
|
||||
* @param fs
|
||||
* @param hsf
|
||||
* @return True if succeeded.
|
||||
* @throws IOException
|
||||
*/
|
||||
public boolean rename(final FileSystem fs, final HStoreFile hsf)
|
||||
throws IOException {
|
||||
Path src = getMapFilePath();
|
||||
if (!fs.exists(src)) {
|
||||
throw new FileNotFoundException(src.toString());
|
||||
}
|
||||
boolean success = fs.rename(src, hsf.getMapFilePath());
|
||||
if (!success) {
|
||||
LOG.warn("Failed rename of " + src + " to " + hsf.getMapFilePath());
|
||||
} else {
|
||||
src = getInfoFilePath();
|
||||
if (!fs.exists(src)) {
|
||||
throw new FileNotFoundException(src.toString());
|
||||
}
|
||||
success = fs.rename(src, hsf.getInfoFilePath());
|
||||
if (!success) {
|
||||
LOG.warn("Failed rename of " + src + " to " + hsf.getInfoFilePath());
|
||||
}
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get reader for the store file map file.
|
||||
* Client is responsible for closing file when done.
|
||||
* @param fs
|
||||
* @param bloomFilter If true, a bloom filter exists
|
||||
* @param blockCacheEnabled If true, MapFile blocks should be cached.
|
||||
* @return BloomFilterMapFile.Reader
|
||||
* @throws IOException
|
||||
*/
|
||||
public synchronized BloomFilterMapFile.Reader getReader(final FileSystem fs,
|
||||
final boolean bloomFilter, final boolean blockCacheEnabled)
|
||||
throws IOException {
|
||||
if (isReference()) {
|
||||
return new HalfMapFileReader(fs,
|
||||
getMapFilePath(reference).toString(), conf,
|
||||
reference.getFileRegion(), reference.getMidkey(), bloomFilter,
|
||||
blockCacheEnabled, this.hri);
|
||||
}
|
||||
return new BloomFilterMapFile.Reader(fs, getMapFilePath().toString(),
|
||||
conf, bloomFilter, blockCacheEnabled, this.hri);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a store file writer.
|
||||
* Client is responsible for closing file when done.
|
||||
* @param fs
|
||||
* @param compression Pass <code>SequenceFile.CompressionType.NONE</code>
|
||||
* for none.
|
||||
* @param bloomFilter If true, create a bloom filter
|
||||
* @param nrows number of rows expected. Required if bloomFilter is true.
|
||||
* @return MapFile.Writer
|
||||
* @throws IOException
|
||||
*/
|
||||
public MapFile.Writer getWriter(final FileSystem fs,
|
||||
final SequenceFile.CompressionType compression,
|
||||
final boolean bloomFilter, int nrows)
|
||||
throws IOException {
|
||||
if (isReference()) {
|
||||
throw new IOException("Illegal Access: Cannot get a writer on a" +
|
||||
"HStoreFile reference");
|
||||
}
|
||||
return new BloomFilterMapFile.Writer(conf, fs,
|
||||
getMapFilePath().toString(), compression, bloomFilter, nrows, this.hri);
|
||||
}
|
||||
|
||||
/**
|
||||
 * @return Length of the store map file. If a reference, the size is an
|
||||
* approximation.
|
||||
* @throws IOException
|
||||
*/
|
||||
public long length() throws IOException {
|
||||
Path p = new Path(getMapFilePath(reference), MapFile.DATA_FILE_NAME);
|
||||
long l = p.getFileSystem(conf).getFileStatus(p).getLen();
|
||||
return (isReference())? l / 2: l;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Length of the store map file index.
|
||||
* @throws IOException
|
||||
*/
|
||||
public synchronized long indexLength() throws IOException {
|
||||
if (indexLength == 0) {
|
||||
Path p = new Path(getMapFilePath(reference), MapFile.INDEX_FILE_NAME);
|
||||
indexLength = p.getFileSystem(conf).getFileStatus(p).getLen();
|
||||
}
|
||||
return indexLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return encodedRegionName + "/" + Bytes.toString(colFamily) + "/" + fileId +
|
||||
(isReference()? "-" + reference.toString(): "");
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if this file was made by a major compaction.
|
||||
*/
|
||||
public boolean isMajorCompaction() {
|
||||
return this.majorCompaction;
|
||||
}
|
||||
|
||||
private static String createHStoreFilename(final long fid,
|
||||
final int encodedRegionName) {
|
||||
return Long.toString(fid) +
|
||||
((encodedRegionName != HRegionInfo.NO_HASH)?
|
||||
"." + encodedRegionName : "");
|
||||
}
|
||||
|
||||
/**
|
||||
* @param dir Base directory
|
||||
* @param encodedRegionName Encoding of region name.
|
||||
* @param f Column family.
|
||||
* @return path for map file directory
|
||||
*/
|
||||
public static Path getMapDir(Path dir, int encodedRegionName,
|
||||
final byte [] f) {
|
||||
return getFamilySubDir(dir, encodedRegionName, f, HSTORE_DATFILE_DIR);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param dir Base directory
|
||||
* @param encodedRegionName Encoding of region name.
|
||||
* @param f Column family.
|
||||
* @return the info directory path
|
||||
*/
|
||||
public static Path getInfoDir(Path dir, int encodedRegionName, byte [] f) {
|
||||
return getFamilySubDir(dir, encodedRegionName, f, HSTORE_INFO_DIR);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param dir Base directory
|
||||
* @param encodedRegionName Encoding of region name.
|
||||
* @param f Column family.
|
||||
* @return the bloom filter directory path
|
||||
*/
|
||||
@Deprecated
|
||||
public static Path getFilterDir(Path dir, int encodedRegionName,
|
||||
final byte [] f) {
|
||||
return getFamilySubDir(dir, encodedRegionName, f, HSTORE_FILTER_DIR);
|
||||
}
|
||||
|
||||
/*
|
||||
* @param base Base directory
|
||||
* @param encodedRegionName Encoding of region name.
|
||||
* @param f Column family.
|
||||
* @param subdir Subdirectory to create under column family/store directory.
|
||||
 * @return path for the passed subdirectory under the column family directory
|
||||
*/
|
||||
private static Path getFamilySubDir(final Path base,
|
||||
final int encodedRegionName, final byte [] f, final String subdir) {
|
||||
return new Path(base, new Path(Integer.toString(encodedRegionName),
|
||||
new Path(Bytes.toString(f), subdir)));
|
||||
}
|
||||
}
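As a rough sketch of the directory layout the helpers above produce (not part of the patch; the base directory, encoded region names, family, and file id are invented), a plain store file and a reference resolve to paths like these:

import org.apache.hadoop.fs.Path;

public class HStoreFilePathDemo {
  public static void main(String[] args) {
    Path basedir = new Path("/hbase/mytable");
    int encodedRegionName = 1028785192;
    String family = "anchor";
    long fileId = 1278437856009925445L;

    // Plain store file: <basedir>/<encodedRegion>/<family>/mapfiles/<fileId>
    // (the accompanying sequence-id file sits under "info" instead of "mapfiles").
    Path mapfile = new Path(basedir,
        new Path(Integer.toString(encodedRegionName),
            new Path(family, new Path("mapfiles", Long.toString(fileId)))));
    System.out.println(mapfile);

    // A reference uses the same layout with ".<encoded name of the referenced
    // region>" appended to the file id, as createHStoreFilename does above.
    int referencedRegion = 495847914;
    System.out.println(new Path(mapfile.getParent(),
        fileId + "." + referencedRegion));
  }
}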
|
|
@ -1,26 +0,0 @@
|
|||
/*
|
||||
* Copyright 2009 The Apache Software Foundation
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
/**
|
||||
Provides classes from old hbase versions used migrating data.
|
||||
Nineteen package has classes from hbase 0.19. See
|
||||
<a href="http://wiki.apache.org/hadoop/Hbase/HowToMigrate">How to Migrate</a>
|
||||
for more on migrations.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.migration;
|
|
@ -54,7 +54,6 @@ import org.apache.hadoop.hbase.client.Put;
|
|||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.filter.Filter;
|
||||
import org.apache.hadoop.hbase.filter.RowFilterInterface;
|
||||
import org.apache.hadoop.hbase.io.HeapSize;
|
||||
import org.apache.hadoop.hbase.io.Reference.Range;
|
||||
import org.apache.hadoop.hbase.io.hfile.BlockCache;
|
||||
|
@ -1678,12 +1677,10 @@ public class HRegion implements HConstants, HeapSize { // , Writable{
|
|||
private final KeyValueHeap storeHeap;
|
||||
private final byte [] stopRow;
|
||||
private Filter filter;
|
||||
private RowFilterInterface oldFilter;
|
||||
private List<KeyValue> results = new ArrayList<KeyValue>();
|
||||
|
||||
RegionScanner(Scan scan, List<KeyValueScanner> additionalScanners) {
|
||||
this.filter = scan.getFilter();
|
||||
this.oldFilter = scan.getOldFilter();
|
||||
if (Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
|
||||
this.stopRow = null;
|
||||
} else {
|
||||
|
@ -1711,9 +1708,6 @@ public class HRegion implements HConstants, HeapSize { // , Writable{
|
|||
if (filter != null) {
|
||||
filter.reset();
|
||||
}
|
||||
if (oldFilter != null) {
|
||||
oldFilter.reset();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1760,8 +1754,7 @@ public class HRegion implements HConstants, HeapSize { // , Writable{
|
|||
continue;
|
||||
}
|
||||
// see if current row should be filtered based on row key
|
||||
if ((filter != null && filter.filterRowKey(row, 0, row.length)) ||
|
||||
(oldFilter != null && oldFilter.filterRowKey(row, 0, row.length))) {
|
||||
if (filter != null && filter.filterRowKey(row, 0, row.length)) {
|
||||
if(!results.isEmpty() && !Bytes.equals(currentRow, row)) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -782,19 +782,22 @@ public class MemStore implements HeapSize {
|
|||
long size = 0;
|
||||
final int count = 10000;
|
||||
byte [] column = Bytes.toBytes("col:umn");
|
||||
byte [] fam = Bytes.toBytes("col");
|
||||
byte [] qf = Bytes.toBytes("umn");
|
||||
byte [] empty = new byte[0];
|
||||
for (int i = 0; i < count; i++) {
|
||||
// Give each its own ts
|
||||
size += memstore1.add(new KeyValue(Bytes.toBytes(i), column, i));
|
||||
size += memstore1.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, empty));
|
||||
}
|
||||
LOG.info("memstore1 estimated size=" + size);
|
||||
for (int i = 0; i < count; i++) {
|
||||
size += memstore1.add(new KeyValue(Bytes.toBytes(i), column, i));
|
||||
size += memstore1.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, empty));
|
||||
}
|
||||
LOG.info("memstore1 estimated size (2nd loading of same data)=" + size);
|
||||
// Make a variably sized memstore.
|
||||
MemStore memstore2 = new MemStore();
|
||||
for (int i = 0; i < count; i++) {
|
||||
size += memstore2.add(new KeyValue(Bytes.toBytes(i), column, i,
|
||||
size += memstore2.add(new KeyValue(Bytes.toBytes(i), fam, qf, i,
|
||||
new byte[i]));
|
||||
}
|
||||
LOG.info("memstore2 estimated size=" + size);
|
||||
|
|
|
@ -24,7 +24,6 @@ import java.util.NavigableSet;
|
|||
|
||||
import org.apache.hadoop.hbase.KeyValue;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.filter.RowFilterInterface;
|
||||
import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
|
@ -32,8 +31,6 @@ import org.apache.hadoop.hbase.util.Bytes;
|
|||
* A query matcher that is specifically designed for the scan case.
|
||||
*/
|
||||
public class ScanQueryMatcher extends QueryMatcher {
|
||||
// have to support old style filter for now.
|
||||
private RowFilterInterface oldFilter;
|
||||
// Optimization so we can skip lots of compares when we decide to skip
|
||||
// to the next row.
|
||||
private boolean stickyNextRow;
|
||||
|
@ -57,7 +54,6 @@ public class ScanQueryMatcher extends QueryMatcher {
|
|||
this.startKey = KeyValue.createFirstOnRow(scan.getStartRow());
|
||||
this.stopKey = KeyValue.createFirstOnRow(scan.getStopRow());
|
||||
this.filter = scan.getFilter();
|
||||
this.oldFilter = scan.getOldFilter();
|
||||
|
||||
// Single branch to deal with two types of reads (columns vs all in family)
|
||||
if (columns == null || columns.size() == 0) {
|
||||
|
@ -84,9 +80,6 @@ public class ScanQueryMatcher extends QueryMatcher {
|
|||
public MatchCode match(KeyValue kv) {
|
||||
if (filter != null && filter.filterAllRemaining()) {
|
||||
return MatchCode.DONE_SCAN;
|
||||
} else if (oldFilter != null && oldFilter.filterAllRemaining()) {
|
||||
// the old filter runs only if the other filter didnt work.
|
||||
return MatchCode.DONE_SCAN;
|
||||
}
|
||||
|
||||
byte [] bytes = kv.getBuffer();
|
||||
|
|