HBASE-1822 Remove the deprecated APIs (incompatible change)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@816014 13f79535-47bb-0310-9956-ffa450edef68
Jonathan Gray 2009-09-16 23:59:51 +00:00
parent 6cd712c7be
commit b37eca06af
192 changed files with 645 additions and 22052 deletions
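Most of the removed methods took a single colon-delimited family:qualifier byte[]; their replacements take the family and qualifier as separate arguments. A minimal migration sketch, not part of the commit (table, family, and qualifier names are invented for illustration):

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class MigrationSketch {
  public static void main(String[] args) throws Exception {
    HTable table = new HTable(new HBaseConfiguration(), "mytable");
    // Before (removed in this commit): get.addColumn(Bytes.toBytes("info:status"));
    // After: split the old-style column and pass family/qualifier separately.
    byte[][] famQf = KeyValue.parseColumn(Bytes.toBytes("info:status"));
    Get get = new Get(Bytes.toBytes("row1"));
    if (famQf.length == 1) {
      get.addFamily(famQf[0]);           // family-only spec, e.g. "info" or "info:"
    } else {
      get.addColumn(famQf[0], famQf[1]); // explicit family and qualifier
    }
    Result result = table.get(get);
    byte[] status = result.getValue(Bytes.toBytes("info"), Bytes.toBytes("status"));
    System.out.println(status == null ? "<missing>" : Bytes.toStringBinary(status));
  }
}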

View File

@ -1,6 +1,7 @@
HBase Change Log
Release 0.21.0 - Unreleased
INCOMPATIBLE CHANGES
HBASE-1822 Remove the deprecated APIs
BUG FIXES
HBASE-1791 Timeout in IndexRecordWriter (Bradford Stephens via Andrew

View File

@ -27,7 +27,6 @@ import java.util.Map;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;

View File

@ -91,9 +91,7 @@ public class RowResource implements Constants {
rowKey = value.getRow();
rowModel = new RowModel(rowKey);
}
rowModel.addCell(
new CellModel(value.getColumn(), value.getTimestamp(),
value.getValue()));
rowModel.addCell(new CellModel(value));
value = generator.next();
} while (value != null);
model.addRow(rowModel);
@ -148,8 +146,12 @@ public class RowResource implements Constants {
Put put = new Put(row.getKey());
for (CellModel cell: row.getCells()) {
byte [][] parts = KeyValue.parseColumn(cell.getColumn());
if(parts.length == 1) {
put.add(parts[0], new byte[0], cell.getTimestamp(), cell.getValue());
} else {
put.add(parts[0], parts[1], cell.getTimestamp(), cell.getValue());
}
}
table.put(put);
if (LOG.isDebugEnabled()) {
LOG.debug("PUT " + put.toString());
@ -203,7 +205,11 @@ public class RowResource implements Constants {
}
Put put = new Put(row);
byte parts[][] = KeyValue.parseColumn(column);
if(parts.length == 1) {
put.add(parts[0], new byte[0], timestamp, message);
} else {
put.add(parts[0], parts[1], timestamp, message);
}
table = pool.getTable(this.table);
table.put(put);
if (LOG.isDebugEnabled()) {
@ -272,13 +278,13 @@ public class RowResource implements Constants {
for (byte[] column: rowspec.getColumns()) {
byte[][] split = KeyValue.parseColumn(column);
if (rowspec.hasTimestamp()) {
if (split[1] != null) {
if (split.length == 2) {
delete.deleteColumns(split[0], split[1], rowspec.getTimestamp());
} else {
delete.deleteFamily(split[0], rowspec.getTimestamp());
}
} else {
if (split[1] != null) {
if (split.length == 2) {
delete.deleteColumns(split[0], split[1]);
} else {
delete.deleteFamily(split[0]);

View File

@ -41,7 +41,15 @@ public class RowResultGenerator extends ResultGenerator {
try {
Get get = new Get(rowspec.getRow());
if (rowspec.hasColumns()) {
get.addColumns(rowspec.getColumns());
byte [][] columns = rowspec.getColumns();
for(byte [] column : columns) {
byte [][] famQf = KeyValue.parseColumn(column);
if(famQf.length == 1) {
get.addFamily(famQf[0]);
} else {
get.addColumn(famQf[0], famQf[1]);
}
}
} else {
// rowspec does not explicitly specify columns, return them all
for (HColumnDescriptor family:

View File

@ -99,9 +99,7 @@ public class ScannerInstanceResource implements Constants {
rowKey = value.getRow();
rowModel = new RowModel(rowKey);
}
rowModel.addCell(
new CellModel(value.getColumn(), value.getTimestamp(),
value.getValue()));
rowModel.addCell(new CellModel(value));
} while (--count > 0);
model.addRow(rowModel);
ResponseBuilder response = Response.ok(model);
@ -122,10 +120,12 @@ public class ScannerInstanceResource implements Constants {
LOG.info("generator exhausted");
return Response.noContent().build();
}
byte [] column = KeyValue.makeColumn(value.getFamily(),
value.getQualifier());
ResponseBuilder response = Response.ok(value.getValue());
response.cacheControl(cacheControl);
response.header("X-Row", Base64.encode(value.getRow()));
response.header("X-Column", Base64.encode(value.getColumn()));
response.header("X-Column", Base64.encode(column));
response.header("X-Timestamp", value.getTimestamp());
return response.build();
} catch (IllegalStateException e) {

View File

@ -59,7 +59,7 @@ public class ScannerResultGenerator extends ResultGenerator {
byte[][] columns = rowspec.getColumns();
for (byte[] column: columns) {
byte[][] split = KeyValue.parseColumn(column);
if (split[1] != null) {
if (split.length == 2) {
scan.addColumn(split[0], split[1]);
} else {
scan.addFamily(split[0]);

View File

@ -29,6 +29,7 @@ import javax.xml.bind.annotation.XmlType;
import javax.xml.bind.annotation.XmlValue;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.stargate.ProtobufMessageHandler;
import org.apache.hadoop.hbase.stargate.protobuf.generated.CellMessage.Cell;
@ -76,6 +77,15 @@ public class CellModel implements ProtobufMessageHandler, Serializable {
this(column, HConstants.LATEST_TIMESTAMP, value);
}
/**
* Constructor from KeyValue
* @param kv
*/
public CellModel(KeyValue kv) {
this(KeyValue.makeColumn(kv.getFamily(), kv.getQualifier()),
kv.getTimestamp(), kv.getValue());
}
/**
* Constructor
* @param column

View File

@ -32,6 +32,7 @@ import javax.xml.bind.Unmarshaller;
import org.apache.commons.httpclient.Header;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.stargate.client.Client;
import org.apache.hadoop.hbase.stargate.client.Cluster;
@ -79,8 +80,10 @@ public class TestRowResource extends MiniClusterTestCase {
return;
}
HTableDescriptor htd = new HTableDescriptor(TABLE);
htd.addFamily(new HColumnDescriptor(COLUMN_1));
htd.addFamily(new HColumnDescriptor(COLUMN_2));
htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(
Bytes.toBytes(COLUMN_1))[0]));
htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(
Bytes.toBytes(COLUMN_2))[0]));
admin.createTable(htd);
}

View File

@ -76,7 +76,11 @@ public class TestScannerResource extends MiniClusterTestCase {
k[1] = b2;
k[2] = b3;
Put put = new Put(k);
if(famAndQf.length == 1) {
put.add(famAndQf[0], new byte[0], k);
} else {
put.add(famAndQf[0], famAndQf[1], k);
}
table.put(put);
count++;
}
@ -107,8 +111,10 @@ public class TestScannerResource extends MiniClusterTestCase {
return;
}
HTableDescriptor htd = new HTableDescriptor(TABLE);
htd.addFamily(new HColumnDescriptor(COLUMN_1));
htd.addFamily(new HColumnDescriptor(COLUMN_2));
htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(
Bytes.toBytes(COLUMN_1))[0]));
htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(
Bytes.toBytes(COLUMN_2))[0]));
admin.createTable(htd);
expectedRows1 = insertData(TABLE, COLUMN_1, 1.0);
expectedRows2 = insertData(TABLE, COLUMN_2, 0.5);

View File

@ -29,6 +29,7 @@ import javax.xml.bind.JAXBException;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.stargate.client.Client;
@ -38,6 +39,7 @@ import org.apache.hadoop.hbase.stargate.model.TableModel;
import org.apache.hadoop.hbase.stargate.model.TableInfoModel;
import org.apache.hadoop.hbase.stargate.model.TableListModel;
import org.apache.hadoop.hbase.stargate.model.TableRegionModel;
import org.apache.hadoop.hbase.util.Bytes;
public class TestTableResource extends MiniClusterTestCase {
private static String TABLE = "TestTableResource";
@ -65,7 +67,8 @@ public class TestTableResource extends MiniClusterTestCase {
return;
}
HTableDescriptor htd = new HTableDescriptor(TABLE);
htd.addFamily(new HColumnDescriptor(COLUMN));
htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(
Bytes.toBytes(COLUMN))[0]));
admin.createTable(htd);
new HTable(conf, TABLE);
}

View File

@ -29,7 +29,6 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
@ -46,7 +45,7 @@ public class IndexedTable extends TransactionalTable {
// TODO move these schema constants elsewhere
public static final byte[] INDEX_COL_FAMILY_NAME = Bytes.toBytes("__INDEX__");
public static final byte[] INDEX_COL_FAMILY = Bytes.add(
INDEX_COL_FAMILY_NAME, new byte[] { HStoreKey.COLUMN_FAMILY_DELIMITER });
INDEX_COL_FAMILY_NAME, KeyValue.COLUMN_FAMILY_DELIM_ARRAY);
public static final byte[] INDEX_BASE_ROW = Bytes.toBytes("ROW");
public static final byte[] INDEX_BASE_ROW_COLUMN = Bytes.add(
INDEX_COL_FAMILY, INDEX_BASE_ROW);
@ -114,7 +113,14 @@ public class IndexedTable extends TransactionalTable {
Scan indexScan = new Scan();
indexScan.setFilter(indexFilter);
indexScan.addColumns(allIndexColumns);
for(byte [] column : allIndexColumns) {
byte [][] famQf = KeyValue.parseColumn(column);
if(famQf.length == 1) {
indexScan.addFamily(famQf[0]);
} else {
indexScan.addColumn(famQf[0], famQf[1]);
}
}
if (indexStartRow != null) {
indexScan.setStartRow(indexStartRow);
}
@ -174,24 +180,32 @@ public class IndexedTable extends TransactionalTable {
for (int i = 0; i < indexResult.length; i++) {
Result row = indexResult[i];
byte[] baseRow = row.getValue(INDEX_BASE_ROW_COLUMN);
byte[] baseRow = row.getValue(INDEX_COL_FAMILY_NAME, INDEX_BASE_ROW);
LOG.debug("next index row [" + Bytes.toString(row.getRow())
+ "] -> base row [" + Bytes.toString(baseRow) + "]");
Result baseResult = null;
if (columns != null && columns.length > 0) {
LOG.debug("Going to base table for remaining columns");
Get baseGet = new Get(baseRow);
baseGet.addColumns(columns);
for(byte [] column : columns) {
byte [][] famQf = KeyValue.parseColumn(column);
if(famQf.length == 1) {
baseGet.addFamily(famQf[0]);
} else {
baseGet.addColumn(famQf[0], famQf[1]);
}
}
baseResult = IndexedTable.this.get(baseGet);
}
List<KeyValue> results = new ArrayList<KeyValue>();
for (KeyValue indexKV : row.list()) {
byte[] col = indexKV.getColumn();
if (HStoreKey.matchingFamily(INDEX_COL_FAMILY_NAME, col)) {
if (indexKV.matchingFamily(INDEX_COL_FAMILY_NAME)) {
continue;
}
results.add(new KeyValue(baseRow, indexKV.getColumn(), indexKV.getTimestamp(), KeyValue.Type.Put, indexKV.getValue()));
results.add(new KeyValue(baseRow, indexKV.getFamily(),
indexKV.getQualifier(), indexKV.getTimestamp(), KeyValue.Type.Put,
indexKV.getValue()));
}
if (baseResult != null) {

View File

@ -24,22 +24,20 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.ColumnNameParseException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.tableindexed.IndexMaintenanceUtils;
import org.apache.hadoop.hbase.util.Bytes;
@ -88,16 +86,13 @@ public class IndexedTableAdmin extends HBaseAdmin {
HTableDescriptor indexTableDesc = new HTableDescriptor(indexSpec
.getIndexedTableName(baseTableName));
Set<byte[]> families = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
families.add(IndexedTable.INDEX_COL_FAMILY);
families.add(IndexedTable.INDEX_COL_FAMILY_NAME);
for (byte[] column : indexSpec.getAllColumns()) {
families.add(Bytes.add(HStoreKey.getFamily(column),
new byte[] { HStoreKey.COLUMN_FAMILY_DELIMITER }));
families.add(KeyValue.parseColumn(column)[0]);
}
for (byte[] colFamily : families) {
indexTableDesc.addFamily(new HColumnDescriptor(colFamily));
}
return indexTableDesc;
}
@ -135,13 +130,23 @@ public class IndexedTableAdmin extends HBaseAdmin {
private void reIndexTable(byte[] baseTableName, IndexSpecification indexSpec) throws IOException {
HTable baseTable = new HTable(baseTableName);
HTable indexTable = new HTable(indexSpec.getIndexedTableName(baseTableName));
for (RowResult rowResult : baseTable.getScanner(indexSpec.getAllColumns())) {
Scan baseScan = new Scan();
for(byte [] column : indexSpec.getAllColumns()) {
byte [][] famQf = KeyValue.parseColumn(column);
if(famQf.length == 1) {
baseScan.addFamily(famQf[0]);
} else {
baseScan.addColumn(famQf[0], famQf[1]);
}
}
for (Result result : baseTable.getScanner(baseScan)) {
SortedMap<byte[], byte[]> columnValues = new TreeMap<byte[], byte[]>(Bytes.BYTES_COMPARATOR);
for (Entry<byte[], Cell> entry : rowResult.entrySet()) {
columnValues.put(entry.getKey(), entry.getValue().getValue());
for(KeyValue kv : result.sorted()) {
columnValues.put(Bytes.add(kv.getFamily(), KeyValue.COLUMN_FAMILY_DELIM_ARRAY,
kv.getQualifier()), kv.getValue());
}
if (IndexMaintenanceUtils.doesApplyToIndex(indexSpec, columnValues)) {
Put indexUpdate = IndexMaintenanceUtils.createIndexUpdate(indexSpec, rowResult.getRow(), columnValues);
Put indexUpdate = IndexMaintenanceUtils.createIndexUpdate(indexSpec, result.getRow(), columnValues);
indexTable.put(indexUpdate);
}
}

View File

@ -25,11 +25,12 @@ import java.util.Random;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
public class HBaseBackedTransactionLogger implements TransactionLogger {
@ -37,16 +38,12 @@ public class HBaseBackedTransactionLogger implements TransactionLogger {
/** The name of the transaction status table. */
public static final String TABLE_NAME = "__GLOBAL_TRX_LOG__";
private static final String INFO_FAMILY = "Info:";
/**
* Column which holds the transaction status.
*
*/
private static final String STATUS_COLUMN = INFO_FAMILY + "Status";
private static final byte[] STATUS_COLUMN_BYTES = Bytes
.toBytes(STATUS_COLUMN);
private static final byte [] STATUS_FAMILY = Bytes.toBytes("Info");
private static final byte [] STATUS_QUALIFIER = Bytes.toBytes("Status");
/**
* Create the table.
*
@ -55,7 +52,7 @@ public class HBaseBackedTransactionLogger implements TransactionLogger {
*/
public static void createTable() throws IOException {
HTableDescriptor tableDesc = new HTableDescriptor(TABLE_NAME);
tableDesc.addFamily(new HColumnDescriptor(INFO_FAMILY));
tableDesc.addFamily(new HColumnDescriptor(STATUS_FAMILY));
HBaseAdmin admin = new HBaseAdmin(new HBaseConfiguration());
admin.createTable(tableDesc);
}
@ -93,15 +90,15 @@ public class HBaseBackedTransactionLogger implements TransactionLogger {
public TransactionStatus getStatusForTransaction(long transactionId) {
try {
RowResult result = table.getRow(getRow(transactionId));
Result result = table.get(new Get(getRow(transactionId)));
if (result == null || result.isEmpty()) {
return null;
}
Cell statusCell = result.get(STATUS_COLUMN_BYTES);
if (statusCell == null) {
byte [] statusValue = result.getValue(STATUS_FAMILY, STATUS_QUALIFIER);
if (statusValue == null) {
throw new RuntimeException("No status cell for row " + transactionId);
}
String statusString = Bytes.toString(statusCell.getValue());
String statusString = Bytes.toString(statusValue);
return TransactionStatus.valueOf(statusString);
} catch (IOException e) {
@ -115,22 +112,20 @@ public class HBaseBackedTransactionLogger implements TransactionLogger {
public void setStatusForTransaction(long transactionId,
TransactionStatus status) {
BatchUpdate update = new BatchUpdate(getRow(transactionId));
update.put(STATUS_COLUMN, Bytes.toBytes(status.name()));
Put put = new Put(getRow(transactionId));
put.add(STATUS_FAMILY, STATUS_QUALIFIER, Bytes.toBytes(status.name()));
try {
table.commit(update);
table.put(put);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public void forgetTransaction(long transactionId) {
BatchUpdate update = new BatchUpdate(getRow(transactionId));
update.delete(STATUS_COLUMN);
Delete delete = new Delete(getRow(transactionId));
delete.deleteColumns(STATUS_FAMILY, STATUS_QUALIFIER);
try {
table.commit(update);
table.delete(delete);
} catch (IOException e) {
throw new RuntimeException(e);
}

View File

@ -32,7 +32,6 @@ import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ScannerCallable;
import org.apache.hadoop.hbase.client.ServerCallable;
import org.apache.hadoop.hbase.ipc.HBaseRPC;
import org.apache.hadoop.hbase.ipc.TransactionalRegionInterface;
import org.apache.hadoop.hbase.util.Bytes;

View File

@ -24,7 +24,7 @@ import java.util.SortedMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.ColumnNameParseException;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.tableindexed.IndexSpecification;
import org.apache.hadoop.hbase.client.tableindexed.IndexedTable;
@ -50,15 +50,21 @@ public class IndexMaintenanceUtils {
if (val == null) {
throw new RuntimeException("Unexpected missing column value. [" + Bytes.toString(col) + "]");
}
byte [][] colSeperated = HStoreKey.parseColumn(col);
update.add(colSeperated[0], colSeperated[1], val);
byte [][] colSeparated = KeyValue.parseColumn(col);
if(colSeparated.length == 1) {
throw new ColumnNameParseException("Expected family:qualifier but only got a family");
}
update.add(colSeparated[0], colSeparated[1], val);
}
for (byte[] col : indexSpec.getAdditionalColumns()) {
byte[] val = columnValues.get(col);
if (val != null) {
byte [][] colSeperated = HStoreKey.parseColumn(col);
update.add(colSeperated[0], colSeperated[1], val);
byte [][] colSeparated = KeyValue.parseColumn(col);
if(colSeparated.length == 1) {
throw new ColumnNameParseException("Expected family:qualifier but only got a family");
}
update.add(colSeparated[0], colSeparated[1], val);
}
}
} catch (ColumnNameParseException e) {

View File

@ -116,7 +116,12 @@ class IndexedRegion extends TransactionalRegion {
Get oldGet = new Get(put.getRow());
for (byte [] neededCol : neededColumns) {
oldGet.addColumn(neededCol);
byte [][] famQf = KeyValue.parseColumn(neededCol);
if(famQf.length == 1) {
oldGet.addFamily(famQf[0]);
} else {
oldGet.addColumn(famQf[0], famQf[1]);
}
}
Result oldResult = super.get(oldGet, lockId);
@ -124,8 +129,10 @@ class IndexedRegion extends TransactionalRegion {
// Add the old values to the new if they are not there
if (oldResult != null && oldResult.raw() != null) {
for (KeyValue oldKV : oldResult.raw()) {
if (!newColumnValues.containsKey(oldKV.getColumn())) {
newColumnValues.put(oldKV.getColumn(), oldKV.getValue());
byte [] column = KeyValue.makeColumn(oldKV.getFamily(),
oldKV.getQualifier());
if (!newColumnValues.containsKey(column)) {
newColumnValues.put(column, oldKV.getValue());
}
}
}
@ -181,7 +188,8 @@ class IndexedRegion extends TransactionalRegion {
Bytes.BYTES_COMPARATOR);
for (List<KeyValue> familyPuts : put.getFamilyMap().values()) {
for (KeyValue kv : familyPuts) {
columnValues.put(kv.getColumn(), kv.getValue());
byte [] column = KeyValue.makeColumn(kv.getFamily(), kv.getQualifier());
columnValues.put(column, kv.getValue());
}
}
return columnValues;
@ -196,7 +204,8 @@ class IndexedRegion extends TransactionalRegion {
private boolean possiblyAppliesToIndex(IndexSpecification indexSpec, Put put) {
for (List<KeyValue> familyPuts : put.getFamilyMap().values()) {
for (KeyValue kv : familyPuts) {
if (indexSpec.containsColumn(kv.getColumn())) {
byte [] column = KeyValue.makeColumn(kv.getFamily(), kv.getQualifier());
if (indexSpec.containsColumn(column)) {
return true;
}
}
@ -227,7 +236,12 @@ class IndexedRegion extends TransactionalRegion {
Get get = new Get(delete.getRow());
for (byte [] col : neededColumns) {
get.addColumn(col);
byte [][] famQf = KeyValue.parseColumn(col);
if(famQf.length == 1) {
get.addFamily(famQf[0]);
} else {
get.addColumn(famQf[0], famQf[1]);
}
}
Result oldRow = super.get(get, null);
@ -263,7 +277,8 @@ class IndexedRegion extends TransactionalRegion {
List<KeyValue> list = result.list();
if (list != null) {
for(KeyValue kv : result.list()) {
currentColumnValues.put(kv.getColumn(), kv.getValue());
byte [] column = KeyValue.makeColumn(kv.getFamily(), kv.getQualifier());
currentColumnValues.put(column, kv.getValue());
}
}
return currentColumnValues;

View File

@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.PerformanceEvaluation;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
@ -71,7 +72,7 @@ public class TestIndexedTable extends HBaseClusterTestCase {
super.setUp();
desc = new HTableDescriptor(TABLE_NAME);
desc.addFamily(new HColumnDescriptor(FAMILY_COLON));
desc.addFamily(new HColumnDescriptor(FAMILY));
IndexedTableDescriptor indexDesc = new IndexedTableDescriptor(desc);
// Create a new index that does lexicographic ordering on COL_A
@ -107,7 +108,7 @@ public class TestIndexedTable extends HBaseClusterTestCase {
int numRows = 0;
byte[] lastColA = null;
for (Result rowResult : scanner) {
byte[] colA = rowResult.getValue(COL_A);
byte[] colA = rowResult.getValue(FAMILY, QUAL_A);
LOG.info("index scan : row [" + Bytes.toString(rowResult.getRow())
+ "] value [" + Bytes.toString(colA) + "]");
if (lastColA != null) {
@ -127,7 +128,7 @@ public class TestIndexedTable extends HBaseClusterTestCase {
byte[] persistedRowValue = null;
for (Result rowResult : scanner) {
byte[] row = rowResult.getRow();
byte[] value = rowResult.getValue(COL_A);
byte[] value = rowResult.getValue(FAMILY, QUAL_A);
if (Bytes.toString(row).equals(Bytes.toString(PerformanceEvaluation.format(updatedRow)))) {
persistedRowValue = value;
LOG.info("update found: row [" + Bytes.toString(row)
@ -191,7 +192,7 @@ public class TestIndexedTable extends HBaseClusterTestCase {
public void testDelete() throws IOException {
writeInitalRows();
// Delete the first row;
table.deleteAll(PerformanceEvaluation.format(0));
table.delete(new Delete(PerformanceEvaluation.format(0)));
assertRowsInOrder(NUM_ROWS - 1);
}

View File

@ -232,9 +232,9 @@ public class StressTestTransactions extends HBaseClusterTestCase {
TransactionState transactionState = transactionManager.beginTransaction();
int row1Amount = Bytes.toInt(table.get(transactionState,
new Get(row1).addColumn(COL)).getValue(COL));
new Get(row1).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
int row2Amount = Bytes.toInt(table.get(transactionState,
new Get(row2).addColumn(COL)).getValue(COL));
new Get(row2).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
row1Amount -= transferAmount;
row2Amount += transferAmount;
@ -257,7 +257,8 @@ public class StressTestTransactions extends HBaseClusterTestCase {
int totalSum = 0;
for (int i = 0; i < NUM_ST_ROWS; i++) {
totalSum += Bytes.toInt(table.get(transactionState,
new Get(makeSTRow(i)).addColumn(COL)).getValue(COL));
new Get(makeSTRow(i)).addColumn(FAMILY, QUAL_A)).getValue(FAMILY,
QUAL_A));
}
transactionManager.tryCommit(transactionState);
@ -309,9 +310,9 @@ public class StressTestTransactions extends HBaseClusterTestCase {
TransactionState transactionState = transactionManager.beginTransaction();
int table1Amount = Bytes.toInt(table1.get(transactionState,
new Get(row).addColumn(COL)).getValue(COL));
new Get(row).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
int table2Amount = Bytes.toInt(table2.get(transactionState,
new Get(row).addColumn(COL)).getValue(COL));
new Get(row).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
table1Amount -= transferAmount;
table2Amount += transferAmount;
@ -337,7 +338,7 @@ public class StressTestTransactions extends HBaseClusterTestCase {
int[] amounts = new int[tables.length];
for (int i = 0; i < tables.length; i++) {
int amount = Bytes.toInt(tables[i].get(transactionState,
new Get(row).addColumn(COL)).getValue(COL));
new Get(row).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
amounts[i] = amount;
totalSum += amount;
}
@ -397,15 +398,15 @@ public class StressTestTransactions extends HBaseClusterTestCase {
int thisTableSum = 0;
for (int i = 0; i < NUM_ST_ROWS; i++) {
byte[] row = makeSTRow(i);
thisTableSum += Bytes.toInt(table.get(new Get(row).addColumn(COL))
.getValue(COL));
thisTableSum += Bytes.toInt(table.get(new Get(row).addColumn(FAMILY, QUAL_A))
.getValue(FAMILY, QUAL_A));
}
Assert.assertEquals(SingleTableTransactionThread.TOTAL_SUM, thisTableSum);
for (int i = 0; i < NUM_MT_ROWS; i++) {
byte[] row = makeMTRow(i);
mtSums[i] += Bytes.toInt(table.get(new Get(row).addColumn(COL))
.getValue(COL));
mtSums[i] += Bytes.toInt(table.get(new Get(row).addColumn(FAMILY, QUAL_A))
.getValue(FAMILY, QUAL_A));
}
}

View File

@ -71,7 +71,7 @@ public class TestTransactions extends HBaseClusterTestCase {
super.setUp();
HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
desc.addFamily(new HColumnDescriptor(FAMILY_COLON));
desc.addFamily(new HColumnDescriptor(FAMILY));
admin = new HBaseAdmin(conf);
admin.createTable(desc);
table = new TransactionalTable(conf, desc.getName());
@ -118,13 +118,14 @@ public class TestTransactions extends HBaseClusterTestCase {
TransactionState transactionState = transactionManager.beginTransaction();
int originalValue = Bytes.toInt(table.get(transactionState,
new Get(ROW1).addColumn(COL_A)).value());
new Get(ROW1).addColumn(FAMILY, QUAL_A)).value());
int newValue = originalValue + 1;
table.put(transactionState, new Put(ROW1).add(FAMILY, QUAL_A, Bytes
.toBytes(newValue)));
Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(COL_A));
Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(FAMILY,
QUAL_A));
Assert.assertEquals(newValue, Bytes.toInt(row1_A.value()));
}
@ -132,7 +133,7 @@ public class TestTransactions extends HBaseClusterTestCase {
TransactionState transactionState = transactionManager.beginTransaction();
int originalValue = Bytes.toInt(table.get(transactionState,
new Get(ROW1).addColumn(COL_A)).value());
new Get(ROW1).addColumn(FAMILY, QUAL_A)).value());
int newValue = originalValue + 1;
table.put(transactionState, new Put(ROW1).add(FAMILY, QUAL_A, Bytes
.toBytes(newValue)));
@ -175,12 +176,13 @@ public class TestTransactions extends HBaseClusterTestCase {
private TransactionState makeTransaction1() throws IOException {
TransactionState transactionState = transactionManager.beginTransaction();
Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(COL_A));
Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(FAMILY,
QUAL_A));
table.put(transactionState, new Put(ROW2).add(FAMILY, QUAL_A, row1_A
.getValue(COL_A)));
.getValue(FAMILY, QUAL_A)));
table.put(transactionState, new Put(ROW3).add(FAMILY, QUAL_A, row1_A
.getValue(COL_A)));
.getValue(FAMILY, QUAL_A)));
return transactionState;
}
@ -189,9 +191,10 @@ public class TestTransactions extends HBaseClusterTestCase {
private TransactionState makeTransaction2() throws IOException {
TransactionState transactionState = transactionManager.beginTransaction();
Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(COL_A));
Result row1_A = table.get(transactionState, new Get(ROW1).addColumn(FAMILY,
QUAL_A));
int value = Bytes.toInt(row1_A.getValue(COL_A));
int value = Bytes.toInt(row1_A.getValue(FAMILY, QUAL_A));
table.put(transactionState, new Put(ROW1).add(FAMILY, QUAL_A, Bytes
.toBytes(value + 1)));

View File

@ -93,7 +93,7 @@ public class TestTHLogRecovery extends HBaseClusterTestCase {
super.setUp();
HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
desc.addFamily(new HColumnDescriptor(FAMILY_COLON));
desc.addFamily(new HColumnDescriptor(FAMILY));
admin = new HBaseAdmin(conf);
admin.createTable(desc);
table = new TransactionalTable(conf, desc.getName());
@ -203,12 +203,12 @@ public class TestTHLogRecovery extends HBaseClusterTestCase {
private void verify(final int numRuns) throws IOException {
// Reads
int row1 = Bytes.toInt(table.get(new Get(ROW1).addColumn(COL_A)).getValue(
COL_A));
int row2 = Bytes.toInt(table.get(new Get(ROW2).addColumn(COL_A)).getValue(
COL_A));
int row3 = Bytes.toInt(table.get(new Get(ROW3).addColumn(COL_A)).getValue(
COL_A));
int row1 = Bytes.toInt(table.get(new Get(ROW1).addColumn(FAMILY, QUAL_A))
.getValue(FAMILY, QUAL_A));
int row2 = Bytes.toInt(table.get(new Get(ROW2).addColumn(FAMILY, QUAL_A))
.getValue(FAMILY, QUAL_A));
int row3 = Bytes.toInt(table.get(new Get(ROW3).addColumn(FAMILY, QUAL_A))
.getValue(FAMILY, QUAL_A));
assertEquals(TOTAL_VALUE - 2 * numRuns, row1);
assertEquals(numRuns, row2);
@ -222,11 +222,11 @@ public class TestTHLogRecovery extends HBaseClusterTestCase {
// Reads
int row1 = Bytes.toInt(table.get(transactionState,
new Get(ROW1).addColumn(COL_A)).getValue(COL_A));
new Get(ROW1).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
int row2 = Bytes.toInt(table.get(transactionState,
new Get(ROW2).addColumn(COL_A)).getValue(COL_A));
new Get(ROW2).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
int row3 = Bytes.toInt(table.get(transactionState,
new Get(ROW3).addColumn(COL_A)).getValue(COL_A));
new Get(ROW3).addColumn(FAMILY, QUAL_A)).getValue(FAMILY, QUAL_A));
row1 -= 2;
row2 += 1;

View File

@ -26,7 +26,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

View File

@ -29,9 +29,6 @@ import java.util.Map;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.rest.exception.HBaseRestException;
import org.apache.hadoop.hbase.rest.serializer.IRestSerializer;
import org.apache.hadoop.hbase.rest.serializer.ISerializable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
@ -47,7 +44,7 @@ import agilejson.TOJSON;
* column and recreating it. If there is data stored in the column, it will be
* deleted when the column is deleted.
*/
public class HColumnDescriptor implements ISerializable, WritableComparable<HColumnDescriptor> {
public class HColumnDescriptor implements WritableComparable<HColumnDescriptor> {
// For future backward compatibility
// Version 3 was when column names become byte arrays and when we picked up
@ -150,7 +147,7 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
* The other attributes are defaulted.
*
* @param familyName Column family name. Must be 'printable' -- digit or
* letter -- and end in a <code>:<code>
* letter -- and may not contain a <code>:<code>
*/
public HColumnDescriptor(final String familyName) {
this(Bytes.toBytes(familyName));
@ -161,7 +158,7 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
* The other attributes are defaulted.
*
* @param familyName Column family name. Must be 'printable' -- digit or
* letter -- and end in a <code>:<code>
* letter -- and may not contain a <code>:<code>
*/
public HColumnDescriptor(final byte [] familyName) {
this (familyName == null || familyName.length <= 0?
@ -188,7 +185,7 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
/**
* Constructor
* @param familyName Column family name. Must be 'printable' -- digit or
* letter -- and end in a <code>:<code>
* letter -- and may not contain a <code>:<code>
* @param maxVersions Maximum number of versions to keep
* @param compression Compression type
* @param inMemory If true, column data should be kept in an HRegionServer's
@ -199,8 +196,8 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
* @param bloomFilter Enable the specified bloom filter for this column
*
* @throws IllegalArgumentException if passed a family name that is made of
* other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> and does not
* end in a <code>:</code>
* other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> or contains
* a <code>:</code>
* @throws IllegalArgumentException if the number of versions is &lt;= 0
*/
public HColumnDescriptor(final byte [] familyName, final int maxVersions,
@ -211,42 +208,10 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
DEFAULT_BLOCKSIZE, timeToLive, bloomFilter);
}
/**
* Backwards compatible Constructor. Maximum value length is no longer
* configurable.
*
* @param familyName Column family name. Must be 'printable' -- digit or
* letter -- and end in a <code>:<code>
* @param maxVersions Maximum number of versions to keep
* @param compression Compression type
* @param inMemory If true, column data should be kept in an HRegionServer's
* cache
* @param blockCacheEnabled If true, MapFile blocks should be cached
* @param blocksize
* @param maxValueLength Restrict values to &lt;= this value (UNSUPPORTED)
* @param timeToLive Time-to-live of cell contents, in seconds
* (use HConstants.FOREVER for unlimited TTL)
* @param bloomFilter Enable the specified bloom filter for this column
*
* @throws IllegalArgumentException if passed a family name that is made of
* other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> and does not
* end in a <code>:</code>
* @throws IllegalArgumentException if the number of versions is &lt;= 0
* @deprecated As of hbase 0.20.0, max value length no longer supported
*/
// public HColumnDescriptor(final byte [] familyName, final int maxVersions,
// final String compression, final boolean inMemory,
// final boolean blockCacheEnabled, final int blocksize,
// final int maxValueLength,
// final int timeToLive, final boolean bloomFilter) {
// this(familyName, maxVersions, compression, inMemory, blockCacheEnabled,
// blocksize, timeToLive, bloomFilter);
// }
/**
* Constructor
* @param familyName Column family name. Must be 'printable' -- digit or
* letter -- and end in a <code>:<code>
* letter -- and may not contain a <code>:<code>
* @param maxVersions Maximum number of versions to keep
* @param compression Compression type
* @param inMemory If true, column data should be kept in an HRegionServer's
@ -258,16 +223,16 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
* @param bloomFilter Enable the specified bloom filter for this column
*
* @throws IllegalArgumentException if passed a family name that is made of
* other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> and does not
* end in a <code>:</code>
* other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> or contains
* a <code>:</code>
* @throws IllegalArgumentException if the number of versions is &lt;= 0
*/
public HColumnDescriptor(final byte [] familyName, final int maxVersions,
final String compression, final boolean inMemory,
final boolean blockCacheEnabled, final int blocksize,
final int timeToLive, final boolean bloomFilter) {
this.name = stripColon(familyName);
isLegalFamilyName(this.name);
isLegalFamilyName(familyName);
this.name = familyName;
if (maxVersions <= 0) {
// TODO: Allow maxVersion of 0 to be the way you say "Keep all versions".
@ -284,17 +249,6 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
setBlocksize(blocksize);
}
private static byte [] stripColon(final byte [] n) {
byte col = n[n.length-1];
if (col == ':') {
// strip.
byte [] res = new byte[n.length-1];
System.arraycopy(n, 0, res, 0, n.length-1);
return res;
}
return n;
}
/**
* @param b Family name.
* @return <code>b</code>
@ -311,7 +265,7 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
throw new IllegalArgumentException("Family names cannot start with a " +
"period: " + Bytes.toString(b));
}
for (int i = 0; i < (b.length - 1); i++) {
for (int i = 0; i < b.length; i++) {
if (Character.isISOControl(b[i]) || b[i] == ':') {
throw new IllegalArgumentException("Illegal character <" + b[i] +
">. Family names cannot contain control characters or colons: " +
@ -324,18 +278,11 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
/**
* @return Name of this column family
*/
@TOJSON(fieldName = "name", base64=true)
public byte [] getName() {
return name;
}
/**
* @return Name of this column family with colon as required by client API
*/
@TOJSON(fieldName = "name", base64=true)
public byte [] getNameWithColon() {
return Bytes.add(this.name, new byte[]{':'});
}
/**
* @return Name of this column family
*/
@ -684,11 +631,4 @@ public class HColumnDescriptor implements ISerializable, WritableComparable<HCol
}
return result;
}
/* (non-Javadoc)
* @see org.apache.hadoop.hbase.rest.xml.IOutputXML#toXML()
*/
public void restSerialize(IRestSerializer serializer) throws HBaseRestException {
serializer.serializeColumnDescriptor(this);
}
}
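With getNameWithColon() and the colon-stripping constructor gone, family names are plain byte[] values and isLegalFamilyName rejects a colon anywhere in the name. A small sketch of the new usage, not from the commit (table and family names invented):

import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class FamilyNameSketch {
  static HTableDescriptor describe() {
    HTableDescriptor htd = new HTableDescriptor("mytable");
    // Family names used to carry a trailing colon ("info:"), which the old
    // constructor silently stripped. That stripping is gone, so pass the bare name.
    htd.addFamily(new HColumnDescriptor(Bytes.toBytes("info")));
    // If input still arrives as "data:", parseColumn now hands back the bare family.
    htd.addFamily(new HColumnDescriptor(KeyValue.parseColumn(Bytes.toBytes("data:"))[0]));
    return htd;
  }
}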

File diff suppressed because it is too large.

View File

@ -33,9 +33,6 @@ import java.util.TreeMap;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.hfile.Compression;
import org.apache.hadoop.hbase.rest.exception.HBaseRestException;
import org.apache.hadoop.hbase.rest.serializer.IRestSerializer;
import org.apache.hadoop.hbase.rest.serializer.ISerializable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableComparable;
@ -45,8 +42,7 @@ import agilejson.TOJSON;
* HTableDescriptor contains the name of an HTable, and its
* column families.
*/
public class HTableDescriptor implements WritableComparable<HTableDescriptor>,
ISerializable {
public class HTableDescriptor implements WritableComparable<HTableDescriptor> {
// Changes prior to version 3 were not recorded here.
// Version 3 adds metadata as a map where keys and values are byte[].
@ -658,11 +654,4 @@ ISerializable {
HConstants.ALL_VERSIONS, Compression.Algorithm.NONE.getName(),
false, false, 8 * 1024,
HConstants.WEEK_IN_SECONDS, false)});
/* (non-Javadoc)
* @see org.apache.hadoop.hbase.rest.xml.IOutputXML#toXML()
*/
public void restSerialize(IRestSerializer serializer) throws HBaseRestException {
serializer.serializeTableDescriptor(this);
}
}

View File

@ -22,6 +22,7 @@ package org.apache.hadoop.hbase;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Comparator;
import org.apache.commons.logging.Log;
@ -236,35 +237,6 @@ public class KeyValue implements Writable, HeapSize {
this.length = length;
}
/** Temporary constructors until 880/1249 is committed to remove deps */
/**
* Temporary.
*/
public KeyValue(final byte [] row, final byte [] column) {
this(row, column, HConstants.LATEST_TIMESTAMP, null);
}
public KeyValue(final byte [] row, final byte [] column, long ts) {
this(row, column, ts, null);
}
public KeyValue(final byte [] row, final byte [] column, long ts,
byte [] value) {
this(row, column, ts, Type.Put, value);
}
public KeyValue(final byte [] row, final byte [] column, long ts, Type type,
byte [] value) {
int rlength = row == null ? 0 : row.length;
int vlength = value == null ? 0 : value.length;
int clength = column == null ? 0 : column.length;
this.bytes = createByteArray(row, 0, rlength, column, 0, clength,
ts, type, value, 0, vlength);
this.length = this.bytes.length;
this.offset = 0;
}
/** Constructors that build a new backing byte array from fields */
/**
@ -911,25 +883,6 @@ public class KeyValue implements Writable, HeapSize {
return getType() == Type.DeleteFamily.getCode();
}
/**
* Primarily for use client-side. Returns the column of this KeyValue in the
* deprecated format: <i>family:qualifier</i>, and in a new byte array.<p>
*
* If server-side, use {@link #getBuffer()} with appropriate offsets and
* lengths instead.
* @return Returns column. Makes a copy. Inserts delimiter.
*/
public byte [] getColumn() {
int fo = getFamilyOffset();
int fl = getFamilyLength(fo);
int ql = getQualifierLength();
byte [] result = new byte[fl + 1 + ql];
System.arraycopy(this.bytes, fo, result, 0, fl);
result[fl] = COLUMN_FAMILY_DELIMITER;
System.arraycopy(this.bytes, fo + fl, result, fl + 1, ql);
return result;
}
/**
* Primarily for use client-side. Returns the family of this KeyValue in a
* new byte array.<p>
@ -1073,23 +1026,6 @@ public class KeyValue implements Writable, HeapSize {
return Bytes.compareTo(column, 0, column.length, this.bytes, o, l) == 0;
}
/**
* @param column Column with delimiter
* @return True if column matches.
*/
public boolean matchingColumn(final byte [] column) {
int index = getFamilyDelimiterIndex(column, 0, column.length);
int rl = getRowLength();
int o = getFamilyOffset(rl);
int fl = getFamilyLength(o);
int ql = getQualifierLength(rl,fl);
if(Bytes.compareTo(column, 0, index, this.bytes, o, fl) != 0) {
return false;
}
return Bytes.compareTo(column, index + 1, column.length - (index + 1),
this.bytes, o + fl, ql) == 0;
}
/**
*
* @param family column family
@ -1158,19 +1094,24 @@ public class KeyValue implements Writable, HeapSize {
/**
* Splits a column in family:qualifier form into separate byte arrays.
*
* <p>
* Not recommended to be used as this is old-style API.
* @param c The column.
* @return The parsed column.
*/
public static byte [][] parseColumn(byte [] c) {
final byte [][] result = new byte [2][];
final int index = getDelimiter(c, 0, c.length, COLUMN_FAMILY_DELIMITER);
if (index == -1) {
// If no delimiter, return <code>c</code> as family and null qualifier.
result[0] = c;
result[1] = null;
return result;
// If no delimiter, return array of size 1
return new byte [][] { c };
} else if(index == c.length - 1) {
// Only a family, return array size 1
byte [] family = new byte[c.length-1];
System.arraycopy(c, 0, family, 0, family.length);
return new byte [][] { family };
}
// Family and column, return array size 2
final byte [][] result = new byte [2][];
result[0] = new byte [index];
System.arraycopy(c, 0, result[0], 0, index);
final int len = c.length - (index + 1);
@ -1180,6 +1121,18 @@ public class KeyValue implements Writable, HeapSize {
return result;
}
/**
* Makes a column in family:qualifier form from separate byte arrays.
* <p>
* Not recommended for usage as this is old-style API.
* @param family
* @param qualifier
* @return family:qualifier
*/
public static byte [] makeColumn(byte [] family, byte [] qualifier) {
return Bytes.add(family, COLUMN_FAMILY_DELIM_ARRAY, qualifier);
}
/**
* @param b
* @return Index of the family-qualifier colon delimiter character in passed
@ -1551,6 +1504,24 @@ public class KeyValue implements Writable, HeapSize {
return new KeyValue(row, f, q, ts, Type.Maximum);
}
/**
* @param b
* @return A KeyValue made of a byte array that holds the key-only part.
* Needed to convert hfile index members to KeyValues.
*/
public static KeyValue createKeyValueFromKey(final byte [] b) {
return createKeyValueFromKey(b, 0, b.length);
}
/**
* @param b
* @return A KeyValue made of a byte buffer that holds the key-only part.
* Needed to convert hfile index members to KeyValues.
*/
public static KeyValue createKeyValueFromKey(final ByteBuffer bb) {
return createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit());
}
/**
* @param b
* @param o
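The reworked parseColumn now signals a family-only column by returning a one-element array instead of a null second slot, and makeColumn rebuilds the colon-delimited form where it is still needed (as the Stargate X-Column header does above). A short sketch of the new contract, not from the commit:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class ParseColumnSketch {
  public static void main(String[] args) {
    // New contract: length 1 means "family only" (no colon, or a trailing colon);
    // length 2 means family plus qualifier. The old code returned a null qualifier.
    byte[][] familyOnly = KeyValue.parseColumn(Bytes.toBytes("info:"));       // length == 1
    byte[][] famAndQual = KeyValue.parseColumn(Bytes.toBytes("info:status")); // length == 2
    System.out.println(familyOnly.length + " " + famAndQual.length);
    // makeColumn is the inverse helper for spots that still need the old form.
    byte[] column = KeyValue.makeColumn(famAndQual[0], famAndQual[1]);
    System.out.println(Bytes.toString(column)); // info:status
  }
}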

View File

@ -195,19 +195,6 @@ public class Delete implements Writable, Row, Comparable<Row> {
return this;
}
/**
* Delete all versions of the specified column, given in
* <code>family:qualifier</code> notation, and with a timestamp less than
* or equal to the specified timestamp.
* @param column colon-delimited family and qualifier
* @param timestamp maximum version timestamp
*/
public Delete deleteColumns(byte [] column, long timestamp) {
byte [][] parts = KeyValue.parseColumn(column);
this.deleteColumns(parts[0], parts[1], timestamp);
return this;
}
/**
* Delete the latest version of the specified column.
* This is an expensive call in that on the server-side, it first does a
@ -238,22 +225,6 @@ public class Delete implements Writable, Row, Comparable<Row> {
return this;
}
public void deleteColumns(byte [] column) {
byte [][] parts = KeyValue.parseColumn(column);
this.deleteColumns(parts[0], parts[1]);
}
/**
* Delete the latest version of the specified column, given in
* <code>family:qualifier</code> notation.
* @param column colon-delimited family and qualifier
*/
public Delete deleteColumn(byte [] column) {
byte [][] parts = KeyValue.parseColumn(column);
this.deleteColumn(parts[0], parts[1], HConstants.LATEST_TIMESTAMP);
return this;
}
/**
* Method for retrieving the delete's familyMap
* @return familyMap
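With the colon-delimited deleteColumns/deleteColumn overloads removed, callers split the column themselves, mirroring the RowResource change above. A hedged helper sketch, not from the commit:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Delete;

public class DeleteSketch {
  static Delete columnsDelete(byte[] row, byte[] oldStyleColumn, long ts) {
    Delete delete = new Delete(row);
    byte[][] famQf = KeyValue.parseColumn(oldStyleColumn);
    if (famQf.length == 1) {
      delete.deleteFamily(famQf[0], ts);            // family-only spec
    } else {
      delete.deleteColumns(famQf[0], famQf[1], ts); // all versions <= ts
    }
    return delete;
  }
}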

View File

@ -128,37 +128,6 @@ public class Get implements Writable {
return this;
}
/**
* Adds an array of columns specified the old format, family:qualifier.
* <p>
* Overrides previous calls to addFamily for any families in the input.
* @param columns array of columns, formatted as <pre>family:qualifier</pre>
*/
public Get addColumns(byte [][] columns) {
if (columns == null) return this;
for(int i = 0; i < columns.length; i++) {
try {
addColumn(columns[i]);
} catch(Exception e) {}
}
return this;
}
/**
* @param column Old format column.
* @return This.
*/
public Get addColumn(final byte [] column) {
if (column == null) return this;
byte [][] split = KeyValue.parseColumn(column);
if (split.length > 1 && split[1] != null && split[1].length > 0) {
addColumn(split[0], split[1]);
} else {
addFamily(split[0]);
}
return this;
}
/**
* Get versions of columns only within the specified timestamp range,
* [minStamp, maxStamp).
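The removed Get.addColumn(byte[]) and Get.addColumns(byte[][]) reduce to the parse-and-dispatch loop this commit uses in RowResultGenerator and IndexedTable; a standalone sketch of that idiom, not from the commit:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;

public class GetSketch {
  static Get addOldStyleColumns(Get get, byte[][] oldStyleColumns) {
    // Replacement for the removed Get.addColumns(byte[][]): split each
    // family:qualifier and route to addFamily or addColumn.
    for (byte[] column : oldStyleColumns) {
      byte[][] famQf = KeyValue.parseColumn(column);
      if (famQf.length == 1) {
        get.addFamily(famQf[0]);
      } else {
        get.addColumn(famQf[0], famQf[1]);
      }
    }
    return get;
  }
}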

File diff suppressed because it is too large.

View File

@ -109,19 +109,6 @@ public class Put implements HeapSize, Writable, Row, Comparable<Row> {
return add(family, qualifier, this.timestamp, value);
}
/**
* Add the specified column and value, with the specified timestamp as
* its version to this Put operation.
* @param column Old style column name with family and qualifier put together
* with a colon.
* @param ts version timestamp
* @param value column value
*/
public Put add(byte [] column, long ts, byte [] value) {
byte [][] parts = KeyValue.parseColumn(column);
return add(parts[0], parts[1], ts, value);
}
/**
* Add the specified column and value, with the specified timestamp as
* its version to this Put operation.
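For the removed Put.add(column, ts, value), callers parse the column first, as RowResource now does; an empty qualifier stands in for a family-only spec. A sketch, not from the commit:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;

public class PutSketch {
  static Put addOldStyleColumn(Put put, byte[] oldStyleColumn, long ts, byte[] value) {
    // Replacement for the removed Put.add(column, ts, value).
    byte[][] famQf = KeyValue.parseColumn(oldStyleColumn);
    if (famQf.length == 1) {
      return put.add(famQf[0], new byte[0], ts, value); // family-only spec
    }
    return put.add(famQf[0], famQf[1], ts, value);
  }
}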

View File

@ -30,13 +30,11 @@ import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.SplitKeyValue;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
/**
@ -275,32 +273,6 @@ public class Result implements Writable {
return entry == null? null: entry.getValue();
}
public Cell getCellValue(byte[] family, byte[] qualifier) {
Map.Entry<Long,byte[]> val = getKeyValue(family, qualifier);
if (val == null)
return null;
return new Cell(val.getValue(), val.getKey());
}
/**
* @return First KeyValue in this Result as a Cell or null if empty.
*/
public Cell getCellValue() {
return isEmpty()? null: new Cell(kvs[0].getValue(), kvs[0].getTimestamp());
}
/**
* @return This Result as array of Cells or null if empty.
*/
public Cell [] getCellValues() {
if (isEmpty()) return null;
Cell [] results = new Cell[kvs.length];
for (int i = 0; i < kvs.length; i++) {
results[i] = new Cell(kvs[i].getValue(), kvs[i].getTimestamp());
}
return results;
}
private Map.Entry<Long,byte[]> getKeyValue(byte[] family, byte[] qualifier) {
if(this.familyMap == null) {
getMap();
@ -327,21 +299,6 @@ public class Result implements Writable {
qualifierMap.get(qualifier): qualifierMap.get(new byte[0]);
}
/**
* Get the latest version of the specified column,
* using <pre>family:qualifier</pre> notation.
* @param column column in family:qualifier notation
* @return value of latest version of column, null if none found
*/
public byte [] getValue(byte [] column) {
try {
byte [][] split = KeyValue.parseColumn(column);
return getValue(split[0], split[1]);
} catch(Exception e) {
return null;
}
}
/**
* Checks for existence of the specified column.
* @param family family name
@ -367,17 +324,6 @@ public class Result implements Writable {
return true;
}
/**
* Returns this Result in the old return format, {@link RowResult}.
* @return a RowResult
*/
public RowResult getRowResult() {
if(this.kvs == null) {
readFields();
}
return RowResult.createRowResult(Arrays.asList(kvs));
}
/**
* Returns the value of the first column in the Result.
* @return value of the first column
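With getValue(byte[] column), getCellValue, and getRowResult gone, values are read with the family/qualifier overload and rows are walked as KeyValues. A sketch, not from the commit:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class ResultSketch {
  static void dump(Result result, byte[] family, byte[] qualifier) {
    // Replacement for the removed Result.getValue(byte[] column).
    byte[] value = result.getValue(family, qualifier);
    System.out.println(value == null ? "<missing>" : Bytes.toStringBinary(value));
    // Replacement for RowResult/Cell iteration: walk the KeyValues directly.
    for (KeyValue kv : result.sorted()) {
      System.out.println(Bytes.toStringBinary(kv.getFamily()) + ":" +
          Bytes.toStringBinary(kv.getQualifier()) + " @ " + kv.getTimestamp());
    }
  }
}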

View File

@ -30,9 +30,7 @@ import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
@ -80,7 +78,6 @@ public class Scan implements Writable {
private int caching = -1;
private boolean cacheBlocks = true;
private Filter filter = null;
private RowFilterInterface oldFilter = null;
private TimeRange tr = new TimeRange();
private Map<byte [], NavigableSet<byte []>> familyMap =
new TreeMap<byte [], NavigableSet<byte []>>(Bytes.BYTES_COMPARATOR);
@ -128,7 +125,6 @@ public class Scan implements Writable {
maxVersions = scan.getMaxVersions();
caching = scan.getCaching();
filter = scan.getFilter(); // clone?
oldFilter = scan.getOldFilter(); // clone?
TimeRange ctr = scan.getTimeRange();
tr = new TimeRange(ctr.getMin(), ctr.getMax());
Map<byte[], NavigableSet<byte[]>> fams = scan.getFamilyMap();
@ -174,89 +170,6 @@ public class Scan implements Writable {
return this;
}
/**
* Parses a combined family and qualifier and adds either both or just the
* family in case there is not qualifier. This assumes the older colon
* divided notation, e.g. "data:contents" or "meta:".
* <p>
* Note: It will throw an error when the colon is missing.
*
* @param familyAndQualifier
* @return A reference to this instance.
* @throws IllegalArgumentException When the colon is missing.
*/
public Scan addColumn(byte[] familyAndQualifier) {
byte [][] fq = KeyValue.parseColumn(familyAndQualifier);
if (fq.length > 1 && fq[1] != null && fq[1].length > 0) {
addColumn(fq[0], fq[1]);
} else {
addFamily(fq[0]);
}
return this;
}
/**
* Adds an array of columns specified using old format, family:qualifier.
* <p>
* Overrides previous calls to addFamily for any families in the input.
*
* @param columns array of columns, formatted as <pre>family:qualifier</pre>
*/
public Scan addColumns(byte [][] columns) {
for (int i = 0; i < columns.length; i++) {
addColumn(columns[i]);
}
return this;
}
/**
* Convenience method to help parse old style (or rather user entry on the
* command line) column definitions, e.g. "data:contents mime:". The columns
* must be space delimited and always have a colon (":") to denote family
* and qualifier.
*
* @param columns The columns to parse.
* @return A reference to this instance.
*/
public Scan addColumns(String columns) {
String[] cols = columns.split(" ");
for (String col : cols) {
addColumn(Bytes.toBytes(col));
}
return this;
}
/**
* Helps to convert the binary column families and qualifiers to a text
* representation, e.g. "data:mimetype data:contents meta:". Binary values
* are properly encoded using {@link Bytes#toBytesBinary(String)}.
*
* @return The columns in an old style string format.
*/
public String getInputColumns() {
String cols = "";
for (Map.Entry<byte[], NavigableSet<byte[]>> e :
familyMap.entrySet()) {
byte[] fam = e.getKey();
if (cols.length() > 0) cols += " ";
NavigableSet<byte[]> quals = e.getValue();
// check if this family has qualifiers
if (quals != null && quals.size() > 0) {
String cs = "";
for (byte[] qual : quals) {
if (cs.length() > 0) cs += " ";
// encode values to make parsing easier later
cs += Bytes.toStringBinary(fam) + ":" + Bytes.toStringBinary(qual);
}
cols += cs;
} else {
// only add the family but with old style delimiter
cols += Bytes.toStringBinary(fam) + ":";
}
}
return cols;
}
/**
* Get versions of columns only within the specified timestamp range,
* [minStamp, maxStamp).
@ -337,19 +250,6 @@ public class Scan implements Writable {
return this;
}
/**
* Set an old-style filter interface to use. Note: not all features of the
* old style filters are supported.
*
* @deprecated
* @param filter
* @return The scan instance.
*/
public Scan setOldFilter(RowFilterInterface filter) {
oldFilter = filter;
return this;
}
/**
* Setting the familyMap
* @param familyMap
@ -436,20 +336,11 @@ public class Scan implements Writable {
return filter;
}
/**
* Get the old style filter, if there is one.
* @deprecated
* @return null or instance
*/
public RowFilterInterface getOldFilter() {
return oldFilter;
}
/**
* @return true is a filter has been specified, false if not
*/
public boolean hasFilter() {
return filter != null || oldFilter != null;
return filter != null;
}
/**
@ -550,11 +441,6 @@ public class Scan implements Writable {
this.filter = (Filter)createForName(Bytes.toString(Bytes.readByteArray(in)));
this.filter.readFields(in);
}
if (in.readBoolean()) {
this.oldFilter =
(RowFilterInterface)createForName(Bytes.toString(Bytes.readByteArray(in)));
this.oldFilter.readFields(in);
}
this.tr = new TimeRange();
tr.readFields(in);
int numFamilies = in.readInt();
@ -586,13 +472,6 @@ public class Scan implements Writable {
Bytes.writeByteArray(out, Bytes.toBytes(filter.getClass().getName()));
filter.write(out);
}
if (this.oldFilter == null) {
out.writeBoolean(false);
} else {
out.writeBoolean(true);
Bytes.writeByteArray(out, Bytes.toBytes(oldFilter.getClass().getName()));
oldFilter.write(out);
}
tr.write(out);
out.writeInt(familyMap.size());
for(Map.Entry<byte [], NavigableSet<byte []>> entry : familyMap.entrySet()) {
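The removed Scan.addColumn(byte[]), addColumns, and the RowFilterInterface plumbing reduce to the same parse-and-dispatch loop plus the Filter-based setFilter. A sketch of rebuilding a Scan from an old space-delimited column spec, not from the commit:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanSketch {
  static Scan fromOldStyleSpec(String spaceDelimitedColumns) {
    // Replacement for the removed Scan.addColumns(String). Old-style
    // RowFilterInterface filters have no hook here any more; use the Filter
    // hierarchy via Scan.setFilter instead.
    Scan scan = new Scan();
    for (String col : spaceDelimitedColumns.split(" ")) {
      byte[][] famQf = KeyValue.parseColumn(Bytes.toBytes(col));
      if (famQf.length == 1) {
        scan.addFamily(famQf[0]);
      } else {
        scan.addColumn(famQf[0], famQf[1]);
      }
    }
    return scan;
  }
}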

View File

@ -1,52 +0,0 @@
/**
* Copyright 2009 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import java.io.Closeable;
import java.io.IOException;
import org.apache.hadoop.hbase.io.RowResult;
/**
* Interface for client-side scanning.
* Go to {@link HTable} to obtain instances.
* @deprecated See {@link ResultScanner}
*/
public interface Scanner extends Closeable, Iterable<RowResult> {
/**
* Grab the next row's worth of values.
* @return RowResult object if there is another row, null if the scanner is
* exhausted.
* @throws IOException
*/
public RowResult next() throws IOException;
/**
* @param nbRows number of rows to return
* @return Between zero and <param>nbRows</param> Results
* @throws IOException
*/
public RowResult [] next(int nbRows) throws IOException;
/**
* Closes the scanner and releases any resources it has allocated
*/
public void close();
}
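The deleted Scanner interface returned RowResult; its replacement, ResultScanner, iterates Result objects. A sketch, not from the commit:

import java.io.IOException;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;

public class ScannerSketch {
  static int countRows(HTable table) throws IOException {
    // ResultScanner (iterating Result) replaces the removed Scanner/RowResult pair.
    ResultScanner scanner = table.getScanner(new Scan());
    int rows = 0;
    try {
      for (Result result : scanner) {
        rows++; // each Result holds one row's KeyValues
      }
    } finally {
      scanner.close();
    }
    return rows;
  }
}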

View File

@ -1,286 +0,0 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.SortedMap;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.io.ObjectWritable;
/**
* This filter is a no-op in HBase 0.20. Don't use it.
*
* This filter is used to filter based on the value of a given column. It takes
* an operator (equal, greater, not equal, etc.) and either a byte [] value or a
* byte [] comparator. If we have a byte [] value then we just do a
* lexicographic compare. If this is not sufficient (e.g. you want to deserialize
* a long and then compare it to a fixed long value), then you can pass in your
* own comparator instead.
* @deprecated Use filters that are rooted on {@link Filter} instead.
*/
public class ColumnValueFilter implements RowFilterInterface {
/** Comparison operators. */
public enum CompareOp {
/** less than */
LESS,
/** less than or equal to */
LESS_OR_EQUAL,
/** equals */
EQUAL,
/** not equal */
NOT_EQUAL,
/** greater than or equal to */
GREATER_OR_EQUAL,
/** greater than */
GREATER;
}
private byte[] columnName;
private CompareOp compareOp;
private byte[] value;
private WritableByteArrayComparable comparator;
private boolean filterIfColumnMissing;
ColumnValueFilter() {
// for Writable
}
/**
* Constructor.
*
* @param columnName name of column
* @param compareOp operator
* @param value value to compare column values against
*/
public ColumnValueFilter(final byte[] columnName, final CompareOp compareOp,
final byte[] value) {
this(columnName, compareOp, value, true);
}
/**
* Constructor.
*
* @param columnName name of column
* @param compareOp operator
* @param value value to compare column values against
* @param filterIfColumnMissing if true then we will filter rows that don't have the column.
*/
public ColumnValueFilter(final byte[] columnName, final CompareOp compareOp,
final byte[] value, boolean filterIfColumnMissing) {
this.columnName = columnName;
this.compareOp = compareOp;
this.value = value;
this.filterIfColumnMissing = filterIfColumnMissing;
}
/**
* Constructor.
*
* @param columnName name of column
* @param compareOp operator
* @param comparator Comparator to use.
*/
public ColumnValueFilter(final byte[] columnName, final CompareOp compareOp,
final WritableByteArrayComparable comparator) {
this(columnName, compareOp, comparator, true);
}
/**
* Constructor.
*
* @param columnName name of column
* @param compareOp operator
* @param comparator Comparator to use.
* @param filterIfColumnMissing if true then we will filter rows that don't have the column.
*/
public ColumnValueFilter(final byte[] columnName, final CompareOp compareOp,
final WritableByteArrayComparable comparator, boolean filterIfColumnMissing) {
this.columnName = columnName;
this.compareOp = compareOp;
this.comparator = comparator;
this.filterIfColumnMissing = filterIfColumnMissing;
}
public boolean filterRowKey(final byte[] rowKey) {
return filterRowKey(rowKey, 0, rowKey.length);
}
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
return false;
}
public boolean filterColumn(final byte[] rowKey,
final byte[] colKey, final byte[] data) {
if (!filterIfColumnMissing) {
return false; // Must filter on the whole row
}
if (!Arrays.equals(colKey, columnName)) {
return false;
}
return filterColumnValue(data, 0, data.length);
}
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
byte[] cn, int coffset, int clength, byte[] columnValue,
int voffset, int vlength) {
if (!filterIfColumnMissing) {
return false; // Must filter on the whole row
}
if (Bytes.compareTo(cn, coffset, clength,
this.columnName, 0, this.columnName.length) != 0) {
return false;
}
return filterColumnValue(columnValue, voffset, vlength);
}
private boolean filterColumnValue(final byte [] data, final int offset,
final int length) {
int compareResult;
if (comparator != null) {
compareResult = comparator.compareTo(Arrays.copyOfRange(data, offset, offset+length));
} else {
compareResult = Bytes.compareTo(value, 0, value.length, data, offset, length);
}
switch (compareOp) {
case LESS:
return compareResult <= 0;
case LESS_OR_EQUAL:
return compareResult < 0;
case EQUAL:
return compareResult != 0;
case NOT_EQUAL:
return compareResult == 0;
case GREATER_OR_EQUAL:
return compareResult > 0;
case GREATER:
return compareResult >= 0;
default:
throw new RuntimeException("Unknown Compare op " + compareOp.name());
}
}
public boolean filterAllRemaining() {
return false;
}
public boolean filterRow(final SortedMap<byte[], Cell> columns) {
if (columns == null)
return false;
if (filterIfColumnMissing) {
return !columns.containsKey(columnName);
}
// Otherwise we must do the filter here
Cell colCell = columns.get(columnName);
if (colCell == null) {
return false;
}
byte [] v = colCell.getValue();
return this.filterColumnValue(v, 0, v.length);
}
public boolean filterRow(List<KeyValue> results) {
if (results == null) return false;
KeyValue found = null;
if (filterIfColumnMissing) {
boolean doesntHaveIt = true;
for (KeyValue kv: results) {
if (kv.matchingColumn(columnName)) {
doesntHaveIt = false;
found = kv;
break;
}
}
if (doesntHaveIt) return doesntHaveIt;
}
if (found == null) {
for (KeyValue kv: results) {
if (kv.matchingColumn(columnName)) {
found = kv;
break;
}
}
}
if (found == null) {
return false;
}
return this.filterColumnValue(found.getBuffer(), found.getValueOffset(),
found.getValueLength());
}
public boolean processAlways() {
return false;
}
public void reset() {
// Nothing.
}
public void rowProcessed(final boolean filtered,
final byte[] key) {
// Nothing
}
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
// Nothing
}
public void validate(final byte[][] columns) {
// Nothing
}
public void readFields(final DataInput in) throws IOException {
int valueLen = in.readInt();
if (valueLen > 0) {
value = new byte[valueLen];
in.readFully(value);
}
columnName = Bytes.readByteArray(in);
compareOp = CompareOp.valueOf(in.readUTF());
comparator = (WritableByteArrayComparable) ObjectWritable.readObject(in,
new HBaseConfiguration());
filterIfColumnMissing = in.readBoolean();
}
public void write(final DataOutput out) throws IOException {
if (value == null) {
out.writeInt(0);
} else {
out.writeInt(value.length);
out.write(value);
}
Bytes.writeByteArray(out, columnName);
out.writeUTF(compareOp.name());
ObjectWritable.writeObject(out, comparator,
WritableByteArrayComparable.class, new HBaseConfiguration());
out.writeBoolean(filterIfColumnMissing);
}
}
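A hypothetical replacement sketch: SingleColumnValueFilter covers the same use case under the Filter API, addressing the column as a family/qualifier pair; all names and values below are placeholders:
// Sketch only: equality check on one column, skipping rows where the column is missing.
SingleColumnValueFilter filter = new SingleColumnValueFilter(
    Bytes.toBytes("family"), Bytes.toBytes("qual"),                  // placeholder column
    CompareFilter.CompareOp.EQUAL, Bytes.toBytes("value"));
filter.setFilterIfMissing(true);                                     // mirrors filterIfColumnMissing
Scan scan = new Scan();
scan.setFilter(filter);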

View File

@ -1,64 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import org.apache.hadoop.hbase.util.Bytes;
/**
* Subclass of StopRowFilter that filters only rows > the stop row,
* so a scan includes rows up to and including the stop row, but no further.
*
* @deprecated Use filters that are rooted on {@link Filter} instead
*/
public class InclusiveStopRowFilter extends StopRowFilter {
/**
* Default constructor, filters nothing. Required though for RPC
* deserialization.
*/
public InclusiveStopRowFilter() {super();}
/**
* Constructor that takes a stopRowKey on which to filter
*
* @param stopRowKey rowKey to filter on.
*/
public InclusiveStopRowFilter(final byte [] stopRowKey) {
super(stopRowKey);
}
/**
* @see org.apache.hadoop.hbase.filter.StopRowFilter#filterRowKey(byte[])
*/
@Override
public boolean filterRowKey(final byte [] rowKey) {
return filterRowKey(rowKey, 0, rowKey.length);
}
public boolean filterRowKey(byte []rowKey, int offset, int length) {
if (rowKey == null) {
if (getStopRowKey() == null) {
return true;
}
return false;
}
return Bytes.compareTo(getStopRowKey(), 0, getStopRowKey().length,
rowKey, offset, length) < 0;
}
}
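A hypothetical replacement sketch: InclusiveStopFilter provides the same inclusive stop-row behaviour under the Filter API; the row keys below are placeholders:
// Sketch only: the scan stops after returning the stop row itself.
Scan scan = new Scan(Bytes.toBytes("startRow"));                     // placeholder start row
scan.setFilter(new InclusiveStopFilter(Bytes.toBytes("stopRow")));   // stop row is included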

View File

@ -1,132 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import java.util.SortedMap;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.Cell;
/**
* Implementation of RowFilterInterface that limits results to a specific page
* size. It terminates scanning once the number of filter-passed results is >=
* the given page size.
*
* <p>
* Note that this filter cannot guarantee that the number of results returned
* to a client is <= page size. This is because the filter is applied
* separately on different region servers. It does, however, optimize the scan of
* individual HRegions by making sure that the page size is never exceeded
* locally.
* </p>
*
* @deprecated Use filters that are rooted on {@link Filter} instead
*/
public class PageRowFilter implements RowFilterInterface {
private long pageSize = Long.MAX_VALUE;
private int rowsAccepted = 0;
/**
* Default constructor, filters nothing. Required though for RPC
* deserialization.
*/
public PageRowFilter() {
super();
}
/**
* Constructor that takes a maximum page size.
*
* @param pageSize Maximum result size.
*/
public PageRowFilter(final long pageSize) {
this.pageSize = pageSize;
}
public void validate(final byte [][] columns) {
// Doesn't filter columns
}
public void reset() {
rowsAccepted = 0;
}
public void rowProcessed(boolean filtered,
byte [] rowKey) {
rowProcessed(filtered, rowKey, 0, rowKey.length);
}
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
if (!filtered) {
this.rowsAccepted++;
}
}
public boolean processAlways() {
return false;
}
public boolean filterAllRemaining() {
return this.rowsAccepted > this.pageSize;
}
public boolean filterRowKey(final byte [] r) {
return filterRowKey(r, 0, r.length);
}
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
return filterAllRemaining();
}
public boolean filterColumn(final byte [] rowKey,
final byte [] colKey,
final byte[] data) {
return filterColumn(rowKey, 0, rowKey.length, colKey, 0, colKey.length,
data, 0, data.length);
}
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
byte[] colunmName, int coffset, int clength, byte[] columnValue,
int voffset, int vlength) {
return filterAllRemaining();
}
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
return filterAllRemaining();
}
public boolean filterRow(List<KeyValue> results) {
return filterAllRemaining();
}
public void readFields(final DataInput in) throws IOException {
this.pageSize = in.readLong();
}
public void write(final DataOutput out) throws IOException {
out.writeLong(pageSize);
}
}
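A hypothetical replacement sketch: PageFilter plays the same role under the Filter API, with the same caveat that the limit is enforced per region server; the page size is a placeholder:
// Sketch only: cap the number of rows each region server returns.
Scan scan = new Scan();
scan.setFilter(new PageFilter(25));                                  // placeholder page size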

View File

@ -1,120 +0,0 @@
/**
* Copyright 2009 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import java.util.SortedMap;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.util.Bytes;
/**
* RowFilterInterface that filters out everything that does not match a prefix
*
* @deprecated Use filters that are rooted on {@link Filter} instead
*/
public class PrefixRowFilter implements RowFilterInterface {
protected byte[] prefix;
/**
* Constructor that takes a row prefix to filter on
* @param prefix
*/
public PrefixRowFilter(byte[] prefix) {
this.prefix = prefix;
}
/**
* Default Constructor, filters nothing. Required for RPC
* deserialization
*/
public PrefixRowFilter() { }
public void reset() {
// Nothing to reset
}
public void rowProcessed(boolean filtered, byte [] key) {
rowProcessed(filtered, key, 0, key.length);
}
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
// does not care
}
public boolean processAlways() {
return false;
}
public boolean filterAllRemaining() {
return false;
}
public boolean filterRowKey(final byte [] rowKey) {
return filterRowKey(rowKey, 0, rowKey.length);
}
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
if (rowKey == null)
return true;
if (length < prefix.length)
return true;
for(int i = 0;i < prefix.length;i++)
if (prefix[i] != rowKey[i + offset])
return true;
return false;
}
public boolean filterColumn(final byte [] rowKey, final byte [] colunmName,
final byte[] columnValue) {
return false;
}
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
byte[] colunmName, int coffset, int clength, byte[] columnValue,
int voffset, int vlength) {
return false;
}
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
return false;
}
public boolean filterRow(List<KeyValue> results) {
return false;
}
public void validate(final byte [][] columns) {
// does not do this
}
public void readFields(final DataInput in) throws IOException {
prefix = Bytes.readByteArray(in);
}
public void write(final DataOutput out) throws IOException {
Bytes.writeByteArray(out, prefix);
}
}
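A hypothetical replacement sketch: PrefixFilter is the Filter-API equivalent; the prefix is a placeholder:
// Sketch only: pass only rows whose key starts with the given prefix.
Scan scan = new Scan();
scan.setFilter(new PrefixFilter(Bytes.toBytes("user_")));            // placeholder prefix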

View File

@ -1,344 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.Map.Entry;
import java.util.regex.Pattern;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.util.Bytes;
/**
* Implementation of RowFilterInterface that can filter by rowkey regular
* expression and/or individual column values (equals comparison only). Multiple
* column filters imply an implicit conjunction of filter criteria.
*
* Note that column value filtering in this interface has been replaced by
* {@link ColumnValueFilter}.
* @deprecated This interface doesn't work well in the new KeyValue world.
* Use filters based on the new {@link Filter} interface instead.
*/
public class RegExpRowFilter implements RowFilterInterface {
private Pattern rowKeyPattern = null;
private String rowKeyRegExp = null;
private Map<byte [], byte[]> equalsMap =
new TreeMap<byte [], byte[]>(Bytes.BYTES_COMPARATOR);
private Set<byte []> nullColumns =
new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
/**
* Default constructor, filters nothing. Required though for RPC
* deserialization.
*/
public RegExpRowFilter() {
super();
}
/**
* Constructor that takes a row key regular expression to filter on.
*
* @param rowKeyRegExp
*/
public RegExpRowFilter(final String rowKeyRegExp) {
this.rowKeyRegExp = rowKeyRegExp;
}
/**
* @deprecated Column filtering has been replaced by {@link ColumnValueFilter}
* Constructor that takes a row key regular expression to filter on.
*
* @param rowKeyRegExp
* @param columnFilter
*/
@Deprecated
public RegExpRowFilter(final String rowKeyRegExp,
final Map<byte [], Cell> columnFilter) {
this.rowKeyRegExp = rowKeyRegExp;
this.setColumnFilters(columnFilter);
}
public void rowProcessed(boolean filtered, byte [] rowKey) {
rowProcessed(filtered, rowKey, 0, rowKey.length);
}
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
//doesn't care
}
public boolean processAlways() {
return false;
}
/**
* @deprecated Column filtering has been replaced by {@link ColumnValueFilter}
* Specify a value that must be matched for the given column.
*
* @param colKey
* the column to match on
* @param value
* the value that must equal the stored value.
*/
@Deprecated
public void setColumnFilter(final byte [] colKey, final byte[] value) {
if (value == null) {
nullColumns.add(colKey);
} else {
equalsMap.put(colKey, value);
}
}
/**
* @deprecated Column filtering has been replaced by {@link ColumnValueFilter}
* Set column filters for a number of columns.
*
* @param columnFilter
* Map of columns with value criteria.
*/
@Deprecated
public void setColumnFilters(final Map<byte [], Cell> columnFilter) {
if (null == columnFilter) {
nullColumns.clear();
equalsMap.clear();
} else {
for (Entry<byte [], Cell> entry : columnFilter.entrySet()) {
setColumnFilter(entry.getKey(), entry.getValue().getValue());
}
}
}
public void reset() {
// Nothing to reset
}
public boolean filterAllRemaining() {
return false;
}
public boolean filterRowKey(final byte [] rowKey) {
return filterRowKey(rowKey, 0, rowKey.length);
}
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
return (filtersByRowKey() && rowKey != null)?
!getRowKeyPattern().matcher(Bytes.toString(rowKey, offset, length)).matches():
false;
}
public boolean filterColumn(final byte [] rowKey, final byte [] colKey,
final byte[] data) {
if (filterRowKey(rowKey)) {
return true;
}
if (filtersByColumnValue()) {
byte[] filterValue = equalsMap.get(colKey);
if (null != filterValue) {
return !Arrays.equals(filterValue, data);
}
}
if (nullColumns.contains(colKey)) {
if (data != null /* DELETE IS IN KEY NOW && !HLogEdit.isDeleted(data)*/) {
return true;
}
}
return false;
}
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
byte [] colunmName, int coffset, int clength, byte[] columnValue,
int voffset, int vlength) {
if (filterRowKey(rowKey, roffset, rlength)) {
return true;
}
byte [] colkey = null;
if (filtersByColumnValue()) {
colkey = getColKey(colunmName, coffset, clength);
byte [] filterValue = equalsMap.get(colkey);
if (null != filterValue) {
return Bytes.compareTo(filterValue, 0, filterValue.length, columnValue,
voffset, vlength) != 0;
}
}
if (colkey == null) {
colkey = getColKey(colunmName, coffset, clength);
}
if (nullColumns.contains(colkey)) {
if (columnValue != null /* TODO: FIX!!! && !HLogEdit.isDeleted(data)*/) {
return true;
}
}
return false;
}
private byte [] getColKey(final byte [] c, final int offset, final int length) {
byte [] colkey = null;
if (offset == 0) {
colkey = c;
} else {
colkey = new byte [length];
System.arraycopy(c, offset, colkey, 0, length);
}
return colkey;
}
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
for (Entry<byte [], Cell> col : columns.entrySet()) {
if (nullColumns.contains(col.getKey())
/* DELETE IS IN KEY NOW && !HLogEdit.isDeleted(col.getValue().getValue())*/) {
return true;
}
}
for (byte [] col : equalsMap.keySet()) {
if (!columns.containsKey(col)) {
return true;
}
}
return false;
}
// THIS METHOD IS HORRIDLY EXPENSIVE TO RUN. NEEDS FIXUP.
public boolean filterRow(List<KeyValue> kvs) {
for (KeyValue kv: kvs) {
byte [] column = kv.getColumn();
if (nullColumns.contains(column) && !kv.isDeleteType()) {
return true;
}
if (!equalsMap.containsKey(column)) {
return true;
}
}
return false;
}
private boolean filtersByColumnValue() {
return equalsMap != null && equalsMap.size() > 0;
}
private boolean filtersByRowKey() {
return null != rowKeyPattern || null != rowKeyRegExp;
}
private String getRowKeyRegExp() {
if (null == rowKeyRegExp && rowKeyPattern != null) {
rowKeyRegExp = rowKeyPattern.toString();
}
return rowKeyRegExp;
}
private Pattern getRowKeyPattern() {
if (rowKeyPattern == null && rowKeyRegExp != null) {
rowKeyPattern = Pattern.compile(rowKeyRegExp);
}
return rowKeyPattern;
}
public void readFields(final DataInput in) throws IOException {
boolean hasRowKeyPattern = in.readBoolean();
if (hasRowKeyPattern) {
rowKeyRegExp = in.readUTF();
}
// equals map
equalsMap.clear();
int size = in.readInt();
for (int i = 0; i < size; i++) {
byte [] key = Bytes.readByteArray(in);
int len = in.readInt();
byte[] value = null;
if (len >= 0) {
value = new byte[len];
in.readFully(value);
}
setColumnFilter(key, value);
}
// nullColumns
nullColumns.clear();
size = in.readInt();
for (int i = 0; i < size; i++) {
setColumnFilter(Bytes.readByteArray(in), null);
}
}
public void validate(final byte [][] columns) {
Set<byte []> invalids = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
for (byte [] colKey : getFilterColumns()) {
boolean found = false;
for (byte [] col : columns) {
if (Bytes.equals(col, colKey)) {
found = true;
break;
}
}
if (!found) {
invalids.add(colKey);
}
}
if (invalids.size() > 0) {
throw new InvalidRowFilterException(String.format(
"RowFilter contains criteria on columns %s not in %s", invalids,
Arrays.toString(columns)));
}
}
@Deprecated
private Set<byte []> getFilterColumns() {
Set<byte []> cols = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
cols.addAll(equalsMap.keySet());
cols.addAll(nullColumns);
return cols;
}
public void write(final DataOutput out) throws IOException {
if (!filtersByRowKey()) {
out.writeBoolean(false);
} else {
out.writeBoolean(true);
out.writeUTF(getRowKeyRegExp());
}
// equalsMap
out.writeInt(equalsMap.size());
for (Entry<byte [], byte[]> entry : equalsMap.entrySet()) {
Bytes.writeByteArray(out, entry.getKey());
byte[] value = entry.getValue();
out.writeInt(value.length);
out.write(value);
}
// null columns
out.writeInt(nullColumns.size());
for (byte [] col : nullColumns) {
Bytes.writeByteArray(out, col);
}
}
}
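A hypothetical replacement sketch: the row-key regex half of RegExpRowFilter maps to RowFilter with a RegexStringComparator, while column-value checks map to SingleColumnValueFilter; the pattern is a placeholder:
// Sketch only: keep rows whose key matches a regular expression.
Scan scan = new Scan();
scan.setFilter(new RowFilter(CompareFilter.CompareOp.EQUAL,
    new RegexStringComparator("row-[0-9]+")));                       // placeholder pattern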

View File

@ -1,181 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import java.util.List;
import java.util.SortedMap;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.io.Writable;
/**
*
* Interface used for row-level filters applied to HRegion.HScanner scan
* results during calls to next().
*
* In HBase 0.20, not all of the functions will be called, so filters that depend
* on them will not work as advertised!
*
* Specifically, you can only count on the following methods to be called:
* boolean filterRowKey(final byte [] rowKey, final int offset, final int length);
* boolean filterAllRemaining();
*
* Complex filters that depend on more than these need to be rewritten to work with {@link Filter}.
*
* Write new filters to use the {@link Filter} API instead.
* @deprecated Use filters that are rooted on {@link Filter} instead
*/
public interface RowFilterInterface extends Writable {
/**
* Resets the state of the filter. Used prior to the start of a Region scan.
*
*/
void reset();
/**
* Called to let the filter know the final decision (to pass or filter) on a
* given row. Without HScanner calling this, the filter does not know whether a
* row ultimately passed filtering even if this filter passed it, because other
* filters may have failed the row, e.g. when this filter is a member of a
* RowFilterSet with an OR operator.
*
* @see RowFilterSet
* @param filtered
* @param key
* @deprecated Use {@link #rowProcessed(boolean, byte[], int, int)} instead.
*/
void rowProcessed(boolean filtered, byte [] key);
/**
* Called to let the filter know the final decision (to pass or filter) on a
* given row. Without HScanner calling this, the filter does not know whether a
* row ultimately passed filtering even if this filter passed it, because other
* filters may have failed the row, e.g. when this filter is a member of a
* RowFilterSet with an OR operator.
*
* @see RowFilterSet
* @param filtered
* @param key
* @param offset
* @param length
*/
void rowProcessed(boolean filtered, byte [] key, int offset, int length);
/**
* Returns whether or not the filter should always be processed in any
* filtering call. This precaution is necessary for filters that maintain
* state and need to be updated according to their response to filtering
* calls (see WhileMatchRowFilter for an example). At times, filters nested
* in RowFilterSets may or may not be called because the RowFilterSet
* determines a result as fast as possible. Returning true for
* processAlways() ensures that the filter will always be called.
*
* @return whether or not to always process the filter
*/
boolean processAlways();
/**
* Determines if the filter has decided that all remaining results should be
* filtered (skipped). This is used to prevent the scanner from scanning
* the rest of the HRegion when the filter is certain to exclude all
* remaining rows.
*
* @return true if the filter intends to filter all remaining rows.
*/
boolean filterAllRemaining();
/**
* Filters on just a row key. This is the first chance to stop a row.
*
* @param rowKey
* @return true if given row key is filtered and row should not be processed.
* @deprecated Use {@link #filterRowKey(byte[], int, int)} instead.
*/
boolean filterRowKey(final byte [] rowKey);
/**
* Filters on just a row key. This is the first chance to stop a row.
*
* @param rowKey
* @param offset
* @param length
* @return true if given row key is filtered and row should not be processed.
*/
boolean filterRowKey(final byte [] rowKey, final int offset, final int length);
/**
* Filters on row key, column name, and column value. This will take individual columns out of a row,
* but the rest of the row will still get through.
*
* @param rowKey row key to filter on.
* @param columnName column name to filter on
* @param columnValue column value to filter on
* @return true if row filtered and should not be processed.
* @deprecated Use {@link #filterColumn(byte[], int, int, byte[], int, int, byte[], int, int)}
* instead.
*/
@Deprecated
boolean filterColumn(final byte [] rowKey, final byte [] columnName,
final byte [] columnValue);
/**
* Filters on row key, column name, and column value. This will take individual columns out of a row,
* but the rest of the row will still get through.
*
* @param rowKey row key to filter on.
* @param colunmName column name to filter on
* @param columnValue column value to filter on
* @return true if row filtered and should not be processed.
*/
boolean filterColumn(final byte [] rowKey, final int roffset,
final int rlength, final byte [] colunmName, final int coffset,
final int clength, final byte [] columnValue, final int voffset,
final int vlength);
/**
* Filter on the fully assembled row. This is the last chance to stop a row.
*
* @param columns
* @return true if row filtered and should not be processed.
*/
boolean filterRow(final SortedMap<byte [], Cell> columns);
/**
* Filter on the fully assembled row. This is the last chance to stop a row.
*
* @param results
* @return true if row filtered and should not be processed.
*/
boolean filterRow(final List<KeyValue> results);
/**
* Validates that this filter applies only to a subset of the given columns.
* This check is done prior to opening the scanner due to the limitation that
* filtering of columns is dependent on the retrieval of those columns within
* the HRegion. Criteria on columns that are not part of a scanner's column
* list will be ignored. In the case of null value filters, all rows will pass
* the filter. This behavior should be 'undefined' for the user and therefore
* not permitted.
*
* @param columns
*/
void validate(final byte [][] columns);
}
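A hypothetical usage sketch of the Filter API this interface gives way to: a Filter is handed directly to a Get or Scan, and there is no client-driven validate()/rowProcessed() protocol; names below are placeholders:
// Sketch only: the same filter object style works for Get and Scan.
Get get = new Get(Bytes.toBytes("row1"));                            // placeholder row key
get.setFilter(new PrefixFilter(Bytes.toBytes("row")));
Scan scan = new Scan();
scan.setFilter(new PrefixFilter(Bytes.toBytes("row")));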

View File

@ -1,295 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.io.ObjectWritable;
/**
* Implementation of RowFilterInterface that represents a set of RowFilters
* which will be evaluated with a specified boolean operator MUST_PASS_ALL
* (!AND) or MUST_PASS_ONE (!OR). Since you can use RowFilterSets as children
* of RowFilterSet, you can create a hierarchy of filters to be evaluated.
*
* It is highly likely this construct will no longer work!
*
* @deprecated Use filters that are rooted on {@link Filter} instead
*/
public class RowFilterSet implements RowFilterInterface {
/** set operator */
public static enum Operator {
/** !AND */
MUST_PASS_ALL,
/** !OR */
MUST_PASS_ONE
}
private Operator operator = Operator.MUST_PASS_ALL;
private Set<RowFilterInterface> filters = new HashSet<RowFilterInterface>();
/**
* Default constructor, filters nothing. Required though for RPC
* deserialization.
*/
public RowFilterSet() {
super();
}
/**
* Constructor that takes a set of RowFilters. The default operator
* MUST_PASS_ALL is assumed.
*
* @param rowFilters
*/
public RowFilterSet(final Set<RowFilterInterface> rowFilters) {
this.filters = rowFilters;
}
/**
* Constructor that takes a set of RowFilters and an operator.
*
* @param operator Operator to process filter set with.
* @param rowFilters Set of row filters.
*/
public RowFilterSet(final Operator operator,
final Set<RowFilterInterface> rowFilters) {
this.filters = rowFilters;
this.operator = operator;
}
/** Get the operator.
*
* @return operator
*/
public Operator getOperator() {
return operator;
}
/** Get the filters.
*
* @return filters
*/
public Set<RowFilterInterface> getFilters() {
return filters;
}
/** Add a filter.
*
* @param filter
*/
public void addFilter(RowFilterInterface filter) {
this.filters.add(filter);
}
public void validate(final byte [][] columns) {
for (RowFilterInterface filter : filters) {
filter.validate(columns);
}
}
public void reset() {
for (RowFilterInterface filter : filters) {
filter.reset();
}
}
public void rowProcessed(boolean filtered, byte [] rowKey) {
rowProcessed(filtered, rowKey, 0, rowKey.length);
}
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
for (RowFilterInterface filter : filters) {
filter.rowProcessed(filtered, key, offset, length);
}
}
public boolean processAlways() {
for (RowFilterInterface filter : filters) {
if (filter.processAlways()) {
return true;
}
}
return false;
}
public boolean filterAllRemaining() {
boolean result = operator == Operator.MUST_PASS_ONE;
for (RowFilterInterface filter : filters) {
if (operator == Operator.MUST_PASS_ALL) {
if (filter.filterAllRemaining()) {
return true;
}
} else if (operator == Operator.MUST_PASS_ONE) {
if (!filter.filterAllRemaining()) {
return false;
}
}
}
return result;
}
public boolean filterRowKey(final byte [] rowKey) {
return filterRowKey(rowKey, 0, rowKey.length);
}
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
boolean resultFound = false;
boolean result = operator == Operator.MUST_PASS_ONE;
for (RowFilterInterface filter : filters) {
if (!resultFound) {
if (operator == Operator.MUST_PASS_ALL) {
if (filter.filterAllRemaining() ||
filter.filterRowKey(rowKey, offset, length)) {
result = true;
resultFound = true;
}
} else if (operator == Operator.MUST_PASS_ONE) {
if (!filter.filterAllRemaining() &&
!filter.filterRowKey(rowKey, offset, length)) {
result = false;
resultFound = true;
}
}
} else if (filter.processAlways()) {
filter.filterRowKey(rowKey, offset, length);
}
}
return result;
}
public boolean filterColumn(final byte [] rowKey, final byte [] colKey,
final byte[] data) {
return filterColumn(rowKey, 0, rowKey.length, colKey, 0, colKey.length,
data, 0, data.length);
}
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
byte[] columnName, int coffset, int clength, byte[] columnValue,
int voffset, int vlength) {
boolean resultFound = false;
boolean result = operator == Operator.MUST_PASS_ONE;
for (RowFilterInterface filter : filters) {
if (!resultFound) {
if (operator == Operator.MUST_PASS_ALL) {
if (filter.filterAllRemaining() ||
filter.filterColumn(rowKey, roffset, rlength, columnName, coffset,
clength, columnValue, voffset, vlength)) {
result = true;
resultFound = true;
}
} else if (operator == Operator.MUST_PASS_ONE) {
if (!filter.filterAllRemaining() &&
!filter.filterColumn(rowKey, roffset, rlength, columnName, coffset,
clength, columnValue, voffset, vlength)) {
result = false;
resultFound = true;
}
}
} else if (filter.processAlways()) {
filter.filterColumn(rowKey, roffset, rlength, columnName, coffset,
clength, columnValue, voffset, vlength);
}
}
return result;
}
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
boolean resultFound = false;
boolean result = operator == Operator.MUST_PASS_ONE;
for (RowFilterInterface filter : filters) {
if (!resultFound) {
if (operator == Operator.MUST_PASS_ALL) {
if (filter.filterAllRemaining() || filter.filterRow(columns)) {
result = true;
resultFound = true;
}
} else if (operator == Operator.MUST_PASS_ONE) {
if (!filter.filterAllRemaining() && !filter.filterRow(columns)) {
result = false;
resultFound = true;
}
}
} else if (filter.processAlways()) {
filter.filterRow(columns);
}
}
return result;
}
public boolean filterRow(List<KeyValue> results) {
boolean resultFound = false;
boolean result = operator == Operator.MUST_PASS_ONE;
for (RowFilterInterface filter : filters) {
if (!resultFound) {
if (operator == Operator.MUST_PASS_ALL) {
if (filter.filterAllRemaining() || filter.filterRow(results)) {
result = true;
resultFound = true;
}
} else if (operator == Operator.MUST_PASS_ONE) {
if (!filter.filterAllRemaining() && !filter.filterRow(results)) {
result = false;
resultFound = true;
}
}
} else if (filter.processAlways()) {
filter.filterRow(results);
}
}
return result;
}
public void readFields(final DataInput in) throws IOException {
Configuration conf = new HBaseConfiguration();
byte opByte = in.readByte();
operator = Operator.values()[opByte];
int size = in.readInt();
if (size > 0) {
filters = new HashSet<RowFilterInterface>();
for (int i = 0; i < size; i++) {
RowFilterInterface filter = (RowFilterInterface) ObjectWritable
.readObject(in, conf);
filters.add(filter);
}
}
}
public void write(final DataOutput out) throws IOException {
Configuration conf = new HBaseConfiguration();
out.writeByte(operator.ordinal());
out.writeInt(filters.size());
for (RowFilterInterface filter : filters) {
ObjectWritable.writeObject(out, filter, RowFilterInterface.class, conf);
}
}
}
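A hypothetical replacement sketch: FilterList plays the role of RowFilterSet with the same MUST_PASS_ALL / MUST_PASS_ONE operators; the child filters are placeholders:
// Sketch only: OR two filters together and hand the list to a Scan.
FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ONE);
list.addFilter(new PrefixFilter(Bytes.toBytes("a")));                // placeholder child filters
list.addFilter(new PrefixFilter(Bytes.toBytes("b")));
Scan scan = new Scan();
scan.setFilter(list);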

View File

@ -1,146 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import java.util.SortedMap;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.util.Bytes;
/**
* Implementation of RowFilterInterface that filters out rows greater than or
* equal to a specified rowKey.
*
* @deprecated Use filters that are rooted on {@link Filter} instead
*/
public class StopRowFilter implements RowFilterInterface {
private byte [] stopRowKey;
/**
* Default constructor, filters nothing. Required though for RPC
* deserialization.
*/
public StopRowFilter() {
super();
}
/**
* Constructor that takes a stopRowKey on which to filter
*
* @param stopRowKey rowKey to filter on.
*/
public StopRowFilter(final byte [] stopRowKey) {
this.stopRowKey = stopRowKey;
}
/**
* An accessor for the stopRowKey
*
* @return the filter's stopRowKey
*/
public byte [] getStopRowKey() {
return this.stopRowKey;
}
public void validate(final byte [][] columns) {
// Doesn't filter columns
}
public void reset() {
// Nothing to reset
}
public void rowProcessed(boolean filtered, byte [] rowKey) {
// Doesn't care
}
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
// Doesn't care
}
public boolean processAlways() {
return false;
}
public boolean filterAllRemaining() {
return false;
}
public boolean filterRowKey(final byte [] rowKey) {
return filterRowKey(rowKey, 0, rowKey.length);
}
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
if (rowKey == null) {
if (this.stopRowKey == null) {
return true;
}
return false;
}
return Bytes.compareTo(stopRowKey, 0, stopRowKey.length, rowKey, offset,
length) <= 0;
}
/**
* Because StopRowFilter does not examine column information, this method
* defaults to calling the rowKey-only version of filter.
* @param rowKey
* @param colKey
* @param data
* @return boolean
*/
public boolean filterColumn(final byte [] rowKey, final byte [] colKey,
final byte[] data) {
return filterRowKey(rowKey);
}
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
byte[] colunmName, int coffset, int clength, byte[] columnValue,
int voffset, int vlength) {
return filterRowKey(rowKey, roffset, rlength);
}
/**
* Because StopRowFilter does not examine column information, this method
* defaults to calling filterAllRemaining().
* @param columns
* @return boolean
*/
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
return filterAllRemaining();
}
public boolean filterRow(List<KeyValue> results) {
return filterAllRemaining();
}
public void readFields(DataInput in) throws IOException {
this.stopRowKey = Bytes.readByteArray(in);
}
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, this.stopRowKey);
}
}
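A hypothetical replacement sketch: an exclusive stop row is built into Scan itself, so the StopRowFilter case usually needs no filter at all; the row keys are placeholders:
// Sketch only: rows >= stopRow are never returned.
Scan scan = new Scan(Bytes.toBytes("startRow"), Bytes.toBytes("stopRow"));  // placeholder keys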

View File

@ -21,7 +21,6 @@
package org.apache.hadoop.hbase.filter;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
/**
* This filter is used to filter based on column value. It takes an

View File

@ -1,167 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import java.util.SortedMap;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.Cell;
/**
* WhileMatchRowFilter is a wrapper filter that filters everything after the
* first filtered row. Once the nested filter returns true for either of its
* filter(..) methods or filterNotNull(SortedMap<Text, byte[]>), this wrapper's
* filterAllRemaining() will return true. All filtering methods will
* thereafter defer to the result of filterAllRemaining().
*
* @deprecated Use filters that are rooted on {@link Filter} instead
*/
public class WhileMatchRowFilter implements RowFilterInterface {
private boolean filterAllRemaining = false;
private RowFilterInterface filter;
/**
* Default constructor, filters nothing. Required though for RPC
* deserialization.
*/
public WhileMatchRowFilter() {
super();
}
/**
* Constructor
* @param filter
*/
public WhileMatchRowFilter(RowFilterInterface filter) {
this.filter = filter;
}
/**
* Returns the internal filter being wrapped
*
* @return the internal filter
*/
public RowFilterInterface getInternalFilter() {
return this.filter;
}
public void reset() {
this.filterAllRemaining = false;
this.filter.reset();
}
public boolean processAlways() {
return true;
}
/**
* Returns true once the nested filter has filtered out a row (returned true
* on a call to one of its filtering methods). Until then it returns false.
*
* @return true/false whether the nested filter has returned true on a filter
* call.
*/
public boolean filterAllRemaining() {
return this.filterAllRemaining || this.filter.filterAllRemaining();
}
public boolean filterRowKey(final byte [] rowKey) {
changeFAR(this.filter.filterRowKey(rowKey, 0, rowKey.length));
return filterAllRemaining();
}
public boolean filterRowKey(byte[] rowKey, int offset, int length) {
changeFAR(this.filter.filterRowKey(rowKey, offset, length));
return filterAllRemaining();
}
public boolean filterColumn(final byte [] rowKey, final byte [] colKey,
final byte[] data) {
changeFAR(this.filter.filterColumn(rowKey, colKey, data));
return filterAllRemaining();
}
public boolean filterRow(final SortedMap<byte [], Cell> columns) {
changeFAR(this.filter.filterRow(columns));
return filterAllRemaining();
}
public boolean filterRow(List<KeyValue> results) {
changeFAR(this.filter.filterRow(results));
return filterAllRemaining();
}
/**
* Change filterAllRemaining from false to true if value is true, otherwise
* leave as is.
*
* @param value
*/
private void changeFAR(boolean value) {
this.filterAllRemaining = this.filterAllRemaining || value;
}
public void rowProcessed(boolean filtered, byte [] rowKey) {
this.filter.rowProcessed(filtered, rowKey, 0, rowKey.length);
}
public void rowProcessed(boolean filtered, byte[] key, int offset, int length) {
this.filter.rowProcessed(filtered, key, offset, length);
}
public void validate(final byte [][] columns) {
this.filter.validate(columns);
}
public void readFields(DataInput in) throws IOException {
String className = in.readUTF();
try {
this.filter = (RowFilterInterface)(Class.forName(className).
newInstance());
this.filter.readFields(in);
} catch (InstantiationException e) {
throw new RuntimeException("Failed to deserialize WhileMatchRowFilter.",
e);
} catch (IllegalAccessException e) {
throw new RuntimeException("Failed to deserialize WhileMatchRowFilter.",
e);
} catch (ClassNotFoundException e) {
throw new RuntimeException("Failed to deserialize WhileMatchRowFilter.",
e);
}
}
public void write(DataOutput out) throws IOException {
out.writeUTF(this.filter.getClass().getName());
this.filter.write(out);
}
public boolean filterColumn(byte[] rowKey, int roffset, int rlength,
byte[] colunmName, int coffset, int clength, byte[] columnValue,
int voffset, int vlength) {
changeFAR(this.filter.filterColumn(rowKey, roffset, rlength, colunmName, coffset, clength, columnValue, voffset, vlength));
return filterAllRemaining();
}
}
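A hypothetical replacement sketch: WhileMatchFilter wraps another Filter and ends the scan once the wrapped filter rejects a row, as this class did; the wrapped filter is a placeholder:
// Sketch only: stop scanning at the first row that falls outside the prefix.
Scan scan = new Scan();
scan.setFilter(new WhileMatchFilter(new PrefixFilter(Bytes.toBytes("user_"))));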

View File

@ -1,150 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
/**
* Batch update operation.
*
* If value is null, it's a DELETE operation. If it's non-null, it's a PUT.
* This object is purposely bare-bones because many instances are created
* during bulk uploads. We have one class for DELETEs and PUTs rather than
* a class per type because it makes the serialization easier.
* @see BatchUpdate
* @deprecated As of hbase 0.20.0, replaced by new Get/Put/Delete/Result-based API.
*/
public class BatchOperation implements Writable, HeapSize {
/**
* Estimated size of this object.
*/
// JHat says this is 32 bytes.
public final int ESTIMATED_HEAP_TAX = 36;
private byte [] column = null;
// A null value defines DELETE operations.
private byte [] value = null;
/**
* Default constructor
*/
public BatchOperation() {
this((byte [])null);
}
/**
* Creates a DELETE batch operation.
* @param column column name
*/
public BatchOperation(final byte [] column) {
this(column, null);
}
/**
* Creates a DELETE batch operation.
* @param column column name
*/
public BatchOperation(final String column) {
this(Bytes.toBytes(column), null);
}
/**
* Create a batch operation.
* @param column column name
* @param value column value. If non-null, this is a PUT operation.
*/
public BatchOperation(final String column, String value) {
this(Bytes.toBytes(column), Bytes.toBytes(value));
}
/**
* Create a batch operation.
* @param column column name
* @param value column value. If non-null, this is a PUT operation.
*/
public BatchOperation(final byte [] column, final byte [] value) {
this.column = column;
this.value = value;
}
/**
* @return the column
*/
public byte [] getColumn() {
return this.column;
}
/**
* @return the value
*/
public byte[] getValue() {
return this.value;
}
/**
* @return True if this is a PUT operation (this.value is not null).
*/
public boolean isPut() {
return this.value != null;
}
/**
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
return "column => " + Bytes.toString(this.column) + ", value => '...'";
}
// Writable methods
// This is a hotspot when updating deserializing incoming client submissions.
// In Performance Evaluation sequentialWrite, 70% of object allocations are
// done in here.
public void readFields(final DataInput in) throws IOException {
this.column = Bytes.readByteArray(in);
// Is there a value to read?
if (in.readBoolean()) {
this.value = new byte[in.readInt()];
in.readFully(this.value);
}
}
public void write(final DataOutput out) throws IOException {
Bytes.writeByteArray(out, this.column);
boolean p = isPut();
out.writeBoolean(p);
if (p) {
out.writeInt(value.length);
out.write(value);
}
}
public long heapSize() {
return Bytes.ESTIMATED_HEAP_TAX * 2 + this.column.length +
this.value.length + ESTIMATED_HEAP_TAX;
}
}
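A hypothetical migration sketch: a put-or-delete BatchOperation becomes an explicit Put or Delete addressed by family and qualifier; the row, column and value below are placeholders:
// Sketch only: the null-value DELETE convention is replaced by a separate Delete object.
Put put = new Put(Bytes.toBytes("row1"));
put.add(Bytes.toBytes("family"), Bytes.toBytes("qual"), Bytes.toBytes("value"));
Delete delete = new Delete(Bytes.toBytes("row1"));
delete.deleteColumns(Bytes.toBytes("family"), Bytes.toBytes("qual"));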

View File

@ -1,405 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.RuntimeMXBean;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableComparable;
/**
* A Writable object that contains a series of BatchOperations
*
* There is one BatchUpdate object per server, so a series of batch operations
* can result in multiple BatchUpdate objects if the batch contains rows that
* are served by multiple region servers.
* @deprecated As of hbase 0.20.0, replaced by new Get/Put/Delete/Result-based API.
*/
public class BatchUpdate
implements WritableComparable<BatchUpdate>, Iterable<BatchOperation>, HeapSize {
private static final Log LOG = LogFactory.getLog(BatchUpdate.class);
/**
* Estimated 'shallow size' of this object not counting payload.
*/
// Shallow size is 56. Add 32 for the arraylist below.
public static final int ESTIMATED_HEAP_TAX = 56 + 32;
// the row being updated
private byte [] row = null;
private long size = 0;
// the batched operations
private ArrayList<BatchOperation> operations =
new ArrayList<BatchOperation>();
private long timestamp = HConstants.LATEST_TIMESTAMP;
private long rowLock = -1l;
/**
* Default constructor used when serializing. Do not use directly.
*/
public BatchUpdate() {
this ((byte [])null);
}
/**
* Initialize a BatchUpdate operation on a row. Timestamp is assumed to be
* now.
*
* @param row
*/
public BatchUpdate(final String row) {
this(Bytes.toBytes(row), HConstants.LATEST_TIMESTAMP);
}
/**
* Initialize a BatchUpdate operation on a row. Timestamp is assumed to be
* now.
*
* @param row
*/
public BatchUpdate(final byte [] row) {
this(row, HConstants.LATEST_TIMESTAMP);
}
/**
* Initialize a BatchUpdate operation on a row with a specific timestamp.
*
* @param row
* @param timestamp
*/
public BatchUpdate(final String row, long timestamp){
this(Bytes.toBytes(row), timestamp);
}
/**
* Copy constructor.
* @param buToCopy BatchUpdate to copy
*/
public BatchUpdate(BatchUpdate buToCopy) {
this(buToCopy.getRow(), buToCopy.getTimestamp());
for(BatchOperation bo : buToCopy) {
byte [] val = bo.getValue();
if (val == null) {
// Presume a delete is intended.
this.delete(bo.getColumn());
} else {
this.put(bo.getColumn(), val);
}
}
}
/**
* Initialize a BatchUpdate operation on a row with a specific timestamp.
*
* @param row
* @param timestamp
*/
public BatchUpdate(final byte [] row, long timestamp){
this.row = row;
this.timestamp = timestamp;
this.operations = new ArrayList<BatchOperation>();
this.size = (row == null)? 0: row.length;
}
/**
* Create a BatchUpdate from a RowResult.
* @param rr the RowResult
*/
public BatchUpdate(final RowResult rr) {
this(rr.getRow());
for(Map.Entry<byte[], Cell> entry : rr.entrySet()){
this.put(entry.getKey(), entry.getValue().getValue());
}
}
/**
* Get the row lock associated with this update
* @return the row lock
*/
public long getRowLock() {
return rowLock;
}
/**
* Set the lock to be used for this update
* @param rowLock the row lock
*/
public void setRowLock(long rowLock) {
this.rowLock = rowLock;
}
/** @return the row */
public byte [] getRow() {
return row;
}
/**
* @return the timestamp this BatchUpdate will be committed with.
*/
public long getTimestamp() {
return timestamp;
}
/**
* Set this BatchUpdate's timestamp.
*
* @param timestamp
*/
public void setTimestamp(long timestamp) {
this.timestamp = timestamp;
}
/**
* Get the current value of the specified column
*
* @param column column name
* @return byte[] the cell value, returns null if the column does not exist.
*/
public synchronized byte[] get(final String column) {
return get(Bytes.toBytes(column));
}
/**
* Get the current value of the specified column
*
* @param column column name
* @return byte[] the cell value, returns null if the column does not exist.
*/
public synchronized byte[] get(final byte[] column) {
for (BatchOperation operation: operations) {
if (Arrays.equals(column, operation.getColumn())) {
return operation.getValue();
}
}
return null;
}
/**
* Get the current columns
*
* @return byte[][] an array of byte[] columns
*/
public synchronized byte[][] getColumns() {
byte[][] columns = new byte[operations.size()][];
for (int i = 0; i < operations.size(); i++) {
columns[i] = operations.get(i).getColumn();
}
return columns;
}
/**
* Check if the specified column is currently assigned a value
*
* @param column column to check for
* @return boolean true if the given column exists
*/
public synchronized boolean hasColumn(String column) {
return hasColumn(Bytes.toBytes(column));
}
/**
* Check if the specified column is currently assigned a value
*
* @param column column to check for
* @return boolean true if the given column exists
*/
public synchronized boolean hasColumn(byte[] column) {
byte[] getColumn = get(column);
if (getColumn == null) {
return false;
}
return true;
}
/**
* Change a value for the specified column
*
* @param column column whose value is being set
* @param val new value for column. Cannot be null (can be empty).
*/
public synchronized void put(final String column, final byte val[]) {
put(Bytes.toBytes(column), val);
}
/**
* Change a value for the specified column
*
* @param column column whose value is being set
* @param val new value for column. Cannot be null (can be empty).
*/
public synchronized void put(final byte [] column, final byte val[]) {
if (val == null) {
// A null value is rejected; it would be indistinguishable from a delete.
throw new IllegalArgumentException("Passed value cannot be null");
}
BatchOperation bo = new BatchOperation(column, val);
this.size += bo.heapSize();
operations.add(bo);
}
/**
* Delete the value for a column
* Deletes the cell whose row/column/commit-timestamp match those of the
* delete.
* @param column name of column whose value is to be deleted
*/
public void delete(final String column) {
delete(Bytes.toBytes(column));
}
/**
* Delete the value for a column
* Deletes the cell whose row/column/commit-timestamp match those of the
* delete.
* @param column name of column whose value is to be deleted
*/
public synchronized void delete(final byte [] column) {
operations.add(new BatchOperation(column));
}
//
// Iterable
//
/**
* @return Iterator<BatchOperation>
*/
public Iterator<BatchOperation> iterator() {
return operations.iterator();
}
/**
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("row => ");
sb.append(row == null? "": Bytes.toString(row));
sb.append(", {");
boolean morethanone = false;
for (BatchOperation bo: this.operations) {
if (morethanone) {
sb.append(", ");
}
morethanone = true;
sb.append(bo.toString());
}
sb.append("}");
return sb.toString();
}
//
// Writable
//
public void readFields(final DataInput in) throws IOException {
// Clear any existing operations; may be hangovers from previous use of
// this instance.
if (this.operations.size() != 0) {
this.operations.clear();
}
this.row = Bytes.readByteArray(in);
timestamp = in.readLong();
this.size = in.readLong();
int nOps = in.readInt();
for (int i = 0; i < nOps; i++) {
BatchOperation op = new BatchOperation();
op.readFields(in);
this.operations.add(op);
}
this.rowLock = in.readLong();
}
public void write(final DataOutput out) throws IOException {
Bytes.writeByteArray(out, this.row);
out.writeLong(timestamp);
out.writeLong(this.size);
out.writeInt(operations.size());
for (BatchOperation op: operations) {
op.write(out);
}
out.writeLong(this.rowLock);
}
public int compareTo(BatchUpdate o) {
return Bytes.compareTo(this.row, o.getRow());
}
public long heapSize() {
return this.row.length + Bytes.ESTIMATED_HEAP_TAX + this.size +
ESTIMATED_HEAP_TAX;
}
/**
* Code to test sizes of BatchUpdate arrays.
* @param args
* @throws InterruptedException
*/
public static void main(String[] args) throws InterruptedException {
RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
LOG.info("vmName=" + runtime.getVmName() + ", vmVendor="
+ runtime.getVmVendor() + ", vmVersion=" + runtime.getVmVersion());
LOG.info("vmInputArguments=" + runtime.getInputArguments());
final int count = 10000;
BatchUpdate[] batch1 = new BatchUpdate[count];
// TODO: x32 vs x64
long size = 0;
for (int i = 0; i < count; i++) {
BatchUpdate bu = new BatchUpdate(HConstants.EMPTY_BYTE_ARRAY);
bu.put(HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY);
batch1[i] = bu;
size += bu.heapSize();
}
LOG.info("batch1 estimated size=" + size);
// Make variably sized BatchUpdates.
size = 0;
BatchUpdate[] batch2 = new BatchUpdate[count];
for (int i = 0; i < count; i++) {
BatchUpdate bu = new BatchUpdate(Bytes.toBytes(i));
bu.put(Bytes.toBytes(i), new byte[i]);
batch2[i] = bu;
size += bu.heapSize();
}
LOG.info("batch2 estimated size=" + size);
final int seconds = 30;
LOG.info("Waiting " + seconds + " seconds while heap dump is taken");
for (int i = 0; i < seconds; i++) {
Thread.sleep(1000);
}
LOG.info("Exiting.");
}
}
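A minimal usage sketch of the deprecated BatchUpdate class above, assuming the historical org.apache.hadoop.hbase.io and org.apache.hadoop.hbase.util package layout; the row and column names and the BatchUpdateSketch class are hypothetical, and only methods declared in this class are exercised.
import org.apache.hadoop.hbase.io.BatchOperation;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.util.Bytes;
class BatchUpdateSketch {
  static BatchUpdate buildUpdate() {
    // Queue one put and one delete against a single row; nothing is committed here.
    BatchUpdate bu = new BatchUpdate(Bytes.toBytes("row1"));
    bu.put(Bytes.toBytes("info:name"), Bytes.toBytes("value"));
    bu.delete(Bytes.toBytes("info:stale"));
    // The queued operations are visible through the Iterable interface.
    for (BatchOperation op : bu) {
      System.out.println(Bytes.toString(op.getColumn()));
    }
    return bu;
  }
}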

View File

@ -1,280 +0,0 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Map.Entry;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.rest.exception.HBaseRestException;
import org.apache.hadoop.hbase.rest.serializer.IRestSerializer;
import org.apache.hadoop.hbase.rest.serializer.ISerializable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
import agilejson.TOJSON;
/**
* Cell - Used to transport a cell value (byte[]) and the timestamp it was
* stored with together as a result for get and getRow methods. This promotes
* the timestamp of a cell to a first-class value, making it easy to take note
* of temporal data. Cell is used all the way from HStore up to HTable.
* @deprecated As of hbase 0.20.0, replaced by new Get/Put/Delete/Result-based API.
*/
public class Cell implements Writable, Iterable<Map.Entry<Long, byte[]>>,
ISerializable {
protected final SortedMap<Long, byte[]> valueMap = new TreeMap<Long, byte[]>(
new Comparator<Long>() {
public int compare(Long l1, Long l2) {
return l2.compareTo(l1);
}
});
/** For Writable compatibility */
public Cell() {
super();
}
/**
* Create a new Cell with a given value and timestamp. Used by HStore.
*
* @param value
* @param timestamp
*/
public Cell(String value, long timestamp) {
this(Bytes.toBytes(value), timestamp);
}
/**
* Create a new Cell with a given value and timestamp. Used by HStore.
*
* @param value
* @param timestamp
*/
public Cell(byte[] value, long timestamp) {
valueMap.put(timestamp, value);
}
/**
* Create a new Cell with a given value and timestamp. Used by HStore.
*
* @param bb
* @param timestamp
*/
public Cell(final ByteBuffer bb, long timestamp) {
this.valueMap.put(timestamp, Bytes.toBytes(bb));
}
/**
* @param vals
* array of values
* @param ts
* array of timestamps
*/
public Cell(String [] vals, long[] ts) {
this(Bytes.toByteArrays(vals), ts);
}
/**
* @param vals
* array of values
* @param ts
* array of timestamps
*/
public Cell(byte[][] vals, long[] ts) {
if (vals.length != ts.length) {
throw new IllegalArgumentException(
"number of values must be the same as the number of timestamps");
}
for (int i = 0; i < vals.length; i++) {
valueMap.put(ts[i], vals[i]);
}
}
/** @return the current cell's value */
@TOJSON(base64=true)
public byte[] getValue() {
return valueMap.get(valueMap.firstKey());
}
/** @return the current cell's timestamp */
@TOJSON
public long getTimestamp() {
return valueMap.firstKey();
}
/** @return the number of values this cell holds */
public int getNumValues() {
return valueMap.size();
}
/**
* Add a new timestamp and value to this cell, provided the timestamp does not
* already exist.
*
* @param val
* @param ts
*/
public void add(byte[] val, long ts) {
if (!valueMap.containsKey(ts)) {
valueMap.put(ts, val);
}
}
/**
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
if (valueMap.size() == 1) {
return "timestamp=" + getTimestamp() + ", value="
+ Bytes.toString(getValue());
}
StringBuilder s = new StringBuilder("{ ");
int i = 0;
for (Map.Entry<Long, byte[]> entry : valueMap.entrySet()) {
if (i > 0) {
s.append(", ");
}
s.append("[timestamp=");
s.append(entry.getKey());
s.append(", value=");
s.append(Bytes.toString(entry.getValue()));
s.append("]");
i++;
}
s.append(" }");
return s.toString();
}
//
// Writable
//
public void readFields(final DataInput in) throws IOException {
int nvalues = in.readInt();
for (int i = 0; i < nvalues; i++) {
long timestamp = in.readLong();
byte[] value = Bytes.readByteArray(in);
valueMap.put(timestamp, value);
}
}
public void write(final DataOutput out) throws IOException {
out.writeInt(valueMap.size());
for (Map.Entry<Long, byte[]> entry : valueMap.entrySet()) {
out.writeLong(entry.getKey());
Bytes.writeByteArray(out, entry.getValue());
}
}
//
// Iterable
//
public Iterator<Entry<Long, byte[]>> iterator() {
return new CellIterator();
}
private class CellIterator implements Iterator<Entry<Long, byte[]>> {
private Iterator<Entry<Long, byte[]>> it;
CellIterator() {
it = valueMap.entrySet().iterator();
}
public boolean hasNext() {
return it.hasNext();
}
public Entry<Long, byte[]> next() {
return it.next();
}
public void remove() throws UnsupportedOperationException {
throw new UnsupportedOperationException("remove is not supported");
}
}
/**
* @param results
* @return Map of Cells keyed by column name.
* TODO: This is the glue between old way of doing things and the new.
* Herein we are converting our clean KeyValues to Map of Cells.
*/
public static HbaseMapWritable<byte [], Cell> createCells(
final List<KeyValue> results) {
HbaseMapWritable<byte [], Cell> cells =
new HbaseMapWritable<byte [], Cell>();
// Walk backward through the list of results; the order has no effect
// because we're inserting into a sorted map.
for (ListIterator<KeyValue> i = results.listIterator(results.size());
i.hasPrevious();) {
KeyValue kv = i.previous();
byte [] column = kv.getColumn();
Cell c = cells.get(column);
if (c == null) {
c = new Cell(kv.getValue(), kv.getTimestamp());
cells.put(column, c);
} else {
c.add(kv.getValue(), kv.getTimestamp());
}
}
return cells;
}
/**
* @param results
* @return Array of Cells.
* TODO: This is the glue between old way of doing things and the new.
* Herein we are converting our clean KeyValues to Map of Cells.
*/
public static Cell [] createSingleCellArray(final List<KeyValue> results) {
if (results == null) return null;
int index = 0;
Cell [] cells = new Cell[results.size()];
for (KeyValue kv: results) {
cells[index++] = new Cell(kv.getValue(), kv.getTimestamp());
}
return cells;
}
/*
* (non-Javadoc)
*
* @see
* org.apache.hadoop.hbase.rest.serializer.ISerializable#restSerialize(org
* .apache.hadoop.hbase.rest.serializer.IRestSerializer)
*/
public void restSerialize(IRestSerializer serializer)
throws HBaseRestException {
serializer.serializeCell(this);
}
}
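A short sketch of how the multi-versioned Cell above behaves; the values, timestamps and the CellSketch class are made up, and the imports assume the historical package layout.
import java.util.Map;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.util.Bytes;
class CellSketch {
  static void demo() {
    // The value map sorts timestamps descending, so the newest version wins.
    Cell c = new Cell(Bytes.toBytes("v1"), 100L);
    c.add(Bytes.toBytes("v2"), 200L);
    System.out.println(Bytes.toString(c.getValue())); // prints "v2" (timestamp 200)
    System.out.println(c.getNumValues());             // prints 2
    // Iterate every stored version, newest first.
    for (Map.Entry<Long, byte[]> e : c) {
      System.out.println(e.getKey() + " -> " + Bytes.toString(e.getValue()));
    }
  }
}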

View File

@ -45,7 +45,7 @@ public interface CodeToClassAndBack {
/**
* Class list for supported classes
*/
public Class<?>[] classList = {byte[].class, Cell.class};
public Class<?>[] classList = {byte[].class};
/**
* The static loader that is used instead of the static constructor in

View File

@ -113,8 +113,6 @@ public class HbaseObjectWritable implements Writable, Configurable {
addToMap(HConstants.Modify.class, code++);
addToMap(HMsg.class, code++);
addToMap(HMsg[].class, code++);
addToMap(RowFilterInterface.class, code++);
addToMap(RowFilterSet.class, code++);
addToMap(HRegion.class, code++);
addToMap(HRegion[].class, code++);
addToMap(HRegionInfo.class, code++);

View File

@ -1,342 +0,0 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeSet;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.rest.descriptors.RestCell;
import org.apache.hadoop.hbase.rest.exception.HBaseRestException;
import org.apache.hadoop.hbase.rest.serializer.IRestSerializer;
import org.apache.hadoop.hbase.rest.serializer.ISerializable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.Writable;
import agilejson.TOJSON;
/**
* Holds row name and then a map of columns to cells.
* @deprecated As of hbase 0.20.0, replaced by new Get/Put/Delete/Result-based API.
*/
public class RowResult implements Writable, SortedMap<byte [], Cell>,
Comparable<RowResult>, ISerializable {
private byte [] row = null;
private final HbaseMapWritable<byte [], Cell> cells;
private final byte [] COL_REGIONINFO = Bytes.toBytes("info:regioninfo");
/** default constructor for writable */
public RowResult() {
this(null, new HbaseMapWritable<byte [], Cell>());
}
/**
* Create a RowResult from a row and Cell map
* @param row
* @param m
*/
public RowResult (final byte [] row,
final HbaseMapWritable<byte [], Cell> m) {
this.row = row;
this.cells = m;
}
/**
* Get the row for this RowResult
* @return the row
*/
@TOJSON(base64=true)
public byte [] getRow() {
return row;
}
//
// Map interface
//
public Cell put(byte [] key,
Cell value) {
throw new UnsupportedOperationException("RowResult is read-only!");
}
@SuppressWarnings("unchecked")
public void putAll(Map map) {
throw new UnsupportedOperationException("RowResult is read-only!");
}
public Cell get(Object key) {
return this.cells.get(key);
}
public Cell remove(Object key) {
throw new UnsupportedOperationException("RowResult is read-only!");
}
public boolean containsKey(Object key) {
return cells.containsKey(key);
}
/**
* Check if the key can be found in this RowResult
* @param key
* @return true if key is found, false if not
*/
public boolean containsKey(String key) {
return cells.containsKey(Bytes.toBytes(key));
}
public boolean containsValue(Object value) {
throw new UnsupportedOperationException("Don't support containsValue!");
}
public boolean isEmpty() {
return cells.isEmpty();
}
public int size() {
return cells.size();
}
public void clear() {
throw new UnsupportedOperationException("RowResult is read-only!");
}
public Set<byte []> keySet() {
Set<byte []> result = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
for (byte [] w : cells.keySet()) {
result.add(w);
}
return result;
}
public Set<Map.Entry<byte [], Cell>> entrySet() {
return Collections.unmodifiableSet(this.cells.entrySet());
}
/**
* This method is used solely for REST serialization
*
* @return Cells
*/
@TOJSON
public RestCell[] getCells() {
RestCell[] restCells = new RestCell[this.cells.size()];
int i = 0;
for (Map.Entry<byte[], Cell> entry : this.cells.entrySet()) {
restCells[i] = new RestCell(entry.getKey(), entry.getValue());
i++;
}
return restCells;
}
public Collection<Cell> values() {
ArrayList<Cell> result = new ArrayList<Cell>();
for (Writable w : cells.values()) {
result.add((Cell)w);
}
return result;
}
/**
* Get the Cell that corresponds to column
* @param column
* @return the Cell
*/
public Cell get(byte [] column) {
return this.cells.get(column);
}
/**
* Get the Cell that corresponds to column, using a String key
* @param key
* @return the Cell
*/
public Cell get(String key) {
return get(Bytes.toBytes(key));
}
/**
* Get a cell using separate family and columnQualifier arguments.
* @param family
* @param columnQualifier
* @return The cell.
*/
public Cell get(byte [] family, byte [] columnQualifier) {
return get(Bytes.add(family, KeyValue.COLUMN_FAMILY_DELIM_ARRAY, columnQualifier));
}
public Comparator<? super byte[]> comparator() {
return this.cells.comparator();
}
public byte[] firstKey() {
return this.cells.firstKey();
}
public SortedMap<byte[], Cell> headMap(byte[] toKey) {
return this.cells.headMap(toKey);
}
public byte[] lastKey() {
return this.cells.lastKey();
}
public SortedMap<byte[], Cell> subMap(byte[] fromKey, byte[] toKey) {
return this.cells.subMap(fromKey, toKey);
}
public SortedMap<byte[], Cell> tailMap(byte[] fromKey) {
return this.cells.tailMap(fromKey);
}
/**
* Row entry.
*/
public class Entry implements Map.Entry<byte [], Cell> {
private final byte [] column;
private final Cell cell;
Entry(byte [] row, Cell cell) {
this.column = row;
this.cell = cell;
}
public Cell setValue(Cell c) {
throw new UnsupportedOperationException("RowResult is read-only!");
}
public byte [] getKey() {
return column;
}
public Cell getValue() {
return cell;
}
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("row=");
sb.append(Bytes.toString(this.row));
sb.append(", cells={");
boolean moreThanOne = false;
for (Map.Entry<byte [], Cell> e: this.cells.entrySet()) {
if (moreThanOne) {
sb.append(", ");
} else {
moreThanOne = true;
}
sb.append("(column=");
sb.append(Bytes.toString(e.getKey()));
sb.append(", timestamp=");
sb.append(Long.toString(e.getValue().getTimestamp()));
sb.append(", value=");
byte [] v = e.getValue().getValue();
if (Bytes.equals(e.getKey(), this.COL_REGIONINFO)) {
try {
sb.append(Writables.getHRegionInfo(v).toString());
} catch (IOException ioe) {
sb.append(ioe.toString());
}
} else {
sb.append(v);
}
sb.append(")");
}
sb.append("}");
return sb.toString();
}
/* (non-Javadoc)
* @see org.apache.hadoop.hbase.rest.xml.IOutputXML#toXML()
*/
public void restSerialize(IRestSerializer serializer) throws HBaseRestException {
serializer.serializeRowResult(this);
}
/**
* @param l
* @return Array of RowResults, one per row.
* TODO: This is the glue between old way of doing things and the new.
* Herein we are converting our clean KeyValues to old RowResult.
*/
public static RowResult [] createRowResultArray(final List<List<KeyValue>> l) {
RowResult [] results = new RowResult[l.size()];
int i = 0;
for (List<KeyValue> kvl: l) {
results[i++] = createRowResult(kvl);
}
return results;
}
/**
* @param results
* @return RowResult built from the passed KeyValues, or null if the list is empty.
* TODO: This is the glue between old way of doing things and the new.
* Herein we are converting our clean KeyValues to old RowResult.
*/
public static RowResult createRowResult(final List<KeyValue> results) {
if (results.isEmpty()) {
return null;
}
HbaseMapWritable<byte [], Cell> cells = Cell.createCells(results);
byte [] row = results.get(0).getRow();
return new RowResult(row, cells);
}
//
// Writable
//
public void readFields(final DataInput in) throws IOException {
this.row = Bytes.readByteArray(in);
this.cells.readFields(in);
}
public void write(final DataOutput out) throws IOException {
Bytes.writeByteArray(out, this.row);
this.cells.write(out);
}
//
// Comparable
//
/**
* Compare this RowResult with another by comparing their rows.
* @param o the RowResult to compare to
* @return the result of the comparison
*/
public int compareTo(RowResult o){
return Bytes.compareTo(this.row, o.getRow());
}
}
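A brief read-only access sketch for the RowResult above (column names and the RowResultSketch class are hypothetical); it relies only on the map view and accessors defined in this class.
import java.util.Map;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
class RowResultSketch {
  // Dump a RowResult; it behaves as a read-only SortedMap<byte[], Cell>.
  static void dump(RowResult rr) {
    System.out.println("row=" + Bytes.toString(rr.getRow()));
    for (Map.Entry<byte[], Cell> e : rr.entrySet()) {
      System.out.println(Bytes.toString(e.getKey()) + " @ "
          + e.getValue().getTimestamp() + " = "
          + Bytes.toString(e.getValue().getValue()));
    }
    // Mutators such as put, remove and clear throw UnsupportedOperationException.
  }
}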

View File

@ -910,7 +910,7 @@ public abstract class HBaseServer {
CurCall.set(call);
UserGroupInformation previous = UserGroupInformation.getCurrentUGI();
UserGroupInformation.setCurrentUGI(call.connection.ticket);
UserGroupInformation.setCurrentUser(call.connection.ticket);
try {
value = call(call.param, call.timestamp); // make the call
} catch (Throwable e) {
@ -918,7 +918,7 @@ public abstract class HBaseServer {
errorClass = e.getClass().getName();
error = StringUtils.stringifyException(e);
}
UserGroupInformation.setCurrentUGI(previous);
UserGroupInformation.setCurrentUser(previous);
CurCall.set(null);
if (buf.size() > buffersize) {

View File

@ -1,206 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
/**
* Example table column indexing class. Runs a mapreduce job to index
* specified table columns.
* <ul><li>Each row is modeled as a Lucene document: row key is indexed in
* its untokenized form, column name-value pairs are Lucene field name-value
* pairs.</li>
* <li>A file passed on command line is used to populate an
* {@link IndexConfiguration} which is used to set various Lucene parameters,
* specify whether to optimize an index and which columns to index and/or
* store, in tokenized or untokenized form, etc. For an example, see the
* <code>createIndexConfContent</code> method in TestTableIndex
* </li>
* <li>The number of reduce tasks decides the number of indexes (partitions).
* The index(es) is stored in the output path of job configuration.</li>
* <li>The index build process is done in the reduce phase. Users can use
* the map phase to join rows from different tables or to pre-parse/analyze
* column content, etc.</li>
* </ul>
*/
@Deprecated
public class BuildTableIndex {
private static final String USAGE = "Usage: BuildTableIndex " +
"-m <numMapTasks> -r <numReduceTasks>\n -indexConf <iconfFile> " +
"-indexDir <indexDir>\n -table <tableName> -columns <columnName1> " +
"[<columnName2> ...]";
private static void printUsage(String message) {
System.err.println(message);
System.err.println(USAGE);
System.exit(-1);
}
/** default constructor */
public BuildTableIndex() {
super();
}
/**
* @param args
* @throws IOException
*/
public void run(String[] args) throws IOException {
if (args.length < 6) {
printUsage("Too few arguments");
}
int numMapTasks = 1;
int numReduceTasks = 1;
String iconfFile = null;
String indexDir = null;
String tableName = null;
StringBuffer columnNames = null;
// parse args
for (int i = 0; i < args.length - 1; i++) {
if ("-m".equals(args[i])) {
numMapTasks = Integer.parseInt(args[++i]);
} else if ("-r".equals(args[i])) {
numReduceTasks = Integer.parseInt(args[++i]);
} else if ("-indexConf".equals(args[i])) {
iconfFile = args[++i];
} else if ("-indexDir".equals(args[i])) {
indexDir = args[++i];
} else if ("-table".equals(args[i])) {
tableName = args[++i];
} else if ("-columns".equals(args[i])) {
columnNames = new StringBuffer(args[++i]);
while (i + 1 < args.length && !args[i + 1].startsWith("-")) {
columnNames.append(" ");
columnNames.append(args[++i]);
}
} else {
printUsage("Unsupported option " + args[i]);
}
}
if (indexDir == null || tableName == null || columnNames == null) {
printUsage("Index directory, table name and at least one column must " +
"be specified");
}
Configuration conf = new HBaseConfiguration();
if (iconfFile != null) {
// set index configuration content from a file
String content = readContent(iconfFile);
IndexConfiguration iconf = new IndexConfiguration();
// purely to validate, exception will be thrown if not valid
iconf.addFromXML(content);
conf.set("hbase.index.conf", content);
}
if (columnNames != null) {
JobConf jobConf = createJob(conf, numMapTasks, numReduceTasks, indexDir,
tableName, columnNames.toString());
JobClient.runJob(jobConf);
}
}
/**
* @param conf
* @param numMapTasks
* @param numReduceTasks
* @param indexDir
* @param tableName
* @param columnNames
* @return JobConf
*/
public JobConf createJob(Configuration conf, int numMapTasks,
int numReduceTasks, String indexDir, String tableName,
String columnNames) {
JobConf jobConf = new JobConf(conf, BuildTableIndex.class);
jobConf.setJobName("build index for table " + tableName);
jobConf.setNumMapTasks(numMapTasks);
// number of indexes to partition into
jobConf.setNumReduceTasks(numReduceTasks);
// use identity map (a waste, but just as an example)
IdentityTableMap.initJob(tableName, columnNames, IdentityTableMap.class,
jobConf);
// use IndexTableReduce to build a Lucene index
jobConf.setReducerClass(IndexTableReduce.class);
FileOutputFormat.setOutputPath(jobConf, new Path(indexDir));
jobConf.setOutputFormat(IndexOutputFormat.class);
return jobConf;
}
/*
* Read xml file of indexing configurations. The xml format is similar to
* hbase-default.xml and hadoop-default.xml. For an example configuration,
* see the <code>createIndexConfContent</code> method in TestTableIndex
* @param fileName File to read.
* @return XML configuration read from file
* @throws IOException
*/
private String readContent(String fileName) throws IOException {
File file = new File(fileName);
int length = (int) file.length();
if (length == 0) {
printUsage("Index configuration file " + fileName + " does not exist");
}
int bytesRead = 0;
byte[] bytes = new byte[length];
FileInputStream fis = new FileInputStream(file);
try {
// read entire file into content
while (bytesRead < length) {
int read = fis.read(bytes, bytesRead, length - bytesRead);
if (read > 0) {
bytesRead += read;
} else {
break;
}
}
} finally {
fis.close();
}
return new String(bytes, 0, bytesRead, HConstants.UTF8_ENCODING);
}
/**
* @param args
* @throws IOException
*/
public static void main(String[] args) throws IOException {
BuildTableIndex build = new BuildTableIndex();
build.run(args);
}
}
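A hedged sketch of driving the indexing job programmatically through createJob rather than the command line; the output directory, table and column names and the BuildTableIndexSketch class are invented.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.mapred.BuildTableIndex;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
class BuildTableIndexSketch {
  static void run() throws Exception {
    Configuration conf = new HBaseConfiguration();
    // One map task and two reduce tasks, i.e. two index partitions,
    // indexing two columns of 'mytable' into /tmp/myindex.
    JobConf job = new BuildTableIndex().createJob(conf, 1, 2,
        "/tmp/myindex", "mytable", "info:title info:body");
    JobClient.runJob(job);
  }
}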

View File

@ -1,40 +0,0 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import org.apache.hadoop.util.ProgramDriver;
/**
* Driver for hbase mapreduce jobs. Select which to run by passing
* name of job to this main.
*/
@Deprecated
public class Driver {
/**
* @param args
* @throws Throwable
*/
public static void main(String[] args) throws Throwable {
ProgramDriver pgd = new ProgramDriver();
pgd.addClass(RowCounter.NAME, RowCounter.class,
"Count rows in HBase table");
pgd.driver(args);
}
}

View File

@ -1,161 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Map;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
/**
* Extract grouping columns from input record
*/
@Deprecated
public class GroupingTableMap
extends MapReduceBase
implements TableMap<ImmutableBytesWritable,RowResult> {
/**
* JobConf parameter to specify the columns used to produce the key passed to
* collect from the map phase
*/
public static final String GROUP_COLUMNS =
"hbase.mapred.groupingtablemap.columns";
protected byte [][] m_columns;
/**
* Use this before submitting a TableMap job. It will appropriately set up the
* JobConf.
*
* @param table table to be processed
* @param columns space separated list of columns to fetch
* @param groupColumns space separated list of columns used to form the key
* used in collect
* @param mapper map class
* @param job job configuration object
*/
@SuppressWarnings("unchecked")
public static void initJob(String table, String columns, String groupColumns,
Class<? extends TableMap> mapper, JobConf job) {
TableMapReduceUtil.initTableMapJob(table, columns, mapper,
ImmutableBytesWritable.class, RowResult.class, job);
job.set(GROUP_COLUMNS, groupColumns);
}
@Override
public void configure(JobConf job) {
super.configure(job);
String[] cols = job.get(GROUP_COLUMNS, "").split(" ");
m_columns = new byte[cols.length][];
for(int i = 0; i < cols.length; i++) {
m_columns[i] = Bytes.toBytes(cols[i]);
}
}
/**
* Extract the grouping columns from value to construct a new key.
*
* Pass the new key and value to reduce.
* If any of the grouping columns are not found in the value, the record is skipped.
* @param key
* @param value
* @param output
* @param reporter
* @throws IOException
*/
public void map(ImmutableBytesWritable key, RowResult value,
OutputCollector<ImmutableBytesWritable,RowResult> output,
Reporter reporter) throws IOException {
byte[][] keyVals = extractKeyValues(value);
if(keyVals != null) {
ImmutableBytesWritable tKey = createGroupKey(keyVals);
output.collect(tKey, value);
}
}
/**
* Extract column values from the current record. This method returns
* null if any of the columns are not found.
*
* Override this method if you want to deal with nulls differently.
*
* @param r
* @return array of byte values
*/
protected byte[][] extractKeyValues(RowResult r) {
byte[][] keyVals = null;
ArrayList<byte[]> foundList = new ArrayList<byte[]>();
int numCols = m_columns.length;
if(numCols > 0) {
for (Map.Entry<byte [], Cell> e: r.entrySet()) {
byte [] column = e.getKey();
for (int i = 0; i < numCols; i++) {
if (Bytes.equals(column, m_columns[i])) {
foundList.add(e.getValue().getValue());
break;
}
}
}
if(foundList.size() == numCols) {
keyVals = foundList.toArray(new byte[numCols][]);
}
}
return keyVals;
}
/**
* Create a key by concatenating multiple column values.
* Override this function in order to produce different types of keys.
*
* @param vals
* @return key generated by concatenating multiple column values
*/
protected ImmutableBytesWritable createGroupKey(byte[][] vals) {
if(vals == null) {
return null;
}
StringBuilder sb = new StringBuilder();
for(int i = 0; i < vals.length; i++) {
if(i > 0) {
sb.append(" ");
}
try {
sb.append(new String(vals[i], HConstants.UTF8_ENCODING));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
}
return new ImmutableBytesWritable(Bytes.toBytes(sb.toString()));
}
}
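A small configuration sketch for the initJob helper above; the table, scanned columns, grouping column and the GroupingTableMapSketch class are hypothetical, and the rest of the job setup is omitted.
import org.apache.hadoop.hbase.mapred.GroupingTableMap;
import org.apache.hadoop.mapred.JobConf;
class GroupingTableMapSketch {
  static void configure(JobConf job) {
    // Scan two columns of 'mytable' and group map output by the value of info:category.
    GroupingTableMap.initJob("mytable", "info:category info:payload",
        "info:category", GroupingTableMap.class, job);
  }
}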

View File

@ -1,91 +0,0 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;
/**
* This is used to partition the output keys into groups of keys.
* Keys are grouped according to the regions that currently exist
* so that each reducer fills a single region, distributing load evenly.
*
* @param <K2>
* @param <V2>
*/
@Deprecated
public class HRegionPartitioner<K2,V2>
implements Partitioner<ImmutableBytesWritable, V2> {
private final Log LOG = LogFactory.getLog(TableInputFormat.class);
private HTable table;
private byte[][] startKeys;
public void configure(JobConf job) {
try {
this.table = new HTable(new HBaseConfiguration(job),
job.get(TableOutputFormat.OUTPUT_TABLE));
} catch (IOException e) {
LOG.error(e);
}
try {
this.startKeys = this.table.getStartKeys();
} catch (IOException e) {
LOG.error(e);
}
}
public int getPartition(ImmutableBytesWritable key,
V2 value, int numPartitions) {
byte[] region = null;
// Only one region; return partition 0
if (this.startKeys.length == 1){
return 0;
}
try {
// Not sure if this is cached after a split so we could have problems
// here if a region splits while mapping
region = table.getRegionLocation(key.get()).getRegionInfo().getStartKey();
} catch (IOException e) {
LOG.error(e);
}
for (int i = 0; i < this.startKeys.length; i++){
if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){
if (i >= numPartitions-1){
// cover the case where we have fewer reducers than regions.
return (Integer.toString(i).hashCode()
& Integer.MAX_VALUE) % numPartitions;
}
return i;
}
}
// if the above fails to find a matching start key, we still need to return something
return 0;
}
}
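A minimal wiring sketch for the partitioner above (table name and the HRegionPartitionerSketch class are hypothetical); it assumes the OUTPUT_TABLE constant on TableOutputFormat that the configure method reads.
import org.apache.hadoop.hbase.mapred.HRegionPartitioner;
import org.apache.hadoop.hbase.mapred.TableOutputFormat;
import org.apache.hadoop.mapred.JobConf;
class HRegionPartitionerSketch {
  static void configure(JobConf job) {
    // The partitioner looks up the target table from the job configuration.
    job.set(TableOutputFormat.OUTPUT_TABLE, "mytable");
    job.setPartitionerClass(HRegionPartitioner.class);
  }
}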

View File

@ -1,76 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
/**
* Pass the given key and record as-is to reduce
*/
@Deprecated
public class IdentityTableMap
extends MapReduceBase
implements TableMap<ImmutableBytesWritable, RowResult> {
/** constructor */
public IdentityTableMap() {
super();
}
/**
* Use this before submitting a TableMap job. It will
* appropriately set up the JobConf.
*
* @param table table name
* @param columns columns to scan
* @param mapper mapper class
* @param job job configuration
*/
@SuppressWarnings("unchecked")
public static void initJob(String table, String columns,
Class<? extends TableMap> mapper, JobConf job) {
TableMapReduceUtil.initTableMapJob(table, columns, mapper,
ImmutableBytesWritable.class,
RowResult.class, job);
}
/**
* Pass the key, value to reduce
* @param key
* @param value
* @param output
* @param reporter
* @throws IOException
*/
public void map(ImmutableBytesWritable key, RowResult value,
OutputCollector<ImmutableBytesWritable,RowResult> output,
Reporter reporter) throws IOException {
// pass the key and value through unchanged
output.collect(key, value);
}
}

View File

@ -1,61 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
/**
* Write each (key, record) pair to the table
*/
@Deprecated
public class IdentityTableReduce
extends MapReduceBase
implements TableReduce<ImmutableBytesWritable, BatchUpdate> {
@SuppressWarnings("unused")
private static final Log LOG =
LogFactory.getLog(IdentityTableReduce.class.getName());
/**
* No aggregation, output pairs of (key, record)
* @param key
* @param values
* @param output
* @param reporter
* @throws IOException
*/
public void reduce(ImmutableBytesWritable key, Iterator<BatchUpdate> values,
OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
Reporter reporter)
throws IOException {
while(values.hasNext()) {
output.collect(key, values.next());
}
}
}

View File

@ -1,423 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.ByteArrayInputStream;
import java.io.OutputStream;
import java.io.StringWriter;
import java.util.concurrent.ConcurrentHashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
/**
* Configuration parameters for building a Lucene index
*/
@Deprecated
public class IndexConfiguration extends Configuration {
private static final Log LOG = LogFactory.getLog(IndexConfiguration.class);
static final String HBASE_COLUMN_NAME = "hbase.column.name";
static final String HBASE_COLUMN_STORE = "hbase.column.store";
static final String HBASE_COLUMN_INDEX = "hbase.column.index";
static final String HBASE_COLUMN_TOKENIZE = "hbase.column.tokenize";
static final String HBASE_COLUMN_BOOST = "hbase.column.boost";
static final String HBASE_COLUMN_OMIT_NORMS = "hbase.column.omit.norms";
static final String HBASE_INDEX_ROWKEY_NAME = "hbase.index.rowkey.name";
static final String HBASE_INDEX_ANALYZER_NAME = "hbase.index.analyzer.name";
static final String HBASE_INDEX_MAX_BUFFERED_DOCS =
"hbase.index.max.buffered.docs";
static final String HBASE_INDEX_MAX_BUFFERED_DELS =
"hbase.index.max.buffered.dels";
static final String HBASE_INDEX_MAX_FIELD_LENGTH =
"hbase.index.max.field.length";
static final String HBASE_INDEX_MAX_MERGE_DOCS =
"hbase.index.max.merge.docs";
static final String HBASE_INDEX_MERGE_FACTOR = "hbase.index.merge.factor";
// double ramBufferSizeMB;
static final String HBASE_INDEX_SIMILARITY_NAME =
"hbase.index.similarity.name";
static final String HBASE_INDEX_USE_COMPOUND_FILE =
"hbase.index.use.compound.file";
static final String HBASE_INDEX_OPTIMIZE = "hbase.index.optimize";
public static class ColumnConf extends Properties {
private static final long serialVersionUID = 7419012290580607821L;
boolean getBoolean(String name, boolean defaultValue) {
String valueString = getProperty(name);
if ("true".equals(valueString))
return true;
else if ("false".equals(valueString))
return false;
else
return defaultValue;
}
void setBoolean(String name, boolean value) {
setProperty(name, Boolean.toString(value));
}
float getFloat(String name, float defaultValue) {
String valueString = getProperty(name);
if (valueString == null)
return defaultValue;
try {
return Float.parseFloat(valueString);
} catch (NumberFormatException e) {
return defaultValue;
}
}
void setFloat(String name, float value) {
setProperty(name, Float.toString(value));
}
}
private Map<String, ColumnConf> columnMap =
new ConcurrentHashMap<String, ColumnConf>();
public Iterator<String> columnNameIterator() {
return columnMap.keySet().iterator();
}
public boolean isIndex(String columnName) {
return getColumn(columnName).getBoolean(HBASE_COLUMN_INDEX, true);
}
public void setIndex(String columnName, boolean index) {
getColumn(columnName).setBoolean(HBASE_COLUMN_INDEX, index);
}
public boolean isStore(String columnName) {
return getColumn(columnName).getBoolean(HBASE_COLUMN_STORE, false);
}
public void setStore(String columnName, boolean store) {
getColumn(columnName).setBoolean(HBASE_COLUMN_STORE, store);
}
public boolean isTokenize(String columnName) {
return getColumn(columnName).getBoolean(HBASE_COLUMN_TOKENIZE, true);
}
public void setTokenize(String columnName, boolean tokenize) {
getColumn(columnName).setBoolean(HBASE_COLUMN_TOKENIZE, tokenize);
}
public float getBoost(String columnName) {
return getColumn(columnName).getFloat(HBASE_COLUMN_BOOST, 1.0f);
}
public void setBoost(String columnName, float boost) {
getColumn(columnName).setFloat(HBASE_COLUMN_BOOST, boost);
}
public boolean isOmitNorms(String columnName) {
return getColumn(columnName).getBoolean(HBASE_COLUMN_OMIT_NORMS, true);
}
public void setOmitNorms(String columnName, boolean omitNorms) {
getColumn(columnName).setBoolean(HBASE_COLUMN_OMIT_NORMS, omitNorms);
}
private ColumnConf getColumn(String columnName) {
ColumnConf column = columnMap.get(columnName);
if (column == null) {
column = new ColumnConf();
columnMap.put(columnName, column);
}
return column;
}
public String getAnalyzerName() {
return get(HBASE_INDEX_ANALYZER_NAME,
"org.apache.lucene.analysis.standard.StandardAnalyzer");
}
public void setAnalyzerName(String analyzerName) {
set(HBASE_INDEX_ANALYZER_NAME, analyzerName);
}
public int getMaxBufferedDeleteTerms() {
return getInt(HBASE_INDEX_MAX_BUFFERED_DELS, 1000);
}
public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) {
setInt(HBASE_INDEX_MAX_BUFFERED_DELS, maxBufferedDeleteTerms);
}
public int getMaxBufferedDocs() {
return getInt(HBASE_INDEX_MAX_BUFFERED_DOCS, 10);
}
public void setMaxBufferedDocs(int maxBufferedDocs) {
setInt(HBASE_INDEX_MAX_BUFFERED_DOCS, maxBufferedDocs);
}
public int getMaxFieldLength() {
return getInt(HBASE_INDEX_MAX_FIELD_LENGTH, Integer.MAX_VALUE);
}
public void setMaxFieldLength(int maxFieldLength) {
setInt(HBASE_INDEX_MAX_FIELD_LENGTH, maxFieldLength);
}
public int getMaxMergeDocs() {
return getInt(HBASE_INDEX_MAX_MERGE_DOCS, Integer.MAX_VALUE);
}
public void setMaxMergeDocs(int maxMergeDocs) {
setInt(HBASE_INDEX_MAX_MERGE_DOCS, maxMergeDocs);
}
public int getMergeFactor() {
return getInt(HBASE_INDEX_MERGE_FACTOR, 10);
}
public void setMergeFactor(int mergeFactor) {
setInt(HBASE_INDEX_MERGE_FACTOR, mergeFactor);
}
public String getRowkeyName() {
return get(HBASE_INDEX_ROWKEY_NAME, "ROWKEY");
}
public void setRowkeyName(String rowkeyName) {
set(HBASE_INDEX_ROWKEY_NAME, rowkeyName);
}
public String getSimilarityName() {
return get(HBASE_INDEX_SIMILARITY_NAME, null);
}
public void setSimilarityName(String similarityName) {
set(HBASE_INDEX_SIMILARITY_NAME, similarityName);
}
public boolean isUseCompoundFile() {
return getBoolean(HBASE_INDEX_USE_COMPOUND_FILE, false);
}
public void setUseCompoundFile(boolean useCompoundFile) {
setBoolean(HBASE_INDEX_USE_COMPOUND_FILE, useCompoundFile);
}
public boolean doOptimize() {
return getBoolean(HBASE_INDEX_OPTIMIZE, true);
}
public void setDoOptimize(boolean doOptimize) {
setBoolean(HBASE_INDEX_OPTIMIZE, doOptimize);
}
public void addFromXML(String content) {
try {
DocumentBuilder builder = DocumentBuilderFactory.newInstance()
.newDocumentBuilder();
Document doc = builder
.parse(new ByteArrayInputStream(content.getBytes()));
Element root = doc.getDocumentElement();
if (!"configuration".equals(root.getTagName())) {
LOG.fatal("bad conf file: top-level element not <configuration>");
}
NodeList props = root.getChildNodes();
for (int i = 0; i < props.getLength(); i++) {
Node propNode = props.item(i);
if (!(propNode instanceof Element)) {
continue;
}
Element prop = (Element) propNode;
if ("property".equals(prop.getTagName())) {
propertyFromXML(prop, null);
} else if ("column".equals(prop.getTagName())) {
columnConfFromXML(prop);
} else {
LOG.warn("bad conf content: element neither <property> nor <column>");
}
}
} catch (Exception e) {
LOG.fatal("error parsing conf content: " + e);
throw new RuntimeException(e);
}
}
private void propertyFromXML(Element prop, Properties properties) {
NodeList fields = prop.getChildNodes();
String attr = null;
String value = null;
for (int j = 0; j < fields.getLength(); j++) {
Node fieldNode = fields.item(j);
if (!(fieldNode instanceof Element)) {
continue;
}
Element field = (Element) fieldNode;
if ("name".equals(field.getTagName())) {
attr = ((Text) field.getFirstChild()).getData();
}
if ("value".equals(field.getTagName()) && field.hasChildNodes()) {
value = ((Text) field.getFirstChild()).getData();
}
}
if (attr != null && value != null) {
if (properties == null) {
set(attr, value);
} else {
properties.setProperty(attr, value);
}
}
}
private void columnConfFromXML(Element column) {
ColumnConf columnConf = new ColumnConf();
NodeList props = column.getChildNodes();
for (int i = 0; i < props.getLength(); i++) {
Node propNode = props.item(i);
if (!(propNode instanceof Element)) {
continue;
}
Element prop = (Element) propNode;
if ("property".equals(prop.getTagName())) {
propertyFromXML(prop, columnConf);
} else {
LOG.warn("bad conf content: element not <property>");
}
}
if (columnConf.getProperty(HBASE_COLUMN_NAME) != null) {
columnMap.put(columnConf.getProperty(HBASE_COLUMN_NAME), columnConf);
} else {
LOG.warn("bad column conf: name not specified");
}
}
public void write(OutputStream out) {
try {
Document doc = writeDocument();
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(out);
TransformerFactory transFactory = TransformerFactory.newInstance();
Transformer transformer = transFactory.newTransformer();
transformer.transform(source, result);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private Document writeDocument() {
Iterator<Map.Entry<String, String>> iter = iterator();
try {
Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument();
Element conf = doc.createElement("configuration");
doc.appendChild(conf);
conf.appendChild(doc.createTextNode("\n"));
Map.Entry<String, String> entry;
while (iter.hasNext()) {
entry = iter.next();
String name = entry.getKey();
String value = entry.getValue();
writeProperty(doc, conf, name, value);
}
Iterator<String> columnIter = columnNameIterator();
while (columnIter.hasNext()) {
writeColumn(doc, conf, columnIter.next());
}
return doc;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private void writeProperty(Document doc, Element parent, String name,
String value) {
Element propNode = doc.createElement("property");
parent.appendChild(propNode);
Element nameNode = doc.createElement("name");
nameNode.appendChild(doc.createTextNode(name));
propNode.appendChild(nameNode);
Element valueNode = doc.createElement("value");
valueNode.appendChild(doc.createTextNode(value));
propNode.appendChild(valueNode);
parent.appendChild(doc.createTextNode("\n"));
}
private void writeColumn(Document doc, Element parent, String columnName) {
Element column = doc.createElement("column");
parent.appendChild(column);
column.appendChild(doc.createTextNode("\n"));
ColumnConf columnConf = getColumn(columnName);
for (Map.Entry<Object, Object> entry : columnConf.entrySet()) {
if (entry.getKey() instanceof String
&& entry.getValue() instanceof String) {
writeProperty(doc, column, (String) entry.getKey(), (String) entry
.getValue());
}
}
}
@Override
public String toString() {
StringWriter writer = new StringWriter();
try {
Document doc = writeDocument();
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(writer);
TransformerFactory transFactory = TransformerFactory.newInstance();
Transformer transformer = transFactory.newTransformer();
transformer.transform(source, result);
} catch (Exception e) {
throw new RuntimeException(e);
}
return writer.toString();
}
}
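A sketch of the XML shape that addFromXML above accepts, inferred from its parsing code; the property names come from the constants in this class, while the column, values and the IndexConfigurationSketch class are example choices.
import org.apache.hadoop.hbase.mapred.IndexConfiguration;
class IndexConfigurationSketch {
  static IndexConfiguration build() {
    // The root <configuration> holds job-wide <property> entries plus per-<column>
    // blocks; each column block must carry an hbase.column.name property.
    String xml =
        "<configuration>"
      + " <property><name>hbase.index.rowkey.name</name><value>key</value></property>"
      + " <column>"
      + "  <property><name>hbase.column.name</name><value>info:title</value></property>"
      + "  <property><name>hbase.column.store</name><value>true</value></property>"
      + " </column>"
      + "</configuration>";
    IndexConfiguration conf = new IndexConfiguration();
    conf.addFromXML(xml);
    return conf;
  }
}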

View File

@ -1,164 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.Similarity;
/**
* Create a local index, unwrap Lucene documents created by reduce, add them to
* the index, and copy the index to the destination.
*/
@Deprecated
public class IndexOutputFormat extends
FileOutputFormat<ImmutableBytesWritable, LuceneDocumentWrapper> {
static final Log LOG = LogFactory.getLog(IndexOutputFormat.class);
private Random random = new Random();
@Override
public RecordWriter<ImmutableBytesWritable, LuceneDocumentWrapper>
getRecordWriter(final FileSystem fs, JobConf job, String name,
final Progressable progress)
throws IOException {
final Path perm = new Path(FileOutputFormat.getOutputPath(job), name);
final Path temp = job.getLocalPath("index/_"
+ Integer.toString(random.nextInt()));
LOG.info("To index into " + perm);
// delete old, if any
fs.delete(perm, true);
final IndexConfiguration indexConf = new IndexConfiguration();
String content = job.get("hbase.index.conf");
if (content != null) {
indexConf.addFromXML(content);
}
String analyzerName = indexConf.getAnalyzerName();
Analyzer analyzer;
try {
Class<?> analyzerClass = Class.forName(analyzerName);
analyzer = (Analyzer) analyzerClass.newInstance();
} catch (Exception e) {
throw new IOException("Error in creating an analyzer object "
+ analyzerName);
}
// build locally first
final IndexWriter writer = new IndexWriter(fs.startLocalOutput(perm, temp)
.toString(), analyzer, true);
// no delete, so no need for maxBufferedDeleteTerms
writer.setMaxBufferedDocs(indexConf.getMaxBufferedDocs());
writer.setMaxFieldLength(indexConf.getMaxFieldLength());
writer.setMaxMergeDocs(indexConf.getMaxMergeDocs());
writer.setMergeFactor(indexConf.getMergeFactor());
String similarityName = indexConf.getSimilarityName();
if (similarityName != null) {
try {
Class<?> similarityClass = Class.forName(similarityName);
Similarity similarity = (Similarity) similarityClass.newInstance();
writer.setSimilarity(similarity);
} catch (Exception e) {
throw new IOException("Error in creating a similarty object "
+ similarityName);
}
}
writer.setUseCompoundFile(indexConf.isUseCompoundFile());
return new RecordWriter<ImmutableBytesWritable, LuceneDocumentWrapper>() {
boolean closed;
private long docCount = 0;
public void write(ImmutableBytesWritable key,
LuceneDocumentWrapper value)
throws IOException {
// unwrap and index doc
Document doc = value.get();
writer.addDocument(doc);
docCount++;
progress.progress();
}
public void close(final Reporter reporter) throws IOException {
// spawn a thread to give progress heartbeats
Thread prog = new Thread() {
@Override
public void run() {
while (!closed) {
try {
reporter.setStatus("closing");
Thread.sleep(1000);
} catch (InterruptedException e) {
continue;
} catch (Throwable e) {
return;
}
}
}
};
try {
prog.start();
// optimize index
if (indexConf.doOptimize()) {
if (LOG.isInfoEnabled()) {
LOG.info("Optimizing index.");
}
writer.optimize();
}
// close index
writer.close();
if (LOG.isInfoEnabled()) {
LOG.info("Done indexing " + docCount + " docs.");
}
// copy to perm destination in dfs
fs.completeLocalOutput(perm, temp);
if (LOG.isInfoEnabled()) {
LOG.info("Copy done.");
}
} finally {
closed = true;
}
}
};
}
}

View File

@ -1,111 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Construct a Lucene document per row, which is consumed by IndexOutputFormat
* to build a Lucene index
*/
@Deprecated
public class IndexTableReduce extends MapReduceBase implements
Reducer<ImmutableBytesWritable, RowResult, ImmutableBytesWritable, LuceneDocumentWrapper> {
private static final Log LOG = LogFactory.getLog(IndexTableReduce.class);
private IndexConfiguration indexConf;
@Override
public void configure(JobConf job) {
super.configure(job);
indexConf = new IndexConfiguration();
String content = job.get("hbase.index.conf");
if (content != null) {
indexConf.addFromXML(content);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Index conf: " + indexConf);
}
}
@Override
public void close() throws IOException {
super.close();
}
public void reduce(ImmutableBytesWritable key, Iterator<RowResult> values,
OutputCollector<ImmutableBytesWritable, LuceneDocumentWrapper> output,
Reporter reporter)
throws IOException {
if (!values.hasNext()) {
return;
}
Document doc = new Document();
// index and store row key, row key already UTF-8 encoded
Field keyField = new Field(indexConf.getRowkeyName(),
Bytes.toString(key.get(), key.getOffset(), key.getLength()),
Field.Store.YES, Field.Index.UN_TOKENIZED);
keyField.setOmitNorms(true);
doc.add(keyField);
while (values.hasNext()) {
RowResult value = values.next();
// each column (name-value pair) is a field (name-value pair)
for (Map.Entry<byte [], Cell> entry : value.entrySet()) {
// name is already UTF-8 encoded
String column = Bytes.toString(entry.getKey());
byte[] columnValue = entry.getValue().getValue();
Field.Store store = indexConf.isStore(column)?
Field.Store.YES: Field.Store.NO;
Field.Index index = indexConf.isIndex(column)?
(indexConf.isTokenize(column)?
Field.Index.TOKENIZED: Field.Index.UN_TOKENIZED):
Field.Index.NO;
// UTF-8 encode value
Field field = new Field(column, Bytes.toString(columnValue),
store, index);
field.setBoost(indexConf.getBoost(column));
field.setOmitNorms(indexConf.isOmitNorms(column));
doc.add(field);
}
}
output.collect(key, new LuceneDocumentWrapper(doc));
}
}

View File

@ -1,56 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.DataInput;
import java.io.DataOutput;
import org.apache.hadoop.io.Writable;
import org.apache.lucene.document.Document;
/**
* A utility class used to pass a lucene document from reduce to OutputFormat.
* It doesn't really serialize/deserialize a lucene document.
*/
@Deprecated
public class LuceneDocumentWrapper implements Writable {
protected Document doc;
/**
* @param doc
*/
public LuceneDocumentWrapper(Document doc) {
this.doc = doc;
}
/**
* @return the document
*/
public Document get() {
return doc;
}
public void readFields(DataInput in) {
// intentionally left blank
}
public void write(DataOutput out) {
// intentionally left blank
}
}

View File

@ -1,137 +0,0 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* A job with a map to count rows.
* Map outputs table rows IF the input row has columns that have content.
* Uses an {@link IdentityReducer}
*/
@Deprecated
public class RowCounter extends Configured implements Tool {
// Name of this 'program'
static final String NAME = "rowcounter";
/**
* Mapper that runs the count.
*/
static class RowCounterMapper
implements TableMap<ImmutableBytesWritable, RowResult> {
private static enum Counters {ROWS}
public void map(ImmutableBytesWritable row, RowResult value,
OutputCollector<ImmutableBytesWritable, RowResult> output,
Reporter reporter)
throws IOException {
boolean content = false;
for (Map.Entry<byte [], Cell> e: value.entrySet()) {
Cell cell = e.getValue();
if (cell != null && cell.getValue().length > 0) {
content = true;
break;
}
}
if (!content) {
// Don't count rows that are all empty values.
return;
}
// Give out same value every time. We're only interested in the row/key
reporter.incrCounter(Counters.ROWS, 1);
}
public void configure(JobConf jc) {
// Nothing to do.
}
public void close() throws IOException {
// Nothing to do.
}
}
/**
* @param args
* @return the JobConf
* @throws IOException
*/
public JobConf createSubmittableJob(String[] args) throws IOException {
JobConf c = new JobConf(getConf(), getClass());
c.setJobName(NAME);
// Columns are space delimited
StringBuilder sb = new StringBuilder();
final int columnoffset = 2;
for (int i = columnoffset; i < args.length; i++) {
if (i > columnoffset) {
sb.append(" ");
}
sb.append(args[i]);
}
// Second argument is the table name.
TableMapReduceUtil.initTableMapJob(args[1], sb.toString(),
RowCounterMapper.class, ImmutableBytesWritable.class, RowResult.class, c);
c.setNumReduceTasks(0);
// First arg is the output directory.
FileOutputFormat.setOutputPath(c, new Path(args[0]));
return c;
}
static int printUsage() {
System.out.println(NAME +
" <outputdir> <tablename> <column1> [<column2>...]");
return -1;
}
public int run(final String[] args) throws Exception {
// Make sure there are at least 3 parameters
if (args.length < 3) {
System.err.println("ERROR: Wrong number of parameters: " + args.length);
return printUsage();
}
JobClient.runJob(createSubmittableJob(args));
return 0;
}
/**
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
HBaseConfiguration c = new HBaseConfiguration();
int errCode = ToolRunner.run(c, new RowCounter(), args);
System.exit(errCode);
}
}

View File

@ -1,6 +0,0 @@
# ResourceBundle properties file for RowCounter MR job
CounterGroupName= RowCounter
ROWS.name= Rows

View File

@ -1,83 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobConfigurable;
import org.apache.hadoop.util.StringUtils;
/**
* Convert HBase tabular data into a format that is consumable by Map/Reduce.
*/
@Deprecated
public class TableInputFormat extends TableInputFormatBase implements
JobConfigurable {
private final Log LOG = LogFactory.getLog(TableInputFormat.class);
/**
* space delimited list of columns
*/
public static final String COLUMN_LIST = "hbase.mapred.tablecolumns";
public void configure(JobConf job) {
Path[] tableNames = FileInputFormat.getInputPaths(job);
String colArg = job.get(COLUMN_LIST);
String[] colNames = colArg.split(" ");
byte [][] m_cols = new byte[colNames.length][];
for (int i = 0; i < m_cols.length; i++) {
m_cols[i] = Bytes.toBytes(colNames[i]);
}
setInputColumns(m_cols);
try {
setHTable(new HTable(new HBaseConfiguration(job), tableNames[0].getName()));
} catch (Exception e) {
LOG.error(StringUtils.stringifyException(e));
}
}
public void validateInput(JobConf job) throws IOException {
// expecting exactly one path
Path [] tableNames = FileInputFormat.getInputPaths(job);
if (tableNames == null || tableNames.length != 1) {
throw new IOException("expecting one table name");
}
// connected to table?
if (getHTable() == null) {
throw new IOException("could not connect to table '" +
tableNames[0].getName() + "'");
}
// expecting at least one column
String colArg = job.get(COLUMN_LIST);
if (colArg == null || colArg.length() == 0) {
throw new IOException("expecting at least one column");
}
}
}

View File

@ -1,352 +0,0 @@
/**
* Copyright 2009 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.hbase.filter.RowFilterSet;
import org.apache.hadoop.hbase.filter.StopRowFilter;
import org.apache.hadoop.hbase.filter.WhileMatchRowFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.StringUtils;
/**
* A Base for {@link TableInputFormat}s. Receives a {@link HTable}, a
* byte[] of input columns and optionally a {@link RowFilterInterface}.
* Subclasses may use other TableRecordReader implementations.
* <p>
* An example of a subclass:
* <pre>
* class ExampleTIF extends TableInputFormatBase implements JobConfigurable {
*
* public void configure(JobConf job) {
* HTable exampleTable = new HTable(new HBaseConfiguration(job),
* Bytes.toBytes("exampleTable"));
* // mandatory
* setHTable(exampleTable);
* Text[] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
* Bytes.toBytes("columnB") };
* // mandatory
* setInputColumns(inputColumns);
* RowFilterInterface exampleFilter = new RegExpRowFilter("keyPrefix.*");
* // optional
* setRowFilter(exampleFilter);
* }
*
* public void validateInput(JobConf job) throws IOException {
* }
* }
* </pre>
*/
@Deprecated
public abstract class TableInputFormatBase
implements InputFormat<ImmutableBytesWritable, RowResult> {
final Log LOG = LogFactory.getLog(TableInputFormatBase.class);
private byte [][] inputColumns;
private HTable table;
private TableRecordReader tableRecordReader;
private RowFilterInterface rowFilter;
/**
* Iterate over an HBase table data, return (Text, RowResult) pairs
*/
protected class TableRecordReader
implements RecordReader<ImmutableBytesWritable, RowResult> {
private byte [] startRow;
private byte [] endRow;
private byte [] lastRow;
private RowFilterInterface trrRowFilter;
private ResultScanner scanner;
private HTable htable;
private byte [][] trrInputColumns;
/**
* Restart from survivable exceptions by creating a new scanner.
*
* @param firstRow
* @throws IOException
*/
public void restart(byte[] firstRow) throws IOException {
if ((endRow != null) && (endRow.length > 0)) {
if (trrRowFilter != null) {
Scan scan = new Scan(firstRow, endRow);
scan.addColumns(trrInputColumns);
scan.setOldFilter(trrRowFilter);
this.scanner = this.htable.getScanner(scan);
} else {
LOG.debug("TIFB.restart, firstRow: " +
Bytes.toStringBinary(firstRow) + ", endRow: " +
Bytes.toStringBinary(endRow));
Scan scan = new Scan(firstRow, endRow);
scan.addColumns(trrInputColumns);
this.scanner = this.htable.getScanner(scan);
}
} else {
LOG.debug("TIFB.restart, firstRow: " +
Bytes.toStringBinary(firstRow) + ", no endRow");
Scan scan = new Scan(firstRow);
scan.addColumns(trrInputColumns);
// scan.setFilter(trrRowFilter);
this.scanner = this.htable.getScanner(scan);
}
}
/**
* Build the scanner. Not done in constructor to allow for extension.
*
* @throws IOException
*/
public void init() throws IOException {
restart(startRow);
}
/**
* @param htable the {@link HTable} to scan.
*/
public void setHTable(HTable htable) {
this.htable = htable;
}
/**
* @param inputColumns the columns to be placed in {@link RowResult}.
*/
public void setInputColumns(final byte [][] inputColumns) {
this.trrInputColumns = inputColumns;
}
/**
* @param startRow the first row in the split
*/
public void setStartRow(final byte [] startRow) {
this.startRow = startRow;
}
/**
*
* @param endRow the last row in the split
*/
public void setEndRow(final byte [] endRow) {
this.endRow = endRow;
}
/**
* @param rowFilter the {@link RowFilterInterface} to be used.
*/
public void setRowFilter(RowFilterInterface rowFilter) {
this.trrRowFilter = rowFilter;
}
public void close() {
this.scanner.close();
}
/**
* @return ImmutableBytesWritable
*
* @see org.apache.hadoop.mapred.RecordReader#createKey()
*/
public ImmutableBytesWritable createKey() {
return new ImmutableBytesWritable();
}
/**
* @return RowResult
*
* @see org.apache.hadoop.mapred.RecordReader#createValue()
*/
public RowResult createValue() {
return new RowResult();
}
public long getPos() {
// This should be the ordinal tuple in the range;
// not clear how to calculate...
return 0;
}
public float getProgress() {
// Depends on the total number of tuples and getPos
return 0;
}
/**
* @param key HStoreKey as input key.
* @param value MapWritable as input value
* @return true if there was more data
* @throws IOException
*/
public boolean next(ImmutableBytesWritable key, RowResult value)
throws IOException {
Result result;
try {
result = this.scanner.next();
} catch (UnknownScannerException e) {
LOG.debug("recovered from " + StringUtils.stringifyException(e));
restart(lastRow);
this.scanner.next(); // skip presumed already mapped row
result = this.scanner.next();
}
if (result != null && result.size() > 0) {
key.set(result.getRow());
lastRow = key.get();
Writables.copyWritable(result.getRowResult(), value);
return true;
}
return false;
}
}
/**
* Builds a TableRecordReader. If no TableRecordReader was provided, uses
* the default.
*
* @see org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit,
* JobConf, Reporter)
*/
public RecordReader<ImmutableBytesWritable, RowResult> getRecordReader(
InputSplit split, JobConf job, Reporter reporter)
throws IOException {
TableSplit tSplit = (TableSplit) split;
TableRecordReader trr = this.tableRecordReader;
// if no table record reader was provided use default
if (trr == null) {
trr = new TableRecordReader();
}
trr.setStartRow(tSplit.getStartRow());
trr.setEndRow(tSplit.getEndRow());
trr.setHTable(this.table);
trr.setInputColumns(this.inputColumns);
trr.setRowFilter(this.rowFilter);
trr.init();
return trr;
}
/**
* Calculates the splits that will serve as input for the map tasks.
* <ul>
* Splits are created in number equal to the smallest between numSplits and
* the number of {@link HRegion}s in the table. If the number of splits is
* smaller than the number of {@link HRegion}s then splits are spanned across
* multiple {@link HRegion}s and are grouped the most evenly possible. In the
* case splits are uneven the bigger splits are placed first in the
* {@link InputSplit} array.
*
* @param job the map task {@link JobConf}
* @param numSplits a hint to calculate the number of splits (mapred.map.tasks).
*
* @return the input splits
*
* @see org.apache.hadoop.mapred.InputFormat#getSplits(org.apache.hadoop.mapred.JobConf, int)
*/
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
if (this.table == null) {
throw new IOException("No table was provided");
}
if (this.inputColumns == null || this.inputColumns.length == 0) {
throw new IOException("Expecting at least one column");
}
// Validate the table before dereferencing it for start keys.
byte [][] startKeys = this.table.getStartKeys();
if (startKeys == null || startKeys.length == 0) {
throw new IOException("Expecting at least one region");
}
int realNumSplits = numSplits > startKeys.length? startKeys.length:
numSplits;
InputSplit[] splits = new InputSplit[realNumSplits];
int middle = startKeys.length / realNumSplits;
int startPos = 0;
for (int i = 0; i < realNumSplits; i++) {
int lastPos = startPos + middle;
lastPos = startKeys.length % realNumSplits > i ? lastPos + 1 : lastPos;
String regionLocation = table.getRegionLocation(startKeys[startPos]).
getServerAddress().getHostname();
splits[i] = new TableSplit(this.table.getTableName(),
startKeys[startPos], ((i + 1) < realNumSplits) ? startKeys[lastPos]:
HConstants.EMPTY_START_ROW, regionLocation);
LOG.info("split: " + i + "->" + splits[i]);
startPos = lastPos;
}
return splits;
}
/**
* @param inputColumns to be passed in {@link RowResult} to the map task.
*/
protected void setInputColumns(byte [][] inputColumns) {
this.inputColumns = inputColumns;
}
/**
* Allows subclasses to get the {@link HTable}.
*/
protected HTable getHTable() {
return this.table;
}
/**
* Allows subclasses to set the {@link HTable}.
*
* @param table to get the data from
*/
protected void setHTable(HTable table) {
this.table = table;
}
/**
* Allows subclasses to set the {@link TableRecordReader}.
*
* @param tableRecordReader
* to provide other {@link TableRecordReader} implementations.
*/
protected void setTableRecordReader(TableRecordReader tableRecordReader) {
this.tableRecordReader = tableRecordReader;
}
/**
* Allows subclasses to set the {@link RowFilterInterface} to be used.
*
* @param rowFilter
*/
protected void setRowFilter(RowFilterInterface rowFilter) {
this.rowFilter = rowFilter;
}
}

View File

@ -1,39 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Mapper;
/**
* Scan an HBase table to sort by a specified sort column.
* If the column does not exist, the record is not passed to Reduce.
*
* @param <K> WritableComparable key class
* @param <V> Writable value class
*/
@Deprecated
public interface TableMap<K extends WritableComparable<? super K>, V extends Writable>
extends Mapper<ImmutableBytesWritable, RowResult, K, V> {
}

View File

@ -1,184 +0,0 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
/**
* Utility for {@link TableMap} and {@link TableReduce}
*/
@Deprecated
@SuppressWarnings("unchecked")
public class TableMapReduceUtil {
/**
* Use this before submitting a TableMap job. It will
* appropriately set up the JobConf.
*
* @param table The table name to read from.
* @param columns The columns to scan.
* @param mapper The mapper class to use.
* @param outputKeyClass The class of the output key.
* @param outputValueClass The class of the output value.
* @param job The current job configuration to adjust.
*/
public static void initTableMapJob(String table, String columns,
Class<? extends TableMap> mapper,
Class<? extends WritableComparable> outputKeyClass,
Class<? extends Writable> outputValueClass, JobConf job) {
job.setInputFormat(TableInputFormat.class);
job.setMapOutputValueClass(outputValueClass);
job.setMapOutputKeyClass(outputKeyClass);
job.setMapperClass(mapper);
FileInputFormat.addInputPaths(job, table);
job.set(TableInputFormat.COLUMN_LIST, columns);
}
/**
* Use this before submitting a TableReduce job. It will
* appropriately set up the JobConf.
*
* @param table The output table.
* @param reducer The reducer class to use.
* @param job The current job configuration to adjust.
* @throws IOException When determining the region count fails.
*/
public static void initTableReduceJob(String table,
Class<? extends TableReduce> reducer, JobConf job)
throws IOException {
initTableReduceJob(table, reducer, job, null);
}
/**
* Use this before submitting a TableReduce job. It will
* appropriately set up the JobConf.
*
* @param table The output table.
* @param reducer The reducer class to use.
* @param job The current job configuration to adjust.
* @param partitioner Partitioner to use. Pass <code>null</code> to use
* default partitioner.
* @throws IOException When determining the region count fails.
*/
public static void initTableReduceJob(String table,
Class<? extends TableReduce> reducer, JobConf job, Class partitioner)
throws IOException {
job.setOutputFormat(TableOutputFormat.class);
job.setReducerClass(reducer);
job.set(TableOutputFormat.OUTPUT_TABLE, table);
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(BatchUpdate.class);
if (partitioner == HRegionPartitioner.class) {
job.setPartitionerClass(HRegionPartitioner.class);
HTable outputTable = new HTable(new HBaseConfiguration(job), table);
int regions = outputTable.getRegionsInfo().size();
if (job.getNumReduceTasks() > regions) {
job.setNumReduceTasks(outputTable.getRegionsInfo().size());
}
} else if (partitioner != null) {
job.setPartitionerClass(partitioner);
}
}
/**
* Ensures that the given number of reduce tasks for the given job
* configuration does not exceed the number of regions for the given table.
*
* @param table The table to get the region count for.
* @param job The current job configuration to adjust.
* @throws IOException When retrieving the table details fails.
*/
public static void limitNumReduceTasks(String table, JobConf job)
throws IOException {
HTable outputTable = new HTable(new HBaseConfiguration(job), table);
int regions = outputTable.getRegionsInfo().size();
if (job.getNumReduceTasks() > regions)
job.setNumReduceTasks(regions);
}
/**
* Ensures that the given number of map tasks for the given job
* configuration does not exceed the number of regions for the given table.
*
* @param table The table to get the region count for.
* @param job The current job configuration to adjust.
* @throws IOException When retrieving the table details fails.
*/
public static void limitNumMapTasks(String table, JobConf job)
throws IOException {
HTable outputTable = new HTable(new HBaseConfiguration(job), table);
int regions = outputTable.getRegionsInfo().size();
if (job.getNumMapTasks() > regions)
job.setNumMapTasks(regions);
}
/**
* Sets the number of reduce tasks for the given job configuration to the
* number of regions the given table has.
*
* @param table The table to get the region count for.
* @param job The current job configuration to adjust.
* @throws IOException When retrieving the table details fails.
*/
public static void setNumReduceTasks(String table, JobConf job)
throws IOException {
HTable outputTable = new HTable(new HBaseConfiguration(job), table);
int regions = outputTable.getRegionsInfo().size();
job.setNumReduceTasks(regions);
}
/**
* Sets the number of map tasks for the given job configuration to the
* number of regions the given table has.
*
* @param table The table to get the region count for.
* @param job The current job configuration to adjust.
* @throws IOException When retrieving the table details fails.
*/
public static void setNumMapTasks(String table, JobConf job)
throws IOException {
HTable outputTable = new HTable(new HBaseConfiguration(job), table);
int regions = outputTable.getRegionsInfo().size();
job.setNumMapTasks(regions);
}
/**
* Sets the number of rows to return and cache with each scanner iteration.
* Higher caching values will enable faster mapreduce jobs at the expense of
* requiring more heap to contain the cached rows.
*
* @param job The current job configuration to adjust.
* @param batchSize The number of rows to return in batch with each scanner
* iteration.
*/
public static void setScannerCaching(JobConf job, int batchSize) {
job.setInt("hbase.client.scanner.caching", batchSize);
}
}

View File

@ -1,106 +0,0 @@
/**
* Copyright 2009 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.hadoop.mapred.InvalidJobConfException;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;
/**
* Convert Map/Reduce output and write it to an HBase table
*/
@Deprecated
public class TableOutputFormat extends
FileOutputFormat<ImmutableBytesWritable, BatchUpdate> {
/** JobConf parameter that specifies the output table */
public static final String OUTPUT_TABLE = "hbase.mapred.outputtable";
private final Log LOG = LogFactory.getLog(TableOutputFormat.class);
/**
* Convert Reduce output (key, value) to (HStoreKey, KeyedDataArrayWritable)
* and write to an HBase table
*/
protected static class TableRecordWriter
implements RecordWriter<ImmutableBytesWritable, BatchUpdate> {
private HTable m_table;
/**
* Instantiate a TableRecordWriter with the HBase HClient for writing.
*
* @param table
*/
public TableRecordWriter(HTable table) {
m_table = table;
}
public void close(Reporter reporter)
throws IOException {
m_table.flushCommits();
}
public void write(ImmutableBytesWritable key,
BatchUpdate value) throws IOException {
m_table.commit(new BatchUpdate(value));
}
}
@Override
@SuppressWarnings("unchecked")
public RecordWriter getRecordWriter(FileSystem ignored,
JobConf job, String name, Progressable progress) throws IOException {
// expecting exactly one path
String tableName = job.get(OUTPUT_TABLE);
HTable table = null;
try {
table = new HTable(new HBaseConfiguration(job), tableName);
} catch(IOException e) {
LOG.error(e);
throw e;
}
table.setAutoFlush(false);
return new TableRecordWriter(table);
}
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job)
throws FileAlreadyExistsException, InvalidJobConfException, IOException {
String tableName = job.get(OUTPUT_TABLE);
if(tableName == null) {
throw new IOException("Must specify table name");
}
}
}

View File

@ -1,39 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.Reducer;
/**
* Write a table, sorting by the input key
*
* @param <K> key class
* @param <V> value class
*/
@Deprecated
@SuppressWarnings("unchecked")
public interface TableReduce<K extends WritableComparable, V extends Writable>
extends Reducer<K, V, ImmutableBytesWritable, BatchUpdate> {
}

View File

@ -1,113 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapred;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapred.InputSplit;
/**
* A table split corresponds to a key range [low, high)
*/
@Deprecated
public class TableSplit implements InputSplit, Comparable<TableSplit> {
private byte [] m_tableName;
private byte [] m_startRow;
private byte [] m_endRow;
private String m_regionLocation;
/** default constructor */
public TableSplit() {
this(HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY,
HConstants.EMPTY_BYTE_ARRAY, "");
}
/**
* Constructor
* @param tableName
* @param startRow
* @param endRow
* @param location
*/
public TableSplit(byte [] tableName, byte [] startRow, byte [] endRow,
final String location) {
this.m_tableName = tableName;
this.m_startRow = startRow;
this.m_endRow = endRow;
this.m_regionLocation = location;
}
/** @return table name */
public byte [] getTableName() {
return this.m_tableName;
}
/** @return starting row key */
public byte [] getStartRow() {
return this.m_startRow;
}
/** @return end row key */
public byte [] getEndRow() {
return this.m_endRow;
}
/** @return the region's hostname */
public String getRegionLocation() {
return this.m_regionLocation;
}
public String[] getLocations() {
return new String[] {this.m_regionLocation};
}
public long getLength() {
// Not clear how to obtain this... seems to be used only for sorting splits
return 0;
}
public void readFields(DataInput in) throws IOException {
this.m_tableName = Bytes.readByteArray(in);
this.m_startRow = Bytes.readByteArray(in);
this.m_endRow = Bytes.readByteArray(in);
this.m_regionLocation = Bytes.toString(Bytes.readByteArray(in));
}
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, this.m_tableName);
Bytes.writeByteArray(out, this.m_startRow);
Bytes.writeByteArray(out, this.m_endRow);
Bytes.writeByteArray(out, Bytes.toBytes(this.m_regionLocation));
}
@Override
public String toString() {
return m_regionLocation + ":" +
Bytes.toStringBinary(m_startRow) + "," + Bytes.toStringBinary(m_endRow);
}
public int compareTo(TableSplit o) {
return Bytes.compareTo(getStartRow(), o.getStartRow());
}
}

View File

@ -1,267 +0,0 @@
/*
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
Provides HBase <a href="http://wiki.apache.org/hadoop/HadoopMapReduce">MapReduce</a>
Input/OutputFormats, a table indexing MapReduce job, and utility methods.
<h2>Table of Contents</h2>
<ul>
<li><a href="#classpath">HBase, MapReduce and the CLASSPATH</a></li>
<li><a href="#sink">HBase as MapReduce job data source and sink</a></li>
<li><a href="#examples">Example Code</a></li>
</ul>
<h2><a name="classpath">HBase, MapReduce and the CLASSPATH</a></h2>
<p>MapReduce jobs deployed to a MapReduce cluster do not by default have access
to the HBase configuration under <code>$HBASE_CONF_DIR</code> nor to HBase classes.
You could add <code>hbase-site.xml</code> to $HADOOP_HOME/conf and add
<code>hbase-X.X.X.jar</code> to the <code>$HADOOP_HOME/lib</code> and copy these
changes across your cluster but the cleanest means of adding hbase configuration
and classes to the cluster <code>CLASSPATH</code> is by uncommenting
<code>HADOOP_CLASSPATH</code> in <code>$HADOOP_HOME/conf/hadoop-env.sh</code>
and adding the path to the hbase jar and <code>$HBASE_CONF_DIR</code> directory.
Then copy the amended configuration around the cluster.
You'll probably need to restart the MapReduce cluster if you want it to notice
the new configuration.
</p>
<p>For example, here is how you would amend <code>hadoop-env.sh</code> adding the
built hbase jar, hbase conf, and the <code>PerformanceEvaluation</code> class from
the built hbase test jar to the hadoop <code>CLASSPATH</code>:
<blockquote><pre># Extra Java CLASSPATH elements. Optional.
# export HADOOP_CLASSPATH=
export HADOOP_CLASSPATH=$HBASE_HOME/build/test:$HBASE_HOME/build/hbase-X.X.X.jar:$HBASE_HOME/build/hbase-X.X.X-test.jar:$HBASE_HOME/conf</pre></blockquote>
<p>Expand <code>$HBASE_HOME</code> in the above appropriately to suit your
local environment.</p>
<p>After copying the above change around your cluster, this is how you would run
the PerformanceEvaluation MR job to put up 4 clients (Presumes a ready mapreduce
cluster):
<blockquote><pre>$HADOOP_HOME/bin/hadoop org.apache.hadoop.hbase.PerformanceEvaluation sequentialWrite 4</pre></blockquote>
The PerformanceEvaluation class will be found on the CLASSPATH because you
added <code>$HBASE_HOME/build/test</code> to <code>HADOOP_CLASSPATH</code>.
</p>
<p>Another possibility, if for example you do not have access to hadoop-env.sh or
are unable to restart the hadoop cluster, is bundling the hbase jar into a mapreduce
job jar, adding it and its dependencies under the job jar <code>lib/</code>
directory and the hbase conf into a job jar <code>conf/</code> directory.
</p>
<h2><a name="sink">HBase as MapReduce job data source and sink</a></h2>
<p>HBase can be used as a data source, {@link org.apache.hadoop.hbase.mapred.TableInputFormat TableInputFormat},
and data sink, {@link org.apache.hadoop.hbase.mapred.TableOutputFormat TableOutputFormat}, for MapReduce jobs.
Writing MapReduce jobs that read or write HBase, you'll probably want to subclass
{@link org.apache.hadoop.hbase.mapred.TableMap TableMap} and/or
{@link org.apache.hadoop.hbase.mapred.TableReduce TableReduce}. See the do-nothing
pass-through classes {@link org.apache.hadoop.hbase.mapred.IdentityTableMap IdentityTableMap} and
{@link org.apache.hadoop.hbase.mapred.IdentityTableReduce IdentityTableReduce} for basic usage. For a more
involved example, see {@link org.apache.hadoop.hbase.mapred.BuildTableIndex BuildTableIndex}
or review the <code>org.apache.hadoop.hbase.mapred.TestTableMapReduce</code> unit test.
</p>
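<p>As a minimal sketch of such a subclass (the class name <code>ExampleTableMap</code> is
hypothetical), an identity-style map that hands each row straight through might look like:</p>
<blockquote><pre>
import java.io.IOException;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.mapred.TableMap;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class ExampleTableMap extends MapReduceBase
implements TableMap&lt;ImmutableBytesWritable, RowResult> {
  public void map(ImmutableBytesWritable key, RowResult value,
      OutputCollector&lt;ImmutableBytesWritable, RowResult> output,
      Reporter reporter)
  throws IOException {
    // Identity pass-through; a real subclass would transform the row here.
    output.collect(key, value);
  }
}
</pre></blockquote>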
<p>Running mapreduce jobs that have hbase as source or sink, you'll need to
specify source/sink table and column names in your configuration.</p>
<p>Reading from hbase, the TableInputFormat asks hbase for the list of
regions and makes a map-per-region or <code>mapred.map.tasks</code> maps,
whichever is smaller (if your job only has two maps, raise <code>mapred.map.tasks</code>
to a number greater than the number of regions). Maps will run on the adjacent TaskTracker
if you are running a TaskTracker and RegionServer per node.
Writing, it may make sense to avoid the reduce step and write yourself back into
hbase from inside your map. You'd do this when your job does not need the sort
and collation that mapreduce does on the map emitted data; on insert,
hbase 'sorts' so there is no point double-sorting (and shuffling data around
your mapreduce cluster) unless you need to. If you do not need the reduce,
you might just have your map emit counts of records processed just so the
framework's report at the end of your job has meaning or set the number of
reduces to zero and use TableOutputFormat. See example code
below. If running the reduce step makes sense in your case, it's usually better
to have lots of reducers so load is spread across the hbase cluster.</p>
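<p>A rough sketch of the driver for such a map-only upload follows; the table names, the
column specification, and the <code>UploadMap</code> class are hypothetical stand-ins:</p>
<blockquote><pre>
JobConf job = new JobConf(new HBaseConfiguration(), UploadMap.class);
job.setJobName("map-only-upload");
// Read rows of "sourceTable"; the map emits (row, BatchUpdate) pairs.
TableMapReduceUtil.initTableMapJob("sourceTable", "contents:", UploadMap.class,
  ImmutableBytesWritable.class, BatchUpdate.class, job);
// Skip the reduce and write the map output straight back into HBase.
job.setNumReduceTasks(0);
job.setOutputFormat(TableOutputFormat.class);
job.set(TableOutputFormat.OUTPUT_TABLE, "targetTable");
job.setOutputKeyClass(ImmutableBytesWritable.class);
job.setOutputValueClass(BatchUpdate.class);
JobClient.runJob(job);
</pre></blockquote>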
<p>There is also a new hbase partitioner that will run as many reducers as
currently existing regions. The
{@link org.apache.hadoop.hbase.mapred.HRegionPartitioner} is suitable
when your table is large and your upload is not such that it will greatly
alter the number of existing regions when done; otherwise use the default
partitioner.
</p>
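<p>Hooking the partitioner up is a one-liner through <code>TableMapReduceUtil</code>; the
table name and reducer class below are hypothetical:</p>
<blockquote><pre>
// One reducer per region of "targetTable", each writing only its own region's keys.
TableMapReduceUtil.initTableReduceJob("targetTable", ExampleTableReduce.class, job,
  HRegionPartitioner.class);
</pre></blockquote>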
<h2><a name="examples">Example Code</a></h2>
<h3>Sample Row Counter</h3>
<p>See {@link org.apache.hadoop.hbase.mapred.RowCounter}. You should be able to run
it by doing: <code>% ./bin/hadoop jar hbase-X.X.X.jar</code>. This will invoke
the hbase MapReduce Driver class. Select 'rowcounter' from the choice of jobs
offered. You may need to add the hbase conf directory to <code>$HADOOP_HOME/conf/hadoop-env.sh#HADOOP_CLASSPATH</code>
so the rowcounter gets pointed at the right hbase cluster (or, build a new jar
with an appropriate hbase-site.xml built into your job jar).
</p>
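<p>For reference, the rowcounter job expects an output directory, a table name, and one or
more columns; the path, table, and column names below are made up:</p>
<blockquote><pre>$HADOOP_HOME/bin/hadoop jar hbase-X.X.X.jar rowcounter /tmp/rowcount-out myTable myFamily:</pre></blockquote>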
<h3>PerformanceEvaluation</h3>
<p>See org.apache.hadoop.hbase.PerformanceEvaluation from hbase src/test. It runs
a mapreduce job to run concurrent clients reading and writing hbase.
</p>
<h3>Sample MR Bulk Uploader</h3>
<p>A students/classes example based on a contribution by Naama Kraus with lots of
documentation can be found over in src/examples/mapred.
It's the <code>org.apache.hadoop.hbase.mapred.SampleUploader</code> class.
Just copy it under src/java/org/apache/hadoop/hbase/mapred to compile and try it
(until we start generating an hbase examples jar). The class reads a data file
from HDFS and, per line, does an upload to HBase using TableReduce.
Read the class comment for specification of inputs, prerequisites, etc.
</p>
<h3>Example to bulk import/load a text file into an HTable
</h3>
<p>Here's a sample program from
<a href="http://www.spicylogic.com/allenday/blog/category/computing/distributed-systems/hadoop/hbase/">Allen Day</a>
that takes an HDFS text file path and an HBase table name as inputs, and loads the contents of the text file to the table
all up in the map phase.
</p>
<blockquote><pre>
package com.spicylogic.hbase;
package org.apache.hadoop.hbase.mapred;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Class that adds the parsed line from the input to hbase
* in the map function. Map has no emissions and job
* has no reduce.
*&#x2f;
public class BulkImport implements Tool {
private static final String NAME = "BulkImport";
private Configuration conf;
public static class InnerMap extends MapReduceBase implements Mapper&lt;LongWritable, Text, Text, Text> {
private HTable table;
private HBaseConfiguration HBconf;
public void map(LongWritable key, Text value,
OutputCollector&lt;Text, Text> output, Reporter reporter)
throws IOException {
if ( table == null )
throw new IOException("table is null");
// Split input line on tab character
String [] splits = value.toString().split("\t");
if ( splits.length != 4 )
return;
String rowID = splits[0];
int timestamp = Integer.parseInt( splits[1] );
String colID = splits[2];
String cellValue = splits[3];
reporter.setStatus("Map emitting cell for row='" + rowID +
"', column='" + colID + "', time='" + timestamp + "'");
BatchUpdate bu = new BatchUpdate( rowID );
if ( timestamp > 0 )
bu.setTimestamp( timestamp );
bu.put(colID, cellValue.getBytes());
table.commit( bu );
}
public void configure(JobConf job) {
HBconf = new HBaseConfiguration(job);
try {
table = new HTable( HBconf, job.get("input.table") );
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
public JobConf createSubmittableJob(String[] args) {
JobConf c = new JobConf(getConf(), BulkImport.class);
c.setJobName(NAME);
FileInputFormat.setInputPaths(c, new Path(args[0]));
c.set("input.table", args[1]);
c.setMapperClass(InnerMap.class);
c.setNumReduceTasks(0);
c.setOutputFormat(NullOutputFormat.class);
return c;
}
static int printUsage() {
System.err.println("Usage: " + NAME + " &lt;input> &lt;table_name>");
System.err.println("\twhere &lt;input> is a tab-delimited text file with 4 columns.");
System.err.println("\t\tcolumn 1 = row ID");
System.err.println("\t\tcolumn 2 = timestamp (use a negative value for current time)");
System.err.println("\t\tcolumn 3 = column ID");
System.err.println("\t\tcolumn 4 = cell value");
return -1;
}
public int run(@SuppressWarnings("unused") String[] args) throws Exception {
// Make sure there are exactly 2 parameters.
if (args.length != 2) {
return printUsage();
}
JobClient.runJob(createSubmittableJob(args));
return 0;
}
public Configuration getConf() {
return this.conf;
}
public void setConf(final Configuration c) {
this.conf = c;
}
public static void main(String[] args) throws Exception {
int errCode = ToolRunner.run(new Configuration(), new BulkImport(), args);
System.exit(errCode);
}
}
</pre></blockquote>
*/
package org.apache.hadoop.hbase.mapred;

View File

@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
@ -127,7 +128,14 @@ public class BuildTableIndex {
// number of indexes to partition into
job.setNumReduceTasks(numReduceTasks);
Scan scan = new Scan();
scan.addColumns(columnNames.toString());
for(String columnName : columnNames.toString().split(" ")) {
String [] fields = columnName.split(":");
if(fields.length == 1) {
scan.addFamily(Bytes.toBytes(fields[0]));
} else {
scan.addColumn(Bytes.toBytes(fields[0]), Bytes.toBytes(fields[1]));
}
}
// use identity map (a waste, but just as an example)
IdentityTableMapper.initJob(tableName, scan,
IdentityTableMapper.class, job);

View File

@ -19,7 +19,6 @@
*/
package org.apache.hadoop.hbase.mapreduce;
import org.apache.hadoop.hbase.migration.nineteen.HStoreFileToStoreFile;
import org.apache.hadoop.util.ProgramDriver;
/**
@ -35,9 +34,6 @@ public class Driver {
ProgramDriver pgd = new ProgramDriver();
pgd.addClass(RowCounter.NAME, RowCounter.class,
"Count rows in HBase table");
pgd.addClass(HStoreFileToStoreFile.JOBNAME,
HStoreFileToStoreFile.class,
"Bulk convert 0.19 HStoreFiles to 0.20 StoreFiles");
pgd.driver(args);
}
}

View File

@ -107,7 +107,8 @@ extends TableMapper<ImmutableBytesWritable,Result> implements Configurable {
int numCols = columns.length;
if (numCols > 0) {
for (KeyValue value: r.list()) {
byte [] column = value.getColumn();
byte [] column = KeyValue.makeColumn(value.getFamily(),
value.getQualifier());
for (int i = 0; i < numCols; i++) {
if (Bytes.equals(column, columns[i])) {
foundList.add(value.getValue());

View File

@ -75,7 +75,8 @@ implements Configurable {
// each column (name-value pair) is a field (name-value pair)
for (KeyValue kv: r.list()) {
// name is already UTF-8 encoded
String column = Bytes.toString(kv.getColumn());
String column = Bytes.toString(KeyValue.makeColumn(kv.getFamily(),
kv.getQualifier()));
byte[] columnValue = kv.getValue();
Field.Store store = indexConf.isStore(column)?
Field.Store.YES: Field.Store.NO;

View File

@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
@ -96,7 +97,14 @@ public class RowCounter {
sb.append(args[i]);
}
Scan scan = new Scan();
scan.addColumns(sb.toString());
for(String columnName : sb.toString().split(" ")) {
String [] fields = columnName.split(":");
if(fields.length == 1) {
scan.addFamily(Bytes.toBytes(fields[0]));
} else {
scan.addColumn(Bytes.toBytes(fields[0]), Bytes.toBytes(fields[1]));
}
}
// Second argument is the table name.
TableMapReduceUtil.initTableMapperJob(args[1], scan,
RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);

View File

@ -28,10 +28,9 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;

View File

@ -30,7 +30,6 @@ import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.util.Writables;
/** Instantiated to enable or disable a table */

View File

@ -19,11 +19,8 @@
*/
package org.apache.hadoop.hbase.master;
import java.util.Arrays;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.util.Bytes;

View File

@ -28,7 +28,6 @@ import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableNotDisabledException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.io.BatchUpdate;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;

View File

@ -19,7 +19,6 @@
*/
package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
/**

View File

@ -26,19 +26,16 @@ import java.util.List;
import java.util.Set;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
import org.apache.hadoop.hbase.RemoteExceptionHandler;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.HLog;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.io.RowResult;
/**
* Instantiated when a server's lease has expired, meaning it has crashed.

View File

@ -734,8 +734,6 @@ class RegionManager implements HConstants {
byte [] regionName = region.getRegionName();
Put put = new Put(regionName);
byte [] infoBytes = Writables.getBytes(info);
String infoString = new String(infoBytes);
put.add(CATALOG_FAMILY, REGIONINFO_QUALIFIER, Writables.getBytes(info));
server.put(metaRegionName, put);

View File

@ -1,188 +0,0 @@
/*
* Copyright 2009 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Migrate;
import org.apache.hadoop.hbase.util.FSUtils.DirFilter;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Mapper that rewrites hbase 0.19 HStoreFiles as 0.20 StoreFiles.
* Takes the passed directories as input and output. On startup, it does not
* check that the filesystem is of the 0.19 generation, just in case a previous
* run failed part way through, so it should be possible to rerun the MR job.
* It'll just fix the 0.19 regions found.
* If the input dir does not exist, it first crawls the filesystem to find the
* files to migrate, writing a file into the input directory. Next it starts up
* the MR job to rewrite the 0.19 HStoreFiles as 0.20 StoreFiles, deleting the
* old as it goes. The presumption is that there is only one mapfile per family
* Store, else stuff breaks; i.e. the 0.19 install was major compacted before
* migration began. If this job fails, fix the cause; then it should be possible
* to rerun the job. You may want to edit the generated file in the input dir
* first.
*/
public class HStoreFileToStoreFile extends Configured implements Tool {
static final Log LOG = LogFactory.getLog(HStoreFileToStoreFile.class);
public static final String JOBNAME = "hsf2sf";
HStoreFileToStoreFile() {
super();
}
public static class Map extends Mapper<LongWritable, Text, LongWritable, LongWritable> {
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, LongWritable, LongWritable>.Context context)
throws java.io.IOException, InterruptedException {
HBaseConfiguration c = new HBaseConfiguration(context.getConfiguration());
Path p = new Path(value.toString());
context.setStatus(key.toString() + " " + p.toString());
Migrate.rewrite(c, FileSystem.get(c), p);
}
}
private static void writeInputFiles(final HBaseConfiguration conf,
final FileSystem fs, final Path dir)
throws IOException {
if (fs.exists(dir)) {
LOG.warn("Input directory already exits. Using content for this MR job.");
return;
}
FSDataOutputStream out = fs.create(new Path(dir, "mapfiles"));
try {
gathermapfiles(conf, fs, out);
} finally {
if (out != null) out.close();
}
}
private static void gathermapfiles(final HBaseConfiguration conf,
final FileSystem fs, final FSDataOutputStream out)
throws IOException {
// Presumes any directory under hbase.rootdir is a table.
FileStatus [] tableDirs =
fs.listStatus(FSUtils.getRootDir(conf), new DirFilter(fs));
for (int i = 0; i < tableDirs.length; i++) {
// Inside a table, there are compaction.dir directories to skip.
// Otherwise, all else should be regions. Then in each region there should
// only be family directories. Under each of these there should be a 'mapfiles'
// and an 'info' directory, and in each of those only one file.
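// Illustrative layout only (the names are hypothetical, not from the source):
//   <hbase.rootdir>/mytable/1028785192/myfamily/mapfiles/7123512345
//   <hbase.rootdir>/mytable/1028785192/myfamily/info/7123512345
// The crawl below writes the qualified path of each such mapfile, one per
// line, into the job input file.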
Path d = tableDirs[i].getPath();
if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) continue;
FileStatus [] regionDirs = fs.listStatus(d, new DirFilter(fs));
for (int j = 0; j < regionDirs.length; j++) {
Path dd = regionDirs[j].getPath();
if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) continue;
// Else its a region name. Now look in region for families.
FileStatus [] familyDirs = fs.listStatus(dd, new DirFilter(fs));
for (int k = 0; k < familyDirs.length; k++) {
Path family = familyDirs[k].getPath();
FileStatus [] infoAndMapfile = fs.listStatus(family);
// Assert that only info and mapfile in family dir.
if (infoAndMapfile.length != 2) {
LOG.warn(family.toString() + " has more than just info and mapfile: " +
infoAndMapfile.length + ". Continuing...");
continue;
}
// Make sure directories are named 'info' or 'mapfiles'.
for (int ll = 0; ll < 2; ll++) {
if (infoAndMapfile[ll].getPath().getName().equals("info") ||
infoAndMapfile[ll].getPath().getName().equals("mapfiles"))
continue;
LOG.warn("Unexpected directory name: " +
infoAndMapfile[ll].getPath() + ". Continuing...");
continue;
}
// Now in family, there are 'mapfiles' and 'info' subdirs. Just
// look in the 'mapfile' subdir.
Path mfsdir = new Path(family, "mapfiles");
FileStatus [] familyStatus = fs.listStatus(mfsdir);
if (familyStatus == null || familyStatus.length > 1) {
LOG.warn(family.toString() + " has " +
((familyStatus == null) ? "null": familyStatus.length) +
" files. Continuing...");
continue;
}
if (familyStatus.length == 1) {
// If we got here, then this is good. Add the mapfile to out
String str = familyStatus[0].getPath().makeQualified(fs).toString();
LOG.info(str);
out.write(Bytes.toBytes(str + "\n"));
} else {
// Special case. Empty region. Remove the mapfiles and info dirs.
Path infodir = new Path(family, "info");
LOG.info("Removing " + mfsdir + " and " + infodir + " because empty");
fs.delete(mfsdir, true);
fs.delete(infodir, true);
}
}
}
}
}
public int run(final String[] args) throws Exception {
if (args.length < 2) {
System.err.println("ERROR: Wrong number of arguments: " + args.length);
System.err.println("Usage: " + getClass().getSimpleName() +
" <inputdir> <outputdir>");
ToolRunner.printGenericCommandUsage(System.err);
return -1;
}
Path input = new Path(args[0]);
HBaseConfiguration conf = (HBaseConfiguration)getConf();
FileSystem fs = FileSystem.get(conf);
writeInputFiles(conf, fs, input);
Job job = new Job(conf);
job.setJarByClass(HStoreFileToStoreFile.class);
job.setJobName(JOBNAME);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(Map.class);
job.setNumReduceTasks(0);
FileInputFormat.addInputPath(job, input);
Path output = new Path(args[1]);
FileOutputFormat.setOutputPath(job, output);
return job.waitForCompletion(true) ? 0 : 1;
}
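// Illustrative invocation only (the paths are hypothetical, not from the
// source); the class is a Tool, so it can be run via ToolRunner with two
// directory arguments, for example:
//   hbase org.apache.hadoop.hbase.migration.nineteen.HStoreFileToStoreFile \
//     /migration/input /migration/output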
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new HBaseConfiguration(),
new HStoreFileToStoreFile(), args);
System.exit(exitCode);
}
}

View File

@@ -1,738 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.hbase.ColumnNameParseException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
* A Key for a stored row.
*/
public class HStoreKey implements WritableComparable<HStoreKey>, HeapSize {
/**
* Colon character in UTF-8
*/
public static final char COLUMN_FAMILY_DELIMITER = ':';
private byte [] row = HConstants.EMPTY_BYTE_ARRAY;
private byte [] column = HConstants.EMPTY_BYTE_ARRAY;
private long timestamp = Long.MAX_VALUE;
/*
* regionInfo is only used as a hack to compare HSKs.
* It is not serialized. See https://issues.apache.org/jira/browse/HBASE-832
*/
private HRegionInfo regionInfo = null;
/**
* Estimated size tax paid for each instance of HSK. Estimate based on
* study of jhat and jprofiler numbers.
*/
// In jprofiler, says shallow size is 48 bytes. Add to it cost of two
// byte arrays and then something for the HRI hosting.
public static final int ESTIMATED_HEAP_TAX = 48;
/** Default constructor used in conjunction with Writable interface */
public HStoreKey() {
super();
}
/**
* Create an HStoreKey specifying only the row
* The column defaults to the empty string, the time stamp defaults to
* Long.MAX_VALUE and the table defaults to empty string
*
* @param row - row key
*/
public HStoreKey(final byte [] row) {
this(row, Long.MAX_VALUE);
}
/**
* Create an HStoreKey specifying only the row
* The column defaults to the empty string, the time stamp defaults to
* Long.MAX_VALUE and the table defaults to empty string
*
* @param row - row key
*/
public HStoreKey(final String row) {
this(row, Long.MAX_VALUE);
}
/**
* Create an HStoreKey specifying the row and region info
* The column and table names default to the empty string
*
* @param row row key
* @param hri
*/
public HStoreKey(final byte [] row, final HRegionInfo hri) {
this(row, HConstants.EMPTY_BYTE_ARRAY, hri);
}
/**
* Create an HStoreKey specifying the row, timestamp and region info
* The column and table names default to the empty string
*
* @param row row key
* @param timestamp timestamp value
* @param hri HRegionInfo
*/
public HStoreKey(final byte [] row, long timestamp, final HRegionInfo hri) {
this(row, HConstants.EMPTY_BYTE_ARRAY, timestamp, hri);
}
/**
* Create an HStoreKey specifying the row and timestamp
* The column and table names default to the empty string
*
* @param row row key
* @param timestamp timestamp value
*/
public HStoreKey(final byte [] row, long timestamp) {
this(row, HConstants.EMPTY_BYTE_ARRAY, timestamp);
}
/**
* Create an HStoreKey specifying the row and timestamp
* The column and table names default to the empty string
*
* @param row row key
* @param timestamp timestamp value
*/
public HStoreKey(final String row, long timestamp) {
this (row, "", timestamp, new HRegionInfo());
}
/**
* Create an HStoreKey specifying the row and column names
* The timestamp defaults to LATEST_TIMESTAMP
* and table name defaults to the empty string
*
* @param row row key
* @param column column key
*/
public HStoreKey(final String row, final String column) {
this(row, column, HConstants.LATEST_TIMESTAMP, new HRegionInfo());
}
/**
* Create an HStoreKey specifying the row and column names
* The timestamp defaults to LATEST_TIMESTAMP
* and table name defaults to the empty string
*
* @param row row key
* @param column column key
*/
public HStoreKey(final byte [] row, final byte [] column) {
this(row, column, HConstants.LATEST_TIMESTAMP);
}
/**
* Create an HStoreKey specifying the row, column name and region info
* The timestamp defaults to LATEST_TIMESTAMP
*
* @param row row key
* @param column column key
* @param regionInfo region info
*/
public HStoreKey(final byte [] row,
final byte [] column, final HRegionInfo regionInfo) {
this(row, column, HConstants.LATEST_TIMESTAMP, regionInfo);
}
/**
* Create an HStoreKey specifying all the fields
* Does not make copies of the passed byte arrays. Presumes the passed
* arrays immutable.
* @param row row key
* @param column column key
* @param timestamp timestamp value
* @param regionInfo region info
*/
public HStoreKey(final String row,
final String column, long timestamp, final HRegionInfo regionInfo) {
this (Bytes.toBytes(row), Bytes.toBytes(column),
timestamp, regionInfo);
}
/**
* Create an HStoreKey specifying all the fields with unspecified region info
* Does not make copies of the passed byte arrays. Presumes the passed
* arrays immutable.
* @param row row key
* @param column column key
* @param timestamp timestamp value
*/
public HStoreKey(final byte [] row, final byte [] column, long timestamp) {
this(row, column, timestamp, null);
}
/**
* Create an HStoreKey specifying all the fields with specified region info
* Does not make copies of the passed byte arrays. Presumes the passed
* arrays immutable.
* @param row row key
* @param column column key
* @param timestamp timestamp value
* @param regionInfo region info
*/
public HStoreKey(final byte [] row,
final byte [] column, long timestamp, final HRegionInfo regionInfo) {
// Does not make copies; presumes the passed arrays are immutable
this.row = row;
this.column = column;
this.timestamp = timestamp;
this.regionInfo = regionInfo;
}
/**
* Constructs a new HStoreKey from another
*
* @param other the source key
*/
public HStoreKey(HStoreKey other) {
this(other.getRow(), other.getColumn(), other.getTimestamp(),
other.getHRegionInfo());
}
/**
* Change the value of the row key
*
* @param newrow new row key value
*/
public void setRow(byte [] newrow) {
this.row = newrow;
}
/**
* Change the value of the column in this key
*
* @param c new column value
*/
public void setColumn(byte [] c) {
this.column = c;
}
/**
* Change the value of the timestamp field
*
* @param timestamp new timestamp value
*/
public void setVersion(long timestamp) {
this.timestamp = timestamp;
}
/**
* Set the value of this HStoreKey from the supplied key
*
* @param k key value to copy
*/
public void set(HStoreKey k) {
this.row = k.getRow();
this.column = k.getColumn();
this.timestamp = k.getTimestamp();
}
/** @return value of row key */
public byte [] getRow() {
return row;
}
/** @return value of column */
public byte [] getColumn() {
return this.column;
}
/** @return value of timestamp */
public long getTimestamp() {
return this.timestamp;
}
/** @return value of regioninfo */
public HRegionInfo getHRegionInfo() {
return this.regionInfo;
}
/**
* @param hri
*/
public void setHRegionInfo(final HRegionInfo hri) {
this.regionInfo = hri;
}
/**
* Compares the row and column of two keys
* @param other Key to compare against. Compares row and column.
* @return True if same row and column.
* @see #matchesWithoutColumn(HStoreKey)
* @see #matchesRowFamily(HStoreKey)
*/
public boolean matchesRowCol(HStoreKey other) {
return HStoreKey.equalsTwoRowKeys(getHRegionInfo(), getRow(), other.getRow()) &&
Bytes.equals(getColumn(), other.getColumn());
}
/**
* Compares the row and timestamp of two keys
*
* @param other Key to compare against. Compares row and timestamp.
*
* @return True if same row and this key's timestamp is greater than or equal
* to that of <code>other</code>
* @see #matchesRowCol(HStoreKey)
* @see #matchesRowFamily(HStoreKey)
*/
public boolean matchesWithoutColumn(HStoreKey other) {
return equalsTwoRowKeys(getHRegionInfo(), getRow(), other.getRow()) &&
getTimestamp() >= other.getTimestamp();
}
/**
* Compares the row and column family of two keys
*
* @param that Key to compare against. Compares row and column family
*
* @return true if same row and column family
* @see #matchesRowCol(HStoreKey)
* @see #matchesWithoutColumn(HStoreKey)
*/
public boolean matchesRowFamily(HStoreKey that) {
int delimiterIndex = getFamilyDelimiterIndex(getColumn());
return equalsTwoRowKeys(getHRegionInfo(), getRow(), that.getRow()) &&
Bytes.compareTo(getColumn(), 0, delimiterIndex, that.getColumn(), 0,
delimiterIndex) == 0;
}
@Override
public String toString() {
return Bytes.toString(this.row) + "/" + Bytes.toString(this.column) + "/" +
timestamp;
}
@Override
public boolean equals(Object obj) {
HStoreKey other = (HStoreKey)obj;
// Do a quick check.
if (this.row.length != other.row.length ||
this.column.length != other.column.length ||
this.timestamp != other.timestamp) {
return false;
}
return compareTo(other) == 0;
}
@Override
public int hashCode() {
int result = Bytes.hashCode(getRow());
result ^= Bytes.hashCode(getColumn());
result ^= getTimestamp();
return result;
}
// Comparable
public int compareTo(final HStoreKey o) {
return compareTo(this.regionInfo, this, o);
}
static int compareTo(final HRegionInfo hri, final HStoreKey left,
final HStoreKey right) {
// We can be passed null
if (left == null && right == null) return 0;
if (left == null) return -1;
if (right == null) return 1;
int result = compareTwoRowKeys(hri, left.getRow(), right.getRow());
if (result != 0) {
return result;
}
result = left.getColumn() == null && right.getColumn() == null? 0:
left.getColumn() == null && right.getColumn() != null? -1:
left.getColumn() != null && right.getColumn() == null? 1:
Bytes.compareTo(left.getColumn(), right.getColumn());
if (result != 0) {
return result;
}
// The below older timestamps sorting ahead of newer timestamps looks
// wrong but it is intentional. This way, newer timestamps are first
// found when we iterate over a memcache and newer versions are the
// first we trip over when reading from a store file.
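// Illustrative example, not in the original source: for two keys with the
// same row and column, one at timestamp 10 and one at timestamp 5, the block
// below makes the timestamp-10 key compare as smaller, so the newer cell
// sorts first.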
if (left.getTimestamp() < right.getTimestamp()) {
result = 1;
} else if (left.getTimestamp() > right.getTimestamp()) {
result = -1;
}
// Because of HBASE-877, our BeforeThisStoreKey trick no longer works in
// mapfiles and so instead we need to do this weird check here below.
return result == 0 && left instanceof BeforeThisStoreKey? -1:
result == 0 && right instanceof BeforeThisStoreKey? 1:
result;
}
/**
* @param column
* @return New byte array that holds <code>column</code> family prefix only
* (Does not include the colon DELIMITER).
* @throws ColumnNameParseException
* @see #parseColumn(byte[])
*/
public static byte [] getFamily(final byte [] column)
throws ColumnNameParseException {
int index = getFamilyDelimiterIndex(column);
if (index <= 0) {
throw new ColumnNameParseException("Missing ':' delimiter between " +
"column family and qualifier in the passed column name <" +
Bytes.toString(column) + ">");
}
byte [] result = new byte[index];
System.arraycopy(column, 0, result, 0, index);
return result;
}
/**
* @param column
* @return Return hash of family portion of passed column.
*/
public static Integer getFamilyMapKey(final byte [] column) {
int index = getFamilyDelimiterIndex(column);
// If index < -1, presume passed column is a family name absent colon
// delimiter
return Bytes.mapKey(column, index > 0? index: column.length);
}
/**
* @param family
* @param column
* @return True if <code>column</code> has a family of <code>family</code>.
*/
public static boolean matchingFamily(final byte [] family,
final byte [] column) {
// Make sure index of the ':' is at same offset.
int index = getFamilyDelimiterIndex(column);
if (index != family.length) {
return false;
}
return Bytes.compareTo(family, 0, index, column, 0, index) == 0;
}
/**
* @param family
* @return Return <code>family</code> plus the family delimiter.
*/
public static byte [] addDelimiter(final byte [] family) {
// Manufacture key by adding delimiter to the passed in colFamily.
byte [] familyPlusDelimiter = new byte [family.length + 1];
System.arraycopy(family, 0, familyPlusDelimiter, 0, family.length);
familyPlusDelimiter[family.length] = HStoreKey.COLUMN_FAMILY_DELIMITER;
return familyPlusDelimiter;
}
/**
* @param column
* @return New byte array that holds <code>column</code> qualifier suffix.
* @see #parseColumn(byte[])
*/
public static byte [] getQualifier(final byte [] column) {
int index = getFamilyDelimiterIndex(column);
int len = column.length - (index + 1);
byte [] result = new byte[len];
System.arraycopy(column, index + 1, result, 0, len);
return result;
}
/**
* @param c Column name
* @return Return array of size two whose first element has the family
* prefix of passed column <code>c</code> and whose second element is the
* column qualifier.
* @throws ColumnNameParseException
*/
public static byte [][] parseColumn(final byte [] c)
throws ColumnNameParseException {
byte [][] result = new byte [2][];
int index = getFamilyDelimiterIndex(c);
if (index == -1) {
throw new ColumnNameParseException("Impossible column name: " + c);
}
result[0] = new byte [index];
System.arraycopy(c, 0, result[0], 0, index);
int len = c.length - (index + 1);
result[1] = new byte[len];
System.arraycopy(c, index + 1 /*Skip delimiter*/, result[1], 0,
len);
return result;
}
/**
* @param b
* @return Index of the family-qualifier colon delimiter character in the
* passed buffer, or -1 if it is not present.
*/
public static int getFamilyDelimiterIndex(final byte [] b) {
if (b == null) {
throw new NullPointerException();
}
int result = -1;
for (int i = 0; i < b.length; i++) {
if (b[i] == COLUMN_FAMILY_DELIMITER) {
result = i;
break;
}
}
return result;
}
/**
* Returns row and column bytes out of an HStoreKey.
* @param hsk Store key.
* @return byte array encoding of HStoreKey
*/
public static byte[] getBytes(final HStoreKey hsk) {
return Bytes.add(hsk.getRow(), hsk.getColumn());
}
/**
* Utility method to compare two row keys.
* This is required because of the meta delimiters.
* This is a hack.
* @param regionInfo
* @param rowA
* @param rowB
* @return value of the comparison
*/
public static int compareTwoRowKeys(HRegionInfo regionInfo,
byte[] rowA, byte[] rowB) {
if (regionInfo != null && regionInfo.isMetaRegion()) {
byte[][] keysA = stripStartKeyMeta(rowA);
byte[][] keysB = stripStartKeyMeta(rowB);
int rowCompare = Bytes.compareTo(keysA[0], keysB[0]);
if(rowCompare == 0)
rowCompare = Bytes.compareTo(keysA[1], keysB[1]);
return rowCompare;
}
return Bytes.compareTo(rowA, rowB);
}
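// Illustrative example (row values hypothetical, assuming the ',' meta row
// delimiter): a meta row such as "t1,rowA,1230000000" is split at its last
// META_ROW_DELIMITER by stripStartKeyMeta (below) into ("t1,rowA",
// "1230000000"), and the two parts are compared in that order rather than
// comparing the raw bytes.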
/**
* Utility method to check if two row keys are equal.
* This is required because of the meta delimiters
* This is a hack
* @param regionInfo
* @param rowA
* @param rowB
* @return if it's equal
*/
public static boolean equalsTwoRowKeys(HRegionInfo regionInfo,
byte[] rowA, byte[] rowB) {
return ((rowA == null) && (rowB == null)) ? true:
(rowA == null) || (rowB == null) || (rowA.length != rowB.length) ? false:
compareTwoRowKeys(regionInfo,rowA,rowB) == 0;
}
private static byte[][] stripStartKeyMeta(byte[] rowKey) {
int offset = -1;
for (int i = rowKey.length - 1; i > 0; i--) {
if (rowKey[i] == HConstants.META_ROW_DELIMITER) {
offset = i;
break;
}
}
byte [] row = rowKey;
byte [] timestamp = HConstants.EMPTY_BYTE_ARRAY;
if (offset != -1) {
row = new byte[offset];
System.arraycopy(rowKey, 0, row, 0, offset);
timestamp = new byte[rowKey.length - offset - 1];
System.arraycopy(rowKey, offset+1, timestamp, 0,rowKey.length - offset - 1);
}
byte[][] elements = new byte[2][];
elements[0] = row;
elements[1] = timestamp;
return elements;
}
// Writable
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, this.row);
Bytes.writeByteArray(out, this.column);
out.writeLong(timestamp);
}
public void readFields(DataInput in) throws IOException {
this.row = Bytes.readByteArray(in);
this.column = Bytes.readByteArray(in);
this.timestamp = in.readLong();
}
public long heapSize() {
return getRow().length + Bytes.ESTIMATED_HEAP_TAX +
getColumn().length + Bytes.ESTIMATED_HEAP_TAX +
ESTIMATED_HEAP_TAX;
}
/**
* Passed as comparator for memcache and for store files. See HBASE-868.
*/
public static class HStoreKeyWritableComparator extends WritableComparator {
private final HRegionInfo hri;
/** @param hri */
public HStoreKeyWritableComparator(final HRegionInfo hri) {
super(HStoreKey.class);
this.hri = hri;
}
@SuppressWarnings("unchecked")
@Override
public int compare(final WritableComparable left,
final WritableComparable right) {
return compareTo(this.hri, (HStoreKey)left, (HStoreKey)right);
}
}
/**
* Pass this class into {@link org.apache.hadoop.io.MapFile}.getClosest when
* searching for the key that comes BEFORE this one but NOT this one. This
* class will return > 0 when asked to compare against itself rather than 0.
* This is a hack for case where getClosest returns a deleted key and we want
* to get the previous. Can't unless we use this class; it'll just keep
* returning us the deleted key (getClosest gets exact or nearest before when
* you pass true argument). TODO: Throw this class away when MapFile has
* a real 'previous' method. See HBASE-751.
*/
public static class BeforeThisStoreKey extends HStoreKey {
private final HStoreKey beforeThisKey;
/**
* @param beforeThisKey
*/
public BeforeThisStoreKey(final HStoreKey beforeThisKey) {
super();
this.beforeThisKey = beforeThisKey;
}
@Override
public int compareTo(final HStoreKey o) {
int result = this.beforeThisKey.compareTo(o);
return result == 0? -1: result;
}
@Override
public boolean equals(Object obj) {
return false;
}
@Override
public byte[] getColumn() {
return this.beforeThisKey.getColumn();
}
@Override
public byte[] getRow() {
return this.beforeThisKey.getRow();
}
@Override
public long heapSize() {
return this.beforeThisKey.heapSize();
}
@Override
public long getTimestamp() {
return this.beforeThisKey.getTimestamp();
}
@Override
public int hashCode() {
return this.beforeThisKey.hashCode();
}
@Override
public boolean matchesRowCol(HStoreKey other) {
return this.beforeThisKey.matchesRowCol(other);
}
@Override
public boolean matchesRowFamily(HStoreKey that) {
return this.beforeThisKey.matchesRowFamily(that);
}
@Override
public boolean matchesWithoutColumn(HStoreKey other) {
return this.beforeThisKey.matchesWithoutColumn(other);
}
@Override
public void readFields(DataInput in) throws IOException {
this.beforeThisKey.readFields(in);
}
@Override
public void set(HStoreKey k) {
this.beforeThisKey.set(k);
}
@Override
public void setColumn(byte[] c) {
this.beforeThisKey.setColumn(c);
}
@Override
public void setRow(byte[] newrow) {
this.beforeThisKey.setRow(newrow);
}
@Override
public void setVersion(long timestamp) {
this.beforeThisKey.setVersion(timestamp);
}
@Override
public String toString() {
return this.beforeThisKey.toString();
}
@Override
public void write(DataOutput out) throws IOException {
this.beforeThisKey.write(out);
}
@Override
public HRegionInfo getHRegionInfo() {
return this.beforeThisKey.getHRegionInfo();
}
@Override
public void setHRegionInfo(final HRegionInfo hri) {
this.beforeThisKey.setHRegionInfo(hri);
}
}
}

View File

@@ -1,249 +0,0 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.io;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.BloomFilter;
import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.Key;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/**
* On write, all keys are added to a bloom filter. On read, all keys are
* tested first against bloom filter. Keys are HStoreKey. If passed bloom
* filter is null, just passes invocation to parent.
*/
// TODO: fix the generic warnings coming from the MapFile methods
@SuppressWarnings("unchecked")
public class BloomFilterMapFile extends HBaseMapFile {
@SuppressWarnings("hiding")
static final Log LOG = LogFactory.getLog(BloomFilterMapFile.class);
protected static final String BLOOMFILTER_FILE_NAME = "filter";
public static class Reader extends HBaseReader {
private final BloomFilter bloomFilter;
/**
* @param fs
* @param dirName
* @param conf
* @param filter
* @param blockCacheEnabled
* @param hri
* @throws IOException
*/
public Reader(FileSystem fs, String dirName, Configuration conf,
final boolean filter, final boolean blockCacheEnabled,
HRegionInfo hri)
throws IOException {
super(fs, dirName, conf, blockCacheEnabled, hri);
if (filter) {
this.bloomFilter = loadBloomFilter(fs, dirName);
} else {
this.bloomFilter = null;
}
}
private BloomFilter loadBloomFilter(FileSystem fs, String dirName)
throws IOException {
Path filterFile = new Path(dirName, BLOOMFILTER_FILE_NAME);
if(!fs.exists(filterFile)) {
LOG.warn("FileNotFound: " + filterFile + "; proceeding without");
return null;
}
BloomFilter filter = new BloomFilter();
FSDataInputStream in = fs.open(filterFile);
try {
filter.readFields(in);
} finally {
in.close();
}
return filter;
}
@Override
public Writable get(WritableComparable key, Writable val)
throws IOException {
if (bloomFilter == null) {
return super.get(key, val);
}
if(bloomFilter.membershipTest(getBloomFilterKey(key))) {
if (LOG.isDebugEnabled()) {
LOG.debug("bloom filter reported that key exists");
}
return super.get(key, val);
}
if (LOG.isDebugEnabled()) {
LOG.debug("bloom filter reported that key does not exist");
}
return null;
}
@Override
public WritableComparable getClosest(WritableComparable key,
Writable val) throws IOException {
if (bloomFilter == null) {
return super.getClosest(key, val);
}
// Note - the key being passed to us is always a HStoreKey
if(bloomFilter.membershipTest(getBloomFilterKey(key))) {
if (LOG.isDebugEnabled()) {
LOG.debug("bloom filter reported that key exists");
}
return super.getClosest(key, val);
}
if (LOG.isDebugEnabled()) {
LOG.debug("bloom filter reported that key does not exist");
}
return null;
}
/**
* @return size of the bloom filter
*/
public int getBloomFilterSize() {
return bloomFilter == null ? 0 : bloomFilter.getVectorSize();
}
}
public static class Writer extends HBaseWriter {
private static final double DEFAULT_NUMBER_OF_HASH_FUNCTIONS = 4.0;
private final BloomFilter bloomFilter;
private final String dirName;
private final FileSystem fs;
/**
* @param conf
* @param fs
* @param dirName
* @param compression
* @param filter
* @param nrows
* @param hri
* @throws IOException
*/
public Writer(Configuration conf, FileSystem fs, String dirName,
SequenceFile.CompressionType compression, final boolean filter,
int nrows, final HRegionInfo hri)
throws IOException {
super(conf, fs, dirName, compression, hri);
this.dirName = dirName;
this.fs = fs;
if (filter) {
/*
* There is no way to automatically determine the vector size and the
* number of hash functions to use. In particular, bloom filters are
* very sensitive to the number of elements inserted into them. For
* HBase, the number of entries depends on the size of the data stored
* in the column. Currently the default region size is 256MB, so the
* number of entries is approximately
* 256MB / (average value size for column).
*
* If m denotes the number of bits in the Bloom filter (vectorSize),
* n denotes the number of elements inserted into the Bloom filter and
* k represents the number of hash functions used (nbHash), then
* according to Broder and Mitzenmacher,
*
* ( http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/BloomFilterSurvey.pdf )
*
* the probability of false positives is minimized when k is
* approximately m/n ln(2).
*
* If we fix the number of hash functions and know the number of
* entries, then the optimal vector size m = (k * n) / ln(2)
*/
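// Worked example for illustration only (the numbers are hypothetical): with
// DEFAULT_NUMBER_OF_HASH_FUNCTIONS k = 4 and nrows n = 1,000,000, the code
// below sizes the filter at m = ceil((4 * 1,000,000) / ln(2)), roughly
// 5,770,781 bits, i.e. about 705 KB for the bit vector.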
BloomFilter f = null;
try {
f = new BloomFilter(
(int) Math.ceil(
(DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) /
Math.log(2.0)),
(int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS,
Hash.getHashType(conf)
);
} catch (IllegalArgumentException e) {
LOG.warn("Failed creating bloomfilter; proceeding without", e);
}
this.bloomFilter = f;
} else {
this.bloomFilter = null;
}
}
@Override
public void append(WritableComparable key, Writable val)
throws IOException {
if (bloomFilter != null) {
bloomFilter.add(getBloomFilterKey(key));
}
super.append(key, val);
}
@Override
public synchronized void close() throws IOException {
super.close();
if (this.bloomFilter != null) {
flushBloomFilter();
}
}
/**
* Flushes bloom filter to disk
*
* @throws IOException
*/
private void flushBloomFilter() throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("flushing bloom filter for " + this.dirName);
}
FSDataOutputStream out =
fs.create(new Path(dirName, BLOOMFILTER_FILE_NAME));
try {
bloomFilter.write(out);
} finally {
out.close();
}
if (LOG.isDebugEnabled()) {
LOG.debug("flushed bloom filter for " + this.dirName);
}
}
}
/**
* Custom bloom filter key maker.
* @param key
* @return Key made of bytes of row only.
*/
protected static Key getBloomFilterKey(WritableComparable key) {
return new Key(((HStoreKey) key).getRow());
}
}

View File

@@ -1,114 +0,0 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.io;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HStoreKey;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
/**
* HBase customizations of MapFile.
*/
public class HBaseMapFile extends MapFile {
// TODO not used. remove?!
// private static final Log LOG = LogFactory.getLog(HBaseMapFile.class);
/**
* Values are instances of this class.
*/
public static final Class<? extends Writable> VALUE_CLASS =
ImmutableBytesWritable.class;
/**
* A reader capable of reading and caching blocks of the data file.
*/
public static class HBaseReader extends MapFile.Reader {
private final boolean blockCacheEnabled;
/**
* @param fs
* @param dirName
* @param conf
* @param hri
* @throws IOException
*/
public HBaseReader(FileSystem fs, String dirName, Configuration conf,
HRegionInfo hri)
throws IOException {
this(fs, dirName, conf, false, hri);
}
/**
* @param fs
* @param dirName
* @param conf
* @param blockCacheEnabled
* @param hri
* @throws IOException
*/
public HBaseReader(FileSystem fs, String dirName, Configuration conf,
boolean blockCacheEnabled, HRegionInfo hri)
throws IOException {
super(fs, dirName, new org.apache.hadoop.hbase.migration.nineteen.HStoreKey.HStoreKeyWritableComparator(hri),
conf, false); // defer opening streams
this.blockCacheEnabled = blockCacheEnabled;
open(fs, dirName, new org.apache.hadoop.hbase.migration.nineteen.HStoreKey.HStoreKeyWritableComparator(hri), conf);
// Force reading of the mapfile index by calling midKey. Reading the
// index will bring the index into memory over here on the client and
// then close the index file freeing up socket connection and resources
// in the datanode. Usually, the first access on a MapFile.Reader will
// load the index; we force the issue in HStoreFile MapFiles because an
// access may not happen for some time; meantime we're using up datanode
// resources (See HADOOP-2341). midKey() goes to index. Does not seek.
// Disable for migration !!! midKey();
}
}
public static class HBaseWriter extends MapFile.Writer {
/**
* @param conf
* @param fs
* @param dirName
* @param compression
* @param hri
* @throws IOException
*/
public HBaseWriter(Configuration conf, FileSystem fs, String dirName,
SequenceFile.CompressionType compression, final HRegionInfo hri)
throws IOException {
super(conf, fs, dirName, new org.apache.hadoop.hbase.migration.nineteen.HStoreKey.HStoreKeyWritableComparator(hri),
VALUE_CLASS, compression);
// Default for mapfiles is 128. Makes random reads faster if we
// have more keys indexed and we're not 'next'-ing around in the
// mapfile.
setIndexInterval(conf.getInt("hbase.io.index.interval", 128));
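// Worked example for illustration only: with the default interval of 128, a
// mapfile of 1,000,000 entries keeps about 1,000,000 / 128, i.e. roughly
// 7,800 keys, in its in-memory index.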
}
}
}

View File

@@ -1,228 +0,0 @@
/**
* Copyright 2008 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.io;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
import org.apache.hadoop.hbase.migration.nineteen.io.Reference.Range;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
/**
* A facade for a {@link org.apache.hadoop.io.MapFile.Reader} that serves up
* either the top or bottom half of a MapFile where 'bottom' is the first half
* of the file containing the keys that sort lowest and 'top' is the second half
* of the file with keys that sort greater than those of the bottom half.
* The top includes the split file's midkey, or the key that follows if it does
* not exist in the file.
*
* <p>This type works in tandem with the {@link Reference} type. This class
* is used for reading while Reference is used for writing.
*
* <p>This file is not splittable. Calls to {@link #midKey()} return null.
*/
//TODO: fix the generic warnings coming from the MapFile methods
public class HalfMapFileReader extends BloomFilterMapFile.Reader {
private static final Log LOG = LogFactory.getLog(HalfMapFileReader.class);
private final boolean top;
private final HStoreKey midkey;
private boolean firstNextCall = true;
/**
* @param fs
* @param dirName
* @param conf
* @param r
* @param mk
* @param hri
* @throws IOException
*/
public HalfMapFileReader(final FileSystem fs, final String dirName,
final Configuration conf, final Range r,
final WritableComparable<HStoreKey> mk,
final HRegionInfo hri)
throws IOException {
this(fs, dirName, conf, r, mk, false, false, hri);
}
/**
* @param fs
* @param dirName
* @param conf
* @param r
* @param mk
* @param filter
* @param blockCacheEnabled
* @param hri
* @throws IOException
*/
public HalfMapFileReader(final FileSystem fs, final String dirName,
final Configuration conf, final Range r,
final WritableComparable<HStoreKey> mk, final boolean filter,
final boolean blockCacheEnabled,
final HRegionInfo hri)
throws IOException {
super(fs, dirName, conf, filter, blockCacheEnabled, hri);
// This is not the actual midkey for this half-file; it's just the border
// around which we split top and bottom. Have to look in files to find
// actual last and first keys for bottom and top halves. Half-files don't
// have an actual midkey themselves. No midkey is how we indicate file is
// not splittable.
this.midkey = new HStoreKey((HStoreKey)mk);
this.midkey.setHRegionInfo(hri);
// Is it top or bottom half?
this.top = Reference.isTopFileRegion(r);
}
/*
* Check key is not bleeding into wrong half of the file.
* @param key
* @throws IOException
*/
private void checkKey(final WritableComparable<HStoreKey> key)
throws IOException {
if (top) {
if (key.compareTo(midkey) < 0) {
throw new IOException("Illegal Access: Key is less than midKey of " +
"backing mapfile");
}
} else if (key.compareTo(midkey) >= 0) {
throw new IOException("Illegal Access: Key is greater than or equal " +
"to midKey of backing mapfile");
}
}
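// Illustrative example (key values are hypothetical): with a midkey of
// "row500", a reader serving the top half rejects a get for "row100" since it
// is below the midkey, while a reader serving the bottom half rejects "row900"
// and "row500" itself, because the bottom holds only keys sorting strictly
// below the midkey.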
@SuppressWarnings("unchecked")
@Override
public synchronized void finalKey(WritableComparable key)
throws IOException {
if (top) {
super.finalKey(key);
} else {
Writable value = new ImmutableBytesWritable();
WritableComparable found = super.getClosest(midkey, value, true);
Writables.copyWritable(found, key);
}
}
@SuppressWarnings("unchecked")
@Override
public synchronized Writable get(WritableComparable key, Writable val)
throws IOException {
checkKey(key);
return super.get(key, val);
}
@SuppressWarnings("unchecked")
@Override
public synchronized WritableComparable getClosest(WritableComparable key,
Writable val)
throws IOException {
WritableComparable closest = null;
if (top) {
// If top, the lowest possible key is first key. Do not have to check
// what comes back from super getClosest. Will return exact match or
// greater.
closest = (key.compareTo(this.midkey) < 0)?
this.midkey: super.getClosest(key, val);
// we know that we just went past the midkey
firstNextCall = false;
} else {
// We're serving bottom of the file.
if (key.compareTo(this.midkey) < 0) {
// Check key is within range for bottom.
closest = super.getClosest(key, val);
// midkey was made against largest store file at time of split. Smaller
// store files could have anything in them. Check return value is
// not beyond the midkey (getClosest returns exact match or next after)
if (closest != null && closest.compareTo(this.midkey) >= 0) {
// Don't let this value out.
closest = null;
}
}
// Else, key is > midkey so let out closest = null.
}
return closest;
}
@SuppressWarnings("unchecked")
@Override
public synchronized WritableComparable midKey() throws IOException {
// Returns null to indicate file is not splittable.
return null;
}
@SuppressWarnings("unchecked")
@Override
public synchronized boolean next(WritableComparable key, Writable val)
throws IOException {
if (firstNextCall) {
firstNextCall = false;
if (this.top) {
// Seek to midkey. Midkey may not exist in this file. That should be
// fine. Then we'll either be positioned at end or start of file.
WritableComparable nearest = getClosest(this.midkey, val);
// Now copy the midkey into the passed key.
if (nearest != null) {
Writables.copyWritable(nearest, key);
return true;
}
return false;
}
}
boolean result = super.next(key, val);
int cmpresult = key.compareTo(midkey);
if (top && cmpresult < 0) {
LOG.error("BUG BUG BUG. HalfMapFileReader wanted to return key out of range. DANGER");
throw new IOException("BUG BUG BUG. HalfMapFileReader wanted to return key out of range. DANGER");
} else if (!top && cmpresult >= 0) {
result = false;
}
return result;
}
@Override
public synchronized void reset() throws IOException {
if (top) {
firstNextCall = true;
return;
}
super.reset();
}
@SuppressWarnings("unchecked")
@Override
public synchronized boolean seek(WritableComparable key)
throws IOException {
checkKey(key);
return super.seek(key);
}
}

View File

@@ -1,117 +0,0 @@
/**
*
*/
package org.apache.hadoop.hbase.migration.nineteen.io;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
import org.apache.hadoop.io.Writable;
/**
* A reference to a part of a store file. The file referenced usually lives
* under a different region. The part referenced is usually the top or bottom
* half of the file. References are made at region split time. Being lazy
* about copying data between the parent of the split and the split daughters
* makes splitting faster.
*
* <p>References work with {@link HalfMapFileReader}. References know how to
* write out the reference format in the file system and are what's juggled when
* references are mixed in with direct store files. The
* {@link HalfMapFileReader} is used reading the referred to file.
*
* <p>References to store files located over in some other region look like
* this in the file system
* <code>1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184</code>:
* i.e. an id followed by the name of the referenced region. The data
* ('mapfiles') of references are empty. The accompanying <code>info</code> file
* contains the <code>midkey</code> that demarks top and bottom of the
* referenced storefile, the id of the remote store we're referencing and
* whether we're to serve the top or bottom region of the remote store file.
* Note, a region is itself not splittable if it has instances of store file
* references. References are cleaned up by compactions.
*/
public class Reference implements Writable {
// TODO: see if it makes sense making a ReferenceMapFile whose Writer is this
// class and whose Reader is the {@link HalfMapFileReader}.
private int encodedRegionName;
private long fileid;
private Range region;
private HStoreKey midkey;
/**
* For split HStoreFiles, it specifies if the file covers the lower half or
* the upper half of the key range
*/
public static enum Range {
/** HStoreFile contains upper half of key range */
top,
/** HStoreFile contains lower half of key range */
bottom
}
public Reference(final int ern, final long fid, final HStoreKey m,
final Range fr) {
this.encodedRegionName = ern;
this.fileid = fid;
this.region = fr;
this.midkey = m;
}
public Reference() {
this(-1, -1, null, Range.bottom);
}
public long getFileId() {
return fileid;
}
public Range getFileRegion() {
return region;
}
public HStoreKey getMidkey() {
return midkey;
}
public int getEncodedRegionName() {
return this.encodedRegionName;
}
@Override
public String toString() {
return encodedRegionName + "/" + fileid + "/" + region;
}
// Make it serializable.
public void write(DataOutput out) throws IOException {
// Write out the encoded region name as a String. Doing it as a String
// keeps a Reference's serialization backward compatible with
// pre-HBASE-82 serializations. The alternative is rewriting all
// info files in hbase (Serialized References are written into the
// 'info' file that accompanies HBase Store files).
out.writeUTF(Integer.toString(encodedRegionName));
out.writeLong(fileid);
// Write true if we're doing top of the file.
out.writeBoolean(isTopFileRegion(region));
this.midkey.write(out);
}
public void readFields(DataInput in) throws IOException {
this.encodedRegionName = Integer.parseInt(in.readUTF());
fileid = in.readLong();
boolean tmp = in.readBoolean();
// If true, set region to top.
region = tmp? Range.top: Range.bottom;
midkey = new HStoreKey();
midkey.readFields(in);
}
public static boolean isTopFileRegion(final Range r) {
return r.equals(Range.top);
}
}

View File

@@ -1,236 +0,0 @@
/**
*
* Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
* All rights reserved.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* - Neither the name of the University Catholique de Louvain - UCL
* nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.BitSet;
import org.apache.hadoop.hbase.util.Hash;
/**
* Implements a <i>Bloom filter</i>, as defined by Bloom in 1970.
* <p>
* The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
* the networking research community in the past decade thanks to the bandwidth efficiencies that it
* offers for the transmission of set membership information between networked hosts. A sender encodes
* the information into a bit vector, the Bloom filter, that is more compact than a conventional
* representation. Computation and space costs for construction are linear in the number of elements.
* The receiver uses the filter to test whether various elements are members of the set. Though the
* filter will occasionally return a false positive, it will never return a false negative. When creating
* the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
*
* contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
*
* @version 1.0 - 2 Feb. 07
*/
public class BloomFilter extends Filter {
private static final byte[] bitvalues = new byte[] {
(byte)0x01,
(byte)0x02,
(byte)0x04,
(byte)0x08,
(byte)0x10,
(byte)0x20,
(byte)0x40,
(byte)0x80
};
/** The bit vector. */
BitSet bits;
/** Default constructor - use with readFields */
public BloomFilter() {
super();
}
/**
* Constructor
* @param vectorSize The vector size of <i>this</i> filter.
* @param nbHash The number of hash function to consider.
* @param hashType type of the hashing function (see {@link Hash}).
*/
public BloomFilter(int vectorSize, int nbHash, int hashType){
super(vectorSize, nbHash, hashType);
bits = new BitSet(this.vectorSize);
}//end constructor
@Override
public void add(Key key) {
if(key == null) {
throw new NullPointerException("key cannot be null");
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
bits.set(h[i]);
}
}//end add()
@Override
public void and(Filter filter){
if(filter == null
|| !(filter instanceof BloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be and-ed");
}
this.bits.and(((BloomFilter) filter).bits);
}//end and()
@Override
public boolean membershipTest(Key key){
if(key == null) {
throw new NullPointerException("key cannot be null");
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
if(!bits.get(h[i])) {
return false;
}
}
return true;
}//end membershipTest()
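// Illustrative usage sketch, not in the original source (values are made up):
//   BloomFilter bf = new BloomFilter(1000, 4, Hash.MURMUR_HASH);
//   bf.add(new Key("row1".getBytes()));
//   bf.membershipTest(new Key("row1".getBytes()));  // always true
//   bf.membershipTest(new Key("rowX".getBytes()));  // usually false; false
//                                                   // positives are possible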
@Override
public void not(){
bits.flip(0, vectorSize - 1);
}//end not()
@Override
public void or(Filter filter){
if(filter == null
|| !(filter instanceof BloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be or-ed");
}
bits.or(((BloomFilter) filter).bits);
}//end or()
@Override
public void xor(Filter filter){
if(filter == null
|| !(filter instanceof BloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be xor-ed");
}
bits.xor(((BloomFilter) filter).bits);
}//end xor()
@Override
public String toString(){
return bits.toString();
}//end toString()
@Override
public Object clone(){
BloomFilter bf = new BloomFilter(vectorSize, nbHash, hashType);
bf.or(this);
return bf;
}//end clone()
/**
* @return size of the bloomfilter
*/
public int getVectorSize() {
return this.vectorSize;
}
// Writable
@Override
public void write(DataOutput out) throws IOException {
super.write(out);
byte[] bytes = new byte[getNBytes()];
for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
if (bitIndex == 8) {
bitIndex = 0;
byteIndex++;
}
if (bitIndex == 0) {
bytes[byteIndex] = 0;
}
if (bits.get(i)) {
bytes[byteIndex] |= bitvalues[bitIndex];
}
}
out.write(bytes);
}
@Override
public void readFields(DataInput in) throws IOException {
super.readFields(in);
bits = new BitSet(this.vectorSize);
byte[] bytes = new byte[getNBytes()];
in.readFully(bytes);
for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
if (bitIndex == 8) {
bitIndex = 0;
byteIndex++;
}
if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) {
bits.set(i);
}
}
}
/* @return number of bytes needed to hold bit vector */
private int getNBytes() {
return (vectorSize + 7) / 8;
}
}//end class

View File

@@ -1,311 +0,0 @@
/**
*
* Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
* All rights reserved.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* - Neither the name of the University Catholique de Louvain - UCL
* nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays; //TODO: remove
import org.apache.hadoop.hbase.util.Hash;
/**
* Implements a <i>counting Bloom filter</i>, as defined by Fan et al. in a ToN
* 2000 paper.
* <p>
* A counting Bloom filter is an improvement on a standard Bloom filter, as it
* allows dynamic additions and deletions of set membership information. This
* is achieved through the use of a counting vector instead of a bit vector.
*
* Originally created under contract for the <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
*
* @version 1.1 - 19 Jan. 08
*
*/
public final class CountingBloomFilter extends Filter {
/** Storage for the counting buckets */
private long[] buckets;
/** We are using 4-bit buckets, so each bucket can count to 15 */
private final static long BUCKET_MAX_VALUE = 15;
/** Default constructor - use with readFields */
public CountingBloomFilter() {}
/**
* Constructor
* @param vectorSize The vector size of <i>this</i> filter.
* @param nbHash The number of hash functions to consider.
* @param hashType type of the hashing function (see {@link Hash}).
*/
public CountingBloomFilter(int vectorSize, int nbHash, int hashType){
super(vectorSize, nbHash, hashType);
buckets = new long[buckets2words(vectorSize)];
}//end constructor
/** returns the number of 64 bit words it would take to hold vectorSize buckets */
private static int buckets2words(int vectorSize) {
return ((vectorSize - 1) >>> 4) + 1;
}
@Override
public void add(Key key) {
if(key == null) {
throw new NullPointerException("key can not be null");
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
// find the bucket
int wordNum = h[i] >> 4; // div 16
int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
long bucketMask = 15L << bucketShift;
long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
// only increment if the count in the bucket is less than BUCKET_MAX_VALUE
if(bucketValue < BUCKET_MAX_VALUE) {
// increment by 1
buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue + 1) << bucketShift);
}
}
}//end add()
/**
* Removes a specified key from <i>this</i> counting Bloom filter.
* <p>
* <b>Invariant</b>: the specified key must belong to <i>this</i> counting Bloom filter; otherwise an <code>IllegalArgumentException</code> is thrown.
* @param key The key to remove.
*/
public void delete(Key key) {
if(key == null) {
throw new NullPointerException("Key may not be null");
}
if(!membershipTest(key)) {
throw new IllegalArgumentException("Key is not a member");
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
// find the bucket
int wordNum = h[i] >> 4; // div 16
int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
long bucketMask = 15L << bucketShift;
long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
// only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE
if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) {
// decrement by 1
buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift);
}
}
}//end delete
@Override
public void and(Filter filter){
if(filter == null
|| !(filter instanceof CountingBloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be and-ed");
}
CountingBloomFilter cbf = (CountingBloomFilter)filter;
int sizeInWords = buckets2words(vectorSize);
for(int i = 0; i < sizeInWords; i++) {
this.buckets[i] &= cbf.buckets[i];
}
}//end and()
@Override
public boolean membershipTest(Key key){
if(key == null) {
throw new NullPointerException("Key may not be null");
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
// find the bucket
int wordNum = h[i] >> 4; // div 16
int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
long bucketMask = 15L << bucketShift;
if((buckets[wordNum] & bucketMask) == 0) {
return false;
}
}
return true;
}//end membershipTest()
/**
* This method calculates an approximate count of the key, i.e. how many
* times the key was added to the filter. This allows the filter to be
* used as an approximate <code>key -&gt; count</code> map.
* <p>NOTE: due to the bucket size of this filter, inserting the same
* key more than 15 times will cause an overflow at all filter positions
* associated with this key, and it will significantly increase the error
* rate for this and other keys. For this reason the filter can only be
* used to store small count values <code>0 &lt;= N &lt;&lt; 15</code>.
* @param key key to be tested
* @return 0 if the key is not present. Otherwise, a positive value v will
* be returned such that <code>v == count</code> with probability equal to the
* error rate of this filter, and <code>v &gt; count</code> otherwise.
* Additionally, if the filter experienced an underflow as a result of
* {@link #delete(Key)} operation, the return value may be lower than the
* <code>count</code> with the probability of the false negative rate of such
* filter.
*/
public int approximateCount(Key key) {
int res = Integer.MAX_VALUE;
int[] h = hash.hash(key);
hash.clear();
for (int i = 0; i < nbHash; i++) {
// find the bucket
int wordNum = h[i] >> 4; // div 16
int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
long bucketMask = 15L << bucketShift;
long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
if (bucketValue < res) res = (int)bucketValue;
}
if (res != Integer.MAX_VALUE) {
return res;
} else {
return 0;
}
}
@Override
public void not(){
throw new UnsupportedOperationException("not() is undefined for "
+ this.getClass().getName());
}//end not()
@Override
public void or(Filter filter){
if(filter == null
|| !(filter instanceof CountingBloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be or-ed");
}
CountingBloomFilter cbf = (CountingBloomFilter)filter;
int sizeInWords = buckets2words(vectorSize);
for(int i = 0; i < sizeInWords; i++) {
this.buckets[i] |= cbf.buckets[i];
}
}//end or()
@Override
@SuppressWarnings("unused")
public void xor(Filter filter){
throw new UnsupportedOperationException("xor() is undefined for "
+ this.getClass().getName());
}//end xor()
@Override
public String toString(){
StringBuilder res = new StringBuilder();
for(int i = 0; i < vectorSize; i++) {
if(i > 0) {
res.append(" ");
}
int wordNum = i >> 4; // div 16
int bucketShift = (i & 0x0f) << 2; // (mod 16) * 4
long bucketMask = 15L << bucketShift;
long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
res.append(bucketValue);
}
return res.toString();
}//end toString()
@Override
public Object clone(){
CountingBloomFilter cbf = new CountingBloomFilter(vectorSize, nbHash, hashType);
cbf.buckets = this.buckets.clone();
return cbf;
}//end clone()
// Writable
@Override
public void write(DataOutput out) throws IOException {
super.write(out);
int sizeInWords = buckets2words(vectorSize);
for(int i = 0; i < sizeInWords; i++) {
out.writeLong(buckets[i]);
}
}
@Override
public void readFields(DataInput in) throws IOException {
super.readFields(in);
int sizeInWords = buckets2words(vectorSize);
buckets = new long[sizeInWords];
for(int i = 0; i < sizeInWords; i++) {
buckets[i] = in.readLong();
}
}
}//end class
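As an illustration of the 4-bit counting buckets above, a minimal usage sketch (not part of the change; a single key is used so bucket counts are not shared):

CountingBloomFilter cbf = new CountingBloomFilter(1000, 4, Hash.JENKINS_HASH);
Key k = new Key("row-1".getBytes());
cbf.add(k);
cbf.add(k);
System.out.println(cbf.approximateCount(k)); // 2: the minimum bucket count over k's positions
cbf.delete(k);
System.out.println(cbf.membershipTest(k));   // true: every bucket for k still holds a count of 1
cbf.delete(k);
System.out.println(cbf.membershipTest(k));   // false: all buckets for k are back to zero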

View File

@ -1,299 +0,0 @@
/**
*
* Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
* All rights reserved.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* - Neither the name of the University Catholique de Louvain - UCL
* nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.hbase.util.Hash;
/**
* Implements a <i>dynamic Bloom filter</i>, as defined in the INFOCOM 2006 paper.
* <p>
* A dynamic Bloom filter (DBF) makes use of a <code>s * m</code> bit matrix but
* each of the <code>s</code> rows is a standard Bloom filter. The creation
* process of a DBF is iterative. At the start, the DBF is a <code>1 * m</code>
* bit matrix, i.e., it is composed of a single standard Bloom filter.
* It assumes that <code>n<sub>r</sub></code> elements are recorded in the
* initial bit vector, where <code>n<sub>r</sub> <= n</code> (<code>n</code> is
* the cardinality of the set <code>A</code> to record in the filter).
* <p>
* As the size of <code>A</code> grows during the execution of the application,
* several keys must be inserted in the DBF. When inserting a key into the DBF,
* one must first get an active Bloom filter in the matrix. A Bloom filter is
* active when the number of recorded keys, <code>n<sub>r</sub></code>, is
* strictly less than the current cardinality of <code>A</code>, <code>n</code>.
* If an active Bloom filter is found, the key is inserted and
* <code>n<sub>r</sub></code> is incremented by one. On the other hand, if there
* is no active Bloom filter, a new one is created (i.e., a new row is added to
* the matrix) according to the current size of <code>A</code> and the element
* is added in this new Bloom filter and the <code>n<sub>r</sub></code> value of
* this new Bloom filter is set to one. A given key is said to belong to the
* DBF if the <code>k</code> positions are set to one in one of the matrix rows.
*
* Originally created under contract for the <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
*
* @version 1.0 - 6 Feb. 07
*
*/
public class DynamicBloomFilter extends Filter {
/**
* Threshold for the maximum number of keys to record in a dynamic Bloom filter row.
*/
private int nr;
/**
* The number of keys recorded in the current standard active Bloom filter.
*/
private int currentNbRecord;
/**
* The matrix of Bloom filter.
*/
private BloomFilter[] matrix;
/**
* Zero-args constructor for the serialization.
*/
public DynamicBloomFilter() { }
/**
* Constructor.
* <p>
* Builds an empty Dynamic Bloom filter.
* @param vectorSize The number of bits in the vector.
* @param nbHash The number of hash functions to consider.
* @param hashType type of the hashing function (see {@link Hash}).
* @param nr The threshold for the maximum number of keys to record in a dynamic Bloom filter row.
*/
public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) {
super(vectorSize, nbHash, hashType);
this.nr = nr;
this.currentNbRecord = 0;
matrix = new BloomFilter[1];
matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType);
}//end constructor
@Override
public void add(Key key){
if(key == null) {
throw new NullPointerException("Key can not be null");
}
BloomFilter bf = getActiveStandardBF();
if(bf == null){
addRow();
bf = matrix[matrix.length - 1];
currentNbRecord = 0;
}
bf.add(key);
currentNbRecord++;
}//end add()
@Override
public void and(Filter filter) {
if(filter == null
|| !(filter instanceof DynamicBloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be and-ed");
}
DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
throw new IllegalArgumentException("filters cannot be and-ed");
}
for(int i = 0; i < matrix.length; i++) {
matrix[i].and(dbf.matrix[i]);
}
}//end and()
@Override
public boolean membershipTest(Key key){
if(key == null) {
return true;
}
for(int i = 0; i < matrix.length; i++) {
if(matrix[i].membershipTest(key)) {
return true;
}
}
return false;
}//end membershipTest()
@Override
public void not(){
for(int i = 0; i < matrix.length; i++) {
matrix[i].not();
}
}//end not()
@Override
public void or(Filter filter){
if(filter == null
|| !(filter instanceof DynamicBloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be or-ed");
}
DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
throw new IllegalArgumentException("filters cannot be or-ed");
}
for(int i = 0; i < matrix.length; i++) {
matrix[i].or(dbf.matrix[i]);
}
}//end or()
@Override
public void xor(Filter filter){
if(filter == null
|| !(filter instanceof DynamicBloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be xor-ed");
}
DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
throw new IllegalArgumentException("filters cannot be xor-ed");
}
for(int i = 0; i<matrix.length; i++) {
matrix[i].xor(dbf.matrix[i]);
}
}//end xor()
@Override
public String toString(){
StringBuilder res = new StringBuilder();
for(int i=0; i<matrix.length; i++) {
res.append(matrix[i]);
res.append('\n'); // Character.LINE_SEPARATOR is a byte category constant and would append "13"
}
return res.toString();
}//end toString()
@Override
public Object clone(){
DynamicBloomFilter dbf = new DynamicBloomFilter(vectorSize, nbHash, hashType, nr);
dbf.currentNbRecord = this.currentNbRecord;
dbf.matrix = new BloomFilter[this.matrix.length];
for(int i = 0; i < this.matrix.length; i++) {
dbf.matrix[i] = (BloomFilter)this.matrix[i].clone();
}
return dbf;
}//end clone()
// Writable
@Override
public void write(DataOutput out) throws IOException {
super.write(out);
out.writeInt(nr);
out.writeInt(currentNbRecord);
out.writeInt(matrix.length);
for (int i = 0; i < matrix.length; i++) {
matrix[i].write(out);
}
}
@Override
public void readFields(DataInput in) throws IOException {
super.readFields(in);
nr = in.readInt();
currentNbRecord = in.readInt();
int len = in.readInt();
matrix = new BloomFilter[len];
for (int i = 0; i < matrix.length; i++) {
matrix[i] = new BloomFilter();
matrix[i].readFields(in);
}
}
/**
* Adds a new row to <i>this</i> dynamic Bloom filter.
*/
private void addRow(){
BloomFilter[] tmp = new BloomFilter[matrix.length + 1];
for(int i = 0; i < matrix.length; i++) {
tmp[i] = (BloomFilter)matrix[i].clone();
}
tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType);
matrix = tmp;
}//end addRow()
/**
* Returns the active standard Bloom filter in <i>this</i> dynamic Bloom filter.
* @return BloomFilter The active standard Bloom filter.
* <code>Null</code> otherwise.
*/
private BloomFilter getActiveStandardBF() {
if(currentNbRecord >= nr) {
return null;
}
return matrix[matrix.length - 1];
}//end getActiveStandardBF()
}//end class
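A minimal usage sketch of the row-growth behaviour described above (illustrative only; the sizes are arbitrary):

// A new BloomFilter row is appended once nr (here 500) keys have been recorded in the active row,
// so the 2000 insertions below end up spread over four 10000-bit rows.
DynamicBloomFilter dbf = new DynamicBloomFilter(10000, 4, Hash.JENKINS_HASH, 500);
for (int i = 0; i < 2000; i++) {
  dbf.add(new Key(("row-" + i).getBytes()));
}
System.out.println(dbf.membershipTest(new Key("row-42".getBytes())));  // true
System.out.println(dbf.membershipTest(new Key("absent".getBytes())));  // false with high probability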

View File

@ -1,211 +0,0 @@
/**
*
* Copyright (c) 2005, European Commission project OneLab under contract 034819
* (http://www.one-lab.org)
*
* All rights reserved.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* - Neither the name of the University Catholique de Louvain - UCL
* nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.io.Writable;
/**
* Defines the general behavior of a filter.
* <p>
* A filter is a data structure which aims at offering a lossy summary of a set <code>A</code>. The
* key idea is to map entries of <code>A</code> (also called <i>keys</i>) into several positions
* in a vector through the use of several hash functions.
* <p>
* Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension).
* <p>
* It must be extended in order to define the real behavior.
*
* @version 1.0 - 2 Feb. 07
*/
public abstract class Filter implements Writable {
private static final int VERSION = -1; // negative to accommodate the old, unversioned format
/** The vector size of <i>this</i> filter. */
protected int vectorSize;
/** The hash function used to map a key to several positions in the vector. */
protected HashFunction hash;
/** The number of hash functions to consider. */
protected int nbHash;
/** Type of hashing function to use. */
protected int hashType;
protected Filter() {}
/**
* Constructor.
* @param vectorSize The vector size of <i>this</i> filter.
* @param nbHash The number of hash functions to consider.
* @param hashType type of the hashing function (see {@link Hash}).
*/
protected Filter(int vectorSize, int nbHash, int hashType) {
this.vectorSize = vectorSize;
this.nbHash = nbHash;
this.hashType = hashType;
this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}//end constructor
/**
* Adds a key to <i>this</i> filter.
* @param key The key to add.
*/
public abstract void add(Key key);
/**
* Determines whether a specified key belongs to <i>this</i> filter.
* @param key The key to test.
* @return boolean True if the specified key belongs to <i>this</i> filter.
* False otherwise.
*/
public abstract boolean membershipTest(Key key);
/**
* Performs a logical AND between <i>this</i> filter and a specified filter.
* <p>
* <b>Invariant</b>: The result is assigned to <i>this</i> filter.
* @param filter The filter to AND with.
*/
public abstract void and(Filter filter);
/**
* Performs a logical OR between <i>this</i> filter and a specified filter.
* <p>
* <b>Invariant</b>: The result is assigned to <i>this</i> filter.
* @param filter The filter to OR with.
*/
public abstract void or(Filter filter);
/**
* Performs a logical XOR between <i>this</i> filter and a specified filter.
* <p>
* <b>Invariant</b>: The result is assigned to <i>this</i> filter.
* @param filter The filter to XOR with.
*/
public abstract void xor(Filter filter);
/**
* Performs a logical NOT on <i>this</i> filter.
* <p>
* The result is assigned to <i>this</i> filter.
*/
public abstract void not();
/**
* Adds a list of keys to <i>this</i> filter.
* @param keys The list of keys.
*/
public void add(List<Key> keys){
if(keys == null) {
throw new IllegalArgumentException("ArrayList<Key> may not be null");
}
for(Key key: keys) {
add(key);
}
}//end add()
/**
* Adds a collection of keys to <i>this</i> filter.
* @param keys The collection of keys.
*/
public void add(Collection<Key> keys){
if(keys == null) {
throw new IllegalArgumentException("Collection<Key> may not be null");
}
for(Key key: keys) {
add(key);
}
}//end add()
/**
* Adds an array of keys to <i>this</i> filter.
* @param keys The array of keys.
*/
public void add(Key[] keys){
if(keys == null) {
throw new IllegalArgumentException("Key[] may not be null");
}
for(int i = 0; i < keys.length; i++) {
add(keys[i]);
}
}//end add()
// Writable interface
public void write(DataOutput out) throws IOException {
out.writeInt(VERSION);
out.writeInt(this.nbHash);
out.writeByte(this.hashType);
out.writeInt(this.vectorSize);
}
public void readFields(DataInput in) throws IOException {
int ver = in.readInt();
if (ver > 0) { // old unversioned format
this.nbHash = ver;
this.hashType = Hash.JENKINS_HASH;
} else if (ver == VERSION) {
this.nbHash = in.readInt();
this.hashType = in.readByte();
} else {
throw new IOException("Unsupported version: " + ver);
}
this.vectorSize = in.readInt();
this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
}//end class
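A sketch of the versioned header handling in readFields() above, exercised through the BloomFilter subclass whose write()/readFields() appear near the top of this change (illustrative only):

// Old, unversioned serializations start directly with a positive nbHash; the current format
// writes VERSION (-1) first. readFields() accepts both layouts.
ByteArrayOutputStream bos = new ByteArrayOutputStream();
DataOutputStream out = new DataOutputStream(bos);
out.writeInt(4);                      // old layout: nbHash comes first
out.writeInt(1000);                   // vectorSize
out.write(new byte[(1000 + 7) / 8]);  // an empty bit vector, as packed by BloomFilter.write()
BloomFilter bf = new BloomFilter();
bf.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
// Because ver > 0 it is taken as nbHash, and hashType defaults to Hash.JENKINS_HASH.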

View File

@ -1,120 +0,0 @@
/**
*
* Copyright (c) 2005, European Commission project OneLab under contract 034819
* (http://www.one-lab.org)
*
* All rights reserved.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* - Neither the name of the University Catholique de Louvain - UCL
* nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
import org.apache.hadoop.hbase.util.Hash;
/**
* Implements a hash object that returns a certain number of hashed values.
* <p>
* It is based on the SHA-1 algorithm.
*
* @version 1.0 - 2 Feb. 07
*/
public final class HashFunction {
/** The number of hashed values. */
private int nbHash;
/** The exclusive upper bound on returned hash values. */
private int maxValue;
/** Hashing algorithm to use. */
private Hash hashFunction;
/**
* Constructor.
* <p>
* Builds a hash function that returns a given number of hashed values, each strictly less than a given maximum value.
* @param maxValue The exclusive upper bound on returned hash values.
* @param nbHash The number of resulting hashed values.
* @param hashType type of the hashing function (see {@link Hash}).
*/
public HashFunction(int maxValue, int nbHash, int hashType) {
if(maxValue <= 0) {
throw new IllegalArgumentException("maxValue must be > 0");
}
if(nbHash <= 0) {
throw new IllegalArgumentException("nbHash must be > 0");
}
this.maxValue = maxValue;
this.nbHash = nbHash;
this.hashFunction = Hash.getInstance(hashType);
if (this.hashFunction == null)
throw new IllegalArgumentException("hashType must be known");
}//end constructor
/** Clears <i>this</i> hash function. A NOOP */
public void clear() {
}
/**
* Hashes a specified key into several integers.
* @param k The specified key.
* @return The array of hashed values.
*/
public int[] hash(Key k){
byte[] b = k.getBytes();
if(b == null) {
throw new NullPointerException("buffer reference is null");
}
if(b.length == 0) {
throw new IllegalArgumentException("key length must be > 0");
}
int[] result = new int[nbHash];
for (int i = 0, initval = 0; i < nbHash; i++) {
initval = hashFunction.hash(b, initval);
result[i] = Math.abs(initval) % maxValue;
}
return result;
}//end hash()
}//end class
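A minimal sketch of how the hash function above is used (illustrative only):

HashFunction hf = new HashFunction(1000, 4, Hash.JENKINS_HASH);
int[] positions = hf.hash(new Key("row-1".getBytes()));
// positions.length == 4; each value is Math.abs(hash) % 1000, i.e. it lies in [0, 1000).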

View File

@ -1,174 +0,0 @@
/**
*
* Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
* All rights reserved.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* - Neither the name of the University Catholique de Louvain - UCL
* nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
* The general behavior of a key that must be stored in a filter.
*/
public class Key implements WritableComparable {
/** Byte value of key */
byte[] bytes;
/**
* The weight associated to <i>this</i> key.
* <p>
* <b>Invariant</b>: if it is not specified, each instance of
* <code>Key</code> will have a default weight of 1.0
*/
double weight;
/** default constructor - use with readFields */
public Key() {}
/**
* Constructor.
* <p>
* Builds a key with a default weight.
* @param value The byte value of <i>this</i> key.
*/
public Key(byte[] value) {
this(value, 1.0);
}//end constructor
/**
* Constructor.
* <p>
* Builds a key with a specified weight.
* @param value The value of <i>this</i> key.
* @param weight The weight associated to <i>this</i> key.
*/
public Key(byte[] value, double weight) {
set(value, weight);
}//end constructor
/**
* @param value The byte value of <i>this</i> key.
* @param weight The weight associated to <i>this</i> key.
*/
public void set(byte[] value, double weight) {
if(value == null) {
throw new IllegalArgumentException("value can not be null");
}
this.bytes = value;
this.weight = weight;
}
/** @return byte[] The value of <i>this</i> key. */
public byte[] getBytes() {
return this.bytes;
}
/** @return Returns the weight associated to <i>this</i> key. */
public double getWeight(){
return weight;
}//end getWeight()
/**
* Increments the weight of <i>this</i> key with a specified value.
* @param weight The increment.
*/
public void incrementWeight(double weight){
this.weight += weight;
}//end incrementWeight()
/** Increments the weight of <i>this</i> key by one. */
public void incrementWeight(){
this.weight++;
}//end incrementWeight()
@Override
public boolean equals(Object o) {
return o instanceof Key && this.compareTo(o) == 0;
}
@Override
public int hashCode() {
int result = 0;
for(int i = 0; i < bytes.length; i++) {
result ^= Byte.valueOf(bytes[i]).hashCode();
}
result ^= Double.valueOf(weight).hashCode();
return result;
}
// Writable
public void write(DataOutput out) throws IOException {
out.writeInt(bytes.length);
out.write(bytes);
out.writeDouble(weight);
}
public void readFields(DataInput in) throws IOException {
this.bytes = new byte[in.readInt()];
in.readFully(this.bytes);
weight = in.readDouble();
}
// Comparable
public int compareTo(Object o) {
Key other = (Key)o;
int result = this.bytes.length - other.getBytes().length;
for(int i = 0; result == 0 && i < bytes.length; i++) {
result = this.bytes[i] - other.bytes[i];
}
if(result == 0) {
result = Double.valueOf(this.weight - other.weight).intValue();
}
return result;
}
}//end class
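A short sketch of the weight and comparison semantics above (illustrative only):

Key a = new Key("row-1".getBytes());   // default weight 1.0
Key b = new Key("row-1".getBytes(), 1.5);
System.out.println(a.compareTo(b));    // 0: the 0.5 weight delta is truncated to 0 by compareTo()
a.incrementWeight(2.0);
System.out.println(a.getWeight());     // 3.0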

View File

@ -1,91 +0,0 @@
/**
*
* Copyright (c) 2005, European Commission project OneLab under contract 034819
* (http://www.one-lab.org)
*
* All rights reserved.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* - Neither the name of the University Catholique de Louvain - UCL
* nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
/**
* Defines the different remove scheme for retouched Bloom filters.
*
* Originally created under contract for the <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
*
* @version 1.0 - 7 Feb. 07
*/
public interface RemoveScheme {
/**
* Random selection.
* <p>
* The idea is to randomly select a bit to reset.
*/
public final static short RANDOM = 0;
/**
* MinimumFN Selection.
* <p>
* The idea is to select the bit to reset that will generate the minimum
* number of false negatives.
*/
public final static short MINIMUM_FN = 1;
/**
* MaximumFP Selection.
* <p>
* The idea is to select the bit to reset that will remove the maximum number
* of false positives.
*/
public final static short MAXIMUM_FP = 2;
/**
* Ratio Selection.
* <p>
* The idea is to select the bit to reset that will, at the same time, remove
* the maximum number of false positives while minimizing the number of false
* negatives generated.
*/
public final static short RATIO = 3;
}//end interface

View File

@ -1,445 +0,0 @@
/**
*
* Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
* All rights reserved.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* - Neither the name of the University Catholique de Louvain - UCL
* nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.hbase.util.Hash;
/**
* Implements a <i>retouched Bloom filter</i>, as defined in the CoNEXT 2006 paper.
* <p>
* It allows the removal of selected false positives at the cost of introducing
* random false negatives, and with the benefit of eliminating some random false
* positives at the same time.
*
* Originally created under contract for the <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
*
* @version 1.0 - 7 Feb. 07
*
*/
public final class RetouchedBloomFilter extends BloomFilter
implements RemoveScheme {
/**
* KeyList vector (or ElementList Vector, as defined in the paper) of false positives.
*/
List<Key>[] fpVector;
/**
* KeyList vector of keys recorded in the filter.
*/
List<Key>[] keyVector;
/**
* Ratio vector.
*/
double[] ratio;
private Random rand;
/** Default constructor - use with readFields */
public RetouchedBloomFilter() {}
/**
* Constructor
* @param vectorSize The vector size of <i>this</i> filter.
* @param nbHash The number of hash functions to consider.
* @param hashType type of the hashing function (see {@link Hash}).
*/
public RetouchedBloomFilter(int vectorSize, int nbHash, int hashType) {
super(vectorSize, nbHash, hashType);
this.rand = null;
createVector();
}//end constructor
@Override
public void add(Key key){
if(key == null) {
throw new NullPointerException("key can not be null");
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
bits.set(h[i]);
keyVector[h[i]].add(key);
}//end for - i
}//end add()
/**
* Adds false positive information to <i>this</i> retouched Bloom filter.
* <p>
* <b>Invariant</b>: the key must not be <code>null</code>; a <code>NullPointerException</code> is thrown otherwise.
* @param key The false positive key to add.
*/
public void addFalsePositive(Key key){
if(key == null) {
throw new NullPointerException("key can not be null");
}
int[] h = hash.hash(key);
hash.clear();
for(int i = 0; i < nbHash; i++) {
fpVector[h[i]].add(key);
}
}//end addFalsePositive()
/**
* Adds a collection of false positive information to <i>this</i> retouched Bloom filter.
* @param coll The collection of false positives.
*/
public void addFalsePositive(Collection<Key> coll) {
if(coll == null) {
throw new NullPointerException("Collection<Key> can not be null");
}
for(Key k: coll) {
addFalsePositive(k);
}
}//end addFalsePositive()
/**
* Adds a list of false positive information to <i>this</i> retouched Bloom filter.
* @param keys The list of false positives.
*/
public void addFalsePositive(List<Key> keys){
if(keys == null) {
throw new NullPointerException("ArrayList<Key> can not be null");
}
for(Key k: keys) {
addFalsePositive(k);
}
}//end addFalsePositive()
/**
* Adds an array of false positive information to <i>this</i> retouched Bloom filter.
* @param keys The array of false positives.
*/
public void addFalsePositive(Key[] keys){
if(keys == null) {
throw new NullPointerException("Key[] can not be null");
}
for(int i = 0; i < keys.length; i++) {
addFalsePositive(keys[i]);
}
}//end addFalsePositive()
/**
* Performs the selective clearing for a given key.
* @param k The false positive key to remove from <i>this</i> retouched Bloom filter.
* @param scheme The selective clearing scheme to apply.
*/
public void selectiveClearing(Key k, short scheme) {
if(k == null) {
throw new NullPointerException("Key can not be null");
}
if(!membershipTest(k)) {
throw new IllegalArgumentException("Key is not a member");
}
int index = 0;
int[] h = hash.hash(k);
switch(scheme) {
case RANDOM:
index = randomRemove();
break;
case MINIMUM_FN:
index = minimumFnRemove(h);
break;
case MAXIMUM_FP:
index = maximumFpRemove(h);
break;
case RATIO:
index = ratioRemove(h);
break;
default:
throw new AssertionError("Undefined selective clearing scheme");
}//end switch
clearBit(index);
}//end selectiveClearing()
private int randomRemove() {
if(rand == null) {
rand = new Random();
}
return rand.nextInt(nbHash);
}//end randomRemove()
/**
* Chooses the bit position that minimizes the number of false negatives generated.
* @param h The different bit positions.
* @return int The position that minimizes the number of false negatives generated.
*/
private int minimumFnRemove(int[] h) {
int minIndex = Integer.MAX_VALUE;
double minValue = Double.MAX_VALUE;
for(int i = 0; i < nbHash; i++) {
double keyWeight = getWeight(keyVector[h[i]]);
if(keyWeight < minValue) {
minIndex = h[i];
minValue = keyWeight;
}
}//end for - i
return minIndex;
}//end minimumFnRemove()
/**
* Chooses the bit position that maximizes the number of false positives removed.
* @param h The different bit positions.
* @return int The position that maximizes the number of false positives removed.
*/
private int maximumFpRemove(int[] h){
int maxIndex = Integer.MIN_VALUE;
double maxValue = Double.MIN_VALUE;
for(int i = 0; i < nbHash; i++) {
double fpWeight = getWeight(fpVector[h[i]]);
if(fpWeight > maxValue) {
maxValue = fpWeight;
maxIndex = h[i];
}
}
return maxIndex;
}//end maximumFpRemove()
/**
* Chooses the bit position that minimizes the number of false negatives generated
* while maximizing the number of false positives removed.
* @param h The different bit positions.
* @return int The position that minimizes the number of false negatives generated
* while maximizing the number of false positives removed.
*/
private int ratioRemove(int[] h){
computeRatio();
int minIndex = Integer.MAX_VALUE;
double minValue = Double.MAX_VALUE;
for(int i = 0; i < nbHash; i++) {
if(ratio[h[i]] < minValue) {
minValue = ratio[h[i]];
minIndex = h[i];
}
}//end for - i
return minIndex;
}//end ratioRemove()
/**
* Clears a specified bit in the bit vector and keeps the KeyList vectors up to date.
* @param index The position of the bit to clear.
*/
private void clearBit(int index){
if(index < 0 || index >= vectorSize) {
throw new ArrayIndexOutOfBoundsException(index);
}
List<Key> kl = keyVector[index];
List<Key> fpl = fpVector[index];
// update key list
int listSize = kl.size();
for(int i = 0; i < listSize && !kl.isEmpty(); i++) {
removeKey(kl.get(0), keyVector);
}
kl.clear();
keyVector[index].clear();
//update false positive list
listSize = fpl.size();
for(int i = 0; i < listSize && !fpl.isEmpty(); i++) {
removeKey(fpl.get(0), fpVector);
}
fpl.clear();
fpVector[index].clear();
//update ratio
ratio[index] = 0.0;
//update bit vector
bits.clear(index);
}//end clearBit()
/**
* Removes a given key from <i>this</i> filter.
* @param k The key to remove.
* @param vector The counting vector associated to the key.
*/
private void removeKey(Key k, List<Key>[] vector) {
if(k == null) {
throw new NullPointerException("Key can not be null");
}
if(vector == null) {
throw new NullPointerException("ArrayList<Key>[] can not be null");
}
int[] h = hash.hash(k);
hash.clear();
for(int i = 0; i < nbHash; i++) {
vector[h[i]].remove(k);
}
}//end removeKey()
/**
* Computes the ratio A/FP.
*/
private void computeRatio() {
for(int i = 0; i < vectorSize; i++) {
double keyWeight = getWeight(keyVector[i]);
double fpWeight = getWeight(fpVector[i]);
if(keyWeight > 0 && fpWeight > 0) {
ratio[i] = keyWeight/fpWeight;
}
}//end for - i
}//end computeRatio()
private double getWeight(List<Key> keyList) {
double weight = 0.0;
for(Key k: keyList) {
weight += k.getWeight();
}
return weight;
}
/**
* Creates and initialises the various vectors.
*/
@SuppressWarnings("unchecked")
private void createVector() {
fpVector = new List[vectorSize];
keyVector = new List[vectorSize];
ratio = new double[vectorSize];
for(int i = 0; i < vectorSize; i++) {
fpVector[i] = Collections.synchronizedList(new ArrayList<Key>());
keyVector[i] = Collections.synchronizedList(new ArrayList<Key>());
ratio[i] = 0.0;
}//end for -i
}//end createVector()
// Writable
@Override
public void write(DataOutput out) throws IOException {
super.write(out);
for(int i = 0; i < fpVector.length; i++) {
List<Key> list = fpVector[i];
out.writeInt(list.size());
for(Key k: list) {
k.write(out);
}
}
for(int i = 0; i < keyVector.length; i++) {
List<Key> list = keyVector[i];
out.writeInt(list.size());
for(Key k: list) {
k.write(out);
}
}
for(int i = 0; i < ratio.length; i++) {
out.writeDouble(ratio[i]);
}
}
@Override
public void readFields(DataInput in) throws IOException {
super.readFields(in);
createVector();
for(int i = 0; i < fpVector.length; i++) {
List<Key> list = fpVector[i];
int size = in.readInt();
for(int j = 0; j < size; j++) {
Key k = new Key();
k.readFields(in);
list.add(k);
}
}
for(int i = 0; i < keyVector.length; i++) {
List<Key> list = keyVector[i];
int size = in.readInt();
for(int j = 0; j < size; j++) {
Key k = new Key();
k.readFields(in);
list.add(k);
}
}
for(int i = 0; i < ratio.length; i++) {
ratio[i] = in.readDouble();
}
}
}//end class
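A minimal sketch of the selective-clearing flow above (illustrative only; the tiny filter just makes a false positive easy to find):

RetouchedBloomFilter rbf = new RetouchedBloomFilter(8, 1, Hash.JENKINS_HASH);
rbf.add(new Key("member".getBytes()));
// Find a key that was never added but still tests positive.
Key fp = null;
for (int i = 0; fp == null && i < 1000; i++) {
  Key candidate = new Key(("candidate-" + i).getBytes());
  if (rbf.membershipTest(candidate)) {
    fp = candidate;
  }
}
if (fp != null) {
  rbf.addFalsePositive(fp);
  rbf.selectiveClearing(fp, RemoveScheme.RATIO);       // clears one of fp's bit positions
  System.out.println(rbf.membershipTest(fp));          // false after the clearing
  System.out.println(rbf.membershipTest(new Key("member".getBytes()))); // also false: the cost is a false negative
}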

View File

@ -1,585 +0,0 @@
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.migration.nineteen.regionserver;
import java.io.DataInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.migration.nineteen.io.BloomFilterMapFile;
import org.apache.hadoop.hbase.migration.nineteen.io.HalfMapFileReader;
import org.apache.hadoop.hbase.migration.nineteen.io.Reference;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.SequenceFile;
/**
* A HStore data file. HStores usually have one or more of these files. They
* are produced by flushing the memcache to disk.
*
* <p>This one has been doctored to be used in migrations. Private and
* protecteds have been made public, etc.
*
* <p>Each HStore maintains a bunch of different data files. The filename is a
* mix of the parent dir, the region name, the column name, and a file
* identifier. The name may also be a reference to a store file located
* elsewhere. This class handles all that path-building stuff for you.
*
* <p>An HStoreFile usually tracks 4 things: its parent dir, the region
* identifier, the column family, and the file identifier. If you know those
* four things, you know how to obtain the right HStoreFile. HStoreFiles may
* also reference store files in another region serving either from
* the top-half of the remote file or from the bottom-half. Such references
* are made when regions are split, which keeps splits fast.
*
* <p>Plain HStoreFiles are named for a randomly generated id as in:
* <code>1278437856009925445</code> A file by this name is made in both the
* <code>mapfiles</code> and <code>info</code> subdirectories of a
* HStore column family directory: e.g. if the column family is 'anchor:', then
* under the region directory there is a subdirectory named 'anchor' within
* which is a 'mapfiles' and 'info' subdirectory. In each will be found a
* file named something like <code>1278437856009925445</code>, one to hold the
* data in 'mapfiles' and one under 'info' that holds the sequence id for this
* store file.
*
* <p>References to store files located over in some other region look like
* this:
* <code>1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184</code>:
* i.e. an id followed by the name of the referenced region. The data
* ('mapfiles') of HStoreFile references are empty. The accompanying
* <code>info</code> file contains the
* midkey, the id of the remote store we're referencing and whether we're
* to serve the top or bottom region of the remote store file. Note, a region
* is not splittable if it has instances of store file references (References
* are cleaned up by compactions).
*
* <p>When merging or splitting HRegions, we might want to modify one of the
* params for an HStoreFile (effectively moving it elsewhere).
*/
public class HStoreFile implements HConstants {
static final Log LOG = LogFactory.getLog(HStoreFile.class.getName());
static final byte INFO_SEQ_NUM = 0;
static final byte MAJOR_COMPACTION = INFO_SEQ_NUM + 1;
static final String HSTORE_DATFILE_DIR = "mapfiles";
static final String HSTORE_INFO_DIR = "info";
static final String HSTORE_FILTER_DIR = "filter";
private final static Random rand = new Random();
private final Path basedir;
private final int encodedRegionName;
private final byte [] colFamily;
private final long fileId;
private final HBaseConfiguration conf;
private final FileSystem fs;
private final Reference reference;
private final HRegionInfo hri;
/* If true, this file was the product of a major compaction.
*/
private boolean majorCompaction = false;
private long indexLength;
/**
* Constructor that fully initializes the object
* @param conf Configuration object
* @param basedir qualified path that is parent of region directory
* @param colFamily name of the column family
* @param fileId file identifier
* @param ref Reference to another HStoreFile.
* @param encodedName Encoded name.
* @throws IOException
*/
public HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir,
final int encodedName, byte [] colFamily, long fileId,
final Reference ref)
throws IOException {
this(conf, fs, basedir, encodedName, colFamily, fileId, ref, false);
}
/**
* Constructor that fully initializes the object
* @param conf Configuration object
* @param basedir qualified path that is parent of region directory
* @param colFamily name of the column family
* @param fileId file identifier
* @param ref Reference to another HStoreFile.
* @param encodedName Encoded name.
* @param mc True if this file is the result of a major compaction.
* @throws IOException
*/
HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir,
final int encodedName, byte [] colFamily, long fileId,
final Reference ref, final boolean mc)
throws IOException {
this.conf = conf;
this.fs = fs;
this.basedir = basedir;
this.encodedRegionName = encodedName;
this.colFamily = colFamily;
// NOT PASSED IN MIGRATIONS
this.hri = null;
long id = fileId;
if (id == -1) {
Path mapdir = HStoreFile.getMapDir(basedir, encodedRegionName, colFamily);
Path testpath = null;
do {
id = Math.abs(rand.nextLong());
testpath = new Path(mapdir, createHStoreFilename(id, -1));
} while(fs.exists(testpath));
}
this.fileId = id;
// If a reference, construction does not write the pointer files. That's
// done by invocations of writeReferenceFiles(hsf, fs). Happens at split.
this.reference = ref;
this.majorCompaction = mc;
}
/** @return True if this store file is a reference to another store file. */
boolean isReference() {
return reference != null;
}
private static final Pattern REF_NAME_PARSER =
Pattern.compile("^(\\d+)(?:\\.(.+))?$");
/**
* @param p Path to check.
* @return True if the path has format of a HStoreFile reference.
*/
public static boolean isReference(final Path p) {
return isReference(p, REF_NAME_PARSER.matcher(p.getName()));
}
private static boolean isReference(final Path p, final Matcher m) {
if (m == null || !m.matches()) {
LOG.warn("Failed match of store file name " + p.toString());
throw new RuntimeException("Failed match of store file name " +
p.toString());
}
return m.groupCount() > 1 && m.group(2) != null;
}
Reference getReference() {
return reference;
}
int getEncodedRegionName() {
return this.encodedRegionName;
}
/** @return the column family */
byte [] getColFamily() {
return colFamily;
}
/** @return the file identifier */
long getFileId() {
return fileId;
}
// Build full filenames from those components
/** @return path for MapFile */
Path getMapFilePath() {
if (isReference()) {
return getMapFilePath(encodedRegionName, fileId,
reference.getEncodedRegionName());
}
return getMapFilePath(this.encodedRegionName, fileId);
}
private Path getMapFilePath(final Reference r) {
if (r == null) {
return getMapFilePath();
}
return getMapFilePath(r.getEncodedRegionName(), r.getFileId());
}
private Path getMapFilePath(final int encodedName, final long fid) {
return getMapFilePath(encodedName, fid, HRegionInfo.NO_HASH);
}
private Path getMapFilePath(final int encodedName, final long fid,
final int ern) {
return new Path(HStoreFile.getMapDir(basedir, encodedName, colFamily),
createHStoreFilename(fid, ern));
}
/** @return path for info file */
Path getInfoFilePath() {
if (isReference()) {
return getInfoFilePath(encodedRegionName, fileId,
reference.getEncodedRegionName());
}
return getInfoFilePath(encodedRegionName, fileId);
}
private Path getInfoFilePath(final int encodedName, final long fid) {
return getInfoFilePath(encodedName, fid, HRegionInfo.NO_HASH);
}
private Path getInfoFilePath(final int encodedName, final long fid,
final int ern) {
return new Path(HStoreFile.getInfoDir(basedir, encodedName, colFamily),
createHStoreFilename(fid, ern));
}
// File handling
/*
* Split by making two new store files that reference top and bottom regions
* of original store file.
* @param midKey
* @param dstA
* @param dstB
* @param fs
* @param c
* @throws IOException
*
* @param midKey the key which will be the starting key of the second region
* @param dstA the file which will contain keys from the start of the source
* @param dstB the file which will contain keys from midKey to end of source
* @param fs file system
* @param c configuration
* @throws IOException
*/
void splitStoreFile(final HStoreFile dstA, final HStoreFile dstB,
final FileSystem fs)
throws IOException {
dstA.writeReferenceFiles(fs);
dstB.writeReferenceFiles(fs);
}
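// Illustrative usage at split time (the daughter HStoreFiles and their names
// are assumed; each must have been constructed with a Reference to the bottom
// or top half of this file):
//   parent.splitStoreFile(daughterA, daughterB, fs);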
void writeReferenceFiles(final FileSystem fs)
throws IOException {
createOrFail(fs, getMapFilePath());
writeSplitInfo(fs);
}
/*
* If a reference, create and write the remote store file id, the midkey and
* whether we're going against the top file region of the referent out to
* the info file.
* @param fs file system
* @throws IOException
*/
private void writeSplitInfo(final FileSystem fs) throws IOException {
Path p = getInfoFilePath();
if (fs.exists(p)) {
throw new IOException("File already exists " + p.toString());
}
FSDataOutputStream out = fs.create(p);
try {
reference.write(out);
} finally {
out.close();
}
}
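// Note: a reference's info file holds the serialized Reference (the referent's
// encoded region name, file id, midkey and top/bottom flag) rather than a
// sequence id, which is why loadInfo() on a reference reads the referent's
// info file instead.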
/**
* Reads the split info (a Reference) written by
* {@link #writeSplitInfo(FileSystem)}.
* @param p Path to the info file of a reference store file
* @param fs file system
* @return the deserialized Reference
* @throws IOException
*/
static Reference readSplitInfo(final Path p, final FileSystem fs)
throws IOException {
FSDataInputStream in = fs.open(p);
try {
Reference r = new Reference();
r.readFields(in);
return r;
} finally {
in.close();
}
}
private void createOrFail(final FileSystem fs, final Path p)
throws IOException {
if (fs.exists(p)) {
throw new IOException("File already exists " + p.toString());
}
if (!fs.createNewFile(p)) {
throw new IOException("Failed create of " + p);
}
}
/**
* Reads in an info file
*
* @param filesystem file system
* @return The sequence id contained in the info file
* @throws IOException
*/
public long loadInfo(final FileSystem filesystem) throws IOException {
Path p = null;
if (isReference()) {
p = getInfoFilePath(reference.getEncodedRegionName(),
this.reference.getFileId());
} else {
p = getInfoFilePath();
}
long length = filesystem.getFileStatus(p).getLen();
boolean hasMoreThanSeqNum = length > (Byte.SIZE + Bytes.SIZEOF_LONG);
DataInputStream in = new DataInputStream(filesystem.open(p));
try {
byte flag = in.readByte();
if (flag == INFO_SEQ_NUM) {
if (hasMoreThanSeqNum) {
flag = in.readByte();
if (flag == MAJOR_COMPACTION) {
this.majorCompaction = in.readBoolean();
}
}
return in.readLong();
}
throw new IOException("Cannot process log file: " + p);
} finally {
in.close();
}
}
/**
* Writes the given sequence id to the info file.
*
* @param filesystem file system
* @param infonum sequence id to write
* @throws IOException
*/
void writeInfo(final FileSystem filesystem, final long infonum)
throws IOException {
writeInfo(filesystem, infonum, false);
}
/**
* Writes the given sequence id to the info file.
*
* @param filesystem file system
* @param infonum sequence id to write
* @param mc True if this file is the product of a major compaction
* @throws IOException
*/
void writeInfo(final FileSystem filesystem, final long infonum,
final boolean mc)
throws IOException {
Path p = getInfoFilePath();
FSDataOutputStream out = filesystem.create(p);
try {
out.writeByte(INFO_SEQ_NUM);
out.writeLong(infonum);
if (mc) {
// Set the major compaction flag on this file.
this.majorCompaction = mc;
out.writeByte(MAJOR_COMPACTION);
out.writeBoolean(mc);
}
} finally {
out.close();
}
}
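// Illustrative call after writing out a new store file (caller-side names
// maxSequenceId and isMajor are assumed):
//   hsf.writeInfo(fs, maxSequenceId, isMajor);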
/**
* Delete the store's map file and info file.
* @throws IOException
*/
public void delete() throws IOException {
fs.delete(getMapFilePath(), true);
fs.delete(getInfoFilePath(), true);
}
/**
* Renames this store file's map file and info file to the paths of the
* passed <code>hsf</code>.
* @param fs file system
* @param hsf store file whose paths we rename to
* @return True if succeeded.
* @throws IOException
*/
public boolean rename(final FileSystem fs, final HStoreFile hsf)
throws IOException {
Path src = getMapFilePath();
if (!fs.exists(src)) {
throw new FileNotFoundException(src.toString());
}
boolean success = fs.rename(src, hsf.getMapFilePath());
if (!success) {
LOG.warn("Failed rename of " + src + " to " + hsf.getMapFilePath());
} else {
src = getInfoFilePath();
if (!fs.exists(src)) {
throw new FileNotFoundException(src.toString());
}
success = fs.rename(src, hsf.getInfoFilePath());
if (!success) {
LOG.warn("Failed rename of " + src + " to " + hsf.getInfoFilePath());
}
}
return success;
}
/**
* Get reader for the store file map file.
* Client is responsible for closing file when done.
* @param fs
* @param bloomFilter If true, a bloom filter exists
* @param blockCacheEnabled If true, MapFile blocks should be cached.
* @return BloomFilterMapFile.Reader
* @throws IOException
*/
public synchronized BloomFilterMapFile.Reader getReader(final FileSystem fs,
final boolean bloomFilter, final boolean blockCacheEnabled)
throws IOException {
if (isReference()) {
return new HalfMapFileReader(fs,
getMapFilePath(reference).toString(), conf,
reference.getFileRegion(), reference.getMidkey(), bloomFilter,
blockCacheEnabled, this.hri);
}
return new BloomFilterMapFile.Reader(fs, getMapFilePath().toString(),
conf, bloomFilter, blockCacheEnabled, this.hri);
}
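// Illustrative usage (the caller is responsible for closing the reader):
//   BloomFilterMapFile.Reader reader = hsf.getReader(fs, false, false);
//   try { /* read key/values */ } finally { reader.close(); }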
/**
* Get a store file writer.
* Client is responsible for closing file when done.
* @param fs
* @param compression Pass <code>SequenceFile.CompressionType.NONE</code>
* for none.
* @param bloomFilter If true, create a bloom filter
* @param nrows number of rows expected. Required if bloomFilter is true.
* @return MapFile.Writer
* @throws IOException
*/
public MapFile.Writer getWriter(final FileSystem fs,
final SequenceFile.CompressionType compression,
final boolean bloomFilter, int nrows)
throws IOException {
if (isReference()) {
throw new IOException("Illegal Access: Cannot get a writer on a" +
"HStoreFile reference");
}
return new BloomFilterMapFile.Writer(conf, fs,
getMapFilePath().toString(), compression, bloomFilter, nrows, this.hri);
}
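// Illustrative usage (the caller is responsible for closing the writer):
//   MapFile.Writer writer =
//     hsf.getWriter(fs, SequenceFile.CompressionType.NONE, false, 0);
//   try { /* append key/values */ } finally { writer.close(); }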
/**
* @return Length of the store map file. If a reference, the size is an
* approximation (half of the referent's length).
* @throws IOException
*/
public long length() throws IOException {
Path p = new Path(getMapFilePath(reference), MapFile.DATA_FILE_NAME);
long l = p.getFileSystem(conf).getFileStatus(p).getLen();
return (isReference())? l / 2: l;
}
/**
* @return Length of the store map file index.
* @throws IOException
*/
public synchronized long indexLength() throws IOException {
if (indexLength == 0) {
Path p = new Path(getMapFilePath(reference), MapFile.INDEX_FILE_NAME);
indexLength = p.getFileSystem(conf).getFileStatus(p).getLen();
}
return indexLength;
}
@Override
public String toString() {
return encodedRegionName + "/" + Bytes.toString(colFamily) + "/" + fileId +
(isReference()? "-" + reference.toString(): "");
}
/**
* @return True if this file was made by a major compaction.
*/
public boolean isMajorCompaction() {
return this.majorCompaction;
}
private static String createHStoreFilename(final long fid,
final int encodedRegionName) {
return Long.toString(fid) +
((encodedRegionName != HRegionInfo.NO_HASH)?
"." + encodedRegionName : "");
}
/**
* @param dir Base directory
* @param encodedRegionName Encoding of region name.
* @param f Column family.
* @return path for map file directory
*/
public static Path getMapDir(Path dir, int encodedRegionName,
final byte [] f) {
return getFamilySubDir(dir, encodedRegionName, f, HSTORE_DATFILE_DIR);
}
/**
* @param dir Base directory
* @param encodedRegionName Encoding of region name.
* @param f Column family.
* @return the info directory path
*/
public static Path getInfoDir(Path dir, int encodedRegionName, byte [] f) {
return getFamilySubDir(dir, encodedRegionName, f, HSTORE_INFO_DIR);
}
/**
* @param dir Base directory
* @param encodedRegionName Encoding of region name.
* @param f Column family.
* @return the bloom filter directory path
*/
@Deprecated
public static Path getFilterDir(Path dir, int encodedRegionName,
final byte [] f) {
return getFamilySubDir(dir, encodedRegionName, f, HSTORE_FILTER_DIR);
}
/*
* @param base Base directory
* @param encodedRegionName Encoding of region name.
* @param f Column family.
* @param subdir Subdirectory to create under column family/store directory.
* @return <code>base/encodedRegionName/family/subdir</code>
*/
private static Path getFamilySubDir(final Path base,
final int encodedRegionName, final byte [] f, final String subdir) {
return new Path(base, new Path(Integer.toString(encodedRegionName),
new Path(Bytes.toString(f), subdir)));
}
}

View File

@ -1,26 +0,0 @@
/*
* Copyright 2009 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
Provides classes from old hbase versions used when migrating data.
The nineteen package has classes from hbase 0.19. See
<a href="http://wiki.apache.org/hadoop/Hbase/HowToMigrate">How to Migrate</a>
for more on migrations.
*/
package org.apache.hadoop.hbase.migration;

View File

@ -54,7 +54,6 @@ import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.Reference.Range;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
@ -1678,12 +1677,10 @@ public class HRegion implements HConstants, HeapSize { // , Writable{
private final KeyValueHeap storeHeap;
private final byte [] stopRow;
private Filter filter;
private RowFilterInterface oldFilter;
private List<KeyValue> results = new ArrayList<KeyValue>();
RegionScanner(Scan scan, List<KeyValueScanner> additionalScanners) {
this.filter = scan.getFilter();
this.oldFilter = scan.getOldFilter();
if (Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
this.stopRow = null;
} else {
@ -1711,9 +1708,6 @@ public class HRegion implements HConstants, HeapSize { // , Writable{
if (filter != null) {
filter.reset();
}
if (oldFilter != null) {
oldFilter.reset();
}
}
/**
@ -1760,8 +1754,7 @@ public class HRegion implements HConstants, HeapSize { // , Writable{
continue;
}
// see if current row should be filtered based on row key
if ((filter != null && filter.filterRowKey(row, 0, row.length)) ||
(oldFilter != null && oldFilter.filterRowKey(row, 0, row.length))) {
if (filter != null && filter.filterRowKey(row, 0, row.length)) {
if(!results.isEmpty() && !Bytes.equals(currentRow, row)) {
return true;
}

View File

@ -782,19 +782,22 @@ public class MemStore implements HeapSize {
long size = 0;
final int count = 10000;
byte [] column = Bytes.toBytes("col:umn");
byte [] fam = Bytes.toBytes("col");
byte [] qf = Bytes.toBytes("umn");
byte [] empty = new byte[0];
for (int i = 0; i < count; i++) {
// Give each its own ts
size += memstore1.add(new KeyValue(Bytes.toBytes(i), column, i));
size += memstore1.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, empty));
}
LOG.info("memstore1 estimated size=" + size);
for (int i = 0; i < count; i++) {
size += memstore1.add(new KeyValue(Bytes.toBytes(i), column, i));
size += memstore1.add(new KeyValue(Bytes.toBytes(i), fam, qf, i, empty));
}
LOG.info("memstore1 estimated size (2nd loading of same data)=" + size);
// Make a variably sized memstore.
MemStore memstore2 = new MemStore();
for (int i = 0; i < count; i++) {
size += memstore2.add(new KeyValue(Bytes.toBytes(i), column, i,
size += memstore2.add(new KeyValue(Bytes.toBytes(i), fam, qf, i,
new byte[i]));
}
LOG.info("memstore2 estimated size=" + size);

View File

@ -24,7 +24,6 @@ import java.util.NavigableSet;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.RowFilterInterface;
import org.apache.hadoop.hbase.filter.Filter.ReturnCode;
import org.apache.hadoop.hbase.util.Bytes;
@ -32,8 +31,6 @@ import org.apache.hadoop.hbase.util.Bytes;
* A query matcher that is specifically designed for the scan case.
*/
public class ScanQueryMatcher extends QueryMatcher {
// have to support old style filter for now.
private RowFilterInterface oldFilter;
// Optimization so we can skip lots of compares when we decide to skip
// to the next row.
private boolean stickyNextRow;
@ -57,7 +54,6 @@ public class ScanQueryMatcher extends QueryMatcher {
this.startKey = KeyValue.createFirstOnRow(scan.getStartRow());
this.stopKey = KeyValue.createFirstOnRow(scan.getStopRow());
this.filter = scan.getFilter();
this.oldFilter = scan.getOldFilter();
// Single branch to deal with two types of reads (columns vs all in family)
if (columns == null || columns.size() == 0) {
@ -84,9 +80,6 @@ public class ScanQueryMatcher extends QueryMatcher {
public MatchCode match(KeyValue kv) {
if (filter != null && filter.filterAllRemaining()) {
return MatchCode.DONE_SCAN;
} else if (oldFilter != null && oldFilter.filterAllRemaining()) {
// the old filter runs only if the other filter didnt work.
return MatchCode.DONE_SCAN;
}
byte [] bytes = kv.getBuffer();

Some files were not shown because too many files have changed in this diff