HBASE-11908 Region replicas should be added to the meta table at the time of table creation

This commit is contained in:
Enis Soztutar 2015-01-26 10:56:59 -08:00
parent a85cb0f89a
commit 2fb19fb951
10 changed files with 167 additions and 26 deletions

View File

@ -739,8 +739,14 @@ public class MetaTableAccessor {
if (replicaId < 0) { if (replicaId < 0) {
break; break;
} }
HRegionLocation location = getRegionLocation(r, regionInfo, replicaId);
locations.add(getRegionLocation(r, regionInfo, replicaId)); // In case the region replica is newly created, its location might be null. We usually do not
// have HRL's in RegionLocations object with null ServerName. They are handled as null HRLs.
if (location == null || location.getServerName() == null) {
locations.add(null);
} else {
locations.add(location);
}
} }
return new RegionLocations(locations); return new RegionLocations(locations);
@ -1089,8 +1095,7 @@ public class MetaTableAccessor {
* Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this
* does not add its daughters as different rows, but adds information about the daughters * does not add its daughters as different rows, but adds information about the daughters
* in the same row as the parent. Use * in the same row as the parent. Use
* {@link #splitRegion(org.apache.hadoop.hbase.client.Connection, * {@link #splitRegion(Connection, HRegionInfo, HRegionInfo, HRegionInfo, ServerName, int)}
* HRegionInfo, HRegionInfo, HRegionInfo, ServerName)}
* if you want to do that. * if you want to do that.
* @param meta the Table for META * @param meta the Table for META
* @param regionInfo region information * @param regionInfo region information
@ -1112,7 +1117,7 @@ public class MetaTableAccessor {
* Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this
* does not add its daughters as different rows, but adds information about the daughters * does not add its daughters as different rows, but adds information about the daughters
* in the same row as the parent. Use * in the same row as the parent. Use
* {@link #splitRegion(Connection, HRegionInfo, HRegionInfo, HRegionInfo, ServerName)} * {@link #splitRegion(Connection, HRegionInfo, HRegionInfo, HRegionInfo, ServerName, int)}
* if you want to do that. * if you want to do that.
* @param connection connection we're using * @param connection connection we're using
* @param regionInfo region information * @param regionInfo region information
@ -1137,12 +1142,19 @@ public class MetaTableAccessor {
* @throws IOException if problem connecting or updating meta * @throws IOException if problem connecting or updating meta
*/ */
public static void addRegionsToMeta(Connection connection, public static void addRegionsToMeta(Connection connection,
List<HRegionInfo> regionInfos) List<HRegionInfo> regionInfos, int regionReplication)
throws IOException { throws IOException {
List<Put> puts = new ArrayList<Put>(); List<Put> puts = new ArrayList<Put>();
for (HRegionInfo regionInfo : regionInfos) { for (HRegionInfo regionInfo : regionInfos) {
if (RegionReplicaUtil.isDefaultReplica(regionInfo)) { if (RegionReplicaUtil.isDefaultReplica(regionInfo)) {
puts.add(makePutFromRegionInfo(regionInfo)); puts.add(makePutFromRegionInfo(regionInfo));
Put put = makePutFromRegionInfo(regionInfo);
// Add empty locations for region replicas so that number of replicas can be cached
// whenever the primary region is looked up from meta
for (int i = 1; i < regionReplication; i++) {
addEmptyLocation(put, i);
}
puts.add(put);
} }
} }
putsToMetaTable(connection, puts); putsToMetaTable(connection, puts);
@ -1180,7 +1192,8 @@ public class MetaTableAccessor {
* @throws IOException * @throws IOException
*/ */
public static void mergeRegions(final Connection connection, HRegionInfo mergedRegion, public static void mergeRegions(final Connection connection, HRegionInfo mergedRegion,
HRegionInfo regionA, HRegionInfo regionB, ServerName sn) throws IOException { HRegionInfo regionA, HRegionInfo regionB, ServerName sn, int regionReplication)
throws IOException {
Table meta = getMetaHTable(connection); Table meta = getMetaHTable(connection);
try { try {
HRegionInfo copyOfMerged = new HRegionInfo(mergedRegion); HRegionInfo copyOfMerged = new HRegionInfo(mergedRegion);
@ -1199,6 +1212,12 @@ public class MetaTableAccessor {
// The merged is a new region, openSeqNum = 1 is fine. // The merged is a new region, openSeqNum = 1 is fine.
addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId()); addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId());
// Add empty locations for region replicas of the merged region so that number of replicas can
// be cached whenever the primary region is looked up from meta
for (int i = 1; i < regionReplication; i++) {
addEmptyLocation(putOfMerged, i);
}
byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString() byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString()
+ HConstants.DELIMITER); + HConstants.DELIMITER);
multiMutate(meta, tableRow, putOfMerged, deleteA, deleteB); multiMutate(meta, tableRow, putOfMerged, deleteA, deleteB);
@ -1220,7 +1239,7 @@ public class MetaTableAccessor {
*/ */
public static void splitRegion(final Connection connection, public static void splitRegion(final Connection connection,
HRegionInfo parent, HRegionInfo splitA, HRegionInfo splitB, HRegionInfo parent, HRegionInfo splitA, HRegionInfo splitB,
ServerName sn) throws IOException { ServerName sn, int regionReplication) throws IOException {
Table meta = getMetaHTable(connection); Table meta = getMetaHTable(connection);
try { try {
HRegionInfo copyOfParent = new HRegionInfo(parent); HRegionInfo copyOfParent = new HRegionInfo(parent);
@ -1238,6 +1257,13 @@ public class MetaTableAccessor {
addLocation(putA, sn, 1, splitA.getReplicaId()); //new regions, openSeqNum = 1 is fine. addLocation(putA, sn, 1, splitA.getReplicaId()); //new regions, openSeqNum = 1 is fine.
addLocation(putB, sn, 1, splitB.getReplicaId()); addLocation(putB, sn, 1, splitB.getReplicaId());
// Add empty locations for region replicas of daughters so that number of replicas can be
// cached whenever the primary region is looked up from meta
for (int i = 1; i < regionReplication; i++) {
addEmptyLocation(putA, i);
addEmptyLocation(putB, i);
}
byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER); byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER);
multiMutate(meta, tableRow, putParent, putA, putB); multiMutate(meta, tableRow, putParent, putA, putB);
} finally { } finally {
@ -1385,14 +1411,14 @@ public class MetaTableAccessor {
* @throws IOException * @throws IOException
*/ */
public static void overwriteRegions(Connection connection, public static void overwriteRegions(Connection connection,
List<HRegionInfo> regionInfos) throws IOException { List<HRegionInfo> regionInfos, int regionReplication) throws IOException {
deleteRegions(connection, regionInfos); deleteRegions(connection, regionInfos);
// Why sleep? This is the easiest way to ensure that the previous deletes do not // Why sleep? This is the easiest way to ensure that the previous deletes do not
// eclipse the following puts, that might happen in the same ts from the server. // eclipse the following puts, that might happen in the same ts from the server.
// See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed, // See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed,
// or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep. // or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep.
Threads.sleep(20); Threads.sleep(20);
addRegionsToMeta(connection, regionInfos); addRegionsToMeta(connection, regionInfos, regionReplication);
LOG.info("Overwritten " + regionInfos); LOG.info("Overwritten " + regionInfos);
} }
@ -1433,4 +1459,12 @@ public class MetaTableAccessor {
Bytes.toBytes(openSeqNum)); Bytes.toBytes(openSeqNum));
return p; return p;
} }
/**
 * Adds empty location cells for the given replica to a meta {@link Put}.
 * <p>
 * The server, start code and sequence number columns of {@code replicaId} are each written
 * to the catalog family with a {@code null} value, all stamped with the same current time.
 *
 * @param p the Put to add the empty location cells to
 * @param replicaId the replica whose location columns are written
 * @return the same Put instance that was passed in
 */
public static Put addEmptyLocation(final Put p, int replicaId) {
  // Read the clock once so the three cells share a single timestamp.
  final long timestamp = EnvironmentEdgeManager.currentTime();
  final byte[] family = HConstants.CATALOG_FAMILY;
  p.addImmutable(family, getServerColumn(replicaId), timestamp, null);
  p.addImmutable(family, getStartCodeColumn(replicaId), timestamp, null);
  p.addImmutable(family, getSeqNumColumn(replicaId), timestamp, null);
  return p;
}
} }

View File

@ -239,12 +239,12 @@ public class RegionStateStore {
} }
void splitRegion(HRegionInfo p, void splitRegion(HRegionInfo p,
HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException { HRegionInfo a, HRegionInfo b, ServerName sn, int regionReplication) throws IOException {
MetaTableAccessor.splitRegion(server.getConnection(), p, a, b, sn); MetaTableAccessor.splitRegion(server.getConnection(), p, a, b, sn, regionReplication);
} }
void mergeRegions(HRegionInfo p, void mergeRegions(HRegionInfo p,
HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException { HRegionInfo a, HRegionInfo b, ServerName sn, int regionReplication) throws IOException {
MetaTableAccessor.mergeRegions(server.getConnection(), p, a, b, sn); MetaTableAccessor.mergeRegions(server.getConnection(), p, a, b, sn, regionReplication);
} }
} }

View File

@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerLoad;
@ -772,7 +773,8 @@ public class RegionStates {
void splitRegion(HRegionInfo p, void splitRegion(HRegionInfo p,
HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException { HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException {
regionStateStore.splitRegion(p, a, b, sn);
regionStateStore.splitRegion(p, a, b, sn, getRegionReplication(p));
synchronized (this) { synchronized (this) {
// After PONR, split is considered to be done. // After PONR, split is considered to be done.
// Update server holdings to be aligned with the meta. // Update server holdings to be aligned with the meta.
@ -788,7 +790,7 @@ public class RegionStates {
void mergeRegions(HRegionInfo p, void mergeRegions(HRegionInfo p,
HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException { HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException {
regionStateStore.mergeRegions(p, a, b, sn); regionStateStore.mergeRegions(p, a, b, sn, getRegionReplication(a));
synchronized (this) { synchronized (this) {
// After PONR, merge is considered to be done. // After PONR, merge is considered to be done.
// Update server holdings to be aligned with the meta. // Update server holdings to be aligned with the meta.
@ -802,6 +804,16 @@ public class RegionStates {
} }
} }
/**
 * Looks up the region replication configured for the table that owns the given region.
 *
 * @param r the region whose table descriptor is consulted
 * @return the table's region replication, or 1 when there is no table state manager or no
 *         descriptor is found for the table
 * @throws IOException if the table descriptor lookup fails
 */
private int getRegionReplication(HRegionInfo r) throws IOException {
  if (tableStateManager == null) {
    return 1;
  }
  HTableDescriptor descriptor = tableStateManager.getTableDescriptors().get(r.getTable());
  // Fall back to a single replica when the table has no known descriptor.
  return descriptor == null ? 1 : descriptor.getRegionReplication();
}
/** /**
* At cluster clean re/start, mark all user regions closed except those of tables * At cluster clean re/start, mark all user regions closed except those of tables
* that are excluded, such as disabled/disabling/enabling tables. All user regions * that are excluded, such as disabled/disabling/enabling tables. All user regions

View File

@ -183,6 +183,10 @@ public class TableStateManager {
return tableState; return tableState;
} }
/**
 * @return the table descriptors held by this manager
 */
TableDescriptors getTableDescriptors() {
  return this.descriptors;
}
/** /**
* Write descriptor in place, update cache of states. * Write descriptor in place, update cache of states.
* Write lock should be held by caller. * Write lock should be held by caller.

View File

@ -225,7 +225,7 @@ public class CreateTableHandler extends EventHandler {
if (regionInfos != null && regionInfos.size() > 0) { if (regionInfos != null && regionInfos.size() > 0) {
// 4. Add regions to META // 4. Add regions to META
addRegionsToMeta(regionInfos); addRegionsToMeta(regionInfos, hTableDescriptor.getRegionReplication());
// 5. Add replicas if needed // 5. Add replicas if needed
regionInfos = addReplicas(hTableDescriptor, regionInfos); regionInfos = addReplicas(hTableDescriptor, regionInfos);
@ -296,8 +296,8 @@ public class CreateTableHandler extends EventHandler {
/** /**
* Add the specified set of regions to the hbase:meta table. * Add the specified set of regions to the hbase:meta table.
*/ */
protected void addRegionsToMeta(final List<HRegionInfo> regionInfos) protected void addRegionsToMeta(final List<HRegionInfo> regionInfos, int regionReplication)
throws IOException { throws IOException {
MetaTableAccessor.addRegionsToMeta(this.server.getConnection(), regionInfos); MetaTableAccessor.addRegionsToMeta(this.server.getConnection(), regionInfos, regionReplication);
} }
} }

View File

@ -125,7 +125,7 @@ public class TruncateTableHandler extends DeleteTableHandler {
// 4. Add regions to META // 4. Add regions to META
MetaTableAccessor.addRegionsToMeta(masterServices.getConnection(), MetaTableAccessor.addRegionsToMeta(masterServices.getConnection(),
regionInfos); regionInfos, hTableDescriptor.getRegionReplication());
// 5. Trigger immediate assignment of the regions in round-robin fashion // 5. Trigger immediate assignment of the regions in round-robin fashion
ModifyRegionUtils.assignRegions(assignmentManager, regionInfos); ModifyRegionUtils.assignRegions(assignmentManager, regionInfos);

View File

@ -138,9 +138,10 @@ public class CloneSnapshotHandler extends CreateTableHandler implements Snapshot
} }
@Override @Override
protected void addRegionsToMeta(final List<HRegionInfo> regionInfos) protected void addRegionsToMeta(final List<HRegionInfo> regionInfos,
int regionReplication)
throws IOException { throws IOException {
super.addRegionsToMeta(regionInfos); super.addRegionsToMeta(regionInfos, regionReplication);
metaChanges.updateMetaParentRegions(this.server.getConnection(), regionInfos); metaChanges.updateMetaParentRegions(this.server.getConnection(), regionInfos);
} }

View File

@ -159,9 +159,10 @@ public class RestoreSnapshotHandler extends TableEventHandler implements Snapsho
// in the snapshot folder. // in the snapshot folder.
hris.clear(); hris.clear();
if (metaChanges.hasRegionsToAdd()) hris.addAll(metaChanges.getRegionsToAdd()); if (metaChanges.hasRegionsToAdd()) hris.addAll(metaChanges.getRegionsToAdd());
MetaTableAccessor.addRegionsToMeta(conn, hris); MetaTableAccessor.addRegionsToMeta(conn, hris, hTableDescriptor.getRegionReplication());
if (metaChanges.hasRegionsToRestore()) { if (metaChanges.hasRegionsToRestore()) {
MetaTableAccessor.overwriteRegions(conn, metaChanges.getRegionsToRestore()); MetaTableAccessor.overwriteRegions(conn, metaChanges.getRegionsToRestore(),
hTableDescriptor.getRegionReplication());
} }
metaChanges.updateMetaParentRegions(this.server.getConnection(), hris); metaChanges.updateMetaParentRegions(this.server.getConnection(), hris);

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.hbase;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull; import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
@ -49,6 +50,8 @@ import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.junit.experimental.categories.Category; import org.junit.experimental.categories.Category;
import com.google.common.collect.Lists;
/** /**
* Test {@link org.apache.hadoop.hbase.MetaTableAccessor}. * Test {@link org.apache.hadoop.hbase.MetaTableAccessor}.
*/ */
@ -58,6 +61,7 @@ public class TestMetaTableAccessor {
private static final Log LOG = LogFactory.getLog(TestMetaTableAccessor.class); private static final Log LOG = LogFactory.getLog(TestMetaTableAccessor.class);
private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
private static Connection connection; private static Connection connection;
private Random random = new Random();
@BeforeClass public static void beforeClass() throws Exception { @BeforeClass public static void beforeClass() throws Exception {
UTIL.startMiniCluster(3); UTIL.startMiniCluster(3);
@ -320,7 +324,6 @@ public class TestMetaTableAccessor {
@Test @Test
public void testMetaLocationsForRegionReplicas() throws IOException { public void testMetaLocationsForRegionReplicas() throws IOException {
Random random = new Random();
ServerName serverName0 = ServerName.valueOf("foo", 60010, random.nextLong()); ServerName serverName0 = ServerName.valueOf("foo", 60010, random.nextLong());
ServerName serverName1 = ServerName.valueOf("bar", 60010, random.nextLong()); ServerName serverName1 = ServerName.valueOf("bar", 60010, random.nextLong());
ServerName serverName100 = ServerName.valueOf("baz", 60010, random.nextLong()); ServerName serverName100 = ServerName.valueOf("baz", 60010, random.nextLong());
@ -381,5 +384,91 @@ public class TestMetaTableAccessor {
Bytes.toBytes(seqNum))); Bytes.toBytes(seqNum)));
} }
} }
/**
 * Asserts that the meta row holds server and start code cells for the given replica and that
 * both cells have a zero-length value.
 *
 * @param meta the meta table to read from
 * @param row the meta row key to fetch
 * @param replicaId the replica whose location columns are checked
 * @throws IOException if reading the row fails
 */
public static void assertEmptyMetaLocation(Table meta, byte[] row, int replicaId)
    throws IOException {
  Result metaRow = meta.get(new Get(row));
  Cell server = metaRow.getColumnLatestCell(HConstants.CATALOG_FAMILY,
      MetaTableAccessor.getServerColumn(replicaId));
  Cell startCode = metaRow.getColumnLatestCell(HConstants.CATALOG_FAMILY,
      MetaTableAccessor.getStartCodeColumn(replicaId));
  // Both cells must exist before their value lengths are inspected.
  assertNotNull(server);
  assertNotNull(startCode);
  assertEquals(0, server.getValueLength());
  assertEquals(0, startCode.getValueLength());
}
/**
 * Adds a primary region to meta with a region replication of 3 and checks that empty
 * locations are present for replicas 1 and 2.
 */
@Test
public void testMetaLocationForRegionReplicasIsAddedAtTableCreation() throws IOException {
  final int regionReplication = 3;
  long regionId = System.currentTimeMillis();
  HRegionInfo primary = new HRegionInfo(TableName.valueOf("table_foo"),
      HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false, regionId, 0);

  Table meta = MetaTableAccessor.getMetaHTable(connection);
  try {
    List<HRegionInfo> toAdd = Lists.newArrayList(primary);
    MetaTableAccessor.addRegionsToMeta(connection, toAdd, regionReplication);

    // Replica 0 is the primary itself; only the secondary replicas get empty locations.
    for (int replicaId = 1; replicaId < regionReplication; replicaId++) {
      assertEmptyMetaLocation(meta, primary.getRegionName(), replicaId);
    }
  } finally {
    meta.close();
  }
}
/**
 * Splits a parent region in meta with a region replication of 3 and checks that both
 * daughters have empty locations for replicas 1 and 2.
 */
@Test
public void testMetaLocationForRegionReplicasIsAddedAtRegionSplit() throws IOException {
  final int regionReplication = 3;
  long regionId = System.currentTimeMillis();
  ServerName server = ServerName.valueOf("foo", 60010, random.nextLong());
  TableName tableName = TableName.valueOf("table_foo");
  byte[] splitKey = Bytes.toBytes("a");

  HRegionInfo parent = new HRegionInfo(tableName,
      HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false, regionId, 0);
  HRegionInfo splitA = new HRegionInfo(tableName,
      HConstants.EMPTY_START_ROW, splitKey, false, regionId + 1, 0);
  HRegionInfo splitB = new HRegionInfo(tableName,
      splitKey, HConstants.EMPTY_END_ROW, false, regionId + 1, 0);

  Table meta = MetaTableAccessor.getMetaHTable(connection);
  try {
    List<HRegionInfo> toAdd = Lists.newArrayList(parent);
    MetaTableAccessor.addRegionsToMeta(connection, toAdd, regionReplication);

    MetaTableAccessor.splitRegion(connection, parent, splitA, splitB, server,
        regionReplication);

    // Check daughter A first, then daughter B, each for replicas 1 and 2.
    for (HRegionInfo daughter : new HRegionInfo[] { splitA, splitB }) {
      for (int replicaId = 1; replicaId < regionReplication; replicaId++) {
        assertEmptyMetaLocation(meta, daughter.getRegionName(), replicaId);
      }
    }
  } finally {
    meta.close();
  }
}
/**
 * Merges two regions in meta with a region replication of 3 and checks that the merged
 * region has empty locations for replicas 1 and 2.
 */
@Test
public void testMetaLocationForRegionReplicasIsAddedAtRegionMerge() throws IOException {
  final int regionReplication = 3;
  long regionId = System.currentTimeMillis();
  ServerName server = ServerName.valueOf("foo", 60010, random.nextLong());
  TableName tableName = TableName.valueOf("table_foo");
  byte[] boundaryKey = Bytes.toBytes("a");

  HRegionInfo parentA = new HRegionInfo(tableName,
      boundaryKey, HConstants.EMPTY_END_ROW, false, regionId, 0);
  HRegionInfo parentB = new HRegionInfo(tableName,
      HConstants.EMPTY_START_ROW, boundaryKey, false, regionId, 0);
  HRegionInfo merged = new HRegionInfo(tableName,
      HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false, regionId + 1, 0);

  Table meta = MetaTableAccessor.getMetaHTable(connection);
  try {
    List<HRegionInfo> toAdd = Lists.newArrayList(parentA, parentB);
    MetaTableAccessor.addRegionsToMeta(connection, toAdd, regionReplication);

    MetaTableAccessor.mergeRegions(connection, merged, parentA, parentB, server,
        regionReplication);

    // Only the secondary replicas of the merged region get empty locations.
    for (int replicaId = 1; replicaId < regionReplication; replicaId++) {
      assertEmptyMetaLocation(meta, merged.getRegionName(), replicaId);
    }
  } finally {
    meta.close();
  }
}
} }

View File

@ -166,7 +166,7 @@ public class TestMetaScanner {
end); end);
MetaTableAccessor.splitRegion(connection, MetaTableAccessor.splitRegion(connection,
parent, splita, splitb, ServerName.valueOf("fooserver", 1, 0)); parent, splita, splitb, ServerName.valueOf("fooserver", 1, 0), 1);
Threads.sleep(random.nextInt(200)); Threads.sleep(random.nextInt(200));
} catch (Throwable e) { } catch (Throwable e) {