HBASE-5070 Constraints implementation and javadoc changes (Jesse Yates)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1223024 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhihong Yu 2011-12-24 21:42:31 +00:00
parent 73e8383359
commit 499d11b741
6 changed files with 165 additions and 101 deletions

View File

@ -870,7 +870,9 @@ System.out.println("md5 digest as string length: " + sbDigest.length); // ret
<section xml:id="constraints"><title>Constraints</title> <section xml:id="constraints"><title>Constraints</title>
<para>HBase currently supports 'constraints' in traditional (SQL) database parlance. The advised usage for Constraints is in enforcing business rules for attributes in the table (eg. make sure values are in the range 1-10). <para>HBase currently supports 'constraints' in traditional (SQL) database parlance. The advised usage for Constraints is in enforcing business rules for attributes in the table (eg. make sure values are in the range 1-10).
Constraints could also be used to enforce referential integrity, but this is strongly discouraged as it will dramatically decrease the write throughput of the tables where integrity checking enabled.</para> Constraints could also be used to enforce referential integrity, but this is strongly discouraged as it will dramatically decrease the write throughput of the tables where integrity checking is enabled.
Extensive documentation on using Constraints can be found at: <link xlink:href="http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/constraint">Constraint</link> since version 0.94.
</para>
</section> </section>
</chapter> <!-- schema design --> </chapter> <!-- schema design -->

View File

@ -17,25 +17,12 @@
*/ */
package org.apache.hadoop.hbase.constraint; package org.apache.hadoop.hbase.constraint;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured;
/** /**
* Base class to use when actually implementing a {@link Constraint}. It takes * Base class to use when actually implementing a {@link Constraint}. It takes
* care of getting and setting of configuration for the constraint. * care of getting and setting of configuration for the constraint.
*/ */
public abstract class BaseConstraint implements public abstract class BaseConstraint extends Configured implements Constraint {
Constraint {
private Configuration conf;
@Override
public void setConf(Configuration conf) {
this.conf = conf;
}
@Override
public Configuration getConf() {
return this.conf;
}
} }

View File

@ -37,8 +37,8 @@ import org.apache.hadoop.hbase.client.Put;
* {@link Constraints#disableConstraint(HTableDescriptor, Class)} for * {@link Constraints#disableConstraint(HTableDescriptor, Class)} for
* enabling/disabling of a given {@link Constraint} after it has been added. * enabling/disabling of a given {@link Constraint} after it has been added.
* <p> * <p>
* If a {@link Put} is not valid, the Constraint should throw some sort * If a {@link Put} is invalid, the Constraint should throw some sort of
* {@link ConstraintException} indicating that the {@link Put} has failed. When * {@link ConstraintException}, indicating that the {@link Put} has failed. When
* this exception is thrown, no further retries of the {@link Put} are * this exception is thrown, no further retries of the {@link Put} are
* attempted nor are any other {@link Constraint Constraints} attempted (the * attempted nor are any other {@link Constraint Constraints} attempted (the
* {@link Put} is clearly not valid). Therefore, there are performance * {@link Put} is clearly not valid). Therefore, there are performance
@ -49,14 +49,17 @@ import org.apache.hadoop.hbase.client.Put;
* {@link ConstraintException}, but instead throws a {@link RuntimeException}, * {@link ConstraintException}, but instead throws a {@link RuntimeException},
* the entire constraint processing mechanism ({@link ConstraintProcessor}) will * the entire constraint processing mechanism ({@link ConstraintProcessor}) will
* be unloaded from the table. This ensures that the region server is still * be unloaded from the table. This ensures that the region server is still
* functional, but not more {@link Put Puts} will be checked via * functional, but no more {@link Put Puts} will be checked via
* {@link Constraint Constraints}. * {@link Constraint Constraints}.
* <p> * <p>
* Further, {@link Constraint Constraints} probably not be used to enforce * Further, {@link Constraint Constraints} should probably not be used to
* cross-table references as it will cause tremendous write slowdowns, but it is * enforce cross-table references as it will cause tremendous write slowdowns,
* possible. * but it is possible.
* <p> * <p>
* NOTE: Implementing classes must have a nullary (no-args) constructor * NOTE: Implementing classes must have a nullary (no-args) constructor
*
* @see BaseConstraint
* @see Constraints
*/ */
public interface Constraint extends Configurable { public interface Constraint extends Configurable {

View File

@ -83,8 +83,9 @@ public final class Constraints {
public static void enable(HTableDescriptor desc) throws IOException { public static void enable(HTableDescriptor desc) throws IOException {
// if the CP has already been loaded, do nothing // if the CP has already been loaded, do nothing
String clazz = ConstraintProcessor.class.getName(); String clazz = ConstraintProcessor.class.getName();
if (desc.hasCoprocessor(clazz)) if (desc.hasCoprocessor(clazz)) {
return; return;
}
// add the constraint processor CP to the table // add the constraint processor CP to the table
desc.addCoprocessor(clazz); desc.addCoprocessor(clazz);
@ -164,9 +165,8 @@ public final class Constraints {
// get the serialized version of the constraint // get the serialized version of the constraint
String key = serializeConstraintClass(clazz); String key = serializeConstraintClass(clazz);
String value = desc.getValue(key); String value = desc.getValue(key);
if (value == null)
return null; return value == null ? null : new Pair<String, String>(key, value);
return new Pair<String, String>(key, value);
} }
/** /**
@ -174,6 +174,12 @@ public final class Constraints {
* <p> * <p>
* This will overwrite any configuration associated with the previous * This will overwrite any configuration associated with the previous
* constraint of the same class. * constraint of the same class.
* <p>
* Each constraint, when added to the table, will have a specific priority,
* dictating the order in which the {@link Constraint} will be run. A
* {@link Constraint} earlier in the list will be run before those later in
* the list. The same logic applies between two Constraints over time (earlier
* added is run first on the regionserver).
* *
* @param desc * @param desc
* {@link HTableDescriptor} to add {@link Constraint Constraints} * {@link HTableDescriptor} to add {@link Constraint Constraints}
@ -201,6 +207,12 @@ public final class Constraints {
* <p> * <p>
* Adding the same constraint class twice will overwrite the first * Adding the same constraint class twice will overwrite the first
* constraint's configuration * constraint's configuration
* <p>
* Each constraint, when added to the table, will have a specific priority,
* dictating the order in which the {@link Constraint} will be run. A
* {@link Constraint} earlier in the list will be run before those later in
* the list. The same logic applies between two Constraints over time (earlier
* added is run first on the regionserver).
* *
* @param desc * @param desc
* {@link HTableDescriptor} to add a {@link Constraint} * {@link HTableDescriptor} to add a {@link Constraint}
@ -227,6 +239,11 @@ public final class Constraints {
/** /**
* Add a {@link Constraint} to the table with the given configuration * Add a {@link Constraint} to the table with the given configuration
* <p>
* Each constraint, when added to the table, will have a specific priority,
* dictating the order in which the {@link Constraint} will be run. A
* {@link Constraint} added will run on the regionserver before those added to
* the {@link HTableDescriptor} later.
* *
* @param desc * @param desc
* table descriptor to the constraint to * table descriptor to the constraint to
@ -296,9 +313,11 @@ public final class Constraints {
} }
/** /**
* Just write the class to the byte [] we are expecting * Just write the class to a String representation of the class as a key for
* the {@link HTableDescriptor}
* *
* @param clazz * @param clazz
* Constraint class to convert to a {@link HTableDescriptor} key
* @return key to store in the {@link HTableDescriptor} * @return key to store in the {@link HTableDescriptor}
*/ */
private static String serializeConstraintClass( private static String serializeConstraintClass(
@ -385,8 +404,8 @@ public final class Constraints {
} }
/** /**
* Update the configuration for the {@link Constraint}. Does not change the * Update the configuration for the {@link Constraint}; does not change the
* order in which the constraint is run. If the * order in which the constraint is run.
* *
* @param desc * @param desc
* {@link HTableDescriptor} to update * {@link HTableDescriptor} to update
@ -405,9 +424,10 @@ public final class Constraints {
// get the entry for this class // get the entry for this class
Pair<String, String> e = getKeyValueForClass(desc, clazz); Pair<String, String> e = getKeyValueForClass(desc, clazz);
if (e == null) if (e == null) {
throw new IllegalArgumentException("Constraint: " + clazz.getName() throw new IllegalArgumentException("Constraint: " + clazz.getName()
+ " is not associated with this table."); + " is not associated with this table.");
}
// clone over the configuration elements // clone over the configuration elements
Configuration conf = new Configuration(configuration); Configuration conf = new Configuration(configuration);
@ -480,9 +500,10 @@ public final class Constraints {
Class<? extends Constraint> clazz, boolean enabled) throws IOException { Class<? extends Constraint> clazz, boolean enabled) throws IOException {
// get the original constraint // get the original constraint
Pair<String, String> entry = getKeyValueForClass(desc, clazz); Pair<String, String> entry = getKeyValueForClass(desc, clazz);
if (entry == null) if (entry == null) {
throw new IllegalArgumentException("Constraint: " + clazz.getName() throw new IllegalArgumentException("Constraint: " + clazz.getName()
+ " is not associated with this table. You can't enable it!"); + " is not associated with this table. You can't enable it!");
}
// create a new configuration from that conf // create a new configuration from that conf
Configuration conf = readConfiguration(entry.getSecond()); Configuration conf = readConfiguration(entry.getSecond());
@ -511,8 +532,9 @@ public final class Constraints {
// get the kv // get the kv
Pair<String, String> entry = getKeyValueForClass(desc, clazz); Pair<String, String> entry = getKeyValueForClass(desc, clazz);
// it's not enabled so just return false. In fact, it's not even present! // it's not enabled so just return false. In fact, it's not even present!
if (entry == null) if (entry == null) {
return false; return false;
}
// get the info about the constraint // get the info about the constraint
Configuration conf = readConfiguration(entry.getSecond()); Configuration conf = readConfiguration(entry.getSecond());
@ -526,7 +548,10 @@ public final class Constraints {
* @param desc * @param desc
* To read from * To read from
* @param classloader * @param classloader
* To use when loading classes * To use when loading classes. If a special classloader is used on a
* region, for instance, then that should be the classloader used to
* load the constraints. This could also apply to a unit-testing
* situation, where we want to ensure that a class is reloaded or not.
* @return List of configured {@link Constraint Constraints} * @return List of configured {@link Constraint Constraints}
* @throws IOException * @throws IOException
* if any part of reading/arguments fails * if any part of reading/arguments fails

View File

@ -35,51 +35,54 @@
For instance, you can ensure that a certain column family-column qualifier pair always has a value between 1 and 10. For instance, you can ensure that a certain column family-column qualifier pair always has a value between 1 and 10.
Otherwise, the {@link org.apache.hadoop.hbase.client.Put} is rejected and the data integrity is maintained. Otherwise, the {@link org.apache.hadoop.hbase.client.Put} is rejected and the data integrity is maintained.
<p> <p>
Constraints are designed to be configurable, so a constraints can be used across different tables, but implement different behavior depending on the specific configuration given to that constraint. Constraints are designed to be configurable, so a constraint can be used across different tables, but implement different
behavior depending on the specific configuration given to that constraint.
<p> <p>
By adding a constraint to a table (see <a href="#usage">Example Usage</a>), constraints will automatically be enabled. By adding a constraint to a table (see <a href="#usage">Example Usage</a>), constraints will automatically be enabled.
You also then have the option of disabling (just 'turn off') or remove (delete all associated information) constraints on a table. You also then have the option to disable (just 'turn off') or remove (delete all associated information) all constraints on a table.
If you remove all constraints (see {@link org.apache.hadoop.hbase.constraint.Constraints#remove(org.apache.hadoop.hbase.HTableDescriptor)}, you must re-add any {@link org.apache.hadoop.hbase.constraint.Constraint} you want on that table. If you remove all constraints
However, if they are just disabled (see {@link org.apache.hadoop.hbase.constraint.Constraints#disable(org.apache.hadoop.hbase.HTableDescriptor)}, all you need to do is enable constraints again, and everything will be turned back on. (see {@link org.apache.hadoop.hbase.constraint.Constraints#remove(org.apache.hadoop.hbase.HTableDescriptor)},
Individual constraints can also be individually enabled, disabled or removed without affecting others. you must re-add any {@link org.apache.hadoop.hbase.constraint.Constraint} you want on that table.
However, if they are just disabled (see {@link org.apache.hadoop.hbase.constraint.Constraints#disable(org.apache.hadoop.hbase.HTableDescriptor)},
all you need to do is enable constraints again, and everything will be turned back on as it was configured.
Individual constraints can also be individually enabled, disabled or removed without affecting other constraints.
<p> <p>
By default, constraints are disabled on a table. By default, constraints are disabled on a table.
This means you will not see <i>any</i> slow down on a table if constraints are not enabled. This means you will not see <i>any</i> slow down on a table if constraints are not enabled.
<p> <p>
<b>NOTES</b>
<ol>
<li>Constraints are run in the order that they are added to a table. This has implications for what order constraints should be added to a table.</li>
<li>There are certain keys that are reserved for the Configuration namespace:
<ul>
<li>_ENABLED - used server-side to determine if a constraint should be run</li>
<li>_PRIORITY - used server-side to determine what order a constraint should be run</li>
</ul>
If these items are set, they will be respected in the constraint configuration, but they are taken care of by default in when adding constraints to an {@link org.apache.hadoop.hbase.HTableDescriptor} via the usual method.</li>
</ol>
<p>
Under the hood, constraints are implemented as a Coprocessor (see {@link org.apache.hadoop.hbase.constraint.ConstraintProcessor} if you are interested).
<h2><a name="concurrency">Concurrency and Atomicity</a></h2> <h2><a name="concurrency">Concurrency and Atomicity</a></h2>
Currently, no attempts at enforcing correctness in a multi-threaded scenario when modifying a constraint, via {@link org.apache.hadoop.hbase.constraint.Constraints}, to the the {@link org.apache.hadoop.hbase.HTableDescriptor}. Currently, no attempts are made at enforcing correctness in a multi-threaded scenario when modifying a constraint, via
This is particularly important when adding a constraint(s) to the descriptor as it first retrieves the next priority from a custom value set in the descriptor, {@link org.apache.hadoop.hbase.constraint.Constraints}, to the {@link org.apache.hadoop.hbase.HTableDescriptor}.
adds each constraint (with increasing priority) to the descriptor, and then the next available priority is re-stored back in the {@link org.apache.hadoop.hbase.HTableDescriptor}. This is particularly important when adding a constraint(s) to the {@link org.apache.hadoop.hbase.HTableDescriptor}
Locking is recommended around each of Constraints add methods: {@link org.apache.hadoop.hbase.constraint.Constraints#add(org.apache.hadoop.hbase.HTableDescriptor, Class...)}, as it first retrieves the next priority from a custom value set in the descriptor,
{@link org.apache.hadoop.hbase.constraint.Constraints#add(org.apache.hadoop.hbase.HTableDescriptor, org.apache.hadoop.hbase.util.Pair...)}, and {@link org.apache.hadoop.hbase.constraint.Constraints#add(org.apache.hadoop.hbase.HTableDescriptor, Class, org.apache.hadoop.conf.Configuration)}. adds each constraint (with increasing priority) to the descriptor, and then the next available priority is re-stored
back in the {@link org.apache.hadoop.hbase.HTableDescriptor}.
<p>
Locking is recommended around each of Constraints add methods:
{@link org.apache.hadoop.hbase.constraint.Constraints#add(org.apache.hadoop.hbase.HTableDescriptor, Class...)},
{@link org.apache.hadoop.hbase.constraint.Constraints#add(org.apache.hadoop.hbase.HTableDescriptor, org.apache.hadoop.hbase.util.Pair...)},
and {@link org.apache.hadoop.hbase.constraint.Constraints#add(org.apache.hadoop.hbase.HTableDescriptor, Class, org.apache.hadoop.conf.Configuration)}.
Any changes on <i>a single HTableDescriptor</i> should be serialized, either within a single thread or via external mechanisms. Any changes on <i>a single HTableDescriptor</i> should be serialized, either within a single thread or via external mechanisms.
<p> <p>
Note that having a higher priority means that a constraint will run later; e.g. a constraint with priority 1 will run before a constraint with priority 2. Note that having a higher priority means that a constraint will run later; e.g. a constraint with priority 1 will run before a
constraint with priority 2.
<p> <p>
Since Constraints currently are designed to just implement simple checks (e.g. is the value in the right range), there will be no atomicity conflicts. Since Constraints currently are designed to just implement simple checks (e.g. is the value in the right range), there will
Even if one of the puts finishes the constraint first, the single row will not be corrupted and the 'fastest' write will win; the underlying region takes care of breaking the tie and ensuring that writes get serialized to the table. be no atomicity conflicts.
Even if one of the puts finishes the constraint first, the single row will not be corrupted and the 'fastest' write will win;
the underlying region takes care of breaking the tie and ensuring that writes get serialized to the table.
So yes, this doesn't ensure that we are going to get specific ordering or even a fully consistent view of the underlying data. So yes, this doesn't ensure that we are going to get specific ordering or even a fully consistent view of the underlying data.
<p> <p>
Each constraint should only use local/instance variables, unless doing more advanced usage. Static variables could cause difficulties when checking concurrent writes to the same region, leading to either highly locked situations (decreasing throughtput) or higher probability of errors. Each constraint should only use local/instance variables, unless doing more advanced usage. Static variables could cause difficulties
However, as long as each constraint just uses local variables, each thread interacting with the constraint will be completely fine. when checking concurrent writes to the same region, leading to either highly locked situations (decreasing through-put) or higher probability of errors.
However, as long as each constraint just uses local variables, each thread interacting with the constraint will execute correctly and efficiently.
<h2><a name="caveats">Caveats</a></h2> <h2><a name="caveats">Caveats</a></h2>
In traditional (SQL) databases, Constraints are often used to enforce <a href="http://en.wikipedia.org/wiki/Relational_database#Constraints">referential integrity</a>. In traditional (SQL) databases, Constraints are often used to enforce <a href="http://en.wikipedia.org/wiki/Relational_database#Constraints">referential integrity</a>.
However, in HBase, this will likely cause significant overhead and dramatically decrease the number of {@link org.apache.hadoop.hbase.client.Put Puts}/second possible on a table. However, in HBase, this will likely cause significant overhead and dramatically decrease the number of
This is because to check the referential integrity when making a {@link org.apache.hadoop.hbase.client.Put}, one must block on a scan for the 'remote' table, checking for the valid reference. {@link org.apache.hadoop.hbase.client.Put Puts}/second possible on a table. This is because to check the referential integrity
when making a {@link org.apache.hadoop.hbase.client.Put}, one must block on a scan for the 'remote' table, checking for the valid reference.
For millions of {@link org.apache.hadoop.hbase.client.Put Puts} a second, this will break down very quickly. For millions of {@link org.apache.hadoop.hbase.client.Put Puts} a second, this will break down very quickly.
There are several options around the blocking behavior including, but not limited to: There are several options around the blocking behavior including, but not limited to:
<ul> <ul>
@ -87,14 +90,37 @@
<li>Designing for 'incorrect' references</li> <li>Designing for 'incorrect' references</li>
<li>Using an external enforcement mechanism</li> <li>Using an external enforcement mechanism</li>
</ul> </ul>
There are also several general considerations that must be taken into account, when using Constraints:
<ol>
<li>All changes made via {@link org.apache.hadoop.hbase.constraint.Constraints} will make modifications to the
{@link org.apache.hadoop.hbase.HTableDescriptor} for a given table. As such, the usual re-enabling of tables should be used for
propagating changes to the table. When at all possible, Constraints should be added to the table before the table is created.</li>
<li>Constraints are run in the order that they are added to a table. This has implications for what order constraints should
be added to a table.</li>
<li>Whenever new Constraint jars are added to a region server, those region servers need to go through a rolling restart to
make sure that they pick up the new jars and can enable the new constraints.</li>
<li>There are certain keys that are reserved for the Configuration namespace:
<ul>
<li>_ENABLED - used server-side to determine if a constraint should be run</li>
<li>_PRIORITY - used server-side to determine what order a constraint should be run</li>
</ul>
If these items are set, they will be respected in the constraint configuration, but they are taken care of by default when
adding constraints to an {@link org.apache.hadoop.hbase.HTableDescriptor} via the usual method.</li>
</ol>
<p>
Under the hood, constraints are implemented as a Coprocessor (see {@link org.apache.hadoop.hbase.constraint.ConstraintProcessor}
if you are interested).
<h2><a name="usage">Example usage</a></h2> <h2><a name="usage">Example usage</a></h2>
First, you must define a {@link org.apache.hadoop.hbase.constraint.Constraint}. First, you must define a {@link org.apache.hadoop.hbase.constraint.Constraint}.
The best way to do this is to extend {@link org.apache.hadoop.hbase.constraint.BaseConstraint}, which takes care of some of the more mundane details of using a {@link org.apache.hadoop.hbase.constraint.Constraint}. The best way to do this is to extend {@link org.apache.hadoop.hbase.constraint.BaseConstraint}, which takes care of some of the more
mundane details of using a {@link org.apache.hadoop.hbase.constraint.Constraint}.
<p> <p>
Let's look at one possible implementation of a constraint - an IntegerConstraint(there are also several simple examples in the tests). Let's look at one possible implementation of a constraint - an IntegerConstraint(there are also several simple examples in the tests).
The IntegerConstraint checks to make sure that the value is a String-encoded <code>int</code>. The IntegerConstraint checks to make sure that the value is a String-encoded <code>int</code>.
It is really simple to implement this kind of constraint, the only method needs to be implemented is {@link org.apache.hadoop.hbase.constraint.Constraint#check(org.apache.hadoop.hbase.client.Put)}: It is really simple to implement this kind of constraint; the only method that needs to be implemented is
{@link org.apache.hadoop.hbase.constraint.Constraint#check(org.apache.hadoop.hbase.client.Put)}:
<div style="background-color: #cccccc; padding: 2px"> <div style="background-color: #cccccc; padding: 2px">
<blockquote><pre> <blockquote><pre>
@ -119,12 +145,16 @@
</pre></blockquote> </pre></blockquote>
</div> </div>
<p> <p>
Note that all exceptions that you expect to be thrown must be caught and then rethrown as a {@link org.apache.hadoop.hbase.constraint.ConstraintException}. Note that all exceptions that you expect to be thrown must be caught and then rethrown as a
This way, you can be sue that a {@link org.apache.hadoop.hbase.client.Put} fails for an expected reason, rather than for any reason. {@link org.apache.hadoop.hbase.constraint.ConstraintException}. This way, you can be sure that a
For example, an {@link java.lang.OutOfMemoryError} is probably indicative of an inherent problem in the {@link org.apache.hadoop.hbase.constraint.Constraint}, rather than a failed {@link org.apache.hadoop.hbase.client.Put}. {@link org.apache.hadoop.hbase.client.Put} fails for an expected reason, rather than for any reason.
For example, an {@link java.lang.OutOfMemoryError} is probably indicative of an inherent problem in
the {@link org.apache.hadoop.hbase.constraint.Constraint}, rather than a failed {@link org.apache.hadoop.hbase.client.Put}.
<p> <p>
If an unexpected exception is thrown (for example, any kind of uncaught {@link java.lang.RuntimeException}), constraint-checking will be 'unloaded' from the regionserver where that error occurred. If an unexpected exception is thrown (for example, any kind of uncaught {@link java.lang.RuntimeException}),
This means no further {@link org.apache.hadoop.hbase.constraint.Constraint Constraints} will be checked on that server until it is reloaded. This is done to ensure the system remains as available as possible. constraint-checking will be 'unloaded' from the regionserver where that error occurred.
This means no further {@link org.apache.hadoop.hbase.constraint.Constraint Constraints} will be checked on that server
until it is reloaded. This is done to ensure the system remains as available as possible.
Therefore, be careful when writing your own Constraint. Therefore, be careful when writing your own Constraint.
<p> <p>
So now that we have a Constraint, we want to add it to a table. It's as easy as: So now that we have a Constraint, we want to add it to a table. It's as easy as:
@ -136,10 +166,12 @@
Constraints.add(desc, IntegerConstraint.class); Constraints.add(desc, IntegerConstraint.class);
</pre></blockquote></div> </pre></blockquote></div>
<p> <p>
Once we added the IntegerConstraint, constraints will be enabled on the table (once it is created) and we will always check to make sure that the value is an String-encoded integer. Once we added the IntegerConstraint, constraints will be enabled on the table (once it is created) and
we will always check to make sure that the value is a String-encoded integer.
<p> <p>
However, suppose we also write our own constraint, <code>MyConstraint.java</code>. However, suppose we also write our own constraint, <code>MyConstraint.java</code>.
First, you need to make sure this class-files are in the classpath (in a jar) on the regionserver where that constraint will be run. First, you need to make sure these class files are in the classpath (in a jar) on the regionserver where
that constraint will be run (this could require a rolling restart on the region server - see <a href="#caveats">Caveats</a> above).
<p> <p>
Suppose that MyConstraint also uses a Configuration (see {@link org.apache.hadoop.hbase.constraint.Constraint#getConf()}). Suppose that MyConstraint also uses a Configuration (see {@link org.apache.hadoop.hbase.constraint.Constraint#getConf()}).
Then adding MyConstraint looks like this: Then adding MyConstraint looks like this:
@ -147,7 +179,7 @@
<div style="background-color: #cccccc; padding: 2px"> <div style="background-color: #cccccc; padding: 2px">
<blockquote><pre> <blockquote><pre>
HTableDescriptor desc = new HTableDescriptor(TABLE_NAME); HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
Configuration conf = new Configuration(); Configuration conf = new Configuration(false);
... ...
(add values to the conf) (add values to the conf)
(modify the table descriptor) (modify the table descriptor)
@ -155,9 +187,12 @@
Constraints.add(desc, new Pair(MyConstraint.class, conf)); Constraints.add(desc, new Pair(MyConstraint.class, conf));
</pre></blockquote></div> </pre></blockquote></div>
<p> <p>
At this point we added both the IntegerConstraint and MyConstraint to the table, the IntegerConstraint <i>will be run first</i>, followed by MyConstraint. At this point we added both the IntegerConstraint and MyConstraint to the table, the IntegerConstraint
<i>will be run first</i>, followed by MyConstraint.
<p> <p>
Suppose we realize that the {@link org.apache.hadoop.conf.Configuration} for MyConstraint is actually wrong when it was added to the table. Note, when it is added to the table, it is <i>not</i> added by reference, but is instead copied into the {@link org.apache.hadoop.hbase.HTableDescriptor}. Suppose we realize that the {@link org.apache.hadoop.conf.Configuration} for MyConstraint is actually wrong
when it was added to the table. Note, when it is added to the table, it is <i>not</i> added by reference,
but is instead copied into the {@link org.apache.hadoop.hbase.HTableDescriptor}.
Thus, to change the {@link org.apache.hadoop.conf.Configuration} we are using for MyConstraint, we need to do this: Thus, to change the {@link org.apache.hadoop.conf.Configuration} we are using for MyConstraint, we need to do this:
<div style="background-color: #cccccc; padding: 2px"> <div style="background-color: #cccccc; padding: 2px">
@ -167,7 +202,8 @@
Constraints.setConfiguration(desc, MyConstraint.class, conf); Constraints.setConfiguration(desc, MyConstraint.class, conf);
</pre></blockquote></div> </pre></blockquote></div>
<p> <p>
This will overwrite the previous configuration for MyConstraint, but <i>not</i> change the order of the constraint nor if it is enabled/disabled. This will overwrite the previous configuration for MyConstraint, but <i>not</i> change the order of the
constraint nor if it is enabled/disabled.
<p> <p>
Note that the same constraint class can be added multiple times to a table without repercussion. Note that the same constraint class can be added multiple times to a table without repercussion.
A use case for this is the same constraint working differently based on its configuration. A use case for this is the same constraint working differently based on its configuration.
@ -212,7 +248,17 @@
Constraints.remove(desc); Constraints.remove(desc);
</pre></blockquote></div> </pre></blockquote></div>
This will remove all constraints (and associated information) from the table and turn off the constraint processing. This will remove all constraints (and associated information) from the table and turn off the constraint processing.
<p><b>NOTE</b><p>
It is important to note the use above of
<div style="background-color: #cccccc">
<blockquote><pre>
Configuration conf = new Configuration(false);
</pre></blockquote></div>
If you just use <code> new Configuration()</code>, then the Configuration will be loaded with the default
properties. While in the simple case, this is not going to be an issue, it will cause pain down the road.
First, these extra properties are going to cause serious bloat in your {@link org.apache.hadoop.hbase.HTableDescriptor},
meaning you are keeping around a ton of redundant information. Second, it is going to make examining
your table in the shell, via <code>describe 'table'</code>, a huge pain as you will have to dig through
a ton of irrelevant config values to find the ones you set. In short, just do it the right way.
*/ */
package org.apache.hadoop.hbase.constraint; package org.apache.hadoop.hbase.constraint;

View File

@ -25,7 +25,6 @@ import org.apache.hadoop.hbase.client.Put;
*/ */
public class CheckConfigurationConstraint extends BaseConstraint { public class CheckConfigurationConstraint extends BaseConstraint {
private static String key = "testKey"; private static String key = "testKey";
private static String value = "testValue"; private static String value = "testValue";
@ -42,11 +41,13 @@ public class CheckConfigurationConstraint extends BaseConstraint {
@Override @Override
public void setConf(Configuration conf) { public void setConf(Configuration conf) {
super.setConf(conf);
if (conf != null) {
String val = conf.get(key); String val = conf.get(key);
if (val == null || !val.equals(value)) if (val == null || !val.equals(value))
throw new IllegalArgumentException( throw new IllegalArgumentException(
"Configuration was not passed correctly"); "Configuration was not passed correctly");
super.setConf(conf); }
} }
} }