HBASE-18075 Support non-latin table names and namespaces

This commit is contained in:
Josh Elser 2017-05-18 18:38:25 -04:00
parent f1544c3466
commit 709f5a1980
3 changed files with 73 additions and 34 deletions

View File

@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
@ -181,10 +182,13 @@ public class TestHTableDescriptor {
String legalTableNames[] = { "foo", "with-dash_under.dot", "_under_start_ok",
"with-dash.with_underscore", "02-01-2012.my_table_01-02", "xyz._mytable_", "9_9_0.table_02"
, "dot1.dot2.table", "new.-mytable", "with-dash.with.dot", "legal..t2", "legal..legal.t2",
"trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", "ns:my_table_01-02"};
"trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", "ns:my_table_01-02",
"", "汉:字", "_字_", "foo:字", "foo.字", "字.foo"};
// Avoiding "zookeeper" in here as it's tough to encode in regex
String illegalTableNames[] = { ".dot_start_illegal", "-dash_start_illegal", "spaces not ok",
"-dash-.start_illegal", "new.table with space", "01 .table", "ns:-illegaldash",
"new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2"};
"new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2", String.valueOf((char)130),
String.valueOf((char)5), String.valueOf((char)65530)};
@Test
public void testLegalHTableNames() {
@ -205,6 +209,18 @@ public class TestHTableDescriptor {
}
}
/**
 * Verifies that the reserved name "zookeeper" is rejected whether it is used as a bare table
 * name, as the qualifier, or as the namespace of a fully qualified table name.
 */
@Test
public void testIllegalZooKeeperName() {
  for (String name : Arrays.asList("zookeeper", "ns:zookeeper", "zookeeper:table")) {
    try {
      TableName.isLegalFullyQualifiedTableName(Bytes.toBytes(name));
      fail("invalid tablename " + name + " should have failed");
    } catch (IllegalArgumentException expected) {
      // Only the validation failure counts as success. Any other exception type is left to
      // propagate so that an unrelated breakage cannot make this test pass by accident.
    }
  }
}
@Test
public void testLegalHTableNamesRegex() {
for (String tn : legalTableNames) {

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.hbase;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArraySet;
@ -66,10 +67,10 @@ public final class TableName implements Comparable<TableName> {
// in default namespace
//Allows only letters, digits and '_'
public static final String VALID_NAMESPACE_REGEX =
"(?:[a-zA-Z_0-9]+)";
"(?:[_\\p{Digit}\\p{IsAlphabetic}]+)";
//Allows only letters, digits, '_', '-' and '.'
public static final String VALID_TABLE_QUALIFIER_REGEX =
"(?:[a-zA-Z_0-9][a-zA-Z_0-9-.]*)";
"(?:[_\\p{Digit}\\p{IsAlphabetic}][-_.\\p{Digit}\\p{IsAlphabetic}]*)";
//Concatenation of NAMESPACE_REGEX and TABLE_QUALIFIER_REGEX,
//with NAMESPACE_DELIM as delimiter
public static final String VALID_USER_TABLE_REGEX =
@ -87,6 +88,9 @@ public final class TableName implements Comparable<TableName> {
public static final String OLD_META_STR = ".META.";
public static final String OLD_ROOT_STR = "-ROOT-";
/**
 * The one globally disallowed name. The namespace and table-qualifier validation in this class
 * both throw {@link IllegalArgumentException} when the name equals this value, because
 * ZooKeeper does not allow a znode named "zookeeper".
 */
public static final String DISALLOWED_TABLE_NAME = "zookeeper";
/**
* @return True if <code>tn</code> is the hbase:meta table name.
*/
@ -118,14 +122,14 @@ public final class TableName implements Comparable<TableName> {
* @return Returns passed <code>tableName</code> param
* @throws IllegalArgumentException if passed a tableName is null or
* is made of other than 'word' characters or underscores: i.e.
* <code>[a-zA-Z_0-9.-:]</code>. The ':' is used to delimit the namespace
* <code>[\p{IsAlphabetic}\p{Digit}.-:]</code>. The ':' is used to delimit the namespace
* from the table name and can be used for nothing else.
*
* Namespace names can only contain 'word' characters
* <code>[a-zA-Z_0-9]</code> or '_'
* <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_'
*
* Qualifier names can only contain 'word' characters
* <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'.
* <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'.
* The name may not start with '.' or '-'.
*
* Valid fully qualified table names:
@ -161,7 +165,7 @@ public final class TableName implements Comparable<TableName> {
/**
* Qualifier names can only contain 'word' characters
* <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'.
* <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'.
* The name may not start with '.' or '-'.
*
* @param qualifierName byte array containing the qualifier name
@ -181,29 +185,37 @@ public final class TableName implements Comparable<TableName> {
if(end - start < 1) {
throw new IllegalArgumentException(isSnapshot ? "Snapshot" : "Table" + " qualifier must not be empty");
}
if (qualifierName[start] == '.' || qualifierName[start] == '-') {
throw new IllegalArgumentException("Illegal first character <" + qualifierName[start] +
"> at 0. " + (isSnapshot ? "Snapshot" : "User-space table") +
" qualifiers can only start with 'alphanumeric " +
"characters': i.e. [a-zA-Z_0-9]: " +
"characters' from any language: " +
Bytes.toString(qualifierName, start, end));
}
for (int i = start; i < end; i++) {
if (Character.isLetterOrDigit(qualifierName[i]) ||
qualifierName[i] == '_' ||
qualifierName[i] == '-' ||
qualifierName[i] == '.') {
// Treat the bytes as UTF-8
String qualifierString = new String(
qualifierName, start, (end - start), StandardCharsets.UTF_8);
if (qualifierString.equals(DISALLOWED_TABLE_NAME)) {
// Per https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
// A znode named "zookeeper" is disallowed by zookeeper.
throw new IllegalArgumentException("Tables may not be named '" + DISALLOWED_TABLE_NAME + "'");
}
for (int i = 0; i < qualifierString.length(); i++) {
// Treat the string as a char-array as some characters may be multi-byte
char c = qualifierString.charAt(i);
// Check for letter, digit, underscore, hyphen, or period, and allowed by ZK.
// ZooKeeper also restricts certain characters, but Character.isAlphabetic excludes all of them.
// See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
if (Character.isAlphabetic(c) || Character.isDigit(c) || c == '_' || c == '-' || c == '.') {
continue;
}
throw new IllegalArgumentException("Illegal character code:" + qualifierName[i] +
", <" + (char) qualifierName[i] + "> at " + i +
". " + (isSnapshot ? "Snapshot" : "User-space table") +
" qualifiers can only contain " +
"'alphanumeric characters': i.e. [a-zA-Z_0-9-.]: " +
Bytes.toString(qualifierName, start, end));
throw new IllegalArgumentException("Illegal character code:" + (int) c + ", <" + c + "> at " +
i + ". " + (isSnapshot ? "Snapshot" : "User-space table") +
" qualifiers may only contain 'alphanumeric characters' and digits: " +
qualifierString);
}
}
/**
 * Validates a whole byte array as a namespace name by delegating to the ranged overload with
 * the range starting at offset 0 and ending (exclusively) at the array length.
 *
 * @param namespaceName byte array containing the namespace name to check
 */
public static void isLegalNamespaceName(byte[] namespaceName) {
  final int length = namespaceName.length;
  isLegalNamespaceName(namespaceName, 0, length);
}
@ -217,14 +229,23 @@ public final class TableName implements Comparable<TableName> {
if(end - start < 1) {
throw new IllegalArgumentException("Namespace name must not be empty");
}
for (int i = start; i < end; i++) {
if (Character.isLetterOrDigit(namespaceName[i])|| namespaceName[i] == '_') {
String nsString = new String(namespaceName, start, (end - start), StandardCharsets.UTF_8);
if (nsString.equals(DISALLOWED_TABLE_NAME)) {
// Per https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
// A znode named "zookeeper" is disallowed by zookeeper.
throw new IllegalArgumentException("Tables may not be named '" + DISALLOWED_TABLE_NAME + "'");
}
for (int i = 0; i < nsString.length(); i++) {
// Treat the string as a char-array as some characters may be multi-byte
char c = nsString.charAt(i);
// ZooKeeper also restricts certain characters, but Character.isAlphabetic excludes all of them.
// See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
if (Character.isAlphabetic(c) || Character.isDigit(c)|| c == '_') {
continue;
}
throw new IllegalArgumentException("Illegal character <" + namespaceName[i] +
"> at " + i + ". Namespaces can only contain " +
"'alphanumeric characters': i.e. [a-zA-Z_0-9]: " + Bytes.toString(namespaceName,
start, end));
throw new IllegalArgumentException("Illegal character <" + c +
"> at " + i + ". Namespaces may only contain " +
"'alphanumeric characters' from any language and digits: " + nsString);
}
}
@ -441,18 +462,19 @@ public final class TableName implements Comparable<TableName> {
}
}
int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM);
byte[] nameB = Bytes.toBytes(name);
final int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM);
if (namespaceDelimIndex < 0) {
return createTableNameIfNecessary(
ByteBuffer.wrap(NamespaceDescriptor.DEFAULT_NAMESPACE_NAME),
ByteBuffer.wrap(nameB));
ByteBuffer.wrap(Bytes.toBytes(name)));
} else {
// indexOf is by character, not byte (consider multi-byte characters)
String ns = name.substring(0, namespaceDelimIndex);
String qualifier = name.substring(namespaceDelimIndex + 1);
return createTableNameIfNecessary(
ByteBuffer.wrap(nameB, 0, namespaceDelimIndex),
ByteBuffer.wrap(nameB, namespaceDelimIndex + 1,
nameB.length - (namespaceDelimIndex + 1)));
ByteBuffer.wrap(Bytes.toBytes(ns)),
ByteBuffer.wrap(Bytes.toBytes(qualifier)));
}
}

View File

@ -64,7 +64,8 @@ public class HFileLink extends FileLink {
* The HFileLink describe a link to an hfile in a different table/region
* and the name is in the form: table=region-hfile.
* <p>
* Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
* Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid
* character for the table name.
* Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
* HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
* and the bulk loaded (_SeqId_[0-9]+_) hfiles.