HBASE-18075 Support non-latin table names and namespaces

This commit is contained in:
Josh Elser 2017-05-18 18:38:25 -04:00
parent f1544c3466
commit 709f5a1980
3 changed files with 73 additions and 34 deletions

View File

@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail; import static org.junit.Assert.fail;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
@ -181,10 +182,13 @@ public class TestHTableDescriptor {
String legalTableNames[] = { "foo", "with-dash_under.dot", "_under_start_ok", String legalTableNames[] = { "foo", "with-dash_under.dot", "_under_start_ok",
"with-dash.with_underscore", "02-01-2012.my_table_01-02", "xyz._mytable_", "9_9_0.table_02" "with-dash.with_underscore", "02-01-2012.my_table_01-02", "xyz._mytable_", "9_9_0.table_02"
, "dot1.dot2.table", "new.-mytable", "with-dash.with.dot", "legal..t2", "legal..legal.t2", , "dot1.dot2.table", "new.-mytable", "with-dash.with.dot", "legal..t2", "legal..legal.t2",
"trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", "ns:my_table_01-02"}; "trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", "ns:my_table_01-02",
"", "汉:字", "_字_", "foo:字", "foo.字", "字.foo"};
// Avoiding "zookeeper" in here as it's tough to encode in regex
String illegalTableNames[] = { ".dot_start_illegal", "-dash_start_illegal", "spaces not ok", String illegalTableNames[] = { ".dot_start_illegal", "-dash_start_illegal", "spaces not ok",
"-dash-.start_illegal", "new.table with space", "01 .table", "ns:-illegaldash", "-dash-.start_illegal", "new.table with space", "01 .table", "ns:-illegaldash",
"new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2"}; "new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2", String.valueOf((char)130),
String.valueOf((char)5), String.valueOf((char)65530)};
@Test @Test
public void testLegalHTableNames() { public void testLegalHTableNames() {
@ -205,6 +209,18 @@ public class TestHTableDescriptor {
} }
} }
/**
 * Verifies that the reserved name "zookeeper" is rejected as a bare table name, as the
 * qualifier of a namespaced table, and as the namespace of a fully qualified table name.
 */
@Test
public void testIllegalZooKeeperName() {
  for (String name : Arrays.asList("zookeeper", "ns:zookeeper", "zookeeper:table")) {
    try {
      TableName.isLegalFullyQualifiedTableName(Bytes.toBytes(name));
      fail("invalid tablename " + name + " should have failed");
    } catch (IllegalArgumentException expected) {
      // expected: illegal names are reported with IllegalArgumentException; any other
      // exception type indicates a different problem and should fail the test.
    }
  }
}
@Test @Test
public void testLegalHTableNamesRegex() { public void testLegalHTableNamesRegex() {
for (String tn : legalTableNames) { for (String tn : legalTableNames) {

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.hbase; package org.apache.hadoop.hbase;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays; import java.util.Arrays;
import java.util.Set; import java.util.Set;
import java.util.concurrent.CopyOnWriteArraySet; import java.util.concurrent.CopyOnWriteArraySet;
@ -66,10 +67,10 @@ public final class TableName implements Comparable<TableName> {
// in default namespace // in default namespace
//Allows only letters, digits and '_' //Allows only letters, digits and '_'
public static final String VALID_NAMESPACE_REGEX = public static final String VALID_NAMESPACE_REGEX =
"(?:[a-zA-Z_0-9]+)"; "(?:[_\\p{Digit}\\p{IsAlphabetic}]+)";
//Allows only letters, digits, '_', '-' and '.' //Allows only letters, digits, '_', '-' and '.'
public static final String VALID_TABLE_QUALIFIER_REGEX = public static final String VALID_TABLE_QUALIFIER_REGEX =
"(?:[a-zA-Z_0-9][a-zA-Z_0-9-.]*)"; "(?:[_\\p{Digit}\\p{IsAlphabetic}][-_.\\p{Digit}\\p{IsAlphabetic}]*)";
//Concatenation of NAMESPACE_REGEX and TABLE_QUALIFIER_REGEX, //Concatenation of NAMESPACE_REGEX and TABLE_QUALIFIER_REGEX,
//with NAMESPACE_DELIM as delimiter //with NAMESPACE_DELIM as delimiter
public static final String VALID_USER_TABLE_REGEX = public static final String VALID_USER_TABLE_REGEX =
@ -87,6 +88,9 @@ public final class TableName implements Comparable<TableName> {
public static final String OLD_META_STR = ".META."; public static final String OLD_META_STR = ".META.";
public static final String OLD_ROOT_STR = "-ROOT-"; public static final String OLD_ROOT_STR = "-ROOT-";
/** One globally disallowed name */
public static final String DISALLOWED_TABLE_NAME = "zookeeper";
/** /**
* @return True if <code>tn</code> is the hbase:meta table name. * @return True if <code>tn</code> is the hbase:meta table name.
*/ */
@ -118,14 +122,14 @@ public final class TableName implements Comparable<TableName> {
* @return Returns passed <code>tableName</code> param * @return Returns passed <code>tableName</code> param
* @throws IllegalArgumentException if passed a tableName is null or * @throws IllegalArgumentException if passed a tableName is null or
* is made of other than 'word' characters or underscores: i.e. * is made of other than 'word' characters or underscores: i.e.
* <code>[a-zA-Z_0-9.-:]</code>. The ':' is used to delimit the namespace * <code>[\p{IsAlphabetic}\p{Digit}.-:]</code>. The ':' is used to delimit the namespace
* from the table name and can be used for nothing else. * from the table name and can be used for nothing else.
* *
* Namespace names can only contain 'word' characters * Namespace names can only contain 'word' characters
* <code>[a-zA-Z_0-9]</code> or '_' * <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_'
* *
* Qualifier names can only contain 'word' characters * Qualifier names can only contain 'word' characters
* <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'. * <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'.
* The name may not start with '.' or '-'. * The name may not start with '.' or '-'.
* *
* Valid fully qualified table names: * Valid fully qualified table names:
@ -161,7 +165,7 @@ public final class TableName implements Comparable<TableName> {
/** /**
* Qualifier names can only contain 'word' characters * Qualifier names can only contain 'word' characters
* <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'. * <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'.
* The name may not start with '.' or '-'. * The name may not start with '.' or '-'.
* *
* @param qualifierName byte array containing the qualifier name * @param qualifierName byte array containing the qualifier name
@ -181,29 +185,37 @@ public final class TableName implements Comparable<TableName> {
if(end - start < 1) { if(end - start < 1) {
throw new IllegalArgumentException(isSnapshot ? "Snapshot" : "Table" + " qualifier must not be empty"); throw new IllegalArgumentException(isSnapshot ? "Snapshot" : "Table" + " qualifier must not be empty");
} }
if (qualifierName[start] == '.' || qualifierName[start] == '-') { if (qualifierName[start] == '.' || qualifierName[start] == '-') {
throw new IllegalArgumentException("Illegal first character <" + qualifierName[start] + throw new IllegalArgumentException("Illegal first character <" + qualifierName[start] +
"> at 0. " + (isSnapshot ? "Snapshot" : "User-space table") + "> at 0. " + (isSnapshot ? "Snapshot" : "User-space table") +
" qualifiers can only start with 'alphanumeric " + " qualifiers can only start with 'alphanumeric " +
"characters': i.e. [a-zA-Z_0-9]: " + "characters' from any language: " +
Bytes.toString(qualifierName, start, end)); Bytes.toString(qualifierName, start, end));
} }
for (int i = start; i < end; i++) { // Treat the bytes as UTF-8
if (Character.isLetterOrDigit(qualifierName[i]) || String qualifierString = new String(
qualifierName[i] == '_' || qualifierName, start, (end - start), StandardCharsets.UTF_8);
qualifierName[i] == '-' || if (qualifierString.equals(DISALLOWED_TABLE_NAME)) {
qualifierName[i] == '.') { // Per https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
// A znode named "zookeeper" is disallowed by zookeeper.
throw new IllegalArgumentException("Tables may not be named '" + DISALLOWED_TABLE_NAME + "'");
}
for (int i = 0; i < qualifierString.length(); i++) {
// Treat the string as a char-array as some characters may be multi-byte
char c = qualifierString.charAt(i);
// Check for letter, digit, underscore, hyphen, or period, and allowed by ZK.
// ZooKeeper also restricts some characters, but Character.isAlphabetic already rejects all of them.
// See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
if (Character.isAlphabetic(c) || Character.isDigit(c) || c == '_' || c == '-' || c == '.') {
continue; continue;
} }
throw new IllegalArgumentException("Illegal character code:" + qualifierName[i] + throw new IllegalArgumentException("Illegal character code:" + (int) c + ", <" + c + "> at " +
", <" + (char) qualifierName[i] + "> at " + i + i + ". " + (isSnapshot ? "Snapshot" : "User-space table") +
". " + (isSnapshot ? "Snapshot" : "User-space table") + " qualifiers may only contain 'alphanumeric characters' and digits: " +
" qualifiers can only contain " + qualifierString);
"'alphanumeric characters': i.e. [a-zA-Z_0-9-.]: " +
Bytes.toString(qualifierName, start, end));
} }
} }
public static void isLegalNamespaceName(byte[] namespaceName) { public static void isLegalNamespaceName(byte[] namespaceName) {
isLegalNamespaceName(namespaceName, 0, namespaceName.length); isLegalNamespaceName(namespaceName, 0, namespaceName.length);
} }
@ -217,14 +229,23 @@ public final class TableName implements Comparable<TableName> {
if(end - start < 1) { if(end - start < 1) {
throw new IllegalArgumentException("Namespace name must not be empty"); throw new IllegalArgumentException("Namespace name must not be empty");
} }
for (int i = start; i < end; i++) { String nsString = new String(namespaceName, start, (end - start), StandardCharsets.UTF_8);
if (Character.isLetterOrDigit(namespaceName[i])|| namespaceName[i] == '_') { if (nsString.equals(DISALLOWED_TABLE_NAME)) {
// Per https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
// A znode named "zookeeper" is disallowed by zookeeper.
throw new IllegalArgumentException("Tables may not be named '" + DISALLOWED_TABLE_NAME + "'");
}
for (int i = 0; i < nsString.length(); i++) {
// Treat the string as a char-array as some characters may be multi-byte
char c = nsString.charAt(i);
// ZooKeeper also restricts some characters, but Character.isAlphabetic already rejects all of them.
// See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
if (Character.isAlphabetic(c) || Character.isDigit(c)|| c == '_') {
continue; continue;
} }
throw new IllegalArgumentException("Illegal character <" + namespaceName[i] + throw new IllegalArgumentException("Illegal character <" + c +
"> at " + i + ". Namespaces can only contain " + "> at " + i + ". Namespaces may only contain " +
"'alphanumeric characters': i.e. [a-zA-Z_0-9]: " + Bytes.toString(namespaceName, "'alphanumeric characters' from any language and digits: " + nsString);
start, end));
} }
} }
@ -441,18 +462,19 @@ public final class TableName implements Comparable<TableName> {
} }
} }
int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM); final int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM);
byte[] nameB = Bytes.toBytes(name);
if (namespaceDelimIndex < 0) { if (namespaceDelimIndex < 0) {
return createTableNameIfNecessary( return createTableNameIfNecessary(
ByteBuffer.wrap(NamespaceDescriptor.DEFAULT_NAMESPACE_NAME), ByteBuffer.wrap(NamespaceDescriptor.DEFAULT_NAMESPACE_NAME),
ByteBuffer.wrap(nameB)); ByteBuffer.wrap(Bytes.toBytes(name)));
} else { } else {
// indexOf is by character, not byte (consider multi-byte characters)
String ns = name.substring(0, namespaceDelimIndex);
String qualifier = name.substring(namespaceDelimIndex + 1);
return createTableNameIfNecessary( return createTableNameIfNecessary(
ByteBuffer.wrap(nameB, 0, namespaceDelimIndex), ByteBuffer.wrap(Bytes.toBytes(ns)),
ByteBuffer.wrap(nameB, namespaceDelimIndex + 1, ByteBuffer.wrap(Bytes.toBytes(qualifier)));
nameB.length - (namespaceDelimIndex + 1)));
} }
} }

View File

@ -64,7 +64,8 @@ public class HFileLink extends FileLink {
* The HFileLink describe a link to an hfile in a different table/region * The HFileLink describe a link to an hfile in a different table/region
* and the name is in the form: table=region-hfile. * and the name is in the form: table=region-hfile.
* <p> * <p>
* Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name. * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid
* character for the table name.
* Region name is ([a-f0-9]+), so '-' is an invalid character for the region name. * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
* HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
* and the bulk loaded (_SeqId_[0-9]+_) hfiles. * and the bulk loaded (_SeqId_[0-9]+_) hfiles.