HBASE-18075 Support non-latin table names and namespaces
This commit is contained in:
parent
f1544c3466
commit
709f5a1980
|
@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
|
|||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
|
@ -181,10 +182,13 @@ public class TestHTableDescriptor {
|
|||
String legalTableNames[] = { "foo", "with-dash_under.dot", "_under_start_ok",
|
||||
"with-dash.with_underscore", "02-01-2012.my_table_01-02", "xyz._mytable_", "9_9_0.table_02"
|
||||
, "dot1.dot2.table", "new.-mytable", "with-dash.with.dot", "legal..t2", "legal..legal.t2",
|
||||
"trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", "ns:my_table_01-02"};
|
||||
"trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", "ns:my_table_01-02",
|
||||
"汉", "汉:字", "_字_", "foo:字", "foo.字", "字.foo"};
|
||||
// Avoiding "zookeeper" in here as it's tough to encode in regex
|
||||
String illegalTableNames[] = { ".dot_start_illegal", "-dash_start_illegal", "spaces not ok",
|
||||
"-dash-.start_illegal", "new.table with space", "01 .table", "ns:-illegaldash",
|
||||
"new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2"};
|
||||
"new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2", String.valueOf((char)130),
|
||||
String.valueOf((char)5), String.valueOf((char)65530)};
|
||||
|
||||
@Test
|
||||
public void testLegalHTableNames() {
|
||||
|
@ -205,6 +209,18 @@ public class TestHTableDescriptor {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Checks that the name "zookeeper" is refused by
 * {@code TableName.isLegalFullyQualifiedTableName} in each position exercised here:
 * as a bare table name, as the qualifier ("ns:zookeeper") and as the namespace
 * ("zookeeper:table"). Each name must cause an exception; otherwise the test fails.
 */
@Test
|
||||
public void testIllegalZooKeeperName() {
|
||||
for (String name : Arrays.asList("zookeeper", "ns:zookeeper", "zookeeper:table")) {
|
||||
try {
|
||||
TableName.isLegalFullyQualifiedTableName(Bytes.toBytes(name));
|
||||
// Reaching this line means validation accepted the name, which is the failure case.
|
||||
fail("invalid tablename " + name + " should have failed");
|
||||
} catch (Exception e) {
|
||||
// expected: validation rejected the name. The AssertionError raised by fail() is an
|
||||
// Error, not an Exception, so it is not swallowed by this catch.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLegalHTableNamesRegex() {
|
||||
for (String tn : legalTableNames) {
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
package org.apache.hadoop.hbase;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CopyOnWriteArraySet;
|
||||
|
@ -66,10 +67,10 @@ public final class TableName implements Comparable<TableName> {
|
|||
// in default namespace
|
||||
//Allows only letters, digits and '_'
|
||||
public static final String VALID_NAMESPACE_REGEX =
|
||||
"(?:[a-zA-Z_0-9]+)";
|
||||
"(?:[_\\p{Digit}\\p{IsAlphabetic}]+)";
|
||||
//Allows only letters, digits, '_', '-' and '.'
|
||||
public static final String VALID_TABLE_QUALIFIER_REGEX =
|
||||
"(?:[a-zA-Z_0-9][a-zA-Z_0-9-.]*)";
|
||||
"(?:[_\\p{Digit}\\p{IsAlphabetic}][-_.\\p{Digit}\\p{IsAlphabetic}]*)";
|
||||
//Concatenation of NAMESPACE_REGEX and TABLE_QUALIFIER_REGEX,
|
||||
//with NAMESPACE_DELIM as delimiter
|
||||
public static final String VALID_USER_TABLE_REGEX =
|
||||
|
@ -87,6 +88,9 @@ public final class TableName implements Comparable<TableName> {
|
|||
public static final String OLD_META_STR = ".META.";
|
||||
public static final String OLD_ROOT_STR = "-ROOT-";
|
||||
|
||||
/** One globally disallowed name */
|
||||
public static final String DISALLOWED_TABLE_NAME = "zookeeper";
|
||||
|
||||
/**
|
||||
* @return True if <code>tn</code> is the hbase:meta table name.
|
||||
*/
|
||||
|
@ -118,14 +122,14 @@ public final class TableName implements Comparable<TableName> {
|
|||
* @return Returns passed <code>tableName</code> param
|
||||
* @throws IllegalArgumentException if passed a tableName is null or
|
||||
* is made of other than 'word' characters or underscores: i.e.
|
||||
* <code>[a-zA-Z_0-9.-:]</code>. The ':' is used to delimit the namespace
|
||||
* <code>[_\p{IsAlphabetic}\p{Digit}.-:]</code>. The ':' is used to delimit the namespace
|
||||
* from the table name and can be used for nothing else.
|
||||
*
|
||||
* Namespace names can only contain 'word' characters
|
||||
* <code>[a-zA-Z_0-9]</code> or '_'
|
||||
* <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_'
|
||||
*
|
||||
* Qualifier names can only contain 'word' characters
|
||||
* <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'.
|
||||
* <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'.
|
||||
* The name may not start with '.' or '-'.
|
||||
*
|
||||
* Valid fully qualified table names:
|
||||
|
@ -161,7 +165,7 @@ public final class TableName implements Comparable<TableName> {
|
|||
|
||||
/**
|
||||
* Qualifier names can only contain 'word' characters
|
||||
* <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'.
|
||||
* <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'.
|
||||
* The name may not start with '.' or '-'.
|
||||
*
|
||||
* @param qualifierName byte array containing the qualifier name
|
||||
|
@ -181,29 +185,37 @@ public final class TableName implements Comparable<TableName> {
|
|||
if(end - start < 1) {
|
||||
throw new IllegalArgumentException(isSnapshot ? "Snapshot" : "Table" + " qualifier must not be empty");
|
||||
}
|
||||
|
||||
if (qualifierName[start] == '.' || qualifierName[start] == '-') {
|
||||
throw new IllegalArgumentException("Illegal first character <" + qualifierName[start] +
|
||||
"> at 0. " + (isSnapshot ? "Snapshot" : "User-space table") +
|
||||
" qualifiers can only start with 'alphanumeric " +
|
||||
"characters': i.e. [a-zA-Z_0-9]: " +
|
||||
"characters' from any language: " +
|
||||
Bytes.toString(qualifierName, start, end));
|
||||
}
|
||||
for (int i = start; i < end; i++) {
|
||||
if (Character.isLetterOrDigit(qualifierName[i]) ||
|
||||
qualifierName[i] == '_' ||
|
||||
qualifierName[i] == '-' ||
|
||||
qualifierName[i] == '.') {
|
||||
// Treat the bytes as UTF-8
|
||||
String qualifierString = new String(
|
||||
qualifierName, start, (end - start), StandardCharsets.UTF_8);
|
||||
if (qualifierString.equals(DISALLOWED_TABLE_NAME)) {
|
||||
// Per https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
|
||||
// A znode named "zookeeper" is disallowed by zookeeper.
|
||||
throw new IllegalArgumentException("Tables may not be named '" + DISALLOWED_TABLE_NAME + "'");
|
||||
}
|
||||
for (int i = 0; i < qualifierString.length(); i++) {
|
||||
// Treat the string as a char-array as some characters may be multi-byte
|
||||
char c = qualifierString.charAt(i);
|
||||
// Check for letter, digit, underscore, hyphen, or period, and allowed by ZK.
|
||||
// ZooKeeper has its own character restrictions, but Character.isAlphabetic already excludes all of those
|
||||
// See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
|
||||
if (Character.isAlphabetic(c) || Character.isDigit(c) || c == '_' || c == '-' || c == '.') {
|
||||
continue;
|
||||
}
|
||||
throw new IllegalArgumentException("Illegal character code:" + qualifierName[i] +
|
||||
", <" + (char) qualifierName[i] + "> at " + i +
|
||||
". " + (isSnapshot ? "Snapshot" : "User-space table") +
|
||||
" qualifiers can only contain " +
|
||||
"'alphanumeric characters': i.e. [a-zA-Z_0-9-.]: " +
|
||||
Bytes.toString(qualifierName, start, end));
|
||||
throw new IllegalArgumentException("Illegal character code:" + (int) c + ", <" + c + "> at " +
|
||||
i + ". " + (isSnapshot ? "Snapshot" : "User-space table") +
|
||||
" qualifiers may only contain 'alphanumeric characters' and digits: " +
|
||||
qualifierString);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Validates an entire byte array as a namespace name by delegating to the
 * three-argument overload with the range {@code 0} to {@code namespaceName.length}.
 *
 * @param namespaceName byte array holding the namespace name to check
 */
public static void isLegalNamespaceName(byte[] namespaceName) {
|
||||
isLegalNamespaceName(namespaceName, 0, namespaceName.length);
|
||||
}
|
||||
|
@ -217,14 +229,23 @@ public final class TableName implements Comparable<TableName> {
|
|||
if(end - start < 1) {
|
||||
throw new IllegalArgumentException("Namespace name must not be empty");
|
||||
}
|
||||
for (int i = start; i < end; i++) {
|
||||
if (Character.isLetterOrDigit(namespaceName[i])|| namespaceName[i] == '_') {
|
||||
String nsString = new String(namespaceName, start, (end - start), StandardCharsets.UTF_8);
|
||||
if (nsString.equals(DISALLOWED_TABLE_NAME)) {
|
||||
// Per https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
|
||||
// A znode named "zookeeper" is disallowed by zookeeper.
|
||||
throw new IllegalArgumentException("Tables may not be named '" + DISALLOWED_TABLE_NAME + "'");
|
||||
}
|
||||
for (int i = 0; i < nsString.length(); i++) {
|
||||
// Treat the string as a char-array as some characters may be multi-byte
|
||||
char c = nsString.charAt(i);
|
||||
// ZooKeeper has its own character restrictions, but Character.isAlphabetic already excludes all of those
|
||||
// See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
|
||||
if (Character.isAlphabetic(c) || Character.isDigit(c)|| c == '_') {
|
||||
continue;
|
||||
}
|
||||
throw new IllegalArgumentException("Illegal character <" + namespaceName[i] +
|
||||
"> at " + i + ". Namespaces can only contain " +
|
||||
"'alphanumeric characters': i.e. [a-zA-Z_0-9]: " + Bytes.toString(namespaceName,
|
||||
start, end));
|
||||
throw new IllegalArgumentException("Illegal character <" + c +
|
||||
"> at " + i + ". Namespaces may only contain " +
|
||||
"'alphanumeric characters' from any language and digits: " + nsString);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -441,18 +462,19 @@ public final class TableName implements Comparable<TableName> {
|
|||
}
|
||||
}
|
||||
|
||||
int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM);
|
||||
byte[] nameB = Bytes.toBytes(name);
|
||||
final int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM);
|
||||
|
||||
if (namespaceDelimIndex < 0) {
|
||||
return createTableNameIfNecessary(
|
||||
ByteBuffer.wrap(NamespaceDescriptor.DEFAULT_NAMESPACE_NAME),
|
||||
ByteBuffer.wrap(nameB));
|
||||
ByteBuffer.wrap(Bytes.toBytes(name)));
|
||||
} else {
|
||||
// indexOf is by character, not byte (consider multi-byte characters)
|
||||
String ns = name.substring(0, namespaceDelimIndex);
|
||||
String qualifier = name.substring(namespaceDelimIndex + 1);
|
||||
return createTableNameIfNecessary(
|
||||
ByteBuffer.wrap(nameB, 0, namespaceDelimIndex),
|
||||
ByteBuffer.wrap(nameB, namespaceDelimIndex + 1,
|
||||
nameB.length - (namespaceDelimIndex + 1)));
|
||||
ByteBuffer.wrap(Bytes.toBytes(ns)),
|
||||
ByteBuffer.wrap(Bytes.toBytes(qualifier)));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -64,7 +64,8 @@ public class HFileLink extends FileLink {
|
|||
* The HFileLink describe a link to an hfile in a different table/region
|
||||
* and the name is in the form: table=region-hfile.
|
||||
* <p>
|
||||
* Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
|
||||
* Table name is ([_\p{IsAlphabetic}\p{Digit}][-_.\p{IsAlphabetic}\p{Digit}]*), so '=' is an invalid
|
||||
* character for the table name.
|
||||
* Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
|
||||
* HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
|
||||
* and the bulk loaded (_SeqId_[0-9]+_) hfiles.
|
||||
|
|
Loading…
Reference in New Issue