HBASE-18075 Support non-latin table names and namespaces
This commit is contained in:
parent
f1544c3466
commit
709f5a1980
|
@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
|
||||||
import static org.junit.Assert.fail;
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
|
@ -181,10 +182,13 @@ public class TestHTableDescriptor {
|
||||||
String legalTableNames[] = { "foo", "with-dash_under.dot", "_under_start_ok",
|
String legalTableNames[] = { "foo", "with-dash_under.dot", "_under_start_ok",
|
||||||
"with-dash.with_underscore", "02-01-2012.my_table_01-02", "xyz._mytable_", "9_9_0.table_02"
|
"with-dash.with_underscore", "02-01-2012.my_table_01-02", "xyz._mytable_", "9_9_0.table_02"
|
||||||
, "dot1.dot2.table", "new.-mytable", "with-dash.with.dot", "legal..t2", "legal..legal.t2",
|
, "dot1.dot2.table", "new.-mytable", "with-dash.with.dot", "legal..t2", "legal..legal.t2",
|
||||||
"trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", "ns:my_table_01-02"};
|
"trailingdots..", "trailing.dots...", "ns:mytable", "ns:_mytable_", "ns:my_table_01-02",
|
||||||
|
"汉", "汉:字", "_字_", "foo:字", "foo.字", "字.foo"};
|
||||||
|
// Avoiding "zookeeper" in here as it's tough to encode in regex
|
||||||
String illegalTableNames[] = { ".dot_start_illegal", "-dash_start_illegal", "spaces not ok",
|
String illegalTableNames[] = { ".dot_start_illegal", "-dash_start_illegal", "spaces not ok",
|
||||||
"-dash-.start_illegal", "new.table with space", "01 .table", "ns:-illegaldash",
|
"-dash-.start_illegal", "new.table with space", "01 .table", "ns:-illegaldash",
|
||||||
"new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2"};
|
"new:.illegaldot", "new:illegalcolon1:", "new:illegalcolon1:2", String.valueOf((char)130),
|
||||||
|
String.valueOf((char)5), String.valueOf((char)65530)};
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLegalHTableNames() {
|
public void testLegalHTableNames() {
|
||||||
|
@ -205,6 +209,18 @@ public class TestHTableDescriptor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testIllegalZooKeeperName() {
|
||||||
|
for (String name : Arrays.asList("zookeeper", "ns:zookeeper", "zookeeper:table")) {
|
||||||
|
try {
|
||||||
|
TableName.isLegalFullyQualifiedTableName(Bytes.toBytes(name));
|
||||||
|
fail("invalid tablename " + name + " should have failed");
|
||||||
|
} catch (Exception e) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLegalHTableNamesRegex() {
|
public void testLegalHTableNamesRegex() {
|
||||||
for (String tn : legalTableNames) {
|
for (String tn : legalTableNames) {
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
package org.apache.hadoop.hbase;
|
package org.apache.hadoop.hbase;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.CopyOnWriteArraySet;
|
import java.util.concurrent.CopyOnWriteArraySet;
|
||||||
|
@ -66,10 +67,10 @@ public final class TableName implements Comparable<TableName> {
|
||||||
// in default namespace
|
// in default namespace
|
||||||
//Allows only letters, digits and '_'
|
//Allows only letters, digits and '_'
|
||||||
public static final String VALID_NAMESPACE_REGEX =
|
public static final String VALID_NAMESPACE_REGEX =
|
||||||
"(?:[a-zA-Z_0-9]+)";
|
"(?:[_\\p{Digit}\\p{IsAlphabetic}]+)";
|
||||||
//Allows only letters, digits, '_', '-' and '.'
|
//Allows only letters, digits, '_', '-' and '.'
|
||||||
public static final String VALID_TABLE_QUALIFIER_REGEX =
|
public static final String VALID_TABLE_QUALIFIER_REGEX =
|
||||||
"(?:[a-zA-Z_0-9][a-zA-Z_0-9-.]*)";
|
"(?:[_\\p{Digit}\\p{IsAlphabetic}][-_.\\p{Digit}\\p{IsAlphabetic}]*)";
|
||||||
//Concatenation of NAMESPACE_REGEX and TABLE_QUALIFIER_REGEX,
|
//Concatenation of NAMESPACE_REGEX and TABLE_QUALIFIER_REGEX,
|
||||||
//with NAMESPACE_DELIM as delimiter
|
//with NAMESPACE_DELIM as delimiter
|
||||||
public static final String VALID_USER_TABLE_REGEX =
|
public static final String VALID_USER_TABLE_REGEX =
|
||||||
|
@ -87,6 +88,9 @@ public final class TableName implements Comparable<TableName> {
|
||||||
public static final String OLD_META_STR = ".META.";
|
public static final String OLD_META_STR = ".META.";
|
||||||
public static final String OLD_ROOT_STR = "-ROOT-";
|
public static final String OLD_ROOT_STR = "-ROOT-";
|
||||||
|
|
||||||
|
/** One globally disallowed name */
|
||||||
|
public static final String DISALLOWED_TABLE_NAME = "zookeeper";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return True if <code>tn</code> is the hbase:meta table name.
|
* @return True if <code>tn</code> is the hbase:meta table name.
|
||||||
*/
|
*/
|
||||||
|
@ -118,14 +122,14 @@ public final class TableName implements Comparable<TableName> {
|
||||||
* @return Returns passed <code>tableName</code> param
|
* @return Returns passed <code>tableName</code> param
|
||||||
* @throws IllegalArgumentException if passed a tableName is null or
|
* @throws IllegalArgumentException if passed a tableName is null or
|
||||||
* is made of other than 'word' characters or underscores: i.e.
|
* is made of other than 'word' characters or underscores: i.e.
|
||||||
* <code>[a-zA-Z_0-9.-:]</code>. The ':' is used to delimit the namespace
|
* <code>[_\p{IsAlphabetic}\p{Digit}.-:]</code>. The ':' is used to delimit the namespace
|
||||||
* from the table name and can be used for nothing else.
|
* from the table name and can be used for nothing else.
|
||||||
*
|
*
|
||||||
* Namespace names can only contain 'word' characters
|
* Namespace names can only contain 'word' characters
|
||||||
* <code>[a-zA-Z_0-9]</code> or '_'
|
* <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_'
|
||||||
*
|
*
|
||||||
* Qualifier names can only contain 'word' characters
|
* Qualifier names can only contain 'word' characters
|
||||||
* <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'.
|
* <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'.
|
||||||
* The name may not start with '.' or '-'.
|
* The name may not start with '.' or '-'.
|
||||||
*
|
*
|
||||||
* Valid fully qualified table names:
|
* Valid fully qualified table names:
|
||||||
|
@ -161,7 +165,7 @@ public final class TableName implements Comparable<TableName> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Qualifier names can only contain 'word' characters
|
* Qualifier names can only contain 'word' characters
|
||||||
* <code>[a-zA-Z_0-9]</code> or '_', '.' or '-'.
|
* <code>[\p{IsAlphabetic}\p{Digit}]</code> or '_', '.' or '-'.
|
||||||
* The name may not start with '.' or '-'.
|
* The name may not start with '.' or '-'.
|
||||||
*
|
*
|
||||||
* @param qualifierName byte array containing the qualifier name
|
* @param qualifierName byte array containing the qualifier name
|
||||||
|
@ -181,29 +185,37 @@ public final class TableName implements Comparable<TableName> {
|
||||||
if(end - start < 1) {
|
if(end - start < 1) {
|
||||||
throw new IllegalArgumentException(isSnapshot ? "Snapshot" : "Table" + " qualifier must not be empty");
|
throw new IllegalArgumentException(isSnapshot ? "Snapshot" : "Table" + " qualifier must not be empty");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (qualifierName[start] == '.' || qualifierName[start] == '-') {
|
if (qualifierName[start] == '.' || qualifierName[start] == '-') {
|
||||||
throw new IllegalArgumentException("Illegal first character <" + qualifierName[start] +
|
throw new IllegalArgumentException("Illegal first character <" + qualifierName[start] +
|
||||||
"> at 0. " + (isSnapshot ? "Snapshot" : "User-space table") +
|
"> at 0. " + (isSnapshot ? "Snapshot" : "User-space table") +
|
||||||
" qualifiers can only start with 'alphanumeric " +
|
" qualifiers can only start with 'alphanumeric " +
|
||||||
"characters': i.e. [a-zA-Z_0-9]: " +
|
"characters' from any language: " +
|
||||||
Bytes.toString(qualifierName, start, end));
|
Bytes.toString(qualifierName, start, end));
|
||||||
}
|
}
|
||||||
for (int i = start; i < end; i++) {
|
// Treat the bytes as UTF-8
|
||||||
if (Character.isLetterOrDigit(qualifierName[i]) ||
|
String qualifierString = new String(
|
||||||
qualifierName[i] == '_' ||
|
qualifierName, start, (end - start), StandardCharsets.UTF_8);
|
||||||
qualifierName[i] == '-' ||
|
if (qualifierString.equals(DISALLOWED_TABLE_NAME)) {
|
||||||
qualifierName[i] == '.') {
|
// Per https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
|
||||||
|
// A znode named "zookeeper" is disallowed by zookeeper.
|
||||||
|
throw new IllegalArgumentException("Tables may not be named '" + DISALLOWED_TABLE_NAME + "'");
|
||||||
|
}
|
||||||
|
for (int i = 0; i < qualifierString.length(); i++) {
|
||||||
|
// Treat the string as a char-array as some characters may be multi-byte
|
||||||
|
char c = qualifierString.charAt(i);
|
||||||
|
// Check for letter, digit, underscore, hyphen, or period, and allowed by ZK.
|
||||||
|
// ZooKeeper restricts some characters too, but Character.isAlphabetic already rejects all of them
|
||||||
|
// See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
|
||||||
|
if (Character.isAlphabetic(c) || Character.isDigit(c) || c == '_' || c == '-' || c == '.') {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
throw new IllegalArgumentException("Illegal character code:" + qualifierName[i] +
|
throw new IllegalArgumentException("Illegal character code:" + (int) c + ", <" + c + "> at " +
|
||||||
", <" + (char) qualifierName[i] + "> at " + i +
|
i + ". " + (isSnapshot ? "Snapshot" : "User-space table") +
|
||||||
". " + (isSnapshot ? "Snapshot" : "User-space table") +
|
" qualifiers may only contain 'alphanumeric characters' and digits: " +
|
||||||
" qualifiers can only contain " +
|
qualifierString);
|
||||||
"'alphanumeric characters': i.e. [a-zA-Z_0-9-.]: " +
|
|
||||||
Bytes.toString(qualifierName, start, end));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void isLegalNamespaceName(byte[] namespaceName) {
|
public static void isLegalNamespaceName(byte[] namespaceName) {
|
||||||
isLegalNamespaceName(namespaceName, 0, namespaceName.length);
|
isLegalNamespaceName(namespaceName, 0, namespaceName.length);
|
||||||
}
|
}
|
||||||
|
@ -217,14 +229,23 @@ public final class TableName implements Comparable<TableName> {
|
||||||
if(end - start < 1) {
|
if(end - start < 1) {
|
||||||
throw new IllegalArgumentException("Namespace name must not be empty");
|
throw new IllegalArgumentException("Namespace name must not be empty");
|
||||||
}
|
}
|
||||||
for (int i = start; i < end; i++) {
|
String nsString = new String(namespaceName, start, (end - start), StandardCharsets.UTF_8);
|
||||||
if (Character.isLetterOrDigit(namespaceName[i])|| namespaceName[i] == '_') {
|
if (nsString.equals(DISALLOWED_TABLE_NAME)) {
|
||||||
|
// Per https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
|
||||||
|
// A znode named "zookeeper" is disallowed by zookeeper.
|
||||||
|
throw new IllegalArgumentException("Tables may not be named '" + DISALLOWED_TABLE_NAME + "'");
|
||||||
|
}
|
||||||
|
for (int i = 0; i < nsString.length(); i++) {
|
||||||
|
// Treat the string as a char-array as some characters may be multi-byte
|
||||||
|
char c = nsString.charAt(i);
|
||||||
|
// ZooKeeper restricts some characters too, but Character.isAlphabetic already rejects all of them
|
||||||
|
// See https://zookeeper.apache.org/doc/r3.4.10/zookeeperProgrammers.html#ch_zkDataModel
|
||||||
|
if (Character.isAlphabetic(c) || Character.isDigit(c)|| c == '_') {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
throw new IllegalArgumentException("Illegal character <" + namespaceName[i] +
|
throw new IllegalArgumentException("Illegal character <" + c +
|
||||||
"> at " + i + ". Namespaces can only contain " +
|
"> at " + i + ". Namespaces may only contain " +
|
||||||
"'alphanumeric characters': i.e. [a-zA-Z_0-9]: " + Bytes.toString(namespaceName,
|
"'alphanumeric characters' from any language and digits: " + nsString);
|
||||||
start, end));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -441,18 +462,19 @@ public final class TableName implements Comparable<TableName> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM);
|
final int namespaceDelimIndex = name.indexOf(NAMESPACE_DELIM);
|
||||||
byte[] nameB = Bytes.toBytes(name);
|
|
||||||
|
|
||||||
if (namespaceDelimIndex < 0) {
|
if (namespaceDelimIndex < 0) {
|
||||||
return createTableNameIfNecessary(
|
return createTableNameIfNecessary(
|
||||||
ByteBuffer.wrap(NamespaceDescriptor.DEFAULT_NAMESPACE_NAME),
|
ByteBuffer.wrap(NamespaceDescriptor.DEFAULT_NAMESPACE_NAME),
|
||||||
ByteBuffer.wrap(nameB));
|
ByteBuffer.wrap(Bytes.toBytes(name)));
|
||||||
} else {
|
} else {
|
||||||
|
// indexOf is by character, not byte (consider multi-byte characters)
|
||||||
|
String ns = name.substring(0, namespaceDelimIndex);
|
||||||
|
String qualifier = name.substring(namespaceDelimIndex + 1);
|
||||||
return createTableNameIfNecessary(
|
return createTableNameIfNecessary(
|
||||||
ByteBuffer.wrap(nameB, 0, namespaceDelimIndex),
|
ByteBuffer.wrap(Bytes.toBytes(ns)),
|
||||||
ByteBuffer.wrap(nameB, namespaceDelimIndex + 1,
|
ByteBuffer.wrap(Bytes.toBytes(qualifier)));
|
||||||
nameB.length - (namespaceDelimIndex + 1)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -64,7 +64,8 @@ public class HFileLink extends FileLink {
|
||||||
* The HFileLink describes a link to an hfile in a different table/region
|
* The HFileLink describes a link to an hfile in a different table/region
|
||||||
* and the name is in the form: table=region-hfile.
|
* and the name is in the form: table=region-hfile.
|
||||||
* <p>
|
* <p>
|
||||||
* Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
|
* Table name is ([_\p{IsAlphabetic}\p{Digit}][-_.\p{IsAlphabetic}\p{Digit}]*), so '=' is an invalid
|
||||||
|
* character for the table name.
|
||||||
* Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
|
* Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
|
||||||
* HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
|
* HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
|
||||||
* and the bulk loaded (_SeqId_[0-9]+_) hfiles.
|
* and the bulk loaded (_SeqId_[0-9]+_) hfiles.
|
||||||
|
|
Loading…
Reference in New Issue