From a3073cc6dacc03756387a788de62498f1a64a411 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Thu, 5 Jul 2007 20:12:37 +0000 Subject: [PATCH] HADOOP-1566 [hbase] Key-making utility A src/java/org/apache/hadoop/hbase/util/Keying.java A src/test/org/apache/hadoop/hbase/util/TestKeying.java Added. git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@553623 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../org/apache/hadoop/hbase/util/Keying.java | 112 ++++++++++++++++++ .../apache/hadoop/hbase/util/TestKeying.java | 51 ++++++++ 3 files changed, 164 insertions(+) create mode 100644 src/java/org/apache/hadoop/hbase/util/Keying.java create mode 100644 src/test/org/apache/hadoop/hbase/util/TestKeying.java diff --git a/CHANGES.txt b/CHANGES.txt index 7b36af7921b..7f4b70d5b3a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -50,3 +50,4 @@ Trunk (unreleased changes) 30. HADOOP-1531 Add RowFilter to HRegion.HScanner Adds a row filtering interface and two implemenentations: A page scanner, and a regex row/column-data matcher. (James Kennedy via Stack) + 31. HADOOP-1566 Key-making utility diff --git a/src/java/org/apache/hadoop/hbase/util/Keying.java b/src/java/org/apache/hadoop/hbase/util/Keying.java new file mode 100644 index 00000000000..2422be587ec --- /dev/null +++ b/src/java/org/apache/hadoop/hbase/util/Keying.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.util.StringTokenizer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Utility creating hbase friendly keys. + * Use for fabricating row names or column qualifiers. + *

/**
 * Utility creating hbase friendly keys.
 * Use for fabricating row names or column qualifiers.
 *
 * <p>TODO: Add createSchemeless key, a key that doesn't care if scheme is
 * http or https.
 */
public class Keying {
  /** Artificial scheme prefixed to every key made by {@link #createKey(String)}. */
  private static final String SCHEME = "r:";

  /**
   * Splits a hierarchical URI into three groups: (1) the scheme, the
   * <code>://</code> separator and any userinfo up to and including its
   * <code>@</code>; (2) the run of characters up to the next <code>:</code>,
   * <code>/</code>, <code>?</code> or <code>#</code>, which is treated as the
   * host; (3) whatever remains.
   */
  private static final Pattern URI_RE_PARSER =
    Pattern.compile("^([^:/?#]+://(?:[^/?#@]+@)?)([^:/?#]+)(.*)$");

  /**
   * Makes a key out of passed URI for use as row name or column qualifier.
   *
   * <p>This method runs transforms on the passed URI so it sits better
   * as a key (or portion-of-a-key) in hbase. The <code>host</code> portion of
   * the URI authority is reversed so subdomains sort under their parent
   * domain. The returned String is an opaque URI of an artificial
   * <code>r:</code> scheme to prevent the result being considered an URI of
   * the original scheme. Here is an example of the transform: The url
   * <code>http://lucene.apache.org/index.html?query=something#middle</code> is
   * returned as
   * <code>r:http://org.apache.lucene/index.html?query=something#middle</code>.
   * The transforms are reversible. No transform is done if passed URI is
   * not hierarchical; such input is returned unchanged, without the
   * <code>r:</code> prefix.
   *
   * <p>If authority <code>userinfo</code> is present, will mess up the sort
   * (until we do more work).
   *
   * @param u URL to transform.
   * @return An opaque URI of artificial 'r' scheme with host portion of URI
   * authority reversed (if present).
   * @throws IllegalArgumentException if <code>u</code> already starts with
   * <code>r:</code>.
   * @throws NullPointerException if <code>u</code> is null.
   * @see #keyToUri(String)
   * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC2396</a>
   */
  public static String createKey(final String u) {
    if (u.startsWith(SCHEME)) {
      throw new IllegalArgumentException("Starts with " + SCHEME);
    }
    Matcher m = getMatcher(u);
    if (m == null || !m.matches()) {
      // If no match, return original String.
      return u;
    }
    return SCHEME + m.group(1) + reverseHostname(m.group(2)) + m.group(3);
  }

  /**
   * Reverse the {@link #createKey(String)} transform.
   *
   * <p>Input that does not start with <code>r:</code>, or whose remainder
   * is not a hierarchical URI, is returned unchanged.
   *
   * @param s URI made by {@link #createKey(String)}.
   * @return 'Restored' URI made by reversing the {@link #createKey(String)}
   * transform.
   * @throws NullPointerException if <code>s</code> is null.
   */
  public static String keyToUri(final String s) {
    if (!s.startsWith(SCHEME)) {
      return s;
    }
    Matcher m = getMatcher(s.substring(SCHEME.length()));
    if (m == null || !m.matches()) {
      // If no match, return original String.
      return s;
    }
    return m.group(1) + reverseHostname(m.group(2)) + m.group(3);
  }

  /**
   * @param u String to parse.
   * @return Matcher of the URI pattern over <code>u</code> (not yet run), or
   * null if <code>u</code> is null or empty.
   */
  private static Matcher getMatcher(final String u) {
    if (u == null || u.length() <= 0) {
      return null;
    }
    return URI_RE_PARSER.matcher(u);
  }

  /**
   * Reverses the order of the dot-separated labels of a hostname, so
   * <code>www.example.com</code> becomes <code>com.example.www</code>.
   *
   * <p>Empty labels are kept, so a trailing dot becomes a leading dot (and
   * vice versa). Applying this method twice therefore always gives back the
   * input, which is what makes {@link #createKey(String)} reversible.
   *
   * @param hostname Hostname to reverse; may be null.
   * @return Hostname with its labels in reverse order, or the empty String
   * if <code>hostname</code> is null.
   */
  private static String reverseHostname(final String hostname) {
    if (hostname == null) {
      return "";
    }
    // A limit of -1 stops split discarding trailing empty labels; a
    // tokenizer would silently drop every empty label.
    String[] labels = hostname.split("\\.", -1);
    StringBuilder sb = new StringBuilder(hostname.length());
    for (int i = labels.length - 1; i >= 0; i--) {
      sb.append(labels[i]);
      if (i > 0) {
        sb.append('.');
      }
    }
    return sb.toString();
  }
}
or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import junit.framework.TestCase; + +public class TestKeying extends TestCase { + + protected void setUp() throws Exception { + super.setUp(); + } + + protected void tearDown() throws Exception { + super.tearDown(); + } + + public void testURI() throws Exception { + checkTransform("http://abc:bcd@www.example.com/index.html" + + "?query=something#middle"); + checkTransform("file:///usr/bin/java"); + checkTransform("dns:www.powerset.com"); + checkTransform("dns://dns.powerset.com/www.powerset.com"); + checkTransform("http://one.two.three/index.html"); + checkTransform("https://one.two.three:9443/index.html"); + checkTransform("ftp://one.two.three/index.html"); + + checkTransform("filename"); + } + + private void checkTransform(final String u) { + String k = Keying.createKey(u); + String uri = Keying.keyToUri(k); + System.out.println("Original url " + u + ", Transformed url " + k); + assertEquals(u, uri); + } +} \ No newline at end of file