From b26cdb24f57660811914e28e3dcdef418647187c Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Mon, 12 Oct 2009 23:57:13 +0000 Subject: [PATCH] HBASE-1885 Simplify use of IndexedTable outside Java API git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@824546 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + src/contrib/transactional/bin/TableIndexed.rb | 54 +++++++++++++++ src/contrib/transactional/build.xml | 15 +++++ .../tableindexed/IndexSpecification.java | 10 +++ .../tableindexed/SimpleIndexKeyGenerator.java | 6 +- .../tableindexed/UniqueIndexKeyGenerator.java | 66 +++++++++++++++++++ .../hbase/client/tableindexed/package.html | 3 +- 7 files changed, 154 insertions(+), 2 deletions(-) create mode 100644 src/contrib/transactional/bin/TableIndexed.rb create mode 100644 src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/UniqueIndexKeyGenerator.java diff --git a/CHANGES.txt b/CHANGES.txt index 0c5ad9b1996..f85a782c27b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -113,6 +113,8 @@ Release 0.21.0 - Unreleased all test sync/append HBASE-1902 Let PerformanceEvaluation support setting tableName and compress algorithm (Schubert Zhang via Stack) + HBASE-1885 Simplify use of IndexedTable outside Java API + (Kevin Patterson via Stack) OPTIMIZATIONS diff --git a/src/contrib/transactional/bin/TableIndexed.rb b/src/contrib/transactional/bin/TableIndexed.rb new file mode 100644 index 00000000000..74c1856dda4 --- /dev/null +++ b/src/contrib/transactional/bin/TableIndexed.rb @@ -0,0 +1,54 @@ +# Copyright 2009 The Apache Software Foundation +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# TableIndexed.rb +# Extends HBase shell with operations on IndexedTables. + +# Usage: within the HBase shell, load 'TableIndexed.rb'. Transactional +# jar must be in the classpath. + +import org.apache.hadoop.hbase.client.tableindexed.IndexedTableAdmin +import org.apache.hadoop.hbase.client.tableindexed.IndexSpecification + +# Creates an index using the supplied index specification. The IndexedTableAdmin is built from @configuration on first use and cached in @iadmin. +# [table_name] the name of the table to index; converted with to_java_bytes before being passed to addIndex. +# [index_spec] the IndexSpecification describing the index wanted. +def create_index(table_name, index_spec) + @iadmin ||= IndexedTableAdmin.new(@configuration) + @iadmin.addIndex(table_name.to_java_bytes, index_spec) +end + +# Creates an index for a field guaranteed to have unique values. If +# application code does not ensure uniqueness, behavior is undefined. Builds the spec with IndexSpecification.for_unique_index and delegates to create_index. +# [table_name] the name of the table to index. +# [index_name] the name of the index. +# [column] the column name to be indexed, must respond_to to_java_bytes. +def create_unique_index(table_name, index_name, column) + spec = IndexSpecification.for_unique_index(index_name, column.to_java_bytes) + create_index(table_name, spec) +end + +# Creates an index using the standard simple index key. Supports one +# to many mappings from indexed values to rows in the primary table. +# [table_name] the name of the table to index. +# [index_name] the name of the index. +# [column] the column name to be indexed, must respond_to to_java_bytes. 
+def create_simple_index(table_name, index_name, column) + spec = IndexSpecification.new(index_name, column.to_java_bytes) + create_index(table_name, spec) +end diff --git a/src/contrib/transactional/build.xml b/src/contrib/transactional/build.xml index bdd6f782c94..56f1cd748df 100644 --- a/src/contrib/transactional/build.xml +++ b/src/contrib/transactional/build.xml @@ -23,4 +23,19 @@ to call at top-level: ant deploy-contrib compile-core-test --> + + + + + + + + + + + + + + diff --git a/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java b/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java index cba69f9419a..552d83dfaab 100644 --- a/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java +++ b/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java @@ -56,6 +56,16 @@ public class IndexSpecification implements Writable { new SimpleIndexKeyGenerator(indexedColumn)); } + /** Construct an index spec for a single column that has only unique values; index keys are built by a {@link UniqueIndexKeyGenerator}. + * @param indexId the name of the index + * @param indexedColumn the column to index + * @return the IndexSpecification + */ + public static IndexSpecification forUniqueIndex(String indexId, byte[] indexedColumn) { + return new IndexSpecification(indexId, new byte[][] { indexedColumn }, + null, new UniqueIndexKeyGenerator(indexedColumn)); + } + /** * Construct an index spec by specifying everything. 
* diff --git a/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/SimpleIndexKeyGenerator.java b/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/SimpleIndexKeyGenerator.java index ed801852110..f7ff031c882 100644 --- a/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/SimpleIndexKeyGenerator.java +++ b/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/SimpleIndexKeyGenerator.java @@ -26,7 +26,11 @@ import java.util.Map; import org.apache.hadoop.hbase.util.Bytes; -/** Creates indexed keys for a single column.... +/**Creates indexed keys for a single column. Index key consists of the column + * value followed by the row key of the indexed table to disambiguate. + * + * If the column values are guaranteed to be unique, consider + * {@link UniqueIndexKeyGenerator}. * */ public class SimpleIndexKeyGenerator implements IndexKeyGenerator { diff --git a/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/UniqueIndexKeyGenerator.java b/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/UniqueIndexKeyGenerator.java new file mode 100644 index 00000000000..b9fe770dcf1 --- /dev/null +++ b/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/UniqueIndexKeyGenerator.java @@ -0,0 +1,66 @@ +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.client.tableindexed; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Map; + +import org.apache.hadoop.hbase.util.Bytes; + +/** + * Creates index row keys which exactly match the indexed column. This allows a + * direct get() lookup on the index table, but at the cost that the column + * values must be unique. + * + * If you are indexing a column which can have duplicated values, consider + * {@link SimpleIndexKeyGenerator}. + */ +public class UniqueIndexKeyGenerator implements IndexKeyGenerator { + private byte[] column; + + /** + * @param column the column to index + */ + public UniqueIndexKeyGenerator(byte[] column) { + this.column = column; + } + + public UniqueIndexKeyGenerator() { + // For Writable: the column is populated afterwards by readFields(DataInput) + } + + /** {@inheritDoc} Returns a copy of the value stored under the indexed column; rowKey is not used. Throws NullPointerException if the map holds no value for the column. */ + public byte[] createIndexKey(byte[] rowKey, Map<byte[], byte[]> columns) { + return columns.get(column).clone(); + } + + /** {@inheritDoc} */ + public void readFields(DataInput in) throws IOException { + column = Bytes.readByteArray(in); + } + + /** {@inheritDoc} */ + public void write(DataOutput out) throws IOException { + Bytes.writeByteArray(out, column); + } + +} diff --git a/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/package.html b/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/package.html index 300345636d6..fe544bb29e6 100644 --- a/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/package.html +++ 
b/src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/tableindexed/package.html @@ -33,7 +33,8 @@ The IndexSpecification class provides the metadata for the index. This includes: IndexesSpecifications can be added to a table's metadata (HTableDescriptor) before the table is constructed. Afterwards, updates and deletes to the original table will trigger the updates in the index, and -the indexes can be scanned using the API on IndexedTable. +the indexes can be scanned using the API on IndexedTable. If you prefer not to use the Java API, you can +load TableIndexed.rb to create indexes from within the HBase shell. For a simple example, look at the unit test in org.apache.hadoop.hbase.client.tableIndexed.