From 36f0d36de99c5001b12626a366912e459827004b Mon Sep 17 00:00:00 2001
From: Jim Kellerman
Date: Fri, 11 Jul 2008 21:41:44 +0000
Subject: [PATCH] HBASE-696 Make bloomfilter true/false and self-sizing

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@676088 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                   |   1 +
 .../hadoop/hbase/BloomFilterDescriptor.java   |   9 +-
 .../hadoop/hbase/HColumnDescriptor.java       |  68 ++---
 .../org/apache/hadoop/hbase/HConstants.java   |   4 +-
 .../apache/hadoop/hbase/HTableDescriptor.java |   6 +-
 .../hbase/regionserver/CompactionReader.java  |  53 ----
 .../hadoop/hbase/regionserver/HRegion.java    |   3 -
 .../hadoop/hbase/regionserver/HStore.java     | 264 +++++-------------
 .../hadoop/hbase/regionserver/HStoreFile.java | 201 +++++++------
 .../regionserver/MapFileCompactionReader.java |  53 ----
 .../hbase/regionserver/StoreFileScanner.java  |   3 +-
 .../hadoop/hbase/rest/TableHandler.java       |   2 +-
 .../hadoop/hbase/thrift/ThriftUtilities.java  |  22 +-
 .../apache/hadoop/hbase/util/MetaUtils.java   |   9 +-
 .../org/apache/hadoop/hbase/util/Migrate.java |  64 ++++-
 src/java/org/onelab/filter/BloomFilter.java   |   7 +
 .../apache/hadoop/hbase/HBaseTestCase.java    |   6 +-
 .../apache/hadoop/hbase/TestBloomFilters.java |  82 +-----
 .../hbase/regionserver/TestHStoreFile.java    |   6 +-
 .../hbase/regionserver/TestTimestamp.java     |   2 +-
 20 files changed, 310 insertions(+), 555 deletions(-)
 delete mode 100644 src/java/org/apache/hadoop/hbase/regionserver/CompactionReader.java
 delete mode 100644 src/java/org/apache/hadoop/hbase/regionserver/MapFileCompactionReader.java

diff --git a/CHANGES.txt b/CHANGES.txt
index 5aa41d68376..8afdd14c184 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -14,6 +14,7 @@ Trunk (unreleased changes)
    HBASE-521   Improve client scanner interface
    HBASE-288   Add in-memory caching of data. Required update of hadoop to
                0.17.0-dev.2008-02-07_12-01-58. (Tom White via Stack)
+   HBASE-696   Make bloomfilter true/false and self-sizing
 
   BUG FIXES
    HBASE-574   HBase does not load hadoop native libs (Rong-En Fan via Stack)
diff --git a/src/java/org/apache/hadoop/hbase/BloomFilterDescriptor.java b/src/java/org/apache/hadoop/hbase/BloomFilterDescriptor.java
index ea80e1eb043..53848bf314e 100644
--- a/src/java/org/apache/hadoop/hbase/BloomFilterDescriptor.java
+++ b/src/java/org/apache/hadoop/hbase/BloomFilterDescriptor.java
@@ -47,6 +47,7 @@ import org.apache.hadoop.io.WritableComparable;
 * m/n ln(2).
 * 
 */
+@Deprecated
 public class BloomFilterDescriptor implements WritableComparable {
   private static final double DEFAULT_NUMBER_OF_HASH_FUNCTIONS = 4.0;
 
@@ -152,14 +153,12 @@ public class BloomFilterDescriptor implements WritableComparable {
     return value.toString();
   }
 
-  public BloomFilterType getType() {
-    return filterType;
-  }
-
+  /** @return the vector size */
   public int getVectorSize() {
     return vectorSize;
   }
-
+
+  /** @return number of hash functions */
   public int getNbHash() {
     return nbHash;
   }
diff --git a/src/java/org/apache/hadoop/hbase/HColumnDescriptor.java b/src/java/org/apache/hadoop/hbase/HColumnDescriptor.java
index 5790e5fa122..df0e5fa6ffd 100644
--- a/src/java/org/apache/hadoop/hbase/HColumnDescriptor.java
+++ b/src/java/org/apache/hadoop/hbase/HColumnDescriptor.java
@@ -41,7 +41,8 @@ public class HColumnDescriptor implements WritableComparable {
   // Version 3 was when column names became byte arrays and when we picked up
   // Time-to-live feature.  Version 4 was when we moved to byte arrays, HBASE-82.
- private static final byte COLUMN_DESCRIPTOR_VERSION = (byte)4; + // Version 5 was when bloom filter descriptors were removed. + private static final byte COLUMN_DESCRIPTOR_VERSION = (byte)5; /** * The type of compression. @@ -96,11 +97,6 @@ public class HColumnDescriptor implements WritableComparable { */ public static final int DEFAULT_TTL = HConstants.FOREVER; - /** - * Default bloom filter description. - */ - public static final BloomFilterDescriptor DEFAULT_BLOOMFILTER = null; - // Column family name private byte [] name; // Number of versions to keep @@ -116,9 +112,7 @@ public class HColumnDescriptor implements WritableComparable { // Time to live of cell contents, in seconds from last timestamp private int timeToLive = DEFAULT_TTL; // True if bloom filter was specified - private boolean bloomFilterSpecified = false; - // Descriptor of bloom filter - private BloomFilterDescriptor bloomFilter = DEFAULT_BLOOMFILTER; + private boolean bloomFilter = false; /** * Default constructor. Must be present for Writable. @@ -155,11 +149,9 @@ public class HColumnDescriptor implements WritableComparable { */ public HColumnDescriptor(final byte [] columnName) { this (columnName == null || columnName.length <= 0? - HConstants.EMPTY_BYTE_ARRAY: columnName, - DEFAULT_VERSIONS, DEFAULT_COMPRESSION, DEFAULT_IN_MEMORY, - DEFAULT_BLOCKCACHE, - Integer.MAX_VALUE, DEFAULT_TTL, - DEFAULT_BLOOMFILTER); + HConstants.EMPTY_BYTE_ARRAY: columnName, DEFAULT_VERSIONS, + DEFAULT_COMPRESSION, DEFAULT_IN_MEMORY, DEFAULT_BLOCKCACHE, + Integer.MAX_VALUE, DEFAULT_TTL, false); } /** @@ -182,9 +174,8 @@ public class HColumnDescriptor implements WritableComparable { */ public HColumnDescriptor(final byte [] columnName, final int maxVersions, final CompressionType compression, final boolean inMemory, - final boolean blockCacheEnabled, - final int maxValueLength, final int timeToLive, - final BloomFilterDescriptor bloomFilter) { + final boolean blockCacheEnabled, final int maxValueLength, + final int timeToLive, final boolean bloomFilter) { isLegalFamilyName(columnName); this.name = stripColon(columnName); if (maxVersions <= 0) { @@ -198,7 +189,6 @@ public class HColumnDescriptor implements WritableComparable { this.maxValueLength = maxValueLength; this.timeToLive = timeToLive; this.bloomFilter = bloomFilter; - this.bloomFilterSpecified = this.bloomFilter == null ? false : true; this.compressionType = compression; } @@ -295,9 +285,9 @@ public class HColumnDescriptor implements WritableComparable { } /** - * @return Bloom filter descriptor or null if none set. + * @return true if a bloom filter is enabled */ - public BloomFilterDescriptor getBloomFilter() { + public boolean isBloomFilterEnabled() { return this.bloomFilter; } @@ -313,9 +303,7 @@ public class HColumnDescriptor implements WritableComparable { ", " + TTL + " => " + (timeToLive == HConstants.FOREVER ? "FOREVER" : Integer.toString(timeToLive)) + - ", " + BLOOMFILTER + " => " + - (bloomFilterSpecified ? 
bloomFilter.toString() : CompressionType.NONE) + - "}"; + ", " + BLOOMFILTER + " => " + bloomFilter + "}"; } /** {@inheritDoc} */ @@ -334,11 +322,8 @@ public class HColumnDescriptor implements WritableComparable { result ^= Boolean.valueOf(this.blockCacheEnabled).hashCode(); result ^= Integer.valueOf(this.maxValueLength).hashCode(); result ^= Integer.valueOf(this.timeToLive).hashCode(); - result ^= Boolean.valueOf(this.bloomFilterSpecified).hashCode(); + result ^= Boolean.valueOf(this.bloomFilter).hashCode(); result ^= Byte.valueOf(COLUMN_DESCRIPTOR_VERSION).hashCode(); - if (this.bloomFilterSpecified) { - result ^= this.bloomFilter.hashCode(); - } return result; } @@ -362,13 +347,15 @@ public class HColumnDescriptor implements WritableComparable { this.compressionType = CompressionType.values()[ordinal]; this.inMemory = in.readBoolean(); this.maxValueLength = in.readInt(); - this.bloomFilterSpecified = in.readBoolean(); - - if(bloomFilterSpecified) { - bloomFilter = new BloomFilterDescriptor(); - bloomFilter.readFields(in); + this.bloomFilter = in.readBoolean(); + if (this.bloomFilter && versionNumber < 5) { + // If a bloomFilter is enabled and the column descriptor is less than + // version 5, we need to skip over it to read the rest of the column + // descriptor. There are no BloomFilterDescriptors written to disk for + // column descriptors with a version number >= 5 + BloomFilterDescriptor junk = new BloomFilterDescriptor(); + junk.readFields(in); } - if (versionNumber > 1) { this.blockCacheEnabled = in.readBoolean(); } @@ -386,11 +373,7 @@ public class HColumnDescriptor implements WritableComparable { out.writeInt(this.compressionType.ordinal()); out.writeBoolean(this.inMemory); out.writeInt(this.maxValueLength); - out.writeBoolean(this.bloomFilterSpecified); - - if(bloomFilterSpecified) { - bloomFilter.write(out); - } + out.writeBoolean(this.bloomFilter); out.writeBoolean(this.blockCacheEnabled); out.writeInt(this.timeToLive); } @@ -443,21 +426,16 @@ public class HColumnDescriptor implements WritableComparable { } if(result == 0) { - if(this.bloomFilterSpecified == other.bloomFilterSpecified) { + if(this.bloomFilter == other.bloomFilter) { result = 0; - } else if(this.bloomFilterSpecified) { + } else if(this.bloomFilter) { result = -1; } else { result = 1; } } - - if(result == 0 && this.bloomFilterSpecified) { - result = this.bloomFilter.compareTo(other.bloomFilter); - } - return result; } } \ No newline at end of file diff --git a/src/java/org/apache/hadoop/hbase/HConstants.java b/src/java/org/apache/hadoop/hbase/HConstants.java index 0488b8f808f..8078875c374 100644 --- a/src/java/org/apache/hadoop/hbase/HConstants.java +++ b/src/java/org/apache/hadoop/hbase/HConstants.java @@ -40,9 +40,9 @@ public interface HConstants { /** * Current version of file system - * Version 3 added 'historian' family to .META. 
+ * Version 4 supports only one kind of bloom filter */ - public static final String FILE_SYSTEM_VERSION = "3"; + public static final String FILE_SYSTEM_VERSION = "4"; // Configuration parameters diff --git a/src/java/org/apache/hadoop/hbase/HTableDescriptor.java b/src/java/org/apache/hadoop/hbase/HTableDescriptor.java index 959e0366ab8..06dd9b98daf 100644 --- a/src/java/org/apache/hadoop/hbase/HTableDescriptor.java +++ b/src/java/org/apache/hadoop/hbase/HTableDescriptor.java @@ -42,17 +42,17 @@ public class HTableDescriptor implements WritableComparable { HConstants.ROOT_TABLE_NAME, new HColumnDescriptor[] { new HColumnDescriptor(HConstants.COLUMN_FAMILY, 1, HColumnDescriptor.CompressionType.NONE, false, false, - Integer.MAX_VALUE, HConstants.FOREVER, null) }); + Integer.MAX_VALUE, HConstants.FOREVER, false) }); /** Table descriptor for .META. catalog table */ public static final HTableDescriptor META_TABLEDESC = new HTableDescriptor( HConstants.META_TABLE_NAME, new HColumnDescriptor[] { new HColumnDescriptor(HConstants.COLUMN_FAMILY, 1, HColumnDescriptor.CompressionType.NONE, false, false, - Integer.MAX_VALUE, HConstants.FOREVER, null), + Integer.MAX_VALUE, HConstants.FOREVER, false), new HColumnDescriptor(HConstants.COLUMN_FAMILY_HISTORIAN, HConstants.ALL_VERSIONS, HColumnDescriptor.CompressionType.NONE, - false, false, Integer.MAX_VALUE, HConstants.FOREVER, null) }); + false, false, Integer.MAX_VALUE, HConstants.FOREVER, false) }); private boolean rootregion = false; private boolean metaregion = false; diff --git a/src/java/org/apache/hadoop/hbase/regionserver/CompactionReader.java b/src/java/org/apache/hadoop/hbase/regionserver/CompactionReader.java deleted file mode 100644 index 7a1f2ea591e..00000000000 --- a/src/java/org/apache/hadoop/hbase/regionserver/CompactionReader.java +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright 2008 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hbase.regionserver; - -import java.io.IOException; - -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.Writable; - -/** Interface for generic reader for compactions */ -interface CompactionReader { - - /** - * Closes the reader - * @throws IOException - */ - public void close() throws IOException; - - /** - * Get the next key/value pair - * - * @param key - * @param val - * @return true if more data was returned - * @throws IOException - */ - public boolean next(WritableComparable key, Writable val) - throws IOException; - - /** - * Resets the reader - * @throws IOException - */ - public void reset() throws IOException; -} \ No newline at end of file diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java index cf4f197e8e5..8a53aac8e7f 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -2147,8 +2147,5 @@ public class HRegion implements HConstants { throws IOException { fs.mkdirs(HStoreFile.getMapDir(basedir, encodedRegionName, colFamily)); fs.mkdirs(HStoreFile.getInfoDir(basedir, encodedRegionName, colFamily)); - if (tabledesc.getFamily(colFamily).getBloomFilter() != null) { - fs.mkdirs(HStoreFile.getFilterDir(basedir, encodedRegionName, colFamily)); - } } } diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HStore.java b/src/java/org/apache/hadoop/hbase/regionserver/HStore.java index dde230c4cfb..8f206e6a70a 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -27,7 +27,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; -import java.util.Collection; import java.util.Map; import java.util.Set; import java.util.SortedMap; @@ -38,12 +37,9 @@ import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.BloomFilterDescriptor; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; @@ -60,10 +56,6 @@ import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Writable; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.StringUtils; -import org.onelab.filter.BloomFilter; -import org.onelab.filter.CountingBloomFilter; -import org.onelab.filter.Filter; -import org.onelab.filter.RetouchedBloomFilter; /** * HStore maintains a bunch of data files. 
It is responsible for maintaining @@ -84,8 +76,6 @@ public class HStore implements HConstants { private static final Pattern REF_NAME_PARSER = Pattern.compile("^(\\d+)(?:\\.(.+))?$"); - private static final String BLOOMFILTER_FILE_NAME = "filter"; - protected final Memcache memcache; private final Path basedir; private final HRegionInfo info; @@ -93,8 +83,6 @@ public class HStore implements HConstants { private final SequenceFile.CompressionType compression; final FileSystem fs; private final HBaseConfiguration conf; - private final Path filterDir; - final Filter bloomFilter; protected long ttl; private final long desiredMaxFileSize; @@ -215,18 +203,6 @@ public class HStore implements HConstants { fs.mkdirs(infodir); } - if(family.getBloomFilter() == null) { - this.filterDir = null; - this.bloomFilter = null; - } else { - this.filterDir = HStoreFile.getFilterDir(basedir, info.getEncodedName(), - family.getName()); - if (!fs.exists(filterDir)) { - fs.mkdirs(filterDir); - } - this.bloomFilter = loadOrCreateBloomFilter(); - } - // Go through the 'mapdir' and 'infodir' together, make sure that all // MapFiles are in a reliable state. Every entry in 'mapdir' must have a // corresponding one in 'loginfodir'. Without a corresponding log info @@ -266,11 +242,12 @@ public class HStore implements HConstants { if (first) { // Use a block cache (if configured) for the first reader only // so as to control memory usage. - r = e.getValue().getReader(this.fs, this.bloomFilter, + r = e.getValue().getReader(this.fs, this.family.isBloomFilterEnabled(), family.isBlockCacheEnabled()); first = false; } else { - r = e.getValue().getReader(this.fs, this.bloomFilter); + r = e.getValue().getReader(this.fs, this.family.isBloomFilterEnabled(), + false); } this.readers.put(e.getKey(), r); } @@ -516,105 +493,6 @@ public class HStore implements HConstants { this.fs.getFileStatus(f).getLen() == 0; } - ////////////////////////////////////////////////////////////////////////////// - // Bloom filters - ////////////////////////////////////////////////////////////////////////////// - - /** - * Called by constructor if a bloom filter is enabled for this column family. - * If the HStore already exists, it will read in the bloom filter saved - * previously. Otherwise, it will create a new bloom filter. 
- */ - private Filter loadOrCreateBloomFilter() throws IOException { - Path filterFile = new Path(filterDir, BLOOMFILTER_FILE_NAME); - Filter bloomFilter = null; - if(fs.exists(filterFile)) { - if (LOG.isDebugEnabled()) { - LOG.debug("loading bloom filter for " + this.storeNameStr); - } - - BloomFilterDescriptor.BloomFilterType type = - family.getBloomFilter().getType(); - - switch(type) { - - case BLOOMFILTER: - bloomFilter = new BloomFilter(); - break; - - case COUNTING_BLOOMFILTER: - bloomFilter = new CountingBloomFilter(); - break; - - case RETOUCHED_BLOOMFILTER: - bloomFilter = new RetouchedBloomFilter(); - break; - - default: - throw new IllegalArgumentException("unknown bloom filter type: " + - type); - } - FSDataInputStream in = fs.open(filterFile); - try { - bloomFilter.readFields(in); - } finally { - fs.close(); - } - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("creating bloom filter for " + this.storeNameStr); - } - - BloomFilterDescriptor.BloomFilterType type = - family.getBloomFilter().getType(); - - switch(type) { - - case BLOOMFILTER: - bloomFilter = new BloomFilter(family.getBloomFilter().getVectorSize(), - family.getBloomFilter().getNbHash()); - break; - - case COUNTING_BLOOMFILTER: - bloomFilter = - new CountingBloomFilter(family.getBloomFilter().getVectorSize(), - family.getBloomFilter().getNbHash()); - break; - - case RETOUCHED_BLOOMFILTER: - bloomFilter = - new RetouchedBloomFilter(family.getBloomFilter().getVectorSize(), - family.getBloomFilter().getNbHash()); - } - } - return bloomFilter; - } - - /** - * Flushes bloom filter to disk - * - * @throws IOException - */ - private void flushBloomFilter() throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("flushing bloom filter for " + this.storeNameStr); - } - FSDataOutputStream out = - fs.create(new Path(filterDir, BLOOMFILTER_FILE_NAME)); - try { - bloomFilter.write(out); - } finally { - out.close(); - } - if (LOG.isDebugEnabled()) { - LOG.debug("flushed bloom filter for " + this.storeNameStr); - } - } - - ////////////////////////////////////////////////////////////////////////////// - // End bloom filters - ////////////////////////////////////////////////////////////////////////////// - /** * Adds a value to the memcache * @@ -704,7 +582,7 @@ public class HStore implements HConstants { HStoreFile flushedFile = new HStoreFile(conf, fs, basedir, info.getEncodedName(), family.getName(), -1L, null); MapFile.Writer out = flushedFile.getWriter(this.fs, this.compression, - this.bloomFilter); + this.family.isBloomFilterEnabled(), cache.size()); // Here we tried picking up an existing HStoreFile from disk and // interlacing the memcache flush compacting as we go. The notion was @@ -746,12 +624,7 @@ public class HStore implements HConstants { // MapFile. The MapFile is current up to and including the log seq num. flushedFile.writeInfo(fs, logCacheFlushId); - // C. Flush the bloom filter if any - if (bloomFilter != null) { - flushBloomFilter(); - } - - // D. Finally, make the new MapFile available. + // C. Finally, make the new MapFile available. updateReaders(logCacheFlushId, flushedFile); if(LOG.isDebugEnabled()) { LOG.debug("Added " + FSUtils.getPath(flushedFile.getMapFilePath()) + @@ -778,7 +651,8 @@ public class HStore implements HConstants { Long flushid = Long.valueOf(logCacheFlushId); // Open the map file reader. 
     this.readers.put(flushid,
-      flushedFile.getReader(this.fs, this.bloomFilter));
+      flushedFile.getReader(this.fs, this.family.isBloomFilterEnabled(),
+        this.family.isBlockCacheEnabled()));
     this.storefiles.put(flushid, flushedFile);
     // Tell listeners of the change in readers.
     notifyChangedReadersObservers();
@@ -819,21 +693,6 @@
   // Compaction
   //////////////////////////////////////////////////////////////////////////////
-
-  /*
-   * @param files
-   * @return True if any of the files in <code>files</code> are References.
-   */
-  private boolean hasReferences(Collection<HStoreFile> files) {
-    if (files != null && files.size() > 0) {
-      for (HStoreFile hsf: files) {
-        if (hsf.isReference()) {
-          return true;
-        }
-      }
-    }
-    return false;
-  }
 
   /**
    * Compact the back-HStores.  This method may take some time, so the calling
   * thread must be able to block for long periods.
@@ -858,44 +717,69 @@
   StoreSize compact(final boolean force) throws IOException {
     synchronized (compactLock) {
       long maxId = -1;
+      int nrows = -1;
       List<HStoreFile> filesToCompact = null;
       synchronized (storefiles) {
         if (this.storefiles.size() <= 0) {
           return null;
         }
         filesToCompact = new ArrayList<HStoreFile>(this.storefiles.values());
-        if (!force && !hasReferences(filesToCompact) &&
-            filesToCompact.size() < compactionThreshold) {
-          return checkSplit();
-        }
-        if (!fs.exists(compactionDir) && !fs.mkdirs(compactionDir)) {
-          LOG.warn("Mkdir on " + compactionDir.toString() + " failed");
-          return checkSplit();
-        }
         // The max-sequenceID in any of the to-be-compacted TreeMaps is the
         // last key of storefiles.
         maxId = this.storefiles.lastKey().longValue();
       }
+      if (!force && filesToCompact.size() < compactionThreshold) {
+        return checkSplit();
+      }
+      if (!fs.exists(compactionDir) && !fs.mkdirs(compactionDir)) {
+        LOG.warn("Mkdir on " + compactionDir.toString() + " failed");
+        return checkSplit();
+      }
+      /*
+       * We create a new list of MapFile.Reader objects so we don't screw up the
+       * caching associated with the currently-loaded ones. Our iteration-based
+       * access pattern is practically designed to ruin the cache.
+       */
+      List<MapFile.Reader> readers = new ArrayList<MapFile.Reader>();
+      for (HStoreFile file: filesToCompact) {
+        try {
+          HStoreFile.BloomFilterMapFile.Reader reader = file.getReader(fs,
+            this.family.isBloomFilterEnabled(), false);
+          readers.add(reader);
+
+          // Compute the size of the new bloomfilter if needed
+          if (this.family.isBloomFilterEnabled()) {
+            nrows += reader.getBloomFilterSize();
+          }
+        } catch (IOException e) {
+          // Add info about which file threw exception. It may not be in the
+          // exception message so output a message here where we know the
+          // culprit.
+          LOG.warn("Failed with " + e.toString() + ": " + file.toString());
+          closeCompactionReaders(readers);
+          throw e;
+        }
+      }
+
      // Storefiles are keyed by sequence id. The oldest file comes first.
      // We need to return out of here a List that has the newest file first.
-      Collections.reverse(filesToCompact);
+      Collections.reverse(readers);
 
       // Step through them, writing to the brand-new MapFile
       HStoreFile compactedOutputFile = new HStoreFile(conf, fs,
        this.compactionDir, info.getEncodedName(), family.getName(), -1L, null);
      if (LOG.isDebugEnabled()) {
-        LOG.debug("started compaction of " + filesToCompact.size() +
-          " files " + filesToCompact.toString() + " into " +
+        LOG.debug("started compaction of " + readers.size() + " files into " +
          FSUtils.getPath(compactedOutputFile.getMapFilePath()));
      }
-      MapFile.Writer compactedOut = compactedOutputFile.getWriter(this.fs,
-        this.compression, this.bloomFilter);
+      MapFile.Writer writer = compactedOutputFile.getWriter(this.fs,
+        this.compression, this.family.isBloomFilterEnabled(), nrows);
       try {
-        compactHStoreFiles(compactedOut, filesToCompact);
+        compactHStoreFiles(writer, readers);
       } finally {
-        compactedOut.close();
+        writer.close();
       }
 
       // Now, write out an HSTORE_LOGINFOFILE for the brand-new TreeMap.
@@ -913,36 +797,17 @@
   }
 
   /*
-   * Compact passed toCompactFiles into compactedOut.
-   * We create a new set of MapFile.Reader objects so we don't screw up the
-   * caching associated with the currently-loaded ones. Our iteration-based
-   * access pattern is practically designed to ruin the cache.
+   * Compact a list of MapFile.Readers into MapFile.Writer.
    *
-   * We work by opening a single MapFile.Reader for each file, and iterating
-   * through them in parallel. We always increment the lowest-ranked one.
+   * We work by iterating through the readers in parallel. We always increment
+   * the lowest-ranked one.
    * Updates to a single row/column will appear ranked by timestamp. This allows
-   * us to throw out deleted values or obsolete versions. @param compactedOut
-   * @param toCompactFiles @throws IOException
+   * us to throw out deleted values or obsolete versions.
    */
   private void compactHStoreFiles(final MapFile.Writer compactedOut,
-      final List<HStoreFile> toCompactFiles) throws IOException {
+      final List<MapFile.Reader> readers) throws IOException {
 
-    int size = toCompactFiles.size();
-    CompactionReader[] rdrs = new CompactionReader[size];
-    int index = 0;
-    for (HStoreFile hsf: toCompactFiles) {
-      try {
-        rdrs[index++] =
-          new MapFileCompactionReader(hsf.getReader(fs, bloomFilter));
-      } catch (IOException e) {
-        // Add info about which file threw exception. It may not be in the
-        // exception message so output a message here where we know the
-        // culprit.
-        LOG.warn("Failed with " + e.toString() + ": " + hsf.toString());
-        closeCompactionReaders(rdrs);
-        throw e;
-      }
-    }
+    MapFile.Reader[] rdrs = readers.toArray(new MapFile.Reader[readers.size()]);
     try {
       HStoreKey[] keys = new HStoreKey[rdrs.length];
       ImmutableBytesWritable[] vals = new ImmutableBytesWritable[rdrs.length];
@@ -1035,18 +900,16 @@
         }
       }
     } finally {
-      closeCompactionReaders(rdrs);
+      closeCompactionReaders(readers);
     }
   }
 
-  private void closeCompactionReaders(final CompactionReader [] rdrs) {
-    for (int i = 0; i < rdrs.length; i++) {
-      if (rdrs[i] != null) {
-        try {
-          rdrs[i].close();
-        } catch (IOException e) {
-          LOG.warn("Exception closing reader for " + this.storeNameStr, e);
-        }
+  private void closeCompactionReaders(final List<MapFile.Reader> rdrs) {
+    for (MapFile.Reader r: rdrs) {
+      try {
+        r.close();
+      } catch (IOException e) {
+        LOG.warn("Exception closing reader for " + this.storeNameStr, e);
      }
    }
  }
@@ -1163,10 +1026,11 @@
     // Add new compacted Reader and store file.
Long orderVal = Long.valueOf(finalCompactedFile.loadInfo(fs)); this.readers.put(orderVal, - // Use a block cache (if configured) for this reader since + // Use a block cache (if configured) for this reader since // it is the only one. - finalCompactedFile.getReader(this.fs, this.bloomFilter, family - .isBlockCacheEnabled())); + finalCompactedFile.getReader(this.fs, + this.family.isBloomFilterEnabled(), + this.family.isBlockCacheEnabled())); this.storefiles.put(orderVal, finalCompactedFile); // Tell observers that list of Readers has changed. notifyChangedReadersObservers(); diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java b/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java index 1f117c9dca3..187460bc4fa 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java @@ -27,7 +27,6 @@ import java.io.DataOutput; import java.io.DataOutputStream; import java.io.FileNotFoundException; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.util.Random; import org.apache.commons.logging.Log; @@ -45,7 +44,7 @@ import org.apache.hadoop.io.MapFile; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; -import org.onelab.filter.Filter; +import org.onelab.filter.BloomFilter; import org.onelab.filter.Key; import org.apache.hadoop.hbase.HConstants; @@ -394,33 +393,13 @@ public class HStoreFile implements HConstants { * Get reader for the store file map file. * Client is responsible for closing file when done. * @param fs - * @param bloomFilter If null, no filtering is done. - * @return MapFile.Reader - * @throws IOException - */ - public synchronized MapFile.Reader getReader(final FileSystem fs, - final Filter bloomFilter) - throws IOException { - if (isReference()) { - return new HStoreFile.HalfMapFileReader(fs, - getMapFilePath(reference).toString(), conf, - reference.getFileRegion(), reference.getMidkey(), bloomFilter); - } - return new BloomFilterMapFile.Reader(fs, getMapFilePath().toString(), - conf, bloomFilter); - } - - /** - * Get reader for the store file map file. - * Client is responsible for closing file when done. - * @param fs - * @param bloomFilter If null, no filtering is done. + * @param bloomFilter If true, a bloom filter exists * @param blockCacheEnabled If true, MapFile blocks should be cached. - * @return MapFile.Reader + * @return BloomFilterMapFile.Reader * @throws IOException */ - public synchronized MapFile.Reader getReader(final FileSystem fs, - final Filter bloomFilter, final boolean blockCacheEnabled) + public synchronized BloomFilterMapFile.Reader getReader(final FileSystem fs, + final boolean bloomFilter, final boolean blockCacheEnabled) throws IOException { if (isReference()) { return new HStoreFile.HalfMapFileReader(fs, @@ -438,20 +417,21 @@ public class HStoreFile implements HConstants { * @param fs * @param compression Pass SequenceFile.CompressionType.NONE * for none. - * @param bloomFilter If null, no filtering is done. + * @param bloomFilter If true, create a bloom filter + * @param nrows number of rows expected. Required if bloomFilter is true. 
* @return MapFile.Writer * @throws IOException */ public MapFile.Writer getWriter(final FileSystem fs, final SequenceFile.CompressionType compression, - final Filter bloomFilter) + final boolean bloomFilter, int nrows) throws IOException { if (isReference()) { throw new IOException("Illegal Access: Cannot get a writer on a" + "HStoreFile reference"); } return new BloomFilterMapFile.Writer(conf, fs, - getMapFilePath().toString(), compression, bloomFilter); + getMapFilePath().toString(), compression, bloomFilter, nrows); } /** @@ -472,25 +452,6 @@ public class HStoreFile implements HConstants { (isReference()? "-" + reference.toString(): ""); } - /** - * Custom bloom filter key maker. - * @param key - * @return Key made of bytes of row and column only. - * @throws IOException - */ - static Key getBloomFilterKey(WritableComparable key) - throws IOException { - HStoreKey hsk = (HStoreKey)key; - byte [] bytes = null; - try { - bytes = (hsk.getRow().toString() + hsk.getColumn().toString()). - getBytes(UTF8_ENCODING); - } catch (UnsupportedEncodingException e) { - throw new IOException(e.toString()); - } - return new Key(bytes); - } - static boolean isTopFileRegion(final Range r) { return r.equals(Range.top); } @@ -529,6 +490,7 @@ public class HStoreFile implements HConstants { * @param f Column family. * @return the bloom filter directory path */ + @Deprecated public static Path getFilterDir(Path dir, int encodedRegionName, final byte [] f) { return getFamilySubDir(dir, encodedRegionName, f, HSTORE_FILTER_DIR); @@ -626,6 +588,15 @@ public class HStoreFile implements HConstants { static final Class VALUE_CLASS = ImmutableBytesWritable.class; + /** + * Custom bloom filter key maker. + * @param key + * @return Key made of bytes of row only. + */ + protected static Key getBloomFilterKey(WritableComparable key) { + return new Key(((HStoreKey) key).getRow()); + } + /** * A reader capable of reading and caching blocks of the data file. */ @@ -718,22 +689,10 @@ public class HStoreFile implements HConstants { * filter is null, just passes invocation to parent. 
    */
   static class BloomFilterMapFile extends HbaseMapFile {
-    static class Reader extends HbaseReader {
-      private final Filter bloomFilter;
+    protected static final String BLOOMFILTER_FILE_NAME = "filter";
 
-      /**
-       * @param fs
-       * @param dirName
-       * @param conf
-       * @param filter
-       * @throws IOException
-       */
-      public Reader(FileSystem fs, String dirName, Configuration conf,
-          final Filter filter)
-      throws IOException {
-        super(fs, dirName, conf);
-        bloomFilter = filter;
-      }
+    static class Reader extends HbaseReader {
+      private final BloomFilter bloomFilter;
 
       /**
        * @param fs
@@ -744,10 +703,31 @@
        * @throws IOException
        */
       public Reader(FileSystem fs, String dirName, Configuration conf,
-          final Filter filter, final boolean blockCacheEnabled)
+          final boolean filter, final boolean blockCacheEnabled)
       throws IOException {
         super(fs, dirName, conf, blockCacheEnabled);
-        bloomFilter = filter;
+        if (filter) {
+          this.bloomFilter = loadBloomFilter(fs, dirName);
+        } else {
+          this.bloomFilter = null;
+        }
+      }
+
+      private BloomFilter loadBloomFilter(FileSystem fs, String dirName)
+      throws IOException {
+        Path filterFile = new Path(dirName, BLOOMFILTER_FILE_NAME);
+        if(!fs.exists(filterFile)) {
+          throw new FileNotFoundException("Could not find bloom filter: " +
+              filterFile);
+        }
+        BloomFilter filter = new BloomFilter();
+        FSDataInputStream in = fs.open(filterFile);
+        try {
+          filter.readFields(in);
+        } finally {
+          in.close();
+        }
+        return filter;
       }
 
       /** {@inheritDoc} */
@@ -788,27 +768,65 @@
         }
         return null;
       }
+
+      /* @return size of the bloom filter */
+      int getBloomFilterSize() {
+        return bloomFilter == null ? 0 : bloomFilter.getVectorSize();
+      }
     }
 
     static class Writer extends HbaseWriter {
-      private final Filter bloomFilter;
+      private static final double DEFAULT_NUMBER_OF_HASH_FUNCTIONS = 4.0;
+      private final BloomFilter bloomFilter;
+      private final String dirName;
+      private final FileSystem fs;
 
       /**
        * @param conf
        * @param fs
       * @param dirName
-       * @param keyClass
-       * @param valClass
       * @param compression
       * @param filter
+       * @param nrows
       * @throws IOException
       */
      @SuppressWarnings("unchecked")
      public Writer(Configuration conf, FileSystem fs, String dirName,
-          SequenceFile.CompressionType compression, final Filter filter)
+          SequenceFile.CompressionType compression, final boolean filter,
+          int nrows)
      throws IOException {
        super(conf, fs, dirName, compression);
-        bloomFilter = filter;
+        this.dirName = dirName;
+        this.fs = fs;
+        if (filter) {
+          /*
+           * There is no way to automatically determine the vector size and the
+           * number of hash functions to use. In particular, bloom filters are
+           * very sensitive to the number of elements inserted into them. For
+           * HBase, the number of entries depends on the size of the data stored
+           * in the column. Currently the default region size is 256MB, so the
+           * number of entries is approximately
+           * 256MB / (average value size for column).
+           *
+           * If m denotes the number of bits in the Bloom filter (vectorSize),
+           * n denotes the number of elements inserted into the Bloom filter and
+           * k represents the number of hash functions used (nbHash), then
+           * according to Broder and Mitzenmacher,
+           *
+           * ( http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/BloomFilterSurvey.pdf )
+           *
+           * the probability of false positives is minimized when k is
+           * approximately m/n ln(2).
+           */
+          this.bloomFilter = new BloomFilter(
+              (int) Math.ceil(
+                  (DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) /
+                  Math.log(2.0)),
+              (int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS
+          );
+        } else {
+          this.bloomFilter = null;
+        }
      }
 
      /** {@inheritDoc} */
@@ -820,6 +838,36 @@
        }
        super.append(key, val);
      }
+
+      /** {@inheritDoc} */
+      @Override
+      public synchronized void close() throws IOException {
+        super.close();
+        if (this.bloomFilter != null) {
+          flushBloomFilter();
+        }
+      }
+
+      /**
+       * Flushes bloom filter to disk
+       *
+       * @throws IOException
+       */
+      private void flushBloomFilter() throws IOException {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("flushing bloom filter for " + this.dirName);
+        }
+        FSDataOutputStream out =
+          fs.create(new Path(dirName, BLOOMFILTER_FILE_NAME));
+        try {
+          bloomFilter.write(out);
+        } finally {
+          out.close();
+        }
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("flushed bloom filter for " + this.dirName);
+        }
+      }
    }
  }
@@ -841,21 +889,12 @@
        final Configuration conf, final Range r,
        final WritableComparable midKey)
    throws IOException {
-      this(fs, dirName, conf, r, midKey, null, false);
+      this(fs, dirName, conf, r, midKey, false, false);
    }
 
    HalfMapFileReader(final FileSystem fs, final String dirName,
        final Configuration conf, final Range r,
-        final WritableComparable midKey, final Filter filter)
-    throws IOException {
-      super(fs, dirName, conf, filter);
-      top = isTopFileRegion(r);
-      midkey = midKey;
-    }
-
-    HalfMapFileReader(final FileSystem fs, final String dirName,
-        final Configuration conf, final Range r,
-        final WritableComparable midKey, final Filter filter,
+        final WritableComparable midKey, final boolean filter,
        final boolean blockCacheEnabled)
    throws IOException {
      super(fs, dirName, conf, filter, blockCacheEnabled);
diff --git a/src/java/org/apache/hadoop/hbase/regionserver/MapFileCompactionReader.java b/src/java/org/apache/hadoop/hbase/regionserver/MapFileCompactionReader.java
deleted file mode 100644
index ab5bf4ee1c7..00000000000
--- a/src/java/org/apache/hadoop/hbase/regionserver/MapFileCompactionReader.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Copyright 2008 The Apache Software Foundation
- *
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package org.apache.hadoop.hbase.regionserver; - -import java.io.IOException; - -import org.apache.hadoop.io.MapFile; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.Writable; - -/** A compaction reader for MapFile */ -class MapFileCompactionReader implements CompactionReader { - final MapFile.Reader reader; - - MapFileCompactionReader(final MapFile.Reader r) { - this.reader = r; - } - - /** {@inheritDoc} */ - public void close() throws IOException { - this.reader.close(); - } - - /** {@inheritDoc} */ - public boolean next(WritableComparable key, Writable val) - throws IOException { - return this.reader.next(key, val); - } - - /** {@inheritDoc} */ - public void reset() throws IOException { - this.reader.reset(); - } -} - diff --git a/src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java index 24e9d5699de..597c3dbe27d 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java @@ -92,7 +92,8 @@ implements ChangedReadersObserver { // Most recent map file should be first int i = readers.length - 1; for(HStoreFile curHSF: store.getStorefiles().values()) { - readers[i--] = curHSF.getReader(store.fs, store.bloomFilter); + readers[i--] = curHSF.getReader(store.fs, + store.getFamily().isBloomFilterEnabled(), false); } this.keys = new HStoreKey[readers.length]; diff --git a/src/java/org/apache/hadoop/hbase/rest/TableHandler.java b/src/java/org/apache/hadoop/hbase/rest/TableHandler.java index c6bbaf319ef..2ea28a26231 100644 --- a/src/java/org/apache/hadoop/hbase/rest/TableHandler.java +++ b/src/java/org/apache/hadoop/hbase/rest/TableHandler.java @@ -413,7 +413,7 @@ public class TableHandler extends GenericHandler { doElement(outputter, "name", Bytes.toString(e.getName())); doElement(outputter, "compression", e.getCompression().toString()); doElement(outputter, "bloomfilter", - e.getBloomFilter() == null? 
"NONE": e.getBloomFilter().toString()); + Boolean.toString(e.isBloomFilterEnabled())); doElement(outputter, "max-versions", Integer.toString(e.getMaxVersions())); doElement(outputter, "maximum-cell-size", diff --git a/src/java/org/apache/hadoop/hbase/thrift/ThriftUtilities.java b/src/java/org/apache/hadoop/hbase/thrift/ThriftUtilities.java index 4d55484125e..0ce6dbd6735 100644 --- a/src/java/org/apache/hadoop/hbase/thrift/ThriftUtilities.java +++ b/src/java/org/apache/hadoop/hbase/thrift/ThriftUtilities.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hbase.thrift; -import org.apache.hadoop.hbase.BloomFilterDescriptor; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HColumnDescriptor.CompressionType; import org.apache.hadoop.hbase.thrift.generated.ColumnDescriptor; @@ -38,19 +37,9 @@ public class ThriftUtilities { static public HColumnDescriptor colDescFromThrift(ColumnDescriptor in) throws IllegalArgument { CompressionType comp = CompressionType.valueOf(in.compression); - BloomFilterDescriptor bloom = null; + boolean bloom = false; if (in.bloomFilterType.compareTo("NONE") != 0) { - if (in.bloomFilterVectorSize > 0 && in.bloomFilterNbHashes > 0) { - bloom = new BloomFilterDescriptor(BloomFilterDescriptor.BloomFilterType - .valueOf(in.bloomFilterType), in.bloomFilterVectorSize, - in.bloomFilterNbHashes); - } else if (in.bloomFilterVectorSize > 0) { - bloom = new BloomFilterDescriptor(BloomFilterDescriptor.BloomFilterType - .valueOf(in.bloomFilterType), in.bloomFilterVectorSize); - } else { - throw new IllegalArgument( - "must specify number of entries for bloom filter"); - } + bloom = true; } if (in.name == null || in.name.length <= 0) { @@ -78,12 +67,7 @@ public class ThriftUtilities { col.inMemory = in.isInMemory(); col.blockCacheEnabled = in.isBlockCacheEnabled(); col.maxValueLength = in.getMaxValueLength(); - BloomFilterDescriptor bloom = in.getBloomFilter(); - if (bloom != null) { - col.bloomFilterType = bloom.getType().toString(); - col.bloomFilterVectorSize = bloom.getVectorSize(); - col.bloomFilterNbHashes = bloom.getNbHash(); - } + col.bloomFilterType = Boolean.toString(in.isBloomFilterEnabled()); return col; } diff --git a/src/java/org/apache/hadoop/hbase/util/MetaUtils.java b/src/java/org/apache/hadoop/hbase/util/MetaUtils.java index 2c0fd5f705a..309f065ec50 100644 --- a/src/java/org/apache/hadoop/hbase/util/MetaUtils.java +++ b/src/java/org/apache/hadoop/hbase/util/MetaUtils.java @@ -389,7 +389,14 @@ public class MetaUtils { } } - private void updateMETARegionInfo(HRegion r, final HRegionInfo hri) + /** + * Update COL_REGIONINFO in meta region r with HRegionInfo hri + * + * @param r + * @param hri + * @throws IOException + */ + public void updateMETARegionInfo(HRegion r, final HRegionInfo hri) throws IOException { if (LOG.isDebugEnabled()) { HRegionInfo h = Writables.getHRegionInfoOrNull( diff --git a/src/java/org/apache/hadoop/hbase/util/Migrate.java b/src/java/org/apache/hadoop/hbase/util/Migrate.java index be3dc4967ce..2060c0766e2 100644 --- a/src/java/org/apache/hadoop/hbase/util/Migrate.java +++ b/src/java/org/apache/hadoop/hbase/util/Migrate.java @@ -44,10 +44,12 @@ import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MasterNotRunningException; import org.apache.hadoop.hbase.client.HBaseAdmin; import 
org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HStore; +import org.apache.hadoop.hbase.regionserver.HStoreFile; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; @@ -218,12 +220,16 @@ public class Migrate extends Configured implements Tool { migrateFromNoVersion(rootFiles); migrateToV2(rootFiles); migrateToV3(); + migrateToV4(); } else if (version.compareTo("0.1") == 0) { migrateToV2(getRootDirFiles()); migrateToV3(); + migrateToV4(); } else if (version.compareTo("2") == 0) { migrateToV3(); } else if (version.compareTo("3") == 0) { + migrateToV4(); + } else if (version.compareTo("4") == 0) { // Nothing to do. } else { throw new IOException("Unrecognized version: " + version); @@ -289,6 +295,11 @@ public class Migrate extends Configured implements Tool { LOG.info("Checking to see if hbase in Filesystem is at version 3."); addHistorianFamilyToMeta(); } + + private void migrateToV4() throws IOException { + LOG.info("Checking to see if hbase in Filesystem is at version 4."); + updateBloomFilters(); + } private FileStatus[] getRootDirFiles() throws IOException { FileStatus[] stats = fs.listStatus(FSUtils.getRootDir(this.conf)); @@ -496,7 +507,7 @@ public class Migrate extends Configured implements Tool { utils.addColumn(HConstants.META_TABLE_NAME, new HColumnDescriptor(HConstants.COLUMN_FAMILY_HISTORIAN, HConstants.ALL_VERSIONS, HColumnDescriptor.CompressionType.NONE, - false, false, Integer.MAX_VALUE, HConstants.FOREVER, null)); + false, false, Integer.MAX_VALUE, HConstants.FOREVER, false)); LOG.info("Historian family added to .META."); // Flush out the meta edits. } @@ -504,6 +515,57 @@ public class Migrate extends Configured implements Tool { utils.shutdown(); } } + + private void updateBloomFilters() throws IOException { + if (this.migrationNeeded && this.readOnly) { + return; + } + final Path rootDir = FSUtils.getRootDir(conf); + final MetaUtils utils = new MetaUtils(this.conf); + try { + // Scan the root region + utils.scanRootRegion(new MetaUtils.ScannerListener() { + public boolean processRow(HRegionInfo info) throws IOException { + // Scan every meta region + final HRegion metaRegion = utils.getMetaRegion(info); + utils.scanMetaRegion(info, new MetaUtils.ScannerListener() { + public boolean processRow(HRegionInfo tableInfo) throws IOException { + HTableDescriptor desc = tableInfo.getTableDesc(); + Path tableDir = + HTableDescriptor.getTableDir(rootDir, desc.getName()); + for (HColumnDescriptor column: desc.getFamilies()) { + if (column.isBloomFilterEnabled()) { + // Column has a bloom filter + migrationNeeded = true; + + Path filterDir = HStoreFile.getFilterDir(tableDir, + tableInfo.getEncodedName(), column.getName()); + if (fs.exists(filterDir)) { + // Filter dir exists + if (readOnly) { + // And if we are only checking to see if a migration is + // needed - it is. We're done. + return false; + } + // Delete the filter + fs.delete(filterDir, true); + // Update the HRegionInfo in meta + utils.updateMETARegionInfo(metaRegion, tableInfo); + } + } + } + return true; + } + }); + // Stop scanning if only doing a check and we've determined that a + // migration is needed. Otherwise continue by returning true + return readOnly && migrationNeeded ? 
false : true;
+        }
+      });
+    } finally {
+      utils.shutdown();
+    }
+  }
 
   @SuppressWarnings("static-access")
   private int parseArgs(String[] args) {
diff --git a/src/java/org/onelab/filter/BloomFilter.java b/src/java/org/onelab/filter/BloomFilter.java
index 2fcf93c9b66..ef4968edc91 100644
--- a/src/java/org/onelab/filter/BloomFilter.java
+++ b/src/java/org/onelab/filter/BloomFilter.java
@@ -192,6 +192,13 @@ public class BloomFilter extends Filter {
     bf.or(this);
     return bf;
   }//end clone()
+
+  /**
+   * @return size of the bloomfilter
+   */
+  public int getVectorSize() {
+    return this.vectorSize;
+  }
 
   // Writable
diff --git a/src/test/org/apache/hadoop/hbase/HBaseTestCase.java b/src/test/org/apache/hadoop/hbase/HBaseTestCase.java
index 488387c03e2..f1cf48c88d6 100644
--- a/src/test/org/apache/hadoop/hbase/HBaseTestCase.java
+++ b/src/test/org/apache/hadoop/hbase/HBaseTestCase.java
@@ -194,13 +194,13 @@ public abstract class HBaseTestCase extends TestCase {
     HTableDescriptor htd = new HTableDescriptor(name);
     htd.addFamily(new HColumnDescriptor(COLFAMILY_NAME1, versions,
       CompressionType.NONE, false, false, Integer.MAX_VALUE,
-      HConstants.FOREVER, null));
+      HConstants.FOREVER, false));
     htd.addFamily(new HColumnDescriptor(COLFAMILY_NAME2, versions,
       CompressionType.NONE, false, false, Integer.MAX_VALUE,
-      HConstants.FOREVER, null));
+      HConstants.FOREVER, false));
    htd.addFamily(new HColumnDescriptor(COLFAMILY_NAME3, versions,
      CompressionType.NONE, false, false, Integer.MAX_VALUE,
-      HConstants.FOREVER, null));
+      HConstants.FOREVER, false));
     return htd;
   }
diff --git a/src/test/org/apache/hadoop/hbase/TestBloomFilters.java b/src/test/org/apache/hadoop/hbase/TestBloomFilters.java
index 640eb320de9..aea8d8c8808 100644
--- a/src/test/org/apache/hadoop/hbase/TestBloomFilters.java
+++ b/src/test/org/apache/hadoop/hbase/TestBloomFilters.java
@@ -145,78 +145,8 @@ public class TestBloomFilters extends HBaseClusterTestCase {
       Bytes.toBytes("yzabcdef")
   };
 
-  /** constructor */
-  public TestBloomFilters() {
-    super();
-  }
-
-  /**
-   * Test that specifies explicit parameters for the bloom filter
-   * @throws IOException
-   */
-  public void testExplicitParameters() throws IOException {
-    HTable table = null;
-
-    // Setup
-
-    HTableDescriptor desc = new HTableDescriptor(getName());
-    BloomFilterDescriptor bloomFilter =
-      new BloomFilterDescriptor(              // if we insert 1000 values
-          BloomFilterDescriptor.BloomFilterType.BLOOMFILTER, // plain old bloom filter
-          12499,                              // number of bits
-          4                                   // number of hash functions
-      );
-
-    desc.addFamily(
-        new HColumnDescriptor(CONTENTS,               // Column name
-            1,                                        // Max versions
-            HColumnDescriptor.CompressionType.NONE,   // no compression
-            HColumnDescriptor.DEFAULT_IN_MEMORY,      // not in memory
-            HColumnDescriptor.DEFAULT_BLOCKCACHE,
-            HColumnDescriptor.DEFAULT_LENGTH,
-            HColumnDescriptor.DEFAULT_TTL,
-            bloomFilter
-        )
-    );
-
-    // Create the table
-
-    HBaseAdmin admin = new HBaseAdmin(conf);
-    admin.createTable(desc);
-
-    // Open table
-
-    table = new HTable(conf, desc.getName());
-
-    // Store some values
-
-    for(int i = 0; i < 100; i++) {
-      byte [] row = rows[i];
-      String value = row.toString();
-      BatchUpdate b = new BatchUpdate(row);
-      b.put(CONTENTS, value.getBytes(HConstants.UTF8_ENCODING));
-      table.commit(b);
-    }
-    try {
-      // Give cache flusher and log roller a chance to run
-      // Otherwise we'll never hit the bloom filter, just the memcache
-      Thread.sleep(conf.getLong(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000) * 2);
-    } catch (InterruptedException e) {
-      // ignore
-    }
-
-
-    for(int i = 0; i < 
testKeys.length; i++) { - Cell value = table.get(testKeys[i], CONTENTS); - if(value != null && value.getValue().length != 0) { - LOG.info("non existant key: " + testKeys[i] + " returned value: " + - new String(value.getValue(), HConstants.UTF8_ENCODING)); - } - } - } - - /** - * Test that uses computed for the bloom filter + * Test that uses automatic bloom filter * @throws IOException */ public void testComputedParameters() throws IOException { @@ -225,14 +155,6 @@ public class TestBloomFilters extends HBaseClusterTestCase { // Setup HTableDescriptor desc = new HTableDescriptor(getName()); - - BloomFilterDescriptor bloomFilter = - new BloomFilterDescriptor( - BloomFilterDescriptor.BloomFilterType.BLOOMFILTER, // plain old bloom filter - 1000 // estimated number of entries - ); - LOG.info("vector size: " + bloomFilter.vectorSize); - desc.addFamily( new HColumnDescriptor(CONTENTS, // Column name 1, // Max versions @@ -241,7 +163,7 @@ public class TestBloomFilters extends HBaseClusterTestCase { HColumnDescriptor.DEFAULT_BLOCKCACHE, HColumnDescriptor.DEFAULT_LENGTH, HColumnDescriptor.DEFAULT_TTL, - bloomFilter + true ) ); diff --git a/src/test/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java b/src/test/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java index 60ffd05be95..701fb3921f9 100644 --- a/src/test/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java +++ b/src/test/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java @@ -129,9 +129,9 @@ public class TestHStoreFile extends HBaseTestCase { JenkinsHash.hash(Bytes.toBytes(getName())), Bytes.toBytes("colfamily"), 1234567890L, null); MapFile.Writer writer = - hsf.getWriter(this.fs, SequenceFile.CompressionType.NONE, null); + hsf.getWriter(this.fs, SequenceFile.CompressionType.NONE, false, 0); writeStoreFile(writer); - MapFile.Reader reader = hsf.getReader(this.fs, null); + MapFile.Reader reader = hsf.getReader(this.fs, false, false); // Split on a row, not in middle of row. Midkey returned by reader // may be in middle of row. Create new one with empty column and // timestamp. @@ -162,7 +162,7 @@ public class TestHStoreFile extends HBaseTestCase { otherReference.getMidkey().toString()); // Now confirm that I can read from the reference and that it only gets // keys from top half of the file. - MapFile.Reader halfReader = refHsf.getReader(this.fs, null); + MapFile.Reader halfReader = refHsf.getReader(this.fs, false, false); HStoreKey key = new HStoreKey(); ImmutableBytesWritable value = new ImmutableBytesWritable(); boolean first = true; diff --git a/src/test/org/apache/hadoop/hbase/regionserver/TestTimestamp.java b/src/test/org/apache/hadoop/hbase/regionserver/TestTimestamp.java index 85bda3330ec..1d6817f5dfb 100644 --- a/src/test/org/apache/hadoop/hbase/regionserver/TestTimestamp.java +++ b/src/test/org/apache/hadoop/hbase/regionserver/TestTimestamp.java @@ -80,7 +80,7 @@ public class TestTimestamp extends HBaseClusterTestCase { HTableDescriptor htd = createTableDescriptor(getName()); htd.addFamily(new HColumnDescriptor(COLUMN, VERSIONS, CompressionType.NONE, false, false, Integer.MAX_VALUE, - HConstants.FOREVER, null)); + HConstants.FOREVER, false)); return createNewHRegion(htd, null, null); } }
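
The self-sizing arithmetic this patch introduces in BloomFilterMapFile.Writer is worth seeing in isolation: it fixes k = 4 hash functions and sizes the bit vector as m = k * n / ln(2), which puts the expected false-positive rate near (1/2)^4, about 6%. The standalone sketch below reproduces that computation against the org.onelab.filter.BloomFilter class in this tree; the BloomFilterSizing class name and the sample row counts are illustrative only and not part of the patch, while getVectorSize() is the accessor the patch adds.

import org.onelab.filter.BloomFilter;

public class BloomFilterSizing {
  // k: number of hash functions, mirroring DEFAULT_NUMBER_OF_HASH_FUNCTIONS above.
  private static final double HASH_FUNCTIONS = 4.0;

  /** Bit-vector size m = ceil(k * n / ln(2)) for n expected rows. */
  static int vectorSize(int nrows) {
    return (int) Math.ceil((HASH_FUNCTIONS * (1.0 * nrows)) / Math.log(2.0));
  }

  public static void main(String[] args) {
    for (int nrows : new int[] { 1000, 100000, 1000000 }) {
      int m = vectorSize(nrows); // e.g. 1000 rows -> 5771 bits
      // Vector size first, then hash count, as BloomFilterMapFile.Writer does.
      BloomFilter filter = new BloomFilter(m, (int) HASH_FUNCTIONS);
      System.out.println(nrows + " rows -> " + filter.getVectorSize() + " bits");
    }
  }
}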
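The HColumnDescriptor.readFields change above relies on a version-gating idiom: a version-5 writer emits only the boolean flag, while a reader that encounters an older serialized descriptor must still consume the obsolete BloomFilterDescriptor bytes so the fields that follow deserialize from the correct offset. A minimal self-contained sketch of the idiom follows; the class and field names are hypothetical, and the three-int legacy payload is an assumption standing in for BloomFilterDescriptor's actual wire format.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

class VersionedDescriptor {
  static final byte CURRENT_VERSION = 5;
  boolean bloomFilter;
  int timeToLive;

  void write(DataOutput out) throws IOException {
    out.writeByte(CURRENT_VERSION);
    out.writeBoolean(bloomFilter); // version >= 5 serializes only the flag
    out.writeInt(timeToLive);
  }

  void readFields(DataInput in) throws IOException {
    int version = in.readByte();
    bloomFilter = in.readBoolean();
    if (bloomFilter && version < 5) {
      // Pre-5 descriptors serialized a filter descriptor after the flag;
      // skip those bytes so timeToLive is read from the correct position.
      in.readInt(); // filter type ordinal (assumed legacy layout)
      in.readInt(); // vector size
      in.readInt(); // number of hash functions
    }
    timeToLive = in.readInt();
  }
}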