HBASE-17905 [hbase-spark] bulkload does not work when table not exist
Signed-off-by: tedyu <yuzhihong@gmail.com>
This commit is contained in:
parent
02da5a6104
commit
d7ddc79198
|
@ -33,8 +33,8 @@ import org.apache.spark.Partitioner
|
|||
@InterfaceAudience.Public
|
||||
class BulkLoadPartitioner(startKeys:Array[Array[Byte]])
|
||||
extends Partitioner {
|
||||
|
||||
override def numPartitions: Int = startKeys.length
|
||||
// when table not exist, startKeys = Byte[0][]
|
||||
override def numPartitions: Int = if (startKeys.length == 0) 1 else startKeys.length
|
||||
|
||||
override def getPartition(key: Any): Int = {
|
||||
|
||||
|
@ -53,8 +53,11 @@ class BulkLoadPartitioner(startKeys:Array[Array[Byte]])
|
|||
case _ =>
|
||||
key.asInstanceOf[Array[Byte]]
|
||||
}
|
||||
val partition = util.Arrays.binarySearch(startKeys, rowKey, comparator)
|
||||
if (partition < 0) partition * -1 + -2
|
||||
else partition
|
||||
var partition = util.Arrays.binarySearch(startKeys, rowKey, comparator)
|
||||
if (partition < 0)
|
||||
partition = partition * -1 + -2
|
||||
if (partition < 0)
|
||||
partition = 0
|
||||
partition
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,7 +48,7 @@ import org.apache.spark.streaming.dstream.DStream
|
|||
import java.io._
|
||||
import org.apache.hadoop.security.UserGroupInformation
|
||||
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod
|
||||
import org.apache.hadoop.fs.{Path, FileSystem}
|
||||
import org.apache.hadoop.fs.{Path, FileAlreadyExistsException, FileSystem}
|
||||
import scala.collection.mutable
|
||||
|
||||
/**
|
||||
|
@ -620,9 +620,17 @@ class HBaseContext(@transient sc: SparkContext,
|
|||
compactionExclude: Boolean = false,
|
||||
maxSize:Long = HConstants.DEFAULT_MAX_FILE_SIZE):
|
||||
Unit = {
|
||||
val stagingPath = new Path(stagingDir)
|
||||
val fs = stagingPath.getFileSystem(config)
|
||||
if (fs.exists(stagingPath)) {
|
||||
throw new FileAlreadyExistsException("Path " + stagingDir + " already exists")
|
||||
}
|
||||
val conn = HBaseConnectionCache.getConnection(config)
|
||||
val regionLocator = conn.getRegionLocator(tableName)
|
||||
val startKeys = regionLocator.getStartKeys
|
||||
if (startKeys.length == 0) {
|
||||
logInfo("Table " + tableName.toString + " was not found")
|
||||
}
|
||||
val defaultCompressionStr = config.get("hfile.compression",
|
||||
Compression.Algorithm.NONE.getName)
|
||||
val hfileCompression = HFileWriterImpl
|
||||
|
@ -743,9 +751,17 @@ class HBaseContext(@transient sc: SparkContext,
|
|||
compactionExclude: Boolean = false,
|
||||
maxSize:Long = HConstants.DEFAULT_MAX_FILE_SIZE):
|
||||
Unit = {
|
||||
val stagingPath = new Path(stagingDir)
|
||||
val fs = stagingPath.getFileSystem(config)
|
||||
if (fs.exists(stagingPath)) {
|
||||
throw new FileAlreadyExistsException("Path " + stagingDir + " already exists")
|
||||
}
|
||||
val conn = HBaseConnectionCache.getConnection(config)
|
||||
val regionLocator = conn.getRegionLocator(tableName)
|
||||
val startKeys = regionLocator.getStartKeys
|
||||
if (startKeys.length == 0) {
|
||||
logInfo("Table " + tableName.toString + " was not found")
|
||||
}
|
||||
val defaultCompressionStr = config.get("hfile.compression",
|
||||
Compression.Algorithm.NONE.getName)
|
||||
val defaultCompression = HFileWriterImpl
|
||||
|
|
Loading…
Reference in New Issue