Revert "Define public API for spark integration module" for missing JIRA number.
This reverts commit 58b6d9759e.
parent 58b6d9759e
commit a95570cfa0
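For context, the commit being reverted had tagged the public-facing classes of the hbase-spark module with HBase's audience and stability annotations; the diff below removes those annotations and their imports again. A minimal sketch of the annotation pattern in question, shown on BulkLoadPartitioner from the diff (the method bodies here are illustrative placeholders, not the real implementation):

import org.apache.hadoop.hbase.classification.InterfaceAudience
import org.apache.hadoop.hbase.classification.InterfaceStability
import org.apache.spark.Partitioner

// Audience/stability markers on a user-facing class of the hbase-spark module;
// this revert strips exactly these two annotations and their imports.
@InterfaceAudience.Public
@InterfaceStability.Evolving
class BulkLoadPartitioner(startKeys: Array[Array[Byte]]) extends Partitioner {
  // Placeholder: one partition per region start key.
  override def numPartitions: Int = if (startKeys.isEmpty) 1 else startKeys.length
  // Placeholder: route everything to partition 0; the real class does a
  // binary search of the row key against startKeys.
  override def getPartition(key: Any): Int = 0
}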
@@ -35,8 +35,6 @@ public final class HBaseInterfaceAudience {
 public static final String COPROC = "Coprocesssor";
 public static final String REPLICATION = "Replication";
 public static final String PHOENIX = "Phoenix";
-public static final String SPARK = "Spark";
-
 /**
 * Denotes class names that appear in user facing configuration files.
 */
@@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.spark;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.Cell;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;
 import org.apache.hadoop.hbase.filter.FilterBase;
 import org.apache.hadoop.hbase.spark.datasources.BytesEncoder;
@@ -44,7 +43,6 @@ import com.google.protobuf.ByteString;
 * by SparkSQL so that we have make the filters at the region server level
 * and avoid sending the data back to the client to be filtered.
 */
-@InterfaceAudience.Private
 public class SparkSQLPushDownFilter extends FilterBase{
 protected static final Log log = LogFactory.getLog(SparkSQLPushDownFilter.class);

@@ -20,8 +20,6 @@ package org.apache.hadoop.hbase.spark
 import java.util
 import java.util.Comparator

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.util.Bytes
 import org.apache.spark.Partitioner

@@ -31,8 +29,6 @@ import org.apache.spark.Partitioner
 *
 * @param startKeys The start keys for the given table
 */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 class BulkLoadPartitioner(startKeys:Array[Array[Byte]])
 extends Partitioner {

@@ -19,12 +19,8 @@

 package org.apache.hadoop.hbase.spark

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.util.Bytes

-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 class ByteArrayComparable(val bytes:Array[Byte], val offset:Int = 0, var length:Int = -1)
 extends Comparable[ByteArrayComparable] {

@@ -18,8 +18,6 @@ package org.apache.hadoop.hbase.spark

 import java.io.Serializable

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.util.Bytes

 /**
@@ -28,8 +26,6 @@ import org.apache.hadoop.hbase.util.Bytes
 *
 * @param value The Byte Array value
 */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 class ByteArrayWrapper (var value:Array[Byte])
 extends Comparable[ByteArrayWrapper] with Serializable {
 override def compareTo(valueOther: ByteArrayWrapper): Int = {
@@ -17,8 +17,6 @@

 package org.apache.hadoop.hbase.spark

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.util.Bytes

 /**
@@ -32,8 +30,6 @@ import org.apache.hadoop.hbase.util.Bytes
 * @param qualifierOffSet Offset of qualifier value in the array
 * @param qualifierLength Length of the qualifier value with in the array
 */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 class ColumnFamilyQualifierMapKeyWrapper(val columnFamily:Array[Byte],
 val columnFamilyOffSet:Int,
 val columnFamilyLength:Int,
@@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.spark
 import java.util
 import java.util.concurrent.ConcurrentLinkedQueue

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client._
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable
 import org.apache.hadoop.hbase.mapred.TableOutputFormat
@@ -46,9 +45,6 @@ import scala.collection.mutable
 * DefaultSource for integration with Spark's dataframe datasources.
 * This class will produce a relationProvider based on input given to it from spark
 *
-* This class needs to stay in the current package 'org.apache.hadoop.hbase.spark'
-* for Spark to match the hbase data source name.
-*
 * In all this DefaultSource support the following datasource functionality
 * - Scan range pruning through filter push down logic based on rowKeys
 * - Filter push down logic on HBase Cells
@@ -56,7 +52,6 @@ import scala.collection.mutable
 * - Type conversions of basic SQL types. All conversions will be
 * Through the HBase Bytes object commands.
 */
-@InterfaceAudience.Private
 class DefaultSource extends RelationProvider with CreatableRelationProvider with Logging {
 /**
 * Is given input from SparkSQL to construct a BaseRelation
@@ -90,7 +85,6 @@ class DefaultSource extends RelationProvider with CreatableRelationProvider wit
 *
 * @param sqlContext SparkSQL context
 */
-@InterfaceAudience.Private
 case class HBaseRelation (
 @transient parameters: Map[String, String],
 userSpecifiedSchema: Option[StructType]
@@ -575,7 +569,6 @@ case class HBaseRelation (
 * @param lowerBound Lower bound of scan
 * @param isLowerBoundEqualTo Include lower bound value in the results
 */
-@InterfaceAudience.Private
 class ScanRange(var upperBound:Array[Byte], var isUpperBoundEqualTo:Boolean,
 var lowerBound:Array[Byte], var isLowerBoundEqualTo:Boolean)
 extends Serializable {
@@ -729,7 +722,6 @@ class ScanRange(var upperBound:Array[Byte], var isUpperBoundEqualTo:Boolean,
 * @param currentPoint the initial point when the filter is created
 * @param currentRange the initial scanRange when the filter is created
 */
-@InterfaceAudience.Private
 class ColumnFilter (currentPoint:Array[Byte] = null,
 currentRange:ScanRange = null,
 var points:mutable.MutableList[Array[Byte]] =
@@ -859,7 +851,6 @@ class ColumnFilter (currentPoint:Array[Byte] = null,
 * Also contains merge commends that will consolidate the filters
 * per column name
 */
-@InterfaceAudience.Private
 class ColumnFilterCollection {
 val columnFilterMap = new mutable.HashMap[String, ColumnFilter]

@@ -925,7 +916,6 @@ class ColumnFilterCollection {
 * Status object to store static functions but also to hold last executed
 * information that can be used for unit testing.
 */
-@InterfaceAudience.Private
 object DefaultSourceStaticUtils {

 val rawInteger = new RawInteger
@@ -1068,7 +1058,6 @@ object DefaultSourceStaticUtils {
 * @param currentPoint the initial point when the filter is created
 * @param currentRange the initial scanRange when the filter is created
 */
-@InterfaceAudience.Private
 class RowKeyFilter (currentPoint:Array[Byte] = null,
 currentRange:ScanRange =
 new ScanRange(null, true, new Array[Byte](0), true),
@@ -1219,6 +1208,7 @@ class RowKeyFilter (currentPoint:Array[Byte] = null,
 }
 }

-@InterfaceAudience.Private
+
+
 class ExecutionRuleForUnitTesting(val rowKeyFilter: RowKeyFilter,
 val dynamicLogicExpression: DynamicLogicExpression)
@@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.spark

 import java.util

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.spark.datasources.{BytesEncoder, JavaBytesEncoder}
 import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder
 import org.apache.hadoop.hbase.util.Bytes
@@ -34,7 +33,6 @@ import org.apache.spark.sql.types._
 * A logic tree can be written out as a string and reconstructed from that string
 *
 */
-@InterfaceAudience.Private
 trait DynamicLogicExpression {
 def execute(columnToCurrentRowValueMap: util.HashMap[String, ByteArrayComparable],
 valueFromQueryValueArray:Array[Array[Byte]]): Boolean
@@ -55,7 +53,6 @@ trait DynamicLogicExpression {
 }
 }

-@InterfaceAudience.Private
 trait CompareTrait {
 self: DynamicLogicExpression =>
 def columnName: String
@@ -71,7 +68,6 @@ trait CompareTrait {
 }
 }

-@InterfaceAudience.Private
 class AndLogicExpression (val leftExpression:DynamicLogicExpression,
 val rightExpression:DynamicLogicExpression)
 extends DynamicLogicExpression{
@@ -91,7 +87,6 @@ class AndLogicExpression (val leftExpression:DynamicLogicExpression,
 }
 }

-@InterfaceAudience.Private
 class OrLogicExpression (val leftExpression:DynamicLogicExpression,
 val rightExpression:DynamicLogicExpression)
 extends DynamicLogicExpression{
@@ -110,7 +105,6 @@ class OrLogicExpression (val leftExpression:DynamicLogicExpression,
 }
 }

-@InterfaceAudience.Private
 class EqualLogicExpression (val columnName:String,
 val valueFromQueryIndex:Int,
 val isNot:Boolean) extends DynamicLogicExpression{
@@ -131,7 +125,6 @@ class EqualLogicExpression (val columnName:String,
 }
 }

-@InterfaceAudience.Private
 class IsNullLogicExpression (val columnName:String,
 val isNot:Boolean) extends DynamicLogicExpression{
 override def execute(columnToCurrentRowValueMap:
@@ -147,7 +140,6 @@ class IsNullLogicExpression (val columnName:String,
 }
 }

-@InterfaceAudience.Private
 class GreaterThanLogicExpression (override val columnName:String,
 override val valueFromQueryIndex:Int)
 extends DynamicLogicExpression with CompareTrait{
@@ -157,7 +149,6 @@ class GreaterThanLogicExpression (override val columnName:String,
 }
 }

-@InterfaceAudience.Private
 class GreaterThanOrEqualLogicExpression (override val columnName:String,
 override val valueFromQueryIndex:Int)
 extends DynamicLogicExpression with CompareTrait{
@@ -167,7 +158,6 @@ class GreaterThanOrEqualLogicExpression (override val columnName:String,
 }
 }

-@InterfaceAudience.Private
 class LessThanLogicExpression (override val columnName:String,
 override val valueFromQueryIndex:Int)
 extends DynamicLogicExpression with CompareTrait {
@@ -177,7 +167,6 @@ class LessThanLogicExpression (override val columnName:String,
 }
 }

-@InterfaceAudience.Private
 class LessThanOrEqualLogicExpression (val columnName:String,
 val valueFromQueryIndex:Int)
 extends DynamicLogicExpression with CompareTrait{
@@ -187,7 +176,6 @@ class LessThanOrEqualLogicExpression (val columnName:String,
 }
 }

-@InterfaceAudience.Private
 class PassThroughLogicExpression() extends DynamicLogicExpression {
 override def execute(columnToCurrentRowValueMap:
 util.HashMap[String, ByteArrayComparable],
@@ -202,7 +190,6 @@ class PassThroughLogicExpression() extends DynamicLogicExpression {
 }
 }

-@InterfaceAudience.Private
 object DynamicLogicExpressionBuilder {
 def build(expressionString: String, encoder: BytesEncoder): DynamicLogicExpression = {

@@ -18,15 +18,10 @@ package org.apache.hadoop.hbase.spark

 import java.util

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-
 /**
 * This object is a clean way to store and sort all cells that will be bulk
 * loaded into a single row
 */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 class FamiliesQualifiersValues extends Serializable {
 //Tree maps are used because we need the results to
 // be sorted when we read them
@@ -19,9 +19,6 @@ package org.apache.hadoop.hbase.spark

 import java.io.Serializable

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-
 /**
 * This object will hold optional data for how a given column family's
 * writer will work
@@ -32,8 +29,6 @@ import org.apache.hadoop.hbase.classification.InterfaceStability;
 * @param dataBlockEncoding String to define the data block encoding to be used
 * in the HFile
 */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 class FamilyHFileWriteOptions( val compression:String,
 val bloomType: String,
 val blockSize: Int,
@@ -22,8 +22,6 @@ import java.util
 import java.util.UUID
 import javax.management.openmbean.KeyAlreadyExistsException

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.fs.HFileSystem
 import org.apache.hadoop.hbase._
 import org.apache.hadoop.hbase.io.compress.Compression
@@ -60,8 +58,6 @@ import scala.collection.mutable
 * of disseminating the configuration information
 * to the working and managing the life cycle of Connections.
 */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 class HBaseContext(@transient sc: SparkContext,
 @transient val config: Configuration,
 val tmpHdfsConfgFile: String = null)
@@ -17,8 +17,6 @@
 package org.apache.hadoop.hbase.spark

 import org.apache.hadoop.hbase.TableName
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client._
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable
 import org.apache.spark.streaming.dstream.DStream
@@ -29,8 +27,6 @@ import scala.reflect.ClassTag
 * HBaseDStreamFunctions contains a set of implicit functions that can be
 * applied to a Spark DStream so that we can easily interact with HBase
 */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 object HBaseDStreamFunctions {

 /**
@@ -20,8 +20,6 @@ package org.apache.hadoop.hbase.spark
 import java.util

 import org.apache.hadoop.hbase.{HConstants, TableName}
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client._
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable
 import org.apache.spark.rdd.RDD
@@ -32,8 +30,6 @@ import scala.reflect.ClassTag
 * HBaseRDDFunctions contains a set of implicit functions that can be
 * applied to a Spark RDD so that we can easily interact with HBase
 */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 object HBaseRDDFunctions
 {

@@ -19,8 +19,6 @@ package org.apache.hadoop.hbase.spark

 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.hbase.TableName
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.client.{Connection, Delete, Get, Put, Result, Scan}
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable
 import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
@@ -38,8 +36,6 @@ import scala.reflect.ClassTag
 * @param jsc This is the JavaSparkContext that we will wrap
 * @param config This is the config information to out HBase cluster
 */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 class JavaHBaseContext(@transient jsc: JavaSparkContext,
 @transient config: Configuration) extends Serializable {
 val hbaseContext = new HBaseContext(jsc.sc, config)
@@ -19,8 +19,6 @@ package org.apache.hadoop.hbase.spark

 import java.io.Serializable

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.util.Bytes

 /**
@@ -32,8 +30,6 @@ import org.apache.hadoop.hbase.util.Bytes
 * @param family Record ColumnFamily
 * @param qualifier Cell Qualifier
 */
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 class KeyFamilyQualifier(val rowKey:Array[Byte], val family:Array[Byte], val qualifier:Array[Byte])
 extends Comparable[KeyFamilyQualifier] with Serializable {
 override def compareTo(o: KeyFamilyQualifier): Int = {
@@ -18,14 +18,10 @@
 package org.apache.hadoop.hbase.spark

 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.mapreduce.InputFormat
 import org.apache.spark.rdd.NewHadoopRDD
 import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext}

-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 class NewHBaseRDD[K,V](@transient sc : SparkContext,
 @transient inputFormatClass: Class[_ <: InputFormat[K, V]],
 @transient keyClass: Class[K],
@@ -17,7 +17,6 @@

 package org.apache.hadoop.hbase.spark.datasources

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.spark.hbase._

 /**
@@ -26,14 +25,9 @@ import org.apache.hadoop.hbase.spark.hbase._
 * @param b The byte array of the bound
 * @param inc inclusive or not.
 */
-@InterfaceAudience.Private
 case class Bound(b: Array[Byte], inc: Boolean)
 // The non-overlapping ranges we need to scan, if lower is equal to upper, it is a get request
-
-@InterfaceAudience.Private
 case class Range(lower: Option[Bound], upper: Option[Bound])
-
-@InterfaceAudience.Private
 object Range {
 def apply(region: HBaseRegion): Range = {
 Range(region.start.map(Bound(_, true)), if (region.end.get.size == 0) {
@@ -44,7 +38,6 @@ object Range {
 }
 }

-@InterfaceAudience.Private
 object Ranges {
 // We assume that
 // 1. r.lower.inc is true, and r.upper.inc is false
@@ -94,7 +87,6 @@ object Ranges {
 }
 }

-@InterfaceAudience.Private
 object Points {
 def and(r: Range, ps: Seq[Array[Byte]]): Seq[Array[Byte]] = {
 ps.flatMap { p =>
@@ -17,7 +17,6 @@

 package org.apache.hadoop.hbase.spark.datasources

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.TableName
 import org.apache.hadoop.hbase.client._
 import org.apache.hadoop.hbase.spark.{HBaseConnectionKey, SmartConnection,
@@ -29,12 +28,10 @@ import scala.language.implicitConversions

 // User has to invoke release explicitly to release the resource,
 // and potentially parent resources
-@InterfaceAudience.Private
 trait Resource {
 def release(): Unit
 }

-@InterfaceAudience.Private
 case class ScanResource(tbr: TableResource, rs: ResultScanner) extends Resource {
 def release() {
 rs.close()
@@ -42,14 +39,12 @@ case class ScanResource(tbr: TableResource, rs: ResultScanner) extends Resource
 }
 }

-@InterfaceAudience.Private
 case class GetResource(tbr: TableResource, rs: Array[Result]) extends Resource {
 def release() {
 tbr.release()
 }
 }

-@InterfaceAudience.Private
 trait ReferencedResource {
 var count: Int = 0
 def init(): Unit
@@ -89,7 +84,6 @@ trait ReferencedResource {
 }
 }

-@InterfaceAudience.Private
 case class TableResource(relation: HBaseRelation) extends ReferencedResource {
 var connection: SmartConnection = _
 var table: Table = _
@@ -119,7 +113,6 @@ case class TableResource(relation: HBaseRelation) extends ReferencedResource {
 }
 }

-@InterfaceAudience.Private
 case class RegionResource(relation: HBaseRelation) extends ReferencedResource {
 var connection: SmartConnection = _
 var rl: RegionLocator = _
@@ -151,7 +144,6 @@ case class RegionResource(relation: HBaseRelation) extends ReferencedResource {
 }
 }

-@InterfaceAudience.Private
 object HBaseResources{
 implicit def ScanResToScan(sr: ScanResource): ResultScanner = {
 sr.rs
@@ -17,11 +17,6 @@

 package org.apache.hadoop.hbase.spark.datasources

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
-
-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 object HBaseSparkConf{
 // This is the hbase configuration. User can either set them in SparkConf, which
 // will take effect globally, or configure it per table, which will overwrite the value
@@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.spark.datasources

 import java.util.ArrayList

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client._
 import org.apache.hadoop.hbase.spark._
 import org.apache.hadoop.hbase.spark.hbase._
@@ -31,7 +30,6 @@ import org.apache.spark.rdd.RDD

 import scala.collection.mutable

-@InterfaceAudience.Private
 class HBaseTableScanRDD(relation: HBaseRelation,
 val hbaseContext: HBaseContext,
 @transient val filter: Option[SparkSQLPushDownFilter] = None,
@@ -17,9 +17,6 @@

 package org.apache.hadoop.hbase.spark.datasources

-import org.apache.hadoop.hbase.HBaseInterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder
 import org.apache.hadoop.hbase.util.Bytes
 import org.apache.spark.Logging
@@ -32,8 +29,6 @@ import org.apache.spark.sql.types._
 * @param low: the lower bound of the range.
 * @param upper: the upper bound of the range.
 */
-@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
-@InterfaceStability.Evolving
 case class BoundRange(low: Array[Byte],upper: Array[Byte])

 /**
@@ -45,8 +40,6 @@ case class BoundRange(low: Array[Byte],upper: Array[Byte])
 * @param greater: the set of ranges for GreaterThan/GreaterThanOrEqualTo
 * @param value: the byte array of the original value
 */
-@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
-@InterfaceStability.Evolving
 case class BoundRanges(less: Array[BoundRange], greater: Array[BoundRange], value: Array[Byte])

 /**
@@ -54,8 +47,6 @@ case class BoundRanges(less: Array[BoundRange], greater: Array[BoundRange], valu
 * encode is used for serializing the data type to byte array and the filter is
 * used to filter out the unnecessary records.
 */
-@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
-@InterfaceStability.Evolving
 trait BytesEncoder {
 def encode(dt: DataType, value: Any): Array[Byte]

@@ -92,8 +83,6 @@ trait BytesEncoder {
 def ranges(in: Any): Option[BoundRanges]
 }

-@InterfaceAudience.LimitedPrivate(Array(HBaseInterfaceAudience.SPARK))
-@InterfaceStability.Evolving
 object JavaBytesEncoder extends Enumeration with Logging{
 type JavaBytesEncoder = Value
 val Greater, GreaterEqual, Less, LessEqual, Equal, Unknown = Value
@@ -16,7 +16,6 @@ package org.apache.hadoop.hbase.spark.datasources
 * limitations under the License.
 */

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.spark.datasources.JavaBytesEncoder.JavaBytesEncoder
 import org.apache.hadoop.hbase.spark.hbase._
 import org.apache.hadoop.hbase.util.Bytes
@@ -32,7 +31,6 @@ import org.apache.spark.unsafe.types.UTF8String
 * can work correctly, which is done by wrapping the type into the first byte
 * of the serialized array.
 */
-@InterfaceAudience.Private
 class NaiveEncoder extends BytesEncoder with Logging{
 var code = 0
 def nextCode: Byte = {
@@ -30,7 +30,6 @@ import org.apache.avro.SchemaBuilder.FieldDefault
 import org.apache.avro.SchemaBuilder.RecordBuilder
 import org.apache.avro.io._
 import org.apache.commons.io.output.ByteArrayOutputStream
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.util.Bytes

 import scala.collection.JavaConversions._
@@ -44,10 +43,8 @@ import org.apache.spark.sql.types._

 import scala.collection.immutable.Map

-@InterfaceAudience.Private
-abstract class AvroException(msg: String) extends Exception(msg)

-@InterfaceAudience.Private
+abstract class AvroException(msg: String) extends Exception(msg)
 case class SchemaConversionException(msg: String) extends AvroException(msg)

 /***
@@ -58,7 +55,6 @@ case class SchemaConversionException(msg: String) extends AvroException(msg)
 * 3. convertTypeToAvro: This function constructs converter function for a given sparkSQL
 * datatype. This is used in writing Avro records out to disk
 */
-@InterfaceAudience.Private
 object SchemaConverters {

 case class SchemaType(dataType: DataType, nullable: Boolean)
@@ -397,7 +393,7 @@ object SchemaConverters {
 }
 }

-@InterfaceAudience.Private
+
 object AvroSerdes {
 // We only handle top level is record or primary type now
 def serialize(input: Any, schema: Schema): Array[Byte]= {
@@ -28,15 +28,14 @@ import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericR
 import org.apache.avro.io._
 import org.apache.commons.io.output.ByteArrayOutputStream
 import org.apache.hadoop.hbase.util.Bytes
+import org.apache.hadoop.hbase.util.Bytes
 import org.apache.spark.sql.types._

-// TODO: This is not really used in code.
 trait SerDes {
 def serialize(value: Any): Array[Byte]
 def deserialize(bytes: Array[Byte], start: Int, end: Int): Any
 }

-// TODO: This is not really used in code.
 class DoubleSerDes extends SerDes {
 override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double])
 override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = {
@@ -20,11 +20,9 @@ package org.apache.hadoop.hbase.spark.datasources
 import java.io.{IOException, ObjectInputStream, ObjectOutputStream}

 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hbase.classification.InterfaceAudience;

 import scala.util.control.NonFatal

-@InterfaceAudience.Private
 class SerializableConfiguration(@transient var value: Configuration) extends Serializable {
 private def writeObject(out: ObjectOutputStream): Unit = tryOrIOException {
 out.defaultWriteObject()
@@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.SqlLexical
 import org.apache.spark.sql.catalyst.util.DataTypeParser
 import org.apache.spark.sql.types.DataType

-// TODO: Only used in test suite.
 object DataTypeParserWrapper {
 lazy val dataTypeParser = new DataTypeParser {
 override val lexical = new SqlLexical
@@ -18,8 +18,6 @@
 package org.apache.spark.sql.datasources.hbase

 import org.apache.avro.Schema
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-import org.apache.hadoop.hbase.classification.InterfaceStability;
 import org.apache.hadoop.hbase.spark.SchemaConverters
 import org.apache.hadoop.hbase.spark.datasources._
 import org.apache.hadoop.hbase.spark.hbase._
@@ -34,7 +32,6 @@ import scala.collection.mutable
 // Due the access issue defined in spark, we have to locate the file in this package.
 // The definition of each column cell, which may be composite type
 // TODO: add avro support
-@InterfaceAudience.Private
 case class Field(
 colName: String,
 cf: String,
@@ -115,7 +112,6 @@ case class Field(

 // The row key definition, with each key refer to the col defined in Field, e.g.,
 // key1:key2:key3
-@InterfaceAudience.Private
 case class RowKey(k: String) {
 val keys = k.split(":")
 var fields: Seq[Field] = _
@@ -131,7 +127,6 @@ case class RowKey(k: String) {
 }
 }
 // The map between the column presented to Spark and the HBase field
-@InterfaceAudience.Private
 case class SchemaMap(map: mutable.HashMap[String, Field]) {
 def toFields = map.map { case (name, field) =>
 StructField(name, field.dt)
@@ -144,7 +139,6 @@ case class SchemaMap(map: mutable.HashMap[String, Field]) {


 // The definition of HBase and Relation relation schema
-@InterfaceAudience.Private
 case class HBaseTableCatalog(
 namespace: String,
 name: String,
@@ -209,8 +203,6 @@ case class HBaseTableCatalog(
 initRowKey
 }

-@InterfaceAudience.Public
-@InterfaceStability.Evolving
 object HBaseTableCatalog {
 // If defined and larger than 3, a new table will be created with the nubmer of region specified.
 val newTable = "newtable"
@@ -294,7 +286,6 @@ object HBaseTableCatalog {
 |}""".stripMargin
 */
 @deprecated("Please use new json format to define HBaseCatalog")
-// TODO: There is no need to deprecate since this is the first release.
 def convert(parameters: Map[String, String]): Map[String, String] = {
 val tableName = parameters.get(TABLE_KEY).getOrElse(null)
 // if the hbase.table is not defined, we assume it is json format already.
@@ -328,7 +319,6 @@ object HBaseTableCatalog {
 * @param schemaMappingString The schema mapping string from the SparkSQL map
 * @return A map of definitions keyed by the SparkSQL column name
 */
-@InterfaceAudience.Private
 def generateSchemaMappingMap(schemaMappingString:String):
 java.util.HashMap[String, SchemaQualifierDefinition] = {
 println(schemaMappingString)
@@ -372,7 +362,6 @@ object HBaseTableCatalog {
 * @param columnFamily HBase column family
 * @param qualifier HBase qualifier name
 */
-@InterfaceAudience.Private
 case class SchemaQualifierDefinition(columnName:String,
 colType:String,
 columnFamily:String,
@@ -24,9 +24,6 @@ import org.apache.spark.sql.execution.SparkSqlSerializer
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String

-import org.apache.hadoop.hbase.classification.InterfaceAudience;
-
-@InterfaceAudience.Private
 object Utils {
